[clang] [llvm] [X86][AVX10.2] Support AVX10.2-BF16 new instructions. (PR #101603)
Freddy Ye via cfe-commits
cfe-commits at lists.llvm.org
Thu Aug 1 19:27:59 PDT 2024
https://github.com/FreddyLeaf created https://github.com/llvm/llvm-project/pull/101603
- Support AVX10.2 option and VMPSADBW/VADDP[D,H,S] new instructions
- Support AVX10.2-BF16 new instructions.
From bd62a35cca5b757a3316d650b844fb81485ffd4c Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Sat, 27 Jul 2024 22:21:32 +0800
Subject: [PATCH 1/2] Support AVX10.2 option and VMPSADBW/VADDP[D,H,S] new
instructions
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965
---
clang/docs/ReleaseNotes.rst | 2 +
clang/include/clang/Basic/BuiltinsX86.def | 8 +
clang/include/clang/Driver/Options.td | 6 +
clang/lib/Basic/Targets/X86.cpp | 12 +
clang/lib/Basic/Targets/X86.h | 2 +
clang/lib/Driver/ToolChains/Arch/X86.cpp | 2 +-
clang/lib/Headers/CMakeLists.txt | 2 +
clang/lib/Headers/avx10_2_512niintrin.h | 35 +++
clang/lib/Headers/avx10_2niintrin.h | 83 +++++++
clang/lib/Headers/immintrin.h | 8 +
clang/lib/Sema/SemaX86.cpp | 3 +
.../test/CodeGen/X86/avx10_2_512ni-builtins.c | 24 ++
clang/test/CodeGen/X86/avx10_2ni-builtins.c | 105 +++++++++
clang/test/CodeGen/attr-target-x86.c | 8 +-
clang/test/Driver/x86-target-features.c | 7 +
clang/test/Preprocessor/x86_target_features.c | 9 +
llvm/docs/ReleaseNotes.rst | 2 +
llvm/include/llvm/IR/IntrinsicsX86.td | 30 ++-
.../Support/X86DisassemblerDecoderCommon.h | 45 +++-
.../llvm/TargetParser/X86TargetParser.def | 2 +
.../X86/Disassembler/X86Disassembler.cpp | 3 +
.../lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 5 +-
.../X86/MCTargetDesc/X86MCCodeEmitter.cpp | 10 +-
llvm/lib/Target/X86/X86.td | 6 +
llvm/lib/Target/X86/X86ISelLowering.cpp | 1 +
llvm/lib/Target/X86/X86ISelLowering.h | 2 +
llvm/lib/Target/X86/X86InstrAVX10.td | 33 +++
llvm/lib/Target/X86/X86InstrFormats.td | 2 +
llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 12 +-
llvm/lib/Target/X86/X86InstrInfo.td | 1 +
llvm/lib/Target/X86/X86InstrPredicates.td | 3 +
llvm/lib/Target/X86/X86InstrSSE.td | 22 +-
llvm/lib/Target/X86/X86IntrinsicsInfo.h | 10 +
llvm/lib/TargetParser/Host.cpp | 11 +-
llvm/lib/TargetParser/X86TargetParser.cpp | 3 +
.../CodeGen/X86/avx10_2_512ni-intrinsics.ll | 41 ++++
llvm/test/CodeGen/X86/avx10_2ni-intrinsics.ll | 216 ++++++++++++++++++
.../test/MC/Disassembler/X86/avx10_2ni-32.txt | 150 ++++++++++++
.../test/MC/Disassembler/X86/avx10_2ni-64.txt | 150 ++++++++++++
llvm/test/MC/X86/avx10_2ni-32-intel.s | 149 ++++++++++++
llvm/test/MC/X86/avx10_2ni-64-att.s | 149 ++++++++++++
llvm/test/TableGen/x86-fold-tables.inc | 9 +
llvm/utils/TableGen/X86DisassemblerTables.cpp | 32 ++-
llvm/utils/TableGen/X86ManualInstrMapping.def | 4 +
llvm/utils/TableGen/X86RecognizableInstr.cpp | 26 ++-
llvm/utils/TableGen/X86RecognizableInstr.h | 2 +
46 files changed, 1413 insertions(+), 34 deletions(-)
create mode 100644 clang/lib/Headers/avx10_2_512niintrin.h
create mode 100644 clang/lib/Headers/avx10_2niintrin.h
create mode 100644 clang/test/CodeGen/X86/avx10_2_512ni-builtins.c
create mode 100644 clang/test/CodeGen/X86/avx10_2ni-builtins.c
create mode 100644 llvm/lib/Target/X86/X86InstrAVX10.td
create mode 100644 llvm/test/CodeGen/X86/avx10_2_512ni-intrinsics.ll
create mode 100644 llvm/test/CodeGen/X86/avx10_2ni-intrinsics.ll
create mode 100644 llvm/test/MC/Disassembler/X86/avx10_2ni-32.txt
create mode 100644 llvm/test/MC/Disassembler/X86/avx10_2ni-64.txt
create mode 100644 llvm/test/MC/X86/avx10_2ni-32-intel.s
create mode 100644 llvm/test/MC/X86/avx10_2ni-64-att.s
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 866adefd5d3c4..183adb9e003f2 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -216,6 +216,8 @@ X86 Support
functions defined by the ``*mmintrin.h`` headers. A mapping can be
found in the file ``clang/www/builtins.py``.
+- Support ISA of ``AVX10.2``.
+
Arm and AArch64 Support
^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 06ca30d65f5bd..f028711a807c0 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -1959,6 +1959,14 @@ TARGET_HEADER_BUILTIN(__readgsword, "UsUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES,
TARGET_HEADER_BUILTIN(__readgsdword, "UNiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(__readgsqword, "ULLiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
+// AVX10.2 VMPSADBW
+TARGET_BUILTIN(__builtin_ia32_mpsadbw512, "V32sV64cV64cIc", "ncV:512:", "avx10.2-512")
+
+// AVX10.2 YMM Rounding
+TARGET_BUILTIN(__builtin_ia32_vaddpd256_round, "V4dV4dV4dIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vaddph256_round, "V16xV16xV16xIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vaddps256_round, "V8fV8fV8fIi", "nV:256:", "avx10.2-256")
+
// AVX-VNNI-INT16
TARGET_BUILTIN(__builtin_ia32_vpdpwsud128, "V4iV4iV4iV4i", "nV:128:", "avxvnniint16")
TARGET_BUILTIN(__builtin_ia32_vpdpwsud256, "V8iV8iV8iV8i", "nV:256:", "avxvnniint16")
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index f690467bb82cd..b5c19ebaaffab 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6205,6 +6205,12 @@ def mavx10_1_512 : Flag<["-"], "mavx10.1-512">, Group<m_x86_AVX10_Features_Group
def mno_avx10_1_512 : Flag<["-"], "mno-avx10.1-512">, Group<m_x86_AVX10_Features_Group>;
def mavx10_1 : Flag<["-"], "mavx10.1">, Alias<mavx10_1_256>;
def mno_avx10_1 : Flag<["-"], "mno-avx10.1">, Alias<mno_avx10_1_256>;
+def mavx10_2_256 : Flag<["-"], "mavx10.2-256">, Group<m_x86_AVX10_Features_Group>;
+def mno_avx10_2_256 : Flag<["-"], "mno-avx10.2-256">, Group<m_x86_AVX10_Features_Group>;
+def mavx10_2_512 : Flag<["-"], "mavx10.2-512">, Group<m_x86_AVX10_Features_Group>;
+def mno_avx10_2_512 : Flag<["-"], "mno-avx10.2-512">, Group<m_x86_AVX10_Features_Group>;
+def mavx10_2 : Flag<["-"], "mavx10.2">, Alias<mavx10_2_256>;
+def mno_avx10_2 : Flag<["-"], "mno-avx10.2">, Alias<mno_avx10_2_256>;
def mavx2 : Flag<["-"], "mavx2">, Group<m_x86_Features_Group>;
def mno_avx2 : Flag<["-"], "mno-avx2">, Group<m_x86_Features_Group>;
def mavx512f : Flag<["-"], "mavx512f">, Group<m_x86_Features_Group>;
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 18e6dbf03e00d..3fb3587eb5914 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -304,6 +304,10 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasAVX10_1 = true;
} else if (Feature == "+avx10.1-512") {
HasAVX10_1_512 = true;
+ } else if (Feature == "+avx10.2-256") {
+ HasAVX10_2 = true;
+ } else if (Feature == "+avx10.2-512") {
+ HasAVX10_2_512 = true;
} else if (Feature == "+avx512cd") {
HasAVX512CD = true;
} else if (Feature == "+avx512vpopcntdq") {
@@ -824,6 +828,10 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__AVX10_1__");
if (HasAVX10_1_512)
Builder.defineMacro("__AVX10_1_512__");
+ if (HasAVX10_2)
+ Builder.defineMacro("__AVX10_2__");
+ if (HasAVX10_2_512)
+ Builder.defineMacro("__AVX10_2_512__");
if (HasAVX512CD)
Builder.defineMacro("__AVX512CD__");
if (HasAVX512VPOPCNTDQ)
@@ -1056,6 +1064,8 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("avx", true)
.Case("avx10.1-256", true)
.Case("avx10.1-512", true)
+ .Case("avx10.2-256", true)
+ .Case("avx10.2-512", true)
.Case("avx2", true)
.Case("avx512f", true)
.Case("avx512cd", true)
@@ -1171,6 +1181,8 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("avx", SSELevel >= AVX)
.Case("avx10.1-256", HasAVX10_1)
.Case("avx10.1-512", HasAVX10_1_512)
+ .Case("avx10.2-256", HasAVX10_2)
+ .Case("avx10.2-512", HasAVX10_2_512)
.Case("avx2", SSELevel >= AVX2)
.Case("avx512f", SSELevel >= AVX512F)
.Case("avx512cd", HasAVX512CD)
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index ba34ab2c7f336..79fd5867cf667 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -92,6 +92,8 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasF16C = false;
bool HasAVX10_1 = false;
bool HasAVX10_1_512 = false;
+ bool HasAVX10_2 = false;
+ bool HasAVX10_2_512 = false;
bool HasEVEX512 = false;
bool HasAVX512CD = false;
bool HasAVX512VPOPCNTDQ = false;
diff --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp
index dc6c8695488bb..b2109e11038fe 100644
--- a/clang/lib/Driver/ToolChains/Arch/X86.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp
@@ -241,7 +241,7 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
assert(Name.starts_with("avx10.") && "Invalid AVX10 feature name.");
StringRef Version, Width;
std::tie(Version, Width) = Name.substr(6).split('-');
- assert(Version == "1" && "Invalid AVX10 feature name.");
+ assert((Version == "1" || Version == "2") && "Invalid AVX10 feature name.");
assert((Width == "256" || Width == "512") && "Invalid AVX10 feature name.");
#endif
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 89fa0ecd45eb4..b17ab24d625a0 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -147,6 +147,8 @@ set(x86_files
amxcomplexintrin.h
amxfp16intrin.h
amxintrin.h
+ avx10_2_512niintrin.h
+ avx10_2niintrin.h
avx2intrin.h
avx512bf16intrin.h
avx512bitalgintrin.h
diff --git a/clang/lib/Headers/avx10_2_512niintrin.h b/clang/lib/Headers/avx10_2_512niintrin.h
new file mode 100644
index 0000000000000..98ed9c72afd0c
--- /dev/null
+++ b/clang/lib/Headers/avx10_2_512niintrin.h
@@ -0,0 +1,35 @@
+/*===---- avx10_2_512niintrin.h - AVX10.2-512 new instruction intrinsics ---===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error \
+ "Never use <avx10_2_512niintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifdef __SSE2__
+
+#ifndef __AVX10_2_512INTRIN_H
+#define __AVX10_2_512INTRIN_H
+
+/* VMPSADBW */
+#define _mm512_mpsadbw_epu8(A, B, imm) \
+ ((__m512i)__builtin_ia32_mpsadbw512((__v64qi)(__m512i)(A), \
+ (__v64qi)(__m512i)(B), (int)(imm)))
+
+#define _mm512_mask_mpsadbw_epu8(W, U, A, B, imm) \
+ ((__m512i)__builtin_ia32_selectw_512( \
+ (__mmask32)(U), (__v32hi)_mm512_mpsadbw_epu8((A), (B), (imm)), \
+ (__v32hi)(__m512i)(W)))
+
+#define _mm512_maskz_mpsadbw_epu8(U, A, B, imm) \
+ ((__m512i)__builtin_ia32_selectw_512( \
+ (__mmask32)(U), (__v32hi)_mm512_mpsadbw_epu8((A), (B), (imm)), \
+ (__v32hi)_mm512_setzero_si512()))
+
+#endif /* __AVX10_2_512INTRIN_H */
+#endif /* __SSE2__ */
diff --git a/clang/lib/Headers/avx10_2niintrin.h b/clang/lib/Headers/avx10_2niintrin.h
new file mode 100644
index 0000000000000..bbd8eb7609b66
--- /dev/null
+++ b/clang/lib/Headers/avx10_2niintrin.h
@@ -0,0 +1,83 @@
+/*===---- avx10_2niintrin.h - AVX10.2 new instruction intrinsics -----------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx10_2niintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifdef __SSE2__
+
+#ifndef __AVX10_2INTRIN_H
+#define __AVX10_2INTRIN_H
+
+/* VMPSADBW */
+#define _mm_mask_mpsadbw_epu8(W, U, A, B, imm) \
+ ((__m128i)__builtin_ia32_selectw_128( \
+ (__mmask8)(U), (__v8hi)_mm_mpsadbw_epu8((A), (B), (imm)), \
+ (__v8hi)(__m128i)(W)))
+
+#define _mm_maskz_mpsadbw_epu8(U, A, B, imm) \
+ ((__m128i)__builtin_ia32_selectw_128( \
+ (__mmask8)(U), (__v8hi)_mm_mpsadbw_epu8((A), (B), (imm)), \
+ (__v8hi)_mm_setzero_si128()))
+
+#define _mm256_mask_mpsadbw_epu8(W, U, A, B, imm) \
+ ((__m256i)__builtin_ia32_selectw_256( \
+ (__mmask16)(U), (__v16hi)_mm256_mpsadbw_epu8((A), (B), (imm)), \
+ (__v16hi)(__m256i)(W)))
+
+#define _mm256_maskz_mpsadbw_epu8(U, A, B, imm) \
+ ((__m256i)__builtin_ia32_selectw_256( \
+ (__mmask16)(U), (__v16hi)_mm256_mpsadbw_epu8((A), (B), (imm)), \
+ (__v16hi)_mm256_setzero_si256()))
+
+/* YMM Rounding */
+#define _mm256_add_round_pd(A, B, R) \
+ ((__m256d)__builtin_ia32_vaddpd256_round((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), (int)(R)))
+
+#define _mm256_mask_add_round_pd(W, U, A, B, R) \
+ ((__m256d)__builtin_ia32_selectpd_256( \
+ (__mmask8)(U), (__v4df)_mm256_add_round_pd((A), (B), (R)), \
+ (__v4df)(__m256d)(W)))
+
+#define _mm256_maskz_add_round_pd(U, A, B, R) \
+ ((__m256d)__builtin_ia32_selectpd_256( \
+ (__mmask8)(U), (__v4df)_mm256_add_round_pd((A), (B), (R)), \
+ (__v4df)_mm256_setzero_pd()))
+
+#define _mm256_add_round_ph(A, B, R) \
+ ((__m256h)__builtin_ia32_vaddph256_round((__v16hf)(__m256h)(A), \
+ (__v16hf)(__m256h)(B), (int)(R)))
+
+#define _mm256_mask_add_round_ph(W, U, A, B, R) \
+ ((__m256h)__builtin_ia32_selectph_256( \
+ (__mmask16)(U), (__v16hf)_mm256_add_round_ph((A), (B), (R)), \
+ (__v16hf)(__m256h)(W)))
+
+#define _mm256_maskz_add_round_ph(U, A, B, R) \
+ ((__m256h)__builtin_ia32_selectph_256( \
+ (__mmask16)(U), (__v16hf)_mm256_add_round_ph((A), (B), (R)), \
+ (__v16hf)_mm256_setzero_ph()))
+
+#define _mm256_add_round_ps(A, B, R) \
+ ((__m256)__builtin_ia32_vaddps256_round((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), (int)(R)))
+
+#define _mm256_mask_add_round_ps(W, U, A, B, R) \
+ ((__m256)__builtin_ia32_selectps_256( \
+ (__mmask8)(U), (__v8sf)_mm256_add_round_ps((A), (B), (R)), \
+ (__v8sf)(__m256)(W)))
+
+#define _mm256_maskz_add_round_ps(U, A, B, R) \
+ ((__m256)__builtin_ia32_selectps_256( \
+ (__mmask8)(U), (__v8sf)_mm256_add_round_ps((A), (B), (R)), \
+ (__v8sf)_mm256_setzero_ps()))
+
+#endif /* __AVX10_2INTRIN_H */
+#endif /* __SSE2__ */
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index cd6cf09b90cad..e0957257ed5c7 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -648,6 +648,14 @@ _storebe_i64(void * __P, long long __D) {
#include <avx512vlvp2intersectintrin.h>
#endif
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2__)
+#include <avx10_2niintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2_512__)
+#include <avx10_2_512niintrin.h>
+#endif
+
#if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__)
#include <enqcmdintrin.h>
#endif
diff --git a/clang/lib/Sema/SemaX86.cpp b/clang/lib/Sema/SemaX86.cpp
index 8f9057bbaf259..bf2d2d8ac8f42 100644
--- a/clang/lib/Sema/SemaX86.cpp
+++ b/clang/lib/Sema/SemaX86.cpp
@@ -162,6 +162,9 @@ bool SemaX86::CheckBuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_mulps512:
case X86::BI__builtin_ia32_subpd512:
case X86::BI__builtin_ia32_subps512:
+ case X86::BI__builtin_ia32_vaddpd256_round:
+ case X86::BI__builtin_ia32_vaddph256_round:
+ case X86::BI__builtin_ia32_vaddps256_round:
case X86::BI__builtin_ia32_cvtsi2sd64:
case X86::BI__builtin_ia32_cvtsi2ss32:
case X86::BI__builtin_ia32_cvtsi2ss64:
diff --git a/clang/test/CodeGen/X86/avx10_2_512ni-builtins.c b/clang/test/CodeGen/X86/avx10_2_512ni-builtins.c
new file mode 100644
index 0000000000000..5983e0d969b68
--- /dev/null
+++ b/clang/test/CodeGen/X86/avx10_2_512ni-builtins.c
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx10.2-512 -emit-llvm -o - | FileCheck %s
+
+#include <immintrin.h>
+
+// VMPSADBW
+__m512i test_mm512_mpsadbw_epu8(__m512i __A, __m512i __B) {
+// CHECK-LABEL: @test_mm512_mpsadbw_epu8
+// CHECK: @llvm.x86.avx10.vmpsadbw.512
+ return _mm512_mpsadbw_epu8(__A, __B, 17);
+}
+
+__m512i test_mm512_mask_mpsadbw_epu8(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+// CHECK-LABEL: @test_mm512_mask_mpsadbw_epu8
+// CHECK: @llvm.x86.avx10.vmpsadbw.512
+// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+ return _mm512_mask_mpsadbw_epu8(__W, __U, __A, __B, 17);
+}
+
+__m512i test_mm512_maskz_mpsadbw_epu8(__mmask32 __U, __m512i __A, __m512i __B) {
+// CHECK-LABEL: @test_mm512_maskz_mpsadbw_epu8
+// CHECK: @llvm.x86.avx10.vmpsadbw.512
+// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+ return _mm512_maskz_mpsadbw_epu8(__U, __A, __B, 17);
+}
diff --git a/clang/test/CodeGen/X86/avx10_2ni-builtins.c b/clang/test/CodeGen/X86/avx10_2ni-builtins.c
new file mode 100644
index 0000000000000..c8e4d3c906a72
--- /dev/null
+++ b/clang/test/CodeGen/X86/avx10_2ni-builtins.c
@@ -0,0 +1,105 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx10.2-256 -emit-llvm -o - | FileCheck %s
+
+#include <immintrin.h>
+
+// VMPSADBW
+__m128i test_mm_mpsadbw_epu8(__m128i __A, __m128i __B) {
+// CHECK-LABEL: @test_mm_mpsadbw_epu8
+// CHECK: @llvm.x86.sse41.mpsadbw
+ return _mm_mpsadbw_epu8(__A, __B, 170);
+}
+
+__m128i test_mm_mask_mpsadbw_epu8(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
+// CHECK-LABEL: @test_mm_mask_mpsadbw_epu8
+// CHECK: @llvm.x86.sse41.mpsadbw
+// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
+ return _mm_mask_mpsadbw_epu8(__W, __U, __A, __B, 170);
+}
+
+__m128i test_mm_maskz_mpsadbw_epu8(__mmask8 __U, __m128i __A, __m128i __B) {
+// CHECK-LABEL: @test_mm_maskz_mpsadbw_epu8
+// CHECK: @llvm.x86.sse41.mpsadbw
+// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
+ return _mm_maskz_mpsadbw_epu8(__U, __A, __B, 170);
+}
+
+__m256i test_mm256_mpsadbw_epu8(__m256i __A, __m256i __B) {
+// CHECK-LABEL: @test_mm256_mpsadbw_epu8
+// CHECK: @llvm.x86.avx2.mpsadbw
+ return _mm256_mpsadbw_epu8(__A, __B, 170);
+}
+
+__m256i test_mm256_mask_mpsadbw_epu8(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
+// CHECK-LABEL: @test_mm256_mask_mpsadbw_epu8
+// CHECK: @llvm.x86.avx2.mpsadbw
+// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
+ return _mm256_mask_mpsadbw_epu8(__W, __U, __A, __B, 170);
+}
+
+__m256i test_mm256_maskz_mpsadbw_epu8(__mmask16 __U, __m256i __A, __m256i __B) {
+// CHECK-LABEL: @test_mm256_maskz_mpsadbw_epu8
+// CHECK: @llvm.x86.avx2.mpsadbw
+// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
+ return _mm256_maskz_mpsadbw_epu8(__U, __A, __B, 170);
+}
+
+// YMM Rounding
+__m256d test_mm256_add_round_pd(__m256d __A, __m256d __B) {
+// CHECK-LABEL: @test_mm256_add_round_pd
+// CHECK: @llvm.x86.avx10.vaddpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 11)
+ return _mm256_add_round_pd(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_mask_add_round_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+// CHECK-LABEL: @test_mm256_mask_add_round_pd
+// CHECK: @llvm.x86.avx10.vaddpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 10)
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+ return _mm256_mask_add_round_pd(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_maskz_add_round_pd(__mmask8 __U, __m256d __A, __m256d __B) {
+// CHECK-LABEL: @test_mm256_maskz_add_round_pd
+// CHECK: @llvm.x86.avx10.vaddpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 9)
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+ return _mm256_maskz_add_round_pd(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_add_round_ph(__m256h __A, __m256h __B) {
+// CHECK-LABEL: @test_mm256_add_round_ph
+// CHECK: @llvm.x86.avx10.vaddph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 11)
+ return _mm256_add_round_ph(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask_add_round_ph(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) {
+// CHECK-LABEL: @test_mm256_mask_add_round_ph
+// CHECK: @llvm.x86.avx10.vaddph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 10)
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
+ return _mm256_mask_add_round_ph(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_maskz_add_round_ph(__mmask8 __U, __m256h __A, __m256h __B) {
+// CHECK-LABEL: @test_mm256_maskz_add_round_ph
+// CHECK: @llvm.x86.avx10.vaddph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 9)
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
+ return _mm256_maskz_add_round_ph(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_add_round_ps(__m256 __A, __m256 __B) {
+// CHECK-LABEL: @test_mm256_add_round_ps
+// CHECK: @llvm.x86.avx10.vaddps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 11)
+ return _mm256_add_round_ps(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_mask_add_round_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+// CHECK-LABEL: @test_mm256_mask_add_round_ps
+// CHECK: @llvm.x86.avx10.vaddps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 10)
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+ return _mm256_mask_add_round_ps(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_maskz_add_round_ps(__mmask8 __U, __m256 __A, __m256 __B) {
+// CHECK-LABEL: @test_mm256_maskz_add_round_ps
+// CHECK: @llvm.x86.avx10.vaddps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 9)
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+ return _mm256_maskz_add_round_ps(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+}
diff --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c
index b1ae6678531b9..593ccffbcda09 100644
--- a/clang/test/CodeGen/attr-target-x86.c
+++ b/clang/test/CodeGen/attr-target-x86.c
@@ -59,10 +59,10 @@ void __attribute__((target("avx10.1-512"))) avx10_1_512(void) {}
// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="i686"
// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
// CHECK-NOT: tune-cpu
-// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-avx,-avx10.1-256,-avx10.1-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
+// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-avx,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686"
-// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-avx,-avx10.1-256,-avx10.1-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
-// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-avx10.1-256,-avx10.1-512,-vaes"
+// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-avx,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
+// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-vaes"
// CHECK-NOT: tune-cpu
// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-mmx"
// CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx"
@@ -76,5 +76,5 @@ void __attribute__((target("avx10.1-512"))) avx10_1_512(void) {}
// CHECK: "target-cpu"="x86-64-v4"
// CHECK-SAME: "target-features"="+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
-// CHECK: #12 = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave,-avx10.1-512,-evex512"
+// CHECK: #12 = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave,-avx10.1-512,-avx10.2-512,-evex512"
// CHECK: #13 = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx10.1-512,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave"
diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c
index 7d77ae75f8c47..ddfbb29a48f8d 100644
--- a/clang/test/Driver/x86-target-features.c
+++ b/clang/test/Driver/x86-target-features.c
@@ -386,6 +386,13 @@
// RUN: %clang --target=i386 -march=i386 -mavx10.1 -mno-avx512f %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX10-AVX512 %s
// RUN: %clang --target=i386 -march=i386 -mavx10.1 -mevex512 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX10-EVEX512 %s
// RUN: %clang --target=i386 -march=i386 -mavx10.1 -mno-evex512 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX10-EVEX512 %s
+// RUN: %clang --target=i386 -mavx10.2 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX10_2_256 %s
+// RUN: %clang --target=i386 -mavx10.2-256 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX10_2_256 %s
+// RUN: %clang --target=i386 -mavx10.2-512 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX10_2_512 %s
+// RUN: %clang --target=i386 -mavx10.2-256 -mavx10.1-512 %s -### -o %t.o 2>&1 | FileCheck -check-prefixes=AVX10_2_256,AVX10_1_512 %s
+// RUN: %clang --target=i386 -mavx10.2-512 -mavx10.1-256 %s -### -o %t.o 2>&1 | FileCheck -check-prefixes=AVX10_2_512,AVX10_1_256 %s
+// AVX10_2_256: "-target-feature" "+avx10.2-256"
+// AVX10_2_512: "-target-feature" "+avx10.2-512"
// AVX10_1_256: "-target-feature" "+avx10.1-256"
// AVX10_1_512: "-target-feature" "+avx10.1-512"
// BAD-AVX10: error: unknown argument{{:?}} '-mavx10.{{.*}}'
diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c
index 5d510cb4667f4..8b4e6bdc09226 100644
--- a/clang/test/Preprocessor/x86_target_features.c
+++ b/clang/test/Preprocessor/x86_target_features.c
@@ -712,7 +712,12 @@
// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mavx10.1 -x c -E -dM -o - %s | FileCheck -check-prefix=AVX10_1_256 %s
// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mavx10.1-256 -x c -E -dM -o - %s | FileCheck -check-prefix=AVX10_1_256 %s
// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mavx10.1-256 -mno-avx512f -x c -E -dM -o - %s | FileCheck -check-prefix=AVX10_1_256 %s
+// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mavx10.2 -x c -E -dM -o - %s | FileCheck -check-prefixes=AVX10_1_256,AVX10_2_256 %s
+// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mavx10.2-256 -x c -E -dM -o - %s | FileCheck -check-prefixes=AVX10_1_256,AVX10_2_256 %s
+// AVX10_1_256-NOT: __AVX10_1_512__
// AVX10_1_256: #define __AVX10_1__ 1
+// AVX10_2_256-NOT: __AVX10_2_512__
+// AVX10_2_256: #define __AVX10_2__ 1
// AVX10_1_256: #define __AVX512F__ 1
// AVX10_1_256: #define __EVEX256__ 1
// AVX10_1_256-NOT: __EVEX512__
@@ -720,7 +725,11 @@
// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mavx10.1-512 -x c -E -dM -o - %s | FileCheck -check-prefix=AVX10_1_512 %s
// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mavx10.1-512 -mno-avx512f -x c -E -dM -o - %s | FileCheck -check-prefix=AVX10_1_512 %s
// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mavx10.1-512 -mno-evex512 -x c -E -dM -o - %s | FileCheck -check-prefix=AVX10_1_512 %s
+// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mavx10.2-512 -x c -E -dM -o - %s | FileCheck -check-prefixes=AVX10_1_512,AVX10_2_512 %s
+// AVX10_1_512: #define __AVX10_1_512__ 1
// AVX10_1_512: #define __AVX10_1__ 1
+// AVX10_2_512: #define __AVX10_2_512__ 1
+// AVX10_2_512: #define __AVX10_2__ 1
// AVX10_1_512: #define __AVX512F__ 1
// AVX10_1_512: #define __EVEX256__ 1
// AVX10_1_512: #define __EVEX512__ 1
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 551a9bec3b916..2486663956c3f 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -129,6 +129,8 @@ Changes to the X86 Backend
generally seen in the wild (Clang never generates them!), so this is
not expected to result in real-world compatibility problems.
+* Support ISA of ``AVX10.2-256`` and ``AVX10.2-512``.
+
Changes to the OCaml bindings
-----------------------------
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index b6a92136f3828..515b0d0fcc22c 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -764,7 +764,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_mpsadbw : ClangBuiltin<"__builtin_ia32_mpsadbw128">,
DefaultAttrsIntrinsic<[llvm_v8i16_ty],
- [llvm_v16i8_ty, llvm_v16i8_ty,llvm_i8_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
}
@@ -4977,6 +4977,34 @@ let TargetPrefix = "x86" in {
ImmArg<ArgIndex<4>>]>;
}
+//===----------------------------------------------------------------------===//
+// AVX10.2 intrinsics
+let TargetPrefix = "x86" in {
+ // VMPSADBW
+ def int_x86_avx10_vmpsadbw_512 :
+ ClangBuiltin<"__builtin_ia32_mpsadbw512">,
+ DefaultAttrsIntrinsic<[llvm_v32i16_ty],
+ [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i8_ty],
+ [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+
+ // YMM Rounding
+ def int_x86_avx10_vaddpd256 :
+ ClangBuiltin<"__builtin_ia32_vaddpd256_round">,
+ DefaultAttrsIntrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+ def int_x86_avx10_vaddph256 :
+ ClangBuiltin<"__builtin_ia32_vaddph256_round">,
+ DefaultAttrsIntrinsic<[llvm_v16f16_ty],
+ [llvm_v16f16_ty, llvm_v16f16_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+ def int_x86_avx10_vaddps256 :
+ ClangBuiltin<"__builtin_ia32_vaddps256_round">,
+ DefaultAttrsIntrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+}
+
//===----------------------------------------------------------------------===//
// SHA intrinsics
let TargetPrefix = "x86" in {
diff --git a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
index 5daae45df2f83..5ec8a718d5a3e 100644
--- a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
+++ b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
@@ -71,7 +71,8 @@ enum attributeBits {
ATTR_EVEXB = 0x1 << 12,
ATTR_REX2 = 0x1 << 13,
ATTR_EVEXNF = 0x1 << 14,
- ATTR_max = 0x1 << 15,
+ ATTR_EVEXU = 0x1 << 15,
+ ATTR_max = 0x1 << 16,
};
// Combinations of the above attributes that are relevant to instruction
@@ -320,7 +321,47 @@ enum attributeBits {
ENUM_ENTRY(IC_EVEX_L2_W_KZ, 3, "requires EVEX_KZ, L2 and W") \
ENUM_ENTRY(IC_EVEX_L2_W_XS_KZ, 4, "requires EVEX_KZ, L2, W and XS prefix") \
ENUM_ENTRY(IC_EVEX_L2_W_XD_KZ, 4, "requires EVEX_KZ, L2, W and XD prefix") \
- ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ, 4, "requires EVEX_KZ, L2, W and OpSize")
+ ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ, 4, "requires EVEX_KZ, L2, W and OpSize") \
+ ENUM_ENTRY(IC_EVEX_B_U, 2, "requires EVEX_B and EVEX_U prefix") \
+ ENUM_ENTRY(IC_EVEX_XS_B_U, 3, "requires EVEX_B, XS and EVEX_U prefix") \
+ ENUM_ENTRY(IC_EVEX_XD_B_U, 3, "requires EVEX_B, XD and EVEX_U prefix") \
+ ENUM_ENTRY(IC_EVEX_OPSIZE_B_U, 3, \
+ "requires EVEX_B, OpSize and EVEX_U prefix") \
+ ENUM_ENTRY(IC_EVEX_W_B_U, 4, "requires EVEX_B, W, and EVEX_U prefix") \
+ ENUM_ENTRY(IC_EVEX_W_XS_B_U, 5, "requires EVEX_B, W, XS, and EVEX_U prefix") \
+ ENUM_ENTRY(IC_EVEX_W_XD_B_U, 5, "requires EVEX_B, W, XD, and EVEX_U prefix") \
+ ENUM_ENTRY(IC_EVEX_W_OPSIZE_B_U, 5, \
+ "requires EVEX_B, W, OpSize and EVEX_U prefix") \
+ ENUM_ENTRY(IC_EVEX_K_B_U, 2, "requires EVEX_B, EVEX_K and EVEX_U prefix") \
+ ENUM_ENTRY(IC_EVEX_XS_K_B_U, 3, \
+ "requires EVEX_B, EVEX_K, XS and the EVEX_U prefix") \
+ ENUM_ENTRY(IC_EVEX_XD_K_B_U, 3, \
+ "requires EVEX_B, EVEX_K, XD and the EVEX_U prefix") \
+ ENUM_ENTRY(IC_EVEX_OPSIZE_K_B_U, 3, \
+ "requires EVEX_B, EVEX_K, OpSize and the EVEX_U prefix") \
+ ENUM_ENTRY(IC_EVEX_W_K_B_U, 4, \
+ "requires EVEX_B, EVEX_K, W, and the EVEX_U prefix") \
+ ENUM_ENTRY(IC_EVEX_W_XS_K_B_U, 5, \
+ "requires EVEX_B, EVEX_K, W, XS, and EVEX_U prefix") \
+ ENUM_ENTRY(IC_EVEX_W_XD_K_B_U, 5, \
+ "requires EVEX_B, EVEX_K, W, XD, and EVEX_U prefix") \
+ ENUM_ENTRY(IC_EVEX_W_OPSIZE_K_B_U, 5, \
+ "requires EVEX_B, EVEX_K, W, OpSize, and EVEX_U prefix") \
+ ENUM_ENTRY(IC_EVEX_KZ_B_U, 2, "requires EVEX_B, EVEX_KZ and EVEX_U prefix") \
+ ENUM_ENTRY(IC_EVEX_XS_KZ_B_U, 3, \
+ "requires EVEX_B, EVEX_KZ, XS, and the EVEX_U prefix") \
+ ENUM_ENTRY(IC_EVEX_XD_KZ_B_U, 3, \
+ "requires EVEX_B, EVEX_KZ, XD, and the EVEX_U prefix") \
+ ENUM_ENTRY(IC_EVEX_OPSIZE_KZ_B_U, 3, \
+ "requires EVEX_B, EVEX_KZ, OpSize and EVEX_U prefix") \
+ ENUM_ENTRY(IC_EVEX_W_KZ_B_U, 4, \
+ "requires EVEX_B, EVEX_KZ, W and the EVEX_U prefix") \
+ ENUM_ENTRY(IC_EVEX_W_XS_KZ_B_U, 5, \
+ "requires EVEX_B, EVEX_KZ, W, XS, and EVEX_U prefix") \
+ ENUM_ENTRY(IC_EVEX_W_XD_KZ_B_U, 5, \
+ "requires EVEX_B, EVEX_KZ, W, XD, and EVEX_U prefix") \
+ ENUM_ENTRY(IC_EVEX_W_OPSIZE_KZ_B_U, 5, \
+ "requires EVEX_B, EVEX_KZ, W, OpSize and EVEX_U prefix")
#define ENUM_ENTRY(n, r, d) n,
enum InstructionContext { INSTRUCTION_CONTEXTS IC_max };
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def
index 92798cbe4b4c1..5652fb8bde086 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -257,6 +257,8 @@ X86_FEATURE_COMPAT(USERMSR, "usermsr", 0)
X86_FEATURE_COMPAT(AVX10_1, "avx10.1-256", 36)
X86_FEATURE_COMPAT(AVX10_1_512, "avx10.1-512", 37)
X86_FEATURE (ZU, "zu")
+X86_FEATURE_COMPAT(AVX10_2, "avx10.2-256", 0)
+X86_FEATURE_COMPAT(AVX10_2_512, "avx10.2-512", 0)
// These features aren't really CPU features, but the frontend can set them.
X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk")
X86_FEATURE (RETPOLINE_INDIRECT_BRANCHES, "retpoline-indirect-branches")
diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 6272e2d270f25..46871e1febd6c 100644
--- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -1219,6 +1219,9 @@ static int getInstructionID(struct InternalInstruction *insn,
attrMask |= ATTR_EVEXKZ;
if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
attrMask |= ATTR_EVEXB;
+ if (x2FromEVEX3of4(insn->vectorExtensionPrefix[2]) &&
+ (insn->opcodeType != MAP4))
+ attrMask |= ATTR_EVEXU;
if (isNF(insn) && !readModRM(insn) &&
!isCCMPOrCTEST(insn)) // NF bit is the MSB of aaa.
attrMask |= ATTR_EVEXNF;
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index b24b8acce6412..a3af9affa5fd0 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -872,7 +872,10 @@ enum : uint64_t {
EVEX_NF = 1ULL << EVEX_NFShift,
// TwoConditionalOps - Set if this instruction has two conditional operands
TwoConditionalOps_Shift = EVEX_NFShift + 1,
- TwoConditionalOps = 1ULL << TwoConditionalOps_Shift
+ TwoConditionalOps = 1ULL << TwoConditionalOps_Shift,
+ // EVEX_U - Set if this instruction has EVEX.U field set.
+ EVEX_UShift = TwoConditionalOps_Shift + 1,
+ EVEX_U = 1ULL << EVEX_UShift
};
/// \returns true if the instruction with given opcode is a prefix.
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 6553e1cc4a930..469a385e08527 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -111,9 +111,9 @@ class X86OpcodePrefixHelper {
// 0b11: F2
// EVEX (4 bytes)
- // +-----+ +---------------+ +--------------------+ +------------------------+
- // | 62h | | RXBR' | B'mmm | | W | vvvv | X' | pp | | z | L'L | b | v' | aaa |
- // +-----+ +---------------+ +--------------------+ +------------------------+
+ // +-----+ +---------------+ +-------------------+ +------------------------+
+ // | 62h | | RXBR' | B'mmm | | W | vvvv | U | pp | | z | L'L | b | v' | aaa |
+ // +-----+ +---------------+ +-------------------+ +------------------------+
// EVEX_L2/VEX_L (Vector Length):
// L2 L
@@ -131,7 +131,7 @@ class X86OpcodePrefixHelper {
// | RM (VR) | EVEX_X | EVEX_B | modrm.r/m | VR | Dest or Src |
// | RM (GPR) | EVEX_B' | EVEX_B | modrm.r/m | GPR | Dest or Src |
// | BASE | EVEX_B' | EVEX_B | modrm.r/m | GPR | MA |
- // | INDEX | EVEX_X' | EVEX_X | sib.index | GPR | MA |
+ // | INDEX | EVEX_U | EVEX_X | sib.index | GPR | MA |
// | VIDX | EVEX_v' | EVEX_X | sib.index | VR | VSIB MA |
// +----------+---------+--------+-----------+---------+--------------+
//
@@ -238,6 +238,7 @@ class X86OpcodePrefixHelper {
void setZ(bool V) { EVEX_z = V; }
void setL2(bool V) { EVEX_L2 = V; }
void setEVEX_b(bool V) { EVEX_b = V; }
+ void setEVEX_U(bool V) { X2 = V; }
void setV2(const MCInst &MI, unsigned OpNum, bool HasVEX_4V) {
// Only needed with VSIB which don't use VVVV.
if (HasVEX_4V)
@@ -1052,6 +1053,7 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
Prefix.setZ(HasEVEX_K && (TSFlags & X86II::EVEX_Z));
Prefix.setEVEX_b(TSFlags & X86II::EVEX_B);
+ Prefix.setEVEX_U(TSFlags & X86II::EVEX_U);
bool EncodeRC = false;
uint8_t EVEX_rc = 0;
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 9dafd5e628ca8..988966fa6a6c4 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -326,6 +326,12 @@ def FeatureAVX10_1 : SubtargetFeature<"avx10.1-256", "HasAVX10_1", "true",
def FeatureAVX10_1_512 : SubtargetFeature<"avx10.1-512", "HasAVX10_1_512", "true",
"Support AVX10.1 up to 512-bit instruction",
[FeatureAVX10_1, FeatureEVEX512]>;
+def FeatureAVX10_2 : SubtargetFeature<"avx10.2-256", "HasAVX10_2", "true",
+ "Support AVX10.2 up to 256-bit instruction",
+ [FeatureAVX10_1]>;
+def FeatureAVX10_2_512 : SubtargetFeature<"avx10.2-512", "HasAVX10_2_512", "true",
+ "Support AVX10.2 up to 512-bit instruction",
+ [FeatureAVX10_2, FeatureAVX10_1_512]>;
def FeatureEGPR : SubtargetFeature<"egpr", "HasEGPR", "true",
"Support extended general purpose register">;
def FeaturePush2Pop2 : SubtargetFeature<"push2pop2", "HasPush2Pop2", "true",
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 73405397aa6e8..9fafb66ab0b3f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -34033,6 +34033,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(CVTNEPS2BF16)
NODE_NAME_CASE(MCVTNEPS2BF16)
NODE_NAME_CASE(DPBF16PS)
+ NODE_NAME_CASE(MPSADBW)
NODE_NAME_CASE(LWPINS)
NODE_NAME_CASE(MGATHER)
NODE_NAME_CASE(MSCATTER)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 362daa98e1f8e..4fd320885d608 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -595,6 +595,8 @@ namespace llvm {
VPDPBSSD,
VPDPBSSDS,
+ MPSADBW,
+
// Compress and expand.
COMPRESS,
EXPAND,
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
new file mode 100644
index 0000000000000..666667895bc39
--- /dev/null
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -0,0 +1,33 @@
+//===-- X86InstrAVX10.td - AVX10 Instruction Set -----------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 AVX10 instruction set, defining the
+// instructions, and properties of the instructions which are needed for code
+// generation, machine code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+// VMPSADBW
+defm VMPSADBW : avx512_common_3Op_rm_imm8<0x42, X86Vmpsadbw, "vmpsadbw", SchedWritePSADBW,
+ avx512vl_i16_info, avx512vl_i8_info,
+ HasAVX10_2>,
+ XS, EVEX_CD8<32, CD8VF>;
+
+// YMM Rounding
+multiclass avx256_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
+ X86SchedWriteSizes sched> {
+ defm PHZ256 : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.YMM,
+ v16f16x_info>, T_MAP5,PS, EVEX_CD8<16, CD8VF>;
+ defm PSZ256 : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.YMM,
+ v8f32x_info>, TB, PS, EVEX_CD8<32, CD8VF>;
+ defm PDZ256 : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.YMM,
+ v4f64x_info>, TB, PD, EVEX_CD8<64, CD8VF>, REX_W;
+}
+
+let Predicates = [HasAVX10_2], hasEVEX_U = 1, OpEnc = EncEVEX in
+ defm VADD : avx256_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td
index 31ee288c6f8bb..7a9c164c031d5 100644
--- a/llvm/lib/Target/X86/X86InstrFormats.td
+++ b/llvm/lib/Target/X86/X86InstrFormats.td
@@ -282,6 +282,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
ExplicitOpPrefix explicitOpPrefix = NoExplicitOpPrefix;
bits<2> explicitOpPrefixBits = explicitOpPrefix.Value;
+ bit hasEVEX_U = 0; // Does this inst set the EVEX_U field?
// TSFlags layout should be kept in sync with X86BaseInfo.h.
let TSFlags{6-0} = FormBits;
let TSFlags{8-7} = OpSizeBits;
@@ -309,4 +310,5 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{51-50} = explicitOpPrefixBits;
let TSFlags{52} = hasEVEX_NF;
let TSFlags{53} = hasTwoConditionalOps;
+ let TSFlags{54} = hasEVEX_U;
}
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index dff33a469b97a..74596cec5c5ef 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -74,11 +74,11 @@ def X86psadbw : SDNode<"X86ISD::PSADBW",
SDTCVecEltisVT<1, i8>,
SDTCisSameSizeAs<0,1>,
SDTCisSameAs<1,2>]>, [SDNPCommutative]>;
-def X86dbpsadbw : SDNode<"X86ISD::DBPSADBW",
- SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i16>,
- SDTCVecEltisVT<1, i8>,
- SDTCisSameSizeAs<0,1>,
- SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>>;
+def SDTX86PSADBW : SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i16>,
+ SDTCVecEltisVT<1, i8>,
+ SDTCisSameSizeAs<0,1>,
+ SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>;
+def X86dbpsadbw : SDNode<"X86ISD::DBPSADBW", SDTX86PSADBW>;
def X86andnp : SDNode<"X86ISD::ANDNP",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
@@ -809,6 +809,8 @@ def X86vpdpbsuds : SDNode<"X86ISD::VPDPBSUDS", SDTVnni>;
def X86vpdpbuud : SDNode<"X86ISD::VPDPBUUD", SDTVnni>;
def X86vpdpbuuds : SDNode<"X86ISD::VPDPBUUDS", SDTVnni>;
+def X86Vmpsadbw : SDNode<"X86ISD::MPSADBW", SDTX86PSADBW>;
+
//===----------------------------------------------------------------------===//
// SSE pattern fragments
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 4792784336109..e75d6743f9273 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -63,6 +63,7 @@ include "X86InstrXOP.td"
// SSE, MMX and 3DNow! vector support.
include "X86InstrSSE.td"
include "X86InstrAVX512.td"
+include "X86InstrAVX10.td"
include "X86InstrMMX.td"
include "X86Instr3DNow.td"
diff --git a/llvm/lib/Target/X86/X86InstrPredicates.td b/llvm/lib/Target/X86/X86InstrPredicates.td
index f6038cf7a94cb..a815ddc9714f0 100644
--- a/llvm/lib/Target/X86/X86InstrPredicates.td
+++ b/llvm/lib/Target/X86/X86InstrPredicates.td
@@ -71,6 +71,9 @@ def HasAVX1Only : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX2()">;
def HasEVEX512 : Predicate<"Subtarget->hasEVEX512()">;
def HasAVX10_1 : Predicate<"Subtarget->hasAVX10_1()">;
def HasAVX10_1_512 : Predicate<"Subtarget->hasAVX10_1_512()">;
+def HasAVX10_2 : Predicate<"Subtarget->hasAVX10_2()">;
+def HasAVX10_2_512 : Predicate<"Subtarget->hasAVX10_2_512()">;
+def NoAVX10_2 : Predicate<"!Subtarget->hasAVX10_2()">;
def HasAVX512 : Predicate<"Subtarget->hasAVX512()">;
def UseAVX : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX512()">;
def UseAVX2 : Predicate<"Subtarget->hasAVX2() && !Subtarget->hasAVX512()">;
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index bc15085f6c7b7..2fc3b6aa98858 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -6115,11 +6115,11 @@ def BlendScaleCommuteImm2to4 : SDNodeXForm<timm, [{
return getI8Imm(NewImm ^ 0xf, SDLoc(N));
}]>;
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoAVX10_2] in {
let isCommutable = 0 in {
- defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
- VR128, load, i128mem, 0,
- SchedWriteMPSAD.XMM>, VEX, VVVV, WIG;
+ defm VMPSADBW : SS41I_binop_rmi<0x42, "vmpsadbw", X86Vmpsadbw,
+ v8i16, VR128, load, i128mem, 0,
+ SchedWriteMPSAD.XMM>, VEX, VVVV, WIG;
}
let Uses = [MXCSR], mayRaiseFPException = 1 in {
@@ -6138,19 +6138,19 @@ let Uses = [MXCSR], mayRaiseFPException = 1 in {
}
}
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2, NoAVX10_2] in {
let isCommutable = 0 in {
- defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
- VR256, load, i256mem, 0,
- SchedWriteMPSAD.YMM>, VEX, VVVV, VEX_L, WIG;
+ defm VMPSADBWY : SS41I_binop_rmi<0x42, "vmpsadbw", X86Vmpsadbw,
+ v16i16, VR256, load, i256mem, 0,
+ SchedWriteMPSAD.YMM>, VEX, VVVV, VEX_L, WIG;
}
}
let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
- defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
- VR128, memop, i128mem, 1,
- SchedWriteMPSAD.XMM>;
+ defm MPSADBW : SS41I_binop_rmi<0x42, "mpsadbw", X86Vmpsadbw,
+ v8i16, VR128, memop, i128mem, 1,
+ SchedWriteMPSAD.XMM>;
}
let ExeDomain = SSEPackedSingle in
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 685daca360e08..000138e1837af 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -388,6 +388,15 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx_vpermilvar_ps, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
X86_INTRINSIC_DATA(avx_vpermilvar_ps_256, INTR_TYPE_2OP, X86ISD::VPERMILPV,
0),
+ X86_INTRINSIC_DATA(avx10_vaddpd256, INTR_TYPE_2OP, ISD::FADD,
+ X86ISD::FADD_RND),
+ X86_INTRINSIC_DATA(avx10_vaddph256, INTR_TYPE_2OP, ISD::FADD,
+ X86ISD::FADD_RND),
+ X86_INTRINSIC_DATA(avx10_vaddps256, INTR_TYPE_2OP, ISD::FADD,
+ X86ISD::FADD_RND),
+ X86_INTRINSIC_DATA(avx10_vmpsadbw_512, INTR_TYPE_3OP_IMM8, X86ISD::MPSADBW,
+ 0),
+ X86_INTRINSIC_DATA(avx2_mpsadbw, INTR_TYPE_3OP_IMM8, X86ISD::MPSADBW, 0),
X86_INTRINSIC_DATA(avx2_packssdw, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
@@ -1663,6 +1672,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse41_blendvpd, BLENDV, X86ISD::BLENDV, 0),
X86_INTRINSIC_DATA(sse41_blendvps, BLENDV, X86ISD::BLENDV, 0),
X86_INTRINSIC_DATA(sse41_insertps, INTR_TYPE_3OP, X86ISD::INSERTPS, 0),
+ X86_INTRINSIC_DATA(sse41_mpsadbw, INTR_TYPE_3OP_IMM8, X86ISD::MPSADBW, 0),
X86_INTRINSIC_DATA(sse41_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(sse41_pblendvb, BLENDV, X86ISD::BLENDV, 0),
X86_INTRINSIC_DATA(sse41_phminposuw, INTR_TYPE_1OP, X86ISD::PHMINPOS, 0),
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 68aed69ee574b..986b9a211ce6c 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -1819,7 +1819,7 @@ const StringMap<bool> sys::getHostCPUFeatures() {
Features["avxvnniint16"] = HasLeaf7Subleaf1 && ((EDX >> 10) & 1) && HasAVXSave;
Features["prefetchi"] = HasLeaf7Subleaf1 && ((EDX >> 14) & 1);
Features["usermsr"] = HasLeaf7Subleaf1 && ((EDX >> 15) & 1);
- Features["avx10.1-256"] = HasLeaf7Subleaf1 && ((EDX >> 19) & 1);
+ bool HasAVX10 = HasLeaf7Subleaf1 && ((EDX >> 19) & 1);
bool HasAPXF = HasLeaf7Subleaf1 && ((EDX >> 21) & 1);
Features["egpr"] = HasAPXF;
Features["push2pop2"] = HasAPXF;
@@ -1849,8 +1849,13 @@ const StringMap<bool> sys::getHostCPUFeatures() {
bool HasLeaf24 =
MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX);
- Features["avx10.1-512"] =
- Features["avx10.1-256"] && HasLeaf24 && ((EBX >> 18) & 1);
+
+ int AVX10Ver = HasLeaf24 ? (EBX & 0xff) : 0; // Version number; 0 if no leaf.
+ bool Has512Len = HasLeaf24 && ((EBX >> 18) & 1);
+ Features["avx10.1-256"] = HasAVX10 && AVX10Ver >= 1;
+ Features["avx10.1-512"] = HasAVX10 && AVX10Ver >= 1 && Has512Len;
+ Features["avx10.2-256"] = HasAVX10 && AVX10Ver >= 2;
+ Features["avx10.2-512"] = HasAVX10 && AVX10Ver >= 2 && Has512Len;
return Features;
}
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index dcf9130052ac1..57bda0651ea82 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -620,6 +620,9 @@ constexpr FeatureBitset ImpliedFeaturesAVX10_1 =
FeatureAVX512FP16;
constexpr FeatureBitset ImpliedFeaturesAVX10_1_512 =
FeatureAVX10_1 | FeatureEVEX512;
+constexpr FeatureBitset ImpliedFeaturesAVX10_2 = FeatureAVX10_1;
+constexpr FeatureBitset ImpliedFeaturesAVX10_2_512 =
+ FeatureAVX10_2 | FeatureAVX10_1_512;
// APX Features
constexpr FeatureBitset ImpliedFeaturesEGPR = {};
diff --git a/llvm/test/CodeGen/X86/avx10_2_512ni-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2_512ni-intrinsics.ll
new file mode 100644
index 0000000000000..bafa52a2a83ae
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx10_2_512ni-intrinsics.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx10.2-512 --show-mc-encoding | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-512 --show-mc-encoding | FileCheck %s --check-prefix=X64
+
+; VMPSADBW
+
+define { <32 x i16>, <32 x i16>, <32 x i16> } @test_mm512_mask_mpsadbw(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) {
+; X86-LABEL: test_mm512_mask_mpsadbw:
+; X86: # %bb.0:
+; X86-NEXT: vmovdqa64 %zmm2, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe2]
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vmpsadbw $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7e,0x48,0x42,0xd9,0x02]
+; X86-NEXT: vmpsadbw $3, %zmm1, %zmm0, %zmm4 {%k1} # encoding: [0x62,0xf3,0x7e,0x49,0x42,0xe1,0x03]
+; X86-NEXT: vmpsadbw $4, %zmm1, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7e,0xc9,0x42,0xd1,0x04]
+; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
+; X86-NEXT: vmovdqa64 %zmm4, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcc]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm512_mask_mpsadbw:
+; X64: # %bb.0:
+; X64-NEXT: vmovdqa64 %zmm2, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe2]
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vmpsadbw $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7e,0x48,0x42,0xd9,0x02]
+; X64-NEXT: vmpsadbw $3, %zmm1, %zmm0, %zmm4 {%k1} # encoding: [0x62,0xf3,0x7e,0x49,0x42,0xe1,0x03]
+; X64-NEXT: vmpsadbw $4, %zmm1, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7e,0xc9,0x42,0xd1,0x04]
+; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
+; X64-NEXT: vmovdqa64 %zmm4, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcc]
+; X64-NEXT: retq # encoding: [0xc3]
+ %msk = bitcast i32 %x4 to <32 x i1>
+ %rs1 = call <32 x i16> @llvm.x86.avx10.vmpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i8 2)
+ %ad2 = call <32 x i16> @llvm.x86.avx10.vmpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i8 3)
+ %rs2 = select <32 x i1> %msk, <32 x i16> %ad2, <32 x i16> %x3
+ %ad3 = call <32 x i16> @llvm.x86.avx10.vmpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i8 4)
+ %rs3 = select <32 x i1> %msk, <32 x i16> %ad3, <32 x i16> zeroinitializer
+ %rs4 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } undef, <32 x i16> %rs1, 0
+ %rs5 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %rs4, <32 x i16> %rs2, 1
+ %rs6 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %rs5, <32 x i16> %rs3, 2
+ ret { <32 x i16>, <32 x i16>, <32 x i16> } %rs6
+}
+
+declare <32 x i16> @llvm.x86.avx10.vmpsadbw.512(<64 x i8>, <64 x i8>, i8)
diff --git a/llvm/test/CodeGen/X86/avx10_2ni-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2ni-intrinsics.ll
new file mode 100644
index 0000000000000..4080546c0c543
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx10_2ni-intrinsics.ll
@@ -0,0 +1,216 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx10.2-256 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-256 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
+
+; VMPSADBW
+
+define { <8 x i16>, <8 x i16>, <8 x i16> } @test_mask_mpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) {
+; X86-LABEL: test_mask_mpsadbw_128:
+; X86: # %bb.0:
+; X86-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vmpsadbw $2, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x42,0xd9,0x02]
+; X86-NEXT: vmpsadbw $3, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7e,0x09,0x42,0xe1,0x03]
+; X86-NEXT: vmpsadbw $4, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7e,0x89,0x42,0xd1,0x04]
+; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
+; X86-NEXT: vmovdqa %xmm4, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcc]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_mpsadbw_128:
+; X64: # %bb.0:
+; X64-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2]
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vmpsadbw $2, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x42,0xd9,0x02]
+; X64-NEXT: vmpsadbw $3, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7e,0x09,0x42,0xe1,0x03]
+; X64-NEXT: vmpsadbw $4, %xmm1, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7e,0x89,0x42,0xd1,0x04]
+; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
+; X64-NEXT: vmovdqa %xmm4, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcc]
+; X64-NEXT: retq # encoding: [0xc3]
+ %msk = bitcast i8 %x4 to <8 x i1>
+ %rs1 = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %x0, <16 x i8> %x1, i8 2)
+ %ad2 = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %x0, <16 x i8> %x1, i8 3)
+ %rs2 = select <8 x i1> %msk, <8 x i16> %ad2, <8 x i16> %x3
+ %ad3 = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %x0, <16 x i8> %x1, i8 4)
+ %rs3 = select <8 x i1> %msk, <8 x i16> %ad3, <8 x i16> zeroinitializer
+ %rs4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } undef, <8 x i16> %rs1, 0
+ %rs5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %rs4, <8 x i16> %rs2, 1
+ %rs6 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %rs5, <8 x i16> %rs3, 2
+ ret { <8 x i16>, <8 x i16>, <8 x i16> } %rs6
+}
+
+define { <16 x i16>, <16 x i16>, <16 x i16> } @test_mask_mpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) {
+; X86-LABEL: test_mask_mpsadbw_256:
+; X86: # %bb.0:
+; X86-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vmpsadbw $2, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x42,0xd9,0x02]
+; X86-NEXT: vmpsadbw $3, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7e,0x29,0x42,0xe1,0x03]
+; X86-NEXT: vmpsadbw $4, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0x7e,0xa9,0x42,0xd1,0x04]
+; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
+; X86-NEXT: vmovdqa %ymm4, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcc]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mask_mpsadbw_256:
+; X64: # %bb.0:
+; X64-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2]
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vmpsadbw $2, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x42,0xd9,0x02]
+; X64-NEXT: vmpsadbw $3, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7e,0x29,0x42,0xe1,0x03]
+; X64-NEXT: vmpsadbw $4, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf3,0x7e,0xa9,0x42,0xd1,0x04]
+; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
+; X64-NEXT: vmovdqa %ymm4, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcc]
+; X64-NEXT: retq # encoding: [0xc3]
+ %msk = bitcast i16 %x4 to <16 x i1>
+ %rs1 = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %x0, <32 x i8> %x1, i8 2)
+ %ad2 = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %x0, <32 x i8> %x1, i8 3)
+ %rs2 = select <16 x i1> %msk, <16 x i16> %ad2, <16 x i16> %x3
+ %ad3 = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %x0, <32 x i8> %x1, i8 4)
+ %rs3 = select <16 x i1> %msk, <16 x i16> %ad3, <16 x i16> zeroinitializer
+ %rs4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } undef, <16 x i16> %rs1, 0
+ %rs5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %rs4, <16 x i16> %rs2, 1
+ %rs6 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %rs5, <16 x i16> %rs3, 2
+ ret { <16 x i16>, <16 x i16>, <16 x i16> } %rs6
+}
+
+declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8)
+declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8)
+
+; YMM Rounding
+
+declare <4 x double> @llvm.x86.avx10.vaddpd256(<4 x double>, <4 x double>, i32)
+define <4 x double> @test_int_x86_vaddpd256(<4 x double> %A, <4 x double> %B) nounwind {
+; CHECK-LABEL: test_int_x86_vaddpd256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vaddpd {rz-sae}, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xf9,0x78,0x58,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %ret = call <4 x double> @llvm.x86.avx10.vaddpd256(<4 x double> %A, <4 x double> %B, i32 11)
+ ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_mask_vaddpd256(<4 x double> %A, i4 %B, <4 x double> %C, <4 x double> %D) nounwind {
+; X86-LABEL: test_int_x86_mask_vaddpd256:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vaddpd {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xf1,0x59,0x58,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vaddpd256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vaddpd {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xf1,0x59,0x58,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+ %ret0 = call <4 x double> @llvm.x86.avx10.vaddpd256(<4 x double> %C, <4 x double> %D, i32 10)
+ %msk = bitcast i4 %B to <4 x i1>
+ %ret = select <4 x i1> %msk, <4 x double> %ret0, <4 x double> %A
+ ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_maskz_vaddpd256(i4 %A, <4 x double> %B, <4 x double> %C) nounwind {
+; X86-LABEL: test_int_x86_maskz_vaddpd256:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vaddpd {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xf9,0xb9,0x58,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vaddpd256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vaddpd {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xf9,0xb9,0x58,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %ret0 = call <4 x double> @llvm.x86.avx10.vaddpd256(<4 x double> %B, <4 x double> %C, i32 9)
+ %msk = bitcast i4 %A to <4 x i1>
+ %ret = select <4 x i1> %msk, <4 x double> %ret0, <4 x double> zeroinitializer
+ ret <4 x double> %ret
+}
+
+declare <16 x half> @llvm.x86.avx10.vaddph256(<16 x half>, <16 x half>, i32)
+define <16 x half> @test_int_x86_vaddph256(<16 x half> %A, <16 x half> %B) nounwind {
+; CHECK-LABEL: test_int_x86_vaddph256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vaddph {rz-sae}, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x78,0x78,0x58,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %ret = call <16 x half> @llvm.x86.avx10.vaddph256(<16 x half> %A, <16 x half> %B, i32 11)
+ ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_mask_vaddph256(<16 x half> %A, i16 %B, <16 x half> %C, <16 x half> %D) nounwind {
+; X86-LABEL: test_int_x86_mask_vaddph256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vaddph {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x70,0x59,0x58,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vaddph256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vaddph {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x70,0x59,0x58,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+ %ret0 = call <16 x half> @llvm.x86.avx10.vaddph256(<16 x half> %C, <16 x half> %D, i32 10)
+ %msk = bitcast i16 %B to <16 x i1>
+ %ret = select <16 x i1> %msk, <16 x half> %ret0, <16 x half> %A
+ ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_maskz_vaddph256(i16 %A, <16 x half> %B, <16 x half> %C) nounwind {
+; X86-LABEL: test_int_x86_maskz_vaddph256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vaddph {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0xb9,0x58,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vaddph256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vaddph {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0xb9,0x58,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %ret0 = call <16 x half> @llvm.x86.avx10.vaddph256(<16 x half> %B, <16 x half> %C, i32 9)
+ %msk = bitcast i16 %A to <16 x i1>
+ %ret = select <16 x i1> %msk, <16 x half> %ret0, <16 x half> zeroinitializer
+ ret <16 x half> %ret
+}
+
+declare <8 x float> @llvm.x86.avx10.vaddps256(<8 x float>, <8 x float>, i32)
+define <8 x float> @test_int_x86_vaddps256(<8 x float> %A, <8 x float> %B) nounwind {
+; CHECK-LABEL: test_int_x86_vaddps256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vaddps {rz-sae}, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x78,0x78,0x58,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %ret = call <8 x float> @llvm.x86.avx10.vaddps256(<8 x float> %A, <8 x float> %B, i32 11)
+ ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_mask_vaddps256(<8 x float> %A, i8 %B, <8 x float> %C, <8 x float> %D) nounwind {
+; X86-LABEL: test_int_x86_mask_vaddps256:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vaddps {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x70,0x59,0x58,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vaddps256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vaddps {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x70,0x59,0x58,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+ %ret0 = call <8 x float> @llvm.x86.avx10.vaddps256(<8 x float> %C, <8 x float> %D, i32 10)
+ %msk = bitcast i8 %B to <8 x i1>
+ %ret = select <8 x i1> %msk, <8 x float> %ret0, <8 x float> %A
+ ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_maskz_vaddps256(i8 %A, <8 x float> %B, <8 x float> %C) nounwind {
+; X86-LABEL: test_int_x86_maskz_vaddps256:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vaddps {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x78,0xb9,0x58,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vaddps256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vaddps {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x78,0xb9,0x58,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %ret0 = call <8 x float> @llvm.x86.avx10.vaddps256(<8 x float> %B, <8 x float> %C, i32 9)
+ %msk = bitcast i8 %A to <8 x i1>
+ %ret = select <8 x i1> %msk, <8 x float> %ret0, <8 x float> zeroinitializer
+ ret <8 x float> %ret
+}
diff --git a/llvm/test/MC/Disassembler/X86/avx10_2ni-32.txt b/llvm/test/MC/Disassembler/X86/avx10_2ni-32.txt
new file mode 100644
index 0000000000000..59457e6eec293
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx10_2ni-32.txt
@@ -0,0 +1,150 @@
+# RUN: llvm-mc --disassemble %s -triple=i386 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i386 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# VMPSADBW
+
+# ATT: vmpsadbw $123, %xmm4, %xmm3, %xmm2
+# INTEL: vmpsadbw xmm2, xmm3, xmm4, 123
+0xc4,0xe3,0x61,0x42,0xd4,0x7b
+
+# ATT: vmpsadbw $123, %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vmpsadbw xmm2 {k7}, xmm3, xmm4, 123
+0x62,0xf3,0x66,0x0f,0x42,0xd4,0x7b
+
+# ATT: vmpsadbw $123, %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vmpsadbw xmm2 {k7} {z}, xmm3, xmm4, 123
+0x62,0xf3,0x66,0x8f,0x42,0xd4,0x7b
+
+# ATT: vmpsadbw $123, %ymm4, %ymm3, %ymm2
+# INTEL: vmpsadbw ymm2, ymm3, ymm4, 123
+0xc4,0xe3,0x65,0x42,0xd4,0x7b
+
+# ATT: vmpsadbw $123, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vmpsadbw ymm2 {k7}, ymm3, ymm4, 123
+0x62,0xf3,0x66,0x2f,0x42,0xd4,0x7b
+
+# ATT: vmpsadbw $123, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vmpsadbw ymm2 {k7} {z}, ymm3, ymm4, 123
+0x62,0xf3,0x66,0xaf,0x42,0xd4,0x7b
+
+# ATT: vmpsadbw $123, %zmm4, %zmm3, %zmm2
+# INTEL: vmpsadbw zmm2, zmm3, zmm4, 123
+0x62,0xf3,0x66,0x48,0x42,0xd4,0x7b
+
+# ATT: vmpsadbw $123, %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vmpsadbw zmm2 {k7}, zmm3, zmm4, 123
+0x62,0xf3,0x66,0x4f,0x42,0xd4,0x7b
+
+# ATT: vmpsadbw $123, %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vmpsadbw zmm2 {k7} {z}, zmm3, zmm4, 123
+0x62,0xf3,0x66,0xcf,0x42,0xd4,0x7b
+
+# ATT: vmpsadbw $123, 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vmpsadbw xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456], 123
+0xc4,0xe3,0x61,0x42,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vmpsadbw $123, 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vmpsadbw xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291], 123
+0x62,0xf3,0x66,0x0f,0x42,0x94,0x87,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vmpsadbw $123, (%eax), %xmm3, %xmm2
+# INTEL: vmpsadbw xmm2, xmm3, xmmword ptr [eax], 123
+0xc4,0xe3,0x61,0x42,0x10,0x7b
+
+# ATT: vmpsadbw $123, -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vmpsadbw xmm2, xmm3, xmmword ptr [2*ebp - 512], 123
+0xc4,0xe3,0x61,0x42,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b
+
+# ATT: vmpsadbw $123, 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vmpsadbw xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032], 123
+0x62,0xf3,0x66,0x8f,0x42,0x51,0x7f,0x7b
+
+# ATT: vmpsadbw $123, -2048(%edx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vmpsadbw xmm2 {k7} {z}, xmm3, xmmword ptr [edx - 2048], 123
+0x62,0xf3,0x66,0x8f,0x42,0x52,0x80,0x7b
+
+# ATT: vmpsadbw $123, 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vmpsadbw ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456], 123
+0xc4,0xe3,0x65,0x42,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vmpsadbw $123, 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vmpsadbw ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291], 123
+0x62,0xf3,0x66,0x2f,0x42,0x94,0x87,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vmpsadbw $123, (%eax), %ymm3, %ymm2
+# INTEL: vmpsadbw ymm2, ymm3, ymmword ptr [eax], 123
+0xc4,0xe3,0x65,0x42,0x10,0x7b
+
+# ATT: vmpsadbw $123, -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vmpsadbw ymm2, ymm3, ymmword ptr [2*ebp - 1024], 123
+0xc4,0xe3,0x65,0x42,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b
+
+# ATT: vmpsadbw $123, 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vmpsadbw ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064], 123
+0x62,0xf3,0x66,0xaf,0x42,0x51,0x7f,0x7b
+
+# ATT: vmpsadbw $123, -4096(%edx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vmpsadbw ymm2 {k7} {z}, ymm3, ymmword ptr [edx - 4096], 123
+0x62,0xf3,0x66,0xaf,0x42,0x52,0x80,0x7b
+
+# ATT: vmpsadbw $123, 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vmpsadbw zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456], 123
+0x62,0xf3,0x66,0x48,0x42,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vmpsadbw $123, 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vmpsadbw zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291], 123
+0x62,0xf3,0x66,0x4f,0x42,0x94,0x87,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vmpsadbw $123, (%eax), %zmm3, %zmm2
+# INTEL: vmpsadbw zmm2, zmm3, zmmword ptr [eax], 123
+0x62,0xf3,0x66,0x48,0x42,0x10,0x7b
+
+# ATT: vmpsadbw $123, -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vmpsadbw zmm2, zmm3, zmmword ptr [2*ebp - 2048], 123
+0x62,0xf3,0x66,0x48,0x42,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b
+
+# ATT: vmpsadbw $123, 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vmpsadbw zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128], 123
+0x62,0xf3,0x66,0xcf,0x42,0x51,0x7f,0x7b
+
+# ATT: vmpsadbw $123, -8192(%edx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vmpsadbw zmm2 {k7} {z}, zmm3, zmmword ptr [edx - 8192], 123
+0x62,0xf3,0x66,0xcf,0x42,0x52,0x80,0x7b
+
+# YMM Rounding
+
+# ATT: vaddpd {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vaddpd ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf1,0xe1,0x18,0x58,0xd4
+
+# ATT: vaddpd {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vaddpd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf1,0xe1,0x3f,0x58,0xd4
+
+# ATT: vaddpd {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vaddpd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf1,0xe1,0xff,0x58,0xd4
+
+# ATT: vaddph {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vaddph ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf5,0x60,0x18,0x58,0xd4
+
+# ATT: vaddph {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vaddph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf5,0x60,0x3f,0x58,0xd4
+
+# ATT: vaddph {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vaddph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf5,0x60,0xff,0x58,0xd4
+
+# ATT: vaddps {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vaddps ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf1,0x60,0x18,0x58,0xd4
+
+# ATT: vaddps {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vaddps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf1,0x60,0x3f,0x58,0xd4
+
+# ATT: vaddps {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vaddps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf1,0x60,0xff,0x58,0xd4
diff --git a/llvm/test/MC/Disassembler/X86/avx10_2ni-64.txt b/llvm/test/MC/Disassembler/X86/avx10_2ni-64.txt
new file mode 100644
index 0000000000000..34f8851d04d6b
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx10_2ni-64.txt
@@ -0,0 +1,150 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# VMPSADBW
+
+# ATT: vmpsadbw $123, %xmm24, %xmm23, %xmm22
+# INTEL: vmpsadbw xmm22, xmm23, xmm24, 123
+0x62,0x83,0x46,0x00,0x42,0xf0,0x7b
+
+# ATT: vmpsadbw $123, %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vmpsadbw xmm22 {k7}, xmm23, xmm24, 123
+0x62,0x83,0x46,0x07,0x42,0xf0,0x7b
+
+# ATT: vmpsadbw $123, %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vmpsadbw xmm22 {k7} {z}, xmm23, xmm24, 123
+0x62,0x83,0x46,0x87,0x42,0xf0,0x7b
+
+# ATT: vmpsadbw $123, %ymm24, %ymm23, %ymm22
+# INTEL: vmpsadbw ymm22, ymm23, ymm24, 123
+0x62,0x83,0x46,0x20,0x42,0xf0,0x7b
+
+# ATT: vmpsadbw $123, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vmpsadbw ymm22 {k7}, ymm23, ymm24, 123
+0x62,0x83,0x46,0x27,0x42,0xf0,0x7b
+
+# ATT: vmpsadbw $123, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vmpsadbw ymm22 {k7} {z}, ymm23, ymm24, 123
+0x62,0x83,0x46,0xa7,0x42,0xf0,0x7b
+
+# ATT: vmpsadbw $123, %zmm24, %zmm23, %zmm22
+# INTEL: vmpsadbw zmm22, zmm23, zmm24, 123
+0x62,0x83,0x46,0x40,0x42,0xf0,0x7b
+
+# ATT: vmpsadbw $123, %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vmpsadbw zmm22 {k7}, zmm23, zmm24, 123
+0x62,0x83,0x46,0x47,0x42,0xf0,0x7b
+
+# ATT: vmpsadbw $123, %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vmpsadbw zmm22 {k7} {z}, zmm23, zmm24, 123
+0x62,0x83,0x46,0xc7,0x42,0xf0,0x7b
+
+# ATT: vmpsadbw $123, 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vmpsadbw xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456], 123
+0x62,0xa3,0x46,0x00,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vmpsadbw $123, 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vmpsadbw xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291], 123
+0x62,0xc3,0x46,0x07,0x42,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vmpsadbw $123, (%rip), %xmm23, %xmm22
+# INTEL: vmpsadbw xmm22, xmm23, xmmword ptr [rip], 123
+0x62,0xe3,0x46,0x00,0x42,0x35,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vmpsadbw $123, -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vmpsadbw xmm22, xmm23, xmmword ptr [2*rbp - 512], 123
+0x62,0xe3,0x46,0x00,0x42,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b
+
+# ATT: vmpsadbw $123, 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vmpsadbw xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032], 123
+0x62,0xe3,0x46,0x87,0x42,0x71,0x7f,0x7b
+
+# ATT: vmpsadbw $123, -2048(%rdx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vmpsadbw xmm22 {k7} {z}, xmm23, xmmword ptr [rdx - 2048], 123
+0x62,0xe3,0x46,0x87,0x42,0x72,0x80,0x7b
+
+# ATT: vmpsadbw $123, 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vmpsadbw ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456], 123
+0x62,0xa3,0x46,0x20,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vmpsadbw $123, 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vmpsadbw ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291], 123
+0x62,0xc3,0x46,0x27,0x42,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vmpsadbw $123, (%rip), %ymm23, %ymm22
+# INTEL: vmpsadbw ymm22, ymm23, ymmword ptr [rip], 123
+0x62,0xe3,0x46,0x20,0x42,0x35,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vmpsadbw $123, -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vmpsadbw ymm22, ymm23, ymmword ptr [2*rbp - 1024], 123
+0x62,0xe3,0x46,0x20,0x42,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b
+
+# ATT: vmpsadbw $123, 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vmpsadbw ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064], 123
+0x62,0xe3,0x46,0xa7,0x42,0x71,0x7f,0x7b
+
+# ATT: vmpsadbw $123, -4096(%rdx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vmpsadbw ymm22 {k7} {z}, ymm23, ymmword ptr [rdx - 4096], 123
+0x62,0xe3,0x46,0xa7,0x42,0x72,0x80,0x7b
+
+# ATT: vmpsadbw $123, 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vmpsadbw zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456], 123
+0x62,0xa3,0x46,0x40,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vmpsadbw $123, 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vmpsadbw zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291], 123
+0x62,0xc3,0x46,0x47,0x42,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vmpsadbw $123, (%rip), %zmm23, %zmm22
+# INTEL: vmpsadbw zmm22, zmm23, zmmword ptr [rip], 123
+0x62,0xe3,0x46,0x40,0x42,0x35,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vmpsadbw $123, -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vmpsadbw zmm22, zmm23, zmmword ptr [2*rbp - 2048], 123
+0x62,0xe3,0x46,0x40,0x42,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b
+
+# ATT: vmpsadbw $123, 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vmpsadbw zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128], 123
+0x62,0xe3,0x46,0xc7,0x42,0x71,0x7f,0x7b
+
+# ATT: vmpsadbw $123, -8192(%rdx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vmpsadbw zmm22 {k7} {z}, zmm23, zmmword ptr [rdx - 8192], 123
+0x62,0xe3,0x46,0xc7,0x42,0x72,0x80,0x7b
+
+# YMM Rounding
+
+# ATT: vaddpd {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vaddpd ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x81,0xc1,0x10,0x58,0xf0
+
+# ATT: vaddpd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vaddpd ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x81,0xc1,0x37,0x58,0xf0
+
+# ATT: vaddpd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vaddpd ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x81,0xc1,0xf7,0x58,0xf0
+
+# ATT: vaddph {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vaddph ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x85,0x40,0x10,0x58,0xf0
+
+# ATT: vaddph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vaddph ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x85,0x40,0x37,0x58,0xf0
+
+# ATT: vaddph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vaddph ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x85,0x40,0xf7,0x58,0xf0
+
+# ATT: vaddps {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vaddps ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x81,0x40,0x10,0x58,0xf0
+
+# ATT: vaddps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vaddps ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x81,0x40,0x37,0x58,0xf0
+
+# ATT: vaddps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vaddps ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x81,0x40,0xf7,0x58,0xf0
diff --git a/llvm/test/MC/X86/avx10_2ni-32-intel.s b/llvm/test/MC/X86/avx10_2ni-32-intel.s
new file mode 100644
index 0000000000000..ea9a89f316cc3
--- /dev/null
+++ b/llvm/test/MC/X86/avx10_2ni-32-intel.s
@@ -0,0 +1,149 @@
+// RUN: llvm-mc -triple i386 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// VMPSADBW
+
+// CHECK: vmpsadbw xmm2, xmm3, xmm4, 123
+// CHECK: encoding: [0xc4,0xe3,0x61,0x42,0xd4,0x7b]
+ vmpsadbw xmm2, xmm3, xmm4, 123
+
+// CHECK: vmpsadbw xmm2 {k7}, xmm3, xmm4, 123
+// CHECK: encoding: [0x62,0xf3,0x66,0x0f,0x42,0xd4,0x7b]
+ vmpsadbw xmm2 {k7}, xmm3, xmm4, 123
+
+// CHECK: vmpsadbw xmm2 {k7} {z}, xmm3, xmm4, 123
+// CHECK: encoding: [0x62,0xf3,0x66,0x8f,0x42,0xd4,0x7b]
+ vmpsadbw xmm2 {k7} {z}, xmm3, xmm4, 123
+
+// CHECK: vmpsadbw ymm2, ymm3, ymm4, 123
+// CHECK: encoding: [0xc4,0xe3,0x65,0x42,0xd4,0x7b]
+ vmpsadbw ymm2, ymm3, ymm4, 123
+
+// CHECK: vmpsadbw ymm2 {k7}, ymm3, ymm4, 123
+// CHECK: encoding: [0x62,0xf3,0x66,0x2f,0x42,0xd4,0x7b]
+ vmpsadbw ymm2 {k7}, ymm3, ymm4, 123
+
+// CHECK: vmpsadbw ymm2 {k7} {z}, ymm3, ymm4, 123
+// CHECK: encoding: [0x62,0xf3,0x66,0xaf,0x42,0xd4,0x7b]
+ vmpsadbw ymm2 {k7} {z}, ymm3, ymm4, 123
+
+// CHECK: vmpsadbw zmm2, zmm3, zmm4, 123
+// CHECK: encoding: [0x62,0xf3,0x66,0x48,0x42,0xd4,0x7b]
+ vmpsadbw zmm2, zmm3, zmm4, 123
+
+// CHECK: vmpsadbw zmm2 {k7}, zmm3, zmm4, 123
+// CHECK: encoding: [0x62,0xf3,0x66,0x4f,0x42,0xd4,0x7b]
+ vmpsadbw zmm2 {k7}, zmm3, zmm4, 123
+
+// CHECK: vmpsadbw zmm2 {k7} {z}, zmm3, zmm4, 123
+// CHECK: encoding: [0x62,0xf3,0x66,0xcf,0x42,0xd4,0x7b]
+ vmpsadbw zmm2 {k7} {z}, zmm3, zmm4, 123
+
+// CHECK: vmpsadbw xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: encoding: [0xc4,0xe3,0x61,0x42,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vmpsadbw xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456], 123
+
+// CHECK: vmpsadbw xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291], 123
+// CHECK: encoding: [0x62,0xf3,0x66,0x0f,0x42,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vmpsadbw xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291], 123
+
+// CHECK: vmpsadbw xmm2, xmm3, xmmword ptr [eax], 123
+// CHECK: encoding: [0xc4,0xe3,0x61,0x42,0x10,0x7b]
+ vmpsadbw xmm2, xmm3, xmmword ptr [eax], 123
+
+// CHECK: vmpsadbw xmm2, xmm3, xmmword ptr [2*ebp - 512], 123
+// CHECK: encoding: [0xc4,0xe3,0x61,0x42,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vmpsadbw xmm2, xmm3, xmmword ptr [2*ebp - 512], 123
+
+// CHECK: vmpsadbw xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032], 123
+// CHECK: encoding: [0x62,0xf3,0x66,0x8f,0x42,0x51,0x7f,0x7b]
+ vmpsadbw xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032], 123
+
+// CHECK: vmpsadbw xmm2 {k7} {z}, xmm3, xmmword ptr [edx - 2048], 123
+// CHECK: encoding: [0x62,0xf3,0x66,0x8f,0x42,0x52,0x80,0x7b]
+ vmpsadbw xmm2 {k7} {z}, xmm3, xmmword ptr [edx - 2048], 123
+
+// CHECK: vmpsadbw ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: encoding: [0xc4,0xe3,0x65,0x42,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vmpsadbw ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456], 123
+
+// CHECK: vmpsadbw ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291], 123
+// CHECK: encoding: [0x62,0xf3,0x66,0x2f,0x42,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vmpsadbw ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291], 123
+
+// CHECK: vmpsadbw ymm2, ymm3, ymmword ptr [eax], 123
+// CHECK: encoding: [0xc4,0xe3,0x65,0x42,0x10,0x7b]
+ vmpsadbw ymm2, ymm3, ymmword ptr [eax], 123
+
+// CHECK: vmpsadbw ymm2, ymm3, ymmword ptr [2*ebp - 1024], 123
+// CHECK: encoding: [0xc4,0xe3,0x65,0x42,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vmpsadbw ymm2, ymm3, ymmword ptr [2*ebp - 1024], 123
+
+// CHECK: vmpsadbw ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064], 123
+// CHECK: encoding: [0x62,0xf3,0x66,0xaf,0x42,0x51,0x7f,0x7b]
+ vmpsadbw ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064], 123
+
+// CHECK: vmpsadbw ymm2 {k7} {z}, ymm3, ymmword ptr [edx - 4096], 123
+// CHECK: encoding: [0x62,0xf3,0x66,0xaf,0x42,0x52,0x80,0x7b]
+ vmpsadbw ymm2 {k7} {z}, ymm3, ymmword ptr [edx - 4096], 123
+
+// CHECK: vmpsadbw zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: encoding: [0x62,0xf3,0x66,0x48,0x42,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vmpsadbw zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456], 123
+
+// CHECK: vmpsadbw zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291], 123
+// CHECK: encoding: [0x62,0xf3,0x66,0x4f,0x42,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vmpsadbw zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291], 123
+
+// CHECK: vmpsadbw zmm2, zmm3, zmmword ptr [eax], 123
+// CHECK: encoding: [0x62,0xf3,0x66,0x48,0x42,0x10,0x7b]
+ vmpsadbw zmm2, zmm3, zmmword ptr [eax], 123
+
+// CHECK: vmpsadbw zmm2, zmm3, zmmword ptr [2*ebp - 2048], 123
+// CHECK: encoding: [0x62,0xf3,0x66,0x48,0x42,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vmpsadbw zmm2, zmm3, zmmword ptr [2*ebp - 2048], 123
+
+// CHECK: vmpsadbw zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128], 123
+// CHECK: encoding: [0x62,0xf3,0x66,0xcf,0x42,0x51,0x7f,0x7b]
+ vmpsadbw zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128], 123
+
+// CHECK: vmpsadbw zmm2 {k7} {z}, zmm3, zmmword ptr [edx - 8192], 123
+// CHECK: encoding: [0x62,0xf3,0x66,0xcf,0x42,0x52,0x80,0x7b]
+ vmpsadbw zmm2 {k7} {z}, zmm3, zmmword ptr [edx - 8192], 123
+
+// YMM Rounding
+
+// CHECK: vaddpd ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf1,0xe1,0x18,0x58,0xd4]
+ vaddpd ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vaddpd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf1,0xe1,0x3f,0x58,0xd4]
+ vaddpd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vaddpd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf1,0xe1,0xff,0x58,0xd4]
+ vaddpd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vaddph ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x60,0x18,0x58,0xd4]
+ vaddph ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vaddph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf5,0x60,0x3f,0x58,0xd4]
+ vaddph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vaddph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf5,0x60,0xff,0x58,0xd4]
+ vaddph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vaddps ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf1,0x60,0x18,0x58,0xd4]
+ vaddps ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vaddps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf1,0x60,0x3f,0x58,0xd4]
+ vaddps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vaddps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf1,0x60,0xff,0x58,0xd4]
+ vaddps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
diff --git a/llvm/test/MC/X86/avx10_2ni-64-att.s b/llvm/test/MC/X86/avx10_2ni-64-att.s
new file mode 100644
index 0000000000000..8ee4bc3f64127
--- /dev/null
+++ b/llvm/test/MC/X86/avx10_2ni-64-att.s
@@ -0,0 +1,149 @@
+// RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s
+
+// VMPSADBW
+
+// CHECK: vmpsadbw $123, %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x83,0x46,0x00,0x42,0xf0,0x7b]
+ vmpsadbw $123, %xmm24, %xmm23, %xmm22
+
+// CHECK: vmpsadbw $123, %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x83,0x46,0x07,0x42,0xf0,0x7b]
+ vmpsadbw $123, %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vmpsadbw $123, %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x83,0x46,0x87,0x42,0xf0,0x7b]
+ vmpsadbw $123, %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vmpsadbw $123, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x83,0x46,0x20,0x42,0xf0,0x7b]
+ vmpsadbw $123, %ymm24, %ymm23, %ymm22
+
+// CHECK: vmpsadbw $123, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x83,0x46,0x27,0x42,0xf0,0x7b]
+ vmpsadbw $123, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vmpsadbw $123, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x83,0x46,0xa7,0x42,0xf0,0x7b]
+ vmpsadbw $123, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vmpsadbw $123, %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x83,0x46,0x40,0x42,0xf0,0x7b]
+ vmpsadbw $123, %zmm24, %zmm23, %zmm22
+
+// CHECK: vmpsadbw $123, %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x83,0x46,0x47,0x42,0xf0,0x7b]
+ vmpsadbw $123, %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vmpsadbw $123, %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x83,0x46,0xc7,0x42,0xf0,0x7b]
+ vmpsadbw $123, %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vmpsadbw $123, 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa3,0x46,0x00,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vmpsadbw $123, 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vmpsadbw $123, 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc3,0x46,0x07,0x42,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vmpsadbw $123, 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vmpsadbw $123, (%rip), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe3,0x46,0x00,0x42,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vmpsadbw $123, (%rip), %xmm23, %xmm22
+
+// CHECK: vmpsadbw $123, -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe3,0x46,0x00,0x42,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vmpsadbw $123, -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vmpsadbw $123, 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x46,0x87,0x42,0x71,0x7f,0x7b]
+ vmpsadbw $123, 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vmpsadbw $123, -2048(%rdx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x46,0x87,0x42,0x72,0x80,0x7b]
+ vmpsadbw $123, -2048(%rdx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vmpsadbw $123, 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa3,0x46,0x20,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vmpsadbw $123, 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vmpsadbw $123, 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc3,0x46,0x27,0x42,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vmpsadbw $123, 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vmpsadbw $123, (%rip), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe3,0x46,0x20,0x42,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vmpsadbw $123, (%rip), %ymm23, %ymm22
+
+// CHECK: vmpsadbw $123, -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe3,0x46,0x20,0x42,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vmpsadbw $123, -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vmpsadbw $123, 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x46,0xa7,0x42,0x71,0x7f,0x7b]
+ vmpsadbw $123, 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vmpsadbw $123, -4096(%rdx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x46,0xa7,0x42,0x72,0x80,0x7b]
+ vmpsadbw $123, -4096(%rdx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vmpsadbw $123, 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa3,0x46,0x40,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vmpsadbw $123, 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vmpsadbw $123, 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc3,0x46,0x47,0x42,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vmpsadbw $123, 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vmpsadbw $123, (%rip), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe3,0x46,0x40,0x42,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vmpsadbw $123, (%rip), %zmm23, %zmm22
+
+// CHECK: vmpsadbw $123, -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe3,0x46,0x40,0x42,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vmpsadbw $123, -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vmpsadbw $123, 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x46,0xc7,0x42,0x71,0x7f,0x7b]
+ vmpsadbw $123, 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vmpsadbw $123, -8192(%rdx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x46,0xc7,0x42,0x72,0x80,0x7b]
+ vmpsadbw $123, -8192(%rdx), %zmm23, %zmm22 {%k7} {z}
+
+// YMM Rounding
+
+// CHECK: vaddpd {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x81,0xc1,0x10,0x58,0xf0]
+ vaddpd {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vaddpd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x81,0xc1,0x37,0x58,0xf0]
+ vaddpd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vaddpd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x81,0xc1,0xf7,0x58,0xf0]
+ vaddpd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vaddph {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x85,0x40,0x10,0x58,0xf0]
+ vaddph {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vaddph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x40,0x37,0x58,0xf0]
+ vaddph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vaddph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x40,0xf7,0x58,0xf0]
+ vaddph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vaddps {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x81,0x40,0x10,0x58,0xf0]
+ vaddps {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vaddps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x81,0x40,0x37,0x58,0xf0]
+ vaddps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vaddps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x81,0x40,0xf7,0x58,0xf0]
+ vaddps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index 4a52a58f2de1c..f31c4baada141 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -2889,6 +2889,9 @@ static const X86FoldTableEntry Table2[] = {
{X86::VMOVUPSZ256rrkz, X86::VMOVUPSZ256rmkz, TB_NO_REVERSE},
{X86::VMOVUPSZrrkz, X86::VMOVUPSZrmkz, TB_NO_REVERSE},
{X86::VMPSADBWYrri, X86::VMPSADBWYrmi, 0},
+ {X86::VMPSADBWZ128rri, X86::VMPSADBWZ128rmi, 0},
+ {X86::VMPSADBWZ256rri, X86::VMPSADBWZ256rmi, 0},
+ {X86::VMPSADBWZrri, X86::VMPSADBWZrmi, 0},
{X86::VMPSADBWrri, X86::VMPSADBWrmi, 0},
{X86::VMULPDYrr, X86::VMULPDYrm, 0},
{X86::VMULPDZ128rr, X86::VMULPDZ128rm, 0},
@@ -4709,6 +4712,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VMOVUPSZ128rrk, X86::VMOVUPSZ128rmk, TB_NO_REVERSE},
{X86::VMOVUPSZ256rrk, X86::VMOVUPSZ256rmk, TB_NO_REVERSE},
{X86::VMOVUPSZrrk, X86::VMOVUPSZrmk, TB_NO_REVERSE},
+ {X86::VMPSADBWZ128rrikz, X86::VMPSADBWZ128rmikz, 0},
+ {X86::VMPSADBWZ256rrikz, X86::VMPSADBWZ256rmikz, 0},
+ {X86::VMPSADBWZrrikz, X86::VMPSADBWZrmikz, 0},
{X86::VMULPDZ128rrkz, X86::VMULPDZ128rmkz, 0},
{X86::VMULPDZ256rrkz, X86::VMULPDZ256rmkz, 0},
{X86::VMULPDZrrkz, X86::VMULPDZrmkz, 0},
@@ -6097,6 +6103,9 @@ static const X86FoldTableEntry Table4[] = {
{X86::VMINSDZrr_Intk, X86::VMINSDZrm_Intk, TB_NO_REVERSE},
{X86::VMINSHZrr_Intk, X86::VMINSHZrm_Intk, TB_NO_REVERSE},
{X86::VMINSSZrr_Intk, X86::VMINSSZrm_Intk, TB_NO_REVERSE},
+ {X86::VMPSADBWZ128rrik, X86::VMPSADBWZ128rmik, 0},
+ {X86::VMPSADBWZ256rrik, X86::VMPSADBWZ256rmik, 0},
+ {X86::VMPSADBWZrrik, X86::VMPSADBWZrmik, 0},
{X86::VMULPDZ128rrk, X86::VMULPDZ128rmk, 0},
{X86::VMULPDZ256rrk, X86::VMULPDZ256rmk, 0},
{X86::VMULPDZrrk, X86::VMULPDZrmk, 0},
diff --git a/llvm/utils/TableGen/X86DisassemblerTables.cpp b/llvm/utils/TableGen/X86DisassemblerTables.cpp
index 7d28c48055c34..b0acd4ea4224a 100644
--- a/llvm/utils/TableGen/X86DisassemblerTables.cpp
+++ b/llvm/utils/TableGen/X86DisassemblerTables.cpp
@@ -575,6 +575,31 @@ static inline bool inheritsFrom(InstructionContext child,
case IC_EVEX_W_NF:
case IC_EVEX_W_B_NF:
return false;
+ case IC_EVEX_B_U:
+ case IC_EVEX_XS_B_U:
+ case IC_EVEX_XD_B_U:
+ case IC_EVEX_OPSIZE_B_U:
+ case IC_EVEX_W_B_U:
+ case IC_EVEX_W_XS_B_U:
+ case IC_EVEX_W_XD_B_U:
+ case IC_EVEX_W_OPSIZE_B_U:
+ case IC_EVEX_K_B_U:
+ case IC_EVEX_XS_K_B_U:
+ case IC_EVEX_XD_K_B_U:
+ case IC_EVEX_OPSIZE_K_B_U:
+ case IC_EVEX_W_K_B_U:
+ case IC_EVEX_W_XS_K_B_U:
+ case IC_EVEX_W_XD_K_B_U:
+ case IC_EVEX_W_OPSIZE_K_B_U:
+ case IC_EVEX_KZ_B_U:
+ case IC_EVEX_XS_KZ_B_U:
+ case IC_EVEX_XD_KZ_B_U:
+ case IC_EVEX_OPSIZE_KZ_B_U:
+ case IC_EVEX_W_KZ_B_U:
+ case IC_EVEX_W_XS_KZ_B_U:
+ case IC_EVEX_W_XD_KZ_B_U:
+ case IC_EVEX_W_OPSIZE_KZ_B_U:
+ return false;
default:
errs() << "Unknown instruction class: "
<< stringForContext((InstructionContext)parent) << "\n";
@@ -926,7 +951,9 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, unsigned &i) const {
else
o << "IC_VEX";
- if ((index & ATTR_EVEX) && (index & ATTR_EVEXL2))
+ if ((index & ATTR_EVEXB) && (index & ATTR_EVEXU))
+ ; // Ignore ATTR_VEXL and ATTR_EVEXL2 under YMM rounding.
+ else if ((index & ATTR_EVEX) && (index & ATTR_EVEXL2))
o << "_L2";
else if (index & ATTR_VEXL)
o << "_L";
@@ -949,6 +976,9 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, unsigned &i) const {
if (index & ATTR_EVEXB)
o << "_B";
+
+ if ((index & ATTR_EVEXB) && (index & ATTR_EVEXU))
+ o << "_U";
}
} else if ((index & ATTR_64BIT) && (index & ATTR_REX2))
o << "IC_64BIT_REX2";
diff --git a/llvm/utils/TableGen/X86ManualInstrMapping.def b/llvm/utils/TableGen/X86ManualInstrMapping.def
index 58f5449f3b27b..f0154b80a80db 100644
--- a/llvm/utils/TableGen/X86ManualInstrMapping.def
+++ b/llvm/utils/TableGen/X86ManualInstrMapping.def
@@ -77,6 +77,10 @@ ENTRY(VMOVDQU16Z256rr, VMOVDQUYrr)
ENTRY(VMOVDQU8Z256mr, VMOVDQUYmr)
ENTRY(VMOVDQU8Z256rm, VMOVDQUYrm)
ENTRY(VMOVDQU8Z256rr, VMOVDQUYrr)
+ENTRY(VMPSADBWZ128rmi, VMPSADBWrmi)
+ENTRY(VMPSADBWZ128rri, VMPSADBWrri)
+ENTRY(VMPSADBWZ256rmi, VMPSADBWYrmi)
+ENTRY(VMPSADBWZ256rri, VMPSADBWYrri)
ENTRY(VSHUFF32X4Z256rmi, VPERM2F128rm)
ENTRY(VSHUFF32X4Z256rri, VPERM2F128rr)
ENTRY(VSHUFF64X2Z256rmi, VPERM2F128rm)
diff --git a/llvm/utils/TableGen/X86RecognizableInstr.cpp b/llvm/utils/TableGen/X86RecognizableInstr.cpp
index a2bc037b690c6..6aae57eca89d3 100644
--- a/llvm/utils/TableGen/X86RecognizableInstr.cpp
+++ b/llvm/utils/TableGen/X86RecognizableInstr.cpp
@@ -126,6 +126,7 @@ RecognizableInstrBase::RecognizableInstrBase(const CodeGenInstruction &insn) {
HasEVEX_K = Rec->getValueAsBit("hasEVEX_K");
HasEVEX_KZ = Rec->getValueAsBit("hasEVEX_Z");
HasEVEX_B = Rec->getValueAsBit("hasEVEX_B");
+ HasEVEX_U = Rec->getValueAsBit("hasEVEX_U");
HasEVEX_NF = Rec->getValueAsBit("hasEVEX_NF");
HasTwoConditionalOps = Rec->getValueAsBit("hasTwoConditionalOps");
IsCodeGenOnly = Rec->getValueAsBit("isCodeGenOnly");
@@ -191,6 +192,8 @@ void RecognizableInstr::processInstr(DisassemblerTables &tables,
#define EVEX_NF(n) (HasEVEX_NF ? n##_NF : n)
#define EVEX_B_NF(n) (HasEVEX_B ? EVEX_NF(n##_B) : EVEX_NF(n))
#define EVEX_KB_ADSIZE(n) AdSize == X86Local::AdSize32 ? n##_ADSIZE : EVEX_KB(n)
+#define EVEX_KB_U(n) \
+ (HasEVEX_KZ ? n##_KZ_B_U : (HasEVEX_K ? n##_K_B_U : n##_B_U))
InstructionContext RecognizableInstr::insnContext() const {
InstructionContext insnContext;
@@ -200,7 +203,28 @@ InstructionContext RecognizableInstr::insnContext() const {
errs() << "Don't support VEX.L if EVEX_L2 is enabled: " << Name << "\n";
llvm_unreachable("Don't support VEX.L if EVEX_L2 is enabled");
}
- if (HasEVEX_NF) {
+ if (EncodeRC && HasEVEX_U) {
+ // EVEX_U
+ if (HasREX_W) {
+ if (OpPrefix == X86Local::PD)
+ insnContext = EVEX_KB_U(IC_EVEX_W_OPSIZE);
+ else if (OpPrefix == X86Local::XS)
+ insnContext = EVEX_KB_U(IC_EVEX_W_XS);
+ else if (OpPrefix == X86Local::XD)
+ insnContext = EVEX_KB_U(IC_EVEX_W_XD);
+ else if (OpPrefix == X86Local::PS)
+ insnContext = EVEX_KB_U(IC_EVEX_W);
+ } else {
+ if (OpPrefix == X86Local::PD)
+ insnContext = EVEX_KB_U(IC_EVEX_OPSIZE);
+ else if (OpPrefix == X86Local::XS)
+ insnContext = EVEX_KB_U(IC_EVEX_XS);
+ else if (OpPrefix == X86Local::XD)
+ insnContext = EVEX_KB_U(IC_EVEX_XD);
+ else if (OpPrefix == X86Local::PS)
+ insnContext = EVEX_KB_U(IC_EVEX);
+ }
+ } else if (HasEVEX_NF) {
if (OpPrefix == X86Local::PD)
insnContext = EVEX_B_NF(IC_EVEX_OPSIZE);
else if (HasREX_W)
diff --git a/llvm/utils/TableGen/X86RecognizableInstr.h b/llvm/utils/TableGen/X86RecognizableInstr.h
index 12fb41750cb3f..eb2cee7bbbf87 100644
--- a/llvm/utils/TableGen/X86RecognizableInstr.h
+++ b/llvm/utils/TableGen/X86RecognizableInstr.h
@@ -214,6 +214,8 @@ struct RecognizableInstrBase {
bool HasEVEX_KZ;
/// The hasEVEX_B field from the record
bool HasEVEX_B;
+ /// The hasEVEX_U field from the record
+ bool HasEVEX_U;
/// The hasEVEX_NF field from the record
bool HasEVEX_NF;
/// The hasTwoConditionalOps field from the record
>From f89b7467095f9e522252de278ece4acb2796d105 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Mon, 29 Jul 2024 10:51:29 +0800
Subject: [PATCH 2/2] Support AVX10.2-BF16 new instructions.
---
clang/include/clang/Basic/BuiltinsX86.def | 60 +
clang/lib/Basic/Targets/X86.cpp | 1 +
clang/lib/CodeGen/CGBuiltin.cpp | 23 +
clang/lib/Headers/CMakeLists.txt | 2 +
clang/lib/Headers/avx10_2_512bf16intrin.h | 565 +++
clang/lib/Headers/avx10_2bf16intrin.h | 1088 ++++++
clang/lib/Headers/immintrin.h | 2 +
.../CodeGen/X86/avx10_2_512bf16-builtins.c | 1054 ++++++
clang/test/CodeGen/X86/avx10_2bf16-builtins.c | 2018 +++++++++++
llvm/include/llvm/IR/IntrinsicsX86.td | 410 +++
.../lib/Target/X86/AsmParser/X86AsmParser.cpp | 6 +-
.../X86/MCTargetDesc/X86ATTInstPrinter.cpp | 12 +-
.../X86/MCTargetDesc/X86InstPrinterCommon.cpp | 11 +
.../X86/MCTargetDesc/X86IntelInstPrinter.cpp | 9 +
llvm/lib/Target/X86/X86ISelLowering.cpp | 37 +-
llvm/lib/Target/X86/X86InstrAVX10.td | 310 ++
llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 10 +
llvm/lib/Target/X86/X86InstrUtils.td | 6 +-
llvm/lib/Target/X86/X86IntrinsicsInfo.h | 30 +
.../test/CodeGen/X86/avx10_2_512bf16-arith.ll | 587 ++++
.../CodeGen/X86/avx10_2_512bf16-intrinsics.ll | 296 ++
llvm/test/CodeGen/X86/avx10_2bf16-arith.ll | 1168 +++++++
.../CodeGen/X86/avx10_2bf16-intrinsics.ll | 536 +++
.../MC/Disassembler/X86/avx10.2-bf16-32.txt | 3015 +++++++++++++++++
.../MC/Disassembler/X86/avx10.2-bf16-64.txt | 3015 +++++++++++++++++
llvm/test/MC/X86/avx10.2-bf16-32-att.s | 3014 ++++++++++++++++
llvm/test/MC/X86/avx10.2-bf16-32-intel.s | 3014 ++++++++++++++++
llvm/test/MC/X86/avx10.2-bf16-64-att.s | 3014 ++++++++++++++++
llvm/test/MC/X86/avx10.2-bf16-64-intel.s | 3014 ++++++++++++++++
llvm/test/TableGen/x86-fold-tables.inc | 494 +++
llvm/utils/TableGen/X86DisassemblerTables.cpp | 4 +-
31 files changed, 26814 insertions(+), 11 deletions(-)
create mode 100644 clang/lib/Headers/avx10_2_512bf16intrin.h
create mode 100644 clang/lib/Headers/avx10_2bf16intrin.h
create mode 100644 clang/test/CodeGen/X86/avx10_2_512bf16-builtins.c
create mode 100644 clang/test/CodeGen/X86/avx10_2bf16-builtins.c
create mode 100644 llvm/test/CodeGen/X86/avx10_2_512bf16-arith.ll
create mode 100644 llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll
create mode 100644 llvm/test/CodeGen/X86/avx10_2bf16-arith.ll
create mode 100644 llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll
create mode 100644 llvm/test/MC/Disassembler/X86/avx10.2-bf16-32.txt
create mode 100644 llvm/test/MC/Disassembler/X86/avx10.2-bf16-64.txt
create mode 100644 llvm/test/MC/X86/avx10.2-bf16-32-att.s
create mode 100644 llvm/test/MC/X86/avx10.2-bf16-32-intel.s
create mode 100644 llvm/test/MC/X86/avx10.2-bf16-64-att.s
create mode 100644 llvm/test/MC/X86/avx10.2-bf16-64-intel.s
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index f028711a807c0..779ecf20986a9 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -2022,6 +2022,66 @@ TARGET_BUILTIN(__builtin_ia32_vsm4key4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
TARGET_BUILTIN(__builtin_ia32_vsm4rnds4128, "V4UiV4UiV4Ui", "nV:128:", "sm4")
TARGET_BUILTIN(__builtin_ia32_vsm4rnds4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
+TARGET_BUILTIN(__builtin_ia32_loadsbf16128_mask, "V8yV8yC*V8yUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_storesbf16128_mask, "vV8y*V8yUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vaddnepbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vaddnepbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vaddnepbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vdivnepbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vdivnepbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vdivnepbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vmaxpbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmaxpbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmaxpbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vminpbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminpbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminpbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vmulnepbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmulnepbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmulnepbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vsubnepbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vsubnepbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vsubnepbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcomsbf16eq, "iV8yV8y", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcomsbf16lt, "iV8yV8y", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcomsbf16neq, "iV8yV8y", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcomsbf16ge, "iV8yV8y", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcomsbf16gt, "iV8yV8y", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcomsbf16le, "iV8yV8y", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcmppbf16512_mask, "UiV32yV32yIiUi", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcmppbf16256_mask, "UsV16yV16yIiUs", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcmppbf16128_mask, "UcV8yV8yIiUc", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfpclasspbf16128_mask, "UcV8yIiUc", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfpclasspbf16256_mask, "UsV16yIiUs", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfpclasspbf16512_mask, "UiV32yIiUi", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vscalefpbf16128_mask, "V8yV8yV8yV8yUc", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vscalefpbf16256_mask, "V16yV16yV16yV16yUs", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vscalefpbf16512_mask, "V32yV32yV32yV32yUi", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vrcppbf16128_mask, "V8yV8yV8yUc", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vrcppbf16256_mask, "V16yV16yV16yUs", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vrcppbf16512_mask, "V32yV32yV32yUi", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vgetexppbf16128_mask, "V8yV8yV8yUc", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vgetexppbf16256_mask, "V16yV16yV16yUs", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vgetexppbf16512_mask, "V32yV32yV32yUi", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vrsqrtpbf16128_mask, "V8yV8yV8yUc", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vrsqrtpbf16256_mask, "V16yV16yV16yUs", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vrsqrtpbf16512_mask, "V32yV32yV32yUi", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vreducenepbf16128_mask, "V8yV8yIiV8yUc", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vreducenepbf16256_mask, "V16yV16yIiV16yUs", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vreducenepbf16512_mask, "V32yV32yIiV32yUi", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vrndscalenepbf16_128_mask, "V8yV8yIiV8yUc", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vrndscalenepbf16_256_mask, "V16yV16yIiV16yUs", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vrndscalenepbf16_mask, "V32yV32yIiV32yUi", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vgetmantpbf16128_mask, "V8yV8yIiV8yUc", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vgetmantpbf16256_mask, "V16yV16yIiV16yUs", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vgetmantpbf16512_mask, "V32yV32yIiV32yUi", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vsqrtnepbf16, "V8yV8y", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vsqrtnepbf16256, "V16yV16y", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vsqrtnepbf16512, "V32yV32y", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh512, "V32yV32yV32yV32y", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh256, "V16yV16yV16yV16y", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh128, "V8yV8yV8yV8y", "ncV:128:", "avx10.2-256")
+
#undef BUILTIN
#undef TARGET_BUILTIN
#undef TARGET_HEADER_BUILTIN
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 3fb3587eb5914..dcb998aeb41a9 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -306,6 +306,7 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasAVX10_1_512 = true;
} else if (Feature == "+avx10.2-256") {
HasAVX10_2 = true;
+ HasFullBFloat16 = true;
} else if (Feature == "+avx10.2-512") {
HasAVX10_2_512 = true;
} else if (Feature == "+avx512cd") {
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b2cab812985af..fed8223ffbe89 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -14631,6 +14631,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_storeups512_mask:
return EmitX86MaskedStore(*this, Ops, Align(1));
+ case X86::BI__builtin_ia32_storesbf16128_mask:
case X86::BI__builtin_ia32_storesh128_mask:
case X86::BI__builtin_ia32_storess128_mask:
case X86::BI__builtin_ia32_storesd128_mask:
@@ -14727,6 +14728,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_vfmaddph512_mask:
case X86::BI__builtin_ia32_vfmaddph512_maskz:
case X86::BI__builtin_ia32_vfmaddph512_mask3:
+ case X86::BI__builtin_ia32_vfmaddnepbh128:
+ case X86::BI__builtin_ia32_vfmaddnepbh256:
+ case X86::BI__builtin_ia32_vfmaddnepbh512:
case X86::BI__builtin_ia32_vfmaddps512_mask:
case X86::BI__builtin_ia32_vfmaddps512_maskz:
case X86::BI__builtin_ia32_vfmaddps512_mask3:
@@ -14787,6 +14791,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_loaddqudi512_mask:
return EmitX86MaskedLoad(*this, Ops, Align(1));
+ case X86::BI__builtin_ia32_loadsbf16128_mask:
case X86::BI__builtin_ia32_loadsh128_mask:
case X86::BI__builtin_ia32_loadss128_mask:
case X86::BI__builtin_ia32_loadsd128_mask:
@@ -15941,6 +15946,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_sqrtph256:
case X86::BI__builtin_ia32_sqrtph:
case X86::BI__builtin_ia32_sqrtph512:
+ case X86::BI__builtin_ia32_vsqrtnepbf16256:
+ case X86::BI__builtin_ia32_vsqrtnepbf16:
+ case X86::BI__builtin_ia32_vsqrtnepbf16512:
case X86::BI__builtin_ia32_sqrtps512:
case X86::BI__builtin_ia32_sqrtpd512: {
if (Ops.size() == 2) {
@@ -16160,6 +16168,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_fpclassps128_mask:
case X86::BI__builtin_ia32_fpclassps256_mask:
case X86::BI__builtin_ia32_fpclassps512_mask:
+ case X86::BI__builtin_ia32_vfpclasspbf16128_mask:
+ case X86::BI__builtin_ia32_vfpclasspbf16256_mask:
+ case X86::BI__builtin_ia32_vfpclasspbf16512_mask:
case X86::BI__builtin_ia32_fpclassph128_mask:
case X86::BI__builtin_ia32_fpclassph256_mask:
case X86::BI__builtin_ia32_fpclassph512_mask:
@@ -16174,6 +16185,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Intrinsic::ID ID;
switch (BuiltinID) {
default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_vfpclasspbf16128_mask:
+ ID = Intrinsic::x86_avx10_fpclass_nepbf16_128;
+ break;
+ case X86::BI__builtin_ia32_vfpclasspbf16256_mask:
+ ID = Intrinsic::x86_avx10_fpclass_nepbf16_256;
+ break;
+ case X86::BI__builtin_ia32_vfpclasspbf16512_mask:
+ ID = Intrinsic::x86_avx10_fpclass_nepbf16_512;
+ break;
case X86::BI__builtin_ia32_fpclassph128_mask:
ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
break;
@@ -16329,6 +16349,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_cmppd128_mask:
case X86::BI__builtin_ia32_cmppd256_mask:
case X86::BI__builtin_ia32_cmppd512_mask:
+ case X86::BI__builtin_ia32_vcmppbf16512_mask:
+ case X86::BI__builtin_ia32_vcmppbf16256_mask:
+ case X86::BI__builtin_ia32_vcmppbf16128_mask:
IsMaskFCmp = true;
[[fallthrough]];
case X86::BI__builtin_ia32_cmpps:
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index b17ab24d625a0..090db1abd7005 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -147,7 +147,9 @@ set(x86_files
amxcomplexintrin.h
amxfp16intrin.h
amxintrin.h
+ avx10_2_512bf16intrin.h
avx10_2_512niintrin.h
+ avx10_2bf16intrin.h
avx10_2niintrin.h
avx2intrin.h
avx512bf16intrin.h
diff --git a/clang/lib/Headers/avx10_2_512bf16intrin.h b/clang/lib/Headers/avx10_2_512bf16intrin.h
new file mode 100644
index 0000000000000..42b1790fee37a
--- /dev/null
+++ b/clang/lib/Headers/avx10_2_512bf16intrin.h
@@ -0,0 +1,565 @@
+/*===----------- avx10_2_512bf16intrin.h - AVX10-BF16 intrinsics ---------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error \
+ "Never use <avx10_2_512bf16intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifdef __SSE2__
+
+#ifndef __AVX10_2_512BF16INTRIN_H
+#define __AVX10_2_512BF16INTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+typedef __bf16 __m512bh_u __attribute__((__vector_size__(64), __aligned__(1)));
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS512 \
+ __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"), \
+ __min_vector_width__(512)))
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_setzero_pbh(void) {
+ return __builtin_bit_cast(__m512bh, _mm512_setzero_ps());
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_undefined_pbh(void) {
+ return (__m512bh)__builtin_ia32_undef512();
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_set1_pbh(__bf16 bf) {
+ return (__m512bh)(__v32bf){bf, bf, bf, bf, bf, bf, bf, bf, bf, bf, bf,
+ bf, bf, bf, bf, bf, bf, bf, bf, bf, bf, bf,
+ bf, bf, bf, bf, bf, bf, bf, bf, bf, bf};
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_set_pbh(
+ __bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4, __bf16 bf5, __bf16 bf6,
+ __bf16 bf7, __bf16 bf8, __bf16 bf9, __bf16 bf10, __bf16 bf11, __bf16 bf12,
+ __bf16 bf13, __bf16 bf14, __bf16 bf15, __bf16 bf16, __bf16 bf17,
+ __bf16 bf18, __bf16 bf19, __bf16 bf20, __bf16 bf21, __bf16 bf22,
+ __bf16 bf23, __bf16 bf24, __bf16 bf25, __bf16 bf26, __bf16 bf27,
+ __bf16 bf28, __bf16 bf29, __bf16 bf30, __bf16 bf31, __bf16 bf32) {
+ return (__m512bh)(__v32bf){bf32, bf31, bf30, bf29, bf28, bf27, bf26, bf25,
+ bf24, bf23, bf22, bf21, bf20, bf19, bf18, bf17,
+ bf16, bf15, bf14, bf13, bf12, bf11, bf10, bf9,
+ bf8, bf7, bf6, bf5, bf4, bf3, bf2, bf1};
+}
+
+#define _mm512_setr_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8, bf9, bf10, \
+ bf11, bf12, bf13, bf14, bf15, bf16, bf17, bf18, bf19, \
+ bf20, bf21, bf22, bf23, bf24, bf25, bf26, bf27, bf28, \
+ bf29, bf30, bf31, bf32) \
+ _mm512_set_pbh((bf32), (bf31), (bf30), (bf29), (bf28), (bf27), (bf26), \
+ (bf25), (bf24), (bf23), (bf22), (bf21), (bf20), (bf19), \
+ (bf18), (bf17), (bf16), (bf15), (bf14), (bf13), (bf12), \
+ (bf11), (bf10), (bf9), (bf8), (bf7), (bf6), (bf5), (bf4), \
+ (bf3), (bf2), (bf1))
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
+_mm512_castpbf16_ps(__m512bh __a) {
+ return (__m512)__a;
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
+_mm512_castpbf16_pd(__m512bh __a) {
+ return (__m512d)__a;
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_castpbf16_si512(__m512bh __a) {
+ return (__m512i)__a;
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_castps_pbh(__m512 __a) {
+ return (__m512bh)__a;
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_castpd_pbh(__m512d __a) {
+ return (__m512bh)__a;
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_castsi512_pbh(__m512i __a) {
+ return (__m512bh)__a;
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS512
+_mm512_castpbf16512_pbh128(__m512bh __a) {
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS512
+_mm512_castpbf16512_pbh256(__m512bh __a) {
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+ 12, 13, 14, 15);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_castpbf16128_pbh512(__m128bh __a) {
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_castpbf16256_pbh512(__m256bh __a) {
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+ 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_zextpbf16128_pbh512(__m128bh __a) {
+ return __builtin_shufflevector(
+ __a, (__v8bf)_mm_setzero_pbh(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_zextpbf16256_pbh512(__m256bh __a) {
+ return __builtin_shufflevector(__a, (__v16bf)_mm256_setzero_pbh(), 0, 1, 2, 3,
+ 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
+ 29, 30, 31);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_abs_pbh(__m512bh __A) {
+ return (__m512bh)_mm512_and_epi32(_mm512_set1_epi32(0x7FFF7FFF),
+ (__m512i)__A);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_load_pbh(void const *__p) {
+ return *(const __m512bh *)__p;
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_loadu_pbh(void const *__p) {
+ struct __loadu_pbh {
+ __m512bh_u __v;
+ } __attribute__((__packed__, __may_alias__));
+ return ((const struct __loadu_pbh *)__p)->__v;
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_store_pbh(void *__P,
+ __m512bh __A) {
+ *(__m512bh *)__P = __A;
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_storeu_pbh(void *__P,
+ __m512bh __A) {
+ struct __storeu_pbh {
+ __m512bh_u __v;
+ } __attribute__((__packed__, __may_alias__));
+ ((struct __storeu_pbh *)__P)->__v = __A;
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_mask_blend_pbh(__mmask32 __U, __m512bh __A, __m512bh __W) {
+ return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, (__v32bf)__W,
+ (__v32bf)__A);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_permutex2var_pbh(__m512bh __A, __m512i __I, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
+ (__v32hi)__B);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_permutexvar_pbh(__m512i __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_addne_pbh(__m512bh __A, __m512bh __B) {
+ return (__m512bh)((__v32bf)__A + (__v32bf)__B);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_mask_addne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U, (__v32bf)_mm512_addne_pbh(__A, __B), (__v32bf)__W);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_maskz_addne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U, (__v32bf)_mm512_addne_pbh(__A, __B),
+ (__v32bf)_mm512_setzero_pbh());
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_subne_pbh(__m512bh __A, __m512bh __B) {
+ return (__m512bh)((__v32bf)__A - (__v32bf)__B);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_mask_subne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U, (__v32bf)_mm512_subne_pbh(__A, __B), (__v32bf)__W);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_maskz_subne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U, (__v32bf)_mm512_subne_pbh(__A, __B),
+ (__v32bf)_mm512_setzero_pbh());
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_mulne_pbh(__m512bh __A, __m512bh __B) {
+ return (__m512bh)((__v32bf)__A * (__v32bf)__B);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_mask_mulne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U, (__v32bf)_mm512_mulne_pbh(__A, __B), (__v32bf)__W);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_maskz_mulne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U, (__v32bf)_mm512_mulne_pbh(__A, __B),
+ (__v32bf)_mm512_setzero_pbh());
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_divne_pbh(__m512bh __A, __m512bh __B) {
+ return (__m512bh)((__v32bf)__A / (__v32bf)__B);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_mask_divne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U, (__v32bf)_mm512_divne_pbh(__A, __B), (__v32bf)__W);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_maskz_divne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U, (__v32bf)_mm512_divne_pbh(__A, __B),
+ (__v32bf)_mm512_setzero_pbh());
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_max_pbh(__m512bh __A,
+ __m512bh __B) {
+ return (__m512bh)__builtin_ia32_vmaxpbf16512((__v32bf)__A, (__v32bf)__B);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_mask_max_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U, (__v32bf)_mm512_max_pbh(__A, __B), (__v32bf)__W);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_maskz_max_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U, (__v32bf)_mm512_max_pbh(__A, __B),
+ (__v32bf)_mm512_setzero_pbh());
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_min_pbh(__m512bh __A,
+ __m512bh __B) {
+ return (__m512bh)__builtin_ia32_vminpbf16512((__v32bf)__A, (__v32bf)__B);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_mask_min_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U, (__v32bf)_mm512_min_pbh(__A, __B), (__v32bf)__W);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_maskz_min_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U, (__v32bf)_mm512_min_pbh(__A, __B),
+ (__v32bf)_mm512_setzero_pbh());
+}
+
+#define _mm512_cmp_pbh_mask(A, B, P) \
+ ((__mmask32)__builtin_ia32_vcmppbf16512_mask((__v32bf)(__m512bh)(A), \
+ (__v32bf)(__m512bh)(B), \
+ (int)(P), (__mmask32) - 1))
+
+#define _mm512_mask_cmp_pbh_mask(U, A, B, P) \
+ ((__mmask32)__builtin_ia32_vcmppbf16512_mask((__v32bf)(__m512bh)(A), \
+ (__v32bf)(__m512bh)(B), \
+ (int)(P), (__mmask32)(U)))
+
+#define _mm512_mask_fpclass_pbh_mask(U, A, imm) \
+ ((__mmask32)__builtin_ia32_vfpclasspbf16512_mask( \
+ (__v32bf)(__m512bh)(A), (int)(imm), (__mmask32)(U)))
+
+#define _mm512_fpclass_pbh_mask(A, imm) \
+ ((__mmask32)__builtin_ia32_vfpclasspbf16512_mask( \
+ (__v32bf)(__m512bh)(A), (int)(imm), (__mmask32) - 1))
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_scalef_pbh(__m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_vscalefpbf16512_mask(
+ (__v32bf)__A, (__v32bf)__B, (__v32bf)_mm512_undefined_pbh(),
+ (__mmask32)-1);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pbh(
+ __m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_vscalefpbf16512_mask(
+ (__v32bf)__A, (__v32bf)__B, (__v32bf)__W, (__mmask32)__U);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_maskz_scalef_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_vscalefpbf16512_mask(
+ (__v32bf)__A, (__v32bf)__B, (__v32bf)_mm512_setzero_pbh(),
+ (__mmask32)__U);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_rcp_pbh(__m512bh __A) {
+ return (__m512bh)__builtin_ia32_vrcppbf16512_mask(
+ (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_mask_rcp_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
+ return (__m512bh)__builtin_ia32_vrcppbf16512_mask((__v32bf)__A, (__v32bf)__W,
+ (__mmask32)__U);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_maskz_rcp_pbh(__mmask32 __U, __m512bh __A) {
+ return (__m512bh)__builtin_ia32_vrcppbf16512_mask(
+ (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_getexp_pbh(__m512bh __A) {
+ return (__m512bh)__builtin_ia32_vgetexppbf16512_mask(
+ (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_mask_getexp_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
+ return (__m512bh)__builtin_ia32_vgetexppbf16512_mask(
+ (__v32bf)__A, (__v32bf)__W, (__mmask32)__U);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_maskz_getexp_pbh(__mmask32 __U, __m512bh __A) {
+ return (__m512bh)__builtin_ia32_vgetexppbf16512_mask(
+ (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_rsqrt_pbh(__m512bh __A) {
+ return (__m512bh)__builtin_ia32_vrsqrtpbf16512_mask(
+ (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_mask_rsqrt_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
+ return (__m512bh)__builtin_ia32_vrsqrtpbf16512_mask(
+ (__v32bf)__A, (__v32bf)__W, (__mmask32)__U);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_maskz_rsqrt_pbh(__mmask32 __U, __m512bh __A) {
+ return (__m512bh)__builtin_ia32_vrsqrtpbf16512_mask(
+ (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U);
+}
+
+#define _mm512_reducene_pbh(A, imm) \
+ ((__m512bh)__builtin_ia32_vreducenepbf16512_mask( \
+ (__v32bf)(__m512bh)(A), (int)(imm), (__v32bf)_mm512_undefined_pbh(), \
+ (__mmask32) - 1))
+
+#define _mm512_mask_reducene_pbh(W, U, A, imm) \
+ ((__m512bh)__builtin_ia32_vreducenepbf16512_mask( \
+ (__v32bf)(__m512bh)(A), (int)(imm), (__v32bf)(__m512bh)(W), \
+ (__mmask32)(U)))
+
+#define _mm512_maskz_reducene_pbh(U, A, imm) \
+ ((__m512bh)__builtin_ia32_vreducenepbf16512_mask( \
+ (__v32bf)(__m512bh)(A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \
+ (__mmask32)(U)))
+
+#define _mm512_roundscalene_pbh(A, B) \
+ ((__m512bh)__builtin_ia32_vrndscalenepbf16_mask( \
+ (__v32bf)(__m512bh)(A), (int)(B), (__v32bf)_mm512_undefined_pbh(), \
+ (__mmask32) - 1))
+
+#define _mm512_mask_roundscalene_pbh(W, U, A, imm) \
+ ((__m512bh)__builtin_ia32_vrndscalenepbf16_mask( \
+ (__v32bf)(__m512bh)(A), (int)(imm), (__v32bf)(__m512bh)(W), \
+ (__mmask32)(U)))
+
+#define _mm512_maskz_roundscalene_pbh(A, B, imm) \
+ ((__m512bh)__builtin_ia32_vrndscalenepbf16_mask( \
+ (__v32bf)(__m512bh)(B), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \
+ (__mmask32)(A)))
+
+#define _mm512_getmant_pbh(A, B, C) \
+ ((__m512bh)__builtin_ia32_vgetmantpbf16512_mask( \
+ (__v32bf)(__m512bh)(A), (int)(((C) << 2) | (B)), \
+ (__v32bf)_mm512_undefined_pbh(), (__mmask32) - 1))
+
+#define _mm512_mask_getmant_pbh(W, U, A, B, C) \
+ ((__m512bh)__builtin_ia32_vgetmantpbf16512_mask( \
+ (__v32bf)(__m512bh)(A), (int)(((C) << 2) | (B)), (__v32bf)(__m512bh)(W), \
+ (__mmask32)(U)))
+
+#define _mm512_maskz_getmant_pbh(U, A, B, C) \
+ ((__m512bh)__builtin_ia32_vgetmantpbf16512_mask( \
+ (__v32bf)(__m512bh)(A), (int)(((C) << 2) | (B)), \
+ (__v32bf)_mm512_setzero_pbh(), (__mmask32)(U)))
+
+/* Thin wrapper over the 512-bit vsqrtnepbf16 builtin. */
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_sqrt_pbh(__m512bh __A) {
+ __v32bf __src = (__v32bf)__A;
+ return (__m512bh)__builtin_ia32_vsqrtnepbf16512(__src);
+}
+
+/* Selects, under mask __U, between _mm512_sqrt_pbh(__A) and __W. */
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_mask_sqrt_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
+ __v32bf __root = (__v32bf)_mm512_sqrt_pbh(__A);
+ return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, __root,
+ (__v32bf)__W);
+}
+
+/* Selects, under mask __U, between _mm512_sqrt_pbh(__A) and zero. */
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_maskz_sqrt_pbh(__mmask32 __U, __m512bh __A) {
+ __v32bf __root = (__v32bf)_mm512_sqrt_pbh(__A);
+ __v32bf __zero = (__v32bf)_mm512_setzero_pbh();
+ return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, __root, __zero);
+}
+
+/* Passes __A, __B, __C unchanged to the 512-bit vfmaddnepbh builtin. */
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_fmaddne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
+ __v32bf __x = (__v32bf)__A;
+ __v32bf __y = (__v32bf)__B;
+ __v32bf __z = (__v32bf)__C;
+ return (__m512bh)__builtin_ia32_vfmaddnepbh512(__x, __y, __z);
+}
+
+/* Select under __U between the fmaddne result and __A. */
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddne_pbh(
+ __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
+ __v32bf __fma = (__v32bf)_mm512_fmaddne_pbh(__A, __B, __C);
+ return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, __fma,
+ (__v32bf)__A);
+}
+
+/* Select under __U between the fmaddne result and __C. */
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddne_pbh(
+ __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
+ __v32bf __fma = (__v32bf)_mm512_fmaddne_pbh(__A, __B, __C);
+ return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, __fma,
+ (__v32bf)__C);
+}
+
+/* Select under __U between the fmaddne result and an all-zero vector. */
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddne_pbh(
+ __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
+ __v32bf __fma = (__v32bf)_mm512_fmaddne_pbh(__A, __B, __C);
+ __v32bf __zero = (__v32bf)_mm512_setzero_pbh();
+ return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, __fma, __zero);
+}
+
+/* Calls the vfmaddnepbh builtin with the third operand negated. */
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_fmsubne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
+ __v32bf __x = (__v32bf)__A;
+ __v32bf __y = (__v32bf)__B;
+ __v32bf __neg_z = -(__v32bf)__C;
+ return (__m512bh)__builtin_ia32_vfmaddnepbh512(__x, __y, __neg_z);
+}
+
+/* Select under __U between the fmsubne result and __A. */
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubne_pbh(
+ __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
+ __v32bf __fms = (__v32bf)_mm512_fmsubne_pbh(__A, __B, __C);
+ return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, __fms,
+ (__v32bf)__A);
+}
+
+/* Select under __U between the fmsubne result and __C. */
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubne_pbh(
+ __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
+ __v32bf __fms = (__v32bf)_mm512_fmsubne_pbh(__A, __B, __C);
+ return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, __fms,
+ (__v32bf)__C);
+}
+
+/* Select under __U between the fmsubne result and an all-zero vector. */
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubne_pbh(
+ __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
+ __v32bf __fms = (__v32bf)_mm512_fmsubne_pbh(__A, __B, __C);
+ __v32bf __zero = (__v32bf)_mm512_setzero_pbh();
+ return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, __fms, __zero);
+}
+
+/* Calls the vfmaddnepbh builtin with the second operand negated. */
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_fnmaddne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
+ __v32bf __x = (__v32bf)__A;
+ __v32bf __neg_y = -(__v32bf)__B;
+ __v32bf __z = (__v32bf)__C;
+ return (__m512bh)__builtin_ia32_vfmaddnepbh512(__x, __neg_y, __z);
+}
+
+/* Select under __U between the fnmaddne result and __A. */
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmaddne_pbh(
+ __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
+ __v32bf __fnma = (__v32bf)_mm512_fnmaddne_pbh(__A, __B, __C);
+ return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, __fnma,
+ (__v32bf)__A);
+}
+
+/* Select under __U between the fnmaddne result and __C. */
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmaddne_pbh(
+ __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
+ __v32bf __fnma = (__v32bf)_mm512_fnmaddne_pbh(__A, __B, __C);
+ return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, __fnma,
+ (__v32bf)__C);
+}
+
+/* Select under __U between the fnmaddne result and an all-zero vector. */
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmaddne_pbh(
+ __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
+ __v32bf __fnma = (__v32bf)_mm512_fnmaddne_pbh(__A, __B, __C);
+ __v32bf __zero = (__v32bf)_mm512_setzero_pbh();
+ return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, __fnma, __zero);
+}
+
+/* Calls the vfmaddnepbh builtin with the second and third operands negated. */
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_fnmsubne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
+ __v32bf __x = (__v32bf)__A;
+ __v32bf __neg_y = -(__v32bf)__B;
+ __v32bf __neg_z = -(__v32bf)__C;
+ return (__m512bh)__builtin_ia32_vfmaddnepbh512(__x, __neg_y, __neg_z);
+}
+
+/* Select under __U between the fnmsubne result and __A. */
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsubne_pbh(
+ __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
+ __v32bf __fnms = (__v32bf)_mm512_fnmsubne_pbh(__A, __B, __C);
+ return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, __fnms,
+ (__v32bf)__A);
+}
+
+/* Select under __U between the fnmsubne result and __C. */
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsubne_pbh(
+ __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
+ __v32bf __fnms = (__v32bf)_mm512_fnmsubne_pbh(__A, __B, __C);
+ return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, __fnms,
+ (__v32bf)__C);
+}
+
+/* Select under __U between the fnmsubne result and an all-zero vector. */
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsubne_pbh(
+ __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
+ __v32bf __fnms = (__v32bf)_mm512_fnmsubne_pbh(__A, __B, __C);
+ __v32bf __zero = (__v32bf)_mm512_setzero_pbh();
+ return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, __fnms, __zero);
+}
+
+#undef __DEFAULT_FN_ATTRS512
+
+#endif
+#endif
\ No newline at end of file
diff --git a/clang/lib/Headers/avx10_2bf16intrin.h b/clang/lib/Headers/avx10_2bf16intrin.h
new file mode 100644
index 0000000000000..b3ece3d7d79cb
--- /dev/null
+++ b/clang/lib/Headers/avx10_2bf16intrin.h
@@ -0,0 +1,1088 @@
+/*===---------- avx10_2bf16intrin.h - AVX10.2-BF16 intrinsics ------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error \
+ "Never use <avx10_2bf16intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifdef __SSE2__
+
+#ifndef __AVX10_2BF16INTRIN_H
+#define __AVX10_2BF16INTRIN_H
+
+/* 16- and 32-byte __bf16 vector types declared with __aligned__(1); the
+ * loadu/storeu helpers in this file wrap them in packed structs. */
+typedef __bf16 __m128bh_u __attribute__((__vector_size__(16), __aligned__(1)));
+typedef __bf16 __m256bh_u __attribute__((__vector_size__(32), __aligned__(1)));
+
+/* Define the default attributes for the functions in this file. */
+/* Both macros name the "avx10.2-256" target; they differ only in the
+ * __min_vector_width__ value (256 vs. 128). */
+#define __DEFAULT_FN_ATTRS256 \
+ __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \
+ __min_vector_width__(256)))
+#define __DEFAULT_FN_ATTRS128 \
+ __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \
+ __min_vector_width__(128)))
+
+/* Reinterprets the bits of _mm256_setzero_ps() as a __m256bh. */
+static __inline __m256bh __DEFAULT_FN_ATTRS256 _mm256_setzero_pbh(void) {
+ __m256 __zero = _mm256_setzero_ps();
+ return __builtin_bit_cast(__m256bh, __zero);
+}
+
+/* Reinterprets the bits of _mm_setzero_ps() as a __m128bh. */
+static __inline __m128bh __DEFAULT_FN_ATTRS128 _mm_setzero_pbh(void) {
+ __m128 __zero = _mm_setzero_ps();
+ return __builtin_bit_cast(__m128bh, __zero);
+}
+
+/* The helpers below are plain vector casts between same-sized types. */
+static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_castpbf16_ps(__m128bh __a) {
+ __m128 __r = (__m128)__a;
+ return __r;
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
+_mm256_castpbf16_ps(__m256bh __a) {
+ __m256 __r = (__m256)__a;
+ return __r;
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
+_mm256_castpbf16_pd(__m256bh __a) {
+ __m256d __r = (__m256d)__a;
+ return __r;
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_castpbf16_pd(__m128bh __a) {
+ __m128d __r = (__m128d)__a;
+ return __r;
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_castpbf16_si128(__m128bh __a) {
+ __m128i __r = (__m128i)__a;
+ return __r;
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_castpbf16_si256(__m256bh __a) {
+ __m256i __r = (__m256i)__a;
+ return __r;
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_castps_pbh(__m128 __a) {
+ __m128bh __r = (__m128bh)__a;
+ return __r;
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_castps_pbh(__m256 __a) {
+ __m256bh __r = (__m256bh)__a;
+ return __r;
+}
+
+/* Returns element 0 of the 128-bit vector. */
+static __inline__ __bf16 __DEFAULT_FN_ATTRS128 _mm_cvtsbh_bf16(__m128bh __a) {
+ __bf16 __lowest = __a[0];
+ return __lowest;
+}
+
+/* Returns element 0 of the 256-bit vector. */
+static __inline__ __bf16 __DEFAULT_FN_ATTRS256
+_mm256_cvtsbh_bf16(__m256bh __a) {
+ __bf16 __lowest = __a[0];
+ return __lowest;
+}
+
+/* Plain vector casts from double / integer vectors to the BF16 vector type. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_castpd_pbh(__m128d __a) {
+ __m128bh __r = (__m128bh)__a;
+ return __r;
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_castpd_pbh(__m256d __a) {
+ __m256bh __r = (__m256bh)__a;
+ return __r;
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_castsi128_pbh(__m128i __a) {
+ __m128bh __r = (__m128bh)__a;
+ return __r;
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_castsi256_pbh(__m256i __a) {
+ __m256bh __r = (__m256bh)__a;
+ return __r;
+}
+
+/* Keeps elements 0..7 of the 256-bit vector. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS256
+_mm256_castpbf16256_pbh128(__m256bh __a) {
+ __m128bh __low = __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7);
+ return __low;
+}
+
+/* Elements 0..7 come from __a; the upper eight use shuffle index -1. */
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_castpbf16128_pbh256(__m128bh __a) {
+ __m256bh __wide = __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7,
+ -1, -1, -1, -1, -1, -1, -1, -1);
+ return __wide;
+}
+
+/* Elements 0..7 come from __a; the upper eight come from an all-zero vector. */
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_zextpbf16128_pbh256(__m128bh __a) {
+ __v8bf __zero = (__v8bf)_mm_setzero_pbh();
+ return __builtin_shufflevector(__a, __zero, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+ 11, 12, 13, 14, 15);
+}
+
+/* Returns the result of __builtin_ia32_undef256() cast to __m256bh. */
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_undefined_pbh(void) {
+ __m256bh __undef = (__m256bh)__builtin_ia32_undef256();
+ return __undef;
+}
+
+/* Scalar load through the loadsbf16128 builtin with a constant mask of 1 and
+ * an all-zero second operand. The local uses a reserved (__-prefixed) name so
+ * a user macro called `src` cannot break this header. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_load_sbh(void const *__dp) {
+ __m128bh __src = (__v8bf)_mm_setzero_pbh();
+ return (__m128bh)__builtin_ia32_loadsbf16128_mask((const __v8bf *)__dp, __src,
+ 1);
+}
+
+/* Masked scalar load. The second operand keeps element 0 of __W and takes
+ * elements 1..7 from an all-zero vector (shuffle index 8); only bit 0 of __U
+ * is passed on as the mask. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_load_sbh(__m128bh __W, __mmask8 __U, const void *__A) {
+ __m128bh __src = (__v8bf)__builtin_shufflevector(
+ (__v8bf)__W, (__v8bf)_mm_setzero_pbh(), 0, 8, 8, 8, 8, 8, 8, 8);
+
+ return (__m128bh)__builtin_ia32_loadsbf16128_mask((const __v8bf *)__A, __src,
+ __U & 1);
+}
+
+/* Masked scalar load with an all-zero second operand; only bit 0 of __U is
+ * passed on as the mask. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_load_sbh(__mmask8 __U, const void *__A) {
+ return (__m128bh)__builtin_ia32_loadsbf16128_mask(
+ (const __v8bf *)__A, (__v8bf)_mm_setzero_pbh(), __U & 1);
+}
+
+/* Dereferences __p as a pointer to __m256bh. */
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_load_pbh(void const *__p) {
+ const __m256bh *__vp = (const __m256bh *)__p;
+ return *__vp;
+}
+
+/* Dereferences __p as a pointer to __m128bh. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_load_pbh(void const *__p) {
+ const __m128bh *__vp = (const __m128bh *)__p;
+ return *__vp;
+}
+
+/* Reads a __m256bh_u through a packed, may_alias wrapper struct. */
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_loadu_pbh(void const *__p) {
+ struct __unaligned_pbh256 {
+ __m256bh_u __v;
+ } __attribute__((__packed__, __may_alias__));
+ const struct __unaligned_pbh256 *__up = (const struct __unaligned_pbh256 *)__p;
+ return __up->__v;
+}
+
+/* Reads a __m128bh_u through a packed, may_alias wrapper struct. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_loadu_pbh(void const *__p) {
+ struct __unaligned_pbh128 {
+ __m128bh_u __v;
+ } __attribute__((__packed__, __may_alias__));
+ const struct __unaligned_pbh128 *__up = (const struct __unaligned_pbh128 *)__p;
+ return __up->__v;
+}
+
+/* Writes element 0 of __a to __dp through a packed, may_alias wrapper. */
+static __inline__ void __DEFAULT_FN_ATTRS128 _mm_store_sbh(void *__dp,
+ __m128bh __a) {
+ struct __scalar_bf16_slot {
+ __bf16 __u;
+ } __attribute__((__packed__, __may_alias__));
+ struct __scalar_bf16_slot *__dst = (struct __scalar_bf16_slot *)__dp;
+ __dst->__u = __a[0];
+}
+
+/* Forwards to the storesbf16128 builtin, passing only bit 0 of __U. */
+static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sbh(void *__W,
+ __mmask8 __U,
+ __m128bh __A) {
+ __v8bf *__dst = (__v8bf *)__W;
+ __builtin_ia32_storesbf16128_mask(__dst, __A, __U & 1);
+}
+
+/* Assigns __A through __P treated as a pointer to __m256bh. */
+static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_store_pbh(void *__P,
+ __m256bh __A) {
+ __m256bh *__dst = (__m256bh *)__P;
+ *__dst = __A;
+}
+
+/* Assigns __A through __P treated as a pointer to __m128bh. */
+static __inline__ void __DEFAULT_FN_ATTRS128 _mm_store_pbh(void *__P,
+ __m128bh __A) {
+ __m128bh *__dst = (__m128bh *)__P;
+ *__dst = __A;
+}
+
+/* Writes __A as a __m256bh_u through a packed, may_alias wrapper struct. */
+static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_storeu_pbh(void *__P,
+ __m256bh __A) {
+ struct __unaligned_pbh256 {
+ __m256bh_u __v;
+ } __attribute__((__packed__, __may_alias__));
+ struct __unaligned_pbh256 *__dst = (struct __unaligned_pbh256 *)__P;
+ __dst->__v = __A;
+}
+
+/* Writes __A as a __m128bh_u through a packed, may_alias wrapper struct. */
+static __inline__ void __DEFAULT_FN_ATTRS128 _mm_storeu_pbh(void *__P,
+ __m128bh __A) {
+ struct __unaligned_pbh128 {
+ __m128bh_u __v;
+ } __attribute__((__packed__, __may_alias__));
+ struct __unaligned_pbh128 *__dst = (struct __unaligned_pbh128 *)__P;
+ __dst->__v = __A;
+}
+
+/* Returns __a with element 0 replaced by element 0 of __b. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_move_sbh(__m128bh __a,
+ __m128bh __b) {
+ __bf16 __lowest = __b[0];
+ __a[0] = __lowest;
+ return __a;
+}
+
+/* selectsbf_128 under __U between _mm_move_sbh(__A, __B) and __W. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_move_sbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) {
+ __m128bh __moved = _mm_move_sbh(__A, __B);
+ return __builtin_ia32_selectsbf_128(__U, __moved, __W);
+}
+
+/* selectsbf_128 under __U between _mm_move_sbh(__A, __B) and zero. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_move_sbh(__mmask8 __U, __m128bh __A, __m128bh __B) {
+ __m128bh __moved = _mm_move_sbh(__A, __B);
+ __m128bh __zero = _mm_setzero_pbh();
+ return __builtin_ia32_selectsbf_128(__U, __moved, __zero);
+}
+
+/* Returns the result of __builtin_ia32_undef128() cast to __m128bh. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_undefined_pbh(void) {
+ __m128bh __undef = (__m128bh)__builtin_ia32_undef128();
+ return __undef;
+}
+
+/* Element 0 is __bf; elements 1..7 are taken from an all-zero vector (shuffle
+ * index 8). The parameter uses a reserved name so a user macro called `bf`
+ * cannot break this header. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_set_sbh(__bf16 __bf) {
+ return (__v8bf)__builtin_shufflevector(
+ (__v8bf){__bf, __bf, __bf, __bf, __bf, __bf, __bf, __bf},
+ (__v8bf)_mm_setzero_pbh(), 0, 8, 8, 8, 8, 8, 8, 8);
+}
+
+/* All eight elements are set to __bf. */
+static __inline __m128bh __DEFAULT_FN_ATTRS128 _mm_set1_pbh(__bf16 __bf) {
+ return (__m128bh)(__v8bf){__bf, __bf, __bf, __bf, __bf, __bf, __bf, __bf};
+}
+
+/* All sixteen elements are set to __bf. */
+static __inline __m256bh __DEFAULT_FN_ATTRS256 _mm256_set1_pbh(__bf16 __bf) {
+ return (__m256bh)(__v16bf){__bf, __bf, __bf, __bf, __bf, __bf, __bf, __bf,
+ __bf, __bf, __bf, __bf, __bf, __bf, __bf, __bf};
+}
+
+/* Builds a vector whose element 0 is the first argument and element 7 the
+ * last. Parameters use reserved names so user macros such as `bf1` cannot
+ * break this header.
+ * NOTE(review): other set-style intrinsics may place the first argument in the
+ * highest element; confirm the intended order against the intrinsics spec. */
+static __inline __m128bh __DEFAULT_FN_ATTRS128
+_mm_set_pbh(__bf16 __bf1, __bf16 __bf2, __bf16 __bf3, __bf16 __bf4,
+ __bf16 __bf5, __bf16 __bf6, __bf16 __bf7, __bf16 __bf8) {
+ return (__m128bh)(__v8bf){__bf1, __bf2, __bf3, __bf4,
+ __bf5, __bf6, __bf7, __bf8};
+}
+
+/* 256-bit variant: element 0 is the first argument, element 15 the last. */
+static __inline __m256bh __DEFAULT_FN_ATTRS256 _mm256_set_pbh(
+ __bf16 __bf1, __bf16 __bf2, __bf16 __bf3, __bf16 __bf4, __bf16 __bf5,
+ __bf16 __bf6, __bf16 __bf7, __bf16 __bf8, __bf16 __bf9, __bf16 __bf10,
+ __bf16 __bf11, __bf16 __bf12, __bf16 __bf13, __bf16 __bf14, __bf16 __bf15,
+ __bf16 __bf16_) {
+ return (__m256bh)(__v16bf){__bf1, __bf2, __bf3, __bf4,
+ __bf5, __bf6, __bf7, __bf8,
+ __bf9, __bf10, __bf11, __bf12,
+ __bf13, __bf14, __bf15, __bf16_};
+}
+
+/* Forwards to _mm_set_pbh with the argument list reversed. */
+#define _mm_setr_pbh(v1, v2, v3, v4, v5, v6, v7, v8) \
+ _mm_set_pbh((v8), (v7), (v6), (v5), \
+ (v4), (v3), (v2), (v1))
+
+/* Forwards to _mm256_set_pbh with the argument list reversed. */
+#define _mm256_setr_pbh(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
+ v13, v14, v15, v16) \
+ _mm256_set_pbh((v16), (v15), (v14), (v13), \
+ (v12), (v11), (v10), (v9), \
+ (v8), (v7), (v6), (v5), \
+ (v4), (v3), (v2), (v1))
+
+/* ANDs the vector with 0x7FFF7FFF per 32-bit lane, clearing bit 15 of every
+ * 16-bit element. */
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_abs_pbh(__m256bh __A) {
+ __m256i __keep = _mm256_set1_epi32(0x7FFF7FFF);
+ return (__m256bh)_mm256_and_epi32(__keep, (__m256i)__A);
+}
+
+/* 128-bit variant of the same AND with 0x7FFF7FFF. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_abs_pbh(__m128bh __A) {
+ __m128i __keep = _mm_set1_epi32(0x7FFF7FFF);
+ return (__m128bh)_mm_and_epi32(__keep, (__m128i)__A);
+}
+
+/* selectpbf_128 with __W as the first vector operand and __A as the second. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_blend_pbh(__mmask8 __U, __m128bh __A, __m128bh __W) {
+ __v8bf __first = (__v8bf)__W;
+ __v8bf __second = (__v8bf)__A;
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, __first,
+ __second);
+}
+
+/* selectpbf_256 with __W as the first vector operand and __A as the second. */
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_mask_blend_pbh(__mmask16 __U, __m256bh __A, __m256bh __W) {
+ __v16bf __first = (__v16bf)__W;
+ __v16bf __second = (__v16bf)__A;
+ return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, __first,
+ __second);
+}
+
+/* Reuses the 16-bit integer vpermi2var builtin on the BF16 bit patterns. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_permutex2var_pbh(__m128bh __A, __m128i __I, __m128bh __B) {
+ __v8hi __lhs = (__v8hi)__A;
+ __v8hi __idx = (__v8hi)__I;
+ __v8hi __rhs = (__v8hi)__B;
+ return (__m128bh)__builtin_ia32_vpermi2varhi128(__lhs, __idx, __rhs);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_permutex2var_pbh(__m256bh __A, __m256i __I, __m256bh __B) {
+ __v16hi __lhs = (__v16hi)__A;
+ __v16hi __idx = (__v16hi)__I;
+ __v16hi __rhs = (__v16hi)__B;
+ return (__m256bh)__builtin_ia32_vpermi2varhi256(__lhs, __idx, __rhs);
+}
+
+/* Reuses the 16-bit integer permvar builtin; the data vector __B is passed
+ * first and the index vector __A second. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_permutexvar_pbh(__m128i __A, __m128bh __B) {
+ __v8hi __data = (__v8hi)__B;
+ __v8hi __idx = (__v8hi)__A;
+ return (__m128bh)__builtin_ia32_permvarhi128(__data, __idx);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_permutexvar_pbh(__m256i __A, __m256bh __B) {
+ __v16hi __data = (__v16hi)__B;
+ __v16hi __idx = (__v16hi)__A;
+ return (__m256bh)__builtin_ia32_permvarhi256(__data, __idx);
+}
+
+/* Element-wise vector addition written with the + operator. */
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_addne_pbh(__m256bh __A, __m256bh __B) {
+ __v16bf __lhs = (__v16bf)__A;
+ __v16bf __rhs = (__v16bf)__B;
+ return (__m256bh)(__lhs + __rhs);
+}
+
+/* selectpbf_256 under __U between the sum and __W. */
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_mask_addne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
+ __v16bf __sum = (__v16bf)_mm256_addne_pbh(__A, __B);
+ return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, __sum,
+ (__v16bf)__W);
+}
+
+/* selectpbf_256 under __U between the sum and an all-zero vector. */
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_maskz_addne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
+ __v16bf __sum = (__v16bf)_mm256_addne_pbh(__A, __B);
+ __v16bf __zero = (__v16bf)_mm256_setzero_pbh();
+ return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, __sum, __zero);
+}
+
+/* 128-bit element-wise addition. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_addne_pbh(__m128bh __A,
+ __m128bh __B) {
+ __v8bf __lhs = (__v8bf)__A;
+ __v8bf __rhs = (__v8bf)__B;
+ return (__m128bh)(__lhs + __rhs);
+}
+
+/* selectpbf_128 under __U between the sum and __W. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_addne_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) {
+ __v8bf __sum = (__v8bf)_mm_addne_pbh(__A, __B);
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, __sum,
+ (__v8bf)__W);
+}
+
+/* selectpbf_128 under __U between the sum and an all-zero vector. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_addne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) {
+ __v8bf __sum = (__v8bf)_mm_addne_pbh(__A, __B);
+ __v8bf __zero = (__v8bf)_mm_setzero_pbh();
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, __sum, __zero);
+}
+
+/* Element-wise vector subtraction written with the - operator. */
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_subne_pbh(__m256bh __A, __m256bh __B) {
+ __v16bf __lhs = (__v16bf)__A;
+ __v16bf __rhs = (__v16bf)__B;
+ return (__m256bh)(__lhs - __rhs);
+}
+
+/* selectpbf_256 under __U between the difference and __W. */
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_mask_subne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
+ __v16bf __diff = (__v16bf)_mm256_subne_pbh(__A, __B);
+ return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, __diff,
+ (__v16bf)__W);
+}
+
+/* selectpbf_256 under __U between the difference and an all-zero vector. */
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_maskz_subne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
+ __v16bf __diff = (__v16bf)_mm256_subne_pbh(__A, __B);
+ __v16bf __zero = (__v16bf)_mm256_setzero_pbh();
+ return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, __diff, __zero);
+}
+
+/* 128-bit element-wise subtraction. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_subne_pbh(__m128bh __A,
+ __m128bh __B) {
+ __v8bf __lhs = (__v8bf)__A;
+ __v8bf __rhs = (__v8bf)__B;
+ return (__m128bh)(__lhs - __rhs);
+}
+
+/* selectpbf_128 under __U between the difference and __W. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_subne_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) {
+ __v8bf __diff = (__v8bf)_mm_subne_pbh(__A, __B);
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, __diff,
+ (__v8bf)__W);
+}
+
+/* selectpbf_128 under __U between the difference and an all-zero vector. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_subne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) {
+ __v8bf __diff = (__v8bf)_mm_subne_pbh(__A, __B);
+ __v8bf __zero = (__v8bf)_mm_setzero_pbh();
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, __diff, __zero);
+}
+
+/* Element-wise vector multiplication written with the * operator. */
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_mulne_pbh(__m256bh __A, __m256bh __B) {
+ __v16bf __lhs = (__v16bf)__A;
+ __v16bf __rhs = (__v16bf)__B;
+ return (__m256bh)(__lhs * __rhs);
+}
+
+/* selectpbf_256 under __U between the product and __W. */
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_mask_mulne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
+ __v16bf __prod = (__v16bf)_mm256_mulne_pbh(__A, __B);
+ return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, __prod,
+ (__v16bf)__W);
+}
+
+/* selectpbf_256 under __U between the product and an all-zero vector. */
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_maskz_mulne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
+ __v16bf __prod = (__v16bf)_mm256_mulne_pbh(__A, __B);
+ __v16bf __zero = (__v16bf)_mm256_setzero_pbh();
+ return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, __prod, __zero);
+}
+
+/* 128-bit element-wise multiplication. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_mulne_pbh(__m128bh __A,
+ __m128bh __B) {
+ __v8bf __lhs = (__v8bf)__A;
+ __v8bf __rhs = (__v8bf)__B;
+ return (__m128bh)(__lhs * __rhs);
+}
+
+/* selectpbf_128 under __U between the product and __W. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_mulne_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) {
+ __v8bf __prod = (__v8bf)_mm_mulne_pbh(__A, __B);
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, __prod,
+ (__v8bf)__W);
+}
+
+/* selectpbf_128 under __U between the product and an all-zero vector. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_mulne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) {
+ __v8bf __prod = (__v8bf)_mm_mulne_pbh(__A, __B);
+ __v8bf __zero = (__v8bf)_mm_setzero_pbh();
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, __prod, __zero);
+}
+
+/* Element-wise vector division written with the / operator. */
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_divne_pbh(__m256bh __A, __m256bh __B) {
+ __v16bf __num = (__v16bf)__A;
+ __v16bf __den = (__v16bf)__B;
+ return (__m256bh)(__num / __den);
+}
+
+/* selectpbf_256 under __U between the quotient and __W. */
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_mask_divne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
+ __v16bf __quot = (__v16bf)_mm256_divne_pbh(__A, __B);
+ return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, __quot,
+ (__v16bf)__W);
+}
+
+/* selectpbf_256 under __U between the quotient and an all-zero vector. */
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_maskz_divne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
+ __v16bf __quot = (__v16bf)_mm256_divne_pbh(__A, __B);
+ __v16bf __zero = (__v16bf)_mm256_setzero_pbh();
+ return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, __quot, __zero);
+}
+
+/* 128-bit element-wise division. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_divne_pbh(__m128bh __A,
+ __m128bh __B) {
+ __v8bf __num = (__v8bf)__A;
+ __v8bf __den = (__v8bf)__B;
+ return (__m128bh)(__num / __den);
+}
+
+/* selectpbf_128 under __U between the quotient and __W. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_divne_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) {
+ __v8bf __quot = (__v8bf)_mm_divne_pbh(__A, __B);
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, __quot,
+ (__v8bf)__W);
+}
+
+/* selectpbf_128 under __U between the quotient and an all-zero vector. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_divne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) {
+ __v8bf __quot = (__v8bf)_mm_divne_pbh(__A, __B);
+ __v8bf __zero = (__v8bf)_mm_setzero_pbh();
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, __quot, __zero);
+}
+
+/* Wrapper over the 256-bit vmaxpbf16 builtin. */
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_max_pbh(__m256bh __A,
+ __m256bh __B) {
+ __v16bf __lhs = (__v16bf)__A;
+ __v16bf __rhs = (__v16bf)__B;
+ return (__m256bh)__builtin_ia32_vmaxpbf16256(__lhs, __rhs);
+}
+
+/* selectpbf_256 under __U between the max result and __W. */
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_mask_max_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
+ __v16bf __max = (__v16bf)_mm256_max_pbh(__A, __B);
+ return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, __max,
+ (__v16bf)__W);
+}
+
+/* selectpbf_256 under __U between the max result and an all-zero vector. */
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_maskz_max_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
+ __v16bf __max = (__v16bf)_mm256_max_pbh(__A, __B);
+ __v16bf __zero = (__v16bf)_mm256_setzero_pbh();
+ return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, __max, __zero);
+}
+
+/* Wrapper over the 128-bit vmaxpbf16 builtin. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_max_pbh(__m128bh __A,
+ __m128bh __B) {
+ __v8bf __lhs = (__v8bf)__A;
+ __v8bf __rhs = (__v8bf)__B;
+ return (__m128bh)__builtin_ia32_vmaxpbf16128(__lhs, __rhs);
+}
+
+/* selectpbf_128 under __U between the max result and __W. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_max_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) {
+ __v8bf __max = (__v8bf)_mm_max_pbh(__A, __B);
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, __max,
+ (__v8bf)__W);
+}
+
+/* selectpbf_128 under __U between the max result and an all-zero vector. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_max_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) {
+ __v8bf __max = (__v8bf)_mm_max_pbh(__A, __B);
+ __v8bf __zero = (__v8bf)_mm_setzero_pbh();
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, __max, __zero);
+}
+
+/* Wrapper over the 256-bit vminpbf16 builtin. */
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_min_pbh(__m256bh __A,
+ __m256bh __B) {
+ __v16bf __lhs = (__v16bf)__A;
+ __v16bf __rhs = (__v16bf)__B;
+ return (__m256bh)__builtin_ia32_vminpbf16256(__lhs, __rhs);
+}
+
+/* selectpbf_256 under __U between the min result and __W. */
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_mask_min_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
+ __v16bf __min = (__v16bf)_mm256_min_pbh(__A, __B);
+ return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, __min,
+ (__v16bf)__W);
+}
+
+/* selectpbf_256 under __U between the min result and an all-zero vector. */
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_maskz_min_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
+ __v16bf __min = (__v16bf)_mm256_min_pbh(__A, __B);
+ __v16bf __zero = (__v16bf)_mm256_setzero_pbh();
+ return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, __min, __zero);
+}
+
+/* Wrapper over the 128-bit vminpbf16 builtin. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_min_pbh(__m128bh __A,
+ __m128bh __B) {
+ __v8bf __lhs = (__v8bf)__A;
+ __v8bf __rhs = (__v8bf)__B;
+ return (__m128bh)__builtin_ia32_vminpbf16128(__lhs, __rhs);
+}
+
+/* selectpbf_128 under __U between the min result and __W. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_min_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) {
+ __v8bf __min = (__v8bf)_mm_min_pbh(__A, __B);
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, __min,
+ (__v8bf)__W);
+}
+
+/* selectpbf_128 under __U between the min result and an all-zero vector. */
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_min_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) {
+ __v8bf __min = (__v8bf)_mm_min_pbh(__A, __B);
+ __v8bf __zero = (__v8bf)_mm_setzero_pbh();
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, __min, __zero);
+}
+
+/* Scalar BF16 compare helpers: each forwards both vectors to the matching
+ * vcomsbf16<cond> builtin and returns its int result. Parameters use reserved
+ * (__-prefixed) names so user macros named `A` or `B` cannot break this
+ * header. */
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comeqsbh(__m128bh __A,
+ __m128bh __B) {
+ return __builtin_ia32_vcomsbf16eq((__v8bf)__A, (__v8bf)__B);
+}
+
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comltsbh(__m128bh __A,
+ __m128bh __B) {
+ return __builtin_ia32_vcomsbf16lt((__v8bf)__A, (__v8bf)__B);
+}
+
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comlesbh(__m128bh __A,
+ __m128bh __B) {
+ return __builtin_ia32_vcomsbf16le((__v8bf)__A, (__v8bf)__B);
+}
+
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comgtsbh(__m128bh __A,
+ __m128bh __B) {
+ return __builtin_ia32_vcomsbf16gt((__v8bf)__A, (__v8bf)__B);
+}
+
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comgesbh(__m128bh __A,
+ __m128bh __B) {
+ return __builtin_ia32_vcomsbf16ge((__v8bf)__A, (__v8bf)__B);
+}
+
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comneqsbh(__m128bh __A,
+ __m128bh __B) {
+ return __builtin_ia32_vcomsbf16neq((__v8bf)__A, (__v8bf)__B);
+}
+
+/* Packed compare with predicate P; the unmasked forms pass an all-ones mask,
+ * the mask forms pass K. */
+#define _mm256_cmp_pbh_mask(X, Y, P) \
+ ((__mmask16)__builtin_ia32_vcmppbf16256_mask( \
+ (__v16bf)(__m256bh)(X), (__v16bf)(__m256bh)(Y), (int)(P), \
+ (__mmask16)-1))
+
+#define _mm256_mask_cmp_pbh_mask(K, X, Y, P) \
+ ((__mmask16)__builtin_ia32_vcmppbf16256_mask( \
+ (__v16bf)(__m256bh)(X), (__v16bf)(__m256bh)(Y), (int)(P), \
+ (__mmask16)(K)))
+
+#define _mm_cmp_pbh_mask(X, Y, P) \
+ ((__mmask8)__builtin_ia32_vcmppbf16128_mask( \
+ (__v8bf)(__m128bh)(X), (__v8bf)(__m128bh)(Y), (int)(P), \
+ (__mmask8)-1))
+
+#define _mm_mask_cmp_pbh_mask(K, X, Y, P) \
+ ((__mmask8)__builtin_ia32_vcmppbf16128_mask( \
+ (__v8bf)(__m128bh)(X), (__v8bf)(__m128bh)(Y), (int)(P), \
+ (__mmask8)(K)))
+
+/* fpclass wrappers: V is the vector, C the int immediate; the unmasked forms
+ * pass an all-ones mask, the mask forms pass K. */
+#define _mm256_mask_fpclass_pbh_mask(K, V, C) \
+ ((__mmask16)__builtin_ia32_vfpclasspbf16256_mask((__v16bf)(__m256bh)(V), \
+ (int)(C), (__mmask16)(K)))
+
+#define _mm256_fpclass_pbh_mask(V, C) \
+ ((__mmask16)__builtin_ia32_vfpclasspbf16256_mask((__v16bf)(__m256bh)(V), \
+ (int)(C), (__mmask16)-1))
+
+#define _mm_mask_fpclass_pbh_mask(K, V, C) \
+ ((__mmask8)__builtin_ia32_vfpclasspbf16128_mask( \
+ (__v8bf)(__m128bh)(V), (int)(C), (__mmask8)(K)))
+
+#define _mm_fpclass_pbh_mask(V, C) \
+ ((__mmask8)__builtin_ia32_vfpclasspbf16128_mask( \
+ (__v8bf)(__m128bh)(V), (int)(C), (__mmask8)-1))
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_scalef_pbh(__m256bh __A, __m256bh __B) {
+ return (__m256bh)__builtin_ia32_vscalefpbf16256_mask(
+ (__v16bf)__A, (__v16bf)__B, (__v16bf)_mm256_undefined_pbh(),
+ (__mmask16)-1);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_scalef_pbh(
+ __m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
+ return (__m256bh)__builtin_ia32_vscalefpbf16256_mask(
+ (__v16bf)__A, (__v16bf)__B, (__v16bf)__W, (__mmask16)__U);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_maskz_scalef_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
+ return (__m256bh)__builtin_ia32_vscalefpbf16256_mask(
+ (__v16bf)__A, (__v16bf)__B, (__v16bf)_mm256_setzero_pbh(),
+ (__mmask16)__U);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_scalef_pbh(__m128bh __A,
+ __m128bh __B) {
+ return (__m128bh)__builtin_ia32_vscalefpbf16128_mask(
+ (__v8bf)__A, (__v8bf)__B, (__v8bf)_mm_undefined_pbh(), (__mmask8)-1);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_scalef_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) {
+ return (__m128bh)__builtin_ia32_vscalefpbf16128_mask(
+ (__v8bf)__A, (__v8bf)__B, (__v8bf)__W, (__mmask8)__U);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_scalef_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) {
+ return (__m128bh)__builtin_ia32_vscalefpbf16128_mask(
+ (__v8bf)__A, (__v8bf)__B, (__v8bf)_mm_setzero_pbh(), (__mmask8)__U);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_rcp_pbh(__m256bh __A) {
+ return (__m256bh)__builtin_ia32_vrcppbf16256_mask(
+ (__v16bf)__A, (__v16bf)_mm256_undefined_pbh(), (__mmask16)-1);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_mask_rcp_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) {
+ return (__m256bh)__builtin_ia32_vrcppbf16256_mask((__v16bf)__A, (__v16bf)__W,
+ (__mmask16)__U);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_maskz_rcp_pbh(__mmask16 __U, __m256bh __A) {
+ return (__m256bh)__builtin_ia32_vrcppbf16256_mask(
+ (__v16bf)__A, (__v16bf)_mm256_setzero_pbh(), (__mmask16)__U);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_rcp_pbh(__m128bh __A) {
+ return (__m128bh)__builtin_ia32_vrcppbf16128_mask(
+ (__v8bf)__A, (__v8bf)_mm_undefined_pbh(), (__mmask8)-1);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_rcp_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) {
+ return (__m128bh)__builtin_ia32_vrcppbf16128_mask((__v8bf)__A, (__v8bf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_rcp_pbh(__mmask8 __U, __m128bh __A) {
+ return (__m128bh)__builtin_ia32_vrcppbf16128_mask(
+ (__v8bf)__A, (__v8bf)_mm_setzero_pbh(), (__mmask8)__U);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_getexp_pbh(__m256bh __A) {
+ return (__m256bh)__builtin_ia32_vgetexppbf16256_mask(
+ (__v16bf)__A, (__v16bf)_mm256_undefined_pbh(), (__mmask16)-1);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_mask_getexp_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) {
+ return (__m256bh)__builtin_ia32_vgetexppbf16256_mask(
+ (__v16bf)__A, (__v16bf)__W, (__mmask16)__U);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_maskz_getexp_pbh(__mmask16 __U, __m256bh __A) {
+ return (__m256bh)__builtin_ia32_vgetexppbf16256_mask(
+ (__v16bf)__A, (__v16bf)_mm256_setzero_pbh(), (__mmask16)__U);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_getexp_pbh(__m128bh __A) {
+ return (__m128bh)__builtin_ia32_vgetexppbf16128_mask(
+ (__v8bf)__A, (__v8bf)_mm_undefined_pbh(), (__mmask8)-1);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_getexp_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) {
+ return (__m128bh)__builtin_ia32_vgetexppbf16128_mask((__v8bf)__A, (__v8bf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_getexp_pbh(__mmask8 __U, __m128bh __A) {
+ return (__m128bh)__builtin_ia32_vgetexppbf16128_mask(
+ (__v8bf)__A, (__v8bf)_mm_setzero_pbh(), (__mmask8)__U);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_rsqrt_pbh(__m256bh __A) {
+ return (__m256bh)__builtin_ia32_vrsqrtpbf16256_mask(
+ (__v16bf)__A, (__v16bf)_mm256_undefined_pbh(), (__mmask16)-1);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_mask_rsqrt_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) {
+ return (__m256bh)__builtin_ia32_vrsqrtpbf16256_mask(
+ (__v16bf)__A, (__v16bf)__W, (__mmask16)__U);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_maskz_rsqrt_pbh(__mmask16 __U, __m256bh __A) {
+ return (__m256bh)__builtin_ia32_vrsqrtpbf16256_mask(
+ (__v16bf)__A, (__v16bf)_mm256_setzero_pbh(), (__mmask16)__U);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_rsqrt_pbh(__m128bh __A) {
+ return (__m128bh)__builtin_ia32_vrsqrtpbf16128_mask(
+ (__v8bf)__A, (__v8bf)_mm_undefined_pbh(), (__mmask8)-1);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_rsqrt_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) {
+ return (__m128bh)__builtin_ia32_vrsqrtpbf16128_mask((__v8bf)__A, (__v8bf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_rsqrt_pbh(__mmask8 __U, __m128bh __A) {
+ return (__m128bh)__builtin_ia32_vrsqrtpbf16128_mask(
+ (__v8bf)__A, (__v8bf)_mm_setzero_pbh(), (__mmask8)__U);
+}
+
+/// 256-bit BF16 "reduce" wrappers around the vreducenepbf16256 builtin. They
+/// are macros rather than inline functions; \a imm is forwarded as the
+/// builtin's second argument after a cast to int.
+/// NOTE(review): presumably \a imm must be an integer constant expression
+/// because it becomes the instruction immediate -- confirm against the Sema
+/// range check for this builtin.
+///
+/// Unmasked form: all-ones mask, undefined vector as the third operand.
+#define _mm256_reducene_pbh(A, imm) \
+ ((__m256bh)__builtin_ia32_vreducenepbf16256_mask( \
+ (__v16bf)(__m256bh)(A), (int)(imm), (__v16bf)_mm256_undefined_pbh(), \
+ (__mmask16) - 1))
+
+/// Merge-masked form: \a W is the third operand, \a U the lane mask.
+#define _mm256_mask_reducene_pbh(W, U, A, imm) \
+ ((__m256bh)__builtin_ia32_vreducenepbf16256_mask( \
+ (__v16bf)(__m256bh)(A), (int)(imm), (__v16bf)(__m256bh)(W), \
+ (__mmask16)(U)))
+
+/// Zero-masked form: the all-zero vector is the third operand.
+#define _mm256_maskz_reducene_pbh(U, A, imm) \
+ ((__m256bh)__builtin_ia32_vreducenepbf16256_mask( \
+ (__v16bf)(__m256bh)(A), (int)(imm), (__v16bf)_mm256_setzero_pbh(), \
+ (__mmask16)(U)))
+
+/// 128-bit BF16 "reduce" wrappers around the vreducenepbf16128 builtin. They
+/// are macros rather than inline functions; \a imm is forwarded as the
+/// builtin's second argument after a cast to int.
+/// NOTE(review): presumably \a imm must be an integer constant expression
+/// because it becomes the instruction immediate -- confirm against the Sema
+/// range check for this builtin.
+///
+/// Unmasked form: all-ones mask, undefined vector as the third operand.
+#define _mm_reducene_pbh(A, imm) \
+ ((__m128bh)__builtin_ia32_vreducenepbf16128_mask( \
+ (__v8bf)(__m128bh)(A), (int)(imm), (__v8bf)_mm_undefined_pbh(), \
+ (__mmask8) - 1))
+
+/// Merge-masked form: \a W is the third operand, \a U the lane mask.
+#define _mm_mask_reducene_pbh(W, U, A, imm) \
+ ((__m128bh)__builtin_ia32_vreducenepbf16128_mask( \
+ (__v8bf)(__m128bh)(A), (int)(imm), (__v8bf)(__m128bh)(W), \
+ (__mmask8)(U)))
+
+/// Zero-masked form: the all-zero vector is the third operand.
+#define _mm_maskz_reducene_pbh(U, A, imm) \
+ ((__m128bh)__builtin_ia32_vreducenepbf16128_mask( \
+ (__v8bf)(__m128bh)(A), (int)(imm), (__v8bf)_mm_setzero_pbh(), \
+ (__mmask8)(U)))
+
+/// Unmasked 256-bit BF16 round-scale: forwards \a A and the immediate \a B to
+/// the vrndscalenepbf16_256 builtin with an all-ones mask.
+///
+/// The third (pass-through) operand is an undefined vector rather than a
+/// second copy of \a A, so the macro expands \a A exactly once and an argument
+/// with side effects is not evaluated twice. This matches the other unmasked
+/// macros in this header (e.g. _mm256_reducene_pbh).
+#define _mm256_roundscalene_pbh(A, B) \
+ ((__m256bh)__builtin_ia32_vrndscalenepbf16_256_mask( \
+ (__v16bf)(__m256bh)(A), (int)(B), (__v16bf)_mm256_undefined_pbh(), \
+ (__mmask16) - 1))
+
+/// Merge-masked 256-bit BF16 round-scale. Argument order is pass-through
+/// vector \a W, lane mask \a U, source \a A, immediate \a imm.
+#define _mm256_mask_roundscalene_pbh(W, U, A, imm) \
+ ((__m256bh)__builtin_ia32_vrndscalenepbf16_256_mask( \
+ (__v16bf)(__m256bh)(A), (int)(imm), (__v16bf)(__m256bh)(W), \
+ (__mmask16)(U)))
+
+/// Zero-masked 256-bit BF16 round-scale. Argument order is lane mask \a U,
+/// source \a A, immediate \a imm; the all-zero vector is the pass-through.
+#define _mm256_maskz_roundscalene_pbh(U, A, imm) \
+ ((__m256bh)__builtin_ia32_vrndscalenepbf16_256_mask( \
+ (__v16bf)(__m256bh)(A), (int)(imm), (__v16bf)_mm256_setzero_pbh(), \
+ (__mmask16)(U)))
+
+/// Unmasked 128-bit BF16 round-scale: forwards \a A and the immediate \a B to
+/// the vrndscalenepbf16_128 builtin with an all-ones mask.
+///
+/// The third (pass-through) operand is an undefined vector rather than a
+/// second copy of \a A, so the macro expands \a A exactly once and an argument
+/// with side effects is not evaluated twice. This matches the other unmasked
+/// macros in this header (e.g. _mm_reducene_pbh).
+#define _mm_roundscalene_pbh(A, B) \
+ ((__m128bh)__builtin_ia32_vrndscalenepbf16_128_mask( \
+ (__v8bf)(__m128bh)(A), (int)(B), (__v8bf)_mm_undefined_pbh(), \
+ (__mmask8) - 1))
+
+/// Merge-masked 128-bit BF16 round-scale. Argument order is pass-through
+/// vector \a W, lane mask \a U, source \a A, immediate \a imm.
+#define _mm_mask_roundscalene_pbh(W, U, A, imm) \
+ ((__m128bh)__builtin_ia32_vrndscalenepbf16_128_mask( \
+ (__v8bf)(__m128bh)(A), (int)(imm), (__v8bf)(__m128bh)(W), \
+ (__mmask8)(U)))
+
+/// Zero-masked 128-bit BF16 round-scale. Argument order is lane mask \a U,
+/// source \a A, immediate \a imm; the all-zero vector is the pass-through.
+#define _mm_maskz_roundscalene_pbh(U, A, imm) \
+ ((__m128bh)__builtin_ia32_vrndscalenepbf16_128_mask( \
+ (__v8bf)(__m128bh)(A), (int)(imm), (__v8bf)_mm_setzero_pbh(), \
+ (__mmask8)(U)))
+
+/// 256-bit BF16 get-mantissa wrappers around the vgetmantpbf16256 builtin.
+/// The two selector arguments are packed into one immediate: \a B occupies
+/// the low two bits and \a C is shifted left by two above it.
+/// NOTE(review): presumably \a B is the normalization-interval selector and
+/// \a C the sign-control selector, as in the existing getmant intrinsics --
+/// confirm against the AVX10.2 specification.
+///
+/// Unmasked form: all-ones mask, undefined vector as the third operand.
+#define _mm256_getmant_pbh(A, B, C) \
+ ((__m256bh)__builtin_ia32_vgetmantpbf16256_mask( \
+ (__v16bf)(__m256bh)(A), (int)(((C) << 2) | (B)), \
+ (__v16bf)_mm256_undefined_pbh(), (__mmask16) - 1))
+
+/// Merge-masked form: \a W is the third operand, \a U the lane mask.
+#define _mm256_mask_getmant_pbh(W, U, A, B, C) \
+ ((__m256bh)__builtin_ia32_vgetmantpbf16256_mask( \
+ (__v16bf)(__m256bh)(A), (int)(((C) << 2) | (B)), (__v16bf)(__m256bh)(W), \
+ (__mmask16)(U)))
+
+/// Zero-masked form: the all-zero vector is the third operand.
+#define _mm256_maskz_getmant_pbh(U, A, B, C) \
+ ((__m256bh)__builtin_ia32_vgetmantpbf16256_mask( \
+ (__v16bf)(__m256bh)(A), (int)(((C) << 2) | (B)), \
+ (__v16bf)_mm256_setzero_pbh(), (__mmask16)(U)))
+
+/// 128-bit BF16 get-mantissa wrappers around the vgetmantpbf16128 builtin.
+/// The two selector arguments are packed into one immediate: \a B occupies
+/// the low two bits and \a C is shifted left by two above it.
+/// NOTE(review): presumably \a B is the normalization-interval selector and
+/// \a C the sign-control selector, as in the existing getmant intrinsics --
+/// confirm against the AVX10.2 specification.
+///
+/// Unmasked form: all-ones mask, undefined vector as the third operand.
+#define _mm_getmant_pbh(A, B, C) \
+ ((__m128bh)__builtin_ia32_vgetmantpbf16128_mask( \
+ (__v8bf)(__m128bh)(A), (int)(((C) << 2) | (B)), \
+ (__v8bf)_mm_undefined_pbh(), (__mmask8) - 1))
+
+/// Merge-masked form: \a W is the third operand, \a U the lane mask.
+#define _mm_mask_getmant_pbh(W, U, A, B, C) \
+ ((__m128bh)__builtin_ia32_vgetmantpbf16128_mask( \
+ (__v8bf)(__m128bh)(A), (int)(((C) << 2) | (B)), (__v8bf)(__m128bh)(W), \
+ (__mmask8)(U)))
+
+/// Zero-masked form: the all-zero vector is the third operand.
+#define _mm_maskz_getmant_pbh(U, A, B, C) \
+ ((__m128bh)__builtin_ia32_vgetmantpbf16128_mask( \
+ (__v8bf)(__m128bh)(A), (int)(((C) << 2) | (B)), \
+ (__v8bf)_mm_setzero_pbh(), (__mmask8)(U)))
+
+/// 256-bit BF16 square-root wrappers. The unmasked form calls the
+/// vsqrtnepbf16256 builtin directly; the masked forms compute the unmasked
+/// result and then pick lanes with the selectpbf_256 builtin.
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_sqrt_pbh(__m256bh __A) {
+ __v16bf __src = (__v16bf)__A;
+ return (__m256bh)__builtin_ia32_vsqrtnepbf16256(__src);
+}
+
+/// Merge-masked form: lanes not selected by __U come from __W.
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_mask_sqrt_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) {
+ __v16bf __root = (__v16bf)_mm256_sqrt_pbh(__A);
+ __v16bf __fallback = (__v16bf)__W;
+ return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, __root,
+ __fallback);
+}
+
+/// Zero-masked form: lanes not selected by __U come from the all-zero vector.
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_maskz_sqrt_pbh(__mmask16 __U, __m256bh __A) {
+ __v16bf __root = (__v16bf)_mm256_sqrt_pbh(__A);
+ __v16bf __zero = (__v16bf)_mm256_setzero_pbh();
+ return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, __root,
+ __zero);
+}
+
+/// 128-bit BF16 square-root wrappers. The unmasked form calls the
+/// vsqrtnepbf16 builtin directly; the masked forms compute the unmasked
+/// result and then pick lanes with the selectpbf_128 builtin.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_sqrt_pbh(__m128bh __A) {
+ __v8bf __src = (__v8bf)__A;
+ return (__m128bh)__builtin_ia32_vsqrtnepbf16(__src);
+}
+
+/// Merge-masked form: lanes not selected by __U come from __W.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_sqrt_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) {
+ __v8bf __root = (__v8bf)_mm_sqrt_pbh(__A);
+ __v8bf __fallback = (__v8bf)__W;
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, __root,
+ __fallback);
+}
+
+/// Zero-masked form: lanes not selected by __U come from the all-zero vector.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_sqrt_pbh(__mmask8 __U, __m128bh __A) {
+ __v8bf __root = (__v8bf)_mm_sqrt_pbh(__A);
+ __v8bf __zero = (__v8bf)_mm_setzero_pbh();
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, __root, __zero);
+}
+
+/// 256-bit BF16 fused multiply-add wrappers. The unmasked form hands __A, __B
+/// and __C unchanged to the vfmaddnepbh256 builtin; the masked forms compute
+/// that result and then pick lanes with the selectpbf_256 builtin.
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_fmaddne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
+ __v16bf __x = (__v16bf)__A;
+ __v16bf __y = (__v16bf)__B;
+ __v16bf __z = (__v16bf)__C;
+ return (__m256bh)__builtin_ia32_vfmaddnepbh256(__x, __y, __z);
+}
+
+/// Merge-masked form: lanes not selected by __U come from __A.
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fmaddne_pbh(
+ __m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) {
+ __v16bf __fma = (__v16bf)_mm256_fmaddne_pbh(__A, __B, __C);
+ return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, __fma,
+ (__v16bf)__A);
+}
+
+/// mask3 form: lanes not selected by __U come from __C.
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask3_fmaddne_pbh(
+ __m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) {
+ __v16bf __fma = (__v16bf)_mm256_fmaddne_pbh(__A, __B, __C);
+ return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, __fma,
+ (__v16bf)__C);
+}
+
+/// Zero-masked form: lanes not selected by __U come from the all-zero vector.
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmaddne_pbh(
+ __mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) {
+ __v16bf __fma = (__v16bf)_mm256_fmaddne_pbh(__A, __B, __C);
+ return (__m256bh)__builtin_ia32_selectpbf_256(
+ (__mmask16)__U, __fma, (__v16bf)_mm256_setzero_pbh());
+}
+
+/// 256-bit BF16 fused multiply-subtract wrappers. The unmasked form reuses
+/// the vfmaddnepbh256 builtin with the third operand negated (__A, __B, -__C);
+/// the masked forms compute that result and then pick lanes with the
+/// selectpbf_256 builtin.
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_fmsubne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
+ __v16bf __x = (__v16bf)__A;
+ __v16bf __y = (__v16bf)__B;
+ __v16bf __neg_z = -(__v16bf)__C;
+ return (__m256bh)__builtin_ia32_vfmaddnepbh256(__x, __y, __neg_z);
+}
+
+/// Merge-masked form: lanes not selected by __U come from __A.
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fmsubne_pbh(
+ __m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) {
+ __v16bf __fms = (__v16bf)_mm256_fmsubne_pbh(__A, __B, __C);
+ return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, __fms,
+ (__v16bf)__A);
+}
+
+/// mask3 form: lanes not selected by __U come from __C.
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsubne_pbh(
+ __m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) {
+ __v16bf __fms = (__v16bf)_mm256_fmsubne_pbh(__A, __B, __C);
+ return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, __fms,
+ (__v16bf)__C);
+}
+
+/// Zero-masked form: lanes not selected by __U come from the all-zero vector.
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsubne_pbh(
+ __mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) {
+ __v16bf __fms = (__v16bf)_mm256_fmsubne_pbh(__A, __B, __C);
+ return (__m256bh)__builtin_ia32_selectpbf_256(
+ (__mmask16)__U, __fms, (__v16bf)_mm256_setzero_pbh());
+}
+
+/// 256-bit BF16 negated fused multiply-add wrappers. The unmasked form reuses
+/// the vfmaddnepbh256 builtin with the second operand negated
+/// (__A, -__B, __C); the masked forms compute that result and then pick lanes
+/// with the selectpbf_256 builtin.
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_fnmaddne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
+ __v16bf __x = (__v16bf)__A;
+ __v16bf __neg_y = -(__v16bf)__B;
+ __v16bf __z = (__v16bf)__C;
+ return (__m256bh)__builtin_ia32_vfmaddnepbh256(__x, __neg_y, __z);
+}
+
+/// Merge-masked form: lanes not selected by __U come from __A.
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fnmaddne_pbh(
+ __m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) {
+ __v16bf __fnma = (__v16bf)_mm256_fnmaddne_pbh(__A, __B, __C);
+ return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, __fnma,
+ (__v16bf)__A);
+}
+
+/// mask3 form: lanes not selected by __U come from __C.
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmaddne_pbh(
+ __m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) {
+ __v16bf __fnma = (__v16bf)_mm256_fnmaddne_pbh(__A, __B, __C);
+ return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, __fnma,
+ (__v16bf)__C);
+}
+
+/// Zero-masked form: lanes not selected by __U come from the all-zero vector.
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmaddne_pbh(
+ __mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) {
+ __v16bf __fnma = (__v16bf)_mm256_fnmaddne_pbh(__A, __B, __C);
+ return (__m256bh)__builtin_ia32_selectpbf_256(
+ (__mmask16)__U, __fnma, (__v16bf)_mm256_setzero_pbh());
+}
+
+/// 256-bit BF16 negated fused multiply-subtract wrappers. The unmasked form
+/// reuses the vfmaddnepbh256 builtin with the second and third operands
+/// negated (__A, -__B, -__C); the masked forms compute that result and then
+/// pick lanes with the selectpbf_256 builtin.
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_fnmsubne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
+ __v16bf __x = (__v16bf)__A;
+ __v16bf __neg_y = -(__v16bf)__B;
+ __v16bf __neg_z = -(__v16bf)__C;
+ return (__m256bh)__builtin_ia32_vfmaddnepbh256(__x, __neg_y, __neg_z);
+}
+
+/// Merge-masked form: lanes not selected by __U come from __A.
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsubne_pbh(
+ __m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) {
+ __v16bf __fnms = (__v16bf)_mm256_fnmsubne_pbh(__A, __B, __C);
+ return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, __fnms,
+ (__v16bf)__A);
+}
+
+/// mask3 form: lanes not selected by __U come from __C.
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsubne_pbh(
+ __m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) {
+ __v16bf __fnms = (__v16bf)_mm256_fnmsubne_pbh(__A, __B, __C);
+ return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, __fnms,
+ (__v16bf)__C);
+}
+
+/// Zero-masked form: lanes not selected by __U come from the all-zero vector.
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsubne_pbh(
+ __mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) {
+ __v16bf __fnms = (__v16bf)_mm256_fnmsubne_pbh(__A, __B, __C);
+ return (__m256bh)__builtin_ia32_selectpbf_256(
+ (__mmask16)__U, __fnms, (__v16bf)_mm256_setzero_pbh());
+}
+
+/// 128-bit BF16 fused multiply-add wrappers. The unmasked form hands __A, __B
+/// and __C unchanged to the vfmaddnepbh128 builtin; the masked forms compute
+/// that result and then pick lanes with the selectpbf_128 builtin.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmaddne_pbh(__m128bh __A,
+ __m128bh __B,
+ __m128bh __C) {
+ __v8bf __x = (__v8bf)__A;
+ __v8bf __y = (__v8bf)__B;
+ __v8bf __z = (__v8bf)__C;
+ return (__m128bh)__builtin_ia32_vfmaddnepbh128(__x, __y, __z);
+}
+
+/// Merge-masked form: lanes not selected by __U come from __A.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_fmaddne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) {
+ __v8bf __fma = (__v8bf)_mm_fmaddne_pbh(__A, __B, __C);
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, __fma,
+ (__v8bf)__A);
+}
+
+/// mask3 form: lanes not selected by __U come from __C.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask3_fmaddne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) {
+ __v8bf __fma = (__v8bf)_mm_fmaddne_pbh(__A, __B, __C);
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, __fma,
+ (__v8bf)__C);
+}
+
+/// Zero-masked form: lanes not selected by __U come from the all-zero vector.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_fmaddne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
+ __v8bf __fma = (__v8bf)_mm_fmaddne_pbh(__A, __B, __C);
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, __fma,
+ (__v8bf)_mm_setzero_pbh());
+}
+
+/// 128-bit BF16 fused multiply-subtract wrappers. The unmasked form reuses
+/// the vfmaddnepbh128 builtin with the third operand negated (__A, __B, -__C);
+/// the masked forms compute that result and then pick lanes with the
+/// selectpbf_128 builtin.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmsubne_pbh(__m128bh __A,
+ __m128bh __B,
+ __m128bh __C) {
+ __v8bf __x = (__v8bf)__A;
+ __v8bf __y = (__v8bf)__B;
+ __v8bf __neg_z = -(__v8bf)__C;
+ return (__m128bh)__builtin_ia32_vfmaddnepbh128(__x, __y, __neg_z);
+}
+
+/// Merge-masked form: lanes not selected by __U come from __A.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_fmsubne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) {
+ __v8bf __fms = (__v8bf)_mm_fmsubne_pbh(__A, __B, __C);
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, __fms,
+ (__v8bf)__A);
+}
+
+/// mask3 form: lanes not selected by __U come from __C.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask3_fmsubne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) {
+ __v8bf __fms = (__v8bf)_mm_fmsubne_pbh(__A, __B, __C);
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, __fms,
+ (__v8bf)__C);
+}
+
+/// Zero-masked form: lanes not selected by __U come from the all-zero vector.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_fmsubne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
+ __v8bf __fms = (__v8bf)_mm_fmsubne_pbh(__A, __B, __C);
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, __fms,
+ (__v8bf)_mm_setzero_pbh());
+}
+
+/// 128-bit BF16 negated fused multiply-add wrappers. The unmasked form reuses
+/// the vfmaddnepbh128 builtin with the second operand negated
+/// (__A, -__B, __C); the masked forms compute that result and then pick lanes
+/// with the selectpbf_128 builtin.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_fnmaddne_pbh(__m128bh __A, __m128bh __B, __m128bh __C) {
+ __v8bf __x = (__v8bf)__A;
+ __v8bf __neg_y = -(__v8bf)__B;
+ __v8bf __z = (__v8bf)__C;
+ return (__m128bh)__builtin_ia32_vfmaddnepbh128(__x, __neg_y, __z);
+}
+
+/// Merge-masked form: lanes not selected by __U come from __A.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_fnmaddne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) {
+ __v8bf __fnma = (__v8bf)_mm_fnmaddne_pbh(__A, __B, __C);
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, __fnma,
+ (__v8bf)__A);
+}
+
+/// mask3 form: lanes not selected by __U come from __C.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask3_fnmaddne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) {
+ __v8bf __fnma = (__v8bf)_mm_fnmaddne_pbh(__A, __B, __C);
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, __fnma,
+ (__v8bf)__C);
+}
+
+/// Zero-masked form: lanes not selected by __U come from the all-zero vector.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_fnmaddne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
+ __v8bf __fnma = (__v8bf)_mm_fnmaddne_pbh(__A, __B, __C);
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, __fnma,
+ (__v8bf)_mm_setzero_pbh());
+}
+
+/// 128-bit BF16 negated fused multiply-subtract wrappers. The unmasked form
+/// reuses the vfmaddnepbh128 builtin with the second and third operands
+/// negated (__A, -__B, -__C); the masked forms compute that result and then
+/// pick lanes with the selectpbf_128 builtin.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_fnmsubne_pbh(__m128bh __A, __m128bh __B, __m128bh __C) {
+ __v8bf __x = (__v8bf)__A;
+ __v8bf __neg_y = -(__v8bf)__B;
+ __v8bf __neg_z = -(__v8bf)__C;
+ return (__m128bh)__builtin_ia32_vfmaddnepbh128(__x, __neg_y, __neg_z);
+}
+
+/// Merge-masked form: lanes not selected by __U come from __A.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_fnmsubne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) {
+ __v8bf __fnms = (__v8bf)_mm_fnmsubne_pbh(__A, __B, __C);
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, __fnms,
+ (__v8bf)__A);
+}
+
+/// mask3 form: lanes not selected by __U come from __C.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask3_fnmsubne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) {
+ __v8bf __fnms = (__v8bf)_mm_fnmsubne_pbh(__A, __B, __C);
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, __fnms,
+ (__v8bf)__C);
+}
+
+/// Zero-masked form: lanes not selected by __U come from the all-zero vector.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_fnmsubne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
+ __v8bf __fnms = (__v8bf)_mm_fnmsubne_pbh(__A, __B, __C);
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, __fnms,
+ (__v8bf)_mm_setzero_pbh());
+}
+
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
+
+#endif
+#endif
\ No newline at end of file
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index e0957257ed5c7..eaa0bf06c128b 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -649,10 +649,12 @@ _storebe_i64(void * __P, long long __D) {
#endif
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2__)
+#include <avx10_2bf16intrin.h>
#include <avx10_2niintrin.h>
#endif
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2_512__)
+#include <avx10_2_512bf16intrin.h>
#include <avx10_2_512niintrin.h>
#endif
diff --git a/clang/test/CodeGen/X86/avx10_2_512bf16-builtins.c b/clang/test/CodeGen/X86/avx10_2_512bf16-builtins.c
new file mode 100644
index 0000000000000..8274decc721e9
--- /dev/null
+++ b/clang/test/CodeGen/X86/avx10_2_512bf16-builtins.c
@@ -0,0 +1,1054 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx10.2-512 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -ffreestanding %s -triple=i386-unknown-unknown -target-feature +avx10.2-512 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+// Verifies that _mm512_setzero_pbh() yields an all-zero vector constant.
+// Declared with (void) so the definition is a real prototype in C, matching
+// the other no-argument tests in this file.
+__m512bh test_mm512_setzero_pbh(void) {
+ // CHECK-LABEL: @test_mm512_setzero_pbh
+ // CHECK: zeroinitializer
+ return _mm512_setzero_pbh();
+}
+
+// Verifies the IR for _mm512_undefined_pbh(): the expected output returns a
+// zeroinitializer, i.e. the "undefined" vector is emitted as an all-zero
+// constant here.
+__m512bh test_mm512_undefined_pbh(void) {
+ // CHECK-LABEL: @test_mm512_undefined_pbh
+ // CHECK: ret <32 x bfloat> zeroinitializer
+ return _mm512_undefined_pbh();
+}
+
+__m512bh test_mm512_set1_pbh(__bf16 h) {
+ // CHECK-LABEL: @test_mm512_set1_pbh
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 0
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 1
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 2
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 3
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 4
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 5
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 6
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 7
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 8
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 9
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 10
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 11
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 12
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 13
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 14
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 15
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 16
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 17
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 18
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 19
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 20
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 21
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 22
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 23
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 24
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 25
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 26
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 27
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 28
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 29
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 30
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 31
+ return _mm512_set1_pbh(h);
+}
+
+__m512bh test_mm512_set_pbh(__bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4,
+ __bf16 bf5, __bf16 bf6, __bf16 bf7, __bf16 bf8,
+ __bf16 bf9, __bf16 bf10, __bf16 bf11, __bf16 bf12,
+ __bf16 bf13, __bf16 bf14, __bf16 bf15, __bf16 bf16,
+ __bf16 bf17, __bf16 bf18, __bf16 bf19, __bf16 bf20,
+ __bf16 bf21, __bf16 bf22, __bf16 bf23, __bf16 bf24,
+ __bf16 bf25, __bf16 bf26, __bf16 bf27, __bf16 bf28,
+ __bf16 bf29, __bf16 bf30, __bf16 bf31, __bf16 bf32) {
+ // CHECK-LABEL: @test_mm512_set_pbh
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 0
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 1
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 2
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 3
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 4
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 5
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 6
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 7
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 8
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 9
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 10
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 11
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 12
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 13
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 14
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 15
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 16
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 17
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 18
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 19
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 20
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 21
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 22
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 23
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 24
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 25
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 26
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 27
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 28
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 29
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 30
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 31
+ return _mm512_set_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+ bf9, bf10, bf11, bf12, bf13, bf14, bf15, bf16,
+ bf17, bf18, bf19, bf20, bf21, bf22, bf23, bf24,
+ bf25, bf26, bf27, bf28, bf29, bf30, bf31, bf32);
+}
+
+__m512bh test_mm512_setr_pbh(__bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4,
+ __bf16 bf5, __bf16 bf6, __bf16 bf7, __bf16 bf8,
+ __bf16 bf9, __bf16 bf10, __bf16 bf11, __bf16 bf12,
+ __bf16 bf13, __bf16 bf14, __bf16 bf15, __bf16 bf16,
+ __bf16 bf17, __bf16 bf18, __bf16 bf19, __bf16 bf20,
+ __bf16 bf21, __bf16 bf22, __bf16 bf23, __bf16 bf24,
+ __bf16 bf25, __bf16 bf26, __bf16 bf27, __bf16 bf28,
+ __bf16 bf29, __bf16 bf30, __bf16 bf31, __bf16 bf32) {
+ // CHECK-LABEL: @test_mm512_setr_pbh
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 0
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 1
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 2
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 3
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 4
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 5
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 6
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 7
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 8
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 9
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 10
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 11
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 12
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 13
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 14
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 15
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 16
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 17
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 18
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 19
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 20
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 21
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 22
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 23
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 24
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 25
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 26
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 27
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 28
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 29
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 30
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 31
+ return _mm512_setr_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+ bf9, bf10, bf11, bf12, bf13, bf14, bf15, bf16,
+ bf17, bf18, bf19, bf20, bf21, bf22, bf23, bf24,
+ bf25, bf26, bf27, bf28, bf29, bf30, bf31, bf32);
+}
+
+__m512 test_mm512_castpbf16_ps(__m512bh A) {
+ // CHECK-LABEL: test_mm512_castpbf16_ps
+ // CHECK: bitcast <32 x bfloat> %{{.*}} to <16 x float>
+ return _mm512_castpbf16_ps(A);
+}
+
+__m512d test_mm512_castpbf16_pd(__m512bh A) {
+ // CHECK-LABEL: test_mm512_castpbf16_pd
+ // CHECK: bitcast <32 x bfloat> %{{.*}} to <8 x double>
+ return _mm512_castpbf16_pd(A);
+}
+
+__m512i test_mm512_castpbf16_si512(__m512bh A) {
+ // CHECK-LABEL: test_mm512_castpbf16_si512
+ // CHECK: bitcast <32 x bfloat> %{{.*}} to <8 x i64>
+ return _mm512_castpbf16_si512(A);
+}
+
+__m512bh test_mm512_castps_pbh(__m512 A) {
+ // CHECK-LABEL: test_mm512_castps_pbh
+ // CHECK: bitcast <16 x float> %{{.*}} to <32 x bfloat>
+ return _mm512_castps_pbh(A);
+}
+
+__m512bh test_mm512_castpd_pbh(__m512d A) {
+ // CHECK-LABEL: test_mm512_castpd_pbh
+ // CHECK: bitcast <8 x double> %{{.*}} to <32 x bfloat>
+ return _mm512_castpd_pbh(A);
+}
+
+__m512bh test_mm512_castsi512_pbh(__m512i A) {
+ // CHECK-LABEL: test_mm512_castsi512_pbh
+ // CHECK: bitcast <8 x i64> %{{.*}} to <32 x bfloat>
+ return _mm512_castsi512_pbh(A);
+}
+
+__m128bh test_mm512_castpbf16512_pbh128(__m512bh __a) {
+ // CHECK-LABEL: test_mm512_castpbf16512_pbh128
+ // CHECK: shufflevector <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ return _mm512_castpbf16512_pbh128(__a);
+}
+
+__m256bh test_mm512_castpbf16512_pbh256(__m512bh __a) {
+ // CHECK-LABEL: test_mm512_castpbf16512_pbh256
+ // CHECK: shufflevector <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ return _mm512_castpbf16512_pbh256(__a);
+}
+
+__m512bh test_mm512_castpbf16128_pbh512(__m128bh __a) {
+ // CHECK-LABEL: test_mm512_castpbf16128_pbh512
+ // CHECK: shufflevector <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+ return _mm512_castpbf16128_pbh512(__a);
+}
+
+__m512bh test_mm512_castpbf16256_pbh512(__m256bh __a) {
+ // CHECK-LABEL: test_mm512_castpbf16256_pbh512
+ // CHECK: shufflevector <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+ return _mm512_castpbf16256_pbh512(__a);
+}
+
+// Verifies the 128 -> 512 bit zero-extension. In the expected shuffle mask,
+// indices 0-7 take the eight lanes of the first operand; indices 8-15
+// (repeated three times) index into the second operand, which is not pinned
+// by the pattern -- presumably the zero vector supplying the upper 24 lanes.
+__m512bh test_mm512_zextpbf16128_pbh512(__m128bh __a) {
+ // CHECK-LABEL: test_mm512_zextpbf16128_pbh512
+ // CHECK: shufflevector <8 x bfloat> %{{.*}}, <8 x bfloat> {{.*}}, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ return _mm512_zextpbf16128_pbh512(__a);
+}
+
+__m512bh test_mm512_zextpbf16256_pbh512(__m256bh __a) {
+ // CHECK-LABEL: test_mm512_zextpbf16256_pbh512
+ // CHECK: shufflevector <16 x bfloat> %{{.*}}, <16 x bfloat> {{.*}}, <32 x i32>
+ return _mm512_zextpbf16256_pbh512(__a);
+}
+
+// Verifies that _mm512_abs_pbh is emitted as a bitwise AND on sixteen 32-bit
+// lanes rather than as a floating-point operation. The mask constant itself
+// is not matched; presumably it clears the sign bit of both BF16 halves of
+// each 32-bit lane.
+__m512bh test_mm512_abs_pbh(__m512bh a) {
+ // CHECK-LABEL: @test_mm512_abs_pbh
+ // CHECK: and <16 x i32>
+ return _mm512_abs_pbh(a);
+}
+
+// Full-vector loads and stores (aligned and unaligned)
+
+__m512bh test_mm512_load_pbh(void *p) {
+ // CHECK-LABEL: @test_mm512_load_pbh
+ // CHECK: load <32 x bfloat>, ptr %{{.*}}, align 64
+ return _mm512_load_pbh(p);
+}
+
+__m512bh test_mm512_loadu_pbh(void *p) {
+ // CHECK-LABEL: @test_mm512_loadu_pbh
+ // CHECK: load <32 x bfloat>, ptr {{.*}}, align 1{{$}}
+ return _mm512_loadu_pbh(p);
+}
+
+void test_mm512_store_pbh(void *p, __m512bh a) {
+ // CHECK-LABEL: @test_mm512_store_pbh
+ // CHECK: store <32 x bfloat> %{{.*}}, ptr %{{.*}}, align 64
+ _mm512_store_pbh(p, a);
+}
+
+void test_mm512_storeu_pbh(void *p, __m512bh a) {
+ // CHECK-LABEL: @test_mm512_storeu_pbh
+ // CHECK: store <32 x bfloat> %{{.*}}, ptr %{{.*}}, align 1{{$}}
+ // CHECK-NEXT: ret void
+ _mm512_storeu_pbh(p, a);
+}
+
+__m512bh test_mm512_mask_blend_pbh(__mmask32 __U, __m512bh __A, __m512bh __W) {
+ // CHECK-LABEL: @test_mm512_mask_blend_pbh
+ // CHECK: %{{.*}} = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: %{{.*}} = select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask_blend_pbh(__U, __A, __W);
+}
+
+__m512bh test_mm512_permutex2var_pbh(__m512bh __A, __m512i __I, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_permutex2var_pbh
+ // CHECK: %{{.*}} = bitcast <32 x bfloat> %{{.*}} to <32 x i16>
+ // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <32 x i16>
+ // CHECK: %{{.*}} = bitcast <32 x bfloat> %{{.*}} to <32 x i16>
+ // CHECK: %{{.*}} = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
+ // CHECK: %{{.*}} = bitcast <32 x i16> %{{.*}} to <32 x bfloat>
+ return _mm512_permutex2var_pbh(__A, __I, __B);
+}
+
+// Verifies _mm512_permutexvar_pbh: both operands are bitcast to <32 x i16>,
+// permuted with the existing 16-bit permvar intrinsic, and the result is
+// bitcast back to <32 x bfloat>.
+// NOTE(review): the test is named ..._epi16 although it exercises the _pbh
+// intrinsic; consider renaming it (and its label) to
+// test_mm512_permutexvar_pbh for consistency with the rest of the file.
+__m512bh test_mm512_permutexvar_epi16(__m512i __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_permutexvar_epi16
+ // CHECK: %{{.*}} = bitcast <32 x bfloat> %{{.*}} to <32 x i16>
+ // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <32 x i16>
+ // CHECK: %{{.*}} = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %{{.*}}, <32 x i16> %{{.*}})
+ // CHECK: %{{.*}} = bitcast <32 x i16> %{{.*}} to <32 x bfloat>
+ return _mm512_permutexvar_pbh(__A, __B);
+}
+
+// addne family: the unmasked form must lower to a plain fadd; the masked
+// forms add a per-lane select. Every case carries its own label so a pattern
+// can only match inside the function it belongs to.
+__m512bh test_mm512_addne_pbh(__m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_addne_pbh
+ // CHECK: %{{.*}} = fadd <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_addne_pbh(__A, __B);
+}
+
+__m512bh test_mm512_mask_addne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_mask_addne_pbh
+ // CHECK: %{{.*}} = fadd <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask_addne_pbh(__W, __U, __A, __B);
+}
+
+__m512bh test_mm512_maskz_addne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_maskz_addne_pbh
+ // CHECK: %{{.*}} = fadd <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_maskz_addne_pbh(__U, __A, __B);
+}
+
+// subne family: the unmasked form must lower to a plain fsub; the masked
+// forms add a per-lane select. Every case carries its own label so a pattern
+// can only match inside the function it belongs to.
+__m512bh test_mm512_subne_pbh(__m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_subne_pbh
+ // CHECK: %{{.*}} = fsub <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_subne_pbh(__A, __B);
+}
+
+__m512bh test_mm512_mask_subne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_mask_subne_pbh
+ // CHECK: %{{.*}} = fsub <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask_subne_pbh(__W, __U, __A, __B);
+}
+
+__m512bh test_mm512_maskz_subne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_maskz_subne_pbh
+ // CHECK: %{{.*}} = fsub <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_maskz_subne_pbh(__U, __A, __B);
+}
+
+// mulne family: the unmasked form must lower to a plain fmul; the masked
+// forms add a per-lane select. Every case carries its own label so a pattern
+// can only match inside the function it belongs to.
+__m512bh test_mm512_mulne_pbh(__m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_mulne_pbh
+ // CHECK: %{{.*}} = fmul <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mulne_pbh(__A, __B);
+}
+
+__m512bh test_mm512_mask_mulne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_mask_mulne_pbh
+ // CHECK: %{{.*}} = fmul <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask_mulne_pbh(__W, __U, __A, __B);
+}
+
+__m512bh test_mm512_maskz_mulne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_maskz_mulne_pbh
+ // CHECK: %{{.*}} = fmul <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_maskz_mulne_pbh(__U, __A, __B);
+}
+
+// divne family: the unmasked form must lower to a plain fdiv; the masked
+// forms add a per-lane select. Every case carries its own label so a pattern
+// can only match inside the function it belongs to.
+__m512bh test_mm512_divne_pbh(__m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_divne_pbh
+ // CHECK: %{{.*}} = fdiv <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_divne_pbh(__A, __B);
+}
+
+__m512bh test_mm512_mask_divne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_mask_divne_pbh
+ // CHECK: %{{.*}} = fdiv <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask_divne_pbh(__W, __U, __A, __B);
+}
+
+__m512bh test_mm512_maskz_divne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_maskz_divne_pbh
+ // CHECK: %{{.*}} = fdiv <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_maskz_divne_pbh(__U, __A, __B);
+}
+
+// max family: each form must call the vmaxpbf16512 intrinsic; the masked
+// forms add a per-lane select. Every case carries its own label so a pattern
+// can only match inside the function it belongs to.
+__m512bh test_mm512_max_pbh(__m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_max_pbh
+ // CHECK: @llvm.x86.avx10.vmaxpbf16512(
+ return _mm512_max_pbh(__A, __B);
+}
+
+__m512bh test_mm512_mask_max_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_mask_max_pbh
+ // CHECK: @llvm.x86.avx10.vmaxpbf16512
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask_max_pbh(__W, __U, __A, __B);
+}
+
+__m512bh test_mm512_maskz_max_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_maskz_max_pbh
+ // CHECK: @llvm.x86.avx10.vmaxpbf16512
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_maskz_max_pbh(__U, __A, __B);
+}
+
+// min family: each form must call the vminpbf16512 intrinsic; the masked
+// forms add a per-lane select. Every case carries its own label so a pattern
+// can only match inside the function it belongs to.
+__m512bh test_mm512_min_pbh(__m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_min_pbh
+ // CHECK: @llvm.x86.avx10.vminpbf16512(
+ return _mm512_min_pbh(__A, __B);
+}
+
+__m512bh test_mm512_mask_min_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_mask_min_pbh
+ // CHECK: @llvm.x86.avx10.vminpbf16512
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask_min_pbh(__W, __U, __A, __B);
+}
+
+__m512bh test_mm512_maskz_min_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_maskz_min_pbh
+ // CHECK: @llvm.x86.avx10.vminpbf16512
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_maskz_min_pbh(__U, __A, __B);
+}
+
+// _mm512_cmp_pbh_mask predicate tests. Each case fixes one _CMP_* constant
+// and expects a generic fcmp on <32 x bfloat> with the matching condition
+// code (e.g. _CMP_LT_OS -> olt, _CMP_NLT_US -> uge); no target-specific
+// compare intrinsic is expected for these predicates.
+// Only the first label below carries the '@' prefix; the others match the
+// bare function name.
+__mmask32 test_mm512_cmp_pbh_mask_eq_oq(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: @test_mm512_cmp_pbh_mask_eq_oq
+ // CHECK: fcmp oeq <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_EQ_OQ);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_lt_os(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_lt_os
+ // CHECK: fcmp olt <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_LT_OS);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_le_os(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_le_os
+ // CHECK: fcmp ole <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_LE_OS);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_unord_q(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_unord_q
+ // CHECK: fcmp uno <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_UNORD_Q);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_neq_uq(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_neq_uq
+ // CHECK: fcmp une <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_NEQ_UQ);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_nlt_us(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_nlt_us
+ // CHECK: fcmp uge <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_NLT_US);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_nle_us(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_nle_us
+ // CHECK: fcmp ugt <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_NLE_US);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_ord_q(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_ord_q
+ // CHECK: fcmp ord <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_ORD_Q);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_eq_uq(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_eq_uq
+ // CHECK: fcmp ueq <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_EQ_UQ);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_nge_us(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_nge_us
+ // CHECK: fcmp ult <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_NGE_US);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_ngt_us(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_ngt_us
+ // CHECK: fcmp ule <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_NGT_US);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_false_oq(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_false_oq
+ // CHECK: fcmp false <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_FALSE_OQ);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_neq_oq(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_neq_oq
+ // CHECK: fcmp one <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_NEQ_OQ);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_ge_os(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_ge_os
+ // CHECK: fcmp oge <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_GE_OS);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_gt_os(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_gt_os
+ // CHECK: fcmp ogt <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_GT_OS);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_true_uq(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_true_uq
+ // CHECK: fcmp true <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_TRUE_UQ);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_eq_os(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_eq_os
+ // CHECK: fcmp oeq <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_EQ_OS);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_lt_oq(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_lt_oq
+ // CHECK: fcmp olt <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_LT_OQ);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_le_oq(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_le_oq
+ // CHECK: fcmp ole <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_LE_OQ);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_unord_s(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_unord_s
+ // CHECK: fcmp uno <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_UNORD_S);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_neq_us(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_neq_us
+ // CHECK: fcmp une <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_NEQ_US);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_nlt_uq(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_nlt_uq
+ // CHECK: fcmp uge <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_NLT_UQ);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_nle_uq(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_nle_uq
+ // CHECK: fcmp ugt <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_NLE_UQ);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_ord_s(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_ord_s
+ // CHECK: fcmp ord <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_ORD_S);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_eq_us(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_eq_us
+ // CHECK: fcmp ueq <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_EQ_US);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_nge_uq(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_nge_uq
+ // CHECK: fcmp ult <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_NGE_UQ);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_ngt_uq(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_ngt_uq
+ // CHECK: fcmp ule <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_NGT_UQ);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_false_os(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_false_os
+ // CHECK: fcmp false <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_FALSE_OS);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_neq_os(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_neq_os
+ // CHECK: fcmp one <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_NEQ_OS);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_ge_oq(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_ge_oq
+ // CHECK: fcmp oge <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_GE_OQ);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_gt_oq(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_gt_oq
+ // CHECK: fcmp ogt <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_GT_OQ);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_true_us(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_true_us
+ // CHECK: fcmp true <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_TRUE_US);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_eq_oq(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: @test_mm512_mask_cmp_pbh_mask_eq_oq
+ // CHECK: fcmp oeq <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_OQ);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_lt_os(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_lt_os
+ // CHECK: fcmp olt <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_LT_OS);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_le_os(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_le_os
+ // CHECK: fcmp ole <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_LE_OS);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_unord_q(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_unord_q
+ // CHECK: fcmp uno <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_UNORD_Q);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_neq_uq(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_neq_uq
+ // CHECK: fcmp une <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_UQ);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_nlt_us(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_nlt_us
+ // CHECK: fcmp uge <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NLT_US);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_nle_us(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_nle_us
+ // CHECK: fcmp ugt <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NLE_US);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_ord_q(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_ord_q
+ // CHECK: fcmp ord <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_ORD_Q);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_eq_uq(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_eq_uq
+ // CHECK: fcmp ueq <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_UQ);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_nge_us(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_nge_us
+ // CHECK: fcmp ult <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NGE_US);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_ngt_us(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_ngt_us
+ // CHECK: fcmp ule <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NGT_US);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_false_oq(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_false_oq
+ // CHECK: fcmp false <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_FALSE_OQ);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_neq_oq(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_neq_oq
+ // CHECK: fcmp one <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_OQ);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_ge_os(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_ge_os
+ // CHECK: fcmp oge <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_GE_OS);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_gt_os(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_gt_os
+ // CHECK: fcmp ogt <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_GT_OS);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_true_uq(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_true_uq
+ // CHECK: fcmp true <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_TRUE_UQ);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_eq_os(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_eq_os
+ // CHECK: fcmp oeq <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_OS);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_lt_oq(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_lt_oq
+ // CHECK: fcmp olt <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_LT_OQ);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_le_oq(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_le_oq
+ // CHECK: fcmp ole <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_LE_OQ);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_unord_s(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_unord_s
+ // CHECK: fcmp uno <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_UNORD_S);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_neq_us(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_neq_us
+ // CHECK: fcmp une <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_US);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_nlt_uq(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_nlt_uq
+ // CHECK: fcmp uge <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NLT_UQ);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_nle_uq(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_nle_uq
+ // CHECK: fcmp ugt <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NLE_UQ);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_ord_s(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_ord_s
+ // CHECK: fcmp ord <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_ORD_S);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_eq_us(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_eq_us
+ // CHECK: fcmp ueq <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_US);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_nge_uq(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_nge_uq
+ // CHECK: fcmp ult <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NGE_UQ);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_ngt_uq(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_ngt_uq
+ // CHECK: fcmp ule <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NGT_UQ);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_false_os(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_false_os
+ // CHECK: fcmp false <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_FALSE_OS);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_neq_os(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_neq_os
+ // CHECK: fcmp one <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_OS);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_ge_oq(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_ge_oq
+ // CHECK: fcmp oge <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_GE_OQ);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_gt_oq(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_gt_oq
+ // CHECK: fcmp ogt <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_GT_OQ);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_true_us(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_true_us
+ // CHECK: fcmp true <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_TRUE_US);
+}
+
+// mask form of the 512-bit bf16 fpclass test, called with immediate 4: the IR
+// must reference @llvm.x86.avx10.fpclass.nepbf16.512.
+__mmask32 test_mm512_mask_fpclass_pbh_mask(__mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_mask_fpclass_pbh_mask
+ // CHECK: @llvm.x86.avx10.fpclass.nepbf16.512
+ return _mm512_mask_fpclass_pbh_mask(__U, __A, 4);
+}
+
+// Unmasked 512-bit bf16 fpclass test, called with immediate 4: the IR must
+// reference @llvm.x86.avx10.fpclass.nepbf16.512.
+__mmask32 test_mm512_fpclass_pbh_mask(__m512bh __A) {
+ // CHECK-LABEL: @test_mm512_fpclass_pbh_mask
+ // CHECK: @llvm.x86.avx10.fpclass.nepbf16.512
+ return _mm512_fpclass_pbh_mask(__A, 4);
+}
+
+// Unmasked 512-bit bf16 scalef: the IR must reference
+// @llvm.x86.avx10.mask.scalef.nepbf16.512.
+__m512bh test_mm512_scalef_pbh(__m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_scalef_pbh
+ // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.512
+ return _mm512_scalef_pbh(__A, __B);
+}
+
+// mask form of the 512-bit bf16 scalef: the IR must reference
+// @llvm.x86.avx10.mask.scalef.nepbf16.512.
+__m512bh test_mm512_mask_scalef_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_mask_scalef_pbh
+ // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.512
+ return _mm512_mask_scalef_pbh(__W, __U, __A, __B);
+}
+
+// maskz form of the 512-bit bf16 scalef: the IR must reference
+// @llvm.x86.avx10.mask.scalef.nepbf16.512.
+__m512bh test_mm512_maskz_scalef_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_maskz_scalef_pbh
+ // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.512
+ return _mm512_maskz_scalef_pbh(__U, __A, __B);
+}
+
+// Unmasked 512-bit bf16 rcp: the IR must reference
+// @llvm.x86.avx10.mask.rcp.nepbf16.512.
+__m512bh test_mm512_rcp_pbh(__m512bh __A) {
+ // CHECK-LABEL: @test_mm512_rcp_pbh
+ // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.512
+ return _mm512_rcp_pbh(__A);
+}
+
+// mask form of the 512-bit bf16 rcp: the IR must reference
+// @llvm.x86.avx10.mask.rcp.nepbf16.512. The intrinsic result is returned
+// directly, matching the unmasked and maskz sibling tests.
+__m512bh test_mm512_mask_rcp_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_mask_rcp_pbh
+ // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.512
+ return _mm512_mask_rcp_pbh(__W, __U, __A);
+}
+
+// maskz form of the 512-bit bf16 rcp: the IR must reference
+// @llvm.x86.avx10.mask.rcp.nepbf16.512.
+__m512bh test_mm512_maskz_rcp_pbh(__mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_maskz_rcp_pbh
+ // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.512
+ return _mm512_maskz_rcp_pbh(__U, __A);
+}
+
+// Unmasked 512-bit bf16 getexp: the IR must reference
+// @llvm.x86.avx10.mask.getexp.nepbf16.512.
+__m512bh test_mm512_getexp_pbh(__m512bh __A) {
+ // CHECK-LABEL: @test_mm512_getexp_pbh
+ // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.512
+ return _mm512_getexp_pbh(__A);
+}
+
+// mask form of the 512-bit bf16 getexp: the IR must reference
+// @llvm.x86.avx10.mask.getexp.nepbf16.512.
+__m512bh test_mm512_mask_getexp_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_mask_getexp_pbh
+ // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.512
+ return _mm512_mask_getexp_pbh(__W, __U, __A);
+}
+
+// maskz form of the 512-bit bf16 getexp: the IR must reference
+// @llvm.x86.avx10.mask.getexp.nepbf16.512.
+__m512bh test_mm512_maskz_getexp_pbh(__mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_maskz_getexp_pbh
+ // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.512
+ return _mm512_maskz_getexp_pbh(__U, __A);
+}
+
+// Unmasked 512-bit bf16 rsqrt: the IR must reference
+// @llvm.x86.avx10.mask.rsqrt.nepbf16.512.
+__m512bh test_mm512_rsqrt_pbh(__m512bh __A) {
+ // CHECK-LABEL: @test_mm512_rsqrt_pbh
+ // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.512
+ return _mm512_rsqrt_pbh(__A);
+}
+
+// mask form of the 512-bit bf16 rsqrt: the IR must reference
+// @llvm.x86.avx10.mask.rsqrt.nepbf16.512. The intrinsic result is returned
+// directly, matching the unmasked and maskz sibling tests.
+__m512bh test_mm512_mask_rsqrt_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_mask_rsqrt_pbh
+ // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.512
+ return _mm512_mask_rsqrt_pbh(__W, __U, __A);
+}
+
+// maskz form of the 512-bit bf16 rsqrt: the IR must reference
+// @llvm.x86.avx10.mask.rsqrt.nepbf16.512.
+__m512bh test_mm512_maskz_rsqrt_pbh(__mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_maskz_rsqrt_pbh
+ // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.512
+ return _mm512_maskz_rsqrt_pbh(__U, __A);
+}
+
+// Unmasked 512-bit bf16 reduce, called with immediate 3: the IR must
+// reference @llvm.x86.avx10.mask.reduce.nepbf16.512.
+__m512bh test_mm512_reducene_pbh(__m512bh __A) {
+ // CHECK-LABEL: @test_mm512_reducene_pbh
+ // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.512
+ return _mm512_reducene_pbh(__A, 3);
+}
+
+// mask form of the 512-bit bf16 reduce, called with immediate 1: the IR must
+// reference @llvm.x86.avx10.mask.reduce.nepbf16.512. The mask is __mmask32
+// (one bit per bf16 lane of the 32-lane vector), as in the other 512-bit
+// tests in this file.
+__m512bh test_mm512_mask_reducene_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_mask_reducene_pbh
+ // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.512
+ return _mm512_mask_reducene_pbh(__W, __U, __A, 1);
+}
+
+// maskz form of the 512-bit bf16 reduce, called with immediate 1: the IR must
+// reference @llvm.x86.avx10.mask.reduce.nepbf16.512. The mask is __mmask32
+// (one bit per bf16 lane of the 32-lane vector), as in the other 512-bit
+// tests in this file.
+__m512bh test_mm512_maskz_reducene_pbh(__mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_maskz_reducene_pbh
+ // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.512
+ return _mm512_maskz_reducene_pbh(__U, __A, 1);
+}
+
+// Unmasked 512-bit bf16 roundscale, called with immediate 3: the IR must
+// reference @llvm.x86.avx10.mask.rndscale.nepbf16.512.
+__m512bh test_mm512_roundscalene_pbh(__m512bh __A) {
+ // CHECK-LABEL: @test_mm512_roundscalene_pbh
+ // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.512
+ return _mm512_roundscalene_pbh(__A, 3);
+}
+
+// mask form of the 512-bit bf16 roundscale, called with immediate 1: the IR
+// must reference @llvm.x86.avx10.mask.rndscale.nepbf16.512. The mask is
+// __mmask32 (one bit per bf16 lane of the 32-lane vector), as in the other
+// 512-bit tests in this file.
+__m512bh test_mm512_mask_roundscalene_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_mask_roundscalene_pbh
+ // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.512
+ return _mm512_mask_roundscalene_pbh(__W, __U, __A, 1);
+}
+
+// maskz form of the 512-bit bf16 roundscale, called with immediate 1: the IR
+// must reference @llvm.x86.avx10.mask.rndscale.nepbf16.512. The mask is
+// __mmask32 (one bit per bf16 lane of the 32-lane vector), as in the other
+// 512-bit tests in this file.
+__m512bh test_mm512_maskz_roundscalene_pbh(__mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_maskz_roundscalene_pbh
+ // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.512
+ return _mm512_maskz_roundscalene_pbh(__U, __A, 1);
+}
+
+// Unmasked 512-bit bf16 getmant, called with _MM_MANT_NORM_p5_2 and
+// _MM_MANT_SIGN_nan: the IR must reference
+// @llvm.x86.avx10.mask.getmant.nepbf16.512.
+__m512bh test_mm512_getmant_pbh(__m512bh __A) {
+ // CHECK-LABEL: @test_mm512_getmant_pbh
+ // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.512
+ return _mm512_getmant_pbh(__A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan);
+}
+
+// mask form of the 512-bit bf16 getmant, called with _MM_MANT_NORM_p5_2 and
+// _MM_MANT_SIGN_nan: the IR must reference
+// @llvm.x86.avx10.mask.getmant.nepbf16.512.
+__m512bh test_mm512_mask_getmant_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_mask_getmant_pbh
+ // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.512
+ return _mm512_mask_getmant_pbh(__W, __U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan);
+}
+
+// maskz form of the 512-bit bf16 getmant, called with _MM_MANT_NORM_p5_2 and
+// _MM_MANT_SIGN_nan: the IR must reference
+// @llvm.x86.avx10.mask.getmant.nepbf16.512.
+__m512bh test_mm512_maskz_getmant_pbh(__mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_maskz_getmant_pbh
+ // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.512
+ return _mm512_maskz_getmant_pbh(__U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan);
+}
+
+// Unmasked 512-bit bf16 sqrt: the IR must call the generic
+// @llvm.sqrt.v32bf16 intrinsic.
+__m512bh test_mm512_sqrt_pbh(__m512bh __A) {
+ // CHECK-LABEL: @test_mm512_sqrt_pbh
+ // CHECK: %{{.*}} = call <32 x bfloat> @llvm.sqrt.v32bf16(<32 x bfloat> %{{.*}})
+ return _mm512_sqrt_pbh(__A);
+}
+
+// mask form of the 512-bit bf16 sqrt: the IR must call @llvm.sqrt.v32bf16,
+// bitcast the i32 mask to <32 x i1>, and select on it. The intrinsic result
+// is returned directly, matching the unmasked and maskz sibling tests.
+__m512bh test_mm512_mask_sqrt_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_mask_sqrt_pbh
+ // CHECK: %{{.*}} = call <32 x bfloat> @llvm.sqrt.v32bf16(<32 x bfloat> %{{.*}})
+ // CHECK: bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask_sqrt_pbh(__W, __U, __A);
+}
+
+// maskz form of the 512-bit bf16 sqrt: the IR must call @llvm.sqrt.v32bf16,
+// bitcast the i32 mask to <32 x i1>, and select on it.
+__m512bh test_mm512_maskz_sqrt_pbh(__mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_maskz_sqrt_pbh
+ // CHECK: %{{.*}} = call <32 x bfloat> @llvm.sqrt.v32bf16(<32 x bfloat> %{{.*}})
+ // CHECK: bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_maskz_sqrt_pbh(__U, __A);
+}
+
+__m512bh test_mm512_fmaddne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
+ // CHECK-LABEL: @test_mm512_fmaddne_pbh
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ return _mm512_fmaddne_pbh(__A, __B, __C);
+}
+
+__m512bh test_mm512_mask_fmaddne_pbh(__m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
+ // CHECK-LABEL: @test_mm512_mask_fmaddne_pbh
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask_fmaddne_pbh(__A, __U, __B, __C);
+}
+
+__m512bh test_mm512_mask3_fmaddne_pbh(__m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
+ // CHECK-LABEL: @test_mm512_mask3_fmaddne_pbh
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask3_fmaddne_pbh(__A, __B, __C, __U);
+}
+
+__m512bh test_mm512_maskz_fmaddne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
+ // CHECK-LABEL: @test_mm512_maskz_fmaddne_pbh
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_maskz_fmaddne_pbh(__U, __A, __B, __C);
+}
+
+__m512bh test_mm512_fmsubne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
+ // CHECK-LABEL: @test_mm512_fmsubne_pbh
+ // CHECK: fneg
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ return _mm512_fmsubne_pbh(__A, __B, __C);
+}
+
+__m512bh test_mm512_mask_fmsubne_pbh(__m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
+ // CHECK-LABEL: @test_mm512_mask_fmsubne_pbh
+ // CHECK: fneg
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask_fmsubne_pbh(__A, __U, __B, __C);
+}
+
+__m512bh test_mm512_mask3_fmsubne_pbh(__m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
+ // CHECK-LABEL: @test_mm512_mask3_fmsubne_pbh
+ // CHECK: fneg
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask3_fmsubne_pbh(__A, __B, __C, __U);
+}
+
+__m512bh test_mm512_maskz_fmsubne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
+ // CHECK-LABEL: @test_mm512_maskz_fmsubne_pbh
+ // CHECK: fneg
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_maskz_fmsubne_pbh(__U, __A, __B, __C);
+}
+
+__m512bh test_mm512_fnmaddne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
+ // CHECK-LABEL: @test_mm512_fnmaddne_pbh
+ // CHECK: fneg
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ return _mm512_fnmaddne_pbh(__A, __B, __C);
+}
+
+__m512bh test_mm512_mask_fnmaddne_pbh(__m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
+ // CHECK-LABEL: @test_mm512_mask_fnmaddne_pbh
+ // CHECK: fneg
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask_fnmaddne_pbh(__A, __U, __B, __C);
+}
+
+__m512bh test_mm512_mask3_fnmaddne_pbh(__m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
+ // CHECK-LABEL: @test_mm512_mask3_fnmaddne_pbh
+ // CHECK: fneg
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask3_fnmaddne_pbh(__A, __B, __C, __U);
+}
+
+__m512bh test_mm512_maskz_fnmaddne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
+ // CHECK-LABEL: @test_mm512_maskz_fnmaddne_pbh
+ // CHECK: fneg
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_maskz_fnmaddne_pbh(__U, __A, __B, __C);
+}
+
+__m512bh test_mm512_fnmsubne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
+ // CHECK-LABEL: @test_mm512_fnmsubne_pbh
+ // CHECK: fneg
+ // CHECK: fneg
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ return _mm512_fnmsubne_pbh(__A, __B, __C);
+}
+
+__m512bh test_mm512_mask_fnmsubne_pbh(__m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
+ // CHECK-LABEL: @test_mm512_mask_fnmsubne_pbh
+ // CHECK: fneg
+ // CHECK: fneg
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask_fnmsubne_pbh(__A, __U, __B, __C);
+}
+
+__m512bh test_mm512_mask3_fnmsubne_pbh(__m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
+ // CHECK-LABEL: @test_mm512_mask3_fnmsubne_pbh
+ // CHECK: fneg
+ // CHECK: fneg
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask3_fnmsubne_pbh(__A, __B, __C, __U);
+}
+
+__m512bh test_mm512_maskz_fnmsubne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
+ // CHECK-LABEL: @test_mm512_maskz_fnmsubne_pbh
+ // CHECK: fneg
+ // CHECK: fneg
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_maskz_fnmsubne_pbh(__U, __A, __B, __C);
+}
diff --git a/clang/test/CodeGen/X86/avx10_2bf16-builtins.c b/clang/test/CodeGen/X86/avx10_2bf16-builtins.c
new file mode 100644
index 0000000000000..42185e970e9db
--- /dev/null
+++ b/clang/test/CodeGen/X86/avx10_2bf16-builtins.c
@@ -0,0 +1,2018 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx10.2-256 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -ffreestanding %s -triple=i386-unknown-unknown -target-feature +avx10.2-256 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+__m256bh test_mm256_setzero_pbh(void) { // (void) gives a real C prototype, as the other zero-argument tests here do
+ // CHECK-LABEL: @test_mm256_setzero_pbh
+ // CHECK: zeroinitializer
+ return _mm256_setzero_pbh();
+}
+
+__m128bh test_mm_setzero_pbh(void) { // (void) gives a real C prototype, as the other zero-argument tests here do
+ // CHECK-LABEL: @test_mm_setzero_pbh
+ // CHECK: zeroinitializer
+ return _mm_setzero_pbh();
+}
+
+__m256bh test_mm256_undefined_pbh(void) {
+ // CHECK-LABEL: @test_mm256_undefined_pbh
+ // CHECK: ret <16 x bfloat> zeroinitializer
+ return _mm256_undefined_pbh();
+}
+
+__m128bh test_mm_undefined_pbh(void) {
+ // CHECK-LABEL: @test_mm_undefined_pbh
+ // CHECK: ret <8 x bfloat> zeroinitializer
+ return _mm_undefined_pbh();
+}
+
+__bf16 test_mm_cvtsbh_bf16(__m128bh __A) {
+ // CHECK-LABEL: @test_mm_cvtsbh_bf16
+ // CHECK: extractelement <8 x bfloat> %{{.*}}, i32 0
+ return _mm_cvtsbh_bf16(__A);
+}
+
+__bf16 test_mm256_cvtsbh_bf16(__m256bh __A) {
+ // CHECK-LABEL: @test_mm256_cvtsbh_bf16
+ // CHECK: extractelement <16 x bfloat> %{{.*}}, i32 0
+ return _mm256_cvtsbh_bf16(__A);
+}
+
+__m128bh test_mm_set_sbh(__bf16 h) {
+ // CHECK-LABEL: @test_mm_set_sbh
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 0
+ // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 1
+ // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 2
+ // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 3
+ // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 4
+ // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 5
+ // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 6
+ // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 7
+ return _mm_set_sbh(h);
+}
+
+__m128bh test_mm_set1_pbh(__bf16 h) {
+ // CHECK-LABEL: @test_mm_set1_pbh
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 0
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 1
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 2
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 3
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 4
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 5
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 6
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 7
+ return _mm_set1_pbh(h);
+}
+
+__m256bh test_mm256_set1_pbh(__bf16 h) {
+ // CHECK-LABEL: @test_mm256_set1_pbh
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 0
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 1
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 2
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 3
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 4
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 5
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 6
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 7
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 8
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 9
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 10
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 11
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 12
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 13
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 14
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 15
+ return _mm256_set1_pbh(h);
+}
+
+__m128bh test_mm_set_pbh(__bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4,
+ __bf16 bf5, __bf16 bf6, __bf16 bf7, __bf16 bf8) {
+ // CHECK-LABEL: @test_mm_set_pbh
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 0
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 1
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 2
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 3
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 4
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 5
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 6
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 7
+ return _mm_set_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8);
+}
+
+__m256bh test_mm256_set_pbh(__bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4,
+ __bf16 bf5, __bf16 bf6, __bf16 bf7, __bf16 bf8,
+ __bf16 bf9, __bf16 bf10, __bf16 bf11, __bf16 bf12,
+ __bf16 bf13, __bf16 bf14, __bf16 bf15, __bf16 bf16) {
+ // CHECK-LABEL: @test_mm256_set_pbh
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 0
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 1
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 2
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 3
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 4
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 5
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 6
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 7
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 8
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 9
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 10
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 11
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 12
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 13
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 14
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 15
+ return _mm256_set_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+ bf9, bf10, bf11, bf12, bf13, bf14, bf15, bf16);
+}
+
+__m128bh test_mm_setr_pbh(__bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4,
+ __bf16 bf5, __bf16 bf6, __bf16 bf7, __bf16 bf8) {
+ // CHECK-LABEL: @test_mm_setr_pbh
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 0
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 1
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 2
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 3
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 4
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 5
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 6
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 7
+ return _mm_setr_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8);
+}
+
+__m256bh test_mm256_setr_pbh(__bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4,
+ __bf16 bf5, __bf16 bf6, __bf16 bf7, __bf16 bf8,
+ __bf16 bf9, __bf16 bf10, __bf16 bf11, __bf16 bf12,
+ __bf16 bf13, __bf16 bf14, __bf16 bf15, __bf16 bf16) {
+ // CHECK-LABEL: @test_mm256_setr_pbh
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 0
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 1
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 2
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 3
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 4
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 5
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 6
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 7
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 8
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 9
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 10
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 11
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 12
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 13
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 14
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 15
+ return _mm256_setr_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+ bf9, bf10, bf11, bf12, bf13, bf14, bf15, bf16);
+}
+
+__m128 test_mm_castpbf16_ps(__m128bh A) {
+ // CHECK-LABEL: test_mm_castpbf16_ps
+ // CHECK: bitcast <8 x bfloat> %{{.*}} to <4 x float>
+ return _mm_castpbf16_ps(A);
+}
+
+__m256 test_mm256_castpbf16_ps(__m256bh A) {
+ // CHECK-LABEL: test_mm256_castpbf16_ps
+ // CHECK: bitcast <16 x bfloat> %{{.*}} to <8 x float>
+ return _mm256_castpbf16_ps(A);
+}
+
+__m128i test_mm_castpbf16_si128(__m128bh A) {
+ // CHECK-LABEL: test_mm_castpbf16_si128
+ // CHECK: bitcast <8 x bfloat> %{{.*}} to <2 x i64>
+ return _mm_castpbf16_si128(A);
+}
+
+__m256i test_mm256_castpbf16_si256(__m256bh A) {
+ // CHECK-LABEL: test_mm256_castpbf16_si256
+ // CHECK: bitcast <16 x bfloat> %{{.*}} to <4 x i64>
+ return _mm256_castpbf16_si256(A);
+}
+
+__m128bh test_mm_castps_pbh(__m128 A) {
+ // CHECK-LABEL: test_mm_castps_pbh
+ // CHECK: bitcast <4 x float> %{{.*}} to <8 x bfloat>
+ return _mm_castps_pbh(A);
+}
+
+__m256bh test_mm256_castps_pbh(__m256 A) {
+ // CHECK-LABEL: test_mm256_castps_pbh
+ // CHECK: bitcast <8 x float> %{{.*}} to <16 x bfloat>
+ return _mm256_castps_pbh(A);
+}
+
+__m128bh test_mm_castpd_pbh(__m128d A) {
+ // CHECK-LABEL: test_mm_castpd_pbh
+ // CHECK: bitcast <2 x double> %{{.*}} to <8 x bfloat>
+ return _mm_castpd_pbh(A);
+}
+
+__m256bh test_mm256_castpd_pbh(__m256d A) {
+ // CHECK-LABEL: test_mm256_castpd_pbh
+ // CHECK: bitcast <4 x double> %{{.*}} to <16 x bfloat>
+ return _mm256_castpd_pbh(A);
+}
+
+__m128bh test_mm_castsi128_pbh(__m128i A) {
+ // CHECK-LABEL: test_mm_castsi128_pbh
+ // CHECK: bitcast <2 x i64> %{{.*}} to <8 x bfloat>
+ return _mm_castsi128_pbh(A);
+}
+
+__m256bh test_mm256_castsi256_pbh(__m256i A) {
+ // CHECK-LABEL: test_mm256_castsi256_pbh
+ // CHECK: bitcast <4 x i64> %{{.*}} to <16 x bfloat>
+ return _mm256_castsi256_pbh(A);
+}
+
+__m128d test_mm_castpbf16_pd(__m128bh A) {
+ // CHECK-LABEL: test_mm_castpbf16_pd
+ // CHECK: bitcast <8 x bfloat> %{{.*}} to <2 x double>
+ return _mm_castpbf16_pd(A);
+}
+
+__m128bh test_mm256_castpbf16256_pbh128(__m256bh __a) {
+ // CHECK-LABEL: test_mm256_castpbf16256_pbh128
+ // CHECK: shufflevector <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ return _mm256_castpbf16256_pbh128(__a);
+}
+
+__m256bh test_mm256_castpbf16128_pbh256(__m128bh __a) {
+ // CHECK-LABEL: test_mm256_castpbf16128_pbh256
+ // CHECK: shufflevector <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+ return _mm256_castpbf16128_pbh256(__a);
+}
+
+__m256d test_mm256_castpbf16_pd(__m256bh A) {
+ // CHECK-LABEL: test_mm256_castpbf16_pd
+ // CHECK: bitcast <16 x bfloat> %{{.*}} to <4 x double>
+ return _mm256_castpbf16_pd(A);
+}
+
+__m256bh test_mm256_zextpbf16128_pbh256(__m128bh __a) {
+ // CHECK-LABEL: test_mm256_zextpbf16128_pbh256
+ // CHECK: shufflevector <8 x bfloat> %{{.*}}, <8 x bfloat> {{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ return _mm256_zextpbf16128_pbh256(__a);
+}
+
+__m128bh test_mm_abs_pbh(__m128bh a) {
+ // CHECK-LABEL: @test_mm_abs_pbh
+ // CHECK: and <4 x i32>
+ return _mm_abs_pbh(a);
+}
+
+__m256bh test_mm256_abs_pbh(__m256bh a) {
+ // CHECK-LABEL: @test_mm256_abs_pbh
+ // CHECK: and <8 x i32>
+ return _mm256_abs_pbh(a);
+}
+
+__m256bh test_mm256_loadu_pbh(void *p) {
+ // CHECK-LABEL: @test_mm256_loadu_pbh
+ // CHECK: load <16 x bfloat>, ptr {{.*}}, align 1{{$}}
+ return _mm256_loadu_pbh(p);
+}
+
+__m128bh test_mm_load_sbh(void const *A) {
+ // CHECK-LABEL: test_mm_load_sbh
+ // CHECK: %{{.*}} = call <8 x bfloat> @llvm.masked.load.v8bf16.p0(ptr %{{.*}}, i32 1, <8 x i1> bitcast (<1 x i8> <i8 1> to <8 x i1>), <8 x bfloat> %{{.*}})
+ return _mm_load_sbh(A);
+}
+
+__m256bh test_mm256_load_pbh(void *p) {
+ // CHECK-LABEL: @test_mm256_load_pbh
+ // CHECK: load <16 x bfloat>, ptr %{{.*}}, align 32
+ return _mm256_load_pbh(p);
+}
+
+__m128bh test_mm_load_pbh(void *p) {
+ // CHECK-LABEL: @test_mm_load_pbh
+ // CHECK: load <8 x bfloat>, ptr %{{.*}}, align 16
+ return _mm_load_pbh(p);
+}
+
+__m128bh test_mm_loadu_pbh(void *p) {
+ // CHECK-LABEL: @test_mm_loadu_pbh
+ // CHECK: load <8 x bfloat>, ptr {{.*}}, align 1{{$}}
+ return _mm_loadu_pbh(p);
+}
+
+void test_mm_store_sbh(void *A, __m128bh B) {
+ // CHECK-LABEL: test_mm_store_sbh
+ // CHECK: extractelement <8 x bfloat> %{{.*}}, i32 0
+ // CHECK: store bfloat %{{.*}}, ptr %{{.*}}, align 1{{$}}
+ _mm_store_sbh(A, B);
+}
+
+void test_mm_mask_store_sbh(void *__P, __mmask8 __U, __m128bh __A) {
+ // CHECK-LABEL: @test_mm_mask_store_sbh
+ // CHECK: call void @llvm.masked.store.v8bf16.p0(<8 x bfloat> %{{.*}}, ptr %{{.*}}, i32 1, <8 x i1> %{{.*}})
+ _mm_mask_store_sbh(__P, __U, __A);
+}
+
+void test_mm256_store_pbh(void *p, __m256bh a) {
+ // CHECK-LABEL: @test_mm256_store_pbh
+ // CHECK: store <16 x bfloat> %{{.*}}, ptr %{{.*}}, align 32
+ _mm256_store_pbh(p, a);
+}
+
+void test_mm_store_pbh(void *p, __m128bh a) {
+ // CHECK-LABEL: @test_mm_store_pbh
+ // CHECK: store <8 x bfloat> %{{.*}}, ptr %{{.*}}, align 16
+ _mm_store_pbh(p, a);
+}
+
+__m128bh test_mm_mask_load_sbh(__m128bh __A, __mmask8 __U, const void *__W) {
+ // CHECK-LABEL: @test_mm_mask_load_sbh
+ // CHECK: %{{.*}} = call <8 x bfloat> @llvm.masked.load.v8bf16.p0(ptr %{{.*}}, i32 1, <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}})
+ return _mm_mask_load_sbh(__A, __U, __W);
+}
+
+__m128bh test_mm_maskz_load_sbh(__mmask8 __U, const void *__W) {
+ // CHECK-LABEL: @test_mm_maskz_load_sbh
+ // CHECK: %{{.*}} = call <8 x bfloat> @llvm.masked.load.v8bf16.p0(ptr %{{.*}}, i32 1, <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}})
+ return _mm_maskz_load_sbh(__U, __W);
+}
+
+void test_mm256_storeu_pbh(void *p, __m256bh a) {
+ // CHECK-LABEL: @test_mm256_storeu_pbh
+ // CHECK: store <16 x bfloat> %{{.*}}, ptr %{{.*}}, align 1{{$}}
+ // CHECK-NEXT: ret void
+ _mm256_storeu_pbh(p, a);
+}
+
+void test_mm_storeu_pbh(void *p, __m128bh a) {
+ // CHECK-LABEL: @test_mm_storeu_pbh
+ // CHECK: store <8 x bfloat> %{{.*}}, ptr %{{.*}}, align 1{{$}}
+ // CHECK-NEXT: ret void
+ _mm_storeu_pbh(p, a);
+}
+
+__m128bh test_mm_move_sbh(__m128bh A, __m128bh B) {
+ // CHECK-LABEL: test_mm_move_sbh
+ // CHECK: extractelement <8 x bfloat> %{{.*}}, i32 0
+ // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 0
+ return _mm_move_sbh(A, B);
+}
+
+__m128bh test_mm_mask_move_sbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_mask_move_sbh
+ // CHECK: [[EXT:%.*]] = extractelement <8 x bfloat> %{{.*}}, i32 0
+ // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat [[EXT]], i32 0
+ // CHECK: [[A:%.*]] = extractelement <8 x bfloat> [[VEC:%.*]], i64 0
+ // CHECK-NEXT: [[B:%.*]] = extractelement <8 x bfloat> %{{.*}}, i64 0
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.*]] = select i1 %{{.*}}, bfloat [[A]], bfloat [[B]]
+ // CHECK-NEXT: insertelement <8 x bfloat> [[VEC]], bfloat [[SEL]], i64 0
+ return _mm_mask_move_sbh(__W, __U, __A, __B);
+}
+
+__m128bh test_mm_maskz_move_sbh(__mmask8 __U, __m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_maskz_move_sbh
+ // CHECK: [[EXT:%.*]] = extractelement <8 x bfloat> %{{.*}}, i32 0
+ // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat [[EXT]], i32 0
+ // CHECK: [[A:%.*]] = extractelement <8 x bfloat> [[VEC:%.*]], i64 0
+ // CHECK-NEXT: [[B:%.*]] = extractelement <8 x bfloat> %{{.*}}, i64 0
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.*]] = select i1 %{{.*}}, bfloat [[A]], bfloat [[B]]
+ // CHECK-NEXT: insertelement <8 x bfloat> [[VEC]], bfloat [[SEL]], i64 0
+ return _mm_maskz_move_sbh(__U, __A, __B);
+}
+
+__m128bh test_mm_mask_blend_pbh(__mmask8 __U, __m128bh __A, __m128bh __W) {
+ // CHECK-LABEL: @test_mm_mask_blend_pbh
+ // CHECK: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_mask_blend_pbh(__U, __A, __W);
+}
+
+__m256bh test_mm256_mask_blend_pbh(__mmask16 __U, __m256bh __A, __m256bh __W) {
+ // CHECK-LABEL: @test_mm256_mask_blend_pbh
+ // CHECK: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_mask_blend_pbh(__U, __A, __W);
+}
+
+__m128bh test_mm_permutex2var_pbh(__m128bh __A, __m128i __I, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_permutex2var_pbh
+ // CHECK: %{{.*}} = bitcast <8 x bfloat> %{{.*}} to <8 x i16>
+ // CHECK: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
+ // CHECK: %{{.*}} = bitcast <8 x bfloat> %{{.*}} to <8 x i16>
+ // CHECK: %{{.*}} = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+ // CHECK: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x bfloat>
+ return _mm_permutex2var_pbh(__A, __I, __B);
+}
+
+__m256bh test_mm256_permutex2var_pbh(__m256bh __A, __m256i __I, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_permutex2var_pbh
+ // CHECK: %{{.*}} = bitcast <16 x bfloat> %{{.*}} to <16 x i16>
+ // CHECK: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16>
+ // CHECK: %{{.*}} = bitcast <16 x bfloat> %{{.*}} to <16 x i16>
+ // CHECK: %{{.*}} = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
+ // CHECK: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x bfloat>
+ return _mm256_permutex2var_pbh(__A, __I, __B);
+}
+
+__m128bh test_mm_permutexvar_pbh(__m128i __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_permutexvar_pbh
+ // CHECK: %{{.*}} = bitcast <8 x bfloat> %{{.*}} to <8 x i16>
+ // CHECK: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
+ // CHECK: %{{.*}} = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+ // CHECK: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x bfloat>
+ return _mm_permutexvar_pbh(__A, __B);
+}
+
+__m256bh test_mm256_permutexvar_pbh(__m256i __A, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_permutexvar_pbh
+ // CHECK: %{{.*}} = bitcast <16 x bfloat> %{{.*}} to <16 x i16>
+ // CHECK: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16>
+ // CHECK: %{{.*}} = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
+ // CHECK: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x bfloat>
+ return _mm256_permutexvar_pbh(__A, __B);
+}
+
+__m256bh test_mm256_addne_pbh(__m256bh __A, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_addne_pbh
+ // CHECK: %{{.*}} = fadd <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_addne_pbh(__A, __B);
+}
+
+__m256bh test_mm256_mask_addne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
+ // CHECK: %{{.*}} = fadd <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return (__m256bh)_mm256_mask_addne_pbh(__W, __U, __A, __B);
+}
+
+__m256bh test_mm256_maskz_addne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
+ // CHECK: %{{.*}} = fadd <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_maskz_addne_pbh(__U, __A, __B);
+}
+
+__m128bh test_mm_addne_pbh(__m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_addne_pbh
+ // CHECK: %{{.*}} = fadd <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_addne_pbh(__A, __B);
+}
+
+__m128bh test_mm_mask_addne_pbh(__m128bh __W, __mmask16 __U, __m128bh __A, __m128bh __B) {
+ // CHECK: %{{.*}} = fadd <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return (__m128bh)_mm_mask_addne_pbh(__W, __U, __A, __B);
+}
+
+__m128bh test_mm_maskz_addne_pbh(__mmask16 __U, __m128bh __A, __m128bh __B) {
+ // CHECK: %{{.*}} = fadd <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_maskz_addne_pbh(__U, __A, __B);
+}
+
+__m256bh test_mm256_subne_pbh(__m256bh __A, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_subne_pbh
+ // CHECK: %{{.*}} = fsub <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_subne_pbh(__A, __B);
+}
+
+__m256bh test_mm256_mask_subne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
+ // CHECK: %{{.*}} = fsub <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return (__m256bh)_mm256_mask_subne_pbh(__W, __U, __A, __B);
+}
+
+__m256bh test_mm256_maskz_subne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
+ // CHECK: %{{.*}} = fsub <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_maskz_subne_pbh(__U, __A, __B);
+}
+
+__m128bh test_mm_subne_pbh(__m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_subne_pbh
+ // CHECK: %{{.*}} = fsub <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_subne_pbh(__A, __B);
+}
+
+__m128bh test_mm_mask_subne_pbh(__m128bh __W, __mmask16 __U, __m128bh __A, __m128bh __B) {
+ // CHECK: %{{.*}} = fsub <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return (__m128bh)_mm_mask_subne_pbh(__W, __U, __A, __B);
+}
+
+__m128bh test_mm_maskz_subne_pbh(__mmask16 __U, __m128bh __A, __m128bh __B) {
+ // CHECK: %{{.*}} = fsub <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_maskz_subne_pbh(__U, __A, __B);
+}
+
+__m256bh test_mm256_mulne_pbh(__m256bh __A, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_mulne_pbh
+ // CHECK: %{{.*}} = fmul <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mulne_pbh(__A, __B);
+}
+
+__m256bh test_mm256_mask_mulne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
+ // CHECK: %{{.*}} = fmul <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return (__m256bh)_mm256_mask_mulne_pbh(__W, __U, __A, __B);
+}
+
+__m256bh test_mm256_maskz_mulne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
+ // CHECK: %{{.*}} = fmul <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_maskz_mulne_pbh(__U, __A, __B);
+}
+
+__m128bh test_mm_mulne_pbh(__m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_mulne_pbh
+ // CHECK: %{{.*}} = fmul <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mulne_pbh(__A, __B);
+}
+
+__m128bh test_mm_mask_mulne_pbh(__m128bh __W, __mmask16 __U, __m128bh __A, __m128bh __B) {
+ // CHECK: %{{.*}} = fmul <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return (__m128bh)_mm_mask_mulne_pbh(__W, __U, __A, __B);
+}
+
+__m128bh test_mm_maskz_mulne_pbh(__mmask16 __U, __m128bh __A, __m128bh __B) {
+ // CHECK: %{{.*}} = fmul <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_maskz_mulne_pbh(__U, __A, __B);
+}
+
+__m256bh test_mm256_divne_pbh(__m256bh __A, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_divne_pbh
+ // CHECK: %{{.*}} = fdiv <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_divne_pbh(__A, __B);
+}
+
+__m256bh test_mm256_mask_divne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
+ // CHECK: %{{.*}} = fdiv <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return (__m256bh)_mm256_mask_divne_pbh(__W, __U, __A, __B);
+}
+
+__m256bh test_mm256_maskz_divne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
+ // CHECK: %{{.*}} = fdiv <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_maskz_divne_pbh(__U, __A, __B);
+}
+
+__m128bh test_mm_divne_pbh(__m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_divne_pbh
+ // CHECK: %{{.*}} = fdiv <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_divne_pbh(__A, __B);
+}
+
+__m128bh test_mm_mask_divne_pbh(__m128bh __W, __mmask16 __U, __m128bh __A, __m128bh __B) {
+ // CHECK: %{{.*}} = fdiv <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return (__m128bh)_mm_mask_divne_pbh(__W, __U, __A, __B);
+}
+
+__m128bh test_mm_maskz_divne_pbh(__mmask16 __U, __m128bh __A, __m128bh __B) {
+ // CHECK: %{{.*}} = fdiv <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_maskz_divne_pbh(__U, __A, __B);
+}
+
+__m256bh test_mm256_max_pbh(__m256bh __A, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_max_pbh
+ // CHECK: @llvm.x86.avx10.vmaxpbf16256(
+ return _mm256_max_pbh(__A, __B);
+}
+
+__m256bh test_mm256_mask_max_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
+ // CHECK: @llvm.x86.avx10.vmaxpbf16256
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return (__m256bh)_mm256_mask_max_pbh(__W, __U, __A, __B);
+}
+
+__m256bh test_mm256_maskz_max_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
+ // CHECK: @llvm.x86.avx10.vmaxpbf16256
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_maskz_max_pbh(__U, __A, __B);
+}
+
+__m128bh test_mm_max_pbh(__m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_max_pbh
+ // CHECK: @llvm.x86.avx10.vmaxpbf16128(
+ return _mm_max_pbh(__A, __B);
+}
+
+__m128bh test_mm_mask_max_pbh(__m128bh __W, __mmask16 __U, __m128bh __A, __m128bh __B) {
+ // CHECK: @llvm.x86.avx10.vmaxpbf16128
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return (__m128bh)_mm_mask_max_pbh(__W, __U, __A, __B);
+}
+
+__m128bh test_mm_maskz_max_pbh(__mmask16 __U, __m128bh __A, __m128bh __B) {
+ // CHECK: @llvm.x86.avx10.vmaxpbf16128
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_maskz_max_pbh(__U, __A, __B);
+}
+
+__m256bh test_mm256_min_pbh(__m256bh __A, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_min_pbh
+ // CHECK: @llvm.x86.avx10.vminpbf16256(
+ return _mm256_min_pbh(__A, __B);
+}
+
+__m256bh test_mm256_mask_min_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
+ // CHECK: @llvm.x86.avx10.vminpbf16256
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return (__m256bh)_mm256_mask_min_pbh(__W, __U, __A, __B);
+}
+
+__m256bh test_mm256_maskz_min_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
+ // CHECK: @llvm.x86.avx10.vminpbf16256
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_maskz_min_pbh(__U, __A, __B);
+}
+
+__m128bh test_mm_min_pbh(__m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_min_pbh
+ // CHECK: @llvm.x86.avx10.vminpbf16128(
+ return _mm_min_pbh(__A, __B);
+}
+
+__m128bh test_mm_mask_min_pbh(__m128bh __W, __mmask16 __U, __m128bh __A, __m128bh __B) {
+ // CHECK: @llvm.x86.avx10.vminpbf16128
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return (__m128bh)_mm_mask_min_pbh(__W, __U, __A, __B);
+}
+
+__m128bh test_mm_maskz_min_pbh(__mmask16 __U, __m128bh __A, __m128bh __B) {
+ // CHECK: @llvm.x86.avx10.vminpbf16128
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_maskz_min_pbh(__U, __A, __B);
+}
+
+int test_mm_comeqsbh(__m128bh __A, __m128bh __B) { // value names are matched with an any-length wildcard, not a single character
+ // CHECK-LABEL: test_mm_comeqsbh
+ // CHECK: %{{.*}} = call i32 @llvm.x86.avx10.vcomsbf16eq(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ return _mm_comeqsbh(__A, __B);
+}
+
+int test_mm_comltsbh(__m128bh __A, __m128bh __B) { // value names are matched with an any-length wildcard, not a single character
+ // CHECK-LABEL: test_mm_comltsbh
+ // CHECK: %{{.*}} = call i32 @llvm.x86.avx10.vcomsbf16lt(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ return _mm_comltsbh(__A, __B);
+}
+
+int test_mm_comlesbh(__m128bh __A, __m128bh __B) { // value names are matched with an any-length wildcard, not a single character
+ // CHECK-LABEL: test_mm_comlesbh
+ // CHECK: %{{.*}} = call i32 @llvm.x86.avx10.vcomsbf16le(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ return _mm_comlesbh(__A, __B);
+}
+
+int test_mm_comgtsbh(__m128bh __A, __m128bh __B) { // value names are matched with an any-length wildcard, not a single character
+ // CHECK-LABEL: test_mm_comgtsbh
+ // CHECK: %{{.*}} = call i32 @llvm.x86.avx10.vcomsbf16gt(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ return _mm_comgtsbh(__A, __B);
+}
+
+int test_mm_comgesbh(__m128bh __A, __m128bh __B) { // value names are matched with an any-length wildcard, not a single character
+ // CHECK-LABEL: test_mm_comgesbh
+ // CHECK: %{{.*}} = call i32 @llvm.x86.avx10.vcomsbf16ge(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ return _mm_comgesbh(__A, __B);
+}
+
+int test_mm_comneqsbh(__m128bh __A, __m128bh __B) { // value names are matched with an any-length wildcard, not a single character
+ // CHECK-LABEL: test_mm_comneqsbh
+ // CHECK: %{{.*}} = call i32 @llvm.x86.avx10.vcomsbf16neq(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ return _mm_comneqsbh(__A, __B);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_eq_oq(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: @test_mm256_cmp_pbh_mask_eq_oq
+ // CHECK: fcmp oeq <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_EQ_OQ);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_lt_os(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_lt_os
+ // CHECK: fcmp olt <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_LT_OS);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_le_os(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_le_os
+ // CHECK: fcmp ole <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_LE_OS);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_unord_q(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_unord_q
+ // CHECK: fcmp uno <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_UNORD_Q);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_neq_uq(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_neq_uq
+ // CHECK: fcmp une <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_NEQ_UQ);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_nlt_us(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_nlt_us
+ // CHECK: fcmp uge <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_NLT_US);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_nle_us(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_nle_us
+ // CHECK: fcmp ugt <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_NLE_US);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_ord_q(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_ord_q
+ // CHECK: fcmp ord <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_ORD_Q);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_eq_uq(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_eq_uq
+ // CHECK: fcmp ueq <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_EQ_UQ);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_nge_us(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_nge_us
+ // CHECK: fcmp ult <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_NGE_US);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_ngt_us(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_ngt_us
+ // CHECK: fcmp ule <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_NGT_US);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_false_oq(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_false_oq
+ // CHECK: fcmp false <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_FALSE_OQ);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_neq_oq(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_neq_oq
+ // CHECK: fcmp one <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_NEQ_OQ);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_ge_os(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_ge_os
+ // CHECK: fcmp oge <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_GE_OS);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_gt_os(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_gt_os
+ // CHECK: fcmp ogt <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_GT_OS);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_true_uq(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_true_uq
+ // CHECK: fcmp true <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_TRUE_UQ);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_eq_os(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_eq_os
+ // CHECK: fcmp oeq <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_EQ_OS);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_lt_oq(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_lt_oq
+ // CHECK: fcmp olt <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_LT_OQ);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_le_oq(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_le_oq
+ // CHECK: fcmp ole <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_LE_OQ);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_unord_s(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_unord_s
+ // CHECK: fcmp uno <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_UNORD_S);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_neq_us(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_neq_us
+ // CHECK: fcmp une <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_NEQ_US);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_nlt_uq(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_nlt_uq
+ // CHECK: fcmp uge <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_NLT_UQ);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_nle_uq(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_nle_uq
+ // CHECK: fcmp ugt <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_NLE_UQ);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_ord_s(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_ord_s
+ // CHECK: fcmp ord <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_ORD_S);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_eq_us(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_eq_us
+ // CHECK: fcmp ueq <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_EQ_US);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_nge_uq(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_nge_uq
+ // CHECK: fcmp ult <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_NGE_UQ);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_ngt_uq(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_ngt_uq
+ // CHECK: fcmp ule <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_NGT_UQ);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_false_os(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_false_os
+ // CHECK: fcmp false <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_FALSE_OS);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_neq_os(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_neq_os
+ // CHECK: fcmp one <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_NEQ_OS);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_ge_oq(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_ge_oq
+ // CHECK: fcmp oge <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_GE_OQ);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_gt_oq(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_gt_oq
+ // CHECK: fcmp ogt <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_GT_OQ);
+}
+
+__mmask16 test_mm256_cmp_pbh_mask_true_us(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_true_us
+ // CHECK: fcmp true <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_TRUE_US);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_eq_oq(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: @test_mm256_mask_cmp_pbh_mask_eq_oq
+ // CHECK: fcmp oeq <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_OQ);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_lt_os(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_lt_os
+ // CHECK: fcmp olt <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_LT_OS);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_le_os(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_le_os
+ // CHECK: fcmp ole <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_LE_OS);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_unord_q(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_unord_q
+ // CHECK: fcmp uno <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_UNORD_Q);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_neq_uq(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_neq_uq
+ // CHECK: fcmp une <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_UQ);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_nlt_us(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_nlt_us
+ // CHECK: fcmp uge <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NLT_US);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_nle_us(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_nle_us
+ // CHECK: fcmp ugt <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NLE_US);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_ord_q(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ord_q
+ // CHECK: fcmp ord <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_ORD_Q);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_eq_uq(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_eq_uq
+ // CHECK: fcmp ueq <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_UQ);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_nge_us(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_nge_us
+ // CHECK: fcmp ult <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NGE_US);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_ngt_us(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ngt_us
+ // CHECK: fcmp ule <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NGT_US);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_false_oq(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_false_oq
+ // CHECK: fcmp false <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_FALSE_OQ);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_neq_oq(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_neq_oq
+ // CHECK: fcmp one <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_OQ);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_ge_os(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ge_os
+ // CHECK: fcmp oge <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_GE_OS);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_gt_os(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_gt_os
+ // CHECK: fcmp ogt <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_GT_OS);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_true_uq(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_true_uq
+ // CHECK: fcmp true <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_TRUE_UQ);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_eq_os(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_eq_os
+ // CHECK: fcmp oeq <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_OS);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_lt_oq(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_lt_oq
+ // CHECK: fcmp olt <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_LT_OQ);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_le_oq(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_le_oq
+ // CHECK: fcmp ole <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_LE_OQ);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_unord_s(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_unord_s
+ // CHECK: fcmp uno <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_UNORD_S);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_neq_us(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_neq_us
+ // CHECK: fcmp une <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_US);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_nlt_uq(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_nlt_uq
+ // CHECK: fcmp uge <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NLT_UQ);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_nle_uq(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_nle_uq
+ // CHECK: fcmp ugt <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NLE_UQ);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_ord_s(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ord_s
+ // CHECK: fcmp ord <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_ORD_S);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_eq_us(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_eq_us
+ // CHECK: fcmp ueq <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_US);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_nge_uq(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_nge_uq
+ // CHECK: fcmp ult <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NGE_UQ);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_ngt_uq(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ngt_uq
+ // CHECK: fcmp ule <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NGT_UQ);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_false_os(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_false_os
+ // CHECK: fcmp false <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_FALSE_OS);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_neq_os(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_neq_os
+ // CHECK: fcmp one <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_OS);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_ge_oq(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ge_oq
+ // CHECK: fcmp oge <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_GE_OQ);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_gt_oq(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_gt_oq
+ // CHECK: fcmp ogt <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_GT_OQ);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_true_us(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_true_us
+ // CHECK: fcmp true <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_TRUE_US);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_eq_oq(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_eq_oq
+ // CHECK: fcmp oeq <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_EQ_OQ);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_lt_os(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_lt_os
+ // CHECK: fcmp olt <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_LT_OS);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_le_os(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_le_os
+ // CHECK: fcmp ole <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_LE_OS);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_unord_q(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_unord_q
+ // CHECK: fcmp uno <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_UNORD_Q);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_neq_uq(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_neq_uq
+ // CHECK: fcmp une <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_NEQ_UQ);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_nlt_us(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_nlt_us
+ // CHECK: fcmp uge <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_NLT_US);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_nle_us(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_nle_us
+ // CHECK: fcmp ugt <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_NLE_US);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_ord_q(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_ord_q
+ // CHECK: fcmp ord <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_ORD_Q);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_eq_uq(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_eq_uq
+ // CHECK: fcmp ueq <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_EQ_UQ);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_nge_us(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_nge_us
+ // CHECK: fcmp ult <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_NGE_US);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_ngt_us(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_ngt_us
+ // CHECK: fcmp ule <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_NGT_US);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_false_oq(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_false_oq
+ // CHECK: fcmp false <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_FALSE_OQ);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_neq_oq(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_neq_oq
+ // CHECK: fcmp one <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_NEQ_OQ);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_ge_os(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_ge_os
+ // CHECK: fcmp oge <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_GE_OS);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_gt_os(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_gt_os
+ // CHECK: fcmp ogt <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_GT_OS);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_true_uq(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_true_uq
+ // CHECK: fcmp true <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_TRUE_UQ);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_eq_os(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_eq_os
+ // CHECK: fcmp oeq <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_EQ_OS);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_lt_oq(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_lt_oq
+ // CHECK: fcmp olt <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_LT_OQ);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_le_oq(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_le_oq
+ // CHECK: fcmp ole <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_LE_OQ);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_unord_s(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_unord_s
+ // CHECK: fcmp uno <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_UNORD_S);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_neq_us(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_neq_us
+ // CHECK: fcmp une <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_NEQ_US);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_nlt_uq(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_nlt_uq
+ // CHECK: fcmp uge <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_NLT_UQ);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_nle_uq(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_nle_uq
+ // CHECK: fcmp ugt <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_NLE_UQ);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_ord_s(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_ord_s
+ // CHECK: fcmp ord <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_ORD_S);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_eq_us(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_eq_us
+ // CHECK: fcmp ueq <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_EQ_US);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_nge_uq(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_nge_uq
+ // CHECK: fcmp ult <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_NGE_UQ);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_ngt_uq(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_ngt_uq
+ // CHECK: fcmp ule <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_NGT_UQ);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_false_os(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_false_os
+ // CHECK: fcmp false <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_FALSE_OS);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_neq_os(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_neq_os
+ // CHECK: fcmp one <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_NEQ_OS);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_ge_oq(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_ge_oq
+ // CHECK: fcmp oge <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_GE_OQ);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_gt_oq(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_gt_oq
+ // CHECK: fcmp ogt <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_GT_OQ);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_true_us(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_cmp_pbh_mask_true_us
+ // CHECK: fcmp true <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_TRUE_US);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_eq_oq(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_eq_oq
+ // CHECK: fcmp oeq <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_OQ);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_lt_os(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_lt_os
+ // CHECK: fcmp olt <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_LT_OS);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_le_os(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_le_os
+ // CHECK: fcmp ole <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_LE_OS);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_unord_q(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_unord_q
+ // CHECK: fcmp uno <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_UNORD_Q);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_neq_uq(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_neq_uq
+ // CHECK: fcmp une <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_UQ);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_nlt_us(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_nlt_us
+ // CHECK: fcmp uge <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NLT_US);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_nle_us(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_nle_us
+ // CHECK: fcmp ugt <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NLE_US);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_ord_q(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_ord_q
+ // CHECK: fcmp ord <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_ORD_Q);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_eq_uq(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_eq_uq
+ // CHECK: fcmp ueq <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_UQ);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_nge_us(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_nge_us
+ // CHECK: fcmp ult <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NGE_US);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_ngt_us(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_ngt_us
+ // CHECK: fcmp ule <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NGT_US);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_false_oq(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_false_oq
+ // CHECK: fcmp false <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_FALSE_OQ);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_neq_oq(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_neq_oq
+ // CHECK: fcmp one <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_OQ);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_ge_os(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_ge_os
+ // CHECK: fcmp oge <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_GE_OS);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_gt_os(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_gt_os
+ // CHECK: fcmp ogt <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_GT_OS);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_true_uq(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_true_uq
+ // CHECK: fcmp true <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_TRUE_UQ);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_eq_os(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_eq_os
+ // CHECK: fcmp oeq <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_OS);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_lt_oq(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_lt_oq
+ // CHECK: fcmp olt <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_LT_OQ);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_le_oq(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_le_oq
+ // CHECK: fcmp ole <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_LE_OQ);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_unord_s(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_unord_s
+ // CHECK: fcmp uno <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_UNORD_S);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_neq_us(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_neq_us
+ // CHECK: fcmp une <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_US);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_nlt_uq(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_nlt_uq
+ // CHECK: fcmp uge <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NLT_UQ);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_nle_uq(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_nle_uq
+ // CHECK: fcmp ugt <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NLE_UQ);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_ord_s(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_ord_s
+ // CHECK: fcmp ord <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_ORD_S);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_eq_us(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_eq_us
+ // CHECK: fcmp ueq <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_US);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_nge_uq(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_nge_uq
+ // CHECK: fcmp ult <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NGE_UQ);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_ngt_uq(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_ngt_uq
+ // CHECK: fcmp ule <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NGT_UQ);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_false_os(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_false_os
+ // CHECK: fcmp false <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_FALSE_OS);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_neq_os(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_neq_os
+ // CHECK: fcmp one <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_OS);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_ge_oq(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_ge_oq
+ // CHECK: fcmp oge <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_GE_OQ);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_gt_oq(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_gt_oq
+ // CHECK: fcmp ogt <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_GT_OQ);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_true_us(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: test_mm_mask_cmp_pbh_mask_true_us
+ // CHECK: fcmp true <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_TRUE_US);
+}
+
+
+__mmask16 test_mm256_mask_fpclass_pbh_mask(__mmask16 __U, __m256bh __A) {
+ // CHECK-LABEL: @test_mm256_mask_fpclass_pbh_mask
+ // CHECK: @llvm.x86.avx10.fpclass.nepbf16.256
+ return _mm256_mask_fpclass_pbh_mask(__U, __A, 4);
+}
+
+__mmask16 test_mm256_fpclass_pbh_mask(__m256bh __A) {
+ // CHECK-LABEL: @test_mm256_fpclass_pbh_mask
+ // CHECK: @llvm.x86.avx10.fpclass.nepbf16.256
+ return _mm256_fpclass_pbh_mask(__A, 4);
+}
+
+__mmask8 test_mm_mask_fpclass_pbh_mask(__mmask8 __U, __m128bh __A) {
+ // CHECK-LABEL: @test_mm_mask_fpclass_pbh_mask
+ // CHECK: @llvm.x86.avx10.fpclass.nepbf16.128
+ return _mm_mask_fpclass_pbh_mask(__U, __A, 4);
+}
+
+__mmask8 test_mm_fpclass_pbh_mask(__m128bh __A) {
+ // CHECK-LABEL: @test_mm_fpclass_pbh_mask
+ // CHECK: @llvm.x86.avx10.fpclass.nepbf16.128
+ return _mm_fpclass_pbh_mask(__A, 4);
+}
+
+__m256bh test_mm256_scalef_pbh(__m256bh __A, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_scalef_pbh
+ // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.256
+ return _mm256_scalef_pbh(__A, __B);
+}
+
+__m256bh test_mm256_mask_scalef_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_mask_scalef_pbh
+ // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.256
+ return _mm256_mask_scalef_pbh(__W, __U, __A, __B);
+}
+
+__m256bh test_mm256_maskz_scalef_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_maskz_scalef_pbh
+ // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.256
+ return _mm256_maskz_scalef_pbh(__U, __A, __B);
+}
+
+__m256bh test_mm256_rcp_pbh(__m256bh __A) {
+ // CHECK-LABEL: @test_mm256_rcp_pbh
+ // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.256
+ return _mm256_rcp_pbh(__A);
+}
+
+__m256bh test_mm256_mask_rcp_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) {
+ // CHECK-LABEL: @test_mm256_mask_rcp_pbh
+ // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.256
+ return (__m256bh)_mm256_mask_rcp_pbh(__W, __U, __A);
+}
+
+__m256bh test_mm256_maskz_rcp_pbh(__mmask16 __U, __m256bh __A) {
+ // CHECK-LABEL: @test_mm256_maskz_rcp_pbh
+ // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.256
+ return _mm256_maskz_rcp_pbh(__U, __A);
+}
+
+__m256bh test_mm256_getexp_pbh(__m256bh __A) {
+ // CHECK-LABEL: @test_mm256_getexp_pbh
+ // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.256
+ return _mm256_getexp_pbh(__A);
+}
+
+__m256bh test_mm256_mask_getexp_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) {
+ // CHECK-LABEL: @test_mm256_mask_getexp_pbh
+ // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.256
+ return _mm256_mask_getexp_pbh(__W, __U, __A);
+}
+
+__m256bh test_mm256_maskz_getexp_pbh(__mmask16 __U, __m256bh __A) {
+ // CHECK-LABEL: @test_mm256_maskz_getexp_pbh
+ // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.256
+ return _mm256_maskz_getexp_pbh(__U, __A);
+}
+
+__m256bh test_mm256_rsqrt_pbh(__m256bh __A) {
+ // CHECK-LABEL: @test_mm256_rsqrt_pbh
+ // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.256
+ return _mm256_rsqrt_pbh(__A);
+}
+
+__m256bh test_mm256_mask_rsqrt_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) {
+ // CHECK-LABEL: @test_mm256_mask_rsqrt_pbh
+ // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.256
+ return (__m256bh)_mm256_mask_rsqrt_pbh(__W, __U, __A);
+}
+
+__m256bh test_mm256_maskz_rsqrt_pbh(__mmask16 __U, __m256bh __A) {
+ // CHECK-LABEL: @test_mm256_maskz_rsqrt_pbh
+ // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.256
+ return _mm256_maskz_rsqrt_pbh(__U, __A);
+}
+
+__m256bh test_mm256_reducene_pbh(__m256bh __A) {
+ // CHECK-LABEL: @test_mm256_reducene_pbh
+ // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.256
+ return _mm256_reducene_pbh(__A, 3);
+}
+
+__m256bh test_mm256_mask_reducene_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) {
+ // CHECK-LABEL: @test_mm256_mask_reducene_pbh
+ // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.256
+ return _mm256_mask_reducene_pbh(__W, __U, __A, 1);
+}
+
+__m256bh test_mm256_maskz_reducene_pbh(__mmask16 __U, __m256bh __A) {
+ // CHECK-LABEL: @test_mm256_maskz_reducene_pbh
+ // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.256
+ return _mm256_maskz_reducene_pbh(__U, __A, 1);
+}
+
+__m256bh test_mm256_roundscalene_pbh(__m256bh __A) {
+ // CHECK-LABEL: @test_mm256_roundscalene_pbh
+ // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.256
+ return _mm256_roundscalene_pbh(__A, 3);
+}
+
+__m256bh test_mm256_mask_roundscalene_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) {
+ // CHECK-LABEL: @test_mm256_mask_roundscalene_pbh
+ // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.256
+ return _mm256_mask_roundscalene_pbh(__W, __U, __A, 1);
+}
+
+__m256bh test_mm256_maskz_roundscalene_pbh(__mmask16 __U, __m256bh __A) {
+ // CHECK-LABEL: @test_mm256_maskz_roundscalene_pbh
+ // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.256
+ return _mm256_maskz_roundscalene_pbh(__U, __A, 1 );
+}
+
+__m256bh test_mm256_getmant_pbh(__m256bh __A) {
+ // CHECK-LABEL: @test_mm256_getmant_pbh
+ // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.256
+ return _mm256_getmant_pbh(__A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan);
+}
+
+__m256bh test_mm256_mask_getmant_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) {
+ // CHECK-LABEL: @test_mm256_mask_getmant_pbh
+ // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.256
+ return _mm256_mask_getmant_pbh(__W, __U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan);
+}
+
+__m256bh test_mm256_maskz_getmant_pbh(__mmask16 __U, __m256bh __A) {
+ // CHECK-LABEL: @test_mm256_maskz_getmant_pbh
+ // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.256
+ return _mm256_maskz_getmant_pbh(__U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan);
+}
+
+__m256bh test_mm256_sqrt_pbh(__m256bh __A) {
+ // CHECK-LABEL: @test_mm256_sqrt_pbh
+ // CHECK: call <16 x bfloat> @llvm.sqrt.v16bf16(<16 x bfloat> %{{.*}})
+ return _mm256_sqrt_pbh(__A);
+}
+
+__m256bh test_mm256_mask_sqrt_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) {
+ // CHECK-LABEL: @test_mm256_mask_sqrt_pbh
+ // CHECK: @llvm.sqrt.v16bf16
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return (__m256bh)_mm256_mask_sqrt_pbh(__W, __U, __A);
+}
+
+__m256bh test_mm256_maskz_sqrt_pbh(__mmask16 __U, __m256bh __A) {
+ // CHECK-LABEL: @test_mm256_maskz_sqrt_pbh
+ // CHECK: @llvm.sqrt.v16bf16
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_maskz_sqrt_pbh(__U, __A);
+}
+
+__m128bh test_mm_scalef_pbh(__m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_scalef_pbh
+ // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.128
+ return _mm_scalef_pbh(__A, __B);
+}
+
+__m128bh test_mm_mask_scalef_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_mask_scalef_pbh
+ // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.128
+ return _mm_mask_scalef_pbh(__W, __U, __A, __B);
+}
+
+__m128bh test_mm_maskz_scalef_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_maskz_scalef_pbh
+ // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.128
+ return _mm_maskz_scalef_pbh(__U, __A, __B);
+}
+
+__m128bh test_mm_rcp_pbh(__m128bh __A) {
+ // CHECK-LABEL: @test_mm_rcp_pbh
+ // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.128
+ return _mm_rcp_pbh(__A);
+}
+
+__m128bh test_mm_mask_rcp_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) {
+ // CHECK-LABEL: @test_mm_mask_rcp_pbh
+ // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.128
+ return (__m128bh)_mm_mask_rcp_pbh(__W, __U, __A);
+}
+
+__m128bh test_mm_maskz_rcp_pbh(__mmask8 __U, __m128bh __A) {
+ // CHECK-LABEL: @test_mm_maskz_rcp_pbh
+ // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.128
+ return _mm_maskz_rcp_pbh(__U, __A);
+}
+
+__m128bh test_mm_getexp_pbh(__m128bh __A) {
+ // CHECK-LABEL: @test_mm_getexp_pbh
+ // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.128
+ return _mm_getexp_pbh(__A);
+}
+
+__m128bh test_mm_mask_getexp_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) {
+ // CHECK-LABEL: @test_mm_mask_getexp_pbh
+ // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.128
+ return _mm_mask_getexp_pbh(__W, __U, __A);
+}
+
+__m128bh test_mm_maskz_getexp_pbh(__mmask8 __U, __m128bh __A) {
+ // CHECK-LABEL: @test_mm_maskz_getexp_pbh
+ // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.128
+ return _mm_maskz_getexp_pbh(__U, __A);
+}
+
+__m128bh test_mm_rsqrt_pbh(__m128bh __A) {
+ // CHECK-LABEL: @test_mm_rsqrt_pbh
+ // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.128
+ return _mm_rsqrt_pbh(__A);
+}
+
+__m128bh test_mm_mask_rsqrt_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) {
+ // CHECK-LABEL: @test_mm_mask_rsqrt_pbh
+ // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.128
+ return (__m128bh)_mm_mask_rsqrt_pbh(__W, __U, __A);
+}
+
+__m128bh test_mm_maskz_rsqrt_pbh(__mmask8 __U, __m128bh __A) {
+ // CHECK-LABEL: @test_mm_maskz_rsqrt_pbh
+ // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.128
+ return _mm_maskz_rsqrt_pbh(__U, __A);
+}
+
+__m128bh test_mm_reducene_pbh(__m128bh __A) {
+ // CHECK-LABEL: @test_mm_reducene_pbh
+ // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.128
+ return _mm_reducene_pbh(__A, 3);
+}
+
+__m128bh test_mm_mask_reducene_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) {
+ // CHECK-LABEL: @test_mm_mask_reducene_pbh
+ // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.128
+ return _mm_mask_reducene_pbh(__W, __U, __A, 1);
+}
+
+__m128bh test_mm_maskz_reducene_pbh(__mmask8 __U, __m128bh __A) {
+ // CHECK-LABEL: @test_mm_maskz_reducene_pbh
+ // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.128
+ return _mm_maskz_reducene_pbh(__U, __A, 1);
+}
+
+__m128bh test_mm_roundscalene_pbh(__m128bh __A) {
+ // CHECK-LABEL: @test_mm_roundscalene_pbh
+ // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.128
+ return _mm_roundscalene_pbh(__A, 3);
+}
+
+__m128bh test_mm_mask_roundscalene_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) {
+ // CHECK-LABEL: @test_mm_mask_roundscalene_pbh
+ // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.128
+ return _mm_mask_roundscalene_pbh(__W, __U, __A, 1);
+}
+
+__m128bh test_mm_maskz_roundscalene_pbh(__mmask8 __U, __m128bh __A) {
+ // CHECK-LABEL: @test_mm_maskz_roundscalene_pbh
+ // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.128
+ return _mm_maskz_roundscalene_pbh(__U, __A, 1);
+}
+
+__m128bh test_mm_getmant_pbh(__m128bh __A) {
+ // CHECK-LABEL: @test_mm_getmant_pbh
+ // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.128
+ return _mm_getmant_pbh(__A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan);
+}
+
+__m128bh test_mm_mask_getmant_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) {
+ // CHECK-LABEL: @test_mm_mask_getmant_pbh
+ // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.128
+ return _mm_mask_getmant_pbh(__W, __U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan);
+}
+
+__m128bh test_mm_maskz_getmant_pbh(__mmask8 __U, __m128bh __A) {
+ // CHECK-LABEL: @test_mm_maskz_getmant_pbh
+ // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.128
+ return _mm_maskz_getmant_pbh(__U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan);
+}
+
+__m128bh test_mm_sqrt_pbh(__m128bh __A) {
+ // CHECK-LABEL: @test_mm_sqrt_pbh
+ // CHECK: call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> %{{.*}})
+ return _mm_sqrt_pbh(__A);
+}
+
+__m128bh test_mm_mask_sqrt_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) {
+ // CHECK-LABEL: @test_mm_mask_sqrt_pbh
+ // CHECK: call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> %{{.*}})
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return (__m128bh)_mm_mask_sqrt_pbh(__W, __U, __A);
+}
+
+__m128bh test_mm_maskz_sqrt_pbh(__mmask8 __U, __m128bh __A) {
+ // CHECK-LABEL: @test_mm_maskz_sqrt_pbh
+ // CHECK: call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> %{{.*}})
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_maskz_sqrt_pbh(__U, __A);
+}
+
+__m256bh test_mm256_fmaddne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
+ // CHECK-LABEL: @test_mm256_fmaddne_pbh
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ return _mm256_fmaddne_pbh(__A, __B, __C);
+}
+
+__m256bh test_mm256_mask_fmaddne_pbh(__m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) {
+ // CHECK-LABEL: @test_mm256_mask_fmaddne_pbh
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_mask_fmaddne_pbh(__A, __U, __B, __C);
+}
+
+__m256bh test_mm256_mask3_fmaddne_pbh(__m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) {
+ // CHECK-LABEL: @test_mm256_mask3_fmaddne_pbh
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_mask3_fmaddne_pbh(__A, __B, __C, __U);
+}
+
+__m256bh test_mm256_maskz_fmaddne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) {
+ // CHECK-LABEL: @test_mm256_maskz_fmaddne_pbh
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_maskz_fmaddne_pbh(__U, __A, __B, __C);
+}
+
+__m256bh test_mm256_fmsubne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
+ // CHECK-LABEL: @test_mm256_fmsubne_pbh
+ // CHECK: fneg
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ return _mm256_fmsubne_pbh(__A, __B, __C);
+}
+
+__m256bh test_mm256_mask_fmsubne_pbh(__m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) {
+ // CHECK-LABEL: @test_mm256_mask_fmsubne_pbh
+ // CHECK: fneg
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_mask_fmsubne_pbh(__A, __U, __B, __C);
+}
+
+__m256bh test_mm256_mask3_fmsubne_pbh(__m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) {
+ // CHECK-LABEL: @test_mm256_mask3_fmsubne_pbh
+ // CHECK: fneg
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_mask3_fmsubne_pbh(__A, __B, __C, __U);
+}
+
+__m256bh test_mm256_maskz_fmsubne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) {
+ // CHECK-LABEL: @test_mm256_maskz_fmsubne_pbh
+ // CHECK: fneg
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_maskz_fmsubne_pbh(__U, __A, __B, __C);
+}
+
+__m256bh test_mm256_fnmaddne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
+ // CHECK-LABEL: @test_mm256_fnmaddne_pbh
+ // CHECK: fneg
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ return _mm256_fnmaddne_pbh(__A, __B, __C);
+}
+
+__m256bh test_mm256_mask_fnmaddne_pbh(__m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) {
+ // CHECK-LABEL: @test_mm256_mask_fnmaddne_pbh
+ // CHECK: fneg
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_mask_fnmaddne_pbh(__A, __U, __B, __C);
+}
+
+__m256bh test_mm256_mask3_fnmaddne_pbh(__m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) {
+ // CHECK-LABEL: @test_mm256_mask3_fnmaddne_pbh
+ // CHECK: fneg
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_mask3_fnmaddne_pbh(__A, __B, __C, __U);
+}
+
+__m256bh test_mm256_maskz_fnmaddne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) {
+ // CHECK-LABEL: @test_mm256_maskz_fnmaddne_pbh
+ // CHECK: fneg
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_maskz_fnmaddne_pbh(__U, __A, __B, __C);
+}
+
+__m256bh test_mm256_fnmsubne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
+ // CHECK-LABEL: @test_mm256_fnmsubne_pbh
+ // CHECK: fneg
+ // CHECK: fneg
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ return _mm256_fnmsubne_pbh(__A, __B, __C);
+}
+
+__m256bh test_mm256_mask_fnmsubne_pbh(__m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) {
+ // CHECK-LABEL: @test_mm256_mask_fnmsubne_pbh
+ // CHECK: fneg
+ // CHECK: fneg
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_mask_fnmsubne_pbh(__A, __U, __B, __C);
+}
+
+__m256bh test_mm256_mask3_fnmsubne_pbh(__m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) {
+ // CHECK-LABEL: @test_mm256_mask3_fnmsubne_pbh
+ // CHECK: fneg
+ // CHECK: fneg
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_mask3_fnmsubne_pbh(__A, __B, __C, __U);
+}
+
+__m256bh test_mm256_maskz_fnmsubne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) {
+ // CHECK-LABEL: @test_mm256_maskz_fnmsubne_pbh
+ // CHECK: fneg
+ // CHECK: fneg
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_maskz_fnmsubne_pbh(__U, __A, __B, __C);
+}
+
+__m128bh test_mm_fmaddne_pbh(__m128bh __A, __m128bh __B, __m128bh __C) {
+ // CHECK-LABEL: @test_mm_fmaddne_pbh
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ return _mm_fmaddne_pbh(__A, __B, __C);
+}
+
+__m128bh test_mm_mask_fmaddne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) {
+ // CHECK-LABEL: @test_mm_mask_fmaddne_pbh
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_mask_fmaddne_pbh(__A, __U, __B, __C);
+}
+
+__m128bh test_mm_mask3_fmaddne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) {
+ // CHECK-LABEL: @test_mm_mask3_fmaddne_pbh
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_mask3_fmaddne_pbh(__A, __B, __C, __U);
+}
+
+__m128bh test_mm_maskz_fmaddne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
+ // CHECK-LABEL: @test_mm_maskz_fmaddne_pbh
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_maskz_fmaddne_pbh(__U, __A, __B, __C);
+}
+
+__m128bh test_mm_fmsubne_pbh(__m128bh __A, __m128bh __B, __m128bh __C) {
+ // CHECK-LABEL: @test_mm_fmsubne_pbh
+ // CHECK: fneg
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ return _mm_fmsubne_pbh(__A, __B, __C);
+}
+
+__m128bh test_mm_mask_fmsubne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) {
+ // CHECK-LABEL: @test_mm_mask_fmsubne_pbh
+ // CHECK: fneg
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_mask_fmsubne_pbh(__A, __U, __B, __C);
+}
+
+__m128bh test_mm_mask3_fmsubne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) {
+ // CHECK-LABEL: @test_mm_mask3_fmsubne_pbh
+ // CHECK: fneg
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_mask3_fmsubne_pbh(__A, __B, __C, __U);
+}
+
+__m128bh test_mm_maskz_fmsubne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
+ // CHECK-LABEL: @test_mm_maskz_fmsubne_pbh
+ // CHECK: fneg
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_maskz_fmsubne_pbh(__U, __A, __B, __C);
+}
+
+__m128bh test_mm_fnmaddne_pbh(__m128bh __A, __m128bh __B, __m128bh __C) {
+ // CHECK-LABEL: @test_mm_fnmaddne_pbh
+ // CHECK: fneg
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ return _mm_fnmaddne_pbh(__A, __B, __C);
+}
+
+__m128bh test_mm_mask_fnmaddne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) {
+ // CHECK-LABEL: @test_mm_mask_fnmaddne_pbh
+ // CHECK: fneg
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_mask_fnmaddne_pbh(__A, __U, __B, __C);
+}
+
+__m128bh test_mm_mask3_fnmaddne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) {
+ // CHECK-LABEL: @test_mm_mask3_fnmaddne_pbh
+ // CHECK: fneg
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_mask3_fnmaddne_pbh(__A, __B, __C, __U);
+}
+
+__m128bh test_mm_maskz_fnmaddne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
+ // CHECK-LABEL: @test_mm_maskz_fnmaddne_pbh
+ // CHECK: fneg
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_maskz_fnmaddne_pbh(__U, __A, __B, __C);
+}
+
+__m128bh test_mm_fnmsubne_pbh(__m128bh __A, __m128bh __B, __m128bh __C) {
+ // CHECK-LABEL: @test_mm_fnmsubne_pbh
+ // CHECK: fneg
+ // CHECK: fneg
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ return _mm_fnmsubne_pbh(__A, __B, __C);
+}
+
+__m128bh test_mm_mask_fnmsubne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) {
+ // CHECK-LABEL: @test_mm_mask_fnmsubne_pbh
+ // CHECK: fneg
+ // CHECK: fneg
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_mask_fnmsubne_pbh(__A, __U, __B, __C);
+}
+
+__m128bh test_mm_mask3_fnmsubne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) {
+ // CHECK-LABEL: @test_mm_mask3_fnmsubne_pbh
+ // CHECK: fneg
+ // CHECK: fneg
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_mask3_fnmsubne_pbh(__A, __B, __C, __U);
+}
+
+__m128bh test_mm_maskz_fnmsubne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
+ // CHECK-LABEL: @test_mm_maskz_fnmsubne_pbh
+ // CHECK: fneg
+ // CHECK: fneg
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_maskz_fnmsubne_pbh(__U, __A, __B, __C);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 515b0d0fcc22c..4fb6d66ab5894 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -6396,3 +6396,413 @@ let TargetPrefix = "x86" in {
llvm_i8_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
}
+
+//===----------------------------------------------------------------------===//
+let TargetPrefix = "x86" in {
+ def int_x86_avx10_vaddnepbf16512
+ : ClangBuiltin<"__builtin_ia32_vaddnepbf16512">,
+ Intrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vaddnepbf16256
+ : ClangBuiltin<"__builtin_ia32_vaddnepbf16256">,
+ Intrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vaddnepbf16128
+ : ClangBuiltin<"__builtin_ia32_vaddnepbf16128">,
+ Intrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vsubnepbf16512
+ : ClangBuiltin<"__builtin_ia32_vsubnepbf16512">,
+ Intrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vsubnepbf16256
+ : ClangBuiltin<"__builtin_ia32_vsubnepbf16256">,
+ Intrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vsubnepbf16128
+ : ClangBuiltin<"__builtin_ia32_vsubnepbf16128">,
+ Intrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vmulnepbf16512
+ : ClangBuiltin<"__builtin_ia32_vmulnepbf16512">,
+ Intrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vmulnepbf16256
+ : ClangBuiltin<"__builtin_ia32_vmulnepbf16256">,
+ Intrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vmulnepbf16128
+ : ClangBuiltin<"__builtin_ia32_vmulnepbf16128">,
+ Intrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vdivnepbf16512
+ : ClangBuiltin<"__builtin_ia32_vdivnepbf16512">,
+ Intrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vdivnepbf16256
+ : ClangBuiltin<"__builtin_ia32_vdivnepbf16256">,
+ Intrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vdivnepbf16128
+ : ClangBuiltin<"__builtin_ia32_vdivnepbf16128">,
+ Intrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vmaxpbf16512
+ : ClangBuiltin<"__builtin_ia32_vmaxpbf16512">,
+ Intrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vmaxpbf16256
+ : ClangBuiltin<"__builtin_ia32_vmaxpbf16256">,
+ Intrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vmaxpbf16128
+ : ClangBuiltin<"__builtin_ia32_vmaxpbf16128">,
+ Intrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vminpbf16512
+ : ClangBuiltin<"__builtin_ia32_vminpbf16512">,
+ Intrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vminpbf16256
+ : ClangBuiltin<"__builtin_ia32_vminpbf16256">,
+ Intrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vminpbf16128
+ : ClangBuiltin<"__builtin_ia32_vminpbf16128">,
+ Intrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vcomsbf16eq
+ : ClangBuiltin<"__builtin_ia32_vcomsbf16eq">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v8bf16_ty, llvm_v8bf16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx10_vcomsbf16lt
+ : ClangBuiltin<"__builtin_ia32_vcomsbf16lt">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v8bf16_ty, llvm_v8bf16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx10_vcomsbf16le
+ : ClangBuiltin<"__builtin_ia32_vcomsbf16le">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v8bf16_ty, llvm_v8bf16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx10_vcomsbf16gt
+ : ClangBuiltin<"__builtin_ia32_vcomsbf16gt">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v8bf16_ty, llvm_v8bf16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx10_vcomsbf16ge
+ : ClangBuiltin<"__builtin_ia32_vcomsbf16ge">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v8bf16_ty, llvm_v8bf16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx10_vcomsbf16neq
+ : ClangBuiltin<"__builtin_ia32_vcomsbf16neq">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v8bf16_ty, llvm_v8bf16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx10_mask_rsqrt_nepbf16_128
+ : ClangBuiltin<"__builtin_ia32_vrsqrtpbf16128_mask">,
+ Intrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx10_mask_rsqrt_nepbf16_256
+ : ClangBuiltin<"__builtin_ia32_vrsqrtpbf16256_mask">,
+ Intrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_i16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_mask_rsqrt_nepbf16_512
+ : ClangBuiltin<"__builtin_ia32_vrsqrtpbf16512_mask">,
+ Intrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_i32_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_mask_rcp_nepbf16_128
+ : ClangBuiltin<"__builtin_ia32_vrcppbf16128_mask">,
+ Intrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx10_mask_rcp_nepbf16_256
+ : ClangBuiltin<"__builtin_ia32_vrcppbf16256_mask">,
+ Intrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_i16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_mask_rcp_nepbf16_512
+ : ClangBuiltin<"__builtin_ia32_vrcppbf16512_mask">,
+ Intrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_i32_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_mask_reduce_nepbf16_128
+ : ClangBuiltin<"__builtin_ia32_vreducenepbf16128_mask">,
+ Intrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_i32_ty, llvm_v8bf16_ty, llvm_i8_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx10_mask_reduce_nepbf16_256
+ : ClangBuiltin<"__builtin_ia32_vreducenepbf16256_mask">,
+ Intrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_i32_ty, llvm_v16bf16_ty, llvm_i16_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx10_mask_reduce_nepbf16_512
+ : ClangBuiltin<"__builtin_ia32_vreducenepbf16512_mask">,
+ Intrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_i32_ty, llvm_v32bf16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx10_fpclass_nepbf16_128
+ : Intrinsic<[ llvm_v8i1_ty ], [ llvm_v8bf16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx10_fpclass_nepbf16_256
+ : Intrinsic<[ llvm_v16i1_ty ], [ llvm_v16bf16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx10_fpclass_nepbf16_512
+ : Intrinsic<[ llvm_v32i1_ty ], [ llvm_v32bf16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx10_mask_getexp_nepbf16_128
+ : ClangBuiltin<"__builtin_ia32_vgetexppbf16128_mask">,
+ Intrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx10_mask_getexp_nepbf16_256
+ : ClangBuiltin<"__builtin_ia32_vgetexppbf16256_mask">,
+ Intrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_i16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_mask_getexp_nepbf16_512
+ : ClangBuiltin<"__builtin_ia32_vgetexppbf16512_mask">,
+ Intrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_i32_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_mask_getmant_nepbf16_128
+ : ClangBuiltin<"__builtin_ia32_vgetmantpbf16128_mask">,
+ Intrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_i32_ty, llvm_v8bf16_ty, llvm_i8_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx10_mask_getmant_nepbf16_256
+ : ClangBuiltin<"__builtin_ia32_vgetmantpbf16256_mask">,
+ Intrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_i32_ty, llvm_v16bf16_ty, llvm_i16_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx10_mask_getmant_nepbf16_512
+ : ClangBuiltin<"__builtin_ia32_vgetmantpbf16512_mask">,
+ Intrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_i32_ty, llvm_v32bf16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx10_mask_rndscale_nepbf16_128
+ : ClangBuiltin<"__builtin_ia32_vrndscalenepbf16_128_mask">,
+ Intrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_i32_ty, llvm_v8bf16_ty, llvm_i8_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx10_mask_rndscale_nepbf16_256
+ : ClangBuiltin<"__builtin_ia32_vrndscalenepbf16_256_mask">,
+ Intrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_i32_ty, llvm_v16bf16_ty, llvm_i16_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx10_mask_rndscale_nepbf16_512
+ : ClangBuiltin<"__builtin_ia32_vrndscalenepbf16_mask">,
+ Intrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_i32_ty, llvm_v32bf16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx10_mask_scalef_nepbf16_128
+ : ClangBuiltin<"__builtin_ia32_vscalefpbf16128_mask">,
+ Intrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_mask_scalef_nepbf16_256
+ : ClangBuiltin<"__builtin_ia32_vscalefpbf16256_mask">,
+ Intrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_i16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_mask_scalef_nepbf16_512
+ : ClangBuiltin<"__builtin_ia32_vscalefpbf16512_mask">,
+ Intrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_i32_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmadd213nepbf16256 // NOTE(review): no 512-bit vfmadd213nepbf16 record appears in this hunk, unlike the 132/231 forms - confirm intentional
+ : ClangBuiltin<"__builtin_ia32_vfmadd213nepbf16256">,
+ Intrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmadd213nepbf16128
+ : ClangBuiltin<"__builtin_ia32_vfmadd213nepbf16128">,
+ Intrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmadd132nepbf16512
+ : ClangBuiltin<"__builtin_ia32_vfmadd132nepbf16512">,
+ Intrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmadd132nepbf16256
+ : ClangBuiltin<"__builtin_ia32_vfmadd132nepbf16256">,
+ Intrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmadd132nepbf16128
+ : ClangBuiltin<"__builtin_ia32_vfmadd132nepbf16128">,
+ Intrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmadd231nepbf16512
+ : ClangBuiltin<"__builtin_ia32_vfmadd231nepbf16512">,
+ Intrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmadd231nepbf16256
+ : ClangBuiltin<"__builtin_ia32_vfmadd231nepbf16256">,
+ Intrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmadd231nepbf16128
+ : ClangBuiltin<"__builtin_ia32_vfmadd231nepbf16128">,
+ Intrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmsub213nepbf16512
+ : ClangBuiltin<"__builtin_ia32_vfmsub213nepbf16512">,
+ Intrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmsub213nepbf16256
+ : ClangBuiltin<"__builtin_ia32_vfmsub213nepbf16256">,
+ Intrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmsub213nepbf16128
+ : ClangBuiltin<"__builtin_ia32_vfmsub213nepbf16128">,
+ Intrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmsub132nepbf16512
+ : ClangBuiltin<"__builtin_ia32_vfmsub132nepbf16512">,
+ Intrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmsub132nepbf16256
+ : ClangBuiltin<"__builtin_ia32_vfmsub132nepbf16256">,
+ Intrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmsub132nepbf16128
+ : ClangBuiltin<"__builtin_ia32_vfmsub132nepbf16128">,
+ Intrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmsub231nepbf16512
+ : ClangBuiltin<"__builtin_ia32_vfmsub231nepbf16512">,
+ Intrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmsub231nepbf16256
+ : ClangBuiltin<"__builtin_ia32_vfmsub231nepbf16256">,
+ Intrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmsub231nepbf16128
+ : ClangBuiltin<"__builtin_ia32_vfmsub231nepbf16128">,
+ Intrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmadd213nepbf16512
+ : ClangBuiltin<"__builtin_ia32_vfnmadd213nepbf16512">,
+ Intrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmadd213nepbf16256
+ : ClangBuiltin<"__builtin_ia32_vfnmadd213nepbf16256">,
+ Intrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmadd213nepbf16128
+ : ClangBuiltin<"__builtin_ia32_vfnmadd213nepbf16128">,
+ Intrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmadd132nepbf16512
+ : ClangBuiltin<"__builtin_ia32_vfnmadd132nepbf16512">,
+ Intrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmadd132nepbf16256
+ : ClangBuiltin<"__builtin_ia32_vfnmadd132nepbf16256">,
+ Intrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmadd132nepbf16128
+ : ClangBuiltin<"__builtin_ia32_vfnmadd132nepbf16128">,
+ Intrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmadd231nepbf16512
+ : ClangBuiltin<"__builtin_ia32_vfnmadd231nepbf16512">,
+ Intrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmadd231nepbf16256
+ : ClangBuiltin<"__builtin_ia32_vfnmadd231nepbf16256">,
+ Intrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmadd231nepbf16128
+ : ClangBuiltin<"__builtin_ia32_vfnmadd231nepbf16128">,
+ Intrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmsub213nepbf16512
+ : ClangBuiltin<"__builtin_ia32_vfnmsub213nepbf16512">,
+ Intrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmsub213nepbf16256
+ : ClangBuiltin<"__builtin_ia32_vfnmsub213nepbf16256">,
+ Intrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmsub213nepbf16128
+ : ClangBuiltin<"__builtin_ia32_vfnmsub213nepbf16128">,
+ Intrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmsub132nepbf16512
+ : ClangBuiltin<"__builtin_ia32_vfnmsub132nepbf16512">,
+ Intrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmsub132nepbf16256
+ : ClangBuiltin<"__builtin_ia32_vfnmsub132nepbf16256">,
+ Intrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmsub132nepbf16128
+ : ClangBuiltin<"__builtin_ia32_vfnmsub132nepbf16128">,
+ Intrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmsub231nepbf16512
+ : ClangBuiltin<"__builtin_ia32_vfnmsub231nepbf16512">,
+ Intrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmsub231nepbf16256
+ : ClangBuiltin<"__builtin_ia32_vfnmsub231nepbf16256">,
+ Intrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmsub231nepbf16128
+ : ClangBuiltin<"__builtin_ia32_vfnmsub231nepbf16128">,
+ Intrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+}
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 6e17150edf278..aa2a9d17f0ab9 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -3305,11 +3305,14 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
if ((PatchedName.starts_with("cmp") || PatchedName.starts_with("vcmp")) &&
(PatchedName.ends_with("ss") || PatchedName.ends_with("sd") ||
PatchedName.ends_with("sh") || PatchedName.ends_with("ph") ||
+ PatchedName.ends_with("pbf16") ||
PatchedName.ends_with("ps") || PatchedName.ends_with("pd"))) {
bool IsVCMP = PatchedName[0] == 'v';
unsigned CCIdx = IsVCMP ? 4 : 3;
+ // "pbf16" is the only 5-character type suffix; all others are 2 characters.
+ unsigned SuffixLength = PatchedName.ends_with("pbf16") ? 5 : 2;
unsigned CC = StringSwitch<unsigned>(
- PatchedName.slice(CCIdx, PatchedName.size() - 2))
+ PatchedName.slice(CCIdx, PatchedName.size() - SuffixLength))
.Case("eq", 0x00)
.Case("eq_oq", 0x00)
.Case("lt", 0x01)
@@ -3372,6 +3374,8 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
PatchedName = "vcmpsh";
else if (PatchedName.ends_with("ph"))
PatchedName = "vcmpph";
+ else if (PatchedName.ends_with("pbf16"))
+ PatchedName = "vcmppbf16";
else
llvm_unreachable("Unexpected suffix!");
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
index 33104524c5a89..8fcc1c10d93a0 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
@@ -167,6 +167,15 @@ bool X86ATTInstPrinter::printVecCompareInstr(const MCInst *MI,
case X86::VCMPPHZrmbi: case X86::VCMPPHZrmbik:
case X86::VCMPPHZrrib: case X86::VCMPPHZrribk:
case X86::VCMPSHZrrib_Int: case X86::VCMPSHZrrib_Intk:
+ case X86::VCMPPBF16Z128rmi: case X86::VCMPPBF16Z128rri:
+ case X86::VCMPPBF16Z256rmi: case X86::VCMPPBF16Z256rri:
+ case X86::VCMPPBF16Zrmi: case X86::VCMPPBF16Zrri:
+ case X86::VCMPPBF16Z128rmik: case X86::VCMPPBF16Z128rrik:
+ case X86::VCMPPBF16Z256rmik: case X86::VCMPPBF16Z256rrik:
+ case X86::VCMPPBF16Zrmik: case X86::VCMPPBF16Zrrik:
+ case X86::VCMPPBF16Z128rmbi: case X86::VCMPPBF16Z128rmbik:
+ case X86::VCMPPBF16Z256rmbi: case X86::VCMPPBF16Z256rmbik:
+ case X86::VCMPPBF16Zrmbi: case X86::VCMPPBF16Zrmbik:
if (Imm >= 0 && Imm <= 31) {
OS << '\t';
printCMPMnemonic(MI, /*IsVCMP*/true, OS);
@@ -205,7 +214,7 @@ bool X86ATTInstPrinter::printVecCompareInstr(const MCInst *MI,
printwordmem(MI, CurOp--, OS);
else
printdwordmem(MI, CurOp--, OS);
- } else if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XD) {
- assert((Desc.TSFlags & X86II::OpMapMask) != X86II::TA &&
- "Unexpected op map!");
+ } else if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XD &&
+ (Desc.TSFlags & X86II::OpMapMask) != X86II::TA) {
+ // The op-map check is part of the condition; an assert would be tautological.
printqwordmem(MI, CurOp--, OS);
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
index 21c1556d1d8ed..2f26ff3a627f0 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
@@ -310,6 +310,17 @@ void X86InstPrinterCommon::printCMPMnemonic(const MCInst *MI, bool IsVCmp,
case X86::VCMPSHZrmi_Intk: case X86::VCMPSHZrri_Intk:
OS << "sh\t";
break;
+ case X86::VCMPPBF16Z128rmi: case X86::VCMPPBF16Z128rri:
+ case X86::VCMPPBF16Z256rmi: case X86::VCMPPBF16Z256rri:
+ case X86::VCMPPBF16Zrmi: case X86::VCMPPBF16Zrri:
+ case X86::VCMPPBF16Z128rmik: case X86::VCMPPBF16Z128rrik:
+ case X86::VCMPPBF16Z256rmik: case X86::VCMPPBF16Z256rrik:
+ case X86::VCMPPBF16Zrmik: case X86::VCMPPBF16Zrrik:
+ case X86::VCMPPBF16Z128rmbi: case X86::VCMPPBF16Z128rmbik:
+ case X86::VCMPPBF16Z256rmbi: case X86::VCMPPBF16Z256rmbik:
+ case X86::VCMPPBF16Zrmbi: case X86::VCMPPBF16Zrmbik:
+ OS << "pbf16\t";
+ break;
}
}
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
index 7c8459a546516..39600ffcadd8e 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
@@ -146,6 +146,15 @@ bool X86IntelInstPrinter::printVecCompareInstr(const MCInst *MI, raw_ostream &OS
case X86::VCMPPHZrmbi: case X86::VCMPPHZrmbik:
case X86::VCMPPHZrrib: case X86::VCMPPHZrribk:
case X86::VCMPSHZrrib_Int: case X86::VCMPSHZrrib_Intk:
+ case X86::VCMPPBF16Z128rmi: case X86::VCMPPBF16Z128rri:
+ case X86::VCMPPBF16Z256rmi: case X86::VCMPPBF16Z256rri:
+ case X86::VCMPPBF16Zrmi: case X86::VCMPPBF16Zrri:
+ case X86::VCMPPBF16Z128rmik: case X86::VCMPPBF16Z128rrik:
+ case X86::VCMPPBF16Z256rmik: case X86::VCMPPBF16Z256rrik:
+ case X86::VCMPPBF16Zrmik: case X86::VCMPPBF16Zrrik:
+ case X86::VCMPPBF16Z128rmbi: case X86::VCMPPBF16Z128rmbik:
+ case X86::VCMPPBF16Z256rmbi: case X86::VCMPPBF16Z256rmbik:
+ case X86::VCMPPBF16Zrmbi: case X86::VCMPPBF16Zrmbik:
if (Imm >= 0 && Imm <= 31) {
OS << '\t';
printCMPMnemonic(MI, /*IsVCMP*/true, OS);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9fafb66ab0b3f..e9cf8fb49b1bd 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2358,6 +2358,33 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CONCAT_VECTORS, MVT::v32bf16, Custom);
}
+ // AVX10.2 adds native packed bf16 arithmetic. The 128/256-bit instructions
+ // are predicated on AVX10.2 alone, while the 512-bit ones also require
+ // AVX10.2-512, so v32bf16 must not be made legal on 256-bit-only targets.
+ if (!Subtarget.useSoftFloat() && Subtarget.hasAVX10_2()) {
+ addRegisterClass(MVT::v8bf16, &X86::VR128XRegClass);
+ addRegisterClass(MVT::v16bf16, &X86::VR256XRegClass);
+ for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
+ setOperationAction(ISD::FADD, VT, Legal);
+ setOperationAction(ISD::FSUB, VT, Legal);
+ setOperationAction(ISD::FMUL, VT, Legal);
+ setOperationAction(ISD::FDIV, VT, Legal);
+ setOperationAction(ISD::FSQRT, VT, Legal);
+ setOperationAction(ISD::FMA, VT, Legal);
+ setOperationAction(ISD::SETCC, VT, Custom);
+ }
+ if (Subtarget.hasAVX10_2_512()) {
+ addRegisterClass(MVT::v32bf16, &X86::VR512RegClass);
+ setOperationAction(ISD::FADD, MVT::v32bf16, Legal);
+ setOperationAction(ISD::FSUB, MVT::v32bf16, Legal);
+ setOperationAction(ISD::FMUL, MVT::v32bf16, Legal);
+ setOperationAction(ISD::FDIV, MVT::v32bf16, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v32bf16, Legal);
+ setOperationAction(ISD::FMA, MVT::v32bf16, Legal);
+ setOperationAction(ISD::SETCC, MVT::v32bf16, Custom);
+ }
+ }
+
if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
@@ -12206,7 +12231,8 @@ static bool isShuffleFoldableLoad(SDValue V) {
template<typename T>
static bool isSoftF16(T VT, const X86Subtarget &Subtarget) {
T EltVT = VT.getScalarType();
- return EltVT == MVT::bf16 || (EltVT == MVT::f16 && !Subtarget.hasFP16());
+ return (EltVT == MVT::bf16 && !Subtarget.hasAVX10_2()) ||
+ (EltVT == MVT::f16 && !Subtarget.hasFP16());
}
/// Try to lower insertion of a single element into a zero vector.
@@ -23257,7 +23283,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
if (isFP) {
MVT EltVT = Op0.getSimpleValueType().getVectorElementType();
- assert(EltVT == MVT::f16 || EltVT == MVT::f32 || EltVT == MVT::f64);
+ assert(EltVT == MVT::bf16 || EltVT == MVT::f16 || EltVT == MVT::f32 ||
+ EltVT == MVT::f64);
if (isSoftF16(EltVT, Subtarget))
return SDValue();
@@ -23274,7 +23301,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
Op0.getSimpleValueType().is512BitVector())) {
#ifndef NDEBUG
unsigned Num = VT.getVectorNumElements();
- assert(Num <= 16 || (Num == 32 && EltVT == MVT::f16));
+ assert(Num <= 16 ||
+ (Num == 32 && (EltVT == MVT::f16 || EltVT == MVT::bf16)));
#endif
Opc = IsStrict ? X86ISD::STRICT_CMPM : X86ISD::CMPM;
} else {
@@ -54073,7 +54101,8 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
EVT ScalarVT = VT.getScalarType();
if (((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) ||
!Subtarget.hasAnyFMA()) &&
- !(ScalarVT == MVT::f16 && Subtarget.hasFP16()))
+ !(ScalarVT == MVT::f16 && Subtarget.hasFP16()) &&
+ !(ScalarVT == MVT::bf16 && Subtarget.hasAVX10_2()))
return SDValue();
auto invertIfNegative = [&DAG, &TLI, &DCI](SDValue &V) {
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index 666667895bc39..31f1d3a6c2ea3 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -31,3 +31,313 @@ multiclass avx256_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeR
let Predicates = [HasAVX10_2], hasEVEX_U = 1, OpEnc = EncEVEX in
defm VADD : avx256_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
+
+//-------------------------------------------------
+// AVX10 BF16 instructions
+//-------------------------------------------------
+
+// VADDNEPBF16, VSUBNEPBF16, VMULNEPBF16, VDIVNEPBF16, VMAXPBF16, VMINPBF16
+multiclass avx10_fp_binopne_int_pbf16<bits<8> opc, string OpcodeStr,
+ X86SchedWriteSizes sched,
+ bit IsCommutable = 0> {
+ let Predicates = [HasAVX10_2_512] in
+ defm PBF16Z : avx512_fp_packed<opc, OpcodeStr,
+ !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16512"),
+ !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16512"),
+ v32bf16_info, sched.PH.ZMM, IsCommutable>, EVEX_V512,
+ T_MAP5, PD, EVEX_CD8<16, CD8VF>;
+ let Predicates = [HasAVX10_2] in {
+ defm PBF16Z128 : avx512_fp_packed<opc, OpcodeStr,
+ !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16128"),
+ !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16128"),
+ v8bf16x_info, sched.PH.XMM, IsCommutable>, EVEX_V128,
+ T_MAP5, PD, EVEX_CD8<16, CD8VF>;
+ defm PBF16Z256 : avx512_fp_packed<opc, OpcodeStr,
+ !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16256"),
+ !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16256"),
+ v16bf16x_info, sched.PH.YMM, IsCommutable>, EVEX_V256,
+ T_MAP5, PD, EVEX_CD8<16, CD8VF>;
+ }
+}
+
+multiclass avx10_fp_binop_pbf16<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ X86SchedWriteSizes sched,
+ bit IsCommutable = 0,
+ SDPatternOperator MaskOpNode = OpNode> {
+ let Predicates = [HasAVX10_2_512] in
+ defm NEPBF16Z : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
+ v32bf16_info, sched.PH.ZMM, IsCommutable>, EVEX_V512,
+ T_MAP5, PD, EVEX_CD8<16, CD8VF>;
+ let Predicates = [HasAVX10_2] in {
+ defm NEPBF16Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
+ v8bf16x_info, sched.PH.XMM, IsCommutable>, EVEX_V128,
+ T_MAP5, PD, EVEX_CD8<16, CD8VF>;
+ defm NEPBF16Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
+ v16bf16x_info, sched.PH.YMM, IsCommutable>, EVEX_V256,
+ T_MAP5, PD, EVEX_CD8<16, CD8VF>;
+ }
+}
+
+let Uses = []<Register>, mayRaiseFPException = 0 in {
+defm VADD : avx10_fp_binop_pbf16<0x58, "vaddne", fadd, SchedWriteFAddSizes, 1>;
+defm VSUB : avx10_fp_binop_pbf16<0x5C, "vsubne", fsub, SchedWriteFAddSizes, 0>;
+defm VMUL : avx10_fp_binop_pbf16<0x59, "vmulne", fmul, SchedWriteFMulSizes, 0>;
+defm VDIV : avx10_fp_binop_pbf16<0x5E, "vdivne", fdiv, SchedWriteFDivSizes, 0>;
+defm VMIN : avx10_fp_binopne_int_pbf16<0x5D, "vmin", SchedWriteFCmpSizes, 0>;
+defm VMAX : avx10_fp_binopne_int_pbf16<0x5F, "vmax", SchedWriteFCmpSizes, 0>;
+}
+
+// VCOMSBF16: scalar (VEX_LIG) compare, so it needs AVX10.2 only, not AVX10.2-512.
+let Uses = []<Register>, mayRaiseFPException = 0,
+ Defs = [EFLAGS], Predicates = [HasAVX10_2] in {
+ defm VCOMSBF16Z : sse12_ord_cmp<0x2F, FR16X, null_frag, bf16, f16mem, loadf16,
+ "comsbf16", SSEPackedSingle>, T_MAP5, PD, EVEX,
+ VEX_LIG, EVEX_CD8<16, CD8VT1>;
+
+ let isCodeGenOnly = 1 in {
+ defm VCOMSBF16Z : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8bf16, f16mem,
+ sse_load_bf16, "comsbf16", SSEPackedSingle>,
+ T_MAP5, PD, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
+ }
+}
+
+// VCMPPBF16
+multiclass avx10_vcmp_common_bf16<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
+ let mayRaiseFPException = 0 in {
+ defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, $src2, $src1", "$src1, $src2, $cc",
+ (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
+ (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
+ 1>, Sched<[sched]>;
+
+ defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
+ (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, $src2, $src1", "$src1, $src2, $cc",
+ (X86cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
+ timm:$cc),
+ (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
+ timm:$cc)>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
+
+ defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, ${src2}"#_.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_.BroadcastStr#", $cc",
+ (X86cmpm (_.VT _.RC:$src1),
+ (_.VT (_.BroadcastLdFrag addr:$src2)),
+ timm:$cc),
+ (X86cmpm_su (_.VT _.RC:$src1),
+ (_.VT (_.BroadcastLdFrag addr:$src2)),
+ timm:$cc)>,
+ EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ }
+}
+
+multiclass avx10_vcmp_bf16<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
+ let Predicates = [HasAVX10_2_512] in
+ defm Z : avx10_vcmp_common_bf16<sched.ZMM, _.info512>, EVEX_V512;
+ let Predicates = [HasAVX10_2] in {
+ defm Z128 : avx10_vcmp_common_bf16<sched.XMM, _.info128>, EVEX_V128;
+ defm Z256 : avx10_vcmp_common_bf16<sched.YMM, _.info256>, EVEX_V256;
+ }
+}
+
+defm VCMPPBF16 : avx10_vcmp_bf16<SchedWriteFCmp, avx512vl_bf16_info>,
+ AVX512XDIi8Base, EVEX, VVVV, EVEX_CD8<16, CD8VF>, TA;
+
+
+// VSQRTNEPBF16
+multiclass avx10_sqrt_packed_bf16<bits<8> opc, string OpcodeStr,
+ X86SchedWriteSizes sched> {
+ let Predicates = [HasAVX10_2_512] in
+ defm NEPBF16Z : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pbf16"),
+ sched.PH.ZMM, v32bf16_info>,
+ EVEX_V512, PD, T_MAP5, EVEX_CD8<16, CD8VF>;
+ let Predicates = [HasAVX10_2] in {
+ defm NEPBF16Z128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pbf16"),
+ sched.PH.XMM, v8bf16x_info>,
+ EVEX_V128, PD, T_MAP5, EVEX_CD8<16, CD8VF>;
+ defm NEPBF16Z256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pbf16"),
+ sched.PH.YMM, v16bf16x_info>,
+ EVEX_V256, PD, T_MAP5, EVEX_CD8<16, CD8VF>;
+ }
+}
+
+let Uses = []<Register>, mayRaiseFPException = 0 in
+defm VSQRT : avx10_sqrt_packed_bf16<0x51, "vsqrtne", SchedWriteFSqrtSizes>;
+
+// VRSQRTPBF16, VRCPPBF16, VGETEXPPBF16
+multiclass avx10_fp14_pbf16<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasAVX10_2_512] in
+ defm PBF16Z : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pbf16"),
+ OpNode, sched.ZMM, v32bf16_info>,
+ EVEX_V512;
+ let Predicates = [HasAVX10_2] in {
+ defm PBF16Z128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pbf16"),
+ OpNode, sched.XMM, v8bf16x_info>,
+ EVEX_V128;
+ defm PBF16Z256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pbf16"),
+ OpNode, sched.YMM, v16bf16x_info>,
+ EVEX_V256;
+ }
+}
+
+defm VRSQRT : avx10_fp14_pbf16<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>,
+ T_MAP6, PS, EVEX_CD8<16, CD8VF>;
+defm VRCP : avx10_fp14_pbf16<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>,
+ T_MAP6, PS, EVEX_CD8<16, CD8VF>;
+defm VGETEXP : avx10_fp14_pbf16<0x42, "vgetexp", X86fgetexp, SchedWriteFRnd>,
+ T_MAP5, EVEX_CD8<16, CD8VF>;
+
+// VSCALEFPBF16
+multiclass avx10_fp_scalef_bf16<bits<8> opc, string OpcodeStr,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasAVX10_2_512] in
+ defm PBF16Z : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32bf16_info>,
+ EVEX_V512, T_MAP6,PS, EVEX_CD8<16, CD8VF>;
+ let Predicates = [HasAVX10_2] in {
+ defm PBF16Z128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8bf16x_info>,
+ EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6,PS;
+ defm PBF16Z256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16bf16x_info>,
+ EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6,PS;
+ }
+}
+
+let Uses = []<Register>, mayRaiseFPException = 0 in
+defm VSCALEF : avx10_fp_scalef_bf16<0x2C, "vscalef", SchedWriteFAdd>;
+
+// VREDUCENEPBF16, VRNDSCALENEPBF16, VGETMANTPBF16
+multiclass avx10_common_unary_fp_packed_imm_bf16<string OpcodeStr,
+ AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode, X86SchedWriteWidths sched> {
+ let Predicates = [HasAVX10_2_512] in
+ defm PBF16Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.ZMM, _.info512>, EVEX_V512;
+ let Predicates = [HasAVX10_2] in {
+ defm PBF16Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.XMM, _.info128>, EVEX_V128;
+ defm PBF16Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.YMM, _.info256>, EVEX_V256;
+ }
+}
+
+let Uses = []<Register>, mayRaiseFPException = 0 in {
+defm VREDUCENE : avx10_common_unary_fp_packed_imm_bf16<"vreducene", avx512vl_bf16_info, 0x56,
+ X86VReduce, X86VReduce, SchedWriteFRnd>,
+ AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
+defm VRNDSCALENE : avx10_common_unary_fp_packed_imm_bf16<"vrndscalene", avx512vl_bf16_info, 0x08,
+ X86any_VRndScale, X86VRndScale, SchedWriteFRnd>,
+ AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
+defm VGETMANT : avx10_common_unary_fp_packed_imm_bf16<"vgetmant", avx512vl_bf16_info, 0x26,
+ X86VGetMant, X86VGetMant, SchedWriteFRnd>,
+ AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
+}
+
+// VFPCLASSPBF16
+multiclass avx10_fp_fpclass_bf16<string OpcodeStr, bits<8> opcVec,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasAVX10_2_512] in
+ defm PBF16Z : avx512_vector_fpclass<opcVec, OpcodeStr, sched.ZMM,
+ avx512vl_bf16_info.info512, "z">, EVEX_V512;
+ let Predicates = [HasAVX10_2] in {
+ defm PBF16Z128 : avx512_vector_fpclass<opcVec, OpcodeStr, sched.XMM,
+ avx512vl_bf16_info.info128, "x">, EVEX_V128;
+ defm PBF16Z256 : avx512_vector_fpclass<opcVec, OpcodeStr, sched.YMM,
+ avx512vl_bf16_info.info256, "y">, EVEX_V256;
+ }
+}
+
+// FIXME: need to set Uses = []<Register> but avx512_vector_fpclass has InstAlias.
+defm VFPCLASS : avx10_fp_fpclass_bf16<"vfpclass", 0x66, SchedWriteFCmp>,
+ AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
+
+// VF[,N]M[ADD,SUB][132,213,231]NEPBF16
+multiclass avx10_fma3p_213_bf16<bits<8> opc, string OpcodeStr,
+ SDPatternOperator OpNode, SDNode MaskOpNode,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasAVX10_2_512] in
+ defm PBF16Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.ZMM, v32bf16_info>, EVEX_V512, T_MAP6, PS,
+ EVEX_CD8<16, CD8VF>;
+ let Predicates = [HasAVX10_2] in {
+ defm PBF16Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.XMM, v8bf16x_info>, EVEX_V128, T_MAP6, PS,
+ EVEX_CD8<16, CD8VF>;
+ defm PBF16Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.YMM, v16bf16x_info>, EVEX_V256, T_MAP6, PS,
+ EVEX_CD8<16, CD8VF>;
+ }
+}
+
+let Uses = []<Register>, mayRaiseFPException = 0 in {
+defm VFMADD213NE : avx10_fma3p_213_bf16<0xA8, "vfmadd213nepbf16", any_fma,
+ fma, SchedWriteFMA>;
+defm VFMSUB213NE : avx10_fma3p_213_bf16<0xAA, "vfmsub213nepbf16", X86any_Fmsub,
+ X86Fmsub, SchedWriteFMA>;
+defm VFNMADD213NE : avx10_fma3p_213_bf16<0xAC, "vfnmadd213nepbf16", X86any_Fnmadd,
+ X86Fnmadd, SchedWriteFMA>;
+defm VFNMSUB213NE : avx10_fma3p_213_bf16<0xAE, "vfnmsub213nepbf16", X86any_Fnmsub,
+ X86Fnmsub, SchedWriteFMA>;
+}
+
+multiclass avx10_fma3p_231_bf16<bits<8> opc, string OpcodeStr,
+ SDPatternOperator OpNode, SDNode MaskOpNode,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasAVX10_2_512] in
+ defm PBF16Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.ZMM, v32bf16_info>, EVEX_V512, T_MAP6, PS,
+ EVEX_CD8<16, CD8VF>;
+ let Predicates = [HasAVX10_2] in {
+ defm PBF16Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.XMM, v8bf16x_info>, EVEX_V128, T_MAP6, PS,
+ EVEX_CD8<16, CD8VF>;
+ defm PBF16Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.YMM, v16bf16x_info>, EVEX_V256, T_MAP6, PS,
+ EVEX_CD8<16, CD8VF>;
+ }
+}
+
+let Uses = []<Register>, mayRaiseFPException = 0 in {
+defm VFMADD231NE : avx10_fma3p_231_bf16<0xB8, "vfmadd231nepbf16", any_fma,
+ fma, SchedWriteFMA>;
+defm VFMSUB231NE : avx10_fma3p_231_bf16<0xBA, "vfmsub231nepbf16", X86any_Fmsub,
+ X86Fmsub, SchedWriteFMA>;
+defm VFNMADD231NE : avx10_fma3p_231_bf16<0xBC, "vfnmadd231nepbf16", X86any_Fnmadd,
+ X86Fnmadd, SchedWriteFMA>;
+defm VFNMSUB231NE : avx10_fma3p_231_bf16<0xBE, "vfnmsub231nepbf16", X86any_Fnmsub,
+ X86Fnmsub, SchedWriteFMA>;
+}
+
+multiclass avx10_fma3p_132_bf16<bits<8> opc, string OpcodeStr,
+ SDPatternOperator OpNode, SDNode MaskOpNode,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasAVX10_2_512] in
+ defm PBF16Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.ZMM, v32bf16_info>, EVEX_V512, T_MAP6, PS,
+ EVEX_CD8<16, CD8VF>;
+ let Predicates = [HasAVX10_2] in {
+ defm PBF16Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.XMM, v8bf16x_info>, EVEX_V128, T_MAP6, PS,
+ EVEX_CD8<16, CD8VF>;
+ defm PBF16Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.YMM, v16bf16x_info>, EVEX_V256, T_MAP6, PS,
+ EVEX_CD8<16, CD8VF>;
+ }
+}
+
+let Uses = []<Register>, mayRaiseFPException = 0 in {
+defm VFMADD132NE : avx10_fma3p_132_bf16<0x98, "vfmadd132nepbf16", any_fma,
+ fma, SchedWriteFMA>;
+defm VFMSUB132NE : avx10_fma3p_132_bf16<0x9A, "vfmsub132nepbf16", X86any_Fmsub,
+ X86Fmsub, SchedWriteFMA>;
+defm VFNMADD132NE : avx10_fma3p_132_bf16<0x9C, "vfnmadd132nepbf16", X86any_Fnmadd,
+ X86Fnmadd, SchedWriteFMA>;
+defm VFNMSUB132NE : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_Fnmsub,
+ X86Fnmsub, SchedWriteFMA>;
+}
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 74596cec5c5ef..c8f49b025c76d 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -206,6 +206,12 @@ def X86CmpMaskCC :
SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>,
SDTCisVec<1>, SDTCisSameAs<2, 1>,
SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i8>]>;
+
+def X86CmpMaskCC_Int :
+ SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>,
+ SDTCisVec<1>, SDTCisSameAs<2, 1>,
+ SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i32>]>;
+
def X86MaskCmpMaskCC :
SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>,
SDTCisVec<1>, SDTCisSameAs<2, 1>,
@@ -1023,6 +1029,10 @@ def X86SubVBroadcastld256 : PatFrag<(ops node:$src),
// only load a single element.
// FIXME: We should add more canolicalizing in DAGCombine. Particulary removing
// the simple_load case.
+def sse_load_bf16 : PatFrags<(ops node:$ptr),
+ [(v8bf16 (simple_load node:$ptr)),
+ (v8bf16 (X86vzload16 node:$ptr)),
+ (v8bf16 (scalar_to_vector (loadf16 node:$ptr)))]>;
def sse_load_f16 : PatFrags<(ops node:$ptr),
[(v8f16 (simple_load node:$ptr)),
(v8f16 (X86vzload16 node:$ptr)),
diff --git a/llvm/lib/Target/X86/X86InstrUtils.td b/llvm/lib/Target/X86/X86InstrUtils.td
index 8387b76a40cdd..531268b41da96 100644
--- a/llvm/lib/Target/X86/X86InstrUtils.td
+++ b/llvm/lib/Target/X86/X86InstrUtils.td
@@ -313,7 +313,7 @@ def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
def v32f16_info : X86VectorVTInfo<32, f16, VR512, "ph">;
-def v32bf16_info: X86VectorVTInfo<32, bf16, VR512, "pbf">;
+def v32bf16_info: X86VectorVTInfo<32, bf16, VR512, "pbf16">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">;
@@ -323,7 +323,7 @@ def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
def v16f16x_info : X86VectorVTInfo<16, f16, VR256X, "ph">;
-def v16bf16x_info: X86VectorVTInfo<16, bf16, VR256X, "pbf">;
+def v16bf16x_info: X86VectorVTInfo<16, bf16, VR256X, "pbf16">;
def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;
@@ -332,7 +332,7 @@ def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
def v8f16x_info : X86VectorVTInfo<8, f16, VR128X, "ph">;
-def v8bf16x_info : X86VectorVTInfo<8, bf16, VR128X, "pbf">;
+def v8bf16x_info : X86VectorVTInfo<8, bf16, VR128X, "pbf16">;
def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 000138e1837af..281ea1f44ffee 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -388,12 +388,42 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx_vpermilvar_ps, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
X86_INTRINSIC_DATA(avx_vpermilvar_ps_256, INTR_TYPE_2OP, X86ISD::VPERMILPV,
0),
+ X86_INTRINSIC_DATA(avx10_fpclass_nepbf16_128, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx10_fpclass_nepbf16_256, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx10_fpclass_nepbf16_512, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx10_mask_getexp_nepbf16_128, INTR_TYPE_1OP_MASK, X86ISD::FGETEXP, 0),
+ X86_INTRINSIC_DATA(avx10_mask_getexp_nepbf16_256, INTR_TYPE_1OP_MASK, X86ISD::FGETEXP, 0),
+ X86_INTRINSIC_DATA(avx10_mask_getexp_nepbf16_512, INTR_TYPE_1OP_MASK, X86ISD::FGETEXP, 0),
+ X86_INTRINSIC_DATA(avx10_mask_getmant_nepbf16_128, INTR_TYPE_2OP_MASK, X86ISD::VGETMANT, 0),
+ X86_INTRINSIC_DATA(avx10_mask_getmant_nepbf16_256, INTR_TYPE_2OP_MASK, X86ISD::VGETMANT, 0),
+ X86_INTRINSIC_DATA(avx10_mask_getmant_nepbf16_512, INTR_TYPE_2OP_MASK, X86ISD::VGETMANT, 0),
+ X86_INTRINSIC_DATA(avx10_mask_rcp_nepbf16_128, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
+ X86_INTRINSIC_DATA(avx10_mask_rcp_nepbf16_256, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
+ X86_INTRINSIC_DATA(avx10_mask_rcp_nepbf16_512, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
+ X86_INTRINSIC_DATA(avx10_mask_reduce_nepbf16_128, INTR_TYPE_2OP_MASK, X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx10_mask_reduce_nepbf16_256, INTR_TYPE_2OP_MASK, X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx10_mask_reduce_nepbf16_512, INTR_TYPE_2OP_MASK, X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx10_mask_rndscale_nepbf16_128, INTR_TYPE_2OP_MASK, X86ISD::VRNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx10_mask_rndscale_nepbf16_256, INTR_TYPE_2OP_MASK, X86ISD::VRNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx10_mask_rndscale_nepbf16_512, INTR_TYPE_2OP_MASK, X86ISD::VRNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx10_mask_rsqrt_nepbf16_128, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
+ X86_INTRINSIC_DATA(avx10_mask_rsqrt_nepbf16_256, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
+ X86_INTRINSIC_DATA(avx10_mask_rsqrt_nepbf16_512, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
+ X86_INTRINSIC_DATA(avx10_mask_scalef_nepbf16_128, INTR_TYPE_2OP_MASK, X86ISD::SCALEF, 0),
+ X86_INTRINSIC_DATA(avx10_mask_scalef_nepbf16_256, INTR_TYPE_2OP_MASK, X86ISD::SCALEF, 0),
+ X86_INTRINSIC_DATA(avx10_mask_scalef_nepbf16_512, INTR_TYPE_2OP_MASK, X86ISD::SCALEF, 0),
X86_INTRINSIC_DATA(avx10_vaddpd256, INTR_TYPE_2OP, ISD::FADD,
X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx10_vaddph256, INTR_TYPE_2OP, ISD::FADD,
X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx10_vaddps256, INTR_TYPE_2OP, ISD::FADD,
X86ISD::FADD_RND),
+ X86_INTRINSIC_DATA(avx10_vcomsbf16eq, COMI, X86ISD::COMI, ISD::SETEQ),
+ X86_INTRINSIC_DATA(avx10_vcomsbf16ge, COMI, X86ISD::COMI, ISD::SETGE),
+ X86_INTRINSIC_DATA(avx10_vcomsbf16gt, COMI, X86ISD::COMI, ISD::SETGT),
+ X86_INTRINSIC_DATA(avx10_vcomsbf16le, COMI, X86ISD::COMI, ISD::SETLE),
+ X86_INTRINSIC_DATA(avx10_vcomsbf16lt, COMI, X86ISD::COMI, ISD::SETLT),
+ X86_INTRINSIC_DATA(avx10_vcomsbf16neq, COMI, X86ISD::COMI, ISD::SETNE),
X86_INTRINSIC_DATA(avx10_vmpsadbw_512, INTR_TYPE_3OP_IMM8, X86ISD::MPSADBW,
0),
X86_INTRINSIC_DATA(avx2_mpsadbw, INTR_TYPE_3OP_IMM8, X86ISD::MPSADBW, 0),
diff --git a/llvm/test/CodeGen/X86/avx10_2_512bf16-arith.ll b/llvm/test/CodeGen/X86/avx10_2_512bf16-arith.ll
new file mode 100644
index 0000000000000..33c40ac6bb32c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx10_2_512bf16-arith.ll
@@ -0,0 +1,587 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X86
+
+define <32 x bfloat> @test_int_x86_avx10_vaddnepbf16512(<32 x bfloat> %x1, <32 x bfloat> %x2) { ; unmasked 512-bit bf16 add
+; CHECK-LABEL: test_int_x86_avx10_vaddnepbf16512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vaddnepbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x58,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = fadd <32 x bfloat> %x1, %x2 ; generic IR fadd; the expected output above is a single vaddnepbf16
+ ret <32 x bfloat> %res
+}
+
+define <32 x bfloat> @test_int_x86_avx10_mask_add_nepbf16_512(<32 x bfloat> %src, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk, ptr %ptr) { ; merge-masked add: lanes with a clear mask bit keep %src (%ptr is unused in this body)
+; X64-LABEL: test_int_x86_avx10_mask_add_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vaddnepbf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x49,0x58,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_mask_add_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vaddnepbf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x49,0x58,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i32 %msk to <32 x i1> ; one mask bit per bf16 lane
+ %res0 = fadd <32 x bfloat> %x1, %x2
+ %res = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> %src ; the expected output shows this select folded into the {%k1} form of the add
+ ret <32 x bfloat> %res
+}
+
+define <32 x bfloat> @test_int_x86_avx10_maskz_add_nepbf16_512(<32 x bfloat> %src, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk, ptr %ptr) { ; zero-masked add with a register operand and with a memory operand (%src is unused in this body)
+; X64-LABEL: test_int_x86_avx10_maskz_add_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vaddnepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x58,0xc2]
+; X64-NEXT: vaddnepbf16 (%rsi), %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x58,0x0e]
+; X64-NEXT: vaddnepbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x58,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_add_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vaddnepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x58,0xc2]
+; X86-NEXT: vaddnepbf16 (%eax), %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x58,0x08]
+; X86-NEXT: vaddnepbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x58,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i32 %msk to <32 x i1> ; one mask bit per bf16 lane
+ %val = load <32 x bfloat>, ptr %ptr ; second operand comes from memory; the expected output uses the load-folded form
+ %res0 = fadd <32 x bfloat> %x1, %x2
+ %res1 = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> zeroinitializer ; zeroing select on the register form
+ %t2 = fadd <32 x bfloat> %x1, %val
+ %res2 = select <32 x i1> %mask, <32 x bfloat> %t2, <32 x bfloat> zeroinitializer ; zeroing select on the memory form
+ %res3 = fadd <32 x bfloat> %res1, %res2 ; uses both masked results in the returned value
+ ret <32 x bfloat> %res3
+}
+
+define <32 x bfloat> @test_int_x86_avx10_sub_nepbf16_512(<32 x bfloat> %x1, <32 x bfloat> %x2) { ; unmasked 512-bit bf16 subtract
+; CHECK-LABEL: test_int_x86_avx10_sub_nepbf16_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsubnepbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x5c,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = fsub <32 x bfloat> %x1, %x2 ; generic IR fsub; the expected output above is a single vsubnepbf16
+ ret <32 x bfloat> %res
+}
+
+define <32 x bfloat> @test_int_x86_avx10_mask_sub_nepbf16_512(<32 x bfloat> %src, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk, ptr %ptr) { ; merge-masked subtract: lanes with a clear mask bit keep %src (%ptr is unused in this body)
+; X64-LABEL: test_int_x86_avx10_mask_sub_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vsubnepbf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x49,0x5c,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_mask_sub_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vsubnepbf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x49,0x5c,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i32 %msk to <32 x i1> ; one mask bit per bf16 lane
+ %res0 = fsub <32 x bfloat> %x1, %x2
+ %res = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> %src ; the expected output shows this select folded into the {%k1} form of the subtract
+ ret <32 x bfloat> %res
+}
+
+define <32 x bfloat> @test_int_x86_avx10_maskz_sub_nepbf16_512(<32 x bfloat> %src, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk, ptr %ptr) { ; zero-masked subtract with a register operand and with a memory operand (%src is unused in this body)
+; X64-LABEL: test_int_x86_avx10_maskz_sub_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vsubnepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x5c,0xc2]
+; X64-NEXT: vsubnepbf16 (%rsi), %zmm1, %zmm1 # encoding: [0x62,0xf5,0x75,0x48,0x5c,0x0e]
+; X64-NEXT: vsubnepbf16 %zmm1, %zmm0, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x49,0x5c,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_sub_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vsubnepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x5c,0xc2]
+; X86-NEXT: vsubnepbf16 (%eax), %zmm1, %zmm1 # encoding: [0x62,0xf5,0x75,0x48,0x5c,0x08]
+; X86-NEXT: vsubnepbf16 %zmm1, %zmm0, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x49,0x5c,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i32 %msk to <32 x i1> ; one mask bit per bf16 lane
+ %val = load <32 x bfloat>, ptr %ptr ; second operand comes from memory; the expected output uses the load-folded form
+ %res0 = fsub <32 x bfloat> %x1, %x2
+ %res1 = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> zeroinitializer ; zeroing select on the register form
+ %t2 = fsub <32 x bfloat> %x1, %val
+ %res2 = select <32 x i1> %mask, <32 x bfloat> %t2, <32 x bfloat> zeroinitializer ; expected output has the memory-form subtract unmasked, unlike the add/mul/div variants
+ %res3 = fsub <32 x bfloat> %res1, %res2 ; expected output applies {%k1} merge-masking here instead; NOTE(review): presumably the %res2 select was folded into this subtract - confirm
+ ret <32 x bfloat> %res3
+}
+
+declare <32 x bfloat> @llvm.x86.avx10.vmulnepbf16512(<32 x bfloat>, <32 x bfloat>)
+
+define <32 x bfloat> @test_int_x86_avx10_mul_nepbf16_512(<32 x bfloat> %x1, <32 x bfloat> %x2) { ; unmasked 512-bit bf16 multiply
+; CHECK-LABEL: test_int_x86_avx10_mul_nepbf16_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmulnepbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x59,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = fmul <32 x bfloat> %x1, %x2 ; generic IR fmul; the expected output above is a single vmulnepbf16
+ ret <32 x bfloat> %res
+}
+
+define <32 x bfloat> @test_int_x86_avx10_mask_mul_nepbf16_512(<32 x bfloat> %src, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk, ptr %ptr) { ; merge-masked multiply: lanes with a clear mask bit keep %src (%ptr is unused in this body)
+; X64-LABEL: test_int_x86_avx10_mask_mul_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vmulnepbf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x49,0x59,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_mask_mul_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vmulnepbf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x49,0x59,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i32 %msk to <32 x i1> ; one mask bit per bf16 lane
+ %res0 = fmul <32 x bfloat> %x1, %x2
+ %res = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> %src ; the expected output shows this select folded into the {%k1} form of the multiply
+ ret <32 x bfloat> %res
+}
+
+define <32 x bfloat> @test_int_x86_avx10_maskz_mul_nepbf16_512(<32 x bfloat> %src, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk, ptr %ptr) { ; zero-masked multiply with a register operand and with a memory operand (%src is unused in this body)
+; X64-LABEL: test_int_x86_avx10_maskz_mul_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vmulnepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x59,0xc2]
+; X64-NEXT: vmulnepbf16 (%rsi), %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x59,0x0e]
+; X64-NEXT: vmulnepbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x59,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_mul_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vmulnepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x59,0xc2]
+; X86-NEXT: vmulnepbf16 (%eax), %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x59,0x08]
+; X86-NEXT: vmulnepbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x59,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i32 %msk to <32 x i1> ; one mask bit per bf16 lane
+ %val = load <32 x bfloat>, ptr %ptr ; second operand comes from memory; the expected output uses the load-folded form
+ %res0 = fmul <32 x bfloat> %x1, %x2
+ %res1 = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> zeroinitializer ; zeroing select on the register form
+ %t2 = fmul <32 x bfloat> %x1, %val
+ %res2 = select <32 x i1> %mask, <32 x bfloat> %t2, <32 x bfloat> zeroinitializer ; zeroing select on the memory form
+ %res3 = fmul <32 x bfloat> %res1, %res2 ; uses both masked results in the returned value
+ ret <32 x bfloat> %res3
+}
+
+define <32 x bfloat> @test_int_x86_avx10_div_nepbf16_512(<32 x bfloat> %x1, <32 x bfloat> %x2) { ; unmasked 512-bit bf16 divide
+; CHECK-LABEL: test_int_x86_avx10_div_nepbf16_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vdivnepbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x5e,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = fdiv <32 x bfloat> %x1, %x2 ; generic IR fdiv; the expected output above is a single vdivnepbf16
+ ret <32 x bfloat> %res
+}
+
+define <32 x bfloat> @test_int_x86_avx10_mask_div_nepbf16_512(<32 x bfloat> %src, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk, ptr %ptr) { ; merge-masked divide: lanes with a clear mask bit keep %src (%ptr is unused in this body)
+; X64-LABEL: test_int_x86_avx10_mask_div_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vdivnepbf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x49,0x5e,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_mask_div_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vdivnepbf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x49,0x5e,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i32 %msk to <32 x i1> ; one mask bit per bf16 lane
+ %res0 = fdiv <32 x bfloat> %x1, %x2
+ %res = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> %src ; the expected output shows this select folded into the {%k1} form of the divide
+ ret <32 x bfloat> %res
+}
+
+define <32 x bfloat> @test_int_x86_avx10_maskz_div_nepbf16_512(<32 x bfloat> %src, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk, ptr %ptr) { ; zero-masked divide with a register operand and with a memory operand (%src is unused in this body)
+; X64-LABEL: test_int_x86_avx10_maskz_div_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vdivnepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x5e,0xc2]
+; X64-NEXT: vdivnepbf16 (%rsi), %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x5e,0x0e]
+; X64-NEXT: vdivnepbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x5e,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_div_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vdivnepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x5e,0xc2]
+; X86-NEXT: vdivnepbf16 (%eax), %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x5e,0x08]
+; X86-NEXT: vdivnepbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x5e,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i32 %msk to <32 x i1> ; one mask bit per bf16 lane
+ %val = load <32 x bfloat>, ptr %ptr ; second operand comes from memory; the expected output uses the load-folded form
+ %res0 = fdiv <32 x bfloat> %x1, %x2
+ %res1 = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> zeroinitializer ; zeroing select on the register form
+ %t2 = fdiv <32 x bfloat> %x1, %val
+ %res2 = select <32 x i1> %mask, <32 x bfloat> %t2, <32 x bfloat> zeroinitializer ; zeroing select on the memory form
+ %res3 = fdiv <32 x bfloat> %res1, %res2 ; uses both masked results in the returned value
+ ret <32 x bfloat> %res3
+}
+
+define i32 @test_int_x86_avx10_vcmppbf16512(<32 x bfloat> %x1, <32 x bfloat> %x2) { ; unordered compare of two bf16 vectors, returned as a 32-bit lane mask
+; CHECK-LABEL: test_int_x86_avx10_vcmppbf16512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpunordpbf16 %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7f,0x48,0xc2,0xc1,0x03]
+; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %1 = fcmp uno <32 x bfloat> %x1, %x2 ; "uno" predicate; the expected output uses vcmpunordpbf16 (last encoding byte 0x03)
+ %res = bitcast <32 x i1> %1 to i32 ; pack the 32 per-lane results into the integer return value
+ ret i32 %res
+}
+
+; FIXME: _mm512_mask_cmp_p[s|h]_mask does not use a {k2} write-mask here (the mask is applied with a separate AND), but GCC does.
+define i32 @test_int_x86_avx10_vcmppbf16512_mask2(<32 x bfloat> %x1, <32 x bfloat> %x2) {
+; CHECK-LABEL: test_int_x86_avx10_vcmppbf16512_mask2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpeqpbf16 %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7f,0x48,0xc2,0xc1,0x00]
+; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: andl $3, %eax # encoding: [0x83,0xe0,0x03]
+; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %1 = fcmp oeq <32 x bfloat> %x1, %x2
+ %2 = and <32 x i1> %1, <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+ %3 = bitcast <32 x i1> %2 to i32
+ ret i32 %3
+}
+
+define <32 x bfloat> @test_sqrt_nepbf16_512(<32 x bfloat> %a0) { ; unmasked 512-bit bf16 square root
+; CHECK-LABEL: test_sqrt_nepbf16_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsqrtnepbf16 %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x51,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %1 = tail call <32 x bfloat> @llvm.sqrt.v32bf16(<32 x bfloat> %a0) ; generic llvm.sqrt intrinsic; the expected output above is a single vsqrtnepbf16
+ ret <32 x bfloat> %1
+}
+
+define <32 x bfloat> @test_mm512_mask_sqrt_pbh(<32 x bfloat> %__W, i32 %__U, <32 x bfloat> %__A) { ; merge-masked square root: lanes with a clear bit in %__U keep %__W
+; X64-LABEL: test_mm512_mask_sqrt_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vsqrtnepbf16 %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x49,0x51,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_sqrt_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vsqrtnepbf16 %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x49,0x51,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x bfloat> @llvm.sqrt.v32bf16(<32 x bfloat> %__A)
+ %1 = bitcast i32 %__U to <32 x i1> ; one mask bit per bf16 lane
+ %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> %__W ; the expected output shows this select folded into the {%k1} form of vsqrtnepbf16
+ ret <32 x bfloat> %2
+}
+
+define <32 x bfloat> @test_mm512_maskz_sqrt_pbh(i32 %__U, <32 x bfloat>%__A) { ; zero-masked square root: lanes with a clear bit in %__U become zero
+; X64-LABEL: test_mm512_maskz_sqrt_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vsqrtnepbf16 %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x51,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_sqrt_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vsqrtnepbf16 %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x51,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x bfloat> @llvm.sqrt.v32bf16(<32 x bfloat> %__A)
+ %1 = bitcast i32 %__U to <32 x i1> ; one mask bit per bf16 lane
+ %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> zeroinitializer ; the expected output shows this select folded into the {%k1} {z} form of vsqrtnepbf16
+ ret <32 x bfloat> %2
+}
+
+define <32 x bfloat> @test_mm512_fmaddne_pbh(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) { ; unmasked fma(A, B, C) on 512-bit bf16 vectors
+; CHECK-LABEL: test_mm512_fmaddne_pbh:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfmadd213nepbf16 %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf6,0x74,0x48,0xa8,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) ; generic llvm.fma intrinsic; the expected output uses the 213 operand order
+ ret <32 x bfloat> %0
+}
+
+define <32 x bfloat> @test_mm512_mask_fmaddne_pbh(<32 x bfloat> %__A, i32 %__U, <32 x bfloat> %__B, <32 x bfloat> %__C) { ; fma(A, B, C); lanes with a clear bit in %__U keep %__A
+; X64-LABEL: test_mm512_mask_fmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmadd132nepbf16 %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x49,0x98,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_fmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmadd132nepbf16 %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x49,0x98,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C)
+ %1 = bitcast i32 %__U to <32 x i1> ; one mask bit per bf16 lane
+ %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> %__A ; passthrough is the first multiplicand; the expected output uses the 132 form with {%k1}
+ ret <32 x bfloat> %2
+}
+
+define <32 x bfloat> @test_mm512_mask3_fmaddne_pbh(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C, i32 %__U) { ; fma(A, B, C); lanes with a clear bit in %__U keep %__C
+; X64-LABEL: test_mm512_mask3_fmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmadd231nepbf16 %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0xb8,0xd1]
+; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask3_fmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmadd231nepbf16 %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0xb8,0xd1]
+; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C)
+ %1 = bitcast i32 %__U to <32 x i1> ; one mask bit per bf16 lane
+ %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> %__C ; passthrough is the addend; the expected output uses the 231 form writing zmm2, then copies it to zmm0
+ ret <32 x bfloat> %2
+}
+
+define <32 x bfloat> @test_mm512_maskz_fmaddne_pbh(i32 %__U, <32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) { ; fma(A, B, C); lanes with a clear bit in %__U become zero
+; X64-LABEL: test_mm512_maskz_fmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmadd213nepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xc9,0xa8,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_fmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmadd213nepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xc9,0xa8,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C)
+ %1 = bitcast i32 %__U to <32 x i1> ; one mask bit per bf16 lane
+ %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> zeroinitializer ; the expected output shows this select folded into the {%k1} {z} form
+ ret <32 x bfloat> %2
+}
+
+define <32 x bfloat> @test_mm512_fmsubne_pbh(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) { ; unmasked fma(A, B, -C) on 512-bit bf16 vectors
+; CHECK-LABEL: test_mm512_fmsubne_pbh:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfmsub213nepbf16 %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf6,0x74,0x48,0xaa,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %fneg.i = fneg <32 x bfloat> %__C ; negated addend; the expected output has no separate negate, only vfmsub
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %fneg.i)
+ ret <32 x bfloat> %0
+}
+
+define <32 x bfloat> @test_mm512_mask_fmsubne_pbh(<32 x bfloat> %__A, i32 %__U, <32 x bfloat> %__B, <32 x bfloat> %__C) { ; fma(A, B, -C); lanes with a clear bit in %__U keep %__A
+; X64-LABEL: test_mm512_mask_fmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmsub132nepbf16 %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x49,0x9a,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_fmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmsub132nepbf16 %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x49,0x9a,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <32 x bfloat> %__C ; negated addend
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %fneg.i.i)
+ %1 = bitcast i32 %__U to <32 x i1> ; one mask bit per bf16 lane
+ %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> %__A ; passthrough is the first multiplicand; the expected output uses the 132 form with {%k1}
+ ret <32 x bfloat> %2
+}
+
+define <32 x bfloat> @test_mm512_mask3_fmsubne_pbh(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C, i32 %__U) { ; fma(A, B, -C); lanes with a clear bit in %__U keep the un-negated %__C
+; X64-LABEL: test_mm512_mask3_fmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmsub231nepbf16 %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0xba,0xd1]
+; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask3_fmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmsub231nepbf16 %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0xba,0xd1]
+; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <32 x bfloat> %__C ; negated addend
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %fneg.i.i)
+ %1 = bitcast i32 %__U to <32 x i1> ; one mask bit per bf16 lane
+ %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> %__C ; passthrough is the addend; the expected output uses the 231 form writing zmm2, then copies it to zmm0
+ ret <32 x bfloat> %2
+}
+
+define <32 x bfloat> @test_mm512_maskz_fmsubne_pbh(i32 %__U, <32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) { ; fma(A, B, -C); lanes with a clear bit in %__U become zero
+; X64-LABEL: test_mm512_maskz_fmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmsub213nepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xc9,0xaa,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_fmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmsub213nepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xc9,0xaa,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <32 x bfloat> %__C ; negated addend
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %fneg.i.i)
+ %1 = bitcast i32 %__U to <32 x i1> ; one mask bit per bf16 lane
+ %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> zeroinitializer ; the expected output shows this select folded into the {%k1} {z} form
+ ret <32 x bfloat> %2
+}
+
+define <32 x bfloat> @test_mm512_fnmaddne_pbh(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) { ; unmasked fma(A, -B, C) on 512-bit bf16 vectors
+; CHECK-LABEL: test_mm512_fnmaddne_pbh:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfnmadd213nepbf16 %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf6,0x74,0x48,0xac,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %fneg.i = fneg <32 x bfloat> %__B ; negated multiplicand; the expected output has no separate negate, only vfnmadd
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %fneg.i, <32 x bfloat> %__C)
+ ret <32 x bfloat> %0
+}
+
+define <32 x bfloat> @test_mm512_mask_fnmaddne_pbh(<32 x bfloat> %__A, i32 %__U, <32 x bfloat> %__B, <32 x bfloat> %__C) { ; fma(A, -B, C); lanes with a clear bit in %__U keep %__A
+; X64-LABEL: test_mm512_mask_fnmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmadd132nepbf16 %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x49,0x9c,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_fnmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmadd132nepbf16 %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x49,0x9c,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <32 x bfloat> %__B ; negated multiplicand
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %fneg.i.i, <32 x bfloat> %__C)
+ %1 = bitcast i32 %__U to <32 x i1> ; one mask bit per bf16 lane
+ %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> %__A ; passthrough is the first multiplicand; the expected output uses the 132 form with {%k1}
+ ret <32 x bfloat> %2
+}
+
+define <32 x bfloat> @test_mm512_mask3_fnmaddne_pbh(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C, i32 %__U) { ; fma(A, -B, C); lanes with a clear bit in %__U keep %__C
+; X64-LABEL: test_mm512_mask3_fnmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmadd231nepbf16 %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0xbc,0xd1]
+; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask3_fnmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmadd231nepbf16 %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0xbc,0xd1]
+; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <32 x bfloat> %__B ; negated multiplicand
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %fneg.i.i, <32 x bfloat> %__C)
+ %1 = bitcast i32 %__U to <32 x i1> ; one mask bit per bf16 lane
+ %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> %__C ; passthrough is the addend; the expected output uses the 231 form writing zmm2, then copies it to zmm0
+ ret <32 x bfloat> %2
+}
+
+define <32 x bfloat> @test_mm512_maskz_fnmaddne_pbh(i32 %__U, <32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) { ; fma(A, -B, C); lanes with a clear bit in %__U become zero
+; X64-LABEL: test_mm512_maskz_fnmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmadd213nepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xc9,0xac,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_fnmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmadd213nepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xc9,0xac,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <32 x bfloat> %__B ; negated multiplicand
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %fneg.i.i, <32 x bfloat> %__C)
+ %1 = bitcast i32 %__U to <32 x i1> ; one mask bit per bf16 lane
+ %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> zeroinitializer ; the expected output shows this select folded into the {%k1} {z} form
+ ret <32 x bfloat> %2
+}
+
+define <32 x bfloat> @test_mm512_fnmsubne_pbh(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) { ; unmasked fma(A, -B, -C) on 512-bit bf16 vectors
+; CHECK-LABEL: test_mm512_fnmsubne_pbh:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfnmsub213nepbf16 %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf6,0x74,0x48,0xae,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %fneg.i = fneg <32 x bfloat> %__B ; negated multiplicand
+ %fneg1.i = fneg <32 x bfloat> %__C ; negated addend; the expected output has no separate negates, only vfnmsub
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %fneg.i, <32 x bfloat> %fneg1.i)
+ ret <32 x bfloat> %0
+}
+
+define <32 x bfloat> @test_mm512_mask_fnmsubne_pbh(<32 x bfloat> %__A, i32 %__U, <32 x bfloat> %__B, <32 x bfloat> %__C) { ; fma(A, -B, -C); lanes with a clear bit in %__U keep %__A
+; X64-LABEL: test_mm512_mask_fnmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmsub132nepbf16 %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x49,0x9e,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_fnmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmsub132nepbf16 %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x49,0x9e,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <32 x bfloat> %__B ; negated multiplicand
+ %fneg1.i.i = fneg <32 x bfloat> %__C ; negated addend
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %fneg.i.i, <32 x bfloat> %fneg1.i.i)
+ %1 = bitcast i32 %__U to <32 x i1> ; one mask bit per bf16 lane
+ %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> %__A ; passthrough is the first multiplicand; the expected output uses the 132 form with {%k1}
+ ret <32 x bfloat> %2
+}
+
+define <32 x bfloat> @test_mm512_mask3_fnmsubne_pbh(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C, i32 %__U) { ; fma(A, -B, -C); lanes with a clear bit in %__U keep the un-negated %__C
+; X64-LABEL: test_mm512_mask3_fnmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmsub231nepbf16 %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0xbe,0xd1]
+; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask3_fnmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmsub231nepbf16 %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0xbe,0xd1]
+; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <32 x bfloat> %__B ; negated multiplicand
+ %fneg1.i.i = fneg <32 x bfloat> %__C ; negated addend
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %fneg.i.i, <32 x bfloat> %fneg1.i.i)
+ %1 = bitcast i32 %__U to <32 x i1> ; one mask bit per bf16 lane
+ %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> %__C ; passthrough is the addend; the expected output uses the 231 form writing zmm2, then copies it to zmm0
+ ret <32 x bfloat> %2
+}
+
+define <32 x bfloat> @test_mm512_maskz_fnmsubne_pbh(i32 %__U, <32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) { ; fma(A, -B, -C); lanes with a clear bit in %__U become zero
+; X64-LABEL: test_mm512_maskz_fnmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmsub213nepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xc9,0xae,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_fnmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmsub213nepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xc9,0xae,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <32 x bfloat> %__B ; negated multiplicand
+ %fneg1.i.i = fneg <32 x bfloat> %__C ; negated addend
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %fneg.i.i, <32 x bfloat> %fneg1.i.i)
+ %1 = bitcast i32 %__U to <32 x i1> ; one mask bit per bf16 lane
+ %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> zeroinitializer ; the expected output shows this select folded into the {%k1} {z} form
+ ret <32 x bfloat> %2
+}
diff --git a/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll
new file mode 100644
index 0000000000000..d574d54b9ad79
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll
@@ -0,0 +1,296 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X86
+
+declare <32 x bfloat> @llvm.x86.avx10.vminpbf16512(<32 x bfloat>, <32 x bfloat>)
+
+define <32 x bfloat> @test_int_x86_avx10_min_nepbf16_512(<32 x bfloat> %x1, <32 x bfloat> %x2) {
+; CHECK-LABEL: test_int_x86_avx10_min_nepbf16_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vminpbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x5d,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res0 = call <32 x bfloat> @llvm.x86.avx10.vminpbf16512(<32 x bfloat> %x1, <32 x bfloat> %x2) ; unmasked call: expects a single vminpbf16 on zmm registers
+ ret <32 x bfloat> %res0
+}
+
+define <32 x bfloat> @test_int_x86_avx10_maskz_min_nepbf16_512(<32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk) {
+; X64-LABEL: test_int_x86_avx10_maskz_min_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vminpbf16 %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x5d,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_min_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vminpbf16 %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x5d,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i32 %msk to <32 x i1> ; one mask bit per bf16 lane
+ %res0 = call <32 x bfloat> @llvm.x86.avx10.vminpbf16512(<32 x bfloat> %x1, <32 x bfloat> %x2)
+ %res1 = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> zeroinitializer ; zero-masking select, expected to fold into the {%k1} {z} form
+ ret <32 x bfloat> %res1
+}
+
+declare <32 x bfloat> @llvm.x86.avx10.vmaxpbf16512(<32 x bfloat>, <32 x bfloat>)
+
+define <32 x bfloat> @test_int_x86_avx10_max_nepbf16_512(<32 x bfloat> %x1, <32 x bfloat> %x2) {
+; CHECK-LABEL: test_int_x86_avx10_max_nepbf16_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmaxpbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x5f,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res0 = call <32 x bfloat> @llvm.x86.avx10.vmaxpbf16512(<32 x bfloat> %x1, <32 x bfloat> %x2) ; unmasked call: expects a single vmaxpbf16 on zmm registers
+ ret <32 x bfloat> %res0
+}
+
+define <32 x bfloat> @test_int_x86_avx10_maskz_max_nepbf16_512(<32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk) {
+; X64-LABEL: test_int_x86_avx10_maskz_max_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vmaxpbf16 %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x5f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_max_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vmaxpbf16 %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x5f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i32 %msk to <32 x i1> ; one mask bit per bf16 lane
+ %res0 = call <32 x bfloat> @llvm.x86.avx10.vmaxpbf16512(<32 x bfloat> %x1, <32 x bfloat> %x2)
+ %res1 = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> zeroinitializer ; zero-masking select, expected to fold into the {%k1} {z} form
+ ret <32 x bfloat> %res1
+}
+
+declare i32 @llvm.x86.avx10.vcomsbf16eq(<8 x bfloat>, <8 x bfloat>)
+declare i32 @llvm.x86.avx10.vcomsbf16lt(<8 x bfloat>, <8 x bfloat>)
+declare i32 @llvm.x86.avx10.vcomsbf16le(<8 x bfloat>, <8 x bfloat>)
+declare i32 @llvm.x86.avx10.vcomsbf16gt(<8 x bfloat>, <8 x bfloat>)
+declare i32 @llvm.x86.avx10.vcomsbf16ge(<8 x bfloat>, <8 x bfloat>)
+declare i32 @llvm.x86.avx10.vcomsbf16neq(<8 x bfloat>, <8 x bfloat>)
+
+define i32 @test_x86_avx10_com_nesbf16_eq(<8 x bfloat> %a0, <8 x bfloat> %a1) {
+; CHECK-LABEL: test_x86_avx10_com_nesbf16_eq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcomsbf16 %xmm1, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc1]
+; CHECK-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
+; CHECK-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
+; CHECK-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
+; CHECK-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call i32 @llvm.x86.avx10.vcomsbf16eq(<8 x bfloat> %a0, <8 x bfloat> %a1) ; EQ is checked as (ZF set) AND (PF clear): the sete/setnp/andb sequence
+ ret i32 %res
+}
+
+define i32 @test_x86_avx10_com_nesbf16_lt(<8 x bfloat> %a0, <8 x bfloat> %a1) {
+; CHECK-LABEL: test_x86_avx10_com_nesbf16_lt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT: vcomsbf16 %xmm0, %xmm1 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc8]
+; CHECK-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call i32 @llvm.x86.avx10.vcomsbf16lt(<8 x bfloat> %a0, <8 x bfloat> %a1) ; LT is checked as seta on the compare with its operands swapped
+ ret i32 %res
+}
+
+define i32 @test_x86_avx10_com_nesbf16_le(<8 x bfloat> %a0, <8 x bfloat> %a1) {
+; CHECK-LABEL: test_x86_avx10_com_nesbf16_le:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT: vcomsbf16 %xmm0, %xmm1 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc8]
+; CHECK-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call i32 @llvm.x86.avx10.vcomsbf16le(<8 x bfloat> %a0, <8 x bfloat> %a1) ; LE is checked as setae on the compare with its operands swapped
+ ret i32 %res
+}
+
+define i32 @test_x86_avx10_com_nesbf16_gt(<8 x bfloat> %a0, <8 x bfloat> %a1) {
+; CHECK-LABEL: test_x86_avx10_com_nesbf16_gt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT: vcomsbf16 %xmm1, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc1]
+; CHECK-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call i32 @llvm.x86.avx10.vcomsbf16gt(<8 x bfloat> %a0, <8 x bfloat> %a1) ; exercises the GT intrinsic (was calling the GE one); seta on the unswapped compare mirrors the LT test - rerun update_llc_test_checks.py to confirm
+ ret i32 %res
+}
+
+define i32 @test_x86_avx10_com_nesbf16_neq(<8 x bfloat> %a0, <8 x bfloat> %a1) {
+; CHECK-LABEL: test_x86_avx10_com_nesbf16_neq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcomsbf16 %xmm1, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc1]
+; CHECK-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
+; CHECK-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
+; CHECK-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
+; CHECK-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call i32 @llvm.x86.avx10.vcomsbf16neq(<8 x bfloat> %a0, <8 x bfloat> %a1) ; NEQ is checked as (ZF clear) OR (PF set): the setne/setp/orb sequence
+ ret i32 %res
+}
+
+declare <32 x bfloat> @llvm.x86.avx10.mask.rsqrt.nepbf16.512(<32 x bfloat>, <32 x bfloat>, i32)
+
+define <32 x bfloat> @test_rsqrt_nepbf16_512(<32 x bfloat> %a0) {
+; CHECK-LABEL: test_rsqrt_nepbf16_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vrsqrtpbf16 %zmm0, %zmm0 # encoding: [0x62,0xf6,0x7c,0x48,0x4e,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <32 x bfloat> @llvm.x86.avx10.mask.rsqrt.nepbf16.512(<32 x bfloat> %a0, <32 x bfloat> zeroinitializer, i32 -1) ; all-ones mask with a zero passthru: expects the unmasked vrsqrtpbf16
+ ret <32 x bfloat> %res
+}
+
+declare <32 x i1> @llvm.x86.avx10.fpclass.nepbf16.512(<32 x bfloat>, i32)
+
+define i32 @test_int_x86_avx512_fpclass_nepbf16_512(<32 x bfloat> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_fpclass_nepbf16_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfpclasspbf16 $2, %zmm0, %k1 # encoding: [0x62,0xf3,0x7f,0x48,0x66,0xc8,0x02]
+; CHECK-NEXT: vfpclasspbf16 $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x66,0xc0,0x04]
+; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <32 x i1> @llvm.x86.avx10.fpclass.nepbf16.512(<32 x bfloat> %x0, i32 4) ; class immediate 4
+ %res1 = call <32 x i1> @llvm.x86.avx10.fpclass.nepbf16.512(<32 x bfloat> %x0, i32 2) ; class immediate 2
+ %1 = and <32 x i1> %res1, %res ; per the CHECK lines this AND is folded into the {%k1} write-mask of the second vfpclasspbf16
+ %2 = bitcast <32 x i1> %1 to i32 ; 32 lane bits returned as an integer (the kmovd %k0, %eax)
+ ret i32 %2
+}
+
+declare <32 x bfloat> @llvm.x86.avx10.mask.rcp.nepbf16.512(<32 x bfloat>, <32 x bfloat>, i32)
+
+define <32 x bfloat> @test_rcp_nepbf16_512(<32 x bfloat> %a0, <32 x bfloat> %a1, i32 %mask) {
+; X64-LABEL: test_rcp_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vrcppbf16 %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0x4c,0xc8]
+; X64-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_rcp_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vrcppbf16 %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0x4c,0xc8]
+; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <32 x bfloat> @llvm.x86.avx10.mask.rcp.nepbf16.512(<32 x bfloat> %a0, <32 x bfloat> %a1, i32 %mask) ; merge-masking with %a1 as passthru: result is built in %zmm1, then copied to %zmm0
+ ret <32 x bfloat> %res
+}
+
+declare <32 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.512(<32 x bfloat>, i32, <32 x bfloat>, i32)
+
+define <32 x bfloat>@test_int_x86_avx512_mask_reduce_nepbf16_512(<32 x bfloat> %x0, <32 x bfloat> %x2, i32 %x3) {
+; X64-LABEL: test_int_x86_avx512_mask_reduce_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vreducenepbf16 $8, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x56,0xc8,0x08]
+; X64-NEXT: vreducenepbf16 $4, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x56,0xc0,0x04]
+; X64-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_reduce_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vreducenepbf16 $8, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x56,0xc8,0x08]
+; X86-NEXT: vreducenepbf16 $4, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x56,0xc0,0x04]
+; X86-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <32 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.512(<32 x bfloat> %x0, i32 8, <32 x bfloat> %x2, i32 %x3) ; imm 8, merge-masked by %x3 with passthru %x2
+ %res1 = call <32 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.512(<32 x bfloat> %x0, i32 4, <32 x bfloat> %x2, i32 -1) ; imm 4, all-ones mask: unmasked form
+ %res2 = fadd <32 x bfloat> %res, %res1 ; uses both results, so both variants are emitted
+ ret <32 x bfloat> %res2
+}
+
+declare <32 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.512(<32 x bfloat>, i32, <32 x bfloat>, i32)
+
+define <32 x bfloat>@test_int_x86_avx512_mask_rndscale_nepbf16_512(<32 x bfloat> %x0, <32 x bfloat> %x2, i32 %x3) {
+; X64-LABEL: test_int_x86_avx512_mask_rndscale_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vrndscalenepbf16 $8, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x08,0xc8,0x08]
+; X64-NEXT: vrndscalenepbf16 $4, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x08,0xc0,0x04]
+; X64-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_rndscale_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vrndscalenepbf16 $8, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x08,0xc8,0x08]
+; X86-NEXT: vrndscalenepbf16 $4, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x08,0xc0,0x04]
+; X86-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <32 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.512(<32 x bfloat> %x0, i32 8, <32 x bfloat> %x2, i32 %x3) ; imm 8, merge-masked by %x3 with passthru %x2
+ %res1 = call <32 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.512(<32 x bfloat> %x0, i32 4, <32 x bfloat> %x2, i32 -1) ; imm 4, all-ones mask: unmasked form
+ %res2 = fadd <32 x bfloat> %res, %res1 ; uses both results, so both variants are emitted
+ ret <32 x bfloat> %res2
+}
+
+declare <32 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.512(<32 x bfloat>, <32 x bfloat>, i32)
+
+define <32 x bfloat>@test_int_x86_avx512_mask_getexp_nepbf16_512(<32 x bfloat> %x0, <32 x bfloat> %x1, i32 %x2) {
+; X64-LABEL: test_int_x86_avx512_mask_getexp_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vgetexppbf16 %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x42,0xc0]
+; X64-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xc8]
+; X64-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_getexp_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vgetexppbf16 %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x42,0xc0]
+; X86-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xc8]
+; X86-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res1 = call <32 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.512(<32 x bfloat> %x0, <32 x bfloat> %x1, i32 %x2) ; merge-masked by %x2 with passthru %x1
+ %res2 = call <32 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.512(<32 x bfloat> %x0, <32 x bfloat> zeroinitializer, i32 -1) ; all-ones mask: unmasked form
+ %res3 = fadd <32 x bfloat> %res1, %res2 ; per the CHECK lines only one vgetexppbf16 is emitted; the masked variant becomes a vmovdqu16 blend into %zmm1
+ ret <32 x bfloat> %res3
+}
+
+declare <32 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.512(<32 x bfloat>, i32, <32 x bfloat>, i32)
+
+define <32 x bfloat>@test_int_x86_avx512_mask_getmant_nepbf16_512(<32 x bfloat> %x0, <32 x bfloat> %x2, i32 %x3) {
+; X64-LABEL: test_int_x86_avx512_mask_getmant_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vgetmantpbf16 $8, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x26,0xc8,0x08]
+; X64-NEXT: vgetmantpbf16 $4, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x26,0xc0,0x04]
+; X64-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_getmant_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vgetmantpbf16 $8, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x26,0xc8,0x08]
+; X86-NEXT: vgetmantpbf16 $4, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x26,0xc0,0x04]
+; X86-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <32 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.512(<32 x bfloat> %x0, i32 8, <32 x bfloat> %x2, i32 %x3) ; imm 8, merge-masked by %x3 with passthru %x2
+ %res1 = call <32 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.512(<32 x bfloat> %x0, i32 4, <32 x bfloat> %x2, i32 -1) ; imm 4, all-ones mask: unmasked form
+ %res2 = fadd <32 x bfloat> %res, %res1 ; uses both results, so both variants are emitted
+ ret <32 x bfloat> %res2
+}
+
+declare <32 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.512(<32 x bfloat>, <32 x bfloat>, <32 x bfloat>, i32)
+
+define <32 x bfloat>@test_int_x86_avx512_mask_scalef_nepbf16_512(<32 x bfloat> %x0, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %x3) {
+; X64-LABEL: test_int_x86_avx512_mask_scalef_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vscalefpbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf6,0x7c,0x48,0x2c,0xc1]
+; X64-NEXT: vmovdqu16 %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xd0]
+; X64-NEXT: vaddnepbf16 %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf5,0x6d,0x48,0x58,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_scalef_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vscalefpbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf6,0x7c,0x48,0x2c,0xc1]
+; X86-NEXT: vmovdqu16 %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xd0]
+; X86-NEXT: vaddnepbf16 %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf5,0x6d,0x48,0x58,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i32 %x3 to <32 x i1> ; NOTE(review): %mask is never used below; the calls take the i32 mask %x3 directly
+ %res1 = call <32 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.512(<32 x bfloat> %x0, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %x3) ; merge-masked by %x3 with passthru %x2
+ %res2 = call <32 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.512(<32 x bfloat> %x0, <32 x bfloat> %x1, <32 x bfloat> zeroinitializer, i32 -1) ; all-ones mask: unmasked form
+ %res3 = fadd <32 x bfloat> %res1, %res2 ; per the CHECK lines only one vscalefpbf16 is emitted; the masked variant becomes a vmovdqu16 blend into %zmm2
+ ret <32 x bfloat> %res3
+}
diff --git a/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll b/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll
new file mode 100644
index 0000000000000..e0f5679e8ac96
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll
@@ -0,0 +1,1168 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X86
+
+define <16 x bfloat> @test_int_x86_avx10_add_nepbf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2) {
+; CHECK-LABEL: test_int_x86_avx10_add_nepbf16_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vaddnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x58,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = fadd <16 x bfloat> %x1, %x2 ; plain IR fadd: expects a single vaddnepbf16 on ymm registers
+ ret <16 x bfloat> %res
+}
+
+define <16 x bfloat> @test_int_x86_avx10_mask_add_nepbf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_mask_add_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vaddnepbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x58,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_mask_add_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vaddnepbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x58,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i16 %msk to <16 x i1> ; one mask bit per lane; %ptr is unused in this variant
+ %res0 = fadd <16 x bfloat> %x1, %x2
+ %res = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> %src ; merge-masking: lanes with a clear mask bit keep %src
+ ret <16 x bfloat> %res
+}
+define <16 x bfloat> @test_int_x86_avx10_maskz_add_nepbf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_maskz_add_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vaddnepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x58,0xc2]
+; X64-NEXT: vaddnepbf16 (%rsi), %ymm1, %ymm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x58,0x0e]
+; X64-NEXT: vaddnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x58,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_add_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vaddnepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x58,0xc2]
+; X86-NEXT: vaddnepbf16 (%eax), %ymm1, %ymm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x58,0x08]
+; X86-NEXT: vaddnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x58,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i16 %msk to <16 x i1> ; one mask bit per lane; %src is unused in this variant
+ %val = load <16 x bfloat>, ptr %ptr ; expected to fold into the memory-operand form of the second add
+ %res0 = fadd <16 x bfloat> %x1, %x2
+ %res1 = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> zeroinitializer ; zero-masking, register operands
+ %t2 = fadd <16 x bfloat> %x1, %val
+ %res2 = select <16 x i1> %mask, <16 x bfloat> %t2, <16 x bfloat> zeroinitializer ; zero-masking, memory operand
+ %res3 = fadd <16 x bfloat> %res1, %res2 ; combines both masked results with an unmasked add
+ ret <16 x bfloat> %res3
+}
+
+define <8 x bfloat> @test_int_x86_avx10_add_nepbf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2) {
+; CHECK-LABEL: test_int_x86_avx10_add_nepbf16_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vaddnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x58,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = fadd <8 x bfloat> %x1, %x2 ; plain IR fadd: expects a single vaddnepbf16 on xmm registers
+ ret <8 x bfloat> %res
+}
+
+define <8 x bfloat> @test_int_x86_avx10_mask_add_nepbf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_mask_add_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vaddnepbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x58,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_mask_add_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vaddnepbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x58,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i8 %msk to <8 x i1> ; one mask bit per lane; %ptr is unused in this variant
+ %res0 = fadd <8 x bfloat> %x1, %x2
+ %res = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> %src ; merge-masking: lanes with a clear mask bit keep %src
+ ret <8 x bfloat> %res
+}
+
+define <8 x bfloat> @test_int_x86_avx10_maskz_add_nepbf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_maskz_add_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vaddnepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x58,0xc2]
+; X64-NEXT: vaddnepbf16 (%rsi), %xmm1, %xmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x58,0x0e]
+; X64-NEXT: vaddnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x58,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_add_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vaddnepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x58,0xc2]
+; X86-NEXT: vaddnepbf16 (%eax), %xmm1, %xmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x58,0x08]
+; X86-NEXT: vaddnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x58,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i8 %msk to <8 x i1> ; one mask bit per lane; %src is unused in this variant
+ %val = load <8 x bfloat>, ptr %ptr ; expected to fold into the memory-operand form of the second add
+ %res0 = fadd <8 x bfloat> %x1, %x2
+ %res1 = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> zeroinitializer ; zero-masking, register operands
+ %t2 = fadd <8 x bfloat> %x1, %val
+ %res2 = select <8 x i1> %mask, <8 x bfloat> %t2, <8 x bfloat> zeroinitializer ; zero-masking, memory operand
+ %res3 = fadd <8 x bfloat> %res1, %res2 ; combines both masked results with an unmasked add
+ ret <8 x bfloat> %res3
+}
+
+define <16 x bfloat> @test_int_x86_avx10_sub_nepbf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2) {
+; CHECK-LABEL: test_int_x86_avx10_sub_nepbf16_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsubnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x5c,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = fsub <16 x bfloat> %x1, %x2 ; plain IR fsub: expects a single vsubnepbf16 on ymm registers
+ ret <16 x bfloat> %res
+}
+
+define <16 x bfloat> @test_int_x86_avx10_mask_sub_nepbf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_mask_sub_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vsubnepbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x5c,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_mask_sub_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vsubnepbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x5c,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i16 %msk to <16 x i1> ; one mask bit per lane; %ptr is unused in this variant
+ %res0 = fsub <16 x bfloat> %x1, %x2
+ %res = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> %src ; merge-masking: lanes with a clear mask bit keep %src
+ ret <16 x bfloat> %res
+}
+
+define <16 x bfloat> @test_int_x86_avx10_maskz_sub_nepbf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_maskz_sub_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vsubnepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x5c,0xc2]
+; X64-NEXT: vsubnepbf16 (%rsi), %ymm1, %ymm1 # encoding: [0x62,0xf5,0x75,0x28,0x5c,0x0e]
+; X64-NEXT: vsubnepbf16 %ymm1, %ymm0, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x5c,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_sub_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vsubnepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x5c,0xc2]
+; X86-NEXT: vsubnepbf16 (%eax), %ymm1, %ymm1 # encoding: [0x62,0xf5,0x75,0x28,0x5c,0x08]
+; X86-NEXT: vsubnepbf16 %ymm1, %ymm0, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x5c,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i16 %msk to <16 x i1> ; one mask bit per lane; %src is unused in this variant
+ %val = load <16 x bfloat>, ptr %ptr ; expected to fold into the memory-operand form of the second sub
+ %res0 = fsub <16 x bfloat> %x1, %x2
+ %res1 = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> zeroinitializer ; zero-masking, register operands
+ %t2 = fsub <16 x bfloat> %x1, %val
+ %res2 = select <16 x i1> %mask, <16 x bfloat> %t2, <16 x bfloat> zeroinitializer
+ %res3 = fsub <16 x bfloat> %res1, %res2 ; per the CHECK lines the memory-operand sub is emitted unmasked and this final sub carries the {%k1} merge mask
+ ret <16 x bfloat> %res3
+}
+
+define <8 x bfloat> @test_int_x86_avx10_sub_nepbf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2) {
+; CHECK-LABEL: test_int_x86_avx10_sub_nepbf16_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsubnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x5c,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = fsub <8 x bfloat> %x1, %x2 ; plain IR fsub: expects a single vsubnepbf16 on xmm registers
+ ret <8 x bfloat> %res
+}
+
+define <8 x bfloat> @test_int_x86_avx10_mask_sub_nepbf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_mask_sub_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vsubnepbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x5c,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_mask_sub_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vsubnepbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x5c,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i8 %msk to <8 x i1> ; one mask bit per lane; %ptr is unused in this variant
+ %res0 = fsub <8 x bfloat> %x1, %x2
+ %res = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> %src ; merge-masking: lanes with a clear mask bit keep %src
+ ret <8 x bfloat> %res
+}
+
+define <8 x bfloat> @test_int_x86_avx10_maskz_sub_nepbf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_maskz_sub_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vsubnepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x5c,0xc2]
+; X64-NEXT: vsubnepbf16 (%rsi), %xmm1, %xmm1 # encoding: [0x62,0xf5,0x75,0x08,0x5c,0x0e]
+; X64-NEXT: vsubnepbf16 %xmm1, %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x5c,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_sub_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vsubnepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x5c,0xc2]
+; X86-NEXT: vsubnepbf16 (%eax), %xmm1, %xmm1 # encoding: [0x62,0xf5,0x75,0x08,0x5c,0x08]
+; X86-NEXT: vsubnepbf16 %xmm1, %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x5c,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i8 %msk to <8 x i1> ; one mask bit per lane; %src is unused in this variant
+ %val = load <8 x bfloat>, ptr %ptr ; expected to fold into the memory-operand form of the second sub
+ %res0 = fsub <8 x bfloat> %x1, %x2
+ %res1 = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> zeroinitializer ; zero-masking, register operands
+ %t2 = fsub <8 x bfloat> %x1, %val
+ %res2 = select <8 x i1> %mask, <8 x bfloat> %t2, <8 x bfloat> zeroinitializer
+ %res3 = fsub <8 x bfloat> %res1, %res2 ; per the CHECK lines the memory-operand sub is emitted unmasked and this final sub carries the {%k1} merge mask
+ ret <8 x bfloat> %res3
+}
+
+define <16 x bfloat> @test_int_x86_avx10_mul_nepbf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2) {
+; CHECK-LABEL: test_int_x86_avx10_mul_nepbf16_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmulnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x59,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = fmul <16 x bfloat> %x1, %x2 ; plain IR fmul: expects a single vmulnepbf16 on ymm registers
+ ret <16 x bfloat> %res
+}
+
+define <16 x bfloat> @test_int_x86_avx10_mask_mul_nepbf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_mask_mul_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vmulnepbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x59,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_mask_mul_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vmulnepbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x59,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i16 %msk to <16 x i1> ; one mask bit per lane; %ptr is unused in this variant
+ %res0 = fmul <16 x bfloat> %x1, %x2
+ %res = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> %src ; merge-masking: lanes with a clear mask bit keep %src
+ ret <16 x bfloat> %res
+}
+
+define <16 x bfloat> @test_int_x86_avx10_maskz_mul_nepbf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_maskz_mul_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vmulnepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x59,0xc2]
+; X64-NEXT: vmulnepbf16 (%rsi), %ymm1, %ymm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x59,0x0e]
+; X64-NEXT: vmulnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x59,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_mul_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vmulnepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x59,0xc2]
+; X86-NEXT: vmulnepbf16 (%eax), %ymm1, %ymm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x59,0x08]
+; X86-NEXT: vmulnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x59,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i16 %msk to <16 x i1> ; one mask bit per lane; %src is unused in this variant
+ %val = load <16 x bfloat>, ptr %ptr ; expected to fold into the memory-operand form of the second mul
+ %res0 = fmul <16 x bfloat> %x1, %x2
+ %res1 = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> zeroinitializer ; zero-masking, register operands
+ %t2 = fmul <16 x bfloat> %x1, %val
+ %res2 = select <16 x i1> %mask, <16 x bfloat> %t2, <16 x bfloat> zeroinitializer ; zero-masking, memory operand
+ %res3 = fmul <16 x bfloat> %res1, %res2 ; combines both masked results with an unmasked mul
+ ret <16 x bfloat> %res3
+}
+
+define <8 x bfloat> @test_int_x86_avx10_mul_nepbf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2) {
+; CHECK-LABEL: test_int_x86_avx10_mul_nepbf16_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmulnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x59,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = fmul <8 x bfloat> %x1, %x2 ; plain IR fmul: expects a single vmulnepbf16 on xmm registers
+ ret <8 x bfloat> %res
+}
+
+define <8 x bfloat> @test_int_x86_avx10_mask_mul_nepbf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_mask_mul_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vmulnepbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x59,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_mask_mul_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vmulnepbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x59,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i8 %msk to <8 x i1> ; one mask bit per lane; %ptr is unused in this variant
+ %res0 = fmul <8 x bfloat> %x1, %x2
+ %res = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> %src ; merge-masking: lanes with a clear mask bit keep %src
+ ret <8 x bfloat> %res
+}
+
+define <8 x bfloat> @test_int_x86_avx10_maskz_mul_nepbf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) { ; Zero-masked 128-bit bf16 multiply, covering both a register and a memory second operand. %src is not referenced by this body.
+; X64-LABEL: test_int_x86_avx10_maskz_mul_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vmulnepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x59,0xc2]
+; X64-NEXT: vmulnepbf16 (%rsi), %xmm1, %xmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x59,0x0e]
+; X64-NEXT: vmulnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x59,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_mul_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vmulnepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x59,0xc2]
+; X86-NEXT: vmulnepbf16 (%eax), %xmm1, %xmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x59,0x08]
+; X86-NEXT: vmulnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x59,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i8 %msk to <8 x i1> ; one predicate bit per bf16 lane
+ %val = load <8 x bfloat>, ptr %ptr ; this load is expected to fold into the second multiply as a memory operand
+ %res0 = fmul <8 x bfloat> %x1, %x2
+ %res1 = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> zeroinitializer ; zero passthru -> {%k1} {z} register form
+ %t2 = fmul <8 x bfloat> %x1, %val
+ %res2 = select <8 x i1> %mask, <8 x bfloat> %t2, <8 x bfloat> zeroinitializer ; zero passthru -> {%k1} {z} memory form
+ %res3 = fmul <8 x bfloat> %res1, %res2 ; combine both masked results so each of them is used
+ ret <8 x bfloat> %res3
+}
+
+define <16 x bfloat> @test_int_x86_avx10_div_nepbf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2) { ; Unmasked 256-bit bf16 divide: a plain fdiv is expected to select one vdivnepbf16 on ymm registers.
+; CHECK-LABEL: test_int_x86_avx10_div_nepbf16_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vdivnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x5e,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = fdiv <16 x bfloat> %x1, %x2 ; generic IR fdiv, no target intrinsic involved
+ ret <16 x bfloat> %res
+}
+
+define <16 x bfloat> @test_int_x86_avx10_mask_div_nepbf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) { ; Merge-masked 256-bit bf16 divide: lanes whose mask bit is clear keep %src. %ptr is not referenced by this body.
+; X64-LABEL: test_int_x86_avx10_mask_div_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vdivnepbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x5e,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_mask_div_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vdivnepbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x5e,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i16 %msk to <16 x i1> ; one predicate bit per bf16 lane
+ %res0 = fdiv <16 x bfloat> %x1, %x2
+ %res = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> %src ; select with %src passthru is expected to fold into the {%k1} form of the divide
+ ret <16 x bfloat> %res
+}
+
+; FIXME: assembly order is different from fp16 ones
+define <16 x bfloat> @test_int_x86_avx10_maskz_div_nepbf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) { ; Zero-masked 256-bit bf16 divide, covering both a register and a memory divisor. %src is not referenced by this body.
+; X64-LABEL: test_int_x86_avx10_maskz_div_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vdivnepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x5e,0xc2]
+; X64-NEXT: vdivnepbf16 (%rsi), %ymm1, %ymm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x5e,0x0e]
+; X64-NEXT: vdivnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x5e,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_div_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vdivnepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x5e,0xc2]
+; X86-NEXT: vdivnepbf16 (%eax), %ymm1, %ymm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x5e,0x08]
+; X86-NEXT: vdivnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x5e,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i16 %msk to <16 x i1> ; one predicate bit per bf16 lane
+ %val = load <16 x bfloat>, ptr %ptr ; this load is expected to fold into the second divide as a memory operand
+ %res0 = fdiv <16 x bfloat> %x1, %x2
+ %res1 = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> zeroinitializer ; zero passthru -> {%k1} {z} register form
+ %t2 = fdiv <16 x bfloat> %x1, %val
+ %res2 = select <16 x i1> %mask, <16 x bfloat> %t2, <16 x bfloat> zeroinitializer ; zero passthru -> {%k1} {z} memory form
+ %res3 = fdiv <16 x bfloat> %res1, %res2 ; combine both masked results so each of them is used
+ ret <16 x bfloat> %res3
+}
+
+define <8 x bfloat> @test_int_x86_avx10_div_nepbf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2) { ; Unmasked 128-bit bf16 divide: a plain fdiv is expected to select one vdivnepbf16 on xmm registers.
+; CHECK-LABEL: test_int_x86_avx10_div_nepbf16_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vdivnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x5e,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = fdiv <8 x bfloat> %x1, %x2 ; generic IR fdiv, no target intrinsic involved
+ ret <8 x bfloat> %res
+}
+
+define <8 x bfloat> @test_int_x86_avx10_mask_div_nepbf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) { ; Merge-masked 128-bit bf16 divide: lanes whose mask bit is clear keep %src. %ptr is not referenced by this body.
+; X64-LABEL: test_int_x86_avx10_mask_div_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vdivnepbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x5e,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_mask_div_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vdivnepbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x5e,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i8 %msk to <8 x i1> ; one predicate bit per bf16 lane
+ %res0 = fdiv <8 x bfloat> %x1, %x2
+ %res = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> %src ; select with %src passthru is expected to fold into the {%k1} form of the divide
+ ret <8 x bfloat> %res
+}
+
+; FIXME: assembly order is different from fp16 ones
+define <8 x bfloat> @test_int_x86_avx10_maskz_div_nepbf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) { ; Zero-masked 128-bit bf16 divide, covering both a register and a memory divisor. %src is not referenced by this body.
+; X64-LABEL: test_int_x86_avx10_maskz_div_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vdivnepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x5e,0xc2]
+; X64-NEXT: vdivnepbf16 (%rsi), %xmm1, %xmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x5e,0x0e]
+; X64-NEXT: vdivnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x5e,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_div_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vdivnepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x5e,0xc2]
+; X86-NEXT: vdivnepbf16 (%eax), %xmm1, %xmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x5e,0x08]
+; X86-NEXT: vdivnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x5e,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i8 %msk to <8 x i1> ; one predicate bit per bf16 lane
+ %val = load <8 x bfloat>, ptr %ptr ; this load is expected to fold into the second divide as a memory operand
+ %res0 = fdiv <8 x bfloat> %x1, %x2
+ %res1 = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> zeroinitializer ; zero passthru -> {%k1} {z} register form
+ %t2 = fdiv <8 x bfloat> %x1, %val
+ %res2 = select <8 x i1> %mask, <8 x bfloat> %t2, <8 x bfloat> zeroinitializer ; zero passthru -> {%k1} {z} memory form
+ %res3 = fdiv <8 x bfloat> %res1, %res2 ; combine both masked results so each of them is used
+ ret <8 x bfloat> %res3
+}
+
+define i16 @test_int_x86_avx10_vcmppbf16256(<16 x bfloat> %x1, <16 x bfloat> %x2) { ; 256-bit bf16 unordered compare: fcmp uno is expected to select vcmpunordpbf16 (immediate 0x03) writing a mask register.
+; CHECK-LABEL: test_int_x86_avx10_vcmppbf16256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpunordpbf16 %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7f,0x28,0xc2,0xc1,0x03]
+; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %1 = fcmp uno <16 x bfloat> %x1, %x2
+ %res = bitcast <16 x i1> %1 to i16 ; return the 16 per-lane results as an integer bitmask
+ ret i16 %res
+}
+
+define i16 @test_int_x86_avx10_vcmppbf16256_mask2(<16 x bfloat> %x1, <16 x bfloat> %x2) { ; 256-bit bf16 ordered-equal compare whose result is ANDed with a constant mask that keeps only lanes 0 and 1.
+; CHECK-LABEL: test_int_x86_avx10_vcmppbf16256_mask2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpeqpbf16 %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7f,0x28,0xc2,0xc1,0x00]
+; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: andl $3, %eax # encoding: [0x83,0xe0,0x03]
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %1 = fcmp oeq <16 x bfloat> %x1, %x2
+ %2 = and <16 x i1> %1, <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false> ; only the first two lanes are true, which corresponds to the immediate 3 in the expected scalar and
+ %3 = bitcast <16 x i1> %2 to i16 ; return the 16 per-lane results as an integer bitmask
+ ret i16 %3
+}
+
+define i8 @test_int_x86_avx10_vcmppbf16128(<8 x bfloat> %x1, <8 x bfloat> %x2) { ; 128-bit bf16 unordered compare: fcmp uno is expected to select vcmpunordpbf16 (immediate 0x03) writing a mask register.
+; CHECK-LABEL: test_int_x86_avx10_vcmppbf16128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpunordpbf16 %xmm1, %xmm0, %k0 # encoding: [0x62,0xf3,0x7f,0x08,0xc2,0xc1,0x03]
+; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %1 = fcmp uno <8 x bfloat> %x1, %x2
+ %res = bitcast <8 x i1> %1 to i8 ; return the 8 per-lane results as an integer bitmask
+ ret i8 %res
+}
+
+define i8 @test_int_x86_avx10_vcmppbf16128_mask2(<8 x bfloat> %x1, <8 x bfloat> %x2) { ; 128-bit bf16 ordered-equal compare whose result is ANDed with a constant mask that keeps only lanes 0 and 1.
+; CHECK-LABEL: test_int_x86_avx10_vcmppbf16128_mask2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpeqpbf16 %xmm1, %xmm0, %k0 # encoding: [0x62,0xf3,0x7f,0x08,0xc2,0xc1,0x00]
+; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: andb $3, %al # encoding: [0x24,0x03]
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %1 = fcmp oeq <8 x bfloat> %x1, %x2
+ %2 = and <8 x i1> %1, <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false> ; only the first two lanes are true, which corresponds to the immediate 3 in the expected scalar and
+ %3 = bitcast <8 x i1> %2 to i8 ; return the 8 per-lane results as an integer bitmask
+ ret i8 %3
+}
+
+define <16 x bfloat> @test_sqrt_nepbf16_256(<16 x bfloat> %a0) { ; Unmasked 256-bit bf16 square root: llvm.sqrt.v16bf16 is expected to select one vsqrtnepbf16 on ymm registers.
+; CHECK-LABEL: test_sqrt_nepbf16_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsqrtnepbf16 %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x51,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %1 = tail call <16 x bfloat> @llvm.sqrt.v16bf16(<16 x bfloat> %a0) ; generic sqrt intrinsic rather than a target-specific one
+ ret <16 x bfloat> %1
+}
+
+define <16 x bfloat> @test_mm256_mask_sqrt_pbh(<16 x bfloat> %__W, i16 %__U, <16 x bfloat> %__A) { ; Merge-masked 256-bit bf16 square root of %__A: lanes whose bit in %__U is clear keep %__W.
+; X64-LABEL: test_mm256_mask_sqrt_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vsqrtnepbf16 %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x51,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_sqrt_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vsqrtnepbf16 %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x51,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x bfloat> @llvm.sqrt.v16bf16(<16 x bfloat> %__A)
+ %1 = bitcast i16 %__U to <16 x i1> ; one predicate bit per bf16 lane
+ %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__W ; select with %__W passthru is expected to fold into the {%k1} form
+ ret <16 x bfloat> %2
+}
+
+define <16 x bfloat> @test_mm256_maskz_sqrt_pbh(i16 %__U, <16 x bfloat>%__A) { ; Zero-masked 256-bit bf16 square root of %__A: lanes whose bit in %__U is clear become zero.
+; X64-LABEL: test_mm256_maskz_sqrt_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vsqrtnepbf16 %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x51,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_sqrt_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vsqrtnepbf16 %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x51,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x bfloat> @llvm.sqrt.v16bf16(<16 x bfloat> %__A)
+ %1 = bitcast i16 %__U to <16 x i1> ; one predicate bit per bf16 lane
+ %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> zeroinitializer ; zero passthru is expected to fold into the {%k1} {z} form
+ ret <16 x bfloat> %2
+}
+
+define <8 x bfloat> @test_sqrt_nepbf16_128(<8 x bfloat> %a0) { ; Unmasked 128-bit bf16 square root: llvm.sqrt.v8bf16 is expected to select one vsqrtnepbf16 on xmm registers.
+; CHECK-LABEL: test_sqrt_nepbf16_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsqrtnepbf16 %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x51,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %1 = tail call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> %a0) ; generic sqrt intrinsic rather than a target-specific one
+ ret <8 x bfloat> %1
+}
+
+define <8 x bfloat> @test_mm_mask_sqrt_pbh(<8 x bfloat> %__W, i8 %__U, <8 x bfloat> %__A) { ; Merge-masked 128-bit bf16 square root of %__A: lanes whose bit in %__U is clear keep %__W.
+; X64-LABEL: test_mm_mask_sqrt_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vsqrtnepbf16 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x51,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask_sqrt_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vsqrtnepbf16 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x51,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> %__A)
+ %1 = bitcast i8 %__U to <8 x i1> ; one predicate bit per bf16 lane
+ %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__W ; select with %__W passthru is expected to fold into the {%k1} form
+ ret <8 x bfloat> %2
+}
+
+define <8 x bfloat> @test_mm_maskz_sqrt_pbh(i8 %__U, <8 x bfloat>%__A) { ; Zero-masked 128-bit bf16 square root of %__A: lanes whose bit in %__U is clear become zero.
+; X64-LABEL: test_mm_maskz_sqrt_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vsqrtnepbf16 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x51,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_maskz_sqrt_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vsqrtnepbf16 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x51,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> %__A)
+ %1 = bitcast i8 %__U to <8 x i1> ; one predicate bit per bf16 lane
+ %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> zeroinitializer ; zero passthru is expected to fold into the {%k1} {z} form
+ ret <8 x bfloat> %2
+}
+
+define <16 x bfloat> @test_mm256_fmaddne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) { ; Unmasked 256-bit bf16 fused multiply-add: llvm.fma.v16bf16 is expected to select vfmadd213nepbf16.
+; CHECK-LABEL: test_mm256_fmaddne_pbh:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfmadd213nepbf16 %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf6,0x74,0x28,0xa8,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) ; A * B + C
+ ret <16 x bfloat> %0
+}
+
+define <16 x bfloat> @test_mm256_mask_fmaddne_pbh(<16 x bfloat> %__A, i16 zeroext %__U, <16 x bfloat> %__B, <16 x bfloat> %__C) { ; Merge-masked 256-bit bf16 FMA with the first multiplicand %__A as passthru; the 132 form is expected.
+; X64-LABEL: test_mm256_mask_fmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmadd132nepbf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x98,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_fmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmadd132nepbf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x98,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) ; A * B + C
+ %1 = bitcast i16 %__U to <16 x i1> ; one predicate bit per bf16 lane
+ %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__A ; lanes with a clear mask bit keep %__A
+ ret <16 x bfloat> %2
+}
+
+define <16 x bfloat> @test_mm256_mask3_fmaddne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C, i16 zeroext %__U) { ; Merge-masked 256-bit bf16 FMA with the addend %__C as passthru; the 231 form followed by a vmovaps into ymm0 is expected.
+; X64-LABEL: test_mm256_mask3_fmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmadd231nepbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xb8,0xd1]
+; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask3_fmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmadd231nepbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xb8,0xd1]
+; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) ; A * B + C
+ %1 = bitcast i16 %__U to <16 x i1> ; one predicate bit per bf16 lane
+ %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__C ; lanes with a clear mask bit keep %__C
+ ret <16 x bfloat> %2
+}
+
+define <16 x bfloat> @test_mm256_maskz_fmaddne_pbh(i16 zeroext %__U, <16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) { ; Zero-masked 256-bit bf16 FMA: lanes whose bit in %__U is clear become zero; the 213 form with {z} is expected.
+; X64-LABEL: test_mm256_maskz_fmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmadd213nepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xa8,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_fmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmadd213nepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xa8,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) ; A * B + C
+ %1 = bitcast i16 %__U to <16 x i1> ; one predicate bit per bf16 lane
+ %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> zeroinitializer ; lanes with a clear mask bit are zeroed
+ ret <16 x bfloat> %2
+}
+
+define <16 x bfloat> @test_mm256_fmsubne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) { ; Unmasked 256-bit bf16 fused multiply-subtract: fma with a negated addend is expected to select vfmsub213nepbf16.
+; CHECK-LABEL: test_mm256_fmsubne_pbh:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfmsub213nepbf16 %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf6,0x74,0x28,0xaa,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %fneg.i = fneg <16 x bfloat> %__C ; negated addend turns the fma into A * B - C
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %fneg.i)
+ ret <16 x bfloat> %0
+}
+
+define <16 x bfloat> @test_mm256_mask_fmsubne_pbh(<16 x bfloat> %__A, i16 zeroext %__U, <16 x bfloat> %__B, <16 x bfloat> %__C) { ; Merge-masked 256-bit bf16 fused multiply-subtract with the first multiplicand %__A as passthru; the 132 form is expected.
+; X64-LABEL: test_mm256_mask_fmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmsub132nepbf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x9a,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_fmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmsub132nepbf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x9a,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <16 x bfloat> %__C ; negated addend turns the fma into A * B - C
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %fneg.i.i)
+ %1 = bitcast i16 %__U to <16 x i1> ; one predicate bit per bf16 lane
+ %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__A ; lanes with a clear mask bit keep %__A
+ ret <16 x bfloat> %2
+}
+
+define <16 x bfloat> @test_mm256_mask3_fmsubne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C, i16 zeroext %__U) { ; Merge-masked 256-bit bf16 fused multiply-subtract with the un-negated %__C as passthru; the 231 form followed by a vmovaps into ymm0 is expected.
+; X64-LABEL: test_mm256_mask3_fmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmsub231nepbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xba,0xd1]
+; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask3_fmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmsub231nepbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xba,0xd1]
+; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <16 x bfloat> %__C ; negated addend turns the fma into A * B - C
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %fneg.i.i)
+ %1 = bitcast i16 %__U to <16 x i1> ; one predicate bit per bf16 lane
+ %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__C ; lanes with a clear mask bit keep the original %__C, not its negation
+ ret <16 x bfloat> %2
+}
+
+define <16 x bfloat> @test_mm256_maskz_fmsubne_pbh(i16 zeroext %__U, <16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) { ; Zero-masked 256-bit bf16 fused multiply-subtract: lanes whose bit in %__U is clear become zero; the 213 form with {z} is expected.
+; X64-LABEL: test_mm256_maskz_fmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmsub213nepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xaa,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_fmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmsub213nepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xaa,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <16 x bfloat> %__C ; negated addend turns the fma into A * B - C
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %fneg.i.i)
+ %1 = bitcast i16 %__U to <16 x i1> ; one predicate bit per bf16 lane
+ %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> zeroinitializer ; lanes with a clear mask bit are zeroed
+ ret <16 x bfloat> %2
+}
+
+define <16 x bfloat> @test_mm256_fnmaddne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) { ; Unmasked 256-bit bf16 negated multiply-add: fma with a negated multiplicand is expected to select vfnmadd213nepbf16.
+; CHECK-LABEL: test_mm256_fnmaddne_pbh:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfnmadd213nepbf16 %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf6,0x74,0x28,0xac,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %fneg.i = fneg <16 x bfloat> %__B ; negated multiplicand turns the fma into -(A * B) + C
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i, <16 x bfloat> %__C)
+ ret <16 x bfloat> %0
+}
+
+define <16 x bfloat> @test_mm256_mask_fnmaddne_pbh(<16 x bfloat> %__A, i16 zeroext %__U, <16 x bfloat> %__B, <16 x bfloat> %__C) { ; Merge-masked 256-bit bf16 negated multiply-add with the first multiplicand %__A as passthru; the 132 form is expected.
+; X64-LABEL: test_mm256_mask_fnmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmadd132nepbf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x9c,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_fnmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmadd132nepbf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x9c,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <16 x bfloat> %__B ; negated multiplicand turns the fma into -(A * B) + C
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i.i, <16 x bfloat> %__C)
+ %1 = bitcast i16 %__U to <16 x i1> ; one predicate bit per bf16 lane
+ %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__A ; lanes with a clear mask bit keep %__A
+ ret <16 x bfloat> %2
+}
+
+define <16 x bfloat> @test_mm256_mask3_fnmaddne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C, i16 zeroext %__U) { ; Merge-masked 256-bit bf16 negated multiply-add with the addend %__C as passthru; the 231 form followed by a vmovaps into ymm0 is expected.
+; X64-LABEL: test_mm256_mask3_fnmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmadd231nepbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xbc,0xd1]
+; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask3_fnmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmadd231nepbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xbc,0xd1]
+; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <16 x bfloat> %__B ; negated multiplicand turns the fma into -(A * B) + C
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i.i, <16 x bfloat> %__C)
+ %1 = bitcast i16 %__U to <16 x i1> ; one predicate bit per bf16 lane
+ %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__C ; lanes with a clear mask bit keep %__C
+ ret <16 x bfloat> %2
+}
+
+define <16 x bfloat> @test_mm256_maskz_fnmaddne_pbh(i16 zeroext %__U, <16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) { ; Zero-masked 256-bit bf16 negated multiply-add: lanes whose bit in %__U is clear become zero; the 213 form with {z} is expected.
+; X64-LABEL: test_mm256_maskz_fnmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmadd213nepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xac,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_fnmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmadd213nepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xac,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <16 x bfloat> %__B ; negated multiplicand turns the fma into -(A * B) + C
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i.i, <16 x bfloat> %__C)
+ %1 = bitcast i16 %__U to <16 x i1> ; one predicate bit per bf16 lane
+ %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> zeroinitializer ; lanes with a clear mask bit are zeroed
+ ret <16 x bfloat> %2
+}
+
+define <16 x bfloat> @test_mm256_fnmsubne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) { ; Unmasked 256-bit bf16 negated multiply-subtract: fma with a negated multiplicand and a negated addend is expected to select vfnmsub213nepbf16.
+; CHECK-LABEL: test_mm256_fnmsubne_pbh:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfnmsub213nepbf16 %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf6,0x74,0x28,0xae,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %fneg.i = fneg <16 x bfloat> %__B ; negated multiplicand
+ %fneg1.i = fneg <16 x bfloat> %__C ; negated addend; together the fma computes -(A * B) - C
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i, <16 x bfloat> %fneg1.i)
+ ret <16 x bfloat> %0
+}
+
+define <16 x bfloat> @test_mm256_mask_fnmsubne_pbh(<16 x bfloat> %__A, i16 zeroext %__U, <16 x bfloat> %__B, <16 x bfloat> %__C) { ; Merge-masked 256-bit bf16 negated multiply-subtract with the first multiplicand %__A as passthru; the 132 form is expected.
+; X64-LABEL: test_mm256_mask_fnmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmsub132nepbf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x9e,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_fnmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmsub132nepbf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x9e,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <16 x bfloat> %__B ; negated multiplicand
+ %fneg1.i.i = fneg <16 x bfloat> %__C ; negated addend; together the fma computes -(A * B) - C
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i.i, <16 x bfloat> %fneg1.i.i)
+ %1 = bitcast i16 %__U to <16 x i1> ; one predicate bit per bf16 lane
+ %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__A ; lanes with a clear mask bit keep %__A
+ ret <16 x bfloat> %2
+}
+
+define <16 x bfloat> @test_mm256_mask3_fnmsubne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C, i16 zeroext %__U) { ; Merge-masked 256-bit bf16 negated multiply-subtract with the un-negated %__C as passthru; the 231 form followed by a vmovaps into ymm0 is expected.
+; X64-LABEL: test_mm256_mask3_fnmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmsub231nepbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xbe,0xd1]
+; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask3_fnmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmsub231nepbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xbe,0xd1]
+; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <16 x bfloat> %__B ; negated multiplicand
+ %fneg1.i.i = fneg <16 x bfloat> %__C ; negated addend; together the fma computes -(A * B) - C
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i.i, <16 x bfloat> %fneg1.i.i)
+ %1 = bitcast i16 %__U to <16 x i1> ; one predicate bit per bf16 lane
+ %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__C ; lanes with a clear mask bit keep the original %__C, not its negation
+ ret <16 x bfloat> %2
+}
+
+define <16 x bfloat> @test_mm256_maskz_fnmsubne_pbh(i16 zeroext %__U, <16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) { ; Zero-masked 256-bit bf16 negated multiply-subtract: lanes whose bit in %__U is clear become zero; the 213 form with {z} is expected.
+; X64-LABEL: test_mm256_maskz_fnmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmsub213nepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xae,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_fnmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmsub213nepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xae,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <16 x bfloat> %__B ; negated multiplicand
+ %fneg1.i.i = fneg <16 x bfloat> %__C ; negated addend; together the fma computes -(A * B) - C
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i.i, <16 x bfloat> %fneg1.i.i)
+ %1 = bitcast i16 %__U to <16 x i1> ; one predicate bit per bf16 lane
+ %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> zeroinitializer ; lanes with a clear mask bit are zeroed
+ ret <16 x bfloat> %2
+}
+
+define <8 x bfloat> @test_mm_fmaddne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) { ; Unmasked 128-bit bf16 fused multiply-add: llvm.fma.v8bf16 is expected to select vfmadd213nepbf16 on xmm registers.
+; CHECK-LABEL: test_mm_fmaddne_pbh:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfmadd213nepbf16 %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf6,0x74,0x08,0xa8,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) ; A * B + C
+ ret <8 x bfloat> %0
+}
+
+define <8 x bfloat> @test_mm_mask_fmaddne_pbh(<8 x bfloat> %__A, i8 zeroext %__U, <8 x bfloat> %__B, <8 x bfloat> %__C) { ; Merge-masked 128-bit bf16 FMA with the first multiplicand %__A as passthru; the 132 form is expected.
+; X64-LABEL: test_mm_mask_fmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmadd132nepbf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x98,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask_fmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmadd132nepbf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x98,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) ; A * B + C
+ %1 = bitcast i8 %__U to <8 x i1> ; one predicate bit per bf16 lane
+ %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__A ; lanes with a clear mask bit keep %__A
+ ret <8 x bfloat> %2
+}
+
+define <8 x bfloat> @test_mm_mask3_fmaddne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C, i8 zeroext %__U) { ; Merge-masked 128-bit bf16 FMA with the addend %__C as passthru; the 231 form followed by a vmovaps into xmm0 is expected.
+; X64-LABEL: test_mm_mask3_fmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmadd231nepbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xb8,0xd1]
+; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask3_fmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmadd231nepbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xb8,0xd1]
+; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) ; A * B + C
+ %1 = bitcast i8 %__U to <8 x i1> ; one predicate bit per bf16 lane
+ %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__C ; lanes with a clear mask bit keep %__C
+ ret <8 x bfloat> %2
+}
+
+define <8 x bfloat> @test_mm_maskz_fmaddne_pbh(i8 zeroext %__U, <8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) { ; Zero-masked 128-bit bf16 FMA: lanes whose bit in %__U is clear become zero; the 213 form with {z} is expected.
+; X64-LABEL: test_mm_maskz_fmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmadd213nepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xa8,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_maskz_fmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmadd213nepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xa8,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) ; A * B + C
+ %1 = bitcast i8 %__U to <8 x i1> ; one predicate bit per bf16 lane
+ %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> zeroinitializer ; lanes with a clear mask bit are zeroed
+ ret <8 x bfloat> %2
+}
+
+define <8 x bfloat> @test_mm_fmsubne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) { ; Unmasked 128-bit bf16 fused multiply-subtract: fma with a negated addend is expected to select vfmsub213nepbf16 on xmm registers.
+; CHECK-LABEL: test_mm_fmsubne_pbh:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfmsub213nepbf16 %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf6,0x74,0x08,0xaa,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %fneg.i = fneg <8 x bfloat> %__C ; negated addend turns the fma into A * B - C
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %fneg.i)
+ ret <8 x bfloat> %0
+}
+
+define <8 x bfloat> @test_mm_mask_fmsubne_pbh(<8 x bfloat> %__A, i8 zeroext %__U, <8 x bfloat> %__B, <8 x bfloat> %__C) { ; merge-masked fmsub: fma(A, B, -C) per lane, lanes with a clear %__U bit keep %__A
+; X64-LABEL: test_mm_mask_fmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmsub132nepbf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x9a,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask_fmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmsub132nepbf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x9a,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <8 x bfloat> %__C ; negated addend turns the fma into a multiply-subtract
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %fneg.i.i)
+ %1 = bitcast i8 %__U to <8 x i1> ; one mask bit per bf16 lane
+ %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__A ; passthrough is the first multiplicand; the assertions expect the 132 form writing %xmm0 under %k1
+ ret <8 x bfloat> %2
+}
+
+define <8 x bfloat> @test_mm_mask3_fmsubne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C, i8 zeroext %__U) { ; mask3 fmsub: fma(A, B, -C) per lane, lanes with a clear %__U bit keep %__C
+; X64-LABEL: test_mm_mask3_fmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmsub231nepbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xba,0xd1]
+; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask3_fmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmsub231nepbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xba,0xd1]
+; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <8 x bfloat> %__C ; negated addend turns the fma into a multiply-subtract
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %fneg.i.i)
+ %1 = bitcast i8 %__U to <8 x i1> ; one mask bit per bf16 lane
+ %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__C ; passthrough is the un-negated addend; the assertions expect the 231 form accumulating into %xmm2, then a vmovaps into %xmm0
+ ret <8 x bfloat> %2
+}
+
+define <8 x bfloat> @test_mm_maskz_fmsubne_pbh(i8 zeroext %__U, <8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) { ; zero-masked fmsub: fma(A, B, -C) per lane, lanes with a clear %__U bit become 0
+; X64-LABEL: test_mm_maskz_fmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmsub213nepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xaa,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_maskz_fmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmsub213nepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xaa,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <8 x bfloat> %__C ; negated addend turns the fma into a multiply-subtract
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %fneg.i.i)
+ %1 = bitcast i8 %__U to <8 x i1> ; one mask bit per bf16 lane
+ %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> zeroinitializer ; the assertions expect this select folded into the instruction as zero-masking under %k1
+ ret <8 x bfloat> %2
+}
+
+define <8 x bfloat> @test_mm_fnmaddne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) { ; unmasked fnmadd: fma(A, -B, C)
+; CHECK-LABEL: test_mm_fnmaddne_pbh:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfnmadd213nepbf16 %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf6,0x74,0x08,0xac,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %fneg.i = fneg <8 x bfloat> %__B ; negated multiplicand; the assertions expect it absorbed into a single vfnmadd213nepbf16
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i, <8 x bfloat> %__C)
+ ret <8 x bfloat> %0
+}
+
+define <8 x bfloat> @test_mm_mask_fnmaddne_pbh(<8 x bfloat> %__A, i8 zeroext %__U, <8 x bfloat> %__B, <8 x bfloat> %__C) { ; merge-masked fnmadd: fma(A, -B, C) per lane, lanes with a clear %__U bit keep %__A
+; X64-LABEL: test_mm_mask_fnmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmadd132nepbf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x9c,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask_fnmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmadd132nepbf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x9c,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <8 x bfloat> %__B ; negated multiplicand gives the negated-product form
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i.i, <8 x bfloat> %__C)
+ %1 = bitcast i8 %__U to <8 x i1> ; one mask bit per bf16 lane
+ %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__A ; passthrough is the first multiplicand; the assertions expect the 132 form writing %xmm0 under %k1
+ ret <8 x bfloat> %2
+}
+
+define <8 x bfloat> @test_mm_mask3_fnmaddne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C, i8 zeroext %__U) { ; mask3 fnmadd: fma(A, -B, C) per lane, lanes with a clear %__U bit keep %__C
+; X64-LABEL: test_mm_mask3_fnmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmadd231nepbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xbc,0xd1]
+; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask3_fnmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmadd231nepbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xbc,0xd1]
+; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <8 x bfloat> %__B ; negated multiplicand gives the negated-product form
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i.i, <8 x bfloat> %__C)
+ %1 = bitcast i8 %__U to <8 x i1> ; one mask bit per bf16 lane
+ %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__C ; passthrough is the addend; the assertions expect the 231 form accumulating into %xmm2, then a vmovaps into %xmm0
+ ret <8 x bfloat> %2
+}
+
+define <8 x bfloat> @test_mm_maskz_fnmaddne_pbh(i8 zeroext %__U, <8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) { ; zero-masked fnmadd: fma(A, -B, C) per lane, lanes with a clear %__U bit become 0
+; X64-LABEL: test_mm_maskz_fnmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmadd213nepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xac,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_maskz_fnmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmadd213nepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xac,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <8 x bfloat> %__B ; negated multiplicand gives the negated-product form
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i.i, <8 x bfloat> %__C)
+ %1 = bitcast i8 %__U to <8 x i1> ; one mask bit per bf16 lane
+ %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> zeroinitializer ; the assertions expect this select folded into the instruction as zero-masking under %k1
+ ret <8 x bfloat> %2
+}
+
+define <8 x bfloat> @test_mm_fnmsubne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) { ; unmasked fnmsub: fma(A, -B, -C)
+; CHECK-LABEL: test_mm_fnmsubne_pbh:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfnmsub213nepbf16 %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf6,0x74,0x08,0xae,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %fneg.i = fneg <8 x bfloat> %__B ; negated multiplicand
+ %fneg1.i = fneg <8 x bfloat> %__C ; negated addend; the assertions expect both negations absorbed into a single vfnmsub213nepbf16
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i, <8 x bfloat> %fneg1.i)
+ ret <8 x bfloat> %0
+}
+
+define <8 x bfloat> @test_mm_mask_fnmsubne_pbh(<8 x bfloat> %__A, i8 zeroext %__U, <8 x bfloat> %__B, <8 x bfloat> %__C) { ; merge-masked fnmsub: fma(A, -B, -C) per lane, lanes with a clear %__U bit keep %__A
+; X64-LABEL: test_mm_mask_fnmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmsub132nepbf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x9e,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask_fnmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmsub132nepbf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x9e,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <8 x bfloat> %__B ; negated multiplicand
+ %fneg1.i.i = fneg <8 x bfloat> %__C ; negated addend
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i.i, <8 x bfloat> %fneg1.i.i)
+ %1 = bitcast i8 %__U to <8 x i1> ; one mask bit per bf16 lane
+ %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__A ; passthrough is the first multiplicand; the assertions expect the 132 form writing %xmm0 under %k1
+ ret <8 x bfloat> %2
+}
+
+define <8 x bfloat> @test_mm_mask3_fnmsubne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C, i8 zeroext %__U) { ; mask3 fnmsub: fma(A, -B, -C) per lane, lanes with a clear %__U bit keep %__C
+; X64-LABEL: test_mm_mask3_fnmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmsub231nepbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xbe,0xd1]
+; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask3_fnmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmsub231nepbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xbe,0xd1]
+; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <8 x bfloat> %__B ; negated multiplicand
+ %fneg1.i.i = fneg <8 x bfloat> %__C ; negated addend
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i.i, <8 x bfloat> %fneg1.i.i)
+ %1 = bitcast i8 %__U to <8 x i1> ; one mask bit per bf16 lane
+ %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__C ; passthrough is the un-negated addend; the assertions expect the 231 form accumulating into %xmm2, then a vmovaps into %xmm0
+ ret <8 x bfloat> %2
+}
+
+define <8 x bfloat> @test_mm_maskz_fnmsubne_pbh(i8 zeroext %__U, <8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) { ; zero-masked fnmsub: fma(A, -B, -C) per lane, lanes with a clear %__U bit become 0
+; X64-LABEL: test_mm_maskz_fnmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmsub213nepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xae,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_maskz_fnmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmsub213nepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xae,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <8 x bfloat> %__B ; negated multiplicand
+ %fneg1.i.i = fneg <8 x bfloat> %__C ; negated addend
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i.i, <8 x bfloat> %fneg1.i.i)
+ %1 = bitcast i8 %__U to <8 x i1> ; one mask bit per bf16 lane
+ %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> zeroinitializer ; the assertions expect this select folded into the instruction as zero-masking under %k1
+ ret <8 x bfloat> %2
+}
diff --git a/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll
new file mode 100644
index 0000000000000..f0d3ed239662f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll
@@ -0,0 +1,536 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X86
+
+declare <16 x bfloat> @llvm.x86.avx10.vminpbf16256(<16 x bfloat>, <16 x bfloat>)
+
+define <16 x bfloat> @test_int_x86_avx10_min_nepbf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2) { ; unmasked 256-bit bf16 min via the vminpbf16256 intrinsic
+; CHECK-LABEL: test_int_x86_avx10_min_nepbf16_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vminpbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x5d,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res0 = call <16 x bfloat> @llvm.x86.avx10.vminpbf16256(<16 x bfloat> %x1, <16 x bfloat> %x2) ; asserted to lower to a single vminpbf16 on ymm registers
+ ret <16 x bfloat> %res0
+}
+
+define <16 x bfloat> @test_int_x86_avx10_maskz_min_nepbf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk) { ; zero-masked 256-bit bf16 min: lanes with a clear %msk bit become 0
+; X64-LABEL: test_int_x86_avx10_maskz_min_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vminpbf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x5d,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_min_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vminpbf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x5d,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i16 %msk to <16 x i1> ; one mask bit per bf16 lane
+ %res0 = call <16 x bfloat> @llvm.x86.avx10.vminpbf16256(<16 x bfloat> %x1, <16 x bfloat> %x2)
+ %res1 = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> zeroinitializer ; the assertions expect this select folded into vminpbf16 as zero-masking under %k1
+ ret <16 x bfloat> %res1
+}
+
+declare <8 x bfloat> @llvm.x86.avx10.vminpbf16128(<8 x bfloat>, <8 x bfloat>)
+
+define <8 x bfloat> @test_int_x86_avx10_min_nepbf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2) { ; unmasked 128-bit bf16 min via the vminpbf16128 intrinsic
+; CHECK-LABEL: test_int_x86_avx10_min_nepbf16_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vminpbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x5d,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res0 = call <8 x bfloat> @llvm.x86.avx10.vminpbf16128(<8 x bfloat> %x1, <8 x bfloat> %x2) ; asserted to lower to a single vminpbf16 on xmm registers
+ ret <8 x bfloat> %res0
+}
+
+define <8 x bfloat> @test_int_x86_avx10_maskz_min_nepbf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk) { ; zero-masked 128-bit bf16 min: lanes with a clear %msk bit become 0
+; X64-LABEL: test_int_x86_avx10_maskz_min_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vminpbf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x5d,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_min_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vminpbf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x5d,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i8 %msk to <8 x i1> ; one mask bit per bf16 lane
+ %res0 = call <8 x bfloat> @llvm.x86.avx10.vminpbf16128(<8 x bfloat> %x1, <8 x bfloat> %x2)
+ %res1 = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> zeroinitializer ; the assertions expect this select folded into vminpbf16 as zero-masking under %k1
+ ret <8 x bfloat> %res1
+}
+
+declare <16 x bfloat> @llvm.x86.avx10.vmaxpbf16256(<16 x bfloat>, <16 x bfloat>)
+
+define <16 x bfloat> @test_int_x86_avx10_max_nepbf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2) { ; unmasked 256-bit bf16 max via the vmaxpbf16256 intrinsic
+; CHECK-LABEL: test_int_x86_avx10_max_nepbf16_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmaxpbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x5f,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res0 = call <16 x bfloat> @llvm.x86.avx10.vmaxpbf16256(<16 x bfloat> %x1, <16 x bfloat> %x2) ; asserted to lower to a single vmaxpbf16 on ymm registers
+ ret <16 x bfloat> %res0
+}
+
+define <16 x bfloat> @test_int_x86_avx10_maskz_max_nepbf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk) { ; zero-masked 256-bit bf16 max: lanes with a clear %msk bit become 0
+; X64-LABEL: test_int_x86_avx10_maskz_max_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vmaxpbf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x5f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_max_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vmaxpbf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x5f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i16 %msk to <16 x i1> ; one mask bit per bf16 lane
+ %res0 = call <16 x bfloat> @llvm.x86.avx10.vmaxpbf16256(<16 x bfloat> %x1, <16 x bfloat> %x2)
+ %res1 = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> zeroinitializer ; the assertions expect this select folded into vmaxpbf16 as zero-masking under %k1
+ ret <16 x bfloat> %res1
+}
+
+declare <8 x bfloat> @llvm.x86.avx10.vmaxpbf16128(<8 x bfloat>, <8 x bfloat>)
+
+define <8 x bfloat> @test_int_x86_avx10_max_nepbf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2) { ; unmasked 128-bit bf16 max via the vmaxpbf16128 intrinsic
+; CHECK-LABEL: test_int_x86_avx10_max_nepbf16_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmaxpbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x5f,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res0 = call <8 x bfloat> @llvm.x86.avx10.vmaxpbf16128(<8 x bfloat> %x1, <8 x bfloat> %x2) ; asserted to lower to a single vmaxpbf16 on xmm registers
+ ret <8 x bfloat> %res0
+}
+
+define <8 x bfloat> @test_int_x86_avx10_maskz_max_nepbf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk) { ; zero-masked 128-bit bf16 max: lanes with a clear %msk bit become 0
+; X64-LABEL: test_int_x86_avx10_maskz_max_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vmaxpbf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x5f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_max_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vmaxpbf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x5f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i8 %msk to <8 x i1> ; one mask bit per bf16 lane
+ %res0 = call <8 x bfloat> @llvm.x86.avx10.vmaxpbf16128(<8 x bfloat> %x1, <8 x bfloat> %x2)
+ %res1 = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> zeroinitializer ; the assertions expect this select folded into vmaxpbf16 as zero-masking under %k1
+ ret <8 x bfloat> %res1
+}
+
+declare <8 x bfloat> @llvm.x86.avx10.mask.rsqrt.nepbf16.128(<8 x bfloat>, <8 x bfloat>, i8)
+declare <16 x bfloat> @llvm.x86.avx10.mask.rsqrt.nepbf16.256(<16 x bfloat>, <16 x bfloat>, i16)
+
+define <8 x bfloat> @test_rsqrt_nepbf16_128(<8 x bfloat> %a0) { ; 128-bit bf16 rsqrt intrinsic called with an all-ones mask
+; CHECK-LABEL: test_rsqrt_nepbf16_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vrsqrtpbf16 %xmm0, %xmm0 # encoding: [0x62,0xf6,0x7c,0x08,0x4e,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <8 x bfloat> @llvm.x86.avx10.mask.rsqrt.nepbf16.128(<8 x bfloat> %a0, <8 x bfloat> zeroinitializer, i8 -1) ; mask operand is -1, so the assertions expect an unmasked vrsqrtpbf16 with no k-register setup
+ ret <8 x bfloat> %res
+}
+
+define <16 x bfloat> @test_rsqrt_nepbf16_256(<16 x bfloat> %a0) { ; 256-bit bf16 rsqrt intrinsic called with an all-ones mask
+; CHECK-LABEL: test_rsqrt_nepbf16_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vrsqrtpbf16 %ymm0, %ymm0 # encoding: [0x62,0xf6,0x7c,0x28,0x4e,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <16 x bfloat> @llvm.x86.avx10.mask.rsqrt.nepbf16.256(<16 x bfloat> %a0, <16 x bfloat> zeroinitializer, i16 -1) ; mask operand is -1, so the assertions expect an unmasked vrsqrtpbf16 with no k-register setup
+ ret <16 x bfloat> %res
+}
+
+declare <8 x bfloat> @llvm.x86.avx10.mask.rcp.nepbf16.128(<8 x bfloat>, <8 x bfloat>, i8)
+declare <16 x bfloat> @llvm.x86.avx10.mask.rcp.nepbf16.256(<16 x bfloat>, <16 x bfloat>, i16)
+
+define <8 x bfloat> @test_rcp_nepbf16_128(<8 x bfloat> %a0, <8 x bfloat> %a1, i8 %mask) { ; 128-bit bf16 rcp intrinsic with a variable mask and %a1 as the second vector operand
+; X64-LABEL: test_rcp_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vrcppbf16 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0x4c,0xc8]
+; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_rcp_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vrcppbf16 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0x4c,0xc8]
+; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x bfloat> @llvm.x86.avx10.mask.rcp.nepbf16.128(<8 x bfloat> %a0, <8 x bfloat> %a1, i8 %mask) ; the assertions expect a merge-masked vrcppbf16 written over %a1's register (%xmm1), then a vmovaps into %xmm0
+ ret <8 x bfloat> %res
+}
+
+define <16 x bfloat> @test_rcp_nepbf16_256(<16 x bfloat> %a0, <16 x bfloat> %a1, i16 %mask) { ; 256-bit bf16 rcp intrinsic with a variable mask and %a1 as the second vector operand
+; X64-LABEL: test_rcp_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vrcppbf16 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0x4c,0xc8]
+; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_rcp_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vrcppbf16 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0x4c,0xc8]
+; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <16 x bfloat> @llvm.x86.avx10.mask.rcp.nepbf16.256(<16 x bfloat> %a0, <16 x bfloat> %a1, i16 %mask) ; the assertions expect a merge-masked vrcppbf16 written over %a1's register (%ymm1), then a vmovaps into %ymm0
+ ret <16 x bfloat> %res
+}
+
+declare <8 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.128(<8 x bfloat>, i32, <8 x bfloat>, i8)
+declare <16 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.256(<16 x bfloat>, i32, <16 x bfloat>, i16)
+
+define <8 x bfloat>@test_int_x86_avx512_mask_reduce_nepbf16_128(<8 x bfloat> %x0, <8 x bfloat> %x2, i8 %x3) { ; 128-bit bf16 reduce intrinsic, once masked (imm 8) and once with an all-ones mask (imm 4), results added
+; X64-LABEL: test_int_x86_avx512_mask_reduce_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vreducenepbf16 $8, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x56,0xc8,0x08]
+; X64-NEXT: vreducenepbf16 $4, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7f,0x08,0x56,0xc0,0x04]
+; X64-NEXT: vaddnepbf16 %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf5,0x75,0x08,0x58,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_reduce_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vreducenepbf16 $8, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x56,0xc8,0x08]
+; X86-NEXT: vreducenepbf16 $4, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7f,0x08,0x56,0xc0,0x04]
+; X86-NEXT: vaddnepbf16 %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf5,0x75,0x08,0x58,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.128(<8 x bfloat> %x0, i32 8, <8 x bfloat> %x2, i8 %x3) ; NOTE(review): function name says avx512 but the intrinsic is llvm.x86.avx10.* - presumably a carried-over name
+ %res1 = call <8 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.128(<8 x bfloat> %x0, i32 4, <8 x bfloat> %x2, i8 -1) ; mask -1: asserted as an unmasked vreducenepbf16 $4
+ %res2 = fadd <8 x bfloat> %res, %res1 ; keeps both results live; asserted as vaddnepbf16
+ ret <8 x bfloat> %res2
+}
+
+define <16 x bfloat>@test_int_x86_avx512_mask_reduce_nepbf16_256(<16 x bfloat> %x0, <16 x bfloat> %x2, i16 %x3) { ; 256-bit bf16 reduce intrinsic, once masked (imm 8) and once with an all-ones mask (imm 4), results added
+; X64-LABEL: test_int_x86_avx512_mask_reduce_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vreducenepbf16 $8, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x56,0xc8,0x08]
+; X64-NEXT: vreducenepbf16 $4, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7f,0x28,0x56,0xc0,0x04]
+; X64-NEXT: vaddnepbf16 %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_reduce_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vreducenepbf16 $8, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x56,0xc8,0x08]
+; X86-NEXT: vreducenepbf16 $4, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7f,0x28,0x56,0xc0,0x04]
+; X86-NEXT: vaddnepbf16 %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <16 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.256(<16 x bfloat> %x0, i32 8, <16 x bfloat> %x2, i16 %x3) ; NOTE(review): function name says avx512 but the intrinsic is llvm.x86.avx10.* - presumably a carried-over name
+ %res1 = call <16 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.256(<16 x bfloat> %x0, i32 4, <16 x bfloat> %x2, i16 -1) ; mask -1: asserted as an unmasked vreducenepbf16 $4
+ %res2 = fadd <16 x bfloat> %res, %res1 ; keeps both results live; asserted as vaddnepbf16
+ ret <16 x bfloat> %res2
+}
+
+declare <8 x i1> @llvm.x86.avx10.fpclass.nepbf16.128(<8 x bfloat>, i32)
+declare <16 x i1> @llvm.x86.avx10.fpclass.nepbf16.256(<16 x bfloat>, i32)
+
+define i8 @test_int_x86_avx512_fpclass_nepbf16_128(<8 x bfloat> %x0) { ; two 128-bit bf16 fpclass queries (imm 4 and imm 2) ANDed and returned as an i8 bitmask
+; CHECK-LABEL: test_int_x86_avx512_fpclass_nepbf16_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfpclasspbf16 $2, %xmm0, %k1 # encoding: [0x62,0xf3,0x7f,0x08,0x66,0xc8,0x02]
+; CHECK-NEXT: vfpclasspbf16 $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x66,0xc0,0x04]
+; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <8 x i1> @llvm.x86.avx10.fpclass.nepbf16.128(<8 x bfloat> %x0, i32 4) ; NOTE(review): function name says avx512 but the intrinsic is llvm.x86.avx10.* - presumably a carried-over name
+ %res1 = call <8 x i1> @llvm.x86.avx10.fpclass.nepbf16.128(<8 x bfloat> %x0, i32 2)
+ %1 = and <8 x i1> %res1, %res ; the assertions show no separate k-register AND: the $2 result (%k1) is used as the write mask of the $4 query
+ %2 = bitcast <8 x i1> %1 to i8 ; pack the 8 lane results into the returned byte
+ ret i8 %2
+}
+
+define i16 @test_int_x86_avx512_fpclass_nepbf16_256(<16 x bfloat> %x0) { ; two 256-bit bf16 fpclass queries (imm 4 and imm 2) ANDed and returned as an i16 bitmask
+; CHECK-LABEL: test_int_x86_avx512_fpclass_nepbf16_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfpclasspbf16 $2, %ymm0, %k1 # encoding: [0x62,0xf3,0x7f,0x28,0x66,0xc8,0x02]
+; CHECK-NEXT: vfpclasspbf16 $4, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x66,0xc0,0x04]
+; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <16 x i1> @llvm.x86.avx10.fpclass.nepbf16.256(<16 x bfloat> %x0, i32 4) ; NOTE(review): function name says avx512 but the intrinsic is llvm.x86.avx10.* - presumably a carried-over name
+ %res1 = call <16 x i1> @llvm.x86.avx10.fpclass.nepbf16.256(<16 x bfloat> %x0, i32 2)
+ %1 = and <16 x i1> %res1, %res ; the assertions show no separate k-register AND: the $2 result (%k1) is used as the write mask of the $4 query
+ %2 = bitcast <16 x i1> %1 to i16 ; pack the 16 lane results into the returned i16; a vzeroupper is asserted before the return
+ ret i16 %2
+}
+
+declare <8 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.128(<8 x bfloat>, <8 x bfloat>, i8)
+declare <16 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.256(<16 x bfloat>, <16 x bfloat>, i16)
+
+define <8 x bfloat>@test_int_x86_avx512_getexp_nepbf16_128(<8 x bfloat> %x0) { ; 128-bit bf16 getexp intrinsic called with an all-ones mask
+; CHECK-LABEL: test_int_x86_avx512_getexp_nepbf16_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vgetexppbf16 %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x42,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <8 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.128(<8 x bfloat> %x0, <8 x bfloat> zeroinitializer, i8 -1) ; mask -1: asserted as an unmasked vgetexppbf16. NOTE(review): name says avx512, intrinsic is llvm.x86.avx10.*
+ ret <8 x bfloat> %res
+}
+
+define <8 x bfloat>@test_int_x86_avx512_mask_getexp_nepbf16_128(<8 x bfloat> %x0, <8 x bfloat> %x1, i8 %x2) { ; 128-bit bf16 getexp intrinsic with variable mask %x2 and %x1 as the second vector operand
+; X64-LABEL: test_int_x86_avx512_mask_getexp_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vgetexppbf16 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x42,0xc8]
+; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_getexp_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vgetexppbf16 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x42,0xc8]
+; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.128(<8 x bfloat> %x0, <8 x bfloat> %x1, i8 %x2) ; asserted as a merge-masked vgetexppbf16 over %x1's register (%xmm1), then vmovaps into %xmm0. NOTE(review): name says avx512, intrinsic is llvm.x86.avx10.*
+ ret <8 x bfloat> %res
+}
+
+define <8 x bfloat>@test_int_x86_avx512_maskz_getexp_nepbf16_128(<8 x bfloat> %x0, i8 %x2) { ; 128-bit bf16 getexp intrinsic with variable mask %x2 and an all-zero second vector operand
+; X64-LABEL: test_int_x86_avx512_maskz_getexp_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vgetexppbf16 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x42,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_maskz_getexp_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vgetexppbf16 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x42,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.128(<8 x bfloat> %x0, <8 x bfloat> zeroinitializer, i8 %x2) ; zero second operand: asserted as the zero-masking form under %k1. NOTE(review): name says avx512, intrinsic is llvm.x86.avx10.*
+ ret <8 x bfloat> %res
+}
+
+define <16 x bfloat>@test_int_x86_avx512_getexp_nepbf16_256(<16 x bfloat> %x0) { ; 256-bit bf16 getexp intrinsic called with an all-ones mask
+; CHECK-LABEL: test_int_x86_avx512_getexp_nepbf16_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vgetexppbf16 %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x42,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <16 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.256(<16 x bfloat> %x0, <16 x bfloat> zeroinitializer, i16 -1) ; mask -1: asserted as an unmasked vgetexppbf16. NOTE(review): name says avx512, intrinsic is llvm.x86.avx10.*
+ ret <16 x bfloat> %res
+}
+
+define <16 x bfloat>@test_int_x86_avx512_mask_getexp_nepbf16_256(<16 x bfloat> %x0, <16 x bfloat> %x1, i16 %x2) { ; 256-bit bf16 getexp intrinsic with variable mask %x2 and %x1 as the second vector operand
+; X64-LABEL: test_int_x86_avx512_mask_getexp_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vgetexppbf16 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x42,0xc8]
+; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_getexp_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vgetexppbf16 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x42,0xc8]
+; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <16 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.256(<16 x bfloat> %x0, <16 x bfloat> %x1, i16 %x2) ; asserted as a merge-masked vgetexppbf16 over %x1's register (%ymm1), then vmovaps into %ymm0. NOTE(review): name says avx512, intrinsic is llvm.x86.avx10.*
+ ret <16 x bfloat> %res
+}
+
+define <16 x bfloat>@test_int_x86_avx512_maskz_getexp_nepbf16_256(<16 x bfloat> %x0, i16 %x2) { ; 256-bit bf16 getexp intrinsic with variable mask %x2 and an all-zero second vector operand
+; X64-LABEL: test_int_x86_avx512_maskz_getexp_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vgetexppbf16 %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x42,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_maskz_getexp_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vgetexppbf16 %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x42,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <16 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.256(<16 x bfloat> %x0, <16 x bfloat> zeroinitializer, i16 %x2) ; zero second operand: asserted as the zero-masking form under %k1. NOTE(review): name says avx512, intrinsic is llvm.x86.avx10.*
+ ret <16 x bfloat> %res
+}
+
+declare <8 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.128(<8 x bfloat>, i32, <8 x bfloat>, i8)
+declare <16 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.256(<16 x bfloat>, i32, <16 x bfloat>, i16)
+
+define <8 x bfloat>@test_int_x86_avx512_mask_getmant_nepbf16_128(<8 x bfloat> %x0, <8 x bfloat> %x2, i8 %x3) { ; 128-bit bf16 getmant intrinsic, once masked (imm 8) and once with an all-ones mask (imm 4), results added
+; X64-LABEL: test_int_x86_avx512_mask_getmant_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vgetmantpbf16 $8, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x26,0xc8,0x08]
+; X64-NEXT: vgetmantpbf16 $4, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7f,0x08,0x26,0xc0,0x04]
+; X64-NEXT: vaddnepbf16 %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf5,0x75,0x08,0x58,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_getmant_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vgetmantpbf16 $8, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x26,0xc8,0x08]
+; X86-NEXT: vgetmantpbf16 $4, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7f,0x08,0x26,0xc0,0x04]
+; X86-NEXT: vaddnepbf16 %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf5,0x75,0x08,0x58,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.128(<8 x bfloat> %x0, i32 8, <8 x bfloat> %x2, i8 %x3) ; NOTE(review): function name says avx512 but the intrinsic is llvm.x86.avx10.* - presumably a carried-over name
+ %res1 = call <8 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.128(<8 x bfloat> %x0, i32 4, <8 x bfloat> %x2, i8 -1) ; mask -1: asserted as an unmasked vgetmantpbf16 $4
+ %res2 = fadd <8 x bfloat> %res, %res1 ; keeps both results live; asserted as vaddnepbf16
+ ret <8 x bfloat> %res2
+}
+
+define <16 x bfloat>@test_int_x86_avx512_mask_getmant_nepbf16_256(<16 x bfloat> %x0, <16 x bfloat> %x2, i16 %x3) { ; 256-bit bf16 getmant intrinsic, once masked (imm 8) and once with an all-ones mask (imm 4), results added
+; X64-LABEL: test_int_x86_avx512_mask_getmant_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vgetmantpbf16 $8, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x26,0xc8,0x08]
+; X64-NEXT: vgetmantpbf16 $4, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7f,0x28,0x26,0xc0,0x04]
+; X64-NEXT: vaddnepbf16 %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_getmant_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vgetmantpbf16 $8, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x26,0xc8,0x08]
+; X86-NEXT: vgetmantpbf16 $4, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7f,0x28,0x26,0xc0,0x04]
+; X86-NEXT: vaddnepbf16 %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <16 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.256(<16 x bfloat> %x0, i32 8, <16 x bfloat> %x2, i16 %x3) ; NOTE(review): function name says avx512 but the intrinsic is llvm.x86.avx10.* - presumably a carried-over name
+ %res1 = call <16 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.256(<16 x bfloat> %x0, i32 4, <16 x bfloat> %x2, i16 -1) ; mask -1: asserted as an unmasked vgetmantpbf16 $4
+ %res2 = fadd <16 x bfloat> %res, %res1 ; keeps both results live; asserted as vaddnepbf16
+ ret <16 x bfloat> %res2
+}
+
+declare <8 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.128(<8 x bfloat>, i32, <8 x bfloat>, i8)
+declare <16 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.256(<16 x bfloat>, i32, <16 x bfloat>, i16)
+
+define <8 x bfloat>@test_int_x86_avx512_mask_rndscale_nepbf16_128(<8 x bfloat> %x0, <8 x bfloat> %x2, i8 %x3) { ; 128-bit rndscale test: one masked and one unmasked intrinsic call, summed into a single return value
+; X64-LABEL: test_int_x86_avx512_mask_rndscale_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vrndscalenepbf16 $8, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x08,0xc8,0x08]
+; X64-NEXT: vrndscalenepbf16 $4, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7f,0x08,0x08,0xc0,0x04]
+; X64-NEXT: vaddnepbf16 %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf5,0x75,0x08,0x58,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_rndscale_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vrndscalenepbf16 $8, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x08,0xc8,0x08]
+; X86-NEXT: vrndscalenepbf16 $4, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7f,0x08,0x08,0xc0,0x04]
+; X86-NEXT: vaddnepbf16 %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf5,0x75,0x08,0x58,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.128(<8 x bfloat> %x0, i32 8, <8 x bfloat> %x2, i8 %x3) ; immediate 8, variable mask %x3, third operand %x2; the expected asm writes %xmm1 under {%k1}
+ %res1 = call <8 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.128(<8 x bfloat> %x0, i32 4, <8 x bfloat> %x2, i8 -1) ; immediate 4, constant all-ones mask; the expected asm carries no {%k1}
+ %res2 = fadd <8 x bfloat> %res, %res1 ; both call results feed the return value; shows up as vaddnepbf16 in the expected asm
+ ret <8 x bfloat> %res2
+}
+
+define <16 x bfloat>@test_int_x86_avx512_mask_rndscale_nepbf16_256(<16 x bfloat> %x0, <16 x bfloat> %x2, i16 %x3) { ; 256-bit rndscale test: one masked and one unmasked intrinsic call, summed into a single return value
+; X64-LABEL: test_int_x86_avx512_mask_rndscale_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vrndscalenepbf16 $8, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x08,0xc8,0x08]
+; X64-NEXT: vrndscalenepbf16 $4, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7f,0x28,0x08,0xc0,0x04]
+; X64-NEXT: vaddnepbf16 %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_rndscale_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vrndscalenepbf16 $8, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x08,0xc8,0x08]
+; X86-NEXT: vrndscalenepbf16 $4, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7f,0x28,0x08,0xc0,0x04]
+; X86-NEXT: vaddnepbf16 %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <16 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.256(<16 x bfloat> %x0, i32 8, <16 x bfloat> %x2, i16 %x3) ; immediate 8, variable mask %x3, third operand %x2; the expected asm writes %ymm1 under {%k1}
+ %res1 = call <16 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.256(<16 x bfloat> %x0, i32 4, <16 x bfloat> %x2, i16 -1) ; immediate 4, constant all-ones mask; the expected asm carries no {%k1}
+ %res2 = fadd <16 x bfloat> %res, %res1 ; both call results feed the return value; shows up as vaddnepbf16 in the expected asm
+ ret <16 x bfloat> %res2
+}
+
+declare <8 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.128(<8 x bfloat>, <8 x bfloat>, <8 x bfloat>, i8)
+declare <16 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.256(<16 x bfloat>, <16 x bfloat>, <16 x bfloat>, i16)
+
+define <8 x bfloat>@test_int_x86_avx512_scalef_nepbf16_128(<8 x bfloat> %x0, <8 x bfloat> %x1) { ; 128-bit scalef test, unmasked form; a single shared expectation covers both run configurations
+; CHECK-LABEL: test_int_x86_avx512_scalef_nepbf16_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vscalefpbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf6,0x7c,0x08,0x2c,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <8 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.128(<8 x bfloat> %x0, <8 x bfloat> %x1, <8 x bfloat> zeroinitializer, i8 -1) ; constant all-ones mask with a zero third operand; the expected asm is the plain instruction with no mask register
+ ret <8 x bfloat> %res
+}
+
+define <8 x bfloat>@test_int_x86_avx512_mask_scalef_nepbf16_128(<8 x bfloat> %x0, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %x3) { ; 128-bit scalef test, masked form with %x2 as the third operand
+; X64-LABEL: test_int_x86_avx512_mask_scalef_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vscalefpbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0x2c,0xd1]
+; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_scalef_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vscalefpbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0x2c,0xd1]
+; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i8 %x3 to <8 x i1> ; NOTE(review): %mask is never referenced below; the call takes the i8 %x3 directly, so this value looks dead
+ %res = call <8 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.128(<8 x bfloat> %x0, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %x3) ; variable mask %x3; the expected asm writes %xmm2 under {%k1} and then copies it to %xmm0
+ ret <8 x bfloat> %res
+}
+
+define <8 x bfloat>@test_int_x86_avx512_maskz_scalef_nepbf16_128(<8 x bfloat> %x0, <8 x bfloat> %x1, i8 %x3) { ; 128-bit scalef test, zero-masked form (zero third operand plus variable mask)
+; X64-LABEL: test_int_x86_avx512_maskz_scalef_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vscalefpbf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x7c,0x89,0x2c,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_maskz_scalef_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vscalefpbf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x7c,0x89,0x2c,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i8 %x3 to <8 x i1> ; NOTE(review): %mask is never referenced below; the call takes the i8 %x3 directly, so this value looks dead
+ %res = call <8 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.128(<8 x bfloat> %x0, <8 x bfloat> %x1, <8 x bfloat> zeroinitializer, i8 %x3) ; zeroinitializer third operand with variable mask %x3; the expected asm uses {%k1} {z}
+ ret <8 x bfloat> %res
+}
+
+define <16 x bfloat>@test_int_x86_avx512_scalef_nepbf16_256(<16 x bfloat> %x0, <16 x bfloat> %x1) { ; 256-bit scalef test, unmasked form; a single shared expectation covers both run configurations
+; CHECK-LABEL: test_int_x86_avx512_scalef_nepbf16_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vscalefpbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf6,0x7c,0x28,0x2c,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <16 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.256(<16 x bfloat> %x0, <16 x bfloat> %x1, <16 x bfloat> zeroinitializer, i16 -1) ; constant all-ones mask with a zero third operand; the expected asm is the plain instruction with no mask register
+ ret <16 x bfloat> %res
+}
+
+define <16 x bfloat>@test_int_x86_avx512_mask_scalef_nepbf16_256(<16 x bfloat> %x0, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %x3) { ; 256-bit scalef test, masked form with %x2 as the third operand
+; X64-LABEL: test_int_x86_avx512_mask_scalef_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vscalefpbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0x2c,0xd1]
+; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_scalef_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vscalefpbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0x2c,0xd1]
+; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i16 %x3 to <16 x i1> ; NOTE(review): %mask is never referenced below; the call takes the i16 %x3 directly, so this value looks dead
+ %res = call <16 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.256(<16 x bfloat> %x0, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %x3) ; variable mask %x3; the expected asm writes %ymm2 under {%k1} and then copies it to %ymm0
+ ret <16 x bfloat> %res
+}
+
+define <16 x bfloat>@test_int_x86_avx512_maskz_scalef_nepbf16_256(<16 x bfloat> %x0, <16 x bfloat> %x1, i16 %x3) { ; 256-bit scalef test, zero-masked form (zero third operand plus variable mask)
+; X64-LABEL: test_int_x86_avx512_maskz_scalef_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vscalefpbf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x7c,0xa9,0x2c,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_maskz_scalef_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vscalefpbf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x7c,0xa9,0x2c,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i16 %x3 to <16 x i1> ; NOTE(review): %mask is never referenced below; the call takes the i16 %x3 directly, so this value looks dead
+ %res = call <16 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.256(<16 x bfloat> %x0, <16 x bfloat> %x1, <16 x bfloat> zeroinitializer, i16 %x3) ; zeroinitializer third operand with variable mask %x3; the expected asm uses {%k1} {z}
+ ret <16 x bfloat> %res
+}
diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-bf16-32.txt b/llvm/test/MC/Disassembler/X86/avx10.2-bf16-32.txt
new file mode 100644
index 0000000000000..bb6f2b1f6b7df
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx10.2-bf16-32.txt
@@ -0,0 +1,3015 @@
+# RUN: llvm-mc --disassemble %s -triple=i386 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i386 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: vaddnepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vaddnepbf16 ymm2, ymm3, ymm4
+0x62,0xf5,0x65,0x28,0x58,0xd4
+
+# ATT: vaddnepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vaddnepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf5,0x65,0x2f,0x58,0xd4
+
+# ATT: vaddnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vaddnepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf5,0x65,0xaf,0x58,0xd4
+
+# ATT: vaddnepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vaddnepbf16 zmm2, zmm3, zmm4
+0x62,0xf5,0x65,0x48,0x58,0xd4
+
+# ATT: vaddnepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vaddnepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf5,0x65,0x4f,0x58,0xd4
+
+# ATT: vaddnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vaddnepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf5,0x65,0xcf,0x58,0xd4
+
+# ATT: vaddnepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vaddnepbf16 xmm2, xmm3, xmm4
+0x62,0xf5,0x65,0x08,0x58,0xd4
+
+# ATT: vaddnepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vaddnepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf5,0x65,0x0f,0x58,0xd4
+
+# ATT: vaddnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vaddnepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf5,0x65,0x8f,0x58,0xd4
+
+# ATT: vaddnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vaddnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x48,0x58,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vaddnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vaddnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x4f,0x58,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vaddnepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vaddnepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf5,0x65,0x58,0x58,0x10
+
+# ATT: vaddnepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vaddnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf5,0x65,0x48,0x58,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vaddnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vaddnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf5,0x65,0xcf,0x58,0x51,0x7f
+
+# ATT: vaddnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vaddnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf5,0x65,0xdf,0x58,0x52,0x80
+
+# ATT: vaddnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vaddnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x28,0x58,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vaddnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vaddnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x2f,0x58,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vaddnepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vaddnepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf5,0x65,0x38,0x58,0x10
+
+# ATT: vaddnepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vaddnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf5,0x65,0x28,0x58,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vaddnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vaddnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x65,0xaf,0x58,0x51,0x7f
+
+# ATT: vaddnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vaddnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf5,0x65,0xbf,0x58,0x52,0x80
+
+# ATT: vaddnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vaddnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x08,0x58,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vaddnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vaddnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x0f,0x58,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vaddnepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vaddnepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf5,0x65,0x18,0x58,0x10
+
+# ATT: vaddnepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vaddnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf5,0x65,0x08,0x58,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vaddnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vaddnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x65,0x8f,0x58,0x51,0x7f
+
+# ATT: vaddnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vaddnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x65,0x9f,0x58,0x52,0x80
+
+# ATT: vcmppbf16 $123, %ymm4, %ymm3, %k5
+# INTEL: vcmppbf16 k5, ymm3, ymm4, 123
+0x62,0xf3,0x67,0x28,0xc2,0xec,0x7b
+
+# ATT: vcmppbf16 $123, %ymm4, %ymm3, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, ymm3, ymm4, 123
+0x62,0xf3,0x67,0x2f,0xc2,0xec,0x7b
+
+# ATT: vcmppbf16 $123, %xmm4, %xmm3, %k5
+# INTEL: vcmppbf16 k5, xmm3, xmm4, 123
+0x62,0xf3,0x67,0x08,0xc2,0xec,0x7b
+
+# ATT: vcmppbf16 $123, %xmm4, %xmm3, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, xmm3, xmm4, 123
+0x62,0xf3,0x67,0x0f,0xc2,0xec,0x7b
+
+# ATT: vcmppbf16 $123, %zmm4, %zmm3, %k5
+# INTEL: vcmppbf16 k5, zmm3, zmm4, 123
+0x62,0xf3,0x67,0x48,0xc2,0xec,0x7b
+
+# ATT: vcmppbf16 $123, %zmm4, %zmm3, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, zmm3, zmm4, 123
+0x62,0xf3,0x67,0x4f,0xc2,0xec,0x7b
+
+# ATT: vcmppbf16 $123, 268435456(%esp,%esi,8), %zmm3, %k5
+# INTEL: vcmppbf16 k5, zmm3, zmmword ptr [esp + 8*esi + 268435456], 123
+0x62,0xf3,0x67,0x48,0xc2,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vcmppbf16 $123, 291(%edi,%eax,4), %zmm3, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291], 123
+0x62,0xf3,0x67,0x4f,0xc2,0xac,0x87,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vcmppbf16 $123, (%eax){1to32}, %zmm3, %k5
+# INTEL: vcmppbf16 k5, zmm3, word ptr [eax]{1to32}, 123
+0x62,0xf3,0x67,0x58,0xc2,0x28,0x7b
+
+# ATT: vcmppbf16 $123, -2048(,%ebp,2), %zmm3, %k5
+# INTEL: vcmppbf16 k5, zmm3, zmmword ptr [2*ebp - 2048], 123
+0x62,0xf3,0x67,0x48,0xc2,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b
+
+# ATT: vcmppbf16 $123, 8128(%ecx), %zmm3, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, zmm3, zmmword ptr [ecx + 8128], 123
+0x62,0xf3,0x67,0x4f,0xc2,0x69,0x7f,0x7b
+
+# ATT: vcmppbf16 $123, -256(%edx){1to32}, %zmm3, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, zmm3, word ptr [edx - 256]{1to32}, 123
+0x62,0xf3,0x67,0x5f,0xc2,0x6a,0x80,0x7b
+
+# ATT: vcmppbf16 $123, 268435456(%esp,%esi,8), %xmm3, %k5
+# INTEL: vcmppbf16 k5, xmm3, xmmword ptr [esp + 8*esi + 268435456], 123
+0x62,0xf3,0x67,0x08,0xc2,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vcmppbf16 $123, 291(%edi,%eax,4), %xmm3, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291], 123
+0x62,0xf3,0x67,0x0f,0xc2,0xac,0x87,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vcmppbf16 $123, (%eax){1to8}, %xmm3, %k5
+# INTEL: vcmppbf16 k5, xmm3, word ptr [eax]{1to8}, 123
+0x62,0xf3,0x67,0x18,0xc2,0x28,0x7b
+
+# ATT: vcmppbf16 $123, -512(,%ebp,2), %xmm3, %k5
+# INTEL: vcmppbf16 k5, xmm3, xmmword ptr [2*ebp - 512], 123
+0x62,0xf3,0x67,0x08,0xc2,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b
+
+# ATT: vcmppbf16 $123, 2032(%ecx), %xmm3, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, xmm3, xmmword ptr [ecx + 2032], 123
+0x62,0xf3,0x67,0x0f,0xc2,0x69,0x7f,0x7b
+
+# ATT: vcmppbf16 $123, -256(%edx){1to8}, %xmm3, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, xmm3, word ptr [edx - 256]{1to8}, 123
+0x62,0xf3,0x67,0x1f,0xc2,0x6a,0x80,0x7b
+
+# ATT: vcmppbf16 $123, 268435456(%esp,%esi,8), %ymm3, %k5
+# INTEL: vcmppbf16 k5, ymm3, ymmword ptr [esp + 8*esi + 268435456], 123
+0x62,0xf3,0x67,0x28,0xc2,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vcmppbf16 $123, 291(%edi,%eax,4), %ymm3, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291], 123
+0x62,0xf3,0x67,0x2f,0xc2,0xac,0x87,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vcmppbf16 $123, (%eax){1to16}, %ymm3, %k5
+# INTEL: vcmppbf16 k5, ymm3, word ptr [eax]{1to16}, 123
+0x62,0xf3,0x67,0x38,0xc2,0x28,0x7b
+
+# ATT: vcmppbf16 $123, -1024(,%ebp,2), %ymm3, %k5
+# INTEL: vcmppbf16 k5, ymm3, ymmword ptr [2*ebp - 1024], 123
+0x62,0xf3,0x67,0x28,0xc2,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b
+
+# ATT: vcmppbf16 $123, 4064(%ecx), %ymm3, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, ymm3, ymmword ptr [ecx + 4064], 123
+0x62,0xf3,0x67,0x2f,0xc2,0x69,0x7f,0x7b
+
+# ATT: vcmppbf16 $123, -256(%edx){1to16}, %ymm3, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, ymm3, word ptr [edx - 256]{1to16}, 123
+0x62,0xf3,0x67,0x3f,0xc2,0x6a,0x80,0x7b
+
+# ATT: vcomsbf16 %xmm3, %xmm2
+# INTEL: vcomsbf16 xmm2, xmm3
+0x62,0xf5,0x7d,0x08,0x2f,0xd3
+
+# ATT: vcomsbf16 268435456(%esp,%esi,8), %xmm2
+# INTEL: vcomsbf16 xmm2, word ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcomsbf16 291(%edi,%eax,4), %xmm2
+# INTEL: vcomsbf16 xmm2, word ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7d,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcomsbf16 (%eax), %xmm2
+# INTEL: vcomsbf16 xmm2, word ptr [eax]
+0x62,0xf5,0x7d,0x08,0x2f,0x10
+
+# ATT: vcomsbf16 -64(,%ebp,2), %xmm2
+# INTEL: vcomsbf16 xmm2, word ptr [2*ebp - 64]
+0x62,0xf5,0x7d,0x08,0x2f,0x14,0x6d,0xc0,0xff,0xff,0xff
+
+# ATT: vcomsbf16 254(%ecx), %xmm2
+# INTEL: vcomsbf16 xmm2, word ptr [ecx + 254]
+0x62,0xf5,0x7d,0x08,0x2f,0x51,0x7f
+
+# ATT: vcomsbf16 -256(%edx), %xmm2
+# INTEL: vcomsbf16 xmm2, word ptr [edx - 256]
+0x62,0xf5,0x7d,0x08,0x2f,0x52,0x80
+
+# ATT: vdivnepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vdivnepbf16 ymm2, ymm3, ymm4
+0x62,0xf5,0x65,0x28,0x5e,0xd4
+
+# ATT: vdivnepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vdivnepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf5,0x65,0x2f,0x5e,0xd4
+
+# ATT: vdivnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vdivnepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf5,0x65,0xaf,0x5e,0xd4
+
+# ATT: vdivnepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vdivnepbf16 zmm2, zmm3, zmm4
+0x62,0xf5,0x65,0x48,0x5e,0xd4
+
+# ATT: vdivnepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vdivnepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf5,0x65,0x4f,0x5e,0xd4
+
+# ATT: vdivnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vdivnepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf5,0x65,0xcf,0x5e,0xd4
+
+# ATT: vdivnepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vdivnepbf16 xmm2, xmm3, xmm4
+0x62,0xf5,0x65,0x08,0x5e,0xd4
+
+# ATT: vdivnepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vdivnepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf5,0x65,0x0f,0x5e,0xd4
+
+# ATT: vdivnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vdivnepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf5,0x65,0x8f,0x5e,0xd4
+
+# ATT: vdivnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vdivnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x48,0x5e,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vdivnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vdivnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x4f,0x5e,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vdivnepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vdivnepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf5,0x65,0x58,0x5e,0x10
+
+# ATT: vdivnepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vdivnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf5,0x65,0x48,0x5e,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vdivnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vdivnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf5,0x65,0xcf,0x5e,0x51,0x7f
+
+# ATT: vdivnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vdivnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf5,0x65,0xdf,0x5e,0x52,0x80
+
+# ATT: vdivnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vdivnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x28,0x5e,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vdivnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vdivnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x2f,0x5e,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vdivnepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vdivnepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf5,0x65,0x38,0x5e,0x10
+
+# ATT: vdivnepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vdivnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf5,0x65,0x28,0x5e,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vdivnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vdivnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x65,0xaf,0x5e,0x51,0x7f
+
+# ATT: vdivnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vdivnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf5,0x65,0xbf,0x5e,0x52,0x80
+
+# ATT: vdivnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vdivnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x08,0x5e,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vdivnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vdivnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x0f,0x5e,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vdivnepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vdivnepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf5,0x65,0x18,0x5e,0x10
+
+# ATT: vdivnepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vdivnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf5,0x65,0x08,0x5e,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vdivnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vdivnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x65,0x8f,0x5e,0x51,0x7f
+
+# ATT: vdivnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vdivnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x65,0x9f,0x5e,0x52,0x80
+
+# ATT: vfmadd132nepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vfmadd132nepbf16 ymm2, ymm3, ymm4
+0x62,0xf6,0x64,0x28,0x98,0xd4
+
+# ATT: vfmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmadd132nepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf6,0x64,0x2f,0x98,0xd4
+
+# ATT: vfmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf6,0x64,0xaf,0x98,0xd4
+
+# ATT: vfmadd132nepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vfmadd132nepbf16 zmm2, zmm3, zmm4
+0x62,0xf6,0x64,0x48,0x98,0xd4
+
+# ATT: vfmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vfmadd132nepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf6,0x64,0x4f,0x98,0xd4
+
+# ATT: vfmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf6,0x64,0xcf,0x98,0xd4
+
+# ATT: vfmadd132nepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vfmadd132nepbf16 xmm2, xmm3, xmm4
+0x62,0xf6,0x64,0x08,0x98,0xd4
+
+# ATT: vfmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vfmadd132nepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf6,0x64,0x0f,0x98,0xd4
+
+# ATT: vfmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf6,0x64,0x8f,0x98,0xd4
+
+# ATT: vfmadd132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vfmadd132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x48,0x98,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vfmadd132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x4f,0x98,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd132nepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vfmadd132nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf6,0x64,0x58,0x98,0x10
+
+# ATT: vfmadd132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vfmadd132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x64,0x48,0x98,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfmadd132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x64,0xcf,0x98,0x51,0x7f
+
+# ATT: vfmadd132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmadd132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x64,0xdf,0x98,0x52,0x80
+
+# ATT: vfmadd132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vfmadd132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x28,0x98,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vfmadd132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x2f,0x98,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd132nepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vfmadd132nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf6,0x64,0x38,0x98,0x10
+
+# ATT: vfmadd132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vfmadd132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x64,0x28,0x98,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfmadd132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x64,0xaf,0x98,0x51,0x7f
+
+# ATT: vfmadd132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmadd132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x64,0xbf,0x98,0x52,0x80
+
+# ATT: vfmadd132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vfmadd132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x08,0x98,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vfmadd132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x0f,0x98,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd132nepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vfmadd132nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf6,0x64,0x18,0x98,0x10
+
+# ATT: vfmadd132nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vfmadd132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x64,0x08,0x98,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfmadd132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x64,0x8f,0x98,0x51,0x7f
+
+# ATT: vfmadd132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmadd132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x64,0x9f,0x98,0x52,0x80
+
+# ATT: vfmadd213nepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vfmadd213nepbf16 ymm2, ymm3, ymm4
+0x62,0xf6,0x64,0x28,0xa8,0xd4
+
+# ATT: vfmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmadd213nepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf6,0x64,0x2f,0xa8,0xd4
+
+# ATT: vfmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf6,0x64,0xaf,0xa8,0xd4
+
+# ATT: vfmadd213nepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vfmadd213nepbf16 zmm2, zmm3, zmm4
+0x62,0xf6,0x64,0x48,0xa8,0xd4
+
+# ATT: vfmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vfmadd213nepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf6,0x64,0x4f,0xa8,0xd4
+
+# ATT: vfmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf6,0x64,0xcf,0xa8,0xd4
+
+# ATT: vfmadd213nepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vfmadd213nepbf16 xmm2, xmm3, xmm4
+0x62,0xf6,0x64,0x08,0xa8,0xd4
+
+# ATT: vfmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vfmadd213nepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf6,0x64,0x0f,0xa8,0xd4
+
+# ATT: vfmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf6,0x64,0x8f,0xa8,0xd4
+
+# ATT: vfmadd213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vfmadd213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x48,0xa8,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vfmadd213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x4f,0xa8,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd213nepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vfmadd213nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf6,0x64,0x58,0xa8,0x10
+
+# ATT: vfmadd213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vfmadd213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x64,0x48,0xa8,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfmadd213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x64,0xcf,0xa8,0x51,0x7f
+
+# ATT: vfmadd213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmadd213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x64,0xdf,0xa8,0x52,0x80
+
+# ATT: vfmadd213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vfmadd213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x28,0xa8,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vfmadd213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x2f,0xa8,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd213nepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vfmadd213nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf6,0x64,0x38,0xa8,0x10
+
+# ATT: vfmadd213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vfmadd213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x64,0x28,0xa8,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfmadd213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x64,0xaf,0xa8,0x51,0x7f
+
+# ATT: vfmadd213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmadd213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x64,0xbf,0xa8,0x52,0x80
+
+# ATT: vfmadd213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vfmadd213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x08,0xa8,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vfmadd213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x0f,0xa8,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd213nepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vfmadd213nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf6,0x64,0x18,0xa8,0x10
+
+# ATT: vfmadd213nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vfmadd213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x64,0x08,0xa8,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfmadd213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x64,0x8f,0xa8,0x51,0x7f
+
+# ATT: vfmadd213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmadd213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x64,0x9f,0xa8,0x52,0x80
+
+# ATT: vfmadd231nepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vfmadd231nepbf16 ymm2, ymm3, ymm4
+0x62,0xf6,0x64,0x28,0xb8,0xd4
+
+# ATT: vfmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmadd231nepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf6,0x64,0x2f,0xb8,0xd4
+
+# ATT: vfmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf6,0x64,0xaf,0xb8,0xd4
+
+# ATT: vfmadd231nepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vfmadd231nepbf16 zmm2, zmm3, zmm4
+0x62,0xf6,0x64,0x48,0xb8,0xd4
+
+# ATT: vfmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vfmadd231nepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf6,0x64,0x4f,0xb8,0xd4
+
+# ATT: vfmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf6,0x64,0xcf,0xb8,0xd4
+
+# ATT: vfmadd231nepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vfmadd231nepbf16 xmm2, xmm3, xmm4
+0x62,0xf6,0x64,0x08,0xb8,0xd4
+
+# ATT: vfmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vfmadd231nepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf6,0x64,0x0f,0xb8,0xd4
+
+# ATT: vfmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf6,0x64,0x8f,0xb8,0xd4
+
+# ATT: vfmadd231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vfmadd231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x48,0xb8,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vfmadd231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x4f,0xb8,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd231nepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vfmadd231nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf6,0x64,0x58,0xb8,0x10
+
+# ATT: vfmadd231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vfmadd231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x64,0x48,0xb8,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfmadd231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x64,0xcf,0xb8,0x51,0x7f
+
+# ATT: vfmadd231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmadd231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x64,0xdf,0xb8,0x52,0x80
+
+# ATT: vfmadd231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vfmadd231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x28,0xb8,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vfmadd231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x2f,0xb8,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd231nepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vfmadd231nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf6,0x64,0x38,0xb8,0x10
+
+# ATT: vfmadd231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vfmadd231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x64,0x28,0xb8,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfmadd231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x64,0xaf,0xb8,0x51,0x7f
+
+# ATT: vfmadd231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmadd231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x64,0xbf,0xb8,0x52,0x80
+
+# ATT: vfmadd231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vfmadd231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x08,0xb8,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vfmadd231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x0f,0xb8,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd231nepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vfmadd231nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf6,0x64,0x18,0xb8,0x10
+
+# ATT: vfmadd231nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vfmadd231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x64,0x08,0xb8,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfmadd231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x64,0x8f,0xb8,0x51,0x7f
+
+# ATT: vfmadd231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmadd231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x64,0x9f,0xb8,0x52,0x80
+
+# ATT: vfmsub132nepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vfmsub132nepbf16 ymm2, ymm3, ymm4
+0x62,0xf6,0x64,0x28,0x9a,0xd4
+
+# ATT: vfmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmsub132nepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf6,0x64,0x2f,0x9a,0xd4
+
+# ATT: vfmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf6,0x64,0xaf,0x9a,0xd4
+
+# ATT: vfmsub132nepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vfmsub132nepbf16 zmm2, zmm3, zmm4
+0x62,0xf6,0x64,0x48,0x9a,0xd4
+
+# ATT: vfmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vfmsub132nepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf6,0x64,0x4f,0x9a,0xd4
+
+# ATT: vfmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf6,0x64,0xcf,0x9a,0xd4
+
+# ATT: vfmsub132nepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vfmsub132nepbf16 xmm2, xmm3, xmm4
+0x62,0xf6,0x64,0x08,0x9a,0xd4
+
+# ATT: vfmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vfmsub132nepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf6,0x64,0x0f,0x9a,0xd4
+
+# ATT: vfmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf6,0x64,0x8f,0x9a,0xd4
+
+# ATT: vfmsub132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vfmsub132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x48,0x9a,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vfmsub132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x4f,0x9a,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub132nepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vfmsub132nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf6,0x64,0x58,0x9a,0x10
+
+# ATT: vfmsub132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vfmsub132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x64,0x48,0x9a,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfmsub132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x64,0xcf,0x9a,0x51,0x7f
+
+# ATT: vfmsub132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmsub132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x64,0xdf,0x9a,0x52,0x80
+
+# ATT: vfmsub132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vfmsub132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x28,0x9a,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vfmsub132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x2f,0x9a,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub132nepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vfmsub132nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf6,0x64,0x38,0x9a,0x10
+
+# ATT: vfmsub132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vfmsub132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x64,0x28,0x9a,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfmsub132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x64,0xaf,0x9a,0x51,0x7f
+
+# ATT: vfmsub132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsub132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x64,0xbf,0x9a,0x52,0x80
+
+# ATT: vfmsub132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vfmsub132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x08,0x9a,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vfmsub132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x0f,0x9a,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub132nepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vfmsub132nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf6,0x64,0x18,0x9a,0x10
+
+# ATT: vfmsub132nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vfmsub132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x64,0x08,0x9a,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfmsub132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x64,0x8f,0x9a,0x51,0x7f
+
+# ATT: vfmsub132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmsub132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x64,0x9f,0x9a,0x52,0x80
+
+# ATT: vfmsub213nepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vfmsub213nepbf16 ymm2, ymm3, ymm4
+0x62,0xf6,0x64,0x28,0xaa,0xd4
+
+# ATT: vfmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmsub213nepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf6,0x64,0x2f,0xaa,0xd4
+
+# ATT: vfmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf6,0x64,0xaf,0xaa,0xd4
+
+# ATT: vfmsub213nepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vfmsub213nepbf16 zmm2, zmm3, zmm4
+0x62,0xf6,0x64,0x48,0xaa,0xd4
+
+# ATT: vfmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vfmsub213nepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf6,0x64,0x4f,0xaa,0xd4
+
+# ATT: vfmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf6,0x64,0xcf,0xaa,0xd4
+
+# ATT: vfmsub213nepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vfmsub213nepbf16 xmm2, xmm3, xmm4
+0x62,0xf6,0x64,0x08,0xaa,0xd4
+
+# ATT: vfmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vfmsub213nepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf6,0x64,0x0f,0xaa,0xd4
+
+# ATT: vfmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf6,0x64,0x8f,0xaa,0xd4
+
+# ATT: vfmsub213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vfmsub213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x48,0xaa,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vfmsub213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x4f,0xaa,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub213nepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vfmsub213nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf6,0x64,0x58,0xaa,0x10
+
+# ATT: vfmsub213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vfmsub213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x64,0x48,0xaa,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfmsub213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x64,0xcf,0xaa,0x51,0x7f
+
+# ATT: vfmsub213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmsub213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x64,0xdf,0xaa,0x52,0x80
+
+# ATT: vfmsub213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vfmsub213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x28,0xaa,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vfmsub213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x2f,0xaa,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub213nepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vfmsub213nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf6,0x64,0x38,0xaa,0x10
+
+# ATT: vfmsub213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vfmsub213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x64,0x28,0xaa,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfmsub213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x64,0xaf,0xaa,0x51,0x7f
+
+# ATT: vfmsub213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsub213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x64,0xbf,0xaa,0x52,0x80
+
+# ATT: vfmsub213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vfmsub213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x08,0xaa,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vfmsub213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x0f,0xaa,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub213nepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vfmsub213nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf6,0x64,0x18,0xaa,0x10
+
+# ATT: vfmsub213nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vfmsub213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x64,0x08,0xaa,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfmsub213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x64,0x8f,0xaa,0x51,0x7f
+
+# ATT: vfmsub213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmsub213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x64,0x9f,0xaa,0x52,0x80
+
+# ATT: vfmsub231nepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vfmsub231nepbf16 ymm2, ymm3, ymm4
+0x62,0xf6,0x64,0x28,0xba,0xd4
+
+# ATT: vfmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmsub231nepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf6,0x64,0x2f,0xba,0xd4
+
+# ATT: vfmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf6,0x64,0xaf,0xba,0xd4
+
+# ATT: vfmsub231nepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vfmsub231nepbf16 zmm2, zmm3, zmm4
+0x62,0xf6,0x64,0x48,0xba,0xd4
+
+# ATT: vfmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vfmsub231nepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf6,0x64,0x4f,0xba,0xd4
+
+# ATT: vfmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf6,0x64,0xcf,0xba,0xd4
+
+# ATT: vfmsub231nepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vfmsub231nepbf16 xmm2, xmm3, xmm4
+0x62,0xf6,0x64,0x08,0xba,0xd4
+
+# ATT: vfmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vfmsub231nepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf6,0x64,0x0f,0xba,0xd4
+
+# ATT: vfmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf6,0x64,0x8f,0xba,0xd4
+
+# ATT: vfmsub231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vfmsub231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x48,0xba,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vfmsub231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x4f,0xba,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub231nepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vfmsub231nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf6,0x64,0x58,0xba,0x10
+
+# ATT: vfmsub231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vfmsub231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x64,0x48,0xba,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfmsub231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x64,0xcf,0xba,0x51,0x7f
+
+# ATT: vfmsub231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmsub231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x64,0xdf,0xba,0x52,0x80
+
+# ATT: vfmsub231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vfmsub231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x28,0xba,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vfmsub231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x2f,0xba,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub231nepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vfmsub231nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf6,0x64,0x38,0xba,0x10
+
+# ATT: vfmsub231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vfmsub231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x64,0x28,0xba,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfmsub231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x64,0xaf,0xba,0x51,0x7f
+
+# ATT: vfmsub231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsub231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x64,0xbf,0xba,0x52,0x80
+
+# ATT: vfmsub231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vfmsub231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x08,0xba,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vfmsub231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x0f,0xba,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub231nepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vfmsub231nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf6,0x64,0x18,0xba,0x10
+
+# ATT: vfmsub231nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vfmsub231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x64,0x08,0xba,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfmsub231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x64,0x8f,0xba,0x51,0x7f
+
+# ATT: vfmsub231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmsub231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x64,0x9f,0xba,0x52,0x80
+
+# ATT: vfnmadd132nepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vfnmadd132nepbf16 ymm2, ymm3, ymm4
+0x62,0xf6,0x64,0x28,0x9c,0xd4
+
+# ATT: vfnmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmadd132nepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf6,0x64,0x2f,0x9c,0xd4
+
+# ATT: vfnmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf6,0x64,0xaf,0x9c,0xd4
+
+# ATT: vfnmadd132nepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vfnmadd132nepbf16 zmm2, zmm3, zmm4
+0x62,0xf6,0x64,0x48,0x9c,0xd4
+
+# ATT: vfnmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vfnmadd132nepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf6,0x64,0x4f,0x9c,0xd4
+
+# ATT: vfnmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf6,0x64,0xcf,0x9c,0xd4
+
+# ATT: vfnmadd132nepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vfnmadd132nepbf16 xmm2, xmm3, xmm4
+0x62,0xf6,0x64,0x08,0x9c,0xd4
+
+# ATT: vfnmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vfnmadd132nepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf6,0x64,0x0f,0x9c,0xd4
+
+# ATT: vfnmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf6,0x64,0x8f,0x9c,0xd4
+
+# ATT: vfnmadd132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vfnmadd132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x48,0x9c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vfnmadd132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x4f,0x9c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd132nepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vfnmadd132nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf6,0x64,0x58,0x9c,0x10
+
+# ATT: vfnmadd132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vfnmadd132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x64,0x48,0x9c,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfnmadd132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x64,0xcf,0x9c,0x51,0x7f
+
+# ATT: vfnmadd132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x64,0xdf,0x9c,0x52,0x80
+
+# ATT: vfnmadd132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vfnmadd132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x28,0x9c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vfnmadd132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x2f,0x9c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd132nepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vfnmadd132nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf6,0x64,0x38,0x9c,0x10
+
+# ATT: vfnmadd132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vfnmadd132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x64,0x28,0x9c,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfnmadd132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x64,0xaf,0x9c,0x51,0x7f
+
+# ATT: vfnmadd132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x64,0xbf,0x9c,0x52,0x80
+
+# ATT: vfnmadd132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vfnmadd132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x08,0x9c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vfnmadd132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x0f,0x9c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd132nepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vfnmadd132nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf6,0x64,0x18,0x9c,0x10
+
+# ATT: vfnmadd132nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vfnmadd132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x64,0x08,0x9c,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfnmadd132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x64,0x8f,0x9c,0x51,0x7f
+
+# ATT: vfnmadd132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x64,0x9f,0x9c,0x52,0x80
+
+# ATT: vfnmadd213nepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vfnmadd213nepbf16 ymm2, ymm3, ymm4
+0x62,0xf6,0x64,0x28,0xac,0xd4
+
+# ATT: vfnmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmadd213nepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf6,0x64,0x2f,0xac,0xd4
+
+# ATT: vfnmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf6,0x64,0xaf,0xac,0xd4
+
+# ATT: vfnmadd213nepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vfnmadd213nepbf16 zmm2, zmm3, zmm4
+0x62,0xf6,0x64,0x48,0xac,0xd4
+
+# ATT: vfnmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vfnmadd213nepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf6,0x64,0x4f,0xac,0xd4
+
+# ATT: vfnmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf6,0x64,0xcf,0xac,0xd4
+
+# ATT: vfnmadd213nepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vfnmadd213nepbf16 xmm2, xmm3, xmm4
+0x62,0xf6,0x64,0x08,0xac,0xd4
+
+# ATT: vfnmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vfnmadd213nepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf6,0x64,0x0f,0xac,0xd4
+
+# ATT: vfnmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf6,0x64,0x8f,0xac,0xd4
+
+# ATT: vfnmadd213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vfnmadd213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x48,0xac,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vfnmadd213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x4f,0xac,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd213nepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vfnmadd213nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf6,0x64,0x58,0xac,0x10
+
+# ATT: vfnmadd213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vfnmadd213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x64,0x48,0xac,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfnmadd213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x64,0xcf,0xac,0x51,0x7f
+
+# ATT: vfnmadd213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x64,0xdf,0xac,0x52,0x80
+
+# ATT: vfnmadd213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vfnmadd213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x28,0xac,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vfnmadd213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x2f,0xac,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd213nepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vfnmadd213nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf6,0x64,0x38,0xac,0x10
+
+# ATT: vfnmadd213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vfnmadd213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x64,0x28,0xac,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfnmadd213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x64,0xaf,0xac,0x51,0x7f
+
+# ATT: vfnmadd213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x64,0xbf,0xac,0x52,0x80
+
+# ATT: vfnmadd213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vfnmadd213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x08,0xac,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vfnmadd213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x0f,0xac,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd213nepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vfnmadd213nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf6,0x64,0x18,0xac,0x10
+
+# ATT: vfnmadd213nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vfnmadd213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x64,0x08,0xac,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfnmadd213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x64,0x8f,0xac,0x51,0x7f
+
+# ATT: vfnmadd213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x64,0x9f,0xac,0x52,0x80
+
+# ATT: vfnmadd231nepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vfnmadd231nepbf16 ymm2, ymm3, ymm4
+0x62,0xf6,0x64,0x28,0xbc,0xd4
+
+# ATT: vfnmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmadd231nepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf6,0x64,0x2f,0xbc,0xd4
+
+# ATT: vfnmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf6,0x64,0xaf,0xbc,0xd4
+
+# ATT: vfnmadd231nepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vfnmadd231nepbf16 zmm2, zmm3, zmm4
+0x62,0xf6,0x64,0x48,0xbc,0xd4
+
+# ATT: vfnmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vfnmadd231nepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf6,0x64,0x4f,0xbc,0xd4
+
+# ATT: vfnmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf6,0x64,0xcf,0xbc,0xd4
+
+# ATT: vfnmadd231nepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vfnmadd231nepbf16 xmm2, xmm3, xmm4
+0x62,0xf6,0x64,0x08,0xbc,0xd4
+
+# ATT: vfnmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vfnmadd231nepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf6,0x64,0x0f,0xbc,0xd4
+
+# ATT: vfnmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf6,0x64,0x8f,0xbc,0xd4
+
+# ATT: vfnmadd231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vfnmadd231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x48,0xbc,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vfnmadd231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x4f,0xbc,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd231nepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vfnmadd231nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf6,0x64,0x58,0xbc,0x10
+
+# ATT: vfnmadd231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vfnmadd231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x64,0x48,0xbc,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfnmadd231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x64,0xcf,0xbc,0x51,0x7f
+
+# ATT: vfnmadd231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x64,0xdf,0xbc,0x52,0x80
+
+# ATT: vfnmadd231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vfnmadd231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x28,0xbc,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vfnmadd231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x2f,0xbc,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd231nepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vfnmadd231nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf6,0x64,0x38,0xbc,0x10
+
+# ATT: vfnmadd231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vfnmadd231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x64,0x28,0xbc,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfnmadd231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x64,0xaf,0xbc,0x51,0x7f
+
+# ATT: vfnmadd231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x64,0xbf,0xbc,0x52,0x80
+
+# ATT: vfnmadd231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vfnmadd231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x08,0xbc,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vfnmadd231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x0f,0xbc,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd231nepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vfnmadd231nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf6,0x64,0x18,0xbc,0x10
+
+# ATT: vfnmadd231nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vfnmadd231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x64,0x08,0xbc,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfnmadd231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x64,0x8f,0xbc,0x51,0x7f
+
+# ATT: vfnmadd231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x64,0x9f,0xbc,0x52,0x80
+
+# ATT: vfnmsub132nepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vfnmsub132nepbf16 ymm2, ymm3, ymm4
+0x62,0xf6,0x64,0x28,0x9e,0xd4
+
+# ATT: vfnmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmsub132nepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf6,0x64,0x2f,0x9e,0xd4
+
+# ATT: vfnmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf6,0x64,0xaf,0x9e,0xd4
+
+# ATT: vfnmsub132nepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vfnmsub132nepbf16 zmm2, zmm3, zmm4
+0x62,0xf6,0x64,0x48,0x9e,0xd4
+
+# ATT: vfnmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vfnmsub132nepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf6,0x64,0x4f,0x9e,0xd4
+
+# ATT: vfnmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf6,0x64,0xcf,0x9e,0xd4
+
+# ATT: vfnmsub132nepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vfnmsub132nepbf16 xmm2, xmm3, xmm4
+0x62,0xf6,0x64,0x08,0x9e,0xd4
+
+# ATT: vfnmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vfnmsub132nepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf6,0x64,0x0f,0x9e,0xd4
+
+# ATT: vfnmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf6,0x64,0x8f,0x9e,0xd4
+
+# ATT: vfnmsub132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vfnmsub132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x48,0x9e,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vfnmsub132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x4f,0x9e,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub132nepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vfnmsub132nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf6,0x64,0x58,0x9e,0x10
+
+# ATT: vfnmsub132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vfnmsub132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x64,0x48,0x9e,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfnmsub132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x64,0xcf,0x9e,0x51,0x7f
+
+# ATT: vfnmsub132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x64,0xdf,0x9e,0x52,0x80
+
+# ATT: vfnmsub132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vfnmsub132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x28,0x9e,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vfnmsub132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x2f,0x9e,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub132nepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vfnmsub132nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf6,0x64,0x38,0x9e,0x10
+
+# ATT: vfnmsub132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vfnmsub132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x64,0x28,0x9e,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfnmsub132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x64,0xaf,0x9e,0x51,0x7f
+
+# ATT: vfnmsub132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x64,0xbf,0x9e,0x52,0x80
+
+# ATT: vfnmsub132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vfnmsub132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x08,0x9e,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vfnmsub132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x0f,0x9e,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub132nepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vfnmsub132nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf6,0x64,0x18,0x9e,0x10
+
+# ATT: vfnmsub132nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vfnmsub132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x64,0x08,0x9e,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfnmsub132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x64,0x8f,0x9e,0x51,0x7f
+
+# ATT: vfnmsub132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x64,0x9f,0x9e,0x52,0x80
+
+# ATT: vfnmsub213nepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vfnmsub213nepbf16 ymm2, ymm3, ymm4
+0x62,0xf6,0x64,0x28,0xae,0xd4
+
+# ATT: vfnmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmsub213nepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf6,0x64,0x2f,0xae,0xd4
+
+# ATT: vfnmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf6,0x64,0xaf,0xae,0xd4
+
+# ATT: vfnmsub213nepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vfnmsub213nepbf16 zmm2, zmm3, zmm4
+0x62,0xf6,0x64,0x48,0xae,0xd4
+
+# ATT: vfnmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vfnmsub213nepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf6,0x64,0x4f,0xae,0xd4
+
+# ATT: vfnmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf6,0x64,0xcf,0xae,0xd4
+
+# ATT: vfnmsub213nepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vfnmsub213nepbf16 xmm2, xmm3, xmm4
+0x62,0xf6,0x64,0x08,0xae,0xd4
+
+# ATT: vfnmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vfnmsub213nepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf6,0x64,0x0f,0xae,0xd4
+
+# ATT: vfnmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf6,0x64,0x8f,0xae,0xd4
+
+# ATT: vfnmsub213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vfnmsub213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x48,0xae,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vfnmsub213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x4f,0xae,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub213nepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vfnmsub213nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf6,0x64,0x58,0xae,0x10
+
+# ATT: vfnmsub213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vfnmsub213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x64,0x48,0xae,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfnmsub213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x64,0xcf,0xae,0x51,0x7f
+
+# ATT: vfnmsub213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x64,0xdf,0xae,0x52,0x80
+
+# ATT: vfnmsub213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vfnmsub213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x28,0xae,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vfnmsub213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x2f,0xae,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub213nepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vfnmsub213nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf6,0x64,0x38,0xae,0x10
+
+# ATT: vfnmsub213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vfnmsub213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x64,0x28,0xae,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfnmsub213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x64,0xaf,0xae,0x51,0x7f
+
+# ATT: vfnmsub213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x64,0xbf,0xae,0x52,0x80
+
+# ATT: vfnmsub213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vfnmsub213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x08,0xae,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vfnmsub213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x0f,0xae,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub213nepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vfnmsub213nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf6,0x64,0x18,0xae,0x10
+
+# ATT: vfnmsub213nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vfnmsub213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x64,0x08,0xae,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfnmsub213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x64,0x8f,0xae,0x51,0x7f
+
+# ATT: vfnmsub213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x64,0x9f,0xae,0x52,0x80
+
+# ATT: vfnmsub231nepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vfnmsub231nepbf16 ymm2, ymm3, ymm4
+0x62,0xf6,0x64,0x28,0xbe,0xd4
+
+# ATT: vfnmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmsub231nepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf6,0x64,0x2f,0xbe,0xd4
+
+# ATT: vfnmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf6,0x64,0xaf,0xbe,0xd4
+
+# ATT: vfnmsub231nepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vfnmsub231nepbf16 zmm2, zmm3, zmm4
+0x62,0xf6,0x64,0x48,0xbe,0xd4
+
+# ATT: vfnmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vfnmsub231nepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf6,0x64,0x4f,0xbe,0xd4
+
+# ATT: vfnmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf6,0x64,0xcf,0xbe,0xd4
+
+# ATT: vfnmsub231nepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vfnmsub231nepbf16 xmm2, xmm3, xmm4
+0x62,0xf6,0x64,0x08,0xbe,0xd4
+
+# ATT: vfnmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vfnmsub231nepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf6,0x64,0x0f,0xbe,0xd4
+
+# ATT: vfnmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf6,0x64,0x8f,0xbe,0xd4
+
+# ATT: vfnmsub231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vfnmsub231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x48,0xbe,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vfnmsub231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x4f,0xbe,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub231nepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vfnmsub231nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf6,0x64,0x58,0xbe,0x10
+
+# ATT: vfnmsub231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vfnmsub231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x64,0x48,0xbe,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfnmsub231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x64,0xcf,0xbe,0x51,0x7f
+
+# ATT: vfnmsub231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x64,0xdf,0xbe,0x52,0x80
+
+# ATT: vfnmsub231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vfnmsub231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x28,0xbe,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vfnmsub231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x2f,0xbe,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub231nepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vfnmsub231nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf6,0x64,0x38,0xbe,0x10
+
+# ATT: vfnmsub231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vfnmsub231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x64,0x28,0xbe,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfnmsub231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x64,0xaf,0xbe,0x51,0x7f
+
+# ATT: vfnmsub231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x64,0xbf,0xbe,0x52,0x80
+
+# ATT: vfnmsub231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vfnmsub231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x08,0xbe,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vfnmsub231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x0f,0xbe,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub231nepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vfnmsub231nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf6,0x64,0x18,0xbe,0x10
+
+# ATT: vfnmsub231nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vfnmsub231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x64,0x08,0xbe,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfnmsub231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x64,0x8f,0xbe,0x51,0x7f
+
+# ATT: vfnmsub231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x64,0x9f,0xbe,0x52,0x80
+
+# ATT: vfpclasspbf16 $123, %zmm3, %k5
+# INTEL: vfpclasspbf16 k5, zmm3, 123
+0x62,0xf3,0x7f,0x48,0x66,0xeb,0x7b
+
+# ATT: vfpclasspbf16 $123, %zmm3, %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, zmm3, 123
+0x62,0xf3,0x7f,0x4f,0x66,0xeb,0x7b
+
+# ATT: vfpclasspbf16 $123, %ymm3, %k5
+# INTEL: vfpclasspbf16 k5, ymm3, 123
+0x62,0xf3,0x7f,0x28,0x66,0xeb,0x7b
+
+# ATT: vfpclasspbf16 $123, %ymm3, %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, ymm3, 123
+0x62,0xf3,0x7f,0x2f,0x66,0xeb,0x7b
+
+# ATT: vfpclasspbf16 $123, %xmm3, %k5
+# INTEL: vfpclasspbf16 k5, xmm3, 123
+0x62,0xf3,0x7f,0x08,0x66,0xeb,0x7b
+
+# ATT: vfpclasspbf16 $123, %xmm3, %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, xmm3, 123
+0x62,0xf3,0x7f,0x0f,0x66,0xeb,0x7b
+
+# ATT: vfpclasspbf16x $123, 268435456(%esp,%esi,8), %k5
+# INTEL: vfpclasspbf16 k5, xmmword ptr [esp + 8*esi + 268435456], 123
+0x62,0xf3,0x7f,0x08,0x66,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vfpclasspbf16x $123, 291(%edi,%eax,4), %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, xmmword ptr [edi + 4*eax + 291], 123
+0x62,0xf3,0x7f,0x0f,0x66,0xac,0x87,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vfpclasspbf16 $123, (%eax){1to8}, %k5
+# INTEL: vfpclasspbf16 k5, word ptr [eax]{1to8}, 123
+0x62,0xf3,0x7f,0x18,0x66,0x28,0x7b
+
+# ATT: vfpclasspbf16x $123, -512(,%ebp,2), %k5
+# INTEL: vfpclasspbf16 k5, xmmword ptr [2*ebp - 512], 123
+0x62,0xf3,0x7f,0x08,0x66,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b
+
+# ATT: vfpclasspbf16x $123, 2032(%ecx), %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, xmmword ptr [ecx + 2032], 123
+0x62,0xf3,0x7f,0x0f,0x66,0x69,0x7f,0x7b
+
+# ATT: vfpclasspbf16 $123, -256(%edx){1to8}, %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, word ptr [edx - 256]{1to8}, 123
+0x62,0xf3,0x7f,0x1f,0x66,0x6a,0x80,0x7b
+
+# ATT: vfpclasspbf16 $123, (%eax){1to16}, %k5
+# INTEL: vfpclasspbf16 k5, word ptr [eax]{1to16}, 123
+0x62,0xf3,0x7f,0x38,0x66,0x28,0x7b
+
+# ATT: vfpclasspbf16y $123, -1024(,%ebp,2), %k5
+# INTEL: vfpclasspbf16 k5, ymmword ptr [2*ebp - 1024], 123
+0x62,0xf3,0x7f,0x28,0x66,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b
+
+# ATT: vfpclasspbf16y $123, 4064(%ecx), %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, ymmword ptr [ecx + 4064], 123
+0x62,0xf3,0x7f,0x2f,0x66,0x69,0x7f,0x7b
+
+# ATT: vfpclasspbf16 $123, -256(%edx){1to16}, %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, word ptr [edx - 256]{1to16}, 123
+0x62,0xf3,0x7f,0x3f,0x66,0x6a,0x80,0x7b
+
+# ATT: vfpclasspbf16 $123, (%eax){1to32}, %k5
+# INTEL: vfpclasspbf16 k5, word ptr [eax]{1to32}, 123
+0x62,0xf3,0x7f,0x58,0x66,0x28,0x7b
+
+# ATT: vfpclasspbf16z $123, -2048(,%ebp,2), %k5
+# INTEL: vfpclasspbf16 k5, zmmword ptr [2*ebp - 2048], 123
+0x62,0xf3,0x7f,0x48,0x66,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b
+
+# ATT: vfpclasspbf16z $123, 8128(%ecx), %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, zmmword ptr [ecx + 8128], 123
+0x62,0xf3,0x7f,0x4f,0x66,0x69,0x7f,0x7b
+
+# ATT: vfpclasspbf16 $123, -256(%edx){1to32}, %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, word ptr [edx - 256]{1to32}, 123
+0x62,0xf3,0x7f,0x5f,0x66,0x6a,0x80,0x7b
+
+# ATT: vgetexppbf16 %xmm3, %xmm2
+# INTEL: vgetexppbf16 xmm2, xmm3
+0x62,0xf5,0x7d,0x08,0x42,0xd3
+
+# ATT: vgetexppbf16 %xmm3, %xmm2 {%k7}
+# INTEL: vgetexppbf16 xmm2 {k7}, xmm3
+0x62,0xf5,0x7d,0x0f,0x42,0xd3
+
+# ATT: vgetexppbf16 %xmm3, %xmm2 {%k7} {z}
+# INTEL: vgetexppbf16 xmm2 {k7} {z}, xmm3
+0x62,0xf5,0x7d,0x8f,0x42,0xd3
+
+# ATT: vgetexppbf16 %zmm3, %zmm2
+# INTEL: vgetexppbf16 zmm2, zmm3
+0x62,0xf5,0x7d,0x48,0x42,0xd3
+
+# ATT: vgetexppbf16 %zmm3, %zmm2 {%k7}
+# INTEL: vgetexppbf16 zmm2 {k7}, zmm3
+0x62,0xf5,0x7d,0x4f,0x42,0xd3
+
+# ATT: vgetexppbf16 %zmm3, %zmm2 {%k7} {z}
+# INTEL: vgetexppbf16 zmm2 {k7} {z}, zmm3
+0x62,0xf5,0x7d,0xcf,0x42,0xd3
+
+# ATT: vgetexppbf16 %ymm3, %ymm2
+# INTEL: vgetexppbf16 ymm2, ymm3
+0x62,0xf5,0x7d,0x28,0x42,0xd3
+
+# ATT: vgetexppbf16 %ymm3, %ymm2 {%k7}
+# INTEL: vgetexppbf16 ymm2 {k7}, ymm3
+0x62,0xf5,0x7d,0x2f,0x42,0xd3
+
+# ATT: vgetexppbf16 %ymm3, %ymm2 {%k7} {z}
+# INTEL: vgetexppbf16 ymm2 {k7} {z}, ymm3
+0x62,0xf5,0x7d,0xaf,0x42,0xd3
+
+# ATT: vgetexppbf16 268435456(%esp,%esi,8), %xmm2
+# INTEL: vgetexppbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x08,0x42,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vgetexppbf16 291(%edi,%eax,4), %xmm2 {%k7}
+# INTEL: vgetexppbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7d,0x0f,0x42,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vgetexppbf16 (%eax){1to8}, %xmm2
+# INTEL: vgetexppbf16 xmm2, word ptr [eax]{1to8}
+0x62,0xf5,0x7d,0x18,0x42,0x10
+
+# ATT: vgetexppbf16 -512(,%ebp,2), %xmm2
+# INTEL: vgetexppbf16 xmm2, xmmword ptr [2*ebp - 512]
+0x62,0xf5,0x7d,0x08,0x42,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vgetexppbf16 2032(%ecx), %xmm2 {%k7} {z}
+# INTEL: vgetexppbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7d,0x8f,0x42,0x51,0x7f
+
+# ATT: vgetexppbf16 -256(%edx){1to8}, %xmm2 {%k7} {z}
+# INTEL: vgetexppbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x7d,0x9f,0x42,0x52,0x80
+
+# ATT: vgetexppbf16 268435456(%esp,%esi,8), %ymm2
+# INTEL: vgetexppbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x28,0x42,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vgetexppbf16 291(%edi,%eax,4), %ymm2 {%k7}
+# INTEL: vgetexppbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7d,0x2f,0x42,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vgetexppbf16 (%eax){1to16}, %ymm2
+# INTEL: vgetexppbf16 ymm2, word ptr [eax]{1to16}
+0x62,0xf5,0x7d,0x38,0x42,0x10
+
+# ATT: vgetexppbf16 -1024(,%ebp,2), %ymm2
+# INTEL: vgetexppbf16 ymm2, ymmword ptr [2*ebp - 1024]
+0x62,0xf5,0x7d,0x28,0x42,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vgetexppbf16 4064(%ecx), %ymm2 {%k7} {z}
+# INTEL: vgetexppbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x7d,0xaf,0x42,0x51,0x7f
+
+# ATT: vgetexppbf16 -256(%edx){1to16}, %ymm2 {%k7} {z}
+# INTEL: vgetexppbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}
+0x62,0xf5,0x7d,0xbf,0x42,0x52,0x80
+
+# ATT: vgetexppbf16 268435456(%esp,%esi,8), %zmm2
+# INTEL: vgetexppbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x48,0x42,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vgetexppbf16 291(%edi,%eax,4), %zmm2 {%k7}
+# INTEL: vgetexppbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7d,0x4f,0x42,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vgetexppbf16 (%eax){1to32}, %zmm2
+# INTEL: vgetexppbf16 zmm2, word ptr [eax]{1to32}
+0x62,0xf5,0x7d,0x58,0x42,0x10
+
+# ATT: vgetexppbf16 -2048(,%ebp,2), %zmm2
+# INTEL: vgetexppbf16 zmm2, zmmword ptr [2*ebp - 2048]
+0x62,0xf5,0x7d,0x48,0x42,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vgetexppbf16 8128(%ecx), %zmm2 {%k7} {z}
+# INTEL: vgetexppbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+0x62,0xf5,0x7d,0xcf,0x42,0x51,0x7f
+
+# ATT: vgetexppbf16 -256(%edx){1to32}, %zmm2 {%k7} {z}
+# INTEL: vgetexppbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}
+0x62,0xf5,0x7d,0xdf,0x42,0x52,0x80
+
+# ATT: vgetmantpbf16 $123, %zmm3, %zmm2
+# INTEL: vgetmantpbf16 zmm2, zmm3, 123
+0x62,0xf3,0x7f,0x48,0x26,0xd3,0x7b
+
+# ATT: vgetmantpbf16 $123, %zmm3, %zmm2 {%k7}
+# INTEL: vgetmantpbf16 zmm2 {k7}, zmm3, 123
+0x62,0xf3,0x7f,0x4f,0x26,0xd3,0x7b
+
+# ATT: vgetmantpbf16 $123, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vgetmantpbf16 zmm2 {k7} {z}, zmm3, 123
+0x62,0xf3,0x7f,0xcf,0x26,0xd3,0x7b
+
+# ATT: vgetmantpbf16 $123, %ymm3, %ymm2
+# INTEL: vgetmantpbf16 ymm2, ymm3, 123
+0x62,0xf3,0x7f,0x28,0x26,0xd3,0x7b
+
+# ATT: vgetmantpbf16 $123, %ymm3, %ymm2 {%k7}
+# INTEL: vgetmantpbf16 ymm2 {k7}, ymm3, 123
+0x62,0xf3,0x7f,0x2f,0x26,0xd3,0x7b
+
+# ATT: vgetmantpbf16 $123, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vgetmantpbf16 ymm2 {k7} {z}, ymm3, 123
+0x62,0xf3,0x7f,0xaf,0x26,0xd3,0x7b
+
+# ATT: vgetmantpbf16 $123, %xmm3, %xmm2
+# INTEL: vgetmantpbf16 xmm2, xmm3, 123
+0x62,0xf3,0x7f,0x08,0x26,0xd3,0x7b
+
+# ATT: vgetmantpbf16 $123, %xmm3, %xmm2 {%k7}
+# INTEL: vgetmantpbf16 xmm2 {k7}, xmm3, 123
+0x62,0xf3,0x7f,0x0f,0x26,0xd3,0x7b
+
+# ATT: vgetmantpbf16 $123, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vgetmantpbf16 xmm2 {k7} {z}, xmm3, 123
+0x62,0xf3,0x7f,0x8f,0x26,0xd3,0x7b
+
+# ATT: vgetmantpbf16 $123, 268435456(%esp,%esi,8), %xmm2
+# INTEL: vgetmantpbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456], 123
+0x62,0xf3,0x7f,0x08,0x26,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vgetmantpbf16 $123, 291(%edi,%eax,4), %xmm2 {%k7}
+# INTEL: vgetmantpbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291], 123
+0x62,0xf3,0x7f,0x0f,0x26,0x94,0x87,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vgetmantpbf16 $123, (%eax){1to8}, %xmm2
+# INTEL: vgetmantpbf16 xmm2, word ptr [eax]{1to8}, 123
+0x62,0xf3,0x7f,0x18,0x26,0x10,0x7b
+
+# ATT: vgetmantpbf16 $123, -512(,%ebp,2), %xmm2
+# INTEL: vgetmantpbf16 xmm2, xmmword ptr [2*ebp - 512], 123
+0x62,0xf3,0x7f,0x08,0x26,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b
+
+# ATT: vgetmantpbf16 $123, 2032(%ecx), %xmm2 {%k7} {z}
+# INTEL: vgetmantpbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032], 123
+0x62,0xf3,0x7f,0x8f,0x26,0x51,0x7f,0x7b
+
+# ATT: vgetmantpbf16 $123, -256(%edx){1to8}, %xmm2 {%k7} {z}
+# INTEL: vgetmantpbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}, 123
+0x62,0xf3,0x7f,0x9f,0x26,0x52,0x80,0x7b
+
+# ATT: vgetmantpbf16 $123, 268435456(%esp,%esi,8), %ymm2
+# INTEL: vgetmantpbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456], 123
+0x62,0xf3,0x7f,0x28,0x26,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vgetmantpbf16 $123, 291(%edi,%eax,4), %ymm2 {%k7}
+# INTEL: vgetmantpbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291], 123
+0x62,0xf3,0x7f,0x2f,0x26,0x94,0x87,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vgetmantpbf16 $123, (%eax){1to16}, %ymm2
+# INTEL: vgetmantpbf16 ymm2, word ptr [eax]{1to16}, 123
+0x62,0xf3,0x7f,0x38,0x26,0x10,0x7b
+
+# ATT: vgetmantpbf16 $123, -1024(,%ebp,2), %ymm2
+# INTEL: vgetmantpbf16 ymm2, ymmword ptr [2*ebp - 1024], 123
+0x62,0xf3,0x7f,0x28,0x26,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b
+
+# ATT: vgetmantpbf16 $123, 4064(%ecx), %ymm2 {%k7} {z}
+# INTEL: vgetmantpbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064], 123
+0x62,0xf3,0x7f,0xaf,0x26,0x51,0x7f,0x7b
+
+# ATT: vgetmantpbf16 $123, -256(%edx){1to16}, %ymm2 {%k7} {z}
+# INTEL: vgetmantpbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}, 123
+0x62,0xf3,0x7f,0xbf,0x26,0x52,0x80,0x7b
+
+# ATT: vgetmantpbf16 $123, 268435456(%esp,%esi,8), %zmm2
+# INTEL: vgetmantpbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456], 123
+0x62,0xf3,0x7f,0x48,0x26,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vgetmantpbf16 $123, 291(%edi,%eax,4), %zmm2 {%k7}
+# INTEL: vgetmantpbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291], 123
+0x62,0xf3,0x7f,0x4f,0x26,0x94,0x87,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vgetmantpbf16 $123, (%eax){1to32}, %zmm2
+# INTEL: vgetmantpbf16 zmm2, word ptr [eax]{1to32}, 123
+0x62,0xf3,0x7f,0x58,0x26,0x10,0x7b
+
+# ATT: vgetmantpbf16 $123, -2048(,%ebp,2), %zmm2
+# INTEL: vgetmantpbf16 zmm2, zmmword ptr [2*ebp - 2048], 123
+0x62,0xf3,0x7f,0x48,0x26,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b
+
+# ATT: vgetmantpbf16 $123, 8128(%ecx), %zmm2 {%k7} {z}
+# INTEL: vgetmantpbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128], 123
+0x62,0xf3,0x7f,0xcf,0x26,0x51,0x7f,0x7b
+
+# ATT: vgetmantpbf16 $123, -256(%edx){1to32}, %zmm2 {%k7} {z}
+# INTEL: vgetmantpbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}, 123
+0x62,0xf3,0x7f,0xdf,0x26,0x52,0x80,0x7b
+
+# ATT: vmaxpbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vmaxpbf16 ymm2, ymm3, ymm4
+0x62,0xf5,0x65,0x28,0x5f,0xd4
+
+# ATT: vmaxpbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vmaxpbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf5,0x65,0x2f,0x5f,0xd4
+
+# ATT: vmaxpbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vmaxpbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf5,0x65,0xaf,0x5f,0xd4
+
+# ATT: vmaxpbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vmaxpbf16 zmm2, zmm3, zmm4
+0x62,0xf5,0x65,0x48,0x5f,0xd4
+
+# ATT: vmaxpbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vmaxpbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf5,0x65,0x4f,0x5f,0xd4
+
+# ATT: vmaxpbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vmaxpbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf5,0x65,0xcf,0x5f,0xd4
+
+# ATT: vmaxpbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vmaxpbf16 xmm2, xmm3, xmm4
+0x62,0xf5,0x65,0x08,0x5f,0xd4
+
+# ATT: vmaxpbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vmaxpbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf5,0x65,0x0f,0x5f,0xd4
+
+# ATT: vmaxpbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vmaxpbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf5,0x65,0x8f,0x5f,0xd4
+
+# ATT: vmaxpbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vmaxpbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x48,0x5f,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vmaxpbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vmaxpbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x4f,0x5f,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vmaxpbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vmaxpbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf5,0x65,0x58,0x5f,0x10
+
+# ATT: vmaxpbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vmaxpbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf5,0x65,0x48,0x5f,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vmaxpbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vmaxpbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf5,0x65,0xcf,0x5f,0x51,0x7f
+
+# ATT: vmaxpbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vmaxpbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf5,0x65,0xdf,0x5f,0x52,0x80
+
+# ATT: vmaxpbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vmaxpbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x28,0x5f,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vmaxpbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vmaxpbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x2f,0x5f,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vmaxpbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vmaxpbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf5,0x65,0x38,0x5f,0x10
+
+# ATT: vmaxpbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vmaxpbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf5,0x65,0x28,0x5f,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vmaxpbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vmaxpbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x65,0xaf,0x5f,0x51,0x7f
+
+# ATT: vmaxpbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vmaxpbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf5,0x65,0xbf,0x5f,0x52,0x80
+
+# ATT: vmaxpbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vmaxpbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x08,0x5f,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vmaxpbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vmaxpbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x0f,0x5f,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vmaxpbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vmaxpbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf5,0x65,0x18,0x5f,0x10
+
+# ATT: vmaxpbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vmaxpbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf5,0x65,0x08,0x5f,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vmaxpbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vmaxpbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x65,0x8f,0x5f,0x51,0x7f
+
+# ATT: vmaxpbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vmaxpbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x65,0x9f,0x5f,0x52,0x80
+
+# ATT: vminpbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vminpbf16 ymm2, ymm3, ymm4
+0x62,0xf5,0x65,0x28,0x5d,0xd4
+
+# ATT: vminpbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vminpbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf5,0x65,0x2f,0x5d,0xd4
+
+# ATT: vminpbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vminpbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf5,0x65,0xaf,0x5d,0xd4
+
+# ATT: vminpbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vminpbf16 zmm2, zmm3, zmm4
+0x62,0xf5,0x65,0x48,0x5d,0xd4
+
+# ATT: vminpbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vminpbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf5,0x65,0x4f,0x5d,0xd4
+
+# ATT: vminpbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vminpbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf5,0x65,0xcf,0x5d,0xd4
+
+# ATT: vminpbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vminpbf16 xmm2, xmm3, xmm4
+0x62,0xf5,0x65,0x08,0x5d,0xd4
+
+# ATT: vminpbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vminpbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf5,0x65,0x0f,0x5d,0xd4
+
+# ATT: vminpbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vminpbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf5,0x65,0x8f,0x5d,0xd4
+
+# ATT: vminpbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vminpbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x48,0x5d,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vminpbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vminpbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x4f,0x5d,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vminpbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vminpbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf5,0x65,0x58,0x5d,0x10
+
+# ATT: vminpbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vminpbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf5,0x65,0x48,0x5d,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vminpbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vminpbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf5,0x65,0xcf,0x5d,0x51,0x7f
+
+# ATT: vminpbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vminpbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf5,0x65,0xdf,0x5d,0x52,0x80
+
+# ATT: vminpbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vminpbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x28,0x5d,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vminpbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vminpbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x2f,0x5d,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vminpbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vminpbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf5,0x65,0x38,0x5d,0x10
+
+# ATT: vminpbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vminpbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf5,0x65,0x28,0x5d,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vminpbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vminpbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x65,0xaf,0x5d,0x51,0x7f
+
+# ATT: vminpbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vminpbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf5,0x65,0xbf,0x5d,0x52,0x80
+
+# ATT: vminpbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vminpbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x08,0x5d,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vminpbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vminpbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x0f,0x5d,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vminpbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vminpbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf5,0x65,0x18,0x5d,0x10
+
+# ATT: vminpbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vminpbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf5,0x65,0x08,0x5d,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vminpbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vminpbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x65,0x8f,0x5d,0x51,0x7f
+
+# ATT: vminpbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vminpbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x65,0x9f,0x5d,0x52,0x80
+
+# ATT: vmulnepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vmulnepbf16 ymm2, ymm3, ymm4
+0x62,0xf5,0x65,0x28,0x59,0xd4
+
+# ATT: vmulnepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vmulnepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf5,0x65,0x2f,0x59,0xd4
+
+# ATT: vmulnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vmulnepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf5,0x65,0xaf,0x59,0xd4
+
+# ATT: vmulnepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vmulnepbf16 zmm2, zmm3, zmm4
+0x62,0xf5,0x65,0x48,0x59,0xd4
+
+# ATT: vmulnepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vmulnepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf5,0x65,0x4f,0x59,0xd4
+
+# ATT: vmulnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vmulnepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf5,0x65,0xcf,0x59,0xd4
+
+# ATT: vmulnepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vmulnepbf16 xmm2, xmm3, xmm4
+0x62,0xf5,0x65,0x08,0x59,0xd4
+
+# ATT: vmulnepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vmulnepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf5,0x65,0x0f,0x59,0xd4
+
+# ATT: vmulnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vmulnepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf5,0x65,0x8f,0x59,0xd4
+
+# ATT: vmulnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vmulnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x48,0x59,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vmulnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vmulnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x4f,0x59,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vmulnepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vmulnepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf5,0x65,0x58,0x59,0x10
+
+# ATT: vmulnepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vmulnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf5,0x65,0x48,0x59,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vmulnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vmulnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf5,0x65,0xcf,0x59,0x51,0x7f
+
+# ATT: vmulnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vmulnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf5,0x65,0xdf,0x59,0x52,0x80
+
+# ATT: vmulnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vmulnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x28,0x59,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vmulnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vmulnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x2f,0x59,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vmulnepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vmulnepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf5,0x65,0x38,0x59,0x10
+
+# ATT: vmulnepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vmulnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf5,0x65,0x28,0x59,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vmulnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vmulnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x65,0xaf,0x59,0x51,0x7f
+
+# ATT: vmulnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vmulnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf5,0x65,0xbf,0x59,0x52,0x80
+
+# ATT: vmulnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vmulnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x08,0x59,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vmulnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vmulnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x0f,0x59,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vmulnepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vmulnepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf5,0x65,0x18,0x59,0x10
+
+# ATT: vmulnepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vmulnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf5,0x65,0x08,0x59,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vmulnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vmulnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x65,0x8f,0x59,0x51,0x7f
+
+# ATT: vmulnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vmulnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x65,0x9f,0x59,0x52,0x80
+
+# ATT: vrcppbf16 %xmm3, %xmm2
+# INTEL: vrcppbf16 xmm2, xmm3
+0x62,0xf6,0x7c,0x08,0x4c,0xd3
+
+# ATT: vrcppbf16 %xmm3, %xmm2 {%k7}
+# INTEL: vrcppbf16 xmm2 {k7}, xmm3
+0x62,0xf6,0x7c,0x0f,0x4c,0xd3
+
+# ATT: vrcppbf16 %xmm3, %xmm2 {%k7} {z}
+# INTEL: vrcppbf16 xmm2 {k7} {z}, xmm3
+0x62,0xf6,0x7c,0x8f,0x4c,0xd3
+
+# ATT: vrcppbf16 %zmm3, %zmm2
+# INTEL: vrcppbf16 zmm2, zmm3
+0x62,0xf6,0x7c,0x48,0x4c,0xd3
+
+# ATT: vrcppbf16 %zmm3, %zmm2 {%k7}
+# INTEL: vrcppbf16 zmm2 {k7}, zmm3
+0x62,0xf6,0x7c,0x4f,0x4c,0xd3
+
+# ATT: vrcppbf16 %zmm3, %zmm2 {%k7} {z}
+# INTEL: vrcppbf16 zmm2 {k7} {z}, zmm3
+0x62,0xf6,0x7c,0xcf,0x4c,0xd3
+
+# ATT: vrcppbf16 %ymm3, %ymm2
+# INTEL: vrcppbf16 ymm2, ymm3
+0x62,0xf6,0x7c,0x28,0x4c,0xd3
+
+# ATT: vrcppbf16 %ymm3, %ymm2 {%k7}
+# INTEL: vrcppbf16 ymm2 {k7}, ymm3
+0x62,0xf6,0x7c,0x2f,0x4c,0xd3
+
+# ATT: vrcppbf16 %ymm3, %ymm2 {%k7} {z}
+# INTEL: vrcppbf16 ymm2 {k7} {z}, ymm3
+0x62,0xf6,0x7c,0xaf,0x4c,0xd3
+
+# ATT: vrcppbf16 268435456(%esp,%esi,8), %xmm2
+# INTEL: vrcppbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x7c,0x08,0x4c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vrcppbf16 291(%edi,%eax,4), %xmm2 {%k7}
+# INTEL: vrcppbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x7c,0x0f,0x4c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vrcppbf16 (%eax){1to8}, %xmm2
+# INTEL: vrcppbf16 xmm2, word ptr [eax]{1to8}
+0x62,0xf6,0x7c,0x18,0x4c,0x10
+
+# ATT: vrcppbf16 -512(,%ebp,2), %xmm2
+# INTEL: vrcppbf16 xmm2, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x7c,0x08,0x4c,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vrcppbf16 2032(%ecx), %xmm2 {%k7} {z}
+# INTEL: vrcppbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x7c,0x8f,0x4c,0x51,0x7f
+
+# ATT: vrcppbf16 -256(%edx){1to8}, %xmm2 {%k7} {z}
+# INTEL: vrcppbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x7c,0x9f,0x4c,0x52,0x80
+
+# ATT: vrcppbf16 268435456(%esp,%esi,8), %ymm2
+# INTEL: vrcppbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x7c,0x28,0x4c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vrcppbf16 291(%edi,%eax,4), %ymm2 {%k7}
+# INTEL: vrcppbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x7c,0x2f,0x4c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vrcppbf16 (%eax){1to16}, %ymm2
+# INTEL: vrcppbf16 ymm2, word ptr [eax]{1to16}
+0x62,0xf6,0x7c,0x38,0x4c,0x10
+
+# ATT: vrcppbf16 -1024(,%ebp,2), %ymm2
+# INTEL: vrcppbf16 ymm2, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x7c,0x28,0x4c,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vrcppbf16 4064(%ecx), %ymm2 {%k7} {z}
+# INTEL: vrcppbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x7c,0xaf,0x4c,0x51,0x7f
+
+# ATT: vrcppbf16 -256(%edx){1to16}, %ymm2 {%k7} {z}
+# INTEL: vrcppbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x7c,0xbf,0x4c,0x52,0x80
+
+# ATT: vrcppbf16 268435456(%esp,%esi,8), %zmm2
+# INTEL: vrcppbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x7c,0x48,0x4c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vrcppbf16 291(%edi,%eax,4), %zmm2 {%k7}
+# INTEL: vrcppbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x7c,0x4f,0x4c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vrcppbf16 (%eax){1to32}, %zmm2
+# INTEL: vrcppbf16 zmm2, word ptr [eax]{1to32}
+0x62,0xf6,0x7c,0x58,0x4c,0x10
+
+# ATT: vrcppbf16 -2048(,%ebp,2), %zmm2
+# INTEL: vrcppbf16 zmm2, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x7c,0x48,0x4c,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vrcppbf16 8128(%ecx), %zmm2 {%k7} {z}
+# INTEL: vrcppbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x7c,0xcf,0x4c,0x51,0x7f
+
+# ATT: vrcppbf16 -256(%edx){1to32}, %zmm2 {%k7} {z}
+# INTEL: vrcppbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x7c,0xdf,0x4c,0x52,0x80
+
+# ATT: vreducenepbf16 $123, %zmm3, %zmm2
+# INTEL: vreducenepbf16 zmm2, zmm3, 123
+0x62,0xf3,0x7f,0x48,0x56,0xd3,0x7b
+
+# ATT: vreducenepbf16 $123, %zmm3, %zmm2 {%k7}
+# INTEL: vreducenepbf16 zmm2 {k7}, zmm3, 123
+0x62,0xf3,0x7f,0x4f,0x56,0xd3,0x7b
+
+# ATT: vreducenepbf16 $123, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vreducenepbf16 zmm2 {k7} {z}, zmm3, 123
+0x62,0xf3,0x7f,0xcf,0x56,0xd3,0x7b
+
+# ATT: vreducenepbf16 $123, %ymm3, %ymm2
+# INTEL: vreducenepbf16 ymm2, ymm3, 123
+0x62,0xf3,0x7f,0x28,0x56,0xd3,0x7b
+
+# ATT: vreducenepbf16 $123, %ymm3, %ymm2 {%k7}
+# INTEL: vreducenepbf16 ymm2 {k7}, ymm3, 123
+0x62,0xf3,0x7f,0x2f,0x56,0xd3,0x7b
+
+# ATT: vreducenepbf16 $123, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vreducenepbf16 ymm2 {k7} {z}, ymm3, 123
+0x62,0xf3,0x7f,0xaf,0x56,0xd3,0x7b
+
+# ATT: vreducenepbf16 $123, %xmm3, %xmm2
+# INTEL: vreducenepbf16 xmm2, xmm3, 123
+0x62,0xf3,0x7f,0x08,0x56,0xd3,0x7b
+
+# ATT: vreducenepbf16 $123, %xmm3, %xmm2 {%k7}
+# INTEL: vreducenepbf16 xmm2 {k7}, xmm3, 123
+0x62,0xf3,0x7f,0x0f,0x56,0xd3,0x7b
+
+# ATT: vreducenepbf16 $123, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vreducenepbf16 xmm2 {k7} {z}, xmm3, 123
+0x62,0xf3,0x7f,0x8f,0x56,0xd3,0x7b
+
+# ATT: vreducenepbf16 $123, 268435456(%esp,%esi,8), %xmm2
+# INTEL: vreducenepbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456], 123
+0x62,0xf3,0x7f,0x08,0x56,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vreducenepbf16 $123, 291(%edi,%eax,4), %xmm2 {%k7}
+# INTEL: vreducenepbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291], 123
+0x62,0xf3,0x7f,0x0f,0x56,0x94,0x87,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vreducenepbf16 $123, (%eax){1to8}, %xmm2
+# INTEL: vreducenepbf16 xmm2, word ptr [eax]{1to8}, 123
+0x62,0xf3,0x7f,0x18,0x56,0x10,0x7b
+
+# ATT: vreducenepbf16 $123, -512(,%ebp,2), %xmm2
+# INTEL: vreducenepbf16 xmm2, xmmword ptr [2*ebp - 512], 123
+0x62,0xf3,0x7f,0x08,0x56,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b
+
+# ATT: vreducenepbf16 $123, 2032(%ecx), %xmm2 {%k7} {z}
+# INTEL: vreducenepbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032], 123
+0x62,0xf3,0x7f,0x8f,0x56,0x51,0x7f,0x7b
+
+# ATT: vreducenepbf16 $123, -256(%edx){1to8}, %xmm2 {%k7} {z}
+# INTEL: vreducenepbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}, 123
+0x62,0xf3,0x7f,0x9f,0x56,0x52,0x80,0x7b
+
+# ATT: vreducenepbf16 $123, 268435456(%esp,%esi,8), %ymm2
+# INTEL: vreducenepbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456], 123
+0x62,0xf3,0x7f,0x28,0x56,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vreducenepbf16 $123, 291(%edi,%eax,4), %ymm2 {%k7}
+# INTEL: vreducenepbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291], 123
+0x62,0xf3,0x7f,0x2f,0x56,0x94,0x87,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vreducenepbf16 $123, (%eax){1to16}, %ymm2
+# INTEL: vreducenepbf16 ymm2, word ptr [eax]{1to16}, 123
+0x62,0xf3,0x7f,0x38,0x56,0x10,0x7b
+
+# ATT: vreducenepbf16 $123, -1024(,%ebp,2), %ymm2
+# INTEL: vreducenepbf16 ymm2, ymmword ptr [2*ebp - 1024], 123
+0x62,0xf3,0x7f,0x28,0x56,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b
+
+# ATT: vreducenepbf16 $123, 4064(%ecx), %ymm2 {%k7} {z}
+# INTEL: vreducenepbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064], 123
+0x62,0xf3,0x7f,0xaf,0x56,0x51,0x7f,0x7b
+
+# ATT: vreducenepbf16 $123, -256(%edx){1to16}, %ymm2 {%k7} {z}
+# INTEL: vreducenepbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}, 123
+0x62,0xf3,0x7f,0xbf,0x56,0x52,0x80,0x7b
+
+# ATT: vreducenepbf16 $123, 268435456(%esp,%esi,8), %zmm2
+# INTEL: vreducenepbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456], 123
+0x62,0xf3,0x7f,0x48,0x56,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vreducenepbf16 $123, 291(%edi,%eax,4), %zmm2 {%k7}
+# INTEL: vreducenepbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291], 123
+0x62,0xf3,0x7f,0x4f,0x56,0x94,0x87,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vreducenepbf16 $123, (%eax){1to32}, %zmm2
+# INTEL: vreducenepbf16 zmm2, word ptr [eax]{1to32}, 123
+0x62,0xf3,0x7f,0x58,0x56,0x10,0x7b
+
+# ATT: vreducenepbf16 $123, -2048(,%ebp,2), %zmm2
+# INTEL: vreducenepbf16 zmm2, zmmword ptr [2*ebp - 2048], 123
+0x62,0xf3,0x7f,0x48,0x56,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b
+
+# ATT: vreducenepbf16 $123, 8128(%ecx), %zmm2 {%k7} {z}
+# INTEL: vreducenepbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128], 123
+0x62,0xf3,0x7f,0xcf,0x56,0x51,0x7f,0x7b
+
+# ATT: vreducenepbf16 $123, -256(%edx){1to32}, %zmm2 {%k7} {z}
+# INTEL: vreducenepbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}, 123
+0x62,0xf3,0x7f,0xdf,0x56,0x52,0x80,0x7b
+
+# ATT: vrndscalenepbf16 $123, %zmm3, %zmm2
+# INTEL: vrndscalenepbf16 zmm2, zmm3, 123
+0x62,0xf3,0x7f,0x48,0x08,0xd3,0x7b
+
+# ATT: vrndscalenepbf16 $123, %zmm3, %zmm2 {%k7}
+# INTEL: vrndscalenepbf16 zmm2 {k7}, zmm3, 123
+0x62,0xf3,0x7f,0x4f,0x08,0xd3,0x7b
+
+# ATT: vrndscalenepbf16 $123, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vrndscalenepbf16 zmm2 {k7} {z}, zmm3, 123
+0x62,0xf3,0x7f,0xcf,0x08,0xd3,0x7b
+
+# ATT: vrndscalenepbf16 $123, %ymm3, %ymm2
+# INTEL: vrndscalenepbf16 ymm2, ymm3, 123
+0x62,0xf3,0x7f,0x28,0x08,0xd3,0x7b
+
+# ATT: vrndscalenepbf16 $123, %ymm3, %ymm2 {%k7}
+# INTEL: vrndscalenepbf16 ymm2 {k7}, ymm3, 123
+0x62,0xf3,0x7f,0x2f,0x08,0xd3,0x7b
+
+# ATT: vrndscalenepbf16 $123, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vrndscalenepbf16 ymm2 {k7} {z}, ymm3, 123
+0x62,0xf3,0x7f,0xaf,0x08,0xd3,0x7b
+
+# ATT: vrndscalenepbf16 $123, %xmm3, %xmm2
+# INTEL: vrndscalenepbf16 xmm2, xmm3, 123
+0x62,0xf3,0x7f,0x08,0x08,0xd3,0x7b
+
+# ATT: vrndscalenepbf16 $123, %xmm3, %xmm2 {%k7}
+# INTEL: vrndscalenepbf16 xmm2 {k7}, xmm3, 123
+0x62,0xf3,0x7f,0x0f,0x08,0xd3,0x7b
+
+# ATT: vrndscalenepbf16 $123, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vrndscalenepbf16 xmm2 {k7} {z}, xmm3, 123
+0x62,0xf3,0x7f,0x8f,0x08,0xd3,0x7b
+
+# ATT: vrndscalenepbf16 $123, 268435456(%esp,%esi,8), %xmm2
+# INTEL: vrndscalenepbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456], 123
+0x62,0xf3,0x7f,0x08,0x08,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vrndscalenepbf16 $123, 291(%edi,%eax,4), %xmm2 {%k7}
+# INTEL: vrndscalenepbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291], 123
+0x62,0xf3,0x7f,0x0f,0x08,0x94,0x87,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vrndscalenepbf16 $123, (%eax){1to8}, %xmm2
+# INTEL: vrndscalenepbf16 xmm2, word ptr [eax]{1to8}, 123
+0x62,0xf3,0x7f,0x18,0x08,0x10,0x7b
+
+# ATT: vrndscalenepbf16 $123, -512(,%ebp,2), %xmm2
+# INTEL: vrndscalenepbf16 xmm2, xmmword ptr [2*ebp - 512], 123
+0x62,0xf3,0x7f,0x08,0x08,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b
+
+# ATT: vrndscalenepbf16 $123, 2032(%ecx), %xmm2 {%k7} {z}
+# INTEL: vrndscalenepbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032], 123
+0x62,0xf3,0x7f,0x8f,0x08,0x51,0x7f,0x7b
+
+# ATT: vrndscalenepbf16 $123, -256(%edx){1to8}, %xmm2 {%k7} {z}
+# INTEL: vrndscalenepbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}, 123
+0x62,0xf3,0x7f,0x9f,0x08,0x52,0x80,0x7b
+
+# ATT: vrndscalenepbf16 $123, 268435456(%esp,%esi,8), %ymm2
+# INTEL: vrndscalenepbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456], 123
+0x62,0xf3,0x7f,0x28,0x08,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vrndscalenepbf16 $123, 291(%edi,%eax,4), %ymm2 {%k7}
+# INTEL: vrndscalenepbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291], 123
+0x62,0xf3,0x7f,0x2f,0x08,0x94,0x87,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vrndscalenepbf16 $123, (%eax){1to16}, %ymm2
+# INTEL: vrndscalenepbf16 ymm2, word ptr [eax]{1to16}, 123
+0x62,0xf3,0x7f,0x38,0x08,0x10,0x7b
+
+# ATT: vrndscalenepbf16 $123, -1024(,%ebp,2), %ymm2
+# INTEL: vrndscalenepbf16 ymm2, ymmword ptr [2*ebp - 1024], 123
+0x62,0xf3,0x7f,0x28,0x08,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b
+
+# ATT: vrndscalenepbf16 $123, 4064(%ecx), %ymm2 {%k7} {z}
+# INTEL: vrndscalenepbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064], 123
+0x62,0xf3,0x7f,0xaf,0x08,0x51,0x7f,0x7b
+
+# ATT: vrndscalenepbf16 $123, -256(%edx){1to16}, %ymm2 {%k7} {z}
+# INTEL: vrndscalenepbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}, 123
+0x62,0xf3,0x7f,0xbf,0x08,0x52,0x80,0x7b
+
+# ATT: vrndscalenepbf16 $123, 268435456(%esp,%esi,8), %zmm2
+# INTEL: vrndscalenepbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456], 123
+0x62,0xf3,0x7f,0x48,0x08,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vrndscalenepbf16 $123, 291(%edi,%eax,4), %zmm2 {%k7}
+# INTEL: vrndscalenepbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291], 123
+0x62,0xf3,0x7f,0x4f,0x08,0x94,0x87,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vrndscalenepbf16 $123, (%eax){1to32}, %zmm2
+# INTEL: vrndscalenepbf16 zmm2, word ptr [eax]{1to32}, 123
+0x62,0xf3,0x7f,0x58,0x08,0x10,0x7b
+
+# ATT: vrndscalenepbf16 $123, -2048(,%ebp,2), %zmm2
+# INTEL: vrndscalenepbf16 zmm2, zmmword ptr [2*ebp - 2048], 123
+0x62,0xf3,0x7f,0x48,0x08,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b
+
+# ATT: vrndscalenepbf16 $123, 8128(%ecx), %zmm2 {%k7} {z}
+# INTEL: vrndscalenepbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128], 123
+0x62,0xf3,0x7f,0xcf,0x08,0x51,0x7f,0x7b
+
+# ATT: vrndscalenepbf16 $123, -256(%edx){1to32}, %zmm2 {%k7} {z}
+# INTEL: vrndscalenepbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}, 123
+0x62,0xf3,0x7f,0xdf,0x08,0x52,0x80,0x7b
+
+# ATT: vrsqrtpbf16 %xmm3, %xmm2
+# INTEL: vrsqrtpbf16 xmm2, xmm3
+0x62,0xf6,0x7c,0x08,0x4e,0xd3
+
+# ATT: vrsqrtpbf16 %xmm3, %xmm2 {%k7}
+# INTEL: vrsqrtpbf16 xmm2 {k7}, xmm3
+0x62,0xf6,0x7c,0x0f,0x4e,0xd3
+
+# ATT: vrsqrtpbf16 %xmm3, %xmm2 {%k7} {z}
+# INTEL: vrsqrtpbf16 xmm2 {k7} {z}, xmm3
+0x62,0xf6,0x7c,0x8f,0x4e,0xd3
+
+# ATT: vrsqrtpbf16 %zmm3, %zmm2
+# INTEL: vrsqrtpbf16 zmm2, zmm3
+0x62,0xf6,0x7c,0x48,0x4e,0xd3
+
+# ATT: vrsqrtpbf16 %zmm3, %zmm2 {%k7}
+# INTEL: vrsqrtpbf16 zmm2 {k7}, zmm3
+0x62,0xf6,0x7c,0x4f,0x4e,0xd3
+
+# ATT: vrsqrtpbf16 %zmm3, %zmm2 {%k7} {z}
+# INTEL: vrsqrtpbf16 zmm2 {k7} {z}, zmm3
+0x62,0xf6,0x7c,0xcf,0x4e,0xd3
+
+# ATT: vrsqrtpbf16 %ymm3, %ymm2
+# INTEL: vrsqrtpbf16 ymm2, ymm3
+0x62,0xf6,0x7c,0x28,0x4e,0xd3
+
+# ATT: vrsqrtpbf16 %ymm3, %ymm2 {%k7}
+# INTEL: vrsqrtpbf16 ymm2 {k7}, ymm3
+0x62,0xf6,0x7c,0x2f,0x4e,0xd3
+
+# ATT: vrsqrtpbf16 %ymm3, %ymm2 {%k7} {z}
+# INTEL: vrsqrtpbf16 ymm2 {k7} {z}, ymm3
+0x62,0xf6,0x7c,0xaf,0x4e,0xd3
+
+# ATT: vrsqrtpbf16 268435456(%esp,%esi,8), %xmm2
+# INTEL: vrsqrtpbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x7c,0x08,0x4e,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vrsqrtpbf16 291(%edi,%eax,4), %xmm2 {%k7}
+# INTEL: vrsqrtpbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x7c,0x0f,0x4e,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vrsqrtpbf16 (%eax){1to8}, %xmm2
+# INTEL: vrsqrtpbf16 xmm2, word ptr [eax]{1to8}
+0x62,0xf6,0x7c,0x18,0x4e,0x10
+
+# ATT: vrsqrtpbf16 -512(,%ebp,2), %xmm2
+# INTEL: vrsqrtpbf16 xmm2, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x7c,0x08,0x4e,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vrsqrtpbf16 2032(%ecx), %xmm2 {%k7} {z}
+# INTEL: vrsqrtpbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x7c,0x8f,0x4e,0x51,0x7f
+
+# ATT: vrsqrtpbf16 -256(%edx){1to8}, %xmm2 {%k7} {z}
+# INTEL: vrsqrtpbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x7c,0x9f,0x4e,0x52,0x80
+
+# ATT: vrsqrtpbf16 268435456(%esp,%esi,8), %ymm2
+# INTEL: vrsqrtpbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x7c,0x28,0x4e,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vrsqrtpbf16 291(%edi,%eax,4), %ymm2 {%k7}
+# INTEL: vrsqrtpbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x7c,0x2f,0x4e,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vrsqrtpbf16 (%eax){1to16}, %ymm2
+# INTEL: vrsqrtpbf16 ymm2, word ptr [eax]{1to16}
+0x62,0xf6,0x7c,0x38,0x4e,0x10
+
+# ATT: vrsqrtpbf16 -1024(,%ebp,2), %ymm2
+# INTEL: vrsqrtpbf16 ymm2, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x7c,0x28,0x4e,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vrsqrtpbf16 4064(%ecx), %ymm2 {%k7} {z}
+# INTEL: vrsqrtpbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x7c,0xaf,0x4e,0x51,0x7f
+
+# ATT: vrsqrtpbf16 -256(%edx){1to16}, %ymm2 {%k7} {z}
+# INTEL: vrsqrtpbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x7c,0xbf,0x4e,0x52,0x80
+
+# ATT: vrsqrtpbf16 268435456(%esp,%esi,8), %zmm2
+# INTEL: vrsqrtpbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x7c,0x48,0x4e,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vrsqrtpbf16 291(%edi,%eax,4), %zmm2 {%k7}
+# INTEL: vrsqrtpbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x7c,0x4f,0x4e,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vrsqrtpbf16 (%eax){1to32}, %zmm2
+# INTEL: vrsqrtpbf16 zmm2, word ptr [eax]{1to32}
+0x62,0xf6,0x7c,0x58,0x4e,0x10
+
+# ATT: vrsqrtpbf16 -2048(,%ebp,2), %zmm2
+# INTEL: vrsqrtpbf16 zmm2, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x7c,0x48,0x4e,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vrsqrtpbf16 8128(%ecx), %zmm2 {%k7} {z}
+# INTEL: vrsqrtpbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x7c,0xcf,0x4e,0x51,0x7f
+
+# ATT: vrsqrtpbf16 -256(%edx){1to32}, %zmm2 {%k7} {z}
+# INTEL: vrsqrtpbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x7c,0xdf,0x4e,0x52,0x80
+
+# ATT: vscalefpbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vscalefpbf16 ymm2, ymm3, ymm4
+0x62,0xf6,0x64,0x28,0x2c,0xd4
+
+# ATT: vscalefpbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vscalefpbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf6,0x64,0x2f,0x2c,0xd4
+
+# ATT: vscalefpbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vscalefpbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf6,0x64,0xaf,0x2c,0xd4
+
+# ATT: vscalefpbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vscalefpbf16 zmm2, zmm3, zmm4
+0x62,0xf6,0x64,0x48,0x2c,0xd4
+
+# ATT: vscalefpbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vscalefpbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf6,0x64,0x4f,0x2c,0xd4
+
+# ATT: vscalefpbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vscalefpbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf6,0x64,0xcf,0x2c,0xd4
+
+# ATT: vscalefpbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vscalefpbf16 xmm2, xmm3, xmm4
+0x62,0xf6,0x64,0x08,0x2c,0xd4
+
+# ATT: vscalefpbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vscalefpbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf6,0x64,0x0f,0x2c,0xd4
+
+# ATT: vscalefpbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vscalefpbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf6,0x64,0x8f,0x2c,0xd4
+
+# ATT: vscalefpbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vscalefpbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x48,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vscalefpbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vscalefpbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x4f,0x2c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vscalefpbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vscalefpbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf6,0x64,0x58,0x2c,0x10
+
+# ATT: vscalefpbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vscalefpbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x64,0x48,0x2c,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vscalefpbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vscalefpbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x64,0xcf,0x2c,0x51,0x7f
+
+# ATT: vscalefpbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vscalefpbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x64,0xdf,0x2c,0x52,0x80
+
+# ATT: vscalefpbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vscalefpbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x28,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vscalefpbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vscalefpbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x2f,0x2c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vscalefpbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vscalefpbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf6,0x64,0x38,0x2c,0x10
+
+# ATT: vscalefpbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vscalefpbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x64,0x28,0x2c,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vscalefpbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vscalefpbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x64,0xaf,0x2c,0x51,0x7f
+
+# ATT: vscalefpbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vscalefpbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x64,0xbf,0x2c,0x52,0x80
+
+# ATT: vscalefpbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vscalefpbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x08,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vscalefpbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vscalefpbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x0f,0x2c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vscalefpbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vscalefpbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf6,0x64,0x18,0x2c,0x10
+
+# ATT: vscalefpbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vscalefpbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x64,0x08,0x2c,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vscalefpbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vscalefpbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x64,0x8f,0x2c,0x51,0x7f
+
+# ATT: vscalefpbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vscalefpbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x64,0x9f,0x2c,0x52,0x80
+
+# ATT: vsqrtnepbf16 %xmm3, %xmm2
+# INTEL: vsqrtnepbf16 xmm2, xmm3
+0x62,0xf5,0x7d,0x08,0x51,0xd3
+
+# ATT: vsqrtnepbf16 %xmm3, %xmm2 {%k7}
+# INTEL: vsqrtnepbf16 xmm2 {k7}, xmm3
+0x62,0xf5,0x7d,0x0f,0x51,0xd3
+
+# ATT: vsqrtnepbf16 %xmm3, %xmm2 {%k7} {z}
+# INTEL: vsqrtnepbf16 xmm2 {k7} {z}, xmm3
+0x62,0xf5,0x7d,0x8f,0x51,0xd3
+
+# ATT: vsqrtnepbf16 %zmm3, %zmm2
+# INTEL: vsqrtnepbf16 zmm2, zmm3
+0x62,0xf5,0x7d,0x48,0x51,0xd3
+
+# ATT: vsqrtnepbf16 %zmm3, %zmm2 {%k7}
+# INTEL: vsqrtnepbf16 zmm2 {k7}, zmm3
+0x62,0xf5,0x7d,0x4f,0x51,0xd3
+
+# ATT: vsqrtnepbf16 %zmm3, %zmm2 {%k7} {z}
+# INTEL: vsqrtnepbf16 zmm2 {k7} {z}, zmm3
+0x62,0xf5,0x7d,0xcf,0x51,0xd3
+
+# ATT: vsqrtnepbf16 %ymm3, %ymm2
+# INTEL: vsqrtnepbf16 ymm2, ymm3
+0x62,0xf5,0x7d,0x28,0x51,0xd3
+
+# ATT: vsqrtnepbf16 %ymm3, %ymm2 {%k7}
+# INTEL: vsqrtnepbf16 ymm2 {k7}, ymm3
+0x62,0xf5,0x7d,0x2f,0x51,0xd3
+
+# ATT: vsqrtnepbf16 %ymm3, %ymm2 {%k7} {z}
+# INTEL: vsqrtnepbf16 ymm2 {k7} {z}, ymm3
+0x62,0xf5,0x7d,0xaf,0x51,0xd3
+
+# ATT: vsqrtnepbf16 268435456(%esp,%esi,8), %xmm2
+# INTEL: vsqrtnepbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x08,0x51,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vsqrtnepbf16 291(%edi,%eax,4), %xmm2 {%k7}
+# INTEL: vsqrtnepbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7d,0x0f,0x51,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vsqrtnepbf16 (%eax){1to8}, %xmm2
+# INTEL: vsqrtnepbf16 xmm2, word ptr [eax]{1to8}
+0x62,0xf5,0x7d,0x18,0x51,0x10
+
+# ATT: vsqrtnepbf16 -512(,%ebp,2), %xmm2
+# INTEL: vsqrtnepbf16 xmm2, xmmword ptr [2*ebp - 512]
+0x62,0xf5,0x7d,0x08,0x51,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vsqrtnepbf16 2032(%ecx), %xmm2 {%k7} {z}
+# INTEL: vsqrtnepbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7d,0x8f,0x51,0x51,0x7f
+
+# ATT: vsqrtnepbf16 -256(%edx){1to8}, %xmm2 {%k7} {z}
+# INTEL: vsqrtnepbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x7d,0x9f,0x51,0x52,0x80
+
+# ATT: vsqrtnepbf16 268435456(%esp,%esi,8), %ymm2
+# INTEL: vsqrtnepbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x28,0x51,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vsqrtnepbf16 291(%edi,%eax,4), %ymm2 {%k7}
+# INTEL: vsqrtnepbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7d,0x2f,0x51,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vsqrtnepbf16 (%eax){1to16}, %ymm2
+# INTEL: vsqrtnepbf16 ymm2, word ptr [eax]{1to16}
+0x62,0xf5,0x7d,0x38,0x51,0x10
+
+# ATT: vsqrtnepbf16 -1024(,%ebp,2), %ymm2
+# INTEL: vsqrtnepbf16 ymm2, ymmword ptr [2*ebp - 1024]
+0x62,0xf5,0x7d,0x28,0x51,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vsqrtnepbf16 4064(%ecx), %ymm2 {%k7} {z}
+# INTEL: vsqrtnepbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x7d,0xaf,0x51,0x51,0x7f
+
+# ATT: vsqrtnepbf16 -256(%edx){1to16}, %ymm2 {%k7} {z}
+# INTEL: vsqrtnepbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}
+0x62,0xf5,0x7d,0xbf,0x51,0x52,0x80
+
+# ATT: vsqrtnepbf16 268435456(%esp,%esi,8), %zmm2
+# INTEL: vsqrtnepbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x48,0x51,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vsqrtnepbf16 291(%edi,%eax,4), %zmm2 {%k7}
+# INTEL: vsqrtnepbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7d,0x4f,0x51,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vsqrtnepbf16 (%eax){1to32}, %zmm2
+# INTEL: vsqrtnepbf16 zmm2, word ptr [eax]{1to32}
+0x62,0xf5,0x7d,0x58,0x51,0x10
+
+# ATT: vsqrtnepbf16 -2048(,%ebp,2), %zmm2
+# INTEL: vsqrtnepbf16 zmm2, zmmword ptr [2*ebp - 2048]
+0x62,0xf5,0x7d,0x48,0x51,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vsqrtnepbf16 8128(%ecx), %zmm2 {%k7} {z}
+# INTEL: vsqrtnepbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+0x62,0xf5,0x7d,0xcf,0x51,0x51,0x7f
+
+# ATT: vsqrtnepbf16 -256(%edx){1to32}, %zmm2 {%k7} {z}
+# INTEL: vsqrtnepbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}
+0x62,0xf5,0x7d,0xdf,0x51,0x52,0x80
+
+# ATT: vsubnepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vsubnepbf16 ymm2, ymm3, ymm4
+0x62,0xf5,0x65,0x28,0x5c,0xd4
+
+# ATT: vsubnepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vsubnepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf5,0x65,0x2f,0x5c,0xd4
+
+# ATT: vsubnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vsubnepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf5,0x65,0xaf,0x5c,0xd4
+
+# ATT: vsubnepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vsubnepbf16 zmm2, zmm3, zmm4
+0x62,0xf5,0x65,0x48,0x5c,0xd4
+
+# ATT: vsubnepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vsubnepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf5,0x65,0x4f,0x5c,0xd4
+
+# ATT: vsubnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vsubnepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf5,0x65,0xcf,0x5c,0xd4
+
+# ATT: vsubnepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vsubnepbf16 xmm2, xmm3, xmm4
+0x62,0xf5,0x65,0x08,0x5c,0xd4
+
+# ATT: vsubnepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vsubnepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf5,0x65,0x0f,0x5c,0xd4
+
+# ATT: vsubnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vsubnepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf5,0x65,0x8f,0x5c,0xd4
+
+# ATT: vsubnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vsubnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x48,0x5c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vsubnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vsubnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x4f,0x5c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vsubnepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vsubnepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf5,0x65,0x58,0x5c,0x10
+
+# ATT: vsubnepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vsubnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf5,0x65,0x48,0x5c,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vsubnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vsubnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf5,0x65,0xcf,0x5c,0x51,0x7f
+
+# ATT: vsubnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vsubnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf5,0x65,0xdf,0x5c,0x52,0x80
+
+# ATT: vsubnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vsubnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x28,0x5c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vsubnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vsubnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x2f,0x5c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vsubnepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vsubnepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf5,0x65,0x38,0x5c,0x10
+
+# ATT: vsubnepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vsubnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf5,0x65,0x28,0x5c,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vsubnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vsubnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x65,0xaf,0x5c,0x51,0x7f
+
+# ATT: vsubnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vsubnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf5,0x65,0xbf,0x5c,0x52,0x80
+
+# ATT: vsubnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vsubnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x08,0x5c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vsubnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vsubnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x0f,0x5c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vsubnepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vsubnepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf5,0x65,0x18,0x5c,0x10
+
+# ATT: vsubnepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vsubnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf5,0x65,0x08,0x5c,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vsubnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vsubnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x65,0x8f,0x5c,0x51,0x7f
+
+# ATT: vsubnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vsubnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x65,0x9f,0x5c,0x52,0x80
+
diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-bf16-64.txt b/llvm/test/MC/Disassembler/X86/avx10.2-bf16-64.txt
new file mode 100644
index 0000000000000..3cde4682299fc
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx10.2-bf16-64.txt
@@ -0,0 +1,3015 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: vaddnepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vaddnepbf16 ymm22, ymm23, ymm24
+0x62,0x85,0x45,0x20,0x58,0xf0
+
+# ATT: vaddnepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vaddnepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x85,0x45,0x27,0x58,0xf0
+
+# ATT: vaddnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vaddnepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x85,0x45,0xa7,0x58,0xf0
+
+# ATT: vaddnepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vaddnepbf16 zmm22, zmm23, zmm24
+0x62,0x85,0x45,0x40,0x58,0xf0
+
+# ATT: vaddnepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vaddnepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x85,0x45,0x47,0x58,0xf0
+
+# ATT: vaddnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vaddnepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x85,0x45,0xc7,0x58,0xf0
+
+# ATT: vaddnepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vaddnepbf16 xmm22, xmm23, xmm24
+0x62,0x85,0x45,0x00,0x58,0xf0
+
+# ATT: vaddnepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vaddnepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x85,0x45,0x07,0x58,0xf0
+
+# ATT: vaddnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vaddnepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x85,0x45,0x87,0x58,0xf0
+
+# ATT: vaddnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vaddnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x40,0x58,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vaddnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vaddnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x47,0x58,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vaddnepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vaddnepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe5,0x45,0x50,0x58,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vaddnepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vaddnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0x45,0x40,0x58,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vaddnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vaddnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe5,0x45,0xc7,0x58,0x71,0x7f
+
+# ATT: vaddnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vaddnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe5,0x45,0xd7,0x58,0x72,0x80
+
+# ATT: vaddnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vaddnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x20,0x58,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vaddnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vaddnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x27,0x58,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vaddnepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vaddnepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe5,0x45,0x30,0x58,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vaddnepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vaddnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe5,0x45,0x20,0x58,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vaddnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vaddnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe5,0x45,0xa7,0x58,0x71,0x7f
+
+# ATT: vaddnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vaddnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe5,0x45,0xb7,0x58,0x72,0x80
+
+# ATT: vaddnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vaddnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x00,0x58,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vaddnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vaddnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x07,0x58,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vaddnepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vaddnepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe5,0x45,0x10,0x58,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vaddnepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vaddnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe5,0x45,0x00,0x58,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vaddnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vaddnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe5,0x45,0x87,0x58,0x71,0x7f
+
+# ATT: vaddnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vaddnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe5,0x45,0x97,0x58,0x72,0x80
+
+# ATT: vcmppbf16 $123, %ymm24, %ymm23, %k5
+# INTEL: vcmppbf16 k5, ymm23, ymm24, 123
+0x62,0x93,0x47,0x20,0xc2,0xe8,0x7b
+
+# ATT: vcmppbf16 $123, %ymm24, %ymm23, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, ymm23, ymm24, 123
+0x62,0x93,0x47,0x27,0xc2,0xe8,0x7b
+
+# ATT: vcmppbf16 $123, %xmm24, %xmm23, %k5
+# INTEL: vcmppbf16 k5, xmm23, xmm24, 123
+0x62,0x93,0x47,0x00,0xc2,0xe8,0x7b
+
+# ATT: vcmppbf16 $123, %xmm24, %xmm23, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, xmm23, xmm24, 123
+0x62,0x93,0x47,0x07,0xc2,0xe8,0x7b
+
+# ATT: vcmppbf16 $123, %zmm24, %zmm23, %k5
+# INTEL: vcmppbf16 k5, zmm23, zmm24, 123
+0x62,0x93,0x47,0x40,0xc2,0xe8,0x7b
+
+# ATT: vcmppbf16 $123, %zmm24, %zmm23, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, zmm23, zmm24, 123
+0x62,0x93,0x47,0x47,0xc2,0xe8,0x7b
+
+# ATT: vcmppbf16 $123, 268435456(%rbp,%r14,8), %zmm23, %k5
+# INTEL: vcmppbf16 k5, zmm23, zmmword ptr [rbp + 8*r14 + 268435456], 123
+0x62,0xb3,0x47,0x40,0xc2,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vcmppbf16 $123, 291(%r8,%rax,4), %zmm23, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291], 123
+0x62,0xd3,0x47,0x47,0xc2,0xac,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vcmppbf16 $123, (%rip){1to32}, %zmm23, %k5
+# INTEL: vcmppbf16 k5, zmm23, word ptr [rip]{1to32}, 123
+0x62,0xf3,0x47,0x50,0xc2,0x2d,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vcmppbf16 $123, -2048(,%rbp,2), %zmm23, %k5
+# INTEL: vcmppbf16 k5, zmm23, zmmword ptr [2*rbp - 2048], 123
+0x62,0xf3,0x47,0x40,0xc2,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b
+
+# ATT: vcmppbf16 $123, 8128(%rcx), %zmm23, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, zmm23, zmmword ptr [rcx + 8128], 123
+0x62,0xf3,0x47,0x47,0xc2,0x69,0x7f,0x7b
+
+# ATT: vcmppbf16 $123, -256(%rdx){1to32}, %zmm23, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, zmm23, word ptr [rdx - 256]{1to32}, 123
+0x62,0xf3,0x47,0x57,0xc2,0x6a,0x80,0x7b
+
+# ATT: vcmppbf16 $123, 268435456(%rbp,%r14,8), %xmm23, %k5
+# INTEL: vcmppbf16 k5, xmm23, xmmword ptr [rbp + 8*r14 + 268435456], 123
+0x62,0xb3,0x47,0x00,0xc2,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vcmppbf16 $123, 291(%r8,%rax,4), %xmm23, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291], 123
+0x62,0xd3,0x47,0x07,0xc2,0xac,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vcmppbf16 $123, (%rip){1to8}, %xmm23, %k5
+# INTEL: vcmppbf16 k5, xmm23, word ptr [rip]{1to8}, 123
+0x62,0xf3,0x47,0x10,0xc2,0x2d,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vcmppbf16 $123, -512(,%rbp,2), %xmm23, %k5
+# INTEL: vcmppbf16 k5, xmm23, xmmword ptr [2*rbp - 512], 123
+0x62,0xf3,0x47,0x00,0xc2,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b
+
+# ATT: vcmppbf16 $123, 2032(%rcx), %xmm23, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, xmm23, xmmword ptr [rcx + 2032], 123
+0x62,0xf3,0x47,0x07,0xc2,0x69,0x7f,0x7b
+
+# ATT: vcmppbf16 $123, -256(%rdx){1to8}, %xmm23, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, xmm23, word ptr [rdx - 256]{1to8}, 123
+0x62,0xf3,0x47,0x17,0xc2,0x6a,0x80,0x7b
+
+# ATT: vcmppbf16 $123, 268435456(%rbp,%r14,8), %ymm23, %k5
+# INTEL: vcmppbf16 k5, ymm23, ymmword ptr [rbp + 8*r14 + 268435456], 123
+0x62,0xb3,0x47,0x20,0xc2,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vcmppbf16 $123, 291(%r8,%rax,4), %ymm23, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291], 123
+0x62,0xd3,0x47,0x27,0xc2,0xac,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vcmppbf16 $123, (%rip){1to16}, %ymm23, %k5
+# INTEL: vcmppbf16 k5, ymm23, word ptr [rip]{1to16}, 123
+0x62,0xf3,0x47,0x30,0xc2,0x2d,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vcmppbf16 $123, -1024(,%rbp,2), %ymm23, %k5
+# INTEL: vcmppbf16 k5, ymm23, ymmword ptr [2*rbp - 1024], 123
+0x62,0xf3,0x47,0x20,0xc2,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b
+
+# ATT: vcmppbf16 $123, 4064(%rcx), %ymm23, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, ymm23, ymmword ptr [rcx + 4064], 123
+0x62,0xf3,0x47,0x27,0xc2,0x69,0x7f,0x7b
+
+# ATT: vcmppbf16 $123, -256(%rdx){1to16}, %ymm23, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, ymm23, word ptr [rdx - 256]{1to16}, 123
+0x62,0xf3,0x47,0x37,0xc2,0x6a,0x80,0x7b
+
+# ATT: vcomsbf16 %xmm23, %xmm22
+# INTEL: vcomsbf16 xmm22, xmm23
+0x62,0xa5,0x7d,0x08,0x2f,0xf7
+
+# ATT: vcomsbf16 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vcomsbf16 xmm22, word ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7d,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcomsbf16 291(%r8,%rax,4), %xmm22
+# INTEL: vcomsbf16 xmm22, word ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7d,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcomsbf16 (%rip), %xmm22
+# INTEL: vcomsbf16 xmm22, word ptr [rip]
+0x62,0xe5,0x7d,0x08,0x2f,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vcomsbf16 -64(,%rbp,2), %xmm22
+# INTEL: vcomsbf16 xmm22, word ptr [2*rbp - 64]
+0x62,0xe5,0x7d,0x08,0x2f,0x34,0x6d,0xc0,0xff,0xff,0xff
+
+# ATT: vcomsbf16 254(%rcx), %xmm22
+# INTEL: vcomsbf16 xmm22, word ptr [rcx + 254]
+0x62,0xe5,0x7d,0x08,0x2f,0x71,0x7f
+
+# ATT: vcomsbf16 -256(%rdx), %xmm22
+# INTEL: vcomsbf16 xmm22, word ptr [rdx - 256]
+0x62,0xe5,0x7d,0x08,0x2f,0x72,0x80
+
+# ATT: vdivnepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vdivnepbf16 ymm22, ymm23, ymm24
+0x62,0x85,0x45,0x20,0x5e,0xf0
+
+# ATT: vdivnepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vdivnepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x85,0x45,0x27,0x5e,0xf0
+
+# ATT: vdivnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vdivnepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x85,0x45,0xa7,0x5e,0xf0
+
+# ATT: vdivnepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vdivnepbf16 zmm22, zmm23, zmm24
+0x62,0x85,0x45,0x40,0x5e,0xf0
+
+# ATT: vdivnepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vdivnepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x85,0x45,0x47,0x5e,0xf0
+
+# ATT: vdivnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vdivnepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x85,0x45,0xc7,0x5e,0xf0
+
+# ATT: vdivnepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vdivnepbf16 xmm22, xmm23, xmm24
+0x62,0x85,0x45,0x00,0x5e,0xf0
+
+# ATT: vdivnepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vdivnepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x85,0x45,0x07,0x5e,0xf0
+
+# ATT: vdivnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vdivnepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x85,0x45,0x87,0x5e,0xf0
+
+# ATT: vdivnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vdivnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x40,0x5e,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vdivnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vdivnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x47,0x5e,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vdivnepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vdivnepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe5,0x45,0x50,0x5e,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vdivnepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vdivnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0x45,0x40,0x5e,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vdivnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vdivnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe5,0x45,0xc7,0x5e,0x71,0x7f
+
+# ATT: vdivnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vdivnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe5,0x45,0xd7,0x5e,0x72,0x80
+
+# ATT: vdivnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vdivnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x20,0x5e,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vdivnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vdivnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x27,0x5e,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vdivnepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vdivnepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe5,0x45,0x30,0x5e,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vdivnepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vdivnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe5,0x45,0x20,0x5e,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vdivnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vdivnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe5,0x45,0xa7,0x5e,0x71,0x7f
+
+# ATT: vdivnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vdivnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe5,0x45,0xb7,0x5e,0x72,0x80
+
+# ATT: vdivnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vdivnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x00,0x5e,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vdivnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vdivnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x07,0x5e,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vdivnepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vdivnepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe5,0x45,0x10,0x5e,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vdivnepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vdivnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe5,0x45,0x00,0x5e,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vdivnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vdivnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe5,0x45,0x87,0x5e,0x71,0x7f
+
+# ATT: vdivnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vdivnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe5,0x45,0x97,0x5e,0x72,0x80
+
+# ATT: vfmadd132nepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vfmadd132nepbf16 ymm22, ymm23, ymm24
+0x62,0x86,0x44,0x20,0x98,0xf0
+
+# ATT: vfmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmadd132nepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x86,0x44,0x27,0x98,0xf0
+
+# ATT: vfmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x86,0x44,0xa7,0x98,0xf0
+
+# ATT: vfmadd132nepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vfmadd132nepbf16 zmm22, zmm23, zmm24
+0x62,0x86,0x44,0x40,0x98,0xf0
+
+# ATT: vfmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vfmadd132nepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x86,0x44,0x47,0x98,0xf0
+
+# ATT: vfmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x86,0x44,0xc7,0x98,0xf0
+
+# ATT: vfmadd132nepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vfmadd132nepbf16 xmm22, xmm23, xmm24
+0x62,0x86,0x44,0x00,0x98,0xf0
+
+# ATT: vfmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vfmadd132nepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x86,0x44,0x07,0x98,0xf0
+
+# ATT: vfmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x86,0x44,0x87,0x98,0xf0
+
+# ATT: vfmadd132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vfmadd132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x40,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vfmadd132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x47,0x98,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd132nepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vfmadd132nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe6,0x44,0x50,0x98,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmadd132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vfmadd132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x44,0x40,0x98,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfmadd132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x44,0xc7,0x98,0x71,0x7f
+
+# ATT: vfmadd132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmadd132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x44,0xd7,0x98,0x72,0x80
+
+# ATT: vfmadd132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vfmadd132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x20,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vfmadd132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x27,0x98,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd132nepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vfmadd132nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe6,0x44,0x30,0x98,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmadd132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vfmadd132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x44,0x20,0x98,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfmadd132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x44,0xa7,0x98,0x71,0x7f
+
+# ATT: vfmadd132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmadd132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x44,0xb7,0x98,0x72,0x80
+
+# ATT: vfmadd132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vfmadd132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x00,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vfmadd132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x07,0x98,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd132nepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vfmadd132nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe6,0x44,0x10,0x98,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmadd132nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vfmadd132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x44,0x00,0x98,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfmadd132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x44,0x87,0x98,0x71,0x7f
+
+# ATT: vfmadd132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmadd132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x44,0x97,0x98,0x72,0x80
+
+# ATT: vfmadd213nepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vfmadd213nepbf16 ymm22, ymm23, ymm24
+0x62,0x86,0x44,0x20,0xa8,0xf0
+
+# ATT: vfmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmadd213nepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x86,0x44,0x27,0xa8,0xf0
+
+# ATT: vfmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x86,0x44,0xa7,0xa8,0xf0
+
+# ATT: vfmadd213nepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vfmadd213nepbf16 zmm22, zmm23, zmm24
+0x62,0x86,0x44,0x40,0xa8,0xf0
+
+# ATT: vfmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vfmadd213nepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x86,0x44,0x47,0xa8,0xf0
+
+# ATT: vfmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x86,0x44,0xc7,0xa8,0xf0
+
+# ATT: vfmadd213nepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vfmadd213nepbf16 xmm22, xmm23, xmm24
+0x62,0x86,0x44,0x00,0xa8,0xf0
+
+# ATT: vfmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vfmadd213nepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x86,0x44,0x07,0xa8,0xf0
+
+# ATT: vfmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x86,0x44,0x87,0xa8,0xf0
+
+# ATT: vfmadd213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vfmadd213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x40,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vfmadd213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x47,0xa8,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd213nepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vfmadd213nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe6,0x44,0x50,0xa8,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmadd213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vfmadd213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x44,0x40,0xa8,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfmadd213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x44,0xc7,0xa8,0x71,0x7f
+
+# ATT: vfmadd213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmadd213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x44,0xd7,0xa8,0x72,0x80
+
+# ATT: vfmadd213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vfmadd213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x20,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vfmadd213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x27,0xa8,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd213nepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vfmadd213nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe6,0x44,0x30,0xa8,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmadd213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vfmadd213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x44,0x20,0xa8,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfmadd213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x44,0xa7,0xa8,0x71,0x7f
+
+# ATT: vfmadd213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmadd213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x44,0xb7,0xa8,0x72,0x80
+
+# ATT: vfmadd213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vfmadd213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x00,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vfmadd213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x07,0xa8,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd213nepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vfmadd213nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe6,0x44,0x10,0xa8,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmadd213nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vfmadd213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x44,0x00,0xa8,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfmadd213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x44,0x87,0xa8,0x71,0x7f
+
+# ATT: vfmadd213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmadd213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x44,0x97,0xa8,0x72,0x80
+
+# ATT: vfmadd231nepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vfmadd231nepbf16 ymm22, ymm23, ymm24
+0x62,0x86,0x44,0x20,0xb8,0xf0
+
+# ATT: vfmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmadd231nepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x86,0x44,0x27,0xb8,0xf0
+
+# ATT: vfmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x86,0x44,0xa7,0xb8,0xf0
+
+# ATT: vfmadd231nepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vfmadd231nepbf16 zmm22, zmm23, zmm24
+0x62,0x86,0x44,0x40,0xb8,0xf0
+
+# ATT: vfmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vfmadd231nepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x86,0x44,0x47,0xb8,0xf0
+
+# ATT: vfmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x86,0x44,0xc7,0xb8,0xf0
+
+# ATT: vfmadd231nepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vfmadd231nepbf16 xmm22, xmm23, xmm24
+0x62,0x86,0x44,0x00,0xb8,0xf0
+
+# ATT: vfmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vfmadd231nepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x86,0x44,0x07,0xb8,0xf0
+
+# ATT: vfmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x86,0x44,0x87,0xb8,0xf0
+
+# ATT: vfmadd231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vfmadd231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x40,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vfmadd231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x47,0xb8,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd231nepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vfmadd231nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe6,0x44,0x50,0xb8,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmadd231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vfmadd231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x44,0x40,0xb8,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfmadd231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x44,0xc7,0xb8,0x71,0x7f
+
+# ATT: vfmadd231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmadd231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x44,0xd7,0xb8,0x72,0x80
+
+# ATT: vfmadd231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vfmadd231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x20,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vfmadd231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x27,0xb8,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd231nepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vfmadd231nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe6,0x44,0x30,0xb8,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmadd231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vfmadd231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x44,0x20,0xb8,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfmadd231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x44,0xa7,0xb8,0x71,0x7f
+
+# ATT: vfmadd231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmadd231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x44,0xb7,0xb8,0x72,0x80
+
+# ATT: vfmadd231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vfmadd231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x00,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vfmadd231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x07,0xb8,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd231nepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vfmadd231nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe6,0x44,0x10,0xb8,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmadd231nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vfmadd231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x44,0x00,0xb8,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfmadd231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x44,0x87,0xb8,0x71,0x7f
+
+# ATT: vfmadd231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmadd231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x44,0x97,0xb8,0x72,0x80
+
+# ATT: vfmsub132nepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vfmsub132nepbf16 ymm22, ymm23, ymm24
+0x62,0x86,0x44,0x20,0x9a,0xf0
+
+# ATT: vfmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmsub132nepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x86,0x44,0x27,0x9a,0xf0
+
+# ATT: vfmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x86,0x44,0xa7,0x9a,0xf0
+
+# ATT: vfmsub132nepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vfmsub132nepbf16 zmm22, zmm23, zmm24
+0x62,0x86,0x44,0x40,0x9a,0xf0
+
+# ATT: vfmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vfmsub132nepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x86,0x44,0x47,0x9a,0xf0
+
+# ATT: vfmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x86,0x44,0xc7,0x9a,0xf0
+
+# ATT: vfmsub132nepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vfmsub132nepbf16 xmm22, xmm23, xmm24
+0x62,0x86,0x44,0x00,0x9a,0xf0
+
+# ATT: vfmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vfmsub132nepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x86,0x44,0x07,0x9a,0xf0
+
+# ATT: vfmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x86,0x44,0x87,0x9a,0xf0
+
+# ATT: vfmsub132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vfmsub132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x40,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vfmsub132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x47,0x9a,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub132nepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vfmsub132nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe6,0x44,0x50,0x9a,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmsub132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vfmsub132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x44,0x40,0x9a,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfmsub132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x44,0xc7,0x9a,0x71,0x7f
+
+# ATT: vfmsub132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmsub132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x44,0xd7,0x9a,0x72,0x80
+
+# ATT: vfmsub132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vfmsub132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x20,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vfmsub132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x27,0x9a,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub132nepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vfmsub132nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe6,0x44,0x30,0x9a,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmsub132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vfmsub132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x44,0x20,0x9a,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfmsub132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x44,0xa7,0x9a,0x71,0x7f
+
+# ATT: vfmsub132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsub132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x44,0xb7,0x9a,0x72,0x80
+
+# ATT: vfmsub132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vfmsub132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x00,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vfmsub132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x07,0x9a,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub132nepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vfmsub132nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe6,0x44,0x10,0x9a,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmsub132nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vfmsub132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x44,0x00,0x9a,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfmsub132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x44,0x87,0x9a,0x71,0x7f
+
+# ATT: vfmsub132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmsub132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x44,0x97,0x9a,0x72,0x80
+
+# ATT: vfmsub213nepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vfmsub213nepbf16 ymm22, ymm23, ymm24
+0x62,0x86,0x44,0x20,0xaa,0xf0
+
+# ATT: vfmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmsub213nepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x86,0x44,0x27,0xaa,0xf0
+
+# ATT: vfmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x86,0x44,0xa7,0xaa,0xf0
+
+# ATT: vfmsub213nepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vfmsub213nepbf16 zmm22, zmm23, zmm24
+0x62,0x86,0x44,0x40,0xaa,0xf0
+
+# ATT: vfmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vfmsub213nepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x86,0x44,0x47,0xaa,0xf0
+
+# ATT: vfmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x86,0x44,0xc7,0xaa,0xf0
+
+# ATT: vfmsub213nepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vfmsub213nepbf16 xmm22, xmm23, xmm24
+0x62,0x86,0x44,0x00,0xaa,0xf0
+
+# ATT: vfmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vfmsub213nepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x86,0x44,0x07,0xaa,0xf0
+
+# ATT: vfmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x86,0x44,0x87,0xaa,0xf0
+
+# ATT: vfmsub213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vfmsub213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x40,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vfmsub213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x47,0xaa,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub213nepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vfmsub213nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe6,0x44,0x50,0xaa,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmsub213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vfmsub213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x44,0x40,0xaa,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfmsub213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x44,0xc7,0xaa,0x71,0x7f
+
+# ATT: vfmsub213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmsub213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x44,0xd7,0xaa,0x72,0x80
+
+# ATT: vfmsub213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vfmsub213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x20,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vfmsub213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x27,0xaa,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub213nepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vfmsub213nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe6,0x44,0x30,0xaa,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmsub213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vfmsub213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x44,0x20,0xaa,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfmsub213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x44,0xa7,0xaa,0x71,0x7f
+
+# ATT: vfmsub213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsub213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x44,0xb7,0xaa,0x72,0x80
+
+# ATT: vfmsub213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vfmsub213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x00,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vfmsub213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x07,0xaa,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub213nepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vfmsub213nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe6,0x44,0x10,0xaa,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmsub213nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vfmsub213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x44,0x00,0xaa,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfmsub213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x44,0x87,0xaa,0x71,0x7f
+
+# ATT: vfmsub213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmsub213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x44,0x97,0xaa,0x72,0x80
+
+# ATT: vfmsub231nepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vfmsub231nepbf16 ymm22, ymm23, ymm24
+0x62,0x86,0x44,0x20,0xba,0xf0
+
+# ATT: vfmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmsub231nepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x86,0x44,0x27,0xba,0xf0
+
+# ATT: vfmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x86,0x44,0xa7,0xba,0xf0
+
+# ATT: vfmsub231nepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vfmsub231nepbf16 zmm22, zmm23, zmm24
+0x62,0x86,0x44,0x40,0xba,0xf0
+
+# ATT: vfmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vfmsub231nepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x86,0x44,0x47,0xba,0xf0
+
+# ATT: vfmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x86,0x44,0xc7,0xba,0xf0
+
+# ATT: vfmsub231nepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vfmsub231nepbf16 xmm22, xmm23, xmm24
+0x62,0x86,0x44,0x00,0xba,0xf0
+
+# ATT: vfmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vfmsub231nepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x86,0x44,0x07,0xba,0xf0
+
+# ATT: vfmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x86,0x44,0x87,0xba,0xf0
+
+# ATT: vfmsub231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vfmsub231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x40,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vfmsub231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x47,0xba,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub231nepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vfmsub231nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe6,0x44,0x50,0xba,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmsub231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vfmsub231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x44,0x40,0xba,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfmsub231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x44,0xc7,0xba,0x71,0x7f
+
+# ATT: vfmsub231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmsub231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x44,0xd7,0xba,0x72,0x80
+
+# ATT: vfmsub231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vfmsub231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x20,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vfmsub231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x27,0xba,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub231nepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vfmsub231nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe6,0x44,0x30,0xba,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmsub231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vfmsub231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x44,0x20,0xba,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfmsub231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x44,0xa7,0xba,0x71,0x7f
+
+# ATT: vfmsub231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsub231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x44,0xb7,0xba,0x72,0x80
+
+# ATT: vfmsub231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vfmsub231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x00,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vfmsub231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x07,0xba,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub231nepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vfmsub231nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe6,0x44,0x10,0xba,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmsub231nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vfmsub231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x44,0x00,0xba,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfmsub231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x44,0x87,0xba,0x71,0x7f
+
+# ATT: vfmsub231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmsub231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x44,0x97,0xba,0x72,0x80
+
+# ATT: vfnmadd132nepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vfnmadd132nepbf16 ymm22, ymm23, ymm24
+0x62,0x86,0x44,0x20,0x9c,0xf0
+
+# ATT: vfnmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmadd132nepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x86,0x44,0x27,0x9c,0xf0
+
+# ATT: vfnmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x86,0x44,0xa7,0x9c,0xf0
+
+# ATT: vfnmadd132nepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vfnmadd132nepbf16 zmm22, zmm23, zmm24
+0x62,0x86,0x44,0x40,0x9c,0xf0
+
+# ATT: vfnmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vfnmadd132nepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x86,0x44,0x47,0x9c,0xf0
+
+# ATT: vfnmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x86,0x44,0xc7,0x9c,0xf0
+
+# ATT: vfnmadd132nepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vfnmadd132nepbf16 xmm22, xmm23, xmm24
+0x62,0x86,0x44,0x00,0x9c,0xf0
+
+# ATT: vfnmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vfnmadd132nepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x86,0x44,0x07,0x9c,0xf0
+
+# ATT: vfnmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x86,0x44,0x87,0x9c,0xf0
+
+# ATT: vfnmadd132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vfnmadd132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x40,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vfnmadd132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x47,0x9c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd132nepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vfnmadd132nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe6,0x44,0x50,0x9c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmadd132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vfnmadd132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x44,0x40,0x9c,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfnmadd132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x44,0xc7,0x9c,0x71,0x7f
+
+# ATT: vfnmadd132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x44,0xd7,0x9c,0x72,0x80
+
+# ATT: vfnmadd132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vfnmadd132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x20,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vfnmadd132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x27,0x9c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd132nepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vfnmadd132nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe6,0x44,0x30,0x9c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmadd132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vfnmadd132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x44,0x20,0x9c,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfnmadd132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x44,0xa7,0x9c,0x71,0x7f
+
+# ATT: vfnmadd132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x44,0xb7,0x9c,0x72,0x80
+
+# ATT: vfnmadd132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vfnmadd132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x00,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vfnmadd132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x07,0x9c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd132nepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vfnmadd132nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe6,0x44,0x10,0x9c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmadd132nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vfnmadd132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x44,0x00,0x9c,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfnmadd132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x44,0x87,0x9c,0x71,0x7f
+
+# ATT: vfnmadd132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x44,0x97,0x9c,0x72,0x80
+
+# ATT: vfnmadd213nepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vfnmadd213nepbf16 ymm22, ymm23, ymm24
+0x62,0x86,0x44,0x20,0xac,0xf0
+
+# ATT: vfnmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmadd213nepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x86,0x44,0x27,0xac,0xf0
+
+# ATT: vfnmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x86,0x44,0xa7,0xac,0xf0
+
+# ATT: vfnmadd213nepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vfnmadd213nepbf16 zmm22, zmm23, zmm24
+0x62,0x86,0x44,0x40,0xac,0xf0
+
+# ATT: vfnmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vfnmadd213nepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x86,0x44,0x47,0xac,0xf0
+
+# ATT: vfnmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x86,0x44,0xc7,0xac,0xf0
+
+# ATT: vfnmadd213nepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vfnmadd213nepbf16 xmm22, xmm23, xmm24
+0x62,0x86,0x44,0x00,0xac,0xf0
+
+# ATT: vfnmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vfnmadd213nepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x86,0x44,0x07,0xac,0xf0
+
+# ATT: vfnmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x86,0x44,0x87,0xac,0xf0
+
+# ATT: vfnmadd213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vfnmadd213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x40,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vfnmadd213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x47,0xac,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd213nepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vfnmadd213nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe6,0x44,0x50,0xac,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmadd213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vfnmadd213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x44,0x40,0xac,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfnmadd213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x44,0xc7,0xac,0x71,0x7f
+
+# ATT: vfnmadd213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x44,0xd7,0xac,0x72,0x80
+
+# ATT: vfnmadd213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vfnmadd213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x20,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vfnmadd213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x27,0xac,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd213nepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vfnmadd213nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe6,0x44,0x30,0xac,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmadd213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vfnmadd213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x44,0x20,0xac,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfnmadd213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x44,0xa7,0xac,0x71,0x7f
+
+# ATT: vfnmadd213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x44,0xb7,0xac,0x72,0x80
+
+# ATT: vfnmadd213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vfnmadd213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x00,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vfnmadd213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x07,0xac,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd213nepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vfnmadd213nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe6,0x44,0x10,0xac,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmadd213nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vfnmadd213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x44,0x00,0xac,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfnmadd213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x44,0x87,0xac,0x71,0x7f
+
+# ATT: vfnmadd213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x44,0x97,0xac,0x72,0x80
+
+# ATT: vfnmadd231nepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vfnmadd231nepbf16 ymm22, ymm23, ymm24
+0x62,0x86,0x44,0x20,0xbc,0xf0
+
+# ATT: vfnmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmadd231nepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x86,0x44,0x27,0xbc,0xf0
+
+# ATT: vfnmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x86,0x44,0xa7,0xbc,0xf0
+
+# ATT: vfnmadd231nepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vfnmadd231nepbf16 zmm22, zmm23, zmm24
+0x62,0x86,0x44,0x40,0xbc,0xf0
+
+# ATT: vfnmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vfnmadd231nepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x86,0x44,0x47,0xbc,0xf0
+
+# ATT: vfnmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x86,0x44,0xc7,0xbc,0xf0
+
+# ATT: vfnmadd231nepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vfnmadd231nepbf16 xmm22, xmm23, xmm24
+0x62,0x86,0x44,0x00,0xbc,0xf0
+
+# ATT: vfnmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vfnmadd231nepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x86,0x44,0x07,0xbc,0xf0
+
+# ATT: vfnmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x86,0x44,0x87,0xbc,0xf0
+
+# ATT: vfnmadd231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vfnmadd231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x40,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vfnmadd231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x47,0xbc,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd231nepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vfnmadd231nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe6,0x44,0x50,0xbc,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmadd231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vfnmadd231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x44,0x40,0xbc,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfnmadd231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x44,0xc7,0xbc,0x71,0x7f
+
+# ATT: vfnmadd231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x44,0xd7,0xbc,0x72,0x80
+
+# ATT: vfnmadd231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vfnmadd231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x20,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vfnmadd231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x27,0xbc,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd231nepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vfnmadd231nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe6,0x44,0x30,0xbc,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmadd231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vfnmadd231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x44,0x20,0xbc,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfnmadd231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x44,0xa7,0xbc,0x71,0x7f
+
+# ATT: vfnmadd231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x44,0xb7,0xbc,0x72,0x80
+
+# ATT: vfnmadd231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vfnmadd231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x00,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vfnmadd231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x07,0xbc,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd231nepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vfnmadd231nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe6,0x44,0x10,0xbc,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmadd231nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vfnmadd231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x44,0x00,0xbc,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfnmadd231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x44,0x87,0xbc,0x71,0x7f
+
+# ATT: vfnmadd231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x44,0x97,0xbc,0x72,0x80
+
+# ATT: vfnmsub132nepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vfnmsub132nepbf16 ymm22, ymm23, ymm24
+0x62,0x86,0x44,0x20,0x9e,0xf0
+
+# ATT: vfnmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmsub132nepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x86,0x44,0x27,0x9e,0xf0
+
+# ATT: vfnmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x86,0x44,0xa7,0x9e,0xf0
+
+# ATT: vfnmsub132nepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vfnmsub132nepbf16 zmm22, zmm23, zmm24
+0x62,0x86,0x44,0x40,0x9e,0xf0
+
+# ATT: vfnmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vfnmsub132nepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x86,0x44,0x47,0x9e,0xf0
+
+# ATT: vfnmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x86,0x44,0xc7,0x9e,0xf0
+
+# ATT: vfnmsub132nepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vfnmsub132nepbf16 xmm22, xmm23, xmm24
+0x62,0x86,0x44,0x00,0x9e,0xf0
+
+# ATT: vfnmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vfnmsub132nepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x86,0x44,0x07,0x9e,0xf0
+
+# ATT: vfnmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x86,0x44,0x87,0x9e,0xf0
+
+# ATT: vfnmsub132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vfnmsub132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x40,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vfnmsub132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x47,0x9e,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub132nepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vfnmsub132nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe6,0x44,0x50,0x9e,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmsub132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vfnmsub132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x44,0x40,0x9e,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfnmsub132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x44,0xc7,0x9e,0x71,0x7f
+
+# ATT: vfnmsub132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x44,0xd7,0x9e,0x72,0x80
+
+# ATT: vfnmsub132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vfnmsub132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x20,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vfnmsub132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x27,0x9e,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub132nepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vfnmsub132nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe6,0x44,0x30,0x9e,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmsub132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vfnmsub132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x44,0x20,0x9e,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfnmsub132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x44,0xa7,0x9e,0x71,0x7f
+
+# ATT: vfnmsub132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x44,0xb7,0x9e,0x72,0x80
+
+# ATT: vfnmsub132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vfnmsub132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x00,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vfnmsub132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x07,0x9e,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub132nepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vfnmsub132nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe6,0x44,0x10,0x9e,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmsub132nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vfnmsub132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x44,0x00,0x9e,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfnmsub132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x44,0x87,0x9e,0x71,0x7f
+
+# ATT: vfnmsub132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x44,0x97,0x9e,0x72,0x80
+
+# ATT: vfnmsub213nepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vfnmsub213nepbf16 ymm22, ymm23, ymm24
+0x62,0x86,0x44,0x20,0xae,0xf0
+
+# ATT: vfnmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmsub213nepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x86,0x44,0x27,0xae,0xf0
+
+# ATT: vfnmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x86,0x44,0xa7,0xae,0xf0
+
+# ATT: vfnmsub213nepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vfnmsub213nepbf16 zmm22, zmm23, zmm24
+0x62,0x86,0x44,0x40,0xae,0xf0
+
+# ATT: vfnmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vfnmsub213nepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x86,0x44,0x47,0xae,0xf0
+
+# ATT: vfnmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x86,0x44,0xc7,0xae,0xf0
+
+# ATT: vfnmsub213nepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vfnmsub213nepbf16 xmm22, xmm23, xmm24
+0x62,0x86,0x44,0x00,0xae,0xf0
+
+# ATT: vfnmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vfnmsub213nepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x86,0x44,0x07,0xae,0xf0
+
+# ATT: vfnmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x86,0x44,0x87,0xae,0xf0
+
+# ATT: vfnmsub213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vfnmsub213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x40,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vfnmsub213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x47,0xae,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub213nepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vfnmsub213nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe6,0x44,0x50,0xae,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmsub213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vfnmsub213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x44,0x40,0xae,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfnmsub213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x44,0xc7,0xae,0x71,0x7f
+
+# ATT: vfnmsub213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x44,0xd7,0xae,0x72,0x80
+
+# ATT: vfnmsub213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vfnmsub213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x20,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vfnmsub213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x27,0xae,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub213nepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vfnmsub213nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe6,0x44,0x30,0xae,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmsub213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vfnmsub213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x44,0x20,0xae,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfnmsub213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x44,0xa7,0xae,0x71,0x7f
+
+# ATT: vfnmsub213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x44,0xb7,0xae,0x72,0x80
+
+# ATT: vfnmsub213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vfnmsub213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x00,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vfnmsub213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x07,0xae,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub213nepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vfnmsub213nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe6,0x44,0x10,0xae,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmsub213nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vfnmsub213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x44,0x00,0xae,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfnmsub213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x44,0x87,0xae,0x71,0x7f
+
+# ATT: vfnmsub213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x44,0x97,0xae,0x72,0x80
+
+# ATT: vfnmsub231nepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vfnmsub231nepbf16 ymm22, ymm23, ymm24
+0x62,0x86,0x44,0x20,0xbe,0xf0
+
+# ATT: vfnmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmsub231nepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x86,0x44,0x27,0xbe,0xf0
+
+# ATT: vfnmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x86,0x44,0xa7,0xbe,0xf0
+
+# ATT: vfnmsub231nepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vfnmsub231nepbf16 zmm22, zmm23, zmm24
+0x62,0x86,0x44,0x40,0xbe,0xf0
+
+# ATT: vfnmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vfnmsub231nepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x86,0x44,0x47,0xbe,0xf0
+
+# ATT: vfnmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x86,0x44,0xc7,0xbe,0xf0
+
+# ATT: vfnmsub231nepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vfnmsub231nepbf16 xmm22, xmm23, xmm24
+0x62,0x86,0x44,0x00,0xbe,0xf0
+
+# ATT: vfnmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vfnmsub231nepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x86,0x44,0x07,0xbe,0xf0
+
+# ATT: vfnmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x86,0x44,0x87,0xbe,0xf0
+
+# ATT: vfnmsub231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vfnmsub231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x40,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vfnmsub231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x47,0xbe,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub231nepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vfnmsub231nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe6,0x44,0x50,0xbe,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmsub231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vfnmsub231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x44,0x40,0xbe,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfnmsub231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x44,0xc7,0xbe,0x71,0x7f
+
+# ATT: vfnmsub231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x44,0xd7,0xbe,0x72,0x80
+
+# ATT: vfnmsub231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vfnmsub231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x20,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vfnmsub231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x27,0xbe,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub231nepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vfnmsub231nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe6,0x44,0x30,0xbe,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmsub231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vfnmsub231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x44,0x20,0xbe,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfnmsub231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x44,0xa7,0xbe,0x71,0x7f
+
+# ATT: vfnmsub231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x44,0xb7,0xbe,0x72,0x80
+
+# ATT: vfnmsub231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vfnmsub231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x00,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vfnmsub231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x07,0xbe,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub231nepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vfnmsub231nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe6,0x44,0x10,0xbe,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmsub231nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vfnmsub231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x44,0x00,0xbe,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfnmsub231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x44,0x87,0xbe,0x71,0x7f
+
+# ATT: vfnmsub231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x44,0x97,0xbe,0x72,0x80
+
+# ATT: vfpclasspbf16 $123, %zmm23, %k5
+# INTEL: vfpclasspbf16 k5, zmm23, 123
+0x62,0xb3,0x7f,0x48,0x66,0xef,0x7b
+
+# ATT: vfpclasspbf16 $123, %zmm23, %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, zmm23, 123
+0x62,0xb3,0x7f,0x4f,0x66,0xef,0x7b
+
+# ATT: vfpclasspbf16 $123, %ymm23, %k5
+# INTEL: vfpclasspbf16 k5, ymm23, 123
+0x62,0xb3,0x7f,0x28,0x66,0xef,0x7b
+
+# ATT: vfpclasspbf16 $123, %ymm23, %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, ymm23, 123
+0x62,0xb3,0x7f,0x2f,0x66,0xef,0x7b
+
+# ATT: vfpclasspbf16 $123, %xmm23, %k5
+# INTEL: vfpclasspbf16 k5, xmm23, 123
+0x62,0xb3,0x7f,0x08,0x66,0xef,0x7b
+
+# ATT: vfpclasspbf16 $123, %xmm23, %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, xmm23, 123
+0x62,0xb3,0x7f,0x0f,0x66,0xef,0x7b
+
+# ATT: vfpclasspbf16x $123, 268435456(%rbp,%r14,8), %k5
+# INTEL: vfpclasspbf16 k5, xmmword ptr [rbp + 8*r14 + 268435456], 123
+0x62,0xb3,0x7f,0x08,0x66,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vfpclasspbf16x $123, 291(%r8,%rax,4), %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, xmmword ptr [r8 + 4*rax + 291], 123
+0x62,0xd3,0x7f,0x0f,0x66,0xac,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vfpclasspbf16 $123, (%rip){1to8}, %k5
+# INTEL: vfpclasspbf16 k5, word ptr [rip]{1to8}, 123
+0x62,0xf3,0x7f,0x18,0x66,0x2d,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vfpclasspbf16x $123, -512(,%rbp,2), %k5
+# INTEL: vfpclasspbf16 k5, xmmword ptr [2*rbp - 512], 123
+0x62,0xf3,0x7f,0x08,0x66,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b
+
+# ATT: vfpclasspbf16x $123, 2032(%rcx), %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, xmmword ptr [rcx + 2032], 123
+0x62,0xf3,0x7f,0x0f,0x66,0x69,0x7f,0x7b
+
+# ATT: vfpclasspbf16 $123, -256(%rdx){1to8}, %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, word ptr [rdx - 256]{1to8}, 123
+0x62,0xf3,0x7f,0x1f,0x66,0x6a,0x80,0x7b
+
+# ATT: vfpclasspbf16 $123, (%rip){1to16}, %k5
+# INTEL: vfpclasspbf16 k5, word ptr [rip]{1to16}, 123
+0x62,0xf3,0x7f,0x38,0x66,0x2d,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vfpclasspbf16y $123, -1024(,%rbp,2), %k5
+# INTEL: vfpclasspbf16 k5, ymmword ptr [2*rbp - 1024], 123
+0x62,0xf3,0x7f,0x28,0x66,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b
+
+# ATT: vfpclasspbf16y $123, 4064(%rcx), %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, ymmword ptr [rcx + 4064], 123
+0x62,0xf3,0x7f,0x2f,0x66,0x69,0x7f,0x7b
+
+# ATT: vfpclasspbf16 $123, -256(%rdx){1to16}, %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, word ptr [rdx - 256]{1to16}, 123
+0x62,0xf3,0x7f,0x3f,0x66,0x6a,0x80,0x7b
+
+# ATT: vfpclasspbf16 $123, (%rip){1to32}, %k5
+# INTEL: vfpclasspbf16 k5, word ptr [rip]{1to32}, 123
+0x62,0xf3,0x7f,0x58,0x66,0x2d,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vfpclasspbf16z $123, -2048(,%rbp,2), %k5
+# INTEL: vfpclasspbf16 k5, zmmword ptr [2*rbp - 2048], 123
+0x62,0xf3,0x7f,0x48,0x66,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b
+
+# ATT: vfpclasspbf16z $123, 8128(%rcx), %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, zmmword ptr [rcx + 8128], 123
+0x62,0xf3,0x7f,0x4f,0x66,0x69,0x7f,0x7b
+
+# ATT: vfpclasspbf16 $123, -256(%rdx){1to32}, %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, word ptr [rdx - 256]{1to32}, 123
+0x62,0xf3,0x7f,0x5f,0x66,0x6a,0x80,0x7b
+
+# ATT: vgetexppbf16 %xmm23, %xmm22
+# INTEL: vgetexppbf16 xmm22, xmm23
+0x62,0xa5,0x7d,0x08,0x42,0xf7
+
+# ATT: vgetexppbf16 %xmm23, %xmm22 {%k7}
+# INTEL: vgetexppbf16 xmm22 {k7}, xmm23
+0x62,0xa5,0x7d,0x0f,0x42,0xf7
+
+# ATT: vgetexppbf16 %xmm23, %xmm22 {%k7} {z}
+# INTEL: vgetexppbf16 xmm22 {k7} {z}, xmm23
+0x62,0xa5,0x7d,0x8f,0x42,0xf7
+
+# ATT: vgetexppbf16 %zmm23, %zmm22
+# INTEL: vgetexppbf16 zmm22, zmm23
+0x62,0xa5,0x7d,0x48,0x42,0xf7
+
+# ATT: vgetexppbf16 %zmm23, %zmm22 {%k7}
+# INTEL: vgetexppbf16 zmm22 {k7}, zmm23
+0x62,0xa5,0x7d,0x4f,0x42,0xf7
+
+# ATT: vgetexppbf16 %zmm23, %zmm22 {%k7} {z}
+# INTEL: vgetexppbf16 zmm22 {k7} {z}, zmm23
+0x62,0xa5,0x7d,0xcf,0x42,0xf7
+
+# ATT: vgetexppbf16 %ymm23, %ymm22
+# INTEL: vgetexppbf16 ymm22, ymm23
+0x62,0xa5,0x7d,0x28,0x42,0xf7
+
+# ATT: vgetexppbf16 %ymm23, %ymm22 {%k7}
+# INTEL: vgetexppbf16 ymm22 {k7}, ymm23
+0x62,0xa5,0x7d,0x2f,0x42,0xf7
+
+# ATT: vgetexppbf16 %ymm23, %ymm22 {%k7} {z}
+# INTEL: vgetexppbf16 ymm22 {k7} {z}, ymm23
+0x62,0xa5,0x7d,0xaf,0x42,0xf7
+
+# ATT: vgetexppbf16 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vgetexppbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7d,0x08,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vgetexppbf16 291(%r8,%rax,4), %xmm22 {%k7}
+# INTEL: vgetexppbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7d,0x0f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vgetexppbf16 (%rip){1to8}, %xmm22
+# INTEL: vgetexppbf16 xmm22, word ptr [rip]{1to8}
+0x62,0xe5,0x7d,0x18,0x42,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vgetexppbf16 -512(,%rbp,2), %xmm22
+# INTEL: vgetexppbf16 xmm22, xmmword ptr [2*rbp - 512]
+0x62,0xe5,0x7d,0x08,0x42,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vgetexppbf16 2032(%rcx), %xmm22 {%k7} {z}
+# INTEL: vgetexppbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+0x62,0xe5,0x7d,0x8f,0x42,0x71,0x7f
+
+# ATT: vgetexppbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z}
+# INTEL: vgetexppbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}
+0x62,0xe5,0x7d,0x9f,0x42,0x72,0x80
+
+# ATT: vgetexppbf16 268435456(%rbp,%r14,8), %ymm22
+# INTEL: vgetexppbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7d,0x28,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vgetexppbf16 291(%r8,%rax,4), %ymm22 {%k7}
+# INTEL: vgetexppbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7d,0x2f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vgetexppbf16 (%rip){1to16}, %ymm22
+# INTEL: vgetexppbf16 ymm22, word ptr [rip]{1to16}
+0x62,0xe5,0x7d,0x38,0x42,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vgetexppbf16 -1024(,%rbp,2), %ymm22
+# INTEL: vgetexppbf16 ymm22, ymmword ptr [2*rbp - 1024]
+0x62,0xe5,0x7d,0x28,0x42,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vgetexppbf16 4064(%rcx), %ymm22 {%k7} {z}
+# INTEL: vgetexppbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+0x62,0xe5,0x7d,0xaf,0x42,0x71,0x7f
+
+# ATT: vgetexppbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z}
+# INTEL: vgetexppbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}
+0x62,0xe5,0x7d,0xbf,0x42,0x72,0x80
+
+# ATT: vgetexppbf16 268435456(%rbp,%r14,8), %zmm22
+# INTEL: vgetexppbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7d,0x48,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vgetexppbf16 291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vgetexppbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7d,0x4f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vgetexppbf16 (%rip){1to32}, %zmm22
+# INTEL: vgetexppbf16 zmm22, word ptr [rip]{1to32}
+0x62,0xe5,0x7d,0x58,0x42,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vgetexppbf16 -2048(,%rbp,2), %zmm22
+# INTEL: vgetexppbf16 zmm22, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0x7d,0x48,0x42,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vgetexppbf16 8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vgetexppbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+0x62,0xe5,0x7d,0xcf,0x42,0x71,0x7f
+
+# ATT: vgetexppbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z}
+# INTEL: vgetexppbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}
+0x62,0xe5,0x7d,0xdf,0x42,0x72,0x80
+
+# ATT: vgetmantpbf16 $123, %zmm23, %zmm22
+# INTEL: vgetmantpbf16 zmm22, zmm23, 123
+0x62,0xa3,0x7f,0x48,0x26,0xf7,0x7b
+
+# ATT: vgetmantpbf16 $123, %zmm23, %zmm22 {%k7}
+# INTEL: vgetmantpbf16 zmm22 {k7}, zmm23, 123
+0x62,0xa3,0x7f,0x4f,0x26,0xf7,0x7b
+
+# ATT: vgetmantpbf16 $123, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vgetmantpbf16 zmm22 {k7} {z}, zmm23, 123
+0x62,0xa3,0x7f,0xcf,0x26,0xf7,0x7b
+
+# ATT: vgetmantpbf16 $123, %ymm23, %ymm22
+# INTEL: vgetmantpbf16 ymm22, ymm23, 123
+0x62,0xa3,0x7f,0x28,0x26,0xf7,0x7b
+
+# ATT: vgetmantpbf16 $123, %ymm23, %ymm22 {%k7}
+# INTEL: vgetmantpbf16 ymm22 {k7}, ymm23, 123
+0x62,0xa3,0x7f,0x2f,0x26,0xf7,0x7b
+
+# ATT: vgetmantpbf16 $123, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vgetmantpbf16 ymm22 {k7} {z}, ymm23, 123
+0x62,0xa3,0x7f,0xaf,0x26,0xf7,0x7b
+
+# ATT: vgetmantpbf16 $123, %xmm23, %xmm22
+# INTEL: vgetmantpbf16 xmm22, xmm23, 123
+0x62,0xa3,0x7f,0x08,0x26,0xf7,0x7b
+
+# ATT: vgetmantpbf16 $123, %xmm23, %xmm22 {%k7}
+# INTEL: vgetmantpbf16 xmm22 {k7}, xmm23, 123
+0x62,0xa3,0x7f,0x0f,0x26,0xf7,0x7b
+
+# ATT: vgetmantpbf16 $123, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vgetmantpbf16 xmm22 {k7} {z}, xmm23, 123
+0x62,0xa3,0x7f,0x8f,0x26,0xf7,0x7b
+
+# ATT: vgetmantpbf16 $123, 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vgetmantpbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456], 123
+0x62,0xa3,0x7f,0x08,0x26,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vgetmantpbf16 $123, 291(%r8,%rax,4), %xmm22 {%k7}
+# INTEL: vgetmantpbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291], 123
+0x62,0xc3,0x7f,0x0f,0x26,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vgetmantpbf16 $123, (%rip){1to8}, %xmm22
+# INTEL: vgetmantpbf16 xmm22, word ptr [rip]{1to8}, 123
+0x62,0xe3,0x7f,0x18,0x26,0x35,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vgetmantpbf16 $123, -512(,%rbp,2), %xmm22
+# INTEL: vgetmantpbf16 xmm22, xmmword ptr [2*rbp - 512], 123
+0x62,0xe3,0x7f,0x08,0x26,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b
+
+# ATT: vgetmantpbf16 $123, 2032(%rcx), %xmm22 {%k7} {z}
+# INTEL: vgetmantpbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032], 123
+0x62,0xe3,0x7f,0x8f,0x26,0x71,0x7f,0x7b
+
+# ATT: vgetmantpbf16 $123, -256(%rdx){1to8}, %xmm22 {%k7} {z}
+# INTEL: vgetmantpbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}, 123
+0x62,0xe3,0x7f,0x9f,0x26,0x72,0x80,0x7b
+
+# ATT: vgetmantpbf16 $123, 268435456(%rbp,%r14,8), %ymm22
+# INTEL: vgetmantpbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456], 123
+0x62,0xa3,0x7f,0x28,0x26,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vgetmantpbf16 $123, 291(%r8,%rax,4), %ymm22 {%k7}
+# INTEL: vgetmantpbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291], 123
+0x62,0xc3,0x7f,0x2f,0x26,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vgetmantpbf16 $123, (%rip){1to16}, %ymm22
+# INTEL: vgetmantpbf16 ymm22, word ptr [rip]{1to16}, 123
+0x62,0xe3,0x7f,0x38,0x26,0x35,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vgetmantpbf16 $123, -1024(,%rbp,2), %ymm22
+# INTEL: vgetmantpbf16 ymm22, ymmword ptr [2*rbp - 1024], 123
+0x62,0xe3,0x7f,0x28,0x26,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b
+
+# ATT: vgetmantpbf16 $123, 4064(%rcx), %ymm22 {%k7} {z}
+# INTEL: vgetmantpbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064], 123
+0x62,0xe3,0x7f,0xaf,0x26,0x71,0x7f,0x7b
+
+# ATT: vgetmantpbf16 $123, -256(%rdx){1to16}, %ymm22 {%k7} {z}
+# INTEL: vgetmantpbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}, 123
+0x62,0xe3,0x7f,0xbf,0x26,0x72,0x80,0x7b
+
+# ATT: vgetmantpbf16 $123, 268435456(%rbp,%r14,8), %zmm22
+# INTEL: vgetmantpbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456], 123
+0x62,0xa3,0x7f,0x48,0x26,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vgetmantpbf16 $123, 291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vgetmantpbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291], 123
+0x62,0xc3,0x7f,0x4f,0x26,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vgetmantpbf16 $123, (%rip){1to32}, %zmm22
+# INTEL: vgetmantpbf16 zmm22, word ptr [rip]{1to32}, 123
+0x62,0xe3,0x7f,0x58,0x26,0x35,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vgetmantpbf16 $123, -2048(,%rbp,2), %zmm22
+# INTEL: vgetmantpbf16 zmm22, zmmword ptr [2*rbp - 2048], 123
+0x62,0xe3,0x7f,0x48,0x26,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b
+
+# ATT: vgetmantpbf16 $123, 8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vgetmantpbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128], 123
+0x62,0xe3,0x7f,0xcf,0x26,0x71,0x7f,0x7b
+
+# ATT: vgetmantpbf16 $123, -256(%rdx){1to32}, %zmm22 {%k7} {z}
+# INTEL: vgetmantpbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}, 123
+0x62,0xe3,0x7f,0xdf,0x26,0x72,0x80,0x7b
+
+# ATT: vmaxpbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vmaxpbf16 ymm22, ymm23, ymm24
+0x62,0x85,0x45,0x20,0x5f,0xf0
+
+# ATT: vmaxpbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vmaxpbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x85,0x45,0x27,0x5f,0xf0
+
+# ATT: vmaxpbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vmaxpbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x85,0x45,0xa7,0x5f,0xf0
+
+# ATT: vmaxpbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vmaxpbf16 zmm22, zmm23, zmm24
+0x62,0x85,0x45,0x40,0x5f,0xf0
+
+# ATT: vmaxpbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vmaxpbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x85,0x45,0x47,0x5f,0xf0
+
+# ATT: vmaxpbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vmaxpbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x85,0x45,0xc7,0x5f,0xf0
+
+# ATT: vmaxpbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vmaxpbf16 xmm22, xmm23, xmm24
+0x62,0x85,0x45,0x00,0x5f,0xf0
+
+# ATT: vmaxpbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vmaxpbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x85,0x45,0x07,0x5f,0xf0
+
+# ATT: vmaxpbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vmaxpbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x85,0x45,0x87,0x5f,0xf0
+
+# ATT: vmaxpbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vmaxpbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x40,0x5f,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vmaxpbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vmaxpbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x47,0x5f,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vmaxpbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vmaxpbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe5,0x45,0x50,0x5f,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vmaxpbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vmaxpbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0x45,0x40,0x5f,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vmaxpbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vmaxpbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe5,0x45,0xc7,0x5f,0x71,0x7f
+
+# ATT: vmaxpbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vmaxpbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe5,0x45,0xd7,0x5f,0x72,0x80
+
+# ATT: vmaxpbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vmaxpbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x20,0x5f,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vmaxpbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vmaxpbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x27,0x5f,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vmaxpbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vmaxpbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe5,0x45,0x30,0x5f,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vmaxpbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vmaxpbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe5,0x45,0x20,0x5f,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vmaxpbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vmaxpbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe5,0x45,0xa7,0x5f,0x71,0x7f
+
+# ATT: vmaxpbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vmaxpbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe5,0x45,0xb7,0x5f,0x72,0x80
+
+# ATT: vmaxpbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vmaxpbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x00,0x5f,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vmaxpbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vmaxpbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x07,0x5f,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vmaxpbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vmaxpbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe5,0x45,0x10,0x5f,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vmaxpbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vmaxpbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe5,0x45,0x00,0x5f,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vmaxpbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vmaxpbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe5,0x45,0x87,0x5f,0x71,0x7f
+
+# ATT: vmaxpbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vmaxpbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe5,0x45,0x97,0x5f,0x72,0x80
+
+# ATT: vminpbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vminpbf16 ymm22, ymm23, ymm24
+0x62,0x85,0x45,0x20,0x5d,0xf0
+
+# ATT: vminpbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vminpbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x85,0x45,0x27,0x5d,0xf0
+
+# ATT: vminpbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vminpbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x85,0x45,0xa7,0x5d,0xf0
+
+# ATT: vminpbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vminpbf16 zmm22, zmm23, zmm24
+0x62,0x85,0x45,0x40,0x5d,0xf0
+
+# ATT: vminpbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vminpbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x85,0x45,0x47,0x5d,0xf0
+
+# ATT: vminpbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vminpbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x85,0x45,0xc7,0x5d,0xf0
+
+# ATT: vminpbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vminpbf16 xmm22, xmm23, xmm24
+0x62,0x85,0x45,0x00,0x5d,0xf0
+
+# ATT: vminpbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vminpbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x85,0x45,0x07,0x5d,0xf0
+
+# ATT: vminpbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vminpbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x85,0x45,0x87,0x5d,0xf0
+
+# ATT: vminpbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vminpbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x40,0x5d,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vminpbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vminpbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x47,0x5d,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vminpbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vminpbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe5,0x45,0x50,0x5d,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vminpbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vminpbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0x45,0x40,0x5d,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vminpbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vminpbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe5,0x45,0xc7,0x5d,0x71,0x7f
+
+# ATT: vminpbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vminpbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe5,0x45,0xd7,0x5d,0x72,0x80
+
+# ATT: vminpbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vminpbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x20,0x5d,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vminpbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vminpbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x27,0x5d,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vminpbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vminpbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe5,0x45,0x30,0x5d,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vminpbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vminpbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe5,0x45,0x20,0x5d,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vminpbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vminpbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe5,0x45,0xa7,0x5d,0x71,0x7f
+
+# ATT: vminpbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vminpbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe5,0x45,0xb7,0x5d,0x72,0x80
+
+# ATT: vminpbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vminpbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x00,0x5d,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vminpbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vminpbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x07,0x5d,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vminpbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vminpbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe5,0x45,0x10,0x5d,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vminpbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vminpbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe5,0x45,0x00,0x5d,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vminpbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vminpbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe5,0x45,0x87,0x5d,0x71,0x7f
+
+# ATT: vminpbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vminpbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe5,0x45,0x97,0x5d,0x72,0x80
+
+# ATT: vmulnepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vmulnepbf16 ymm22, ymm23, ymm24
+0x62,0x85,0x45,0x20,0x59,0xf0
+
+# ATT: vmulnepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vmulnepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x85,0x45,0x27,0x59,0xf0
+
+# ATT: vmulnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vmulnepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x85,0x45,0xa7,0x59,0xf0
+
+# ATT: vmulnepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vmulnepbf16 zmm22, zmm23, zmm24
+0x62,0x85,0x45,0x40,0x59,0xf0
+
+# ATT: vmulnepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vmulnepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x85,0x45,0x47,0x59,0xf0
+
+# ATT: vmulnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vmulnepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x85,0x45,0xc7,0x59,0xf0
+
+# ATT: vmulnepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vmulnepbf16 xmm22, xmm23, xmm24
+0x62,0x85,0x45,0x00,0x59,0xf0
+
+# ATT: vmulnepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vmulnepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x85,0x45,0x07,0x59,0xf0
+
+# ATT: vmulnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vmulnepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x85,0x45,0x87,0x59,0xf0
+
+# ATT: vmulnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vmulnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x40,0x59,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vmulnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vmulnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x47,0x59,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vmulnepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vmulnepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe5,0x45,0x50,0x59,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vmulnepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vmulnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0x45,0x40,0x59,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vmulnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vmulnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe5,0x45,0xc7,0x59,0x71,0x7f
+
+# ATT: vmulnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vmulnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe5,0x45,0xd7,0x59,0x72,0x80
+
+# ATT: vmulnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vmulnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x20,0x59,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vmulnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vmulnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x27,0x59,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vmulnepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vmulnepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe5,0x45,0x30,0x59,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vmulnepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vmulnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe5,0x45,0x20,0x59,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vmulnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vmulnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe5,0x45,0xa7,0x59,0x71,0x7f
+
+# ATT: vmulnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vmulnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe5,0x45,0xb7,0x59,0x72,0x80
+
+# ATT: vmulnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vmulnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x00,0x59,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vmulnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vmulnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x07,0x59,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vmulnepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vmulnepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe5,0x45,0x10,0x59,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vmulnepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vmulnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe5,0x45,0x00,0x59,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vmulnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vmulnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe5,0x45,0x87,0x59,0x71,0x7f
+
+# ATT: vmulnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vmulnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe5,0x45,0x97,0x59,0x72,0x80
+
+# ATT: vrcppbf16 %xmm23, %xmm22
+# INTEL: vrcppbf16 xmm22, xmm23
+0x62,0xa6,0x7c,0x08,0x4c,0xf7
+
+# ATT: vrcppbf16 %xmm23, %xmm22 {%k7}
+# INTEL: vrcppbf16 xmm22 {k7}, xmm23
+0x62,0xa6,0x7c,0x0f,0x4c,0xf7
+
+# ATT: vrcppbf16 %xmm23, %xmm22 {%k7} {z}
+# INTEL: vrcppbf16 xmm22 {k7} {z}, xmm23
+0x62,0xa6,0x7c,0x8f,0x4c,0xf7
+
+# ATT: vrcppbf16 %zmm23, %zmm22
+# INTEL: vrcppbf16 zmm22, zmm23
+0x62,0xa6,0x7c,0x48,0x4c,0xf7
+
+# ATT: vrcppbf16 %zmm23, %zmm22 {%k7}
+# INTEL: vrcppbf16 zmm22 {k7}, zmm23
+0x62,0xa6,0x7c,0x4f,0x4c,0xf7
+
+# ATT: vrcppbf16 %zmm23, %zmm22 {%k7} {z}
+# INTEL: vrcppbf16 zmm22 {k7} {z}, zmm23
+0x62,0xa6,0x7c,0xcf,0x4c,0xf7
+
+# ATT: vrcppbf16 %ymm23, %ymm22
+# INTEL: vrcppbf16 ymm22, ymm23
+0x62,0xa6,0x7c,0x28,0x4c,0xf7
+
+# ATT: vrcppbf16 %ymm23, %ymm22 {%k7}
+# INTEL: vrcppbf16 ymm22 {k7}, ymm23
+0x62,0xa6,0x7c,0x2f,0x4c,0xf7
+
+# ATT: vrcppbf16 %ymm23, %ymm22 {%k7} {z}
+# INTEL: vrcppbf16 ymm22 {k7} {z}, ymm23
+0x62,0xa6,0x7c,0xaf,0x4c,0xf7
+
+# ATT: vrcppbf16 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vrcppbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x7c,0x08,0x4c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vrcppbf16 291(%r8,%rax,4), %xmm22 {%k7}
+# INTEL: vrcppbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x7c,0x0f,0x4c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vrcppbf16 (%rip){1to8}, %xmm22
+# INTEL: vrcppbf16 xmm22, word ptr [rip]{1to8}
+0x62,0xe6,0x7c,0x18,0x4c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vrcppbf16 -512(,%rbp,2), %xmm22
+# INTEL: vrcppbf16 xmm22, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x7c,0x08,0x4c,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vrcppbf16 2032(%rcx), %xmm22 {%k7} {z}
+# INTEL: vrcppbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x7c,0x8f,0x4c,0x71,0x7f
+
+# ATT: vrcppbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z}
+# INTEL: vrcppbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x7c,0x9f,0x4c,0x72,0x80
+
+# ATT: vrcppbf16 268435456(%rbp,%r14,8), %ymm22
+# INTEL: vrcppbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x7c,0x28,0x4c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vrcppbf16 291(%r8,%rax,4), %ymm22 {%k7}
+# INTEL: vrcppbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x7c,0x2f,0x4c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vrcppbf16 (%rip){1to16}, %ymm22
+# INTEL: vrcppbf16 ymm22, word ptr [rip]{1to16}
+0x62,0xe6,0x7c,0x38,0x4c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vrcppbf16 -1024(,%rbp,2), %ymm22
+# INTEL: vrcppbf16 ymm22, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x7c,0x28,0x4c,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vrcppbf16 4064(%rcx), %ymm22 {%k7} {z}
+# INTEL: vrcppbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x7c,0xaf,0x4c,0x71,0x7f
+
+# ATT: vrcppbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z}
+# INTEL: vrcppbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x7c,0xbf,0x4c,0x72,0x80
+
+# ATT: vrcppbf16 268435456(%rbp,%r14,8), %zmm22
+# INTEL: vrcppbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x7c,0x48,0x4c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vrcppbf16 291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vrcppbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x7c,0x4f,0x4c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vrcppbf16 (%rip){1to32}, %zmm22
+# INTEL: vrcppbf16 zmm22, word ptr [rip]{1to32}
+0x62,0xe6,0x7c,0x58,0x4c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vrcppbf16 -2048(,%rbp,2), %zmm22
+# INTEL: vrcppbf16 zmm22, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x7c,0x48,0x4c,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vrcppbf16 8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vrcppbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x7c,0xcf,0x4c,0x71,0x7f
+
+# ATT: vrcppbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z}
+# INTEL: vrcppbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x7c,0xdf,0x4c,0x72,0x80
+
+# ATT: vreducenepbf16 $123, %zmm23, %zmm22
+# INTEL: vreducenepbf16 zmm22, zmm23, 123
+0x62,0xa3,0x7f,0x48,0x56,0xf7,0x7b
+
+# ATT: vreducenepbf16 $123, %zmm23, %zmm22 {%k7}
+# INTEL: vreducenepbf16 zmm22 {k7}, zmm23, 123
+0x62,0xa3,0x7f,0x4f,0x56,0xf7,0x7b
+
+# ATT: vreducenepbf16 $123, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vreducenepbf16 zmm22 {k7} {z}, zmm23, 123
+0x62,0xa3,0x7f,0xcf,0x56,0xf7,0x7b
+
+# ATT: vreducenepbf16 $123, %ymm23, %ymm22
+# INTEL: vreducenepbf16 ymm22, ymm23, 123
+0x62,0xa3,0x7f,0x28,0x56,0xf7,0x7b
+
+# ATT: vreducenepbf16 $123, %ymm23, %ymm22 {%k7}
+# INTEL: vreducenepbf16 ymm22 {k7}, ymm23, 123
+0x62,0xa3,0x7f,0x2f,0x56,0xf7,0x7b
+
+# ATT: vreducenepbf16 $123, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vreducenepbf16 ymm22 {k7} {z}, ymm23, 123
+0x62,0xa3,0x7f,0xaf,0x56,0xf7,0x7b
+
+# ATT: vreducenepbf16 $123, %xmm23, %xmm22
+# INTEL: vreducenepbf16 xmm22, xmm23, 123
+0x62,0xa3,0x7f,0x08,0x56,0xf7,0x7b
+
+# ATT: vreducenepbf16 $123, %xmm23, %xmm22 {%k7}
+# INTEL: vreducenepbf16 xmm22 {k7}, xmm23, 123
+0x62,0xa3,0x7f,0x0f,0x56,0xf7,0x7b
+
+# ATT: vreducenepbf16 $123, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vreducenepbf16 xmm22 {k7} {z}, xmm23, 123
+0x62,0xa3,0x7f,0x8f,0x56,0xf7,0x7b
+
+# ATT: vreducenepbf16 $123, 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vreducenepbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456], 123
+0x62,0xa3,0x7f,0x08,0x56,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vreducenepbf16 $123, 291(%r8,%rax,4), %xmm22 {%k7}
+# INTEL: vreducenepbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291], 123
+0x62,0xc3,0x7f,0x0f,0x56,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vreducenepbf16 $123, (%rip){1to8}, %xmm22
+# INTEL: vreducenepbf16 xmm22, word ptr [rip]{1to8}, 123
+0x62,0xe3,0x7f,0x18,0x56,0x35,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vreducenepbf16 $123, -512(,%rbp,2), %xmm22
+# INTEL: vreducenepbf16 xmm22, xmmword ptr [2*rbp - 512], 123
+0x62,0xe3,0x7f,0x08,0x56,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b
+
+# ATT: vreducenepbf16 $123, 2032(%rcx), %xmm22 {%k7} {z}
+# INTEL: vreducenepbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032], 123
+0x62,0xe3,0x7f,0x8f,0x56,0x71,0x7f,0x7b
+
+# ATT: vreducenepbf16 $123, -256(%rdx){1to8}, %xmm22 {%k7} {z}
+# INTEL: vreducenepbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}, 123
+0x62,0xe3,0x7f,0x9f,0x56,0x72,0x80,0x7b
+
+# ATT: vreducenepbf16 $123, 268435456(%rbp,%r14,8), %ymm22
+# INTEL: vreducenepbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456], 123
+0x62,0xa3,0x7f,0x28,0x56,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vreducenepbf16 $123, 291(%r8,%rax,4), %ymm22 {%k7}
+# INTEL: vreducenepbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291], 123
+0x62,0xc3,0x7f,0x2f,0x56,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vreducenepbf16 $123, (%rip){1to16}, %ymm22
+# INTEL: vreducenepbf16 ymm22, word ptr [rip]{1to16}, 123
+0x62,0xe3,0x7f,0x38,0x56,0x35,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vreducenepbf16 $123, -1024(,%rbp,2), %ymm22
+# INTEL: vreducenepbf16 ymm22, ymmword ptr [2*rbp - 1024], 123
+0x62,0xe3,0x7f,0x28,0x56,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b
+
+# ATT: vreducenepbf16 $123, 4064(%rcx), %ymm22 {%k7} {z}
+# INTEL: vreducenepbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064], 123
+0x62,0xe3,0x7f,0xaf,0x56,0x71,0x7f,0x7b
+
+# ATT: vreducenepbf16 $123, -256(%rdx){1to16}, %ymm22 {%k7} {z}
+# INTEL: vreducenepbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}, 123
+0x62,0xe3,0x7f,0xbf,0x56,0x72,0x80,0x7b
+
+# ATT: vreducenepbf16 $123, 268435456(%rbp,%r14,8), %zmm22
+# INTEL: vreducenepbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456], 123
+0x62,0xa3,0x7f,0x48,0x56,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vreducenepbf16 $123, 291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vreducenepbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291], 123
+0x62,0xc3,0x7f,0x4f,0x56,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vreducenepbf16 $123, (%rip){1to32}, %zmm22
+# INTEL: vreducenepbf16 zmm22, word ptr [rip]{1to32}, 123
+0x62,0xe3,0x7f,0x58,0x56,0x35,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vreducenepbf16 $123, -2048(,%rbp,2), %zmm22
+# INTEL: vreducenepbf16 zmm22, zmmword ptr [2*rbp - 2048], 123
+0x62,0xe3,0x7f,0x48,0x56,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b
+
+# ATT: vreducenepbf16 $123, 8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vreducenepbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128], 123
+0x62,0xe3,0x7f,0xcf,0x56,0x71,0x7f,0x7b
+
+# ATT: vreducenepbf16 $123, -256(%rdx){1to32}, %zmm22 {%k7} {z}
+# INTEL: vreducenepbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}, 123
+0x62,0xe3,0x7f,0xdf,0x56,0x72,0x80,0x7b
+
+# ATT: vrndscalenepbf16 $123, %zmm23, %zmm22
+# INTEL: vrndscalenepbf16 zmm22, zmm23, 123
+0x62,0xa3,0x7f,0x48,0x08,0xf7,0x7b
+
+# ATT: vrndscalenepbf16 $123, %zmm23, %zmm22 {%k7}
+# INTEL: vrndscalenepbf16 zmm22 {k7}, zmm23, 123
+0x62,0xa3,0x7f,0x4f,0x08,0xf7,0x7b
+
+# ATT: vrndscalenepbf16 $123, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vrndscalenepbf16 zmm22 {k7} {z}, zmm23, 123
+0x62,0xa3,0x7f,0xcf,0x08,0xf7,0x7b
+
+# ATT: vrndscalenepbf16 $123, %ymm23, %ymm22
+# INTEL: vrndscalenepbf16 ymm22, ymm23, 123
+0x62,0xa3,0x7f,0x28,0x08,0xf7,0x7b
+
+# ATT: vrndscalenepbf16 $123, %ymm23, %ymm22 {%k7}
+# INTEL: vrndscalenepbf16 ymm22 {k7}, ymm23, 123
+0x62,0xa3,0x7f,0x2f,0x08,0xf7,0x7b
+
+# ATT: vrndscalenepbf16 $123, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vrndscalenepbf16 ymm22 {k7} {z}, ymm23, 123
+0x62,0xa3,0x7f,0xaf,0x08,0xf7,0x7b
+
+# ATT: vrndscalenepbf16 $123, %xmm23, %xmm22
+# INTEL: vrndscalenepbf16 xmm22, xmm23, 123
+0x62,0xa3,0x7f,0x08,0x08,0xf7,0x7b
+
+# ATT: vrndscalenepbf16 $123, %xmm23, %xmm22 {%k7}
+# INTEL: vrndscalenepbf16 xmm22 {k7}, xmm23, 123
+0x62,0xa3,0x7f,0x0f,0x08,0xf7,0x7b
+
+# ATT: vrndscalenepbf16 $123, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vrndscalenepbf16 xmm22 {k7} {z}, xmm23, 123
+0x62,0xa3,0x7f,0x8f,0x08,0xf7,0x7b
+
+# ATT: vrndscalenepbf16 $123, 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vrndscalenepbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456], 123
+0x62,0xa3,0x7f,0x08,0x08,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vrndscalenepbf16 $123, 291(%r8,%rax,4), %xmm22 {%k7}
+# INTEL: vrndscalenepbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291], 123
+0x62,0xc3,0x7f,0x0f,0x08,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vrndscalenepbf16 $123, (%rip){1to8}, %xmm22
+# INTEL: vrndscalenepbf16 xmm22, word ptr [rip]{1to8}, 123
+0x62,0xe3,0x7f,0x18,0x08,0x35,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vrndscalenepbf16 $123, -512(,%rbp,2), %xmm22
+# INTEL: vrndscalenepbf16 xmm22, xmmword ptr [2*rbp - 512], 123
+0x62,0xe3,0x7f,0x08,0x08,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b
+
+# ATT: vrndscalenepbf16 $123, 2032(%rcx), %xmm22 {%k7} {z}
+# INTEL: vrndscalenepbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032], 123
+0x62,0xe3,0x7f,0x8f,0x08,0x71,0x7f,0x7b
+
+# ATT: vrndscalenepbf16 $123, -256(%rdx){1to8}, %xmm22 {%k7} {z}
+# INTEL: vrndscalenepbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}, 123
+0x62,0xe3,0x7f,0x9f,0x08,0x72,0x80,0x7b
+
+# ATT: vrndscalenepbf16 $123, 268435456(%rbp,%r14,8), %ymm22
+# INTEL: vrndscalenepbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456], 123
+0x62,0xa3,0x7f,0x28,0x08,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vrndscalenepbf16 $123, 291(%r8,%rax,4), %ymm22 {%k7}
+# INTEL: vrndscalenepbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291], 123
+0x62,0xc3,0x7f,0x2f,0x08,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vrndscalenepbf16 $123, (%rip){1to16}, %ymm22
+# INTEL: vrndscalenepbf16 ymm22, word ptr [rip]{1to16}, 123
+0x62,0xe3,0x7f,0x38,0x08,0x35,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vrndscalenepbf16 $123, -1024(,%rbp,2), %ymm22
+# INTEL: vrndscalenepbf16 ymm22, ymmword ptr [2*rbp - 1024], 123
+0x62,0xe3,0x7f,0x28,0x08,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b
+
+# ATT: vrndscalenepbf16 $123, 4064(%rcx), %ymm22 {%k7} {z}
+# INTEL: vrndscalenepbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064], 123
+0x62,0xe3,0x7f,0xaf,0x08,0x71,0x7f,0x7b
+
+# ATT: vrndscalenepbf16 $123, -256(%rdx){1to16}, %ymm22 {%k7} {z}
+# INTEL: vrndscalenepbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}, 123
+0x62,0xe3,0x7f,0xbf,0x08,0x72,0x80,0x7b
+
+# ATT: vrndscalenepbf16 $123, 268435456(%rbp,%r14,8), %zmm22
+# INTEL: vrndscalenepbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456], 123
+0x62,0xa3,0x7f,0x48,0x08,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vrndscalenepbf16 $123, 291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vrndscalenepbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291], 123
+0x62,0xc3,0x7f,0x4f,0x08,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vrndscalenepbf16 $123, (%rip){1to32}, %zmm22
+# INTEL: vrndscalenepbf16 zmm22, word ptr [rip]{1to32}, 123
+0x62,0xe3,0x7f,0x58,0x08,0x35,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vrndscalenepbf16 $123, -2048(,%rbp,2), %zmm22
+# INTEL: vrndscalenepbf16 zmm22, zmmword ptr [2*rbp - 2048], 123
+0x62,0xe3,0x7f,0x48,0x08,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b
+
+# ATT: vrndscalenepbf16 $123, 8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vrndscalenepbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128], 123
+0x62,0xe3,0x7f,0xcf,0x08,0x71,0x7f,0x7b
+
+# ATT: vrndscalenepbf16 $123, -256(%rdx){1to32}, %zmm22 {%k7} {z}
+# INTEL: vrndscalenepbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}, 123
+0x62,0xe3,0x7f,0xdf,0x08,0x72,0x80,0x7b
+
+# ATT: vrsqrtpbf16 %xmm23, %xmm22
+# INTEL: vrsqrtpbf16 xmm22, xmm23
+0x62,0xa6,0x7c,0x08,0x4e,0xf7
+
+# ATT: vrsqrtpbf16 %xmm23, %xmm22 {%k7}
+# INTEL: vrsqrtpbf16 xmm22 {k7}, xmm23
+0x62,0xa6,0x7c,0x0f,0x4e,0xf7
+
+# ATT: vrsqrtpbf16 %xmm23, %xmm22 {%k7} {z}
+# INTEL: vrsqrtpbf16 xmm22 {k7} {z}, xmm23
+0x62,0xa6,0x7c,0x8f,0x4e,0xf7
+
+# ATT: vrsqrtpbf16 %zmm23, %zmm22
+# INTEL: vrsqrtpbf16 zmm22, zmm23
+0x62,0xa6,0x7c,0x48,0x4e,0xf7
+
+# ATT: vrsqrtpbf16 %zmm23, %zmm22 {%k7}
+# INTEL: vrsqrtpbf16 zmm22 {k7}, zmm23
+0x62,0xa6,0x7c,0x4f,0x4e,0xf7
+
+# ATT: vrsqrtpbf16 %zmm23, %zmm22 {%k7} {z}
+# INTEL: vrsqrtpbf16 zmm22 {k7} {z}, zmm23
+0x62,0xa6,0x7c,0xcf,0x4e,0xf7
+
+# ATT: vrsqrtpbf16 %ymm23, %ymm22
+# INTEL: vrsqrtpbf16 ymm22, ymm23
+0x62,0xa6,0x7c,0x28,0x4e,0xf7
+
+# ATT: vrsqrtpbf16 %ymm23, %ymm22 {%k7}
+# INTEL: vrsqrtpbf16 ymm22 {k7}, ymm23
+0x62,0xa6,0x7c,0x2f,0x4e,0xf7
+
+# ATT: vrsqrtpbf16 %ymm23, %ymm22 {%k7} {z}
+# INTEL: vrsqrtpbf16 ymm22 {k7} {z}, ymm23
+0x62,0xa6,0x7c,0xaf,0x4e,0xf7
+
+# ATT: vrsqrtpbf16 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vrsqrtpbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x7c,0x08,0x4e,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vrsqrtpbf16 291(%r8,%rax,4), %xmm22 {%k7}
+# INTEL: vrsqrtpbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x7c,0x0f,0x4e,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vrsqrtpbf16 (%rip){1to8}, %xmm22
+# INTEL: vrsqrtpbf16 xmm22, word ptr [rip]{1to8}
+0x62,0xe6,0x7c,0x18,0x4e,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vrsqrtpbf16 -512(,%rbp,2), %xmm22
+# INTEL: vrsqrtpbf16 xmm22, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x7c,0x08,0x4e,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vrsqrtpbf16 2032(%rcx), %xmm22 {%k7} {z}
+# INTEL: vrsqrtpbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x7c,0x8f,0x4e,0x71,0x7f
+
+# ATT: vrsqrtpbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z}
+# INTEL: vrsqrtpbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x7c,0x9f,0x4e,0x72,0x80
+
+# ATT: vrsqrtpbf16 268435456(%rbp,%r14,8), %ymm22
+# INTEL: vrsqrtpbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x7c,0x28,0x4e,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vrsqrtpbf16 291(%r8,%rax,4), %ymm22 {%k7}
+# INTEL: vrsqrtpbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x7c,0x2f,0x4e,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vrsqrtpbf16 (%rip){1to16}, %ymm22
+# INTEL: vrsqrtpbf16 ymm22, word ptr [rip]{1to16}
+0x62,0xe6,0x7c,0x38,0x4e,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vrsqrtpbf16 -1024(,%rbp,2), %ymm22
+# INTEL: vrsqrtpbf16 ymm22, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x7c,0x28,0x4e,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vrsqrtpbf16 4064(%rcx), %ymm22 {%k7} {z}
+# INTEL: vrsqrtpbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x7c,0xaf,0x4e,0x71,0x7f
+
+# ATT: vrsqrtpbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z}
+# INTEL: vrsqrtpbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x7c,0xbf,0x4e,0x72,0x80
+
+# ATT: vrsqrtpbf16 268435456(%rbp,%r14,8), %zmm22
+# INTEL: vrsqrtpbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x7c,0x48,0x4e,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vrsqrtpbf16 291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vrsqrtpbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x7c,0x4f,0x4e,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vrsqrtpbf16 (%rip){1to32}, %zmm22
+# INTEL: vrsqrtpbf16 zmm22, word ptr [rip]{1to32}
+0x62,0xe6,0x7c,0x58,0x4e,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vrsqrtpbf16 -2048(,%rbp,2), %zmm22
+# INTEL: vrsqrtpbf16 zmm22, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x7c,0x48,0x4e,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vrsqrtpbf16 8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vrsqrtpbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x7c,0xcf,0x4e,0x71,0x7f
+
+# ATT: vrsqrtpbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z}
+# INTEL: vrsqrtpbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x7c,0xdf,0x4e,0x72,0x80
+
+# ATT: vscalefpbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vscalefpbf16 ymm22, ymm23, ymm24
+0x62,0x86,0x44,0x20,0x2c,0xf0
+
+# ATT: vscalefpbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vscalefpbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x86,0x44,0x27,0x2c,0xf0
+
+# ATT: vscalefpbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vscalefpbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x86,0x44,0xa7,0x2c,0xf0
+
+# ATT: vscalefpbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vscalefpbf16 zmm22, zmm23, zmm24
+0x62,0x86,0x44,0x40,0x2c,0xf0
+
+# ATT: vscalefpbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vscalefpbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x86,0x44,0x47,0x2c,0xf0
+
+# ATT: vscalefpbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vscalefpbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x86,0x44,0xc7,0x2c,0xf0
+
+# ATT: vscalefpbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vscalefpbf16 xmm22, xmm23, xmm24
+0x62,0x86,0x44,0x00,0x2c,0xf0
+
+# ATT: vscalefpbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vscalefpbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x86,0x44,0x07,0x2c,0xf0
+
+# ATT: vscalefpbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vscalefpbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x86,0x44,0x87,0x2c,0xf0
+
+# ATT: vscalefpbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vscalefpbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x40,0x2c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vscalefpbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vscalefpbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x47,0x2c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vscalefpbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vscalefpbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe6,0x44,0x50,0x2c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vscalefpbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vscalefpbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x44,0x40,0x2c,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vscalefpbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vscalefpbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x44,0xc7,0x2c,0x71,0x7f
+
+# ATT: vscalefpbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vscalefpbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x44,0xd7,0x2c,0x72,0x80
+
+# ATT: vscalefpbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vscalefpbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x20,0x2c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vscalefpbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vscalefpbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x27,0x2c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vscalefpbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vscalefpbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe6,0x44,0x30,0x2c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vscalefpbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vscalefpbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x44,0x20,0x2c,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vscalefpbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vscalefpbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x44,0xa7,0x2c,0x71,0x7f
+
+# ATT: vscalefpbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vscalefpbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x44,0xb7,0x2c,0x72,0x80
+
+# ATT: vscalefpbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vscalefpbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x00,0x2c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vscalefpbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vscalefpbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x07,0x2c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vscalefpbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vscalefpbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe6,0x44,0x10,0x2c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vscalefpbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vscalefpbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x44,0x00,0x2c,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vscalefpbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vscalefpbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x44,0x87,0x2c,0x71,0x7f
+
+# ATT: vscalefpbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vscalefpbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x44,0x97,0x2c,0x72,0x80
+
+# ATT: vsqrtnepbf16 %xmm23, %xmm22
+# INTEL: vsqrtnepbf16 xmm22, xmm23
+0x62,0xa5,0x7d,0x08,0x51,0xf7
+
+# ATT: vsqrtnepbf16 %xmm23, %xmm22 {%k7}
+# INTEL: vsqrtnepbf16 xmm22 {k7}, xmm23
+0x62,0xa5,0x7d,0x0f,0x51,0xf7
+
+# ATT: vsqrtnepbf16 %xmm23, %xmm22 {%k7} {z}
+# INTEL: vsqrtnepbf16 xmm22 {k7} {z}, xmm23
+0x62,0xa5,0x7d,0x8f,0x51,0xf7
+
+# ATT: vsqrtnepbf16 %zmm23, %zmm22
+# INTEL: vsqrtnepbf16 zmm22, zmm23
+0x62,0xa5,0x7d,0x48,0x51,0xf7
+
+# ATT: vsqrtnepbf16 %zmm23, %zmm22 {%k7}
+# INTEL: vsqrtnepbf16 zmm22 {k7}, zmm23
+0x62,0xa5,0x7d,0x4f,0x51,0xf7
+
+# ATT: vsqrtnepbf16 %zmm23, %zmm22 {%k7} {z}
+# INTEL: vsqrtnepbf16 zmm22 {k7} {z}, zmm23
+0x62,0xa5,0x7d,0xcf,0x51,0xf7
+
+# ATT: vsqrtnepbf16 %ymm23, %ymm22
+# INTEL: vsqrtnepbf16 ymm22, ymm23
+0x62,0xa5,0x7d,0x28,0x51,0xf7
+
+# ATT: vsqrtnepbf16 %ymm23, %ymm22 {%k7}
+# INTEL: vsqrtnepbf16 ymm22 {k7}, ymm23
+0x62,0xa5,0x7d,0x2f,0x51,0xf7
+
+# ATT: vsqrtnepbf16 %ymm23, %ymm22 {%k7} {z}
+# INTEL: vsqrtnepbf16 ymm22 {k7} {z}, ymm23
+0x62,0xa5,0x7d,0xaf,0x51,0xf7
+
+# ATT: vsqrtnepbf16 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vsqrtnepbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7d,0x08,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsqrtnepbf16 291(%r8,%rax,4), %xmm22 {%k7}
+# INTEL: vsqrtnepbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7d,0x0f,0x51,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsqrtnepbf16 (%rip){1to8}, %xmm22
+# INTEL: vsqrtnepbf16 xmm22, word ptr [rip]{1to8}
+0x62,0xe5,0x7d,0x18,0x51,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsqrtnepbf16 -512(,%rbp,2), %xmm22
+# INTEL: vsqrtnepbf16 xmm22, xmmword ptr [2*rbp - 512]
+0x62,0xe5,0x7d,0x08,0x51,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vsqrtnepbf16 2032(%rcx), %xmm22 {%k7} {z}
+# INTEL: vsqrtnepbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+0x62,0xe5,0x7d,0x8f,0x51,0x71,0x7f
+
+# ATT: vsqrtnepbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z}
+# INTEL: vsqrtnepbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}
+0x62,0xe5,0x7d,0x9f,0x51,0x72,0x80
+
+# ATT: vsqrtnepbf16 268435456(%rbp,%r14,8), %ymm22
+# INTEL: vsqrtnepbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7d,0x28,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsqrtnepbf16 291(%r8,%rax,4), %ymm22 {%k7}
+# INTEL: vsqrtnepbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7d,0x2f,0x51,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsqrtnepbf16 (%rip){1to16}, %ymm22
+# INTEL: vsqrtnepbf16 ymm22, word ptr [rip]{1to16}
+0x62,0xe5,0x7d,0x38,0x51,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsqrtnepbf16 -1024(,%rbp,2), %ymm22
+# INTEL: vsqrtnepbf16 ymm22, ymmword ptr [2*rbp - 1024]
+0x62,0xe5,0x7d,0x28,0x51,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vsqrtnepbf16 4064(%rcx), %ymm22 {%k7} {z}
+# INTEL: vsqrtnepbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+0x62,0xe5,0x7d,0xaf,0x51,0x71,0x7f
+
+# ATT: vsqrtnepbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z}
+# INTEL: vsqrtnepbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}
+0x62,0xe5,0x7d,0xbf,0x51,0x72,0x80
+
+# ATT: vsqrtnepbf16 268435456(%rbp,%r14,8), %zmm22
+# INTEL: vsqrtnepbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7d,0x48,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsqrtnepbf16 291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vsqrtnepbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7d,0x4f,0x51,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsqrtnepbf16 (%rip){1to32}, %zmm22
+# INTEL: vsqrtnepbf16 zmm22, word ptr [rip]{1to32}
+0x62,0xe5,0x7d,0x58,0x51,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsqrtnepbf16 -2048(,%rbp,2), %zmm22
+# INTEL: vsqrtnepbf16 zmm22, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0x7d,0x48,0x51,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vsqrtnepbf16 8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vsqrtnepbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+0x62,0xe5,0x7d,0xcf,0x51,0x71,0x7f
+
+# ATT: vsqrtnepbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z}
+# INTEL: vsqrtnepbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}
+0x62,0xe5,0x7d,0xdf,0x51,0x72,0x80
+
+# ATT: vsubnepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vsubnepbf16 ymm22, ymm23, ymm24
+0x62,0x85,0x45,0x20,0x5c,0xf0
+
+# ATT: vsubnepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vsubnepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x85,0x45,0x27,0x5c,0xf0
+
+# ATT: vsubnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vsubnepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x85,0x45,0xa7,0x5c,0xf0
+
+# ATT: vsubnepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vsubnepbf16 zmm22, zmm23, zmm24
+0x62,0x85,0x45,0x40,0x5c,0xf0
+
+# ATT: vsubnepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vsubnepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x85,0x45,0x47,0x5c,0xf0
+
+# ATT: vsubnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vsubnepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x85,0x45,0xc7,0x5c,0xf0
+
+# ATT: vsubnepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vsubnepbf16 xmm22, xmm23, xmm24
+0x62,0x85,0x45,0x00,0x5c,0xf0
+
+# ATT: vsubnepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vsubnepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x85,0x45,0x07,0x5c,0xf0
+
+# ATT: vsubnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vsubnepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x85,0x45,0x87,0x5c,0xf0
+
+# ATT: vsubnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vsubnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x40,0x5c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsubnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vsubnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x47,0x5c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsubnepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vsubnepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe5,0x45,0x50,0x5c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsubnepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vsubnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0x45,0x40,0x5c,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vsubnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vsubnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe5,0x45,0xc7,0x5c,0x71,0x7f
+
+# ATT: vsubnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vsubnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe5,0x45,0xd7,0x5c,0x72,0x80
+
+# ATT: vsubnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vsubnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x20,0x5c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsubnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vsubnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x27,0x5c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsubnepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vsubnepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe5,0x45,0x30,0x5c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsubnepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vsubnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe5,0x45,0x20,0x5c,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vsubnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vsubnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe5,0x45,0xa7,0x5c,0x71,0x7f
+
+# ATT: vsubnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vsubnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe5,0x45,0xb7,0x5c,0x72,0x80
+
+# ATT: vsubnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vsubnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x00,0x5c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsubnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vsubnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x07,0x5c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsubnepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vsubnepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe5,0x45,0x10,0x5c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsubnepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vsubnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe5,0x45,0x00,0x5c,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vsubnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vsubnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe5,0x45,0x87,0x5c,0x71,0x7f
+
+# ATT: vsubnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vsubnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe5,0x45,0x97,0x5c,0x72,0x80
+
diff --git a/llvm/test/MC/X86/avx10.2-bf16-32-att.s b/llvm/test/MC/X86/avx10.2-bf16-32-att.s
new file mode 100644
index 0000000000000..9f62743177c9b
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-bf16-32-att.s
@@ -0,0 +1,3014 @@
+// RUN: llvm-mc -triple i386 --show-encoding %s | FileCheck %s
+
+// CHECK: vaddnepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x58,0xd4]
+ vaddnepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vaddnepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x58,0xd4]
+ vaddnepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vaddnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x58,0xd4]
+ vaddnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vaddnepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x58,0xd4]
+ vaddnepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vaddnepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x58,0xd4]
+ vaddnepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vaddnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x58,0xd4]
+ vaddnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vaddnepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x58,0xd4]
+ vaddnepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vaddnepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x58,0xd4]
+ vaddnepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vaddnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x58,0xd4]
+ vaddnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vaddnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x58,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vaddnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vaddnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x58,0x94,0x87,0x23,0x01,0x00,0x00]
+ vaddnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vaddnepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x58,0x10]
+ vaddnepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vaddnepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x58,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vaddnepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vaddnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x58,0x51,0x7f]
+ vaddnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vaddnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x58,0x52,0x80]
+ vaddnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vaddnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x58,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vaddnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vaddnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x58,0x94,0x87,0x23,0x01,0x00,0x00]
+ vaddnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vaddnepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x58,0x10]
+ vaddnepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vaddnepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x58,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vaddnepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vaddnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x58,0x51,0x7f]
+ vaddnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vaddnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x58,0x52,0x80]
+ vaddnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vaddnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x58,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vaddnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vaddnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x58,0x94,0x87,0x23,0x01,0x00,0x00]
+ vaddnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vaddnepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x58,0x10]
+ vaddnepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vaddnepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x58,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vaddnepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vaddnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x58,0x51,0x7f]
+ vaddnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vaddnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x58,0x52,0x80]
+ vaddnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vcmppbf16 $123, %ymm4, %ymm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x67,0x28,0xc2,0xec,0x7b]
+ vcmppbf16 $123, %ymm4, %ymm3, %k5
+
+// CHECK: vcmppbf16 $123, %ymm4, %ymm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x67,0x2f,0xc2,0xec,0x7b]
+ vcmppbf16 $123, %ymm4, %ymm3, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, %xmm4, %xmm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x67,0x08,0xc2,0xec,0x7b]
+ vcmppbf16 $123, %xmm4, %xmm3, %k5
+
+// CHECK: vcmppbf16 $123, %xmm4, %xmm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x67,0x0f,0xc2,0xec,0x7b]
+ vcmppbf16 $123, %xmm4, %xmm3, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, %zmm4, %zmm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x67,0x48,0xc2,0xec,0x7b]
+ vcmppbf16 $123, %zmm4, %zmm3, %k5
+
+// CHECK: vcmppbf16 $123, %zmm4, %zmm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x67,0x4f,0xc2,0xec,0x7b]
+ vcmppbf16 $123, %zmm4, %zmm3, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, 268435456(%esp,%esi,8), %zmm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x67,0x48,0xc2,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vcmppbf16 $123, 268435456(%esp,%esi,8), %zmm3, %k5
+
+// CHECK: vcmppbf16 $123, 291(%edi,%eax,4), %zmm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x67,0x4f,0xc2,0xac,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vcmppbf16 $123, 291(%edi,%eax,4), %zmm3, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, (%eax){1to32}, %zmm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x67,0x58,0xc2,0x28,0x7b]
+ vcmppbf16 $123, (%eax){1to32}, %zmm3, %k5
+
+// CHECK: vcmppbf16 $123, -2048(,%ebp,2), %zmm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x67,0x48,0xc2,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vcmppbf16 $123, -2048(,%ebp,2), %zmm3, %k5
+
+// CHECK: vcmppbf16 $123, 8128(%ecx), %zmm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x67,0x4f,0xc2,0x69,0x7f,0x7b]
+ vcmppbf16 $123, 8128(%ecx), %zmm3, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, -256(%edx){1to32}, %zmm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x67,0x5f,0xc2,0x6a,0x80,0x7b]
+ vcmppbf16 $123, -256(%edx){1to32}, %zmm3, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, 268435456(%esp,%esi,8), %xmm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x67,0x08,0xc2,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vcmppbf16 $123, 268435456(%esp,%esi,8), %xmm3, %k5
+
+// CHECK: vcmppbf16 $123, 291(%edi,%eax,4), %xmm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x67,0x0f,0xc2,0xac,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vcmppbf16 $123, 291(%edi,%eax,4), %xmm3, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, (%eax){1to8}, %xmm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x67,0x18,0xc2,0x28,0x7b]
+ vcmppbf16 $123, (%eax){1to8}, %xmm3, %k5
+
+// CHECK: vcmppbf16 $123, -512(,%ebp,2), %xmm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x67,0x08,0xc2,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vcmppbf16 $123, -512(,%ebp,2), %xmm3, %k5
+
+// CHECK: vcmppbf16 $123, 2032(%ecx), %xmm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x67,0x0f,0xc2,0x69,0x7f,0x7b]
+ vcmppbf16 $123, 2032(%ecx), %xmm3, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, -256(%edx){1to8}, %xmm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x67,0x1f,0xc2,0x6a,0x80,0x7b]
+ vcmppbf16 $123, -256(%edx){1to8}, %xmm3, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, 268435456(%esp,%esi,8), %ymm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x67,0x28,0xc2,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vcmppbf16 $123, 268435456(%esp,%esi,8), %ymm3, %k5
+
+// CHECK: vcmppbf16 $123, 291(%edi,%eax,4), %ymm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x67,0x2f,0xc2,0xac,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vcmppbf16 $123, 291(%edi,%eax,4), %ymm3, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, (%eax){1to16}, %ymm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x67,0x38,0xc2,0x28,0x7b]
+ vcmppbf16 $123, (%eax){1to16}, %ymm3, %k5
+
+// CHECK: vcmppbf16 $123, -1024(,%ebp,2), %ymm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x67,0x28,0xc2,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vcmppbf16 $123, -1024(,%ebp,2), %ymm3, %k5
+
+// CHECK: vcmppbf16 $123, 4064(%ecx), %ymm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x67,0x2f,0xc2,0x69,0x7f,0x7b]
+ vcmppbf16 $123, 4064(%ecx), %ymm3, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, -256(%edx){1to16}, %ymm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x67,0x3f,0xc2,0x6a,0x80,0x7b]
+ vcmppbf16 $123, -256(%edx){1to16}, %ymm3, %k5 {%k7}
+
+// CHECK: vcomsbf16 %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xd3]
+ vcomsbf16 %xmm3, %xmm2
+
+// CHECK: vcomsbf16 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcomsbf16 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vcomsbf16 291(%edi,%eax,4), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcomsbf16 291(%edi,%eax,4), %xmm2
+
+// CHECK: vcomsbf16 (%eax), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x10]
+ vcomsbf16 (%eax), %xmm2
+
+// CHECK: vcomsbf16 -64(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x14,0x6d,0xc0,0xff,0xff,0xff]
+ vcomsbf16 -64(,%ebp,2), %xmm2
+
+// CHECK: vcomsbf16 254(%ecx), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x51,0x7f]
+ vcomsbf16 254(%ecx), %xmm2
+
+// CHECK: vcomsbf16 -256(%edx), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x52,0x80]
+ vcomsbf16 -256(%edx), %xmm2
+
+// CHECK: vdivnepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5e,0xd4]
+ vdivnepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vdivnepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5e,0xd4]
+ vdivnepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vdivnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5e,0xd4]
+ vdivnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vdivnepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5e,0xd4]
+ vdivnepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vdivnepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5e,0xd4]
+ vdivnepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vdivnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5e,0xd4]
+ vdivnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vdivnepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5e,0xd4]
+ vdivnepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vdivnepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5e,0xd4]
+ vdivnepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vdivnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5e,0xd4]
+ vdivnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vdivnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vdivnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vdivnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vdivnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vdivnepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x5e,0x10]
+ vdivnepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vdivnepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5e,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vdivnepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vdivnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5e,0x51,0x7f]
+ vdivnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vdivnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x5e,0x52,0x80]
+ vdivnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vdivnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vdivnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vdivnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vdivnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vdivnepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x5e,0x10]
+ vdivnepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vdivnepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5e,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vdivnepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vdivnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5e,0x51,0x7f]
+ vdivnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vdivnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x5e,0x52,0x80]
+ vdivnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vdivnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vdivnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vdivnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vdivnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vdivnepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x5e,0x10]
+ vdivnepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vdivnepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5e,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vdivnepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vdivnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5e,0x51,0x7f]
+ vdivnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vdivnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x5e,0x52,0x80]
+ vdivnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x98,0xd4]
+ vfmadd132nepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vfmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x98,0xd4]
+ vfmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vfmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x98,0xd4]
+ vfmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x98,0xd4]
+ vfmadd132nepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vfmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x98,0xd4]
+ vfmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vfmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x98,0xd4]
+ vfmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x98,0xd4]
+ vfmadd132nepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vfmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x98,0xd4]
+ vfmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vfmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x98,0xd4]
+ vfmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x98,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vfmadd132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x98,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vfmadd132nepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x98,0x10]
+ vfmadd132nepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vfmadd132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x98,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfmadd132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vfmadd132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x98,0x51,0x7f]
+ vfmadd132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x98,0x52,0x80]
+ vfmadd132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x98,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vfmadd132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x98,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vfmadd132nepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x98,0x10]
+ vfmadd132nepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vfmadd132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x98,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfmadd132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vfmadd132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x98,0x51,0x7f]
+ vfmadd132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x98,0x52,0x80]
+ vfmadd132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x98,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vfmadd132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x98,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vfmadd132nepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x98,0x10]
+ vfmadd132nepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vfmadd132nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x98,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfmadd132nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vfmadd132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x98,0x51,0x7f]
+ vfmadd132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x98,0x52,0x80]
+ vfmadd132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xa8,0xd4]
+ vfmadd213nepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vfmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xa8,0xd4]
+ vfmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vfmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xa8,0xd4]
+ vfmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xa8,0xd4]
+ vfmadd213nepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vfmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xa8,0xd4]
+ vfmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vfmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xa8,0xd4]
+ vfmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xa8,0xd4]
+ vfmadd213nepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vfmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xa8,0xd4]
+ vfmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vfmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xa8,0xd4]
+ vfmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xa8,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vfmadd213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xa8,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vfmadd213nepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xa8,0x10]
+ vfmadd213nepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vfmadd213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xa8,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfmadd213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vfmadd213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xa8,0x51,0x7f]
+ vfmadd213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xa8,0x52,0x80]
+ vfmadd213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xa8,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vfmadd213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xa8,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vfmadd213nepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xa8,0x10]
+ vfmadd213nepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vfmadd213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xa8,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfmadd213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vfmadd213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xa8,0x51,0x7f]
+ vfmadd213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xa8,0x52,0x80]
+ vfmadd213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xa8,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vfmadd213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xa8,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vfmadd213nepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xa8,0x10]
+ vfmadd213nepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vfmadd213nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xa8,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfmadd213nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vfmadd213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xa8,0x51,0x7f]
+ vfmadd213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xa8,0x52,0x80]
+ vfmadd213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xb8,0xd4]
+ vfmadd231nepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vfmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xb8,0xd4]
+ vfmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vfmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xb8,0xd4]
+ vfmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xb8,0xd4]
+ vfmadd231nepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vfmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xb8,0xd4]
+ vfmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vfmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xb8,0xd4]
+ vfmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xb8,0xd4]
+ vfmadd231nepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vfmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xb8,0xd4]
+ vfmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vfmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xb8,0xd4]
+ vfmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xb8,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vfmadd231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xb8,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vfmadd231nepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xb8,0x10]
+ vfmadd231nepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vfmadd231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xb8,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfmadd231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vfmadd231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xb8,0x51,0x7f]
+ vfmadd231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xb8,0x52,0x80]
+ vfmadd231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xb8,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vfmadd231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xb8,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vfmadd231nepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xb8,0x10]
+ vfmadd231nepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vfmadd231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xb8,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfmadd231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vfmadd231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xb8,0x51,0x7f]
+ vfmadd231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xb8,0x52,0x80]
+ vfmadd231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xb8,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vfmadd231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xb8,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vfmadd231nepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xb8,0x10]
+ vfmadd231nepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vfmadd231nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xb8,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfmadd231nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vfmadd231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xb8,0x51,0x7f]
+ vfmadd231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xb8,0x52,0x80]
+ vfmadd231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9a,0xd4]
+ vfmsub132nepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vfmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9a,0xd4]
+ vfmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vfmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9a,0xd4]
+ vfmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9a,0xd4]
+ vfmsub132nepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vfmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9a,0xd4]
+ vfmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vfmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9a,0xd4]
+ vfmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9a,0xd4]
+ vfmsub132nepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vfmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9a,0xd4]
+ vfmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vfmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9a,0xd4]
+ vfmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9a,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vfmsub132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9a,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vfmsub132nepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x9a,0x10]
+ vfmsub132nepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vfmsub132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9a,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfmsub132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vfmsub132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9a,0x51,0x7f]
+ vfmsub132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x9a,0x52,0x80]
+ vfmsub132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9a,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vfmsub132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9a,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vfmsub132nepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x9a,0x10]
+ vfmsub132nepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vfmsub132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9a,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfmsub132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vfmsub132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9a,0x51,0x7f]
+ vfmsub132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x9a,0x52,0x80]
+ vfmsub132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9a,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vfmsub132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9a,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vfmsub132nepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x9a,0x10]
+ vfmsub132nepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vfmsub132nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9a,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfmsub132nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vfmsub132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9a,0x51,0x7f]
+ vfmsub132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x9a,0x52,0x80]
+ vfmsub132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xaa,0xd4]
+ vfmsub213nepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vfmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xaa,0xd4]
+ vfmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vfmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xaa,0xd4]
+ vfmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xaa,0xd4]
+ vfmsub213nepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vfmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xaa,0xd4]
+ vfmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vfmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xaa,0xd4]
+ vfmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xaa,0xd4]
+ vfmsub213nepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vfmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xaa,0xd4]
+ vfmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vfmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xaa,0xd4]
+ vfmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xaa,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vfmsub213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xaa,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vfmsub213nepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xaa,0x10]
+ vfmsub213nepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vfmsub213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xaa,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfmsub213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vfmsub213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xaa,0x51,0x7f]
+ vfmsub213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xaa,0x52,0x80]
+ vfmsub213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xaa,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vfmsub213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xaa,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vfmsub213nepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xaa,0x10]
+ vfmsub213nepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vfmsub213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xaa,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfmsub213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vfmsub213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xaa,0x51,0x7f]
+ vfmsub213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xaa,0x52,0x80]
+ vfmsub213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xaa,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vfmsub213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xaa,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vfmsub213nepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xaa,0x10]
+ vfmsub213nepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vfmsub213nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xaa,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfmsub213nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vfmsub213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xaa,0x51,0x7f]
+ vfmsub213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xaa,0x52,0x80]
+ vfmsub213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xba,0xd4]
+ vfmsub231nepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vfmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xba,0xd4]
+ vfmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vfmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xba,0xd4]
+ vfmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xba,0xd4]
+ vfmsub231nepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vfmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xba,0xd4]
+ vfmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vfmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xba,0xd4]
+ vfmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xba,0xd4]
+ vfmsub231nepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vfmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xba,0xd4]
+ vfmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vfmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xba,0xd4]
+ vfmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xba,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vfmsub231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xba,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vfmsub231nepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xba,0x10]
+ vfmsub231nepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vfmsub231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xba,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfmsub231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vfmsub231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xba,0x51,0x7f]
+ vfmsub231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xba,0x52,0x80]
+ vfmsub231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xba,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vfmsub231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xba,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vfmsub231nepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xba,0x10]
+ vfmsub231nepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vfmsub231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xba,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfmsub231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vfmsub231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xba,0x51,0x7f]
+ vfmsub231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xba,0x52,0x80]
+ vfmsub231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xba,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vfmsub231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xba,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vfmsub231nepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xba,0x10]
+ vfmsub231nepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vfmsub231nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xba,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfmsub231nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vfmsub231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xba,0x51,0x7f]
+ vfmsub231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xba,0x52,0x80]
+ vfmsub231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9c,0xd4]
+ vfnmadd132nepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vfnmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9c,0xd4]
+ vfnmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vfnmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9c,0xd4]
+ vfnmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9c,0xd4]
+ vfnmadd132nepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vfnmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9c,0xd4]
+ vfnmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vfnmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9c,0xd4]
+ vfnmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9c,0xd4]
+ vfnmadd132nepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vfnmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9c,0xd4]
+ vfnmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vfnmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9c,0xd4]
+ vfnmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vfnmadd132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vfnmadd132nepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x9c,0x10]
+ vfnmadd132nepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vfnmadd132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9c,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmadd132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vfnmadd132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9c,0x51,0x7f]
+ vfnmadd132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x9c,0x52,0x80]
+ vfnmadd132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vfnmadd132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vfnmadd132nepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x9c,0x10]
+ vfnmadd132nepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vfnmadd132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9c,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmadd132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vfnmadd132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9c,0x51,0x7f]
+ vfnmadd132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x9c,0x52,0x80]
+ vfnmadd132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vfnmadd132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vfnmadd132nepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x9c,0x10]
+ vfnmadd132nepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vfnmadd132nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9c,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmadd132nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vfnmadd132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9c,0x51,0x7f]
+ vfnmadd132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x9c,0x52,0x80]
+ vfnmadd132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xac,0xd4]
+ vfnmadd213nepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vfnmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xac,0xd4]
+ vfnmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vfnmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xac,0xd4]
+ vfnmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xac,0xd4]
+ vfnmadd213nepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vfnmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xac,0xd4]
+ vfnmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vfnmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xac,0xd4]
+ vfnmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xac,0xd4]
+ vfnmadd213nepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vfnmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xac,0xd4]
+ vfnmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vfnmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xac,0xd4]
+ vfnmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xac,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vfnmadd213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xac,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vfnmadd213nepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xac,0x10]
+ vfnmadd213nepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vfnmadd213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xac,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmadd213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vfnmadd213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xac,0x51,0x7f]
+ vfnmadd213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xac,0x52,0x80]
+ vfnmadd213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xac,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vfnmadd213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xac,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vfnmadd213nepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xac,0x10]
+ vfnmadd213nepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vfnmadd213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xac,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmadd213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vfnmadd213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xac,0x51,0x7f]
+ vfnmadd213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xac,0x52,0x80]
+ vfnmadd213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xac,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vfnmadd213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xac,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vfnmadd213nepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xac,0x10]
+ vfnmadd213nepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vfnmadd213nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xac,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmadd213nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vfnmadd213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xac,0x51,0x7f]
+ vfnmadd213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xac,0x52,0x80]
+ vfnmadd213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbc,0xd4]
+ vfnmadd231nepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vfnmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xbc,0xd4]
+ vfnmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vfnmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xbc,0xd4]
+ vfnmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbc,0xd4]
+ vfnmadd231nepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vfnmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xbc,0xd4]
+ vfnmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vfnmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xbc,0xd4]
+ vfnmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbc,0xd4]
+ vfnmadd231nepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vfnmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xbc,0xd4]
+ vfnmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vfnmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xbc,0xd4]
+ vfnmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbc,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vfnmadd231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xbc,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vfnmadd231nepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xbc,0x10]
+ vfnmadd231nepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vfnmadd231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbc,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmadd231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vfnmadd231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xbc,0x51,0x7f]
+ vfnmadd231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xbc,0x52,0x80]
+ vfnmadd231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbc,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vfnmadd231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xbc,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vfnmadd231nepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xbc,0x10]
+ vfnmadd231nepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vfnmadd231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbc,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmadd231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vfnmadd231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xbc,0x51,0x7f]
+ vfnmadd231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xbc,0x52,0x80]
+ vfnmadd231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbc,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vfnmadd231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xbc,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vfnmadd231nepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xbc,0x10]
+ vfnmadd231nepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vfnmadd231nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbc,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmadd231nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vfnmadd231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xbc,0x51,0x7f]
+ vfnmadd231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xbc,0x52,0x80]
+ vfnmadd231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9e,0xd4]
+ vfnmsub132nepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vfnmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9e,0xd4]
+ vfnmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vfnmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9e,0xd4]
+ vfnmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9e,0xd4]
+ vfnmsub132nepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vfnmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9e,0xd4]
+ vfnmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vfnmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9e,0xd4]
+ vfnmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9e,0xd4]
+ vfnmsub132nepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vfnmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9e,0xd4]
+ vfnmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vfnmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9e,0xd4]
+ vfnmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vfnmsub132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vfnmsub132nepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x9e,0x10]
+ vfnmsub132nepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vfnmsub132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9e,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmsub132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vfnmsub132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9e,0x51,0x7f]
+ vfnmsub132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x9e,0x52,0x80]
+ vfnmsub132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vfnmsub132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vfnmsub132nepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x9e,0x10]
+ vfnmsub132nepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vfnmsub132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9e,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmsub132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vfnmsub132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9e,0x51,0x7f]
+ vfnmsub132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x9e,0x52,0x80]
+ vfnmsub132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vfnmsub132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vfnmsub132nepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x9e,0x10]
+ vfnmsub132nepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vfnmsub132nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9e,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmsub132nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vfnmsub132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9e,0x51,0x7f]
+ vfnmsub132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x9e,0x52,0x80]
+ vfnmsub132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xae,0xd4]
+ vfnmsub213nepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vfnmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xae,0xd4]
+ vfnmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vfnmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xae,0xd4]
+ vfnmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xae,0xd4]
+ vfnmsub213nepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vfnmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xae,0xd4]
+ vfnmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vfnmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xae,0xd4]
+ vfnmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xae,0xd4]
+ vfnmsub213nepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vfnmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xae,0xd4]
+ vfnmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vfnmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xae,0xd4]
+ vfnmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xae,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vfnmsub213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xae,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vfnmsub213nepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xae,0x10]
+ vfnmsub213nepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vfnmsub213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xae,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmsub213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vfnmsub213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xae,0x51,0x7f]
+ vfnmsub213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xae,0x52,0x80]
+ vfnmsub213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xae,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vfnmsub213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xae,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vfnmsub213nepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xae,0x10]
+ vfnmsub213nepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vfnmsub213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xae,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmsub213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vfnmsub213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xae,0x51,0x7f]
+ vfnmsub213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xae,0x52,0x80]
+ vfnmsub213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xae,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vfnmsub213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xae,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vfnmsub213nepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xae,0x10]
+ vfnmsub213nepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vfnmsub213nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xae,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmsub213nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vfnmsub213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xae,0x51,0x7f]
+ vfnmsub213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xae,0x52,0x80]
+ vfnmsub213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbe,0xd4]
+ vfnmsub231nepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vfnmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xbe,0xd4]
+ vfnmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vfnmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xbe,0xd4]
+ vfnmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbe,0xd4]
+ vfnmsub231nepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vfnmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xbe,0xd4]
+ vfnmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vfnmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xbe,0xd4]
+ vfnmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbe,0xd4]
+ vfnmsub231nepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vfnmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xbe,0xd4]
+ vfnmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vfnmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xbe,0xd4]
+ vfnmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbe,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vfnmsub231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xbe,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vfnmsub231nepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xbe,0x10]
+ vfnmsub231nepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vfnmsub231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbe,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmsub231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vfnmsub231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xbe,0x51,0x7f]
+ vfnmsub231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xbe,0x52,0x80]
+ vfnmsub231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbe,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vfnmsub231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xbe,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vfnmsub231nepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xbe,0x10]
+ vfnmsub231nepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vfnmsub231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbe,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmsub231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vfnmsub231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xbe,0x51,0x7f]
+ vfnmsub231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xbe,0x52,0x80]
+ vfnmsub231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbe,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vfnmsub231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xbe,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vfnmsub231nepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xbe,0x10]
+ vfnmsub231nepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vfnmsub231nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbe,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmsub231nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vfnmsub231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xbe,0x51,0x7f]
+ vfnmsub231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xbe,0x52,0x80]
+ vfnmsub231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfpclasspbf16 $123, %zmm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x66,0xeb,0x7b]
+ vfpclasspbf16 $123, %zmm3, %k5
+
+// CHECK: vfpclasspbf16 $123, %zmm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x66,0xeb,0x7b]
+ vfpclasspbf16 $123, %zmm3, %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, %ymm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x66,0xeb,0x7b]
+ vfpclasspbf16 $123, %ymm3, %k5
+
+// CHECK: vfpclasspbf16 $123, %ymm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x66,0xeb,0x7b]
+ vfpclasspbf16 $123, %ymm3, %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, %xmm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x66,0xeb,0x7b]
+ vfpclasspbf16 $123, %xmm3, %k5
+
+// CHECK: vfpclasspbf16 $123, %xmm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x66,0xeb,0x7b]
+ vfpclasspbf16 $123, %xmm3, %k5 {%k7}
+
+// CHECK: vfpclasspbf16x $123, 268435456(%esp,%esi,8), %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x66,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vfpclasspbf16x $123, 268435456(%esp,%esi,8), %k5
+
+// CHECK: vfpclasspbf16x $123, 291(%edi,%eax,4), %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x66,0xac,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vfpclasspbf16x $123, 291(%edi,%eax,4), %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, (%eax){1to8}, %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x66,0x28,0x7b]
+ vfpclasspbf16 $123, (%eax){1to8}, %k5
+
+// CHECK: vfpclasspbf16x $123, -512(,%ebp,2), %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x66,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vfpclasspbf16x $123, -512(,%ebp,2), %k5
+
+// CHECK: vfpclasspbf16x $123, 2032(%ecx), %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x66,0x69,0x7f,0x7b]
+ vfpclasspbf16x $123, 2032(%ecx), %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, -256(%edx){1to8}, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x1f,0x66,0x6a,0x80,0x7b]
+ vfpclasspbf16 $123, -256(%edx){1to8}, %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, (%eax){1to16}, %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x66,0x28,0x7b]
+ vfpclasspbf16 $123, (%eax){1to16}, %k5
+
+// CHECK: vfpclasspbf16y $123, -1024(,%ebp,2), %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x66,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vfpclasspbf16y $123, -1024(,%ebp,2), %k5
+
+// CHECK: vfpclasspbf16y $123, 4064(%ecx), %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x66,0x69,0x7f,0x7b]
+ vfpclasspbf16y $123, 4064(%ecx), %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, -256(%edx){1to16}, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x3f,0x66,0x6a,0x80,0x7b]
+ vfpclasspbf16 $123, -256(%edx){1to16}, %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, (%eax){1to32}, %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x58,0x66,0x28,0x7b]
+ vfpclasspbf16 $123, (%eax){1to32}, %k5
+
+// CHECK: vfpclasspbf16z $123, -2048(,%ebp,2), %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x66,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vfpclasspbf16z $123, -2048(,%ebp,2), %k5
+
+// CHECK: vfpclasspbf16z $123, 8128(%ecx), %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x66,0x69,0x7f,0x7b]
+ vfpclasspbf16z $123, 8128(%ecx), %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, -256(%edx){1to32}, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x5f,0x66,0x6a,0x80,0x7b]
+ vfpclasspbf16 $123, -256(%edx){1to32}, %k5 {%k7}
+
+// CHECK: vgetexppbf16 %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x42,0xd3]
+ vgetexppbf16 %xmm3, %xmm2
+
+// CHECK: vgetexppbf16 %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x42,0xd3]
+ vgetexppbf16 %xmm3, %xmm2 {%k7}
+
+// CHECK: vgetexppbf16 %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x42,0xd3]
+ vgetexppbf16 %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vgetexppbf16 %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x42,0xd3]
+ vgetexppbf16 %zmm3, %zmm2
+
+// CHECK: vgetexppbf16 %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x42,0xd3]
+ vgetexppbf16 %zmm3, %zmm2 {%k7}
+
+// CHECK: vgetexppbf16 %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x42,0xd3]
+ vgetexppbf16 %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vgetexppbf16 %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x42,0xd3]
+ vgetexppbf16 %ymm3, %ymm2
+
+// CHECK: vgetexppbf16 %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x42,0xd3]
+ vgetexppbf16 %ymm3, %ymm2 {%k7}
+
+// CHECK: vgetexppbf16 %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x42,0xd3]
+ vgetexppbf16 %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vgetexppbf16 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x42,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vgetexppbf16 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vgetexppbf16 291(%edi,%eax,4), %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x42,0x94,0x87,0x23,0x01,0x00,0x00]
+ vgetexppbf16 291(%edi,%eax,4), %xmm2 {%k7}
+
+// CHECK: vgetexppbf16 (%eax){1to8}, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x42,0x10]
+ vgetexppbf16 (%eax){1to8}, %xmm2
+
+// CHECK: vgetexppbf16 -512(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x42,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vgetexppbf16 -512(,%ebp,2), %xmm2
+
+// CHECK: vgetexppbf16 2032(%ecx), %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x42,0x51,0x7f]
+ vgetexppbf16 2032(%ecx), %xmm2 {%k7} {z}
+
+// CHECK: vgetexppbf16 -256(%edx){1to8}, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x42,0x52,0x80]
+ vgetexppbf16 -256(%edx){1to8}, %xmm2 {%k7} {z}
+
+// CHECK: vgetexppbf16 268435456(%esp,%esi,8), %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x42,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vgetexppbf16 268435456(%esp,%esi,8), %ymm2
+
+// CHECK: vgetexppbf16 291(%edi,%eax,4), %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x42,0x94,0x87,0x23,0x01,0x00,0x00]
+ vgetexppbf16 291(%edi,%eax,4), %ymm2 {%k7}
+
+// CHECK: vgetexppbf16 (%eax){1to16}, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x42,0x10]
+ vgetexppbf16 (%eax){1to16}, %ymm2
+
+// CHECK: vgetexppbf16 -1024(,%ebp,2), %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x42,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vgetexppbf16 -1024(,%ebp,2), %ymm2
+
+// CHECK: vgetexppbf16 4064(%ecx), %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x42,0x51,0x7f]
+ vgetexppbf16 4064(%ecx), %ymm2 {%k7} {z}
+
+// CHECK: vgetexppbf16 -256(%edx){1to16}, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x42,0x52,0x80]
+ vgetexppbf16 -256(%edx){1to16}, %ymm2 {%k7} {z}
+
+// CHECK: vgetexppbf16 268435456(%esp,%esi,8), %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x42,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vgetexppbf16 268435456(%esp,%esi,8), %zmm2
+
+// CHECK: vgetexppbf16 291(%edi,%eax,4), %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x42,0x94,0x87,0x23,0x01,0x00,0x00]
+ vgetexppbf16 291(%edi,%eax,4), %zmm2 {%k7}
+
+// CHECK: vgetexppbf16 (%eax){1to32}, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x42,0x10]
+ vgetexppbf16 (%eax){1to32}, %zmm2
+
+// CHECK: vgetexppbf16 -2048(,%ebp,2), %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x42,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vgetexppbf16 -2048(,%ebp,2), %zmm2
+
+// CHECK: vgetexppbf16 8128(%ecx), %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x42,0x51,0x7f]
+ vgetexppbf16 8128(%ecx), %zmm2 {%k7} {z}
+
+// CHECK: vgetexppbf16 -256(%edx){1to32}, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x42,0x52,0x80]
+ vgetexppbf16 -256(%edx){1to32}, %zmm2 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x26,0xd3,0x7b]
+ vgetmantpbf16 $123, %zmm3, %zmm2
+
+// CHECK: vgetmantpbf16 $123, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x26,0xd3,0x7b]
+ vgetmantpbf16 $123, %zmm3, %zmm2 {%k7}
+
+// CHECK: vgetmantpbf16 $123, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x26,0xd3,0x7b]
+ vgetmantpbf16 $123, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x26,0xd3,0x7b]
+ vgetmantpbf16 $123, %ymm3, %ymm2
+
+// CHECK: vgetmantpbf16 $123, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x26,0xd3,0x7b]
+ vgetmantpbf16 $123, %ymm3, %ymm2 {%k7}
+
+// CHECK: vgetmantpbf16 $123, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x26,0xd3,0x7b]
+ vgetmantpbf16 $123, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x26,0xd3,0x7b]
+ vgetmantpbf16 $123, %xmm3, %xmm2
+
+// CHECK: vgetmantpbf16 $123, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x26,0xd3,0x7b]
+ vgetmantpbf16 $123, %xmm3, %xmm2 {%k7}
+
+// CHECK: vgetmantpbf16 $123, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x26,0xd3,0x7b]
+ vgetmantpbf16 $123, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x26,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vgetmantpbf16 $123, 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vgetmantpbf16 $123, 291(%edi,%eax,4), %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x26,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantpbf16 $123, 291(%edi,%eax,4), %xmm2 {%k7}
+
+// CHECK: vgetmantpbf16 $123, (%eax){1to8}, %xmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x26,0x10,0x7b]
+ vgetmantpbf16 $123, (%eax){1to8}, %xmm2
+
+// CHECK: vgetmantpbf16 $123, -512(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x26,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vgetmantpbf16 $123, -512(,%ebp,2), %xmm2
+
+// CHECK: vgetmantpbf16 $123, 2032(%ecx), %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x26,0x51,0x7f,0x7b]
+ vgetmantpbf16 $123, 2032(%ecx), %xmm2 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, -256(%edx){1to8}, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x9f,0x26,0x52,0x80,0x7b]
+ vgetmantpbf16 $123, -256(%edx){1to8}, %xmm2 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, 268435456(%esp,%esi,8), %ymm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x26,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vgetmantpbf16 $123, 268435456(%esp,%esi,8), %ymm2
+
+// CHECK: vgetmantpbf16 $123, 291(%edi,%eax,4), %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x26,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantpbf16 $123, 291(%edi,%eax,4), %ymm2 {%k7}
+
+// CHECK: vgetmantpbf16 $123, (%eax){1to16}, %ymm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x26,0x10,0x7b]
+ vgetmantpbf16 $123, (%eax){1to16}, %ymm2
+
+// CHECK: vgetmantpbf16 $123, -1024(,%ebp,2), %ymm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x26,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vgetmantpbf16 $123, -1024(,%ebp,2), %ymm2
+
+// CHECK: vgetmantpbf16 $123, 4064(%ecx), %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x26,0x51,0x7f,0x7b]
+ vgetmantpbf16 $123, 4064(%ecx), %ymm2 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, -256(%edx){1to16}, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xbf,0x26,0x52,0x80,0x7b]
+ vgetmantpbf16 $123, -256(%edx){1to16}, %ymm2 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, 268435456(%esp,%esi,8), %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x26,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vgetmantpbf16 $123, 268435456(%esp,%esi,8), %zmm2
+
+// CHECK: vgetmantpbf16 $123, 291(%edi,%eax,4), %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x26,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantpbf16 $123, 291(%edi,%eax,4), %zmm2 {%k7}
+
+// CHECK: vgetmantpbf16 $123, (%eax){1to32}, %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x58,0x26,0x10,0x7b]
+ vgetmantpbf16 $123, (%eax){1to32}, %zmm2
+
+// CHECK: vgetmantpbf16 $123, -2048(,%ebp,2), %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x26,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vgetmantpbf16 $123, -2048(,%ebp,2), %zmm2
+
+// CHECK: vgetmantpbf16 $123, 8128(%ecx), %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x26,0x51,0x7f,0x7b]
+ vgetmantpbf16 $123, 8128(%ecx), %zmm2 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, -256(%edx){1to32}, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xdf,0x26,0x52,0x80,0x7b]
+ vgetmantpbf16 $123, -256(%edx){1to32}, %zmm2 {%k7} {z}
+
+// CHECK: vmaxpbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5f,0xd4]
+ vmaxpbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vmaxpbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5f,0xd4]
+ vmaxpbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vmaxpbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5f,0xd4]
+ vmaxpbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vmaxpbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5f,0xd4]
+ vmaxpbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vmaxpbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5f,0xd4]
+ vmaxpbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vmaxpbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5f,0xd4]
+ vmaxpbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vmaxpbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5f,0xd4]
+ vmaxpbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vmaxpbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5f,0xd4]
+ vmaxpbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vmaxpbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5f,0xd4]
+ vmaxpbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vmaxpbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5f,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmaxpbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vmaxpbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5f,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmaxpbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vmaxpbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x5f,0x10]
+ vmaxpbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vmaxpbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5f,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vmaxpbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vmaxpbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5f,0x51,0x7f]
+ vmaxpbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vmaxpbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x5f,0x52,0x80]
+ vmaxpbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vmaxpbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5f,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmaxpbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vmaxpbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5f,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmaxpbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vmaxpbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x5f,0x10]
+ vmaxpbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vmaxpbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5f,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vmaxpbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vmaxpbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5f,0x51,0x7f]
+ vmaxpbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vmaxpbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x5f,0x52,0x80]
+ vmaxpbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vmaxpbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5f,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmaxpbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vmaxpbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5f,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmaxpbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vmaxpbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x5f,0x10]
+ vmaxpbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vmaxpbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5f,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vmaxpbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vmaxpbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5f,0x51,0x7f]
+ vmaxpbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vmaxpbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x5f,0x52,0x80]
+ vmaxpbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vminpbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5d,0xd4]
+ vminpbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vminpbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5d,0xd4]
+ vminpbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vminpbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5d,0xd4]
+ vminpbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vminpbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5d,0xd4]
+ vminpbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vminpbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5d,0xd4]
+ vminpbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vminpbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5d,0xd4]
+ vminpbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vminpbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5d,0xd4]
+ vminpbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vminpbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5d,0xd4]
+ vminpbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vminpbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5d,0xd4]
+ vminpbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vminpbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vminpbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vminpbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vminpbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vminpbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x5d,0x10]
+ vminpbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vminpbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5d,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vminpbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vminpbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5d,0x51,0x7f]
+ vminpbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vminpbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x5d,0x52,0x80]
+ vminpbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vminpbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vminpbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vminpbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vminpbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vminpbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x5d,0x10]
+ vminpbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vminpbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5d,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vminpbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vminpbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5d,0x51,0x7f]
+ vminpbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vminpbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x5d,0x52,0x80]
+ vminpbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vminpbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vminpbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vminpbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vminpbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vminpbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x5d,0x10]
+ vminpbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vminpbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5d,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vminpbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vminpbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5d,0x51,0x7f]
+ vminpbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vminpbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x5d,0x52,0x80]
+ vminpbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vmulnepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x59,0xd4]
+ vmulnepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vmulnepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x59,0xd4]
+ vmulnepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vmulnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x59,0xd4]
+ vmulnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vmulnepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x59,0xd4]
+ vmulnepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vmulnepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x59,0xd4]
+ vmulnepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vmulnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x59,0xd4]
+ vmulnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vmulnepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x59,0xd4]
+ vmulnepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vmulnepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x59,0xd4]
+ vmulnepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vmulnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x59,0xd4]
+ vmulnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vmulnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x59,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmulnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vmulnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x59,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmulnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vmulnepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x59,0x10]
+ vmulnepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vmulnepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x59,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vmulnepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vmulnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x59,0x51,0x7f]
+ vmulnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vmulnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x59,0x52,0x80]
+ vmulnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vmulnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x59,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmulnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vmulnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x59,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmulnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vmulnepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x59,0x10]
+ vmulnepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vmulnepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x59,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vmulnepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vmulnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x59,0x51,0x7f]
+ vmulnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vmulnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x59,0x52,0x80]
+ vmulnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vmulnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x59,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmulnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vmulnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x59,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmulnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vmulnepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x59,0x10]
+ vmulnepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vmulnepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x59,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vmulnepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vmulnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x59,0x51,0x7f]
+ vmulnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vmulnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x59,0x52,0x80]
+ vmulnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vrcppbf16 %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4c,0xd3]
+ vrcppbf16 %xmm3, %xmm2
+
+// CHECK: vrcppbf16 %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x0f,0x4c,0xd3]
+ vrcppbf16 %xmm3, %xmm2 {%k7}
+
+// CHECK: vrcppbf16 %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x8f,0x4c,0xd3]
+ vrcppbf16 %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vrcppbf16 %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4c,0xd3]
+ vrcppbf16 %zmm3, %zmm2
+
+// CHECK: vrcppbf16 %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x4f,0x4c,0xd3]
+ vrcppbf16 %zmm3, %zmm2 {%k7}
+
+// CHECK: vrcppbf16 %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xcf,0x4c,0xd3]
+ vrcppbf16 %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vrcppbf16 %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4c,0xd3]
+ vrcppbf16 %ymm3, %ymm2
+
+// CHECK: vrcppbf16 %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x2f,0x4c,0xd3]
+ vrcppbf16 %ymm3, %ymm2 {%k7}
+
+// CHECK: vrcppbf16 %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xaf,0x4c,0xd3]
+ vrcppbf16 %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vrcppbf16 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vrcppbf16 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vrcppbf16 291(%edi,%eax,4), %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x0f,0x4c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vrcppbf16 291(%edi,%eax,4), %xmm2 {%k7}
+
+// CHECK: vrcppbf16 (%eax){1to8}, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x18,0x4c,0x10]
+ vrcppbf16 (%eax){1to8}, %xmm2
+
+// CHECK: vrcppbf16 -512(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4c,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vrcppbf16 -512(,%ebp,2), %xmm2
+
+// CHECK: vrcppbf16 2032(%ecx), %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x8f,0x4c,0x51,0x7f]
+ vrcppbf16 2032(%ecx), %xmm2 {%k7} {z}
+
+// CHECK: vrcppbf16 -256(%edx){1to8}, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x9f,0x4c,0x52,0x80]
+ vrcppbf16 -256(%edx){1to8}, %xmm2 {%k7} {z}
+
+// CHECK: vrcppbf16 268435456(%esp,%esi,8), %ymm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vrcppbf16 268435456(%esp,%esi,8), %ymm2
+
+// CHECK: vrcppbf16 291(%edi,%eax,4), %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x2f,0x4c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vrcppbf16 291(%edi,%eax,4), %ymm2 {%k7}
+
+// CHECK: vrcppbf16 (%eax){1to16}, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x38,0x4c,0x10]
+ vrcppbf16 (%eax){1to16}, %ymm2
+
+// CHECK: vrcppbf16 -1024(,%ebp,2), %ymm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4c,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vrcppbf16 -1024(,%ebp,2), %ymm2
+
+// CHECK: vrcppbf16 4064(%ecx), %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xaf,0x4c,0x51,0x7f]
+ vrcppbf16 4064(%ecx), %ymm2 {%k7} {z}
+
+// CHECK: vrcppbf16 -256(%edx){1to16}, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xbf,0x4c,0x52,0x80]
+ vrcppbf16 -256(%edx){1to16}, %ymm2 {%k7} {z}
+
+// CHECK: vrcppbf16 268435456(%esp,%esi,8), %zmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vrcppbf16 268435456(%esp,%esi,8), %zmm2
+
+// CHECK: vrcppbf16 291(%edi,%eax,4), %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x4f,0x4c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vrcppbf16 291(%edi,%eax,4), %zmm2 {%k7}
+
+// CHECK: vrcppbf16 (%eax){1to32}, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x58,0x4c,0x10]
+ vrcppbf16 (%eax){1to32}, %zmm2
+
+// CHECK: vrcppbf16 -2048(,%ebp,2), %zmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4c,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vrcppbf16 -2048(,%ebp,2), %zmm2
+
+// CHECK: vrcppbf16 8128(%ecx), %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xcf,0x4c,0x51,0x7f]
+ vrcppbf16 8128(%ecx), %zmm2 {%k7} {z}
+
+// CHECK: vrcppbf16 -256(%edx){1to32}, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xdf,0x4c,0x52,0x80]
+ vrcppbf16 -256(%edx){1to32}, %zmm2 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x56,0xd3,0x7b]
+ vreducenepbf16 $123, %zmm3, %zmm2
+
+// CHECK: vreducenepbf16 $123, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x56,0xd3,0x7b]
+ vreducenepbf16 $123, %zmm3, %zmm2 {%k7}
+
+// CHECK: vreducenepbf16 $123, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x56,0xd3,0x7b]
+ vreducenepbf16 $123, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x56,0xd3,0x7b]
+ vreducenepbf16 $123, %ymm3, %ymm2
+
+// CHECK: vreducenepbf16 $123, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x56,0xd3,0x7b]
+ vreducenepbf16 $123, %ymm3, %ymm2 {%k7}
+
+// CHECK: vreducenepbf16 $123, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x56,0xd3,0x7b]
+ vreducenepbf16 $123, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x56,0xd3,0x7b]
+ vreducenepbf16 $123, %xmm3, %xmm2
+
+// CHECK: vreducenepbf16 $123, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x56,0xd3,0x7b]
+ vreducenepbf16 $123, %xmm3, %xmm2 {%k7}
+
+// CHECK: vreducenepbf16 $123, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x56,0xd3,0x7b]
+ vreducenepbf16 $123, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x56,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vreducenepbf16 $123, 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vreducenepbf16 $123, 291(%edi,%eax,4), %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x56,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vreducenepbf16 $123, 291(%edi,%eax,4), %xmm2 {%k7}
+
+// CHECK: vreducenepbf16 $123, (%eax){1to8}, %xmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x56,0x10,0x7b]
+ vreducenepbf16 $123, (%eax){1to8}, %xmm2
+
+// CHECK: vreducenepbf16 $123, -512(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x56,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vreducenepbf16 $123, -512(,%ebp,2), %xmm2
+
+// CHECK: vreducenepbf16 $123, 2032(%ecx), %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x56,0x51,0x7f,0x7b]
+ vreducenepbf16 $123, 2032(%ecx), %xmm2 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, -256(%edx){1to8}, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x9f,0x56,0x52,0x80,0x7b]
+ vreducenepbf16 $123, -256(%edx){1to8}, %xmm2 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, 268435456(%esp,%esi,8), %ymm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x56,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vreducenepbf16 $123, 268435456(%esp,%esi,8), %ymm2
+
+// CHECK: vreducenepbf16 $123, 291(%edi,%eax,4), %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x56,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vreducenepbf16 $123, 291(%edi,%eax,4), %ymm2 {%k7}
+
+// CHECK: vreducenepbf16 $123, (%eax){1to16}, %ymm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x56,0x10,0x7b]
+ vreducenepbf16 $123, (%eax){1to16}, %ymm2
+
+// CHECK: vreducenepbf16 $123, -1024(,%ebp,2), %ymm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x56,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vreducenepbf16 $123, -1024(,%ebp,2), %ymm2
+
+// CHECK: vreducenepbf16 $123, 4064(%ecx), %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x56,0x51,0x7f,0x7b]
+ vreducenepbf16 $123, 4064(%ecx), %ymm2 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, -256(%edx){1to16}, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xbf,0x56,0x52,0x80,0x7b]
+ vreducenepbf16 $123, -256(%edx){1to16}, %ymm2 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, 268435456(%esp,%esi,8), %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x56,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vreducenepbf16 $123, 268435456(%esp,%esi,8), %zmm2
+
+// CHECK: vreducenepbf16 $123, 291(%edi,%eax,4), %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x56,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vreducenepbf16 $123, 291(%edi,%eax,4), %zmm2 {%k7}
+
+// CHECK: vreducenepbf16 $123, (%eax){1to32}, %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x58,0x56,0x10,0x7b]
+ vreducenepbf16 $123, (%eax){1to32}, %zmm2
+
+// CHECK: vreducenepbf16 $123, -2048(,%ebp,2), %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x56,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vreducenepbf16 $123, -2048(,%ebp,2), %zmm2
+
+// CHECK: vreducenepbf16 $123, 8128(%ecx), %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x56,0x51,0x7f,0x7b]
+ vreducenepbf16 $123, 8128(%ecx), %zmm2 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, -256(%edx){1to32}, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xdf,0x56,0x52,0x80,0x7b]
+ vreducenepbf16 $123, -256(%edx){1to32}, %zmm2 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x08,0xd3,0x7b]
+ vrndscalenepbf16 $123, %zmm3, %zmm2
+
+// CHECK: vrndscalenepbf16 $123, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x08,0xd3,0x7b]
+ vrndscalenepbf16 $123, %zmm3, %zmm2 {%k7}
+
+// CHECK: vrndscalenepbf16 $123, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x08,0xd3,0x7b]
+ vrndscalenepbf16 $123, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x08,0xd3,0x7b]
+ vrndscalenepbf16 $123, %ymm3, %ymm2
+
+// CHECK: vrndscalenepbf16 $123, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x08,0xd3,0x7b]
+ vrndscalenepbf16 $123, %ymm3, %ymm2 {%k7}
+
+// CHECK: vrndscalenepbf16 $123, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x08,0xd3,0x7b]
+ vrndscalenepbf16 $123, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x08,0xd3,0x7b]
+ vrndscalenepbf16 $123, %xmm3, %xmm2
+
+// CHECK: vrndscalenepbf16 $123, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x08,0xd3,0x7b]
+ vrndscalenepbf16 $123, %xmm3, %xmm2 {%k7}
+
+// CHECK: vrndscalenepbf16 $123, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x08,0xd3,0x7b]
+ vrndscalenepbf16 $123, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x08,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vrndscalenepbf16 $123, 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vrndscalenepbf16 $123, 291(%edi,%eax,4), %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x08,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vrndscalenepbf16 $123, 291(%edi,%eax,4), %xmm2 {%k7}
+
+// CHECK: vrndscalenepbf16 $123, (%eax){1to8}, %xmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x08,0x10,0x7b]
+ vrndscalenepbf16 $123, (%eax){1to8}, %xmm2
+
+// CHECK: vrndscalenepbf16 $123, -512(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x08,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vrndscalenepbf16 $123, -512(,%ebp,2), %xmm2
+
+// CHECK: vrndscalenepbf16 $123, 2032(%ecx), %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x08,0x51,0x7f,0x7b]
+ vrndscalenepbf16 $123, 2032(%ecx), %xmm2 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, -256(%edx){1to8}, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x9f,0x08,0x52,0x80,0x7b]
+ vrndscalenepbf16 $123, -256(%edx){1to8}, %xmm2 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, 268435456(%esp,%esi,8), %ymm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x08,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vrndscalenepbf16 $123, 268435456(%esp,%esi,8), %ymm2
+
+// CHECK: vrndscalenepbf16 $123, 291(%edi,%eax,4), %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x08,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vrndscalenepbf16 $123, 291(%edi,%eax,4), %ymm2 {%k7}
+
+// CHECK: vrndscalenepbf16 $123, (%eax){1to16}, %ymm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x08,0x10,0x7b]
+ vrndscalenepbf16 $123, (%eax){1to16}, %ymm2
+
+// CHECK: vrndscalenepbf16 $123, -1024(,%ebp,2), %ymm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x08,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vrndscalenepbf16 $123, -1024(,%ebp,2), %ymm2
+
+// CHECK: vrndscalenepbf16 $123, 4064(%ecx), %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x08,0x51,0x7f,0x7b]
+ vrndscalenepbf16 $123, 4064(%ecx), %ymm2 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, -256(%edx){1to16}, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xbf,0x08,0x52,0x80,0x7b]
+ vrndscalenepbf16 $123, -256(%edx){1to16}, %ymm2 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, 268435456(%esp,%esi,8), %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x08,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vrndscalenepbf16 $123, 268435456(%esp,%esi,8), %zmm2
+
+// CHECK: vrndscalenepbf16 $123, 291(%edi,%eax,4), %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x08,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vrndscalenepbf16 $123, 291(%edi,%eax,4), %zmm2 {%k7}
+
+// CHECK: vrndscalenepbf16 $123, (%eax){1to32}, %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x58,0x08,0x10,0x7b]
+ vrndscalenepbf16 $123, (%eax){1to32}, %zmm2
+
+// CHECK: vrndscalenepbf16 $123, -2048(,%ebp,2), %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x08,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vrndscalenepbf16 $123, -2048(,%ebp,2), %zmm2
+
+// CHECK: vrndscalenepbf16 $123, 8128(%ecx), %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x08,0x51,0x7f,0x7b]
+ vrndscalenepbf16 $123, 8128(%ecx), %zmm2 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, -256(%edx){1to32}, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xdf,0x08,0x52,0x80,0x7b]
+ vrndscalenepbf16 $123, -256(%edx){1to32}, %zmm2 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4e,0xd3]
+ vrsqrtpbf16 %xmm3, %xmm2
+
+// CHECK: vrsqrtpbf16 %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x0f,0x4e,0xd3]
+ vrsqrtpbf16 %xmm3, %xmm2 {%k7}
+
+// CHECK: vrsqrtpbf16 %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x8f,0x4e,0xd3]
+ vrsqrtpbf16 %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4e,0xd3]
+ vrsqrtpbf16 %zmm3, %zmm2
+
+// CHECK: vrsqrtpbf16 %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x4f,0x4e,0xd3]
+ vrsqrtpbf16 %zmm3, %zmm2 {%k7}
+
+// CHECK: vrsqrtpbf16 %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xcf,0x4e,0xd3]
+ vrsqrtpbf16 %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4e,0xd3]
+ vrsqrtpbf16 %ymm3, %ymm2
+
+// CHECK: vrsqrtpbf16 %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x2f,0x4e,0xd3]
+ vrsqrtpbf16 %ymm3, %ymm2 {%k7}
+
+// CHECK: vrsqrtpbf16 %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xaf,0x4e,0xd3]
+ vrsqrtpbf16 %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vrsqrtpbf16 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vrsqrtpbf16 291(%edi,%eax,4), %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x0f,0x4e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vrsqrtpbf16 291(%edi,%eax,4), %xmm2 {%k7}
+
+// CHECK: vrsqrtpbf16 (%eax){1to8}, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x18,0x4e,0x10]
+ vrsqrtpbf16 (%eax){1to8}, %xmm2
+
+// CHECK: vrsqrtpbf16 -512(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4e,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vrsqrtpbf16 -512(,%ebp,2), %xmm2
+
+// CHECK: vrsqrtpbf16 2032(%ecx), %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x8f,0x4e,0x51,0x7f]
+ vrsqrtpbf16 2032(%ecx), %xmm2 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 -256(%edx){1to8}, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x9f,0x4e,0x52,0x80]
+ vrsqrtpbf16 -256(%edx){1to8}, %xmm2 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 268435456(%esp,%esi,8), %ymm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vrsqrtpbf16 268435456(%esp,%esi,8), %ymm2
+
+// CHECK: vrsqrtpbf16 291(%edi,%eax,4), %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x2f,0x4e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vrsqrtpbf16 291(%edi,%eax,4), %ymm2 {%k7}
+
+// CHECK: vrsqrtpbf16 (%eax){1to16}, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x38,0x4e,0x10]
+ vrsqrtpbf16 (%eax){1to16}, %ymm2
+
+// CHECK: vrsqrtpbf16 -1024(,%ebp,2), %ymm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4e,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vrsqrtpbf16 -1024(,%ebp,2), %ymm2
+
+// CHECK: vrsqrtpbf16 4064(%ecx), %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xaf,0x4e,0x51,0x7f]
+ vrsqrtpbf16 4064(%ecx), %ymm2 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 -256(%edx){1to16}, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xbf,0x4e,0x52,0x80]
+ vrsqrtpbf16 -256(%edx){1to16}, %ymm2 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 268435456(%esp,%esi,8), %zmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vrsqrtpbf16 268435456(%esp,%esi,8), %zmm2
+
+// CHECK: vrsqrtpbf16 291(%edi,%eax,4), %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x4f,0x4e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vrsqrtpbf16 291(%edi,%eax,4), %zmm2 {%k7}
+
+// CHECK: vrsqrtpbf16 (%eax){1to32}, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x58,0x4e,0x10]
+ vrsqrtpbf16 (%eax){1to32}, %zmm2
+
+// CHECK: vrsqrtpbf16 -2048(,%ebp,2), %zmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4e,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vrsqrtpbf16 -2048(,%ebp,2), %zmm2
+
+// CHECK: vrsqrtpbf16 8128(%ecx), %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xcf,0x4e,0x51,0x7f]
+ vrsqrtpbf16 8128(%ecx), %zmm2 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 -256(%edx){1to32}, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xdf,0x4e,0x52,0x80]
+ vrsqrtpbf16 -256(%edx){1to32}, %zmm2 {%k7} {z}
+
+// CHECK: vscalefpbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x2c,0xd4]
+ vscalefpbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vscalefpbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x2c,0xd4]
+ vscalefpbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vscalefpbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x2c,0xd4]
+ vscalefpbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vscalefpbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x2c,0xd4]
+ vscalefpbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vscalefpbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x2c,0xd4]
+ vscalefpbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vscalefpbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x2c,0xd4]
+ vscalefpbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vscalefpbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x2c,0xd4]
+ vscalefpbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vscalefpbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x2c,0xd4]
+ vscalefpbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vscalefpbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x2c,0xd4]
+ vscalefpbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vscalefpbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vscalefpbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vscalefpbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x2c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vscalefpbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vscalefpbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x2c,0x10]
+ vscalefpbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vscalefpbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x2c,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vscalefpbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vscalefpbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x2c,0x51,0x7f]
+ vscalefpbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vscalefpbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x2c,0x52,0x80]
+ vscalefpbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vscalefpbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vscalefpbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vscalefpbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x2c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vscalefpbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vscalefpbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x2c,0x10]
+ vscalefpbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vscalefpbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x2c,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vscalefpbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vscalefpbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x2c,0x51,0x7f]
+ vscalefpbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vscalefpbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x2c,0x52,0x80]
+ vscalefpbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vscalefpbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vscalefpbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vscalefpbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x2c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vscalefpbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vscalefpbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x2c,0x10]
+ vscalefpbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vscalefpbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x2c,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vscalefpbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vscalefpbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x2c,0x51,0x7f]
+ vscalefpbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vscalefpbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x2c,0x52,0x80]
+ vscalefpbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x51,0xd3]
+ vsqrtnepbf16 %xmm3, %xmm2
+
+// CHECK: vsqrtnepbf16 %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x51,0xd3]
+ vsqrtnepbf16 %xmm3, %xmm2 {%k7}
+
+// CHECK: vsqrtnepbf16 %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x51,0xd3]
+ vsqrtnepbf16 %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x51,0xd3]
+ vsqrtnepbf16 %zmm3, %zmm2
+
+// CHECK: vsqrtnepbf16 %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x51,0xd3]
+ vsqrtnepbf16 %zmm3, %zmm2 {%k7}
+
+// CHECK: vsqrtnepbf16 %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x51,0xd3]
+ vsqrtnepbf16 %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x51,0xd3]
+ vsqrtnepbf16 %ymm3, %ymm2
+
+// CHECK: vsqrtnepbf16 %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x51,0xd3]
+ vsqrtnepbf16 %ymm3, %ymm2 {%k7}
+
+// CHECK: vsqrtnepbf16 %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x51,0xd3]
+ vsqrtnepbf16 %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x51,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsqrtnepbf16 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vsqrtnepbf16 291(%edi,%eax,4), %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x51,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsqrtnepbf16 291(%edi,%eax,4), %xmm2 {%k7}
+
+// CHECK: vsqrtnepbf16 (%eax){1to8}, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x51,0x10]
+ vsqrtnepbf16 (%eax){1to8}, %xmm2
+
+// CHECK: vsqrtnepbf16 -512(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x51,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vsqrtnepbf16 -512(,%ebp,2), %xmm2
+
+// CHECK: vsqrtnepbf16 2032(%ecx), %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x51,0x51,0x7f]
+ vsqrtnepbf16 2032(%ecx), %xmm2 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 -256(%edx){1to8}, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x51,0x52,0x80]
+ vsqrtnepbf16 -256(%edx){1to8}, %xmm2 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 268435456(%esp,%esi,8), %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x51,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsqrtnepbf16 268435456(%esp,%esi,8), %ymm2
+
+// CHECK: vsqrtnepbf16 291(%edi,%eax,4), %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x51,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsqrtnepbf16 291(%edi,%eax,4), %ymm2 {%k7}
+
+// CHECK: vsqrtnepbf16 (%eax){1to16}, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x51,0x10]
+ vsqrtnepbf16 (%eax){1to16}, %ymm2
+
+// CHECK: vsqrtnepbf16 -1024(,%ebp,2), %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x51,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vsqrtnepbf16 -1024(,%ebp,2), %ymm2
+
+// CHECK: vsqrtnepbf16 4064(%ecx), %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x51,0x51,0x7f]
+ vsqrtnepbf16 4064(%ecx), %ymm2 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 -256(%edx){1to16}, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x51,0x52,0x80]
+ vsqrtnepbf16 -256(%edx){1to16}, %ymm2 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 268435456(%esp,%esi,8), %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x51,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsqrtnepbf16 268435456(%esp,%esi,8), %zmm2
+
+// CHECK: vsqrtnepbf16 291(%edi,%eax,4), %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x51,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsqrtnepbf16 291(%edi,%eax,4), %zmm2 {%k7}
+
+// CHECK: vsqrtnepbf16 (%eax){1to32}, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x51,0x10]
+ vsqrtnepbf16 (%eax){1to32}, %zmm2
+
+// CHECK: vsqrtnepbf16 -2048(,%ebp,2), %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x51,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vsqrtnepbf16 -2048(,%ebp,2), %zmm2
+
+// CHECK: vsqrtnepbf16 8128(%ecx), %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x51,0x51,0x7f]
+ vsqrtnepbf16 8128(%ecx), %zmm2 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 -256(%edx){1to32}, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x51,0x52,0x80]
+ vsqrtnepbf16 -256(%edx){1to32}, %zmm2 {%k7} {z}
+
+// CHECK: vsubnepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5c,0xd4]
+ vsubnepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vsubnepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5c,0xd4]
+ vsubnepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vsubnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5c,0xd4]
+ vsubnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vsubnepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5c,0xd4]
+ vsubnepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vsubnepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5c,0xd4]
+ vsubnepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vsubnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5c,0xd4]
+ vsubnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vsubnepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5c,0xd4]
+ vsubnepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vsubnepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5c,0xd4]
+ vsubnepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vsubnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5c,0xd4]
+ vsubnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vsubnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsubnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vsubnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsubnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vsubnepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x5c,0x10]
+ vsubnepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vsubnepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5c,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vsubnepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vsubnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5c,0x51,0x7f]
+ vsubnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vsubnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x5c,0x52,0x80]
+ vsubnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vsubnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsubnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vsubnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsubnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vsubnepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x5c,0x10]
+ vsubnepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vsubnepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5c,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vsubnepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vsubnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5c,0x51,0x7f]
+ vsubnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vsubnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x5c,0x52,0x80]
+ vsubnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vsubnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsubnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vsubnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsubnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vsubnepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x5c,0x10]
+ vsubnepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vsubnepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5c,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vsubnepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vsubnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5c,0x51,0x7f]
+ vsubnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vsubnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x5c,0x52,0x80]
+ vsubnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
diff --git a/llvm/test/MC/X86/avx10.2-bf16-32-intel.s b/llvm/test/MC/X86/avx10.2-bf16-32-intel.s
new file mode 100644
index 0000000000000..30c2cf45297bc
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-bf16-32-intel.s
@@ -0,0 +1,3014 @@
+// RUN: llvm-mc -triple i386 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vaddnepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x58,0xd4]
+ vaddnepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vaddnepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x58,0xd4]
+ vaddnepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vaddnepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x58,0xd4]
+ vaddnepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vaddnepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x58,0xd4]
+ vaddnepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vaddnepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x58,0xd4]
+ vaddnepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vaddnepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x58,0xd4]
+ vaddnepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vaddnepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x58,0xd4]
+ vaddnepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vaddnepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x58,0xd4]
+ vaddnepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vaddnepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x58,0xd4]
+ vaddnepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vaddnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x58,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vaddnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vaddnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x58,0x94,0x87,0x23,0x01,0x00,0x00]
+ vaddnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vaddnepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x58,0x10]
+ vaddnepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vaddnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x58,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vaddnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vaddnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x58,0x51,0x7f]
+ vaddnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vaddnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x58,0x52,0x80]
+ vaddnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vaddnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x58,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vaddnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vaddnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x58,0x94,0x87,0x23,0x01,0x00,0x00]
+ vaddnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vaddnepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x58,0x10]
+ vaddnepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vaddnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x58,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vaddnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vaddnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x58,0x51,0x7f]
+ vaddnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vaddnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x58,0x52,0x80]
+ vaddnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vaddnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x58,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vaddnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vaddnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x58,0x94,0x87,0x23,0x01,0x00,0x00]
+ vaddnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vaddnepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x58,0x10]
+ vaddnepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vaddnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x58,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vaddnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vaddnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x58,0x51,0x7f]
+ vaddnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vaddnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x58,0x52,0x80]
+ vaddnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vcmppbf16 k5, ymm3, ymm4, 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x28,0xc2,0xec,0x7b]
+ vcmppbf16 k5, ymm3, ymm4, 123
+
+// CHECK: vcmppbf16 k5 {k7}, ymm3, ymm4, 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x2f,0xc2,0xec,0x7b]
+ vcmppbf16 k5 {k7}, ymm3, ymm4, 123
+
+// CHECK: vcmppbf16 k5, xmm3, xmm4, 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x08,0xc2,0xec,0x7b]
+ vcmppbf16 k5, xmm3, xmm4, 123
+
+// CHECK: vcmppbf16 k5 {k7}, xmm3, xmm4, 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x0f,0xc2,0xec,0x7b]
+ vcmppbf16 k5 {k7}, xmm3, xmm4, 123
+
+// CHECK: vcmppbf16 k5, zmm3, zmm4, 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x48,0xc2,0xec,0x7b]
+ vcmppbf16 k5, zmm3, zmm4, 123
+
+// CHECK: vcmppbf16 k5 {k7}, zmm3, zmm4, 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x4f,0xc2,0xec,0x7b]
+ vcmppbf16 k5 {k7}, zmm3, zmm4, 123
+
+// CHECK: vcmppbf16 k5, zmm3, zmmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x48,0xc2,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vcmppbf16 k5, zmm3, zmmword ptr [esp + 8*esi + 268435456], 123
+
+// CHECK: vcmppbf16 k5 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291], 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x4f,0xc2,0xac,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vcmppbf16 k5 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291], 123
+
+// CHECK: vcmppbf16 k5, zmm3, word ptr [eax]{1to32}, 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x58,0xc2,0x28,0x7b]
+ vcmppbf16 k5, zmm3, word ptr [eax]{1to32}, 123
+
+// CHECK: vcmppbf16 k5, zmm3, zmmword ptr [2*ebp - 2048], 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x48,0xc2,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vcmppbf16 k5, zmm3, zmmword ptr [2*ebp - 2048], 123
+
+// CHECK: vcmppbf16 k5 {k7}, zmm3, zmmword ptr [ecx + 8128], 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x4f,0xc2,0x69,0x7f,0x7b]
+ vcmppbf16 k5 {k7}, zmm3, zmmword ptr [ecx + 8128], 123
+
+// CHECK: vcmppbf16 k5 {k7}, zmm3, word ptr [edx - 256]{1to32}, 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x5f,0xc2,0x6a,0x80,0x7b]
+ vcmppbf16 k5 {k7}, zmm3, word ptr [edx - 256]{1to32}, 123
+
+// CHECK: vcmppbf16 k5, xmm3, xmmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x08,0xc2,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vcmppbf16 k5, xmm3, xmmword ptr [esp + 8*esi + 268435456], 123
+
+// CHECK: vcmppbf16 k5 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291], 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x0f,0xc2,0xac,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vcmppbf16 k5 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291], 123
+
+// CHECK: vcmppbf16 k5, xmm3, word ptr [eax]{1to8}, 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x18,0xc2,0x28,0x7b]
+ vcmppbf16 k5, xmm3, word ptr [eax]{1to8}, 123
+
+// CHECK: vcmppbf16 k5, xmm3, xmmword ptr [2*ebp - 512], 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x08,0xc2,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vcmppbf16 k5, xmm3, xmmword ptr [2*ebp - 512], 123
+
+// CHECK: vcmppbf16 k5 {k7}, xmm3, xmmword ptr [ecx + 2032], 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x0f,0xc2,0x69,0x7f,0x7b]
+ vcmppbf16 k5 {k7}, xmm3, xmmword ptr [ecx + 2032], 123
+
+// CHECK: vcmppbf16 k5 {k7}, xmm3, word ptr [edx - 256]{1to8}, 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x1f,0xc2,0x6a,0x80,0x7b]
+ vcmppbf16 k5 {k7}, xmm3, word ptr [edx - 256]{1to8}, 123
+
+// CHECK: vcmppbf16 k5, ymm3, ymmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x28,0xc2,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vcmppbf16 k5, ymm3, ymmword ptr [esp + 8*esi + 268435456], 123
+
+// CHECK: vcmppbf16 k5 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291], 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x2f,0xc2,0xac,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vcmppbf16 k5 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291], 123
+
+// CHECK: vcmppbf16 k5, ymm3, word ptr [eax]{1to16}, 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x38,0xc2,0x28,0x7b]
+ vcmppbf16 k5, ymm3, word ptr [eax]{1to16}, 123
+
+// CHECK: vcmppbf16 k5, ymm3, ymmword ptr [2*ebp - 1024], 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x28,0xc2,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vcmppbf16 k5, ymm3, ymmword ptr [2*ebp - 1024], 123
+
+// CHECK: vcmppbf16 k5 {k7}, ymm3, ymmword ptr [ecx + 4064], 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x2f,0xc2,0x69,0x7f,0x7b]
+ vcmppbf16 k5 {k7}, ymm3, ymmword ptr [ecx + 4064], 123
+
+// CHECK: vcmppbf16 k5 {k7}, ymm3, word ptr [edx - 256]{1to16}, 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x3f,0xc2,0x6a,0x80,0x7b]
+ vcmppbf16 k5 {k7}, ymm3, word ptr [edx - 256]{1to16}, 123
+
+// CHECK: vcomsbf16 xmm2, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xd3]
+ vcomsbf16 xmm2, xmm3
+
+// CHECK: vcomsbf16 xmm2, word ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcomsbf16 xmm2, word ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcomsbf16 xmm2, word ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcomsbf16 xmm2, word ptr [edi + 4*eax + 291]
+
+// CHECK: vcomsbf16 xmm2, word ptr [eax]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x10]
+ vcomsbf16 xmm2, word ptr [eax]
+
+// CHECK: vcomsbf16 xmm2, word ptr [2*ebp - 64]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x14,0x6d,0xc0,0xff,0xff,0xff]
+ vcomsbf16 xmm2, word ptr [2*ebp - 64]
+
+// CHECK: vcomsbf16 xmm2, word ptr [ecx + 254]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x51,0x7f]
+ vcomsbf16 xmm2, word ptr [ecx + 254]
+
+// CHECK: vcomsbf16 xmm2, word ptr [edx - 256]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x52,0x80]
+ vcomsbf16 xmm2, word ptr [edx - 256]
+
+// CHECK: vdivnepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5e,0xd4]
+ vdivnepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vdivnepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5e,0xd4]
+ vdivnepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vdivnepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5e,0xd4]
+ vdivnepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vdivnepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5e,0xd4]
+ vdivnepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vdivnepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5e,0xd4]
+ vdivnepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vdivnepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5e,0xd4]
+ vdivnepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vdivnepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5e,0xd4]
+ vdivnepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vdivnepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5e,0xd4]
+ vdivnepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vdivnepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5e,0xd4]
+ vdivnepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vdivnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vdivnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vdivnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vdivnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vdivnepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x5e,0x10]
+ vdivnepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vdivnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5e,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vdivnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vdivnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5e,0x51,0x7f]
+ vdivnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vdivnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x5e,0x52,0x80]
+ vdivnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vdivnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vdivnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vdivnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vdivnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vdivnepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x5e,0x10]
+ vdivnepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vdivnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5e,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vdivnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vdivnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5e,0x51,0x7f]
+ vdivnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vdivnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x5e,0x52,0x80]
+ vdivnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vdivnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vdivnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vdivnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vdivnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vdivnepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x5e,0x10]
+ vdivnepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vdivnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5e,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vdivnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vdivnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5e,0x51,0x7f]
+ vdivnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vdivnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x5e,0x52,0x80]
+ vdivnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vfmadd132nepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x98,0xd4]
+ vfmadd132nepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vfmadd132nepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x98,0xd4]
+ vfmadd132nepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vfmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x98,0xd4]
+ vfmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vfmadd132nepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x98,0xd4]
+ vfmadd132nepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vfmadd132nepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x98,0xd4]
+ vfmadd132nepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vfmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x98,0xd4]
+ vfmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vfmadd132nepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x98,0xd4]
+ vfmadd132nepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vfmadd132nepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x98,0xd4]
+ vfmadd132nepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vfmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x98,0xd4]
+ vfmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vfmadd132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x98,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmadd132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x98,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmadd132nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x98,0x10]
+ vfmadd132nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vfmadd132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x98,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfmadd132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vfmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x98,0x51,0x7f]
+ vfmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vfmadd132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x98,0x52,0x80]
+ vfmadd132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vfmadd132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x98,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmadd132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x98,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmadd132nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x98,0x10]
+ vfmadd132nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vfmadd132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x98,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfmadd132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vfmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x98,0x51,0x7f]
+ vfmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vfmadd132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x98,0x52,0x80]
+ vfmadd132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vfmadd132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x98,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmadd132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x98,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmadd132nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x98,0x10]
+ vfmadd132nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vfmadd132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x98,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfmadd132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vfmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x98,0x51,0x7f]
+ vfmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vfmadd132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x98,0x52,0x80]
+ vfmadd132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vfmadd213nepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xa8,0xd4]
+ vfmadd213nepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vfmadd213nepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xa8,0xd4]
+ vfmadd213nepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vfmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xa8,0xd4]
+ vfmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vfmadd213nepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xa8,0xd4]
+ vfmadd213nepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vfmadd213nepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xa8,0xd4]
+ vfmadd213nepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vfmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xa8,0xd4]
+ vfmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vfmadd213nepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xa8,0xd4]
+ vfmadd213nepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vfmadd213nepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xa8,0xd4]
+ vfmadd213nepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vfmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xa8,0xd4]
+ vfmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vfmadd213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xa8,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmadd213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xa8,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmadd213nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xa8,0x10]
+ vfmadd213nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vfmadd213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xa8,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfmadd213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vfmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xa8,0x51,0x7f]
+ vfmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vfmadd213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xa8,0x52,0x80]
+ vfmadd213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vfmadd213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xa8,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmadd213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xa8,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmadd213nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xa8,0x10]
+ vfmadd213nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vfmadd213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xa8,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfmadd213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vfmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xa8,0x51,0x7f]
+ vfmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vfmadd213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xa8,0x52,0x80]
+ vfmadd213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vfmadd213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xa8,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmadd213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xa8,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmadd213nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xa8,0x10]
+ vfmadd213nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vfmadd213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xa8,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfmadd213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vfmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xa8,0x51,0x7f]
+ vfmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vfmadd213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xa8,0x52,0x80]
+ vfmadd213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vfmadd231nepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xb8,0xd4]
+ vfmadd231nepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vfmadd231nepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xb8,0xd4]
+ vfmadd231nepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vfmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xb8,0xd4]
+ vfmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vfmadd231nepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xb8,0xd4]
+ vfmadd231nepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vfmadd231nepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xb8,0xd4]
+ vfmadd231nepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vfmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xb8,0xd4]
+ vfmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vfmadd231nepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xb8,0xd4]
+ vfmadd231nepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vfmadd231nepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xb8,0xd4]
+ vfmadd231nepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vfmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xb8,0xd4]
+ vfmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vfmadd231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xb8,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmadd231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xb8,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmadd231nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xb8,0x10]
+ vfmadd231nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vfmadd231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xb8,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfmadd231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vfmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xb8,0x51,0x7f]
+ vfmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vfmadd231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xb8,0x52,0x80]
+ vfmadd231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vfmadd231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xb8,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmadd231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xb8,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmadd231nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xb8,0x10]
+ vfmadd231nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vfmadd231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xb8,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfmadd231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vfmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xb8,0x51,0x7f]
+ vfmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vfmadd231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xb8,0x52,0x80]
+ vfmadd231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vfmadd231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xb8,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmadd231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xb8,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmadd231nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xb8,0x10]
+ vfmadd231nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vfmadd231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xb8,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfmadd231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vfmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xb8,0x51,0x7f]
+ vfmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vfmadd231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xb8,0x52,0x80]
+ vfmadd231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vfmsub132nepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9a,0xd4]
+ vfmsub132nepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vfmsub132nepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9a,0xd4]
+ vfmsub132nepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vfmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9a,0xd4]
+ vfmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vfmsub132nepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9a,0xd4]
+ vfmsub132nepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vfmsub132nepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9a,0xd4]
+ vfmsub132nepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vfmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9a,0xd4]
+ vfmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vfmsub132nepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9a,0xd4]
+ vfmsub132nepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vfmsub132nepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9a,0xd4]
+ vfmsub132nepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vfmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9a,0xd4]
+ vfmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vfmsub132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9a,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmsub132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9a,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmsub132nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x9a,0x10]
+ vfmsub132nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vfmsub132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9a,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfmsub132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vfmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9a,0x51,0x7f]
+ vfmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vfmsub132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x9a,0x52,0x80]
+ vfmsub132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vfmsub132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9a,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmsub132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9a,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmsub132nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x9a,0x10]
+ vfmsub132nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vfmsub132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9a,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfmsub132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vfmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9a,0x51,0x7f]
+ vfmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vfmsub132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x9a,0x52,0x80]
+ vfmsub132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vfmsub132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9a,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmsub132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9a,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmsub132nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x9a,0x10]
+ vfmsub132nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vfmsub132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9a,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfmsub132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vfmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9a,0x51,0x7f]
+ vfmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vfmsub132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x9a,0x52,0x80]
+ vfmsub132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vfmsub213nepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xaa,0xd4]
+ vfmsub213nepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vfmsub213nepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xaa,0xd4]
+ vfmsub213nepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vfmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xaa,0xd4]
+ vfmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vfmsub213nepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xaa,0xd4]
+ vfmsub213nepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vfmsub213nepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xaa,0xd4]
+ vfmsub213nepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vfmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xaa,0xd4]
+ vfmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vfmsub213nepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xaa,0xd4]
+ vfmsub213nepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vfmsub213nepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xaa,0xd4]
+ vfmsub213nepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vfmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xaa,0xd4]
+ vfmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vfmsub213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xaa,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmsub213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xaa,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmsub213nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xaa,0x10]
+ vfmsub213nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vfmsub213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xaa,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfmsub213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vfmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xaa,0x51,0x7f]
+ vfmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vfmsub213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xaa,0x52,0x80]
+ vfmsub213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vfmsub213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xaa,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmsub213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xaa,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmsub213nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xaa,0x10]
+ vfmsub213nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vfmsub213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xaa,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfmsub213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vfmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xaa,0x51,0x7f]
+ vfmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vfmsub213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xaa,0x52,0x80]
+ vfmsub213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vfmsub213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xaa,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmsub213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xaa,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmsub213nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xaa,0x10]
+ vfmsub213nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vfmsub213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xaa,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfmsub213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vfmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xaa,0x51,0x7f]
+ vfmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vfmsub213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xaa,0x52,0x80]
+ vfmsub213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vfmsub231nepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xba,0xd4]
+ vfmsub231nepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vfmsub231nepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xba,0xd4]
+ vfmsub231nepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vfmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xba,0xd4]
+ vfmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vfmsub231nepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xba,0xd4]
+ vfmsub231nepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vfmsub231nepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xba,0xd4]
+ vfmsub231nepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vfmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xba,0xd4]
+ vfmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vfmsub231nepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xba,0xd4]
+ vfmsub231nepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vfmsub231nepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xba,0xd4]
+ vfmsub231nepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vfmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xba,0xd4]
+ vfmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vfmsub231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xba,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmsub231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xba,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmsub231nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xba,0x10]
+ vfmsub231nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vfmsub231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xba,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfmsub231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vfmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xba,0x51,0x7f]
+ vfmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vfmsub231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xba,0x52,0x80]
+ vfmsub231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vfmsub231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xba,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmsub231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xba,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmsub231nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xba,0x10]
+ vfmsub231nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vfmsub231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xba,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfmsub231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vfmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xba,0x51,0x7f]
+ vfmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vfmsub231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xba,0x52,0x80]
+ vfmsub231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vfmsub231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xba,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmsub231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xba,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmsub231nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xba,0x10]
+ vfmsub231nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vfmsub231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xba,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfmsub231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vfmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xba,0x51,0x7f]
+ vfmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vfmsub231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xba,0x52,0x80]
+ vfmsub231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vfnmadd132nepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9c,0xd4]
+ vfnmadd132nepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vfnmadd132nepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9c,0xd4]
+ vfnmadd132nepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vfnmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9c,0xd4]
+ vfnmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vfnmadd132nepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9c,0xd4]
+ vfnmadd132nepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vfnmadd132nepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9c,0xd4]
+ vfnmadd132nepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vfnmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9c,0xd4]
+ vfnmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vfnmadd132nepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9c,0xd4]
+ vfnmadd132nepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vfnmadd132nepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9c,0xd4]
+ vfnmadd132nepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vfnmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9c,0xd4]
+ vfnmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vfnmadd132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmadd132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmadd132nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x9c,0x10]
+ vfnmadd132nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vfnmadd132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9c,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmadd132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vfnmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9c,0x51,0x7f]
+ vfnmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vfnmadd132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x9c,0x52,0x80]
+ vfnmadd132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vfnmadd132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmadd132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmadd132nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x9c,0x10]
+ vfnmadd132nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vfnmadd132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9c,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmadd132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vfnmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9c,0x51,0x7f]
+ vfnmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vfnmadd132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x9c,0x52,0x80]
+ vfnmadd132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vfnmadd132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmadd132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmadd132nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x9c,0x10]
+ vfnmadd132nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vfnmadd132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9c,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmadd132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vfnmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9c,0x51,0x7f]
+ vfnmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vfnmadd132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x9c,0x52,0x80]
+ vfnmadd132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vfnmadd213nepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xac,0xd4]
+ vfnmadd213nepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vfnmadd213nepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xac,0xd4]
+ vfnmadd213nepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vfnmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xac,0xd4]
+ vfnmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vfnmadd213nepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xac,0xd4]
+ vfnmadd213nepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vfnmadd213nepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xac,0xd4]
+ vfnmadd213nepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vfnmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xac,0xd4]
+ vfnmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vfnmadd213nepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xac,0xd4]
+ vfnmadd213nepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vfnmadd213nepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xac,0xd4]
+ vfnmadd213nepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vfnmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xac,0xd4]
+ vfnmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vfnmadd213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xac,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmadd213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xac,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmadd213nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xac,0x10]
+ vfnmadd213nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vfnmadd213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xac,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmadd213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vfnmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xac,0x51,0x7f]
+ vfnmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vfnmadd213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xac,0x52,0x80]
+ vfnmadd213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vfnmadd213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xac,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmadd213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xac,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmadd213nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xac,0x10]
+ vfnmadd213nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vfnmadd213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xac,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmadd213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vfnmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xac,0x51,0x7f]
+ vfnmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vfnmadd213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xac,0x52,0x80]
+ vfnmadd213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vfnmadd213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xac,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmadd213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xac,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmadd213nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xac,0x10]
+ vfnmadd213nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vfnmadd213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xac,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmadd213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vfnmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xac,0x51,0x7f]
+ vfnmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vfnmadd213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xac,0x52,0x80]
+ vfnmadd213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vfnmadd231nepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbc,0xd4]
+ vfnmadd231nepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vfnmadd231nepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xbc,0xd4]
+ vfnmadd231nepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vfnmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xbc,0xd4]
+ vfnmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vfnmadd231nepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbc,0xd4]
+ vfnmadd231nepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vfnmadd231nepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xbc,0xd4]
+ vfnmadd231nepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vfnmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xbc,0xd4]
+ vfnmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vfnmadd231nepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbc,0xd4]
+ vfnmadd231nepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vfnmadd231nepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xbc,0xd4]
+ vfnmadd231nepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vfnmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xbc,0xd4]
+ vfnmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vfnmadd231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbc,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmadd231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xbc,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmadd231nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xbc,0x10]
+ vfnmadd231nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vfnmadd231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbc,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmadd231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vfnmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xbc,0x51,0x7f]
+ vfnmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vfnmadd231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xbc,0x52,0x80]
+ vfnmadd231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vfnmadd231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbc,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmadd231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xbc,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmadd231nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xbc,0x10]
+ vfnmadd231nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vfnmadd231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbc,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmadd231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vfnmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xbc,0x51,0x7f]
+ vfnmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vfnmadd231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xbc,0x52,0x80]
+ vfnmadd231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vfnmadd231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbc,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmadd231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xbc,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmadd231nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xbc,0x10]
+ vfnmadd231nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vfnmadd231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbc,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmadd231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vfnmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xbc,0x51,0x7f]
+ vfnmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vfnmadd231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xbc,0x52,0x80]
+ vfnmadd231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vfnmsub132nepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9e,0xd4]
+ vfnmsub132nepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vfnmsub132nepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9e,0xd4]
+ vfnmsub132nepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vfnmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9e,0xd4]
+ vfnmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vfnmsub132nepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9e,0xd4]
+ vfnmsub132nepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vfnmsub132nepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9e,0xd4]
+ vfnmsub132nepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vfnmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9e,0xd4]
+ vfnmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vfnmsub132nepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9e,0xd4]
+ vfnmsub132nepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vfnmsub132nepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9e,0xd4]
+ vfnmsub132nepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vfnmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9e,0xd4]
+ vfnmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vfnmsub132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmsub132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmsub132nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x9e,0x10]
+ vfnmsub132nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vfnmsub132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9e,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmsub132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vfnmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9e,0x51,0x7f]
+ vfnmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vfnmsub132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x9e,0x52,0x80]
+ vfnmsub132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vfnmsub132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmsub132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmsub132nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x9e,0x10]
+ vfnmsub132nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vfnmsub132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9e,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmsub132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vfnmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9e,0x51,0x7f]
+ vfnmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vfnmsub132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x9e,0x52,0x80]
+ vfnmsub132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vfnmsub132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmsub132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmsub132nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x9e,0x10]
+ vfnmsub132nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vfnmsub132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9e,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmsub132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vfnmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9e,0x51,0x7f]
+ vfnmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vfnmsub132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x9e,0x52,0x80]
+ vfnmsub132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vfnmsub213nepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xae,0xd4]
+ vfnmsub213nepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vfnmsub213nepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xae,0xd4]
+ vfnmsub213nepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vfnmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xae,0xd4]
+ vfnmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vfnmsub213nepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xae,0xd4]
+ vfnmsub213nepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vfnmsub213nepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xae,0xd4]
+ vfnmsub213nepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vfnmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xae,0xd4]
+ vfnmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vfnmsub213nepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xae,0xd4]
+ vfnmsub213nepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vfnmsub213nepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xae,0xd4]
+ vfnmsub213nepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vfnmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xae,0xd4]
+ vfnmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vfnmsub213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xae,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmsub213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xae,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmsub213nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xae,0x10]
+ vfnmsub213nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vfnmsub213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xae,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmsub213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vfnmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xae,0x51,0x7f]
+ vfnmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vfnmsub213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xae,0x52,0x80]
+ vfnmsub213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vfnmsub213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xae,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmsub213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xae,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmsub213nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xae,0x10]
+ vfnmsub213nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vfnmsub213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xae,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmsub213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vfnmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xae,0x51,0x7f]
+ vfnmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vfnmsub213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xae,0x52,0x80]
+ vfnmsub213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vfnmsub213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xae,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmsub213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xae,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmsub213nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xae,0x10]
+ vfnmsub213nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vfnmsub213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xae,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmsub213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vfnmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xae,0x51,0x7f]
+ vfnmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vfnmsub213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xae,0x52,0x80]
+ vfnmsub213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vfnmsub231nepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbe,0xd4]
+ vfnmsub231nepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vfnmsub231nepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xbe,0xd4]
+ vfnmsub231nepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vfnmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xbe,0xd4]
+ vfnmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vfnmsub231nepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbe,0xd4]
+ vfnmsub231nepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vfnmsub231nepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xbe,0xd4]
+ vfnmsub231nepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vfnmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xbe,0xd4]
+ vfnmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vfnmsub231nepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbe,0xd4]
+ vfnmsub231nepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vfnmsub231nepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xbe,0xd4]
+ vfnmsub231nepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vfnmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xbe,0xd4]
+ vfnmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vfnmsub231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbe,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmsub231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xbe,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmsub231nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xbe,0x10]
+ vfnmsub231nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vfnmsub231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbe,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmsub231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vfnmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xbe,0x51,0x7f]
+ vfnmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vfnmsub231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xbe,0x52,0x80]
+ vfnmsub231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vfnmsub231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbe,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmsub231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xbe,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmsub231nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xbe,0x10]
+ vfnmsub231nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vfnmsub231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbe,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmsub231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vfnmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xbe,0x51,0x7f]
+ vfnmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vfnmsub231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xbe,0x52,0x80]
+ vfnmsub231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vfnmsub231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbe,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmsub231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xbe,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmsub231nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xbe,0x10]
+ vfnmsub231nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vfnmsub231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbe,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmsub231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vfnmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xbe,0x51,0x7f]
+ vfnmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vfnmsub231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xbe,0x52,0x80]
+ vfnmsub231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vfpclasspbf16 k5, zmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x66,0xeb,0x7b]
+ vfpclasspbf16 k5, zmm3, 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, zmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x66,0xeb,0x7b]
+ vfpclasspbf16 k5 {k7}, zmm3, 123
+
+// CHECK: vfpclasspbf16 k5, ymm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x66,0xeb,0x7b]
+ vfpclasspbf16 k5, ymm3, 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, ymm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x66,0xeb,0x7b]
+ vfpclasspbf16 k5 {k7}, ymm3, 123
+
+// CHECK: vfpclasspbf16 k5, xmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x66,0xeb,0x7b]
+ vfpclasspbf16 k5, xmm3, 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, xmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x66,0xeb,0x7b]
+ vfpclasspbf16 k5 {k7}, xmm3, 123
+
+// CHECK: vfpclasspbf16 k5, xmmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x66,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vfpclasspbf16 k5, xmmword ptr [esp + 8*esi + 268435456], 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, xmmword ptr [edi + 4*eax + 291], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x66,0xac,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vfpclasspbf16 k5 {k7}, xmmword ptr [edi + 4*eax + 291], 123
+
+// CHECK: vfpclasspbf16 k5, word ptr [eax]{1to8}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x66,0x28,0x7b]
+ vfpclasspbf16 k5, word ptr [eax]{1to8}, 123
+
+// CHECK: vfpclasspbf16 k5, xmmword ptr [2*ebp - 512], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x66,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vfpclasspbf16 k5, xmmword ptr [2*ebp - 512], 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, xmmword ptr [ecx + 2032], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x66,0x69,0x7f,0x7b]
+ vfpclasspbf16 k5 {k7}, xmmword ptr [ecx + 2032], 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, word ptr [edx - 256]{1to8}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x1f,0x66,0x6a,0x80,0x7b]
+ vfpclasspbf16 k5 {k7}, word ptr [edx - 256]{1to8}, 123
+
+// CHECK: vfpclasspbf16 k5, word ptr [eax]{1to16}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x66,0x28,0x7b]
+ vfpclasspbf16 k5, word ptr [eax]{1to16}, 123
+
+// CHECK: vfpclasspbf16 k5, ymmword ptr [2*ebp - 1024], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x66,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vfpclasspbf16 k5, ymmword ptr [2*ebp - 1024], 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, ymmword ptr [ecx + 4064], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x66,0x69,0x7f,0x7b]
+ vfpclasspbf16 k5 {k7}, ymmword ptr [ecx + 4064], 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, word ptr [edx - 256]{1to16}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x3f,0x66,0x6a,0x80,0x7b]
+ vfpclasspbf16 k5 {k7}, word ptr [edx - 256]{1to16}, 123
+
+// CHECK: vfpclasspbf16 k5, word ptr [eax]{1to32}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x58,0x66,0x28,0x7b]
+ vfpclasspbf16 k5, word ptr [eax]{1to32}, 123
+
+// CHECK: vfpclasspbf16 k5, zmmword ptr [2*ebp - 2048], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x66,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vfpclasspbf16 k5, zmmword ptr [2*ebp - 2048], 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, zmmword ptr [ecx + 8128], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x66,0x69,0x7f,0x7b]
+ vfpclasspbf16 k5 {k7}, zmmword ptr [ecx + 8128], 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, word ptr [edx - 256]{1to32}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x5f,0x66,0x6a,0x80,0x7b]
+ vfpclasspbf16 k5 {k7}, word ptr [edx - 256]{1to32}, 123
+
+// CHECK: vgetexppbf16 xmm2, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x42,0xd3]
+ vgetexppbf16 xmm2, xmm3
+
+// CHECK: vgetexppbf16 xmm2 {k7}, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x42,0xd3]
+ vgetexppbf16 xmm2 {k7}, xmm3
+
+// CHECK: vgetexppbf16 xmm2 {k7} {z}, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x42,0xd3]
+ vgetexppbf16 xmm2 {k7} {z}, xmm3
+
+// CHECK: vgetexppbf16 zmm2, zmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x42,0xd3]
+ vgetexppbf16 zmm2, zmm3
+
+// CHECK: vgetexppbf16 zmm2 {k7}, zmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x42,0xd3]
+ vgetexppbf16 zmm2 {k7}, zmm3
+
+// CHECK: vgetexppbf16 zmm2 {k7} {z}, zmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x42,0xd3]
+ vgetexppbf16 zmm2 {k7} {z}, zmm3
+
+// CHECK: vgetexppbf16 ymm2, ymm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x42,0xd3]
+ vgetexppbf16 ymm2, ymm3
+
+// CHECK: vgetexppbf16 ymm2 {k7}, ymm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x42,0xd3]
+ vgetexppbf16 ymm2 {k7}, ymm3
+
+// CHECK: vgetexppbf16 ymm2 {k7} {z}, ymm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x42,0xd3]
+ vgetexppbf16 ymm2 {k7} {z}, ymm3
+
+// CHECK: vgetexppbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x42,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vgetexppbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vgetexppbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x42,0x94,0x87,0x23,0x01,0x00,0x00]
+ vgetexppbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vgetexppbf16 xmm2, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x42,0x10]
+ vgetexppbf16 xmm2, word ptr [eax]{1to8}
+
+// CHECK: vgetexppbf16 xmm2, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x42,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vgetexppbf16 xmm2, xmmword ptr [2*ebp - 512]
+
+// CHECK: vgetexppbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x42,0x51,0x7f]
+ vgetexppbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+
+// CHECK: vgetexppbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x42,0x52,0x80]
+ vgetexppbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}
+
+// CHECK: vgetexppbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x42,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vgetexppbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vgetexppbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x42,0x94,0x87,0x23,0x01,0x00,0x00]
+ vgetexppbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vgetexppbf16 ymm2, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x42,0x10]
+ vgetexppbf16 ymm2, word ptr [eax]{1to16}
+
+// CHECK: vgetexppbf16 ymm2, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x42,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vgetexppbf16 ymm2, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vgetexppbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x42,0x51,0x7f]
+ vgetexppbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+
+// CHECK: vgetexppbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x42,0x52,0x80]
+ vgetexppbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}
+
+// CHECK: vgetexppbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x42,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vgetexppbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vgetexppbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x42,0x94,0x87,0x23,0x01,0x00,0x00]
+ vgetexppbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vgetexppbf16 zmm2, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x42,0x10]
+ vgetexppbf16 zmm2, word ptr [eax]{1to32}
+
+// CHECK: vgetexppbf16 zmm2, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x42,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vgetexppbf16 zmm2, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vgetexppbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x42,0x51,0x7f]
+ vgetexppbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+
+// CHECK: vgetexppbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x42,0x52,0x80]
+ vgetexppbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}
+
+// CHECK: vgetmantpbf16 zmm2, zmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x26,0xd3,0x7b]
+ vgetmantpbf16 zmm2, zmm3, 123
+
+// CHECK: vgetmantpbf16 zmm2 {k7}, zmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x26,0xd3,0x7b]
+ vgetmantpbf16 zmm2 {k7}, zmm3, 123
+
+// CHECK: vgetmantpbf16 zmm2 {k7} {z}, zmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x26,0xd3,0x7b]
+ vgetmantpbf16 zmm2 {k7} {z}, zmm3, 123
+
+// CHECK: vgetmantpbf16 ymm2, ymm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x26,0xd3,0x7b]
+ vgetmantpbf16 ymm2, ymm3, 123
+
+// CHECK: vgetmantpbf16 ymm2 {k7}, ymm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x26,0xd3,0x7b]
+ vgetmantpbf16 ymm2 {k7}, ymm3, 123
+
+// CHECK: vgetmantpbf16 ymm2 {k7} {z}, ymm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x26,0xd3,0x7b]
+ vgetmantpbf16 ymm2 {k7} {z}, ymm3, 123
+
+// CHECK: vgetmantpbf16 xmm2, xmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x26,0xd3,0x7b]
+ vgetmantpbf16 xmm2, xmm3, 123
+
+// CHECK: vgetmantpbf16 xmm2 {k7}, xmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x26,0xd3,0x7b]
+ vgetmantpbf16 xmm2 {k7}, xmm3, 123
+
+// CHECK: vgetmantpbf16 xmm2 {k7} {z}, xmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x26,0xd3,0x7b]
+ vgetmantpbf16 xmm2 {k7} {z}, xmm3, 123
+
+// CHECK: vgetmantpbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x26,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vgetmantpbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456], 123
+
+// CHECK: vgetmantpbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x26,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantpbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291], 123
+
+// CHECK: vgetmantpbf16 xmm2, word ptr [eax]{1to8}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x26,0x10,0x7b]
+ vgetmantpbf16 xmm2, word ptr [eax]{1to8}, 123
+
+// CHECK: vgetmantpbf16 xmm2, xmmword ptr [2*ebp - 512], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x26,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vgetmantpbf16 xmm2, xmmword ptr [2*ebp - 512], 123
+
+// CHECK: vgetmantpbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x26,0x51,0x7f,0x7b]
+ vgetmantpbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032], 123
+
+// CHECK: vgetmantpbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x9f,0x26,0x52,0x80,0x7b]
+ vgetmantpbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}, 123
+
+// CHECK: vgetmantpbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x26,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vgetmantpbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456], 123
+
+// CHECK: vgetmantpbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x26,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantpbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291], 123
+
+// CHECK: vgetmantpbf16 ymm2, word ptr [eax]{1to16}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x26,0x10,0x7b]
+ vgetmantpbf16 ymm2, word ptr [eax]{1to16}, 123
+
+// CHECK: vgetmantpbf16 ymm2, ymmword ptr [2*ebp - 1024], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x26,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vgetmantpbf16 ymm2, ymmword ptr [2*ebp - 1024], 123
+
+// CHECK: vgetmantpbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x26,0x51,0x7f,0x7b]
+ vgetmantpbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064], 123
+
+// CHECK: vgetmantpbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xbf,0x26,0x52,0x80,0x7b]
+ vgetmantpbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}, 123
+
+// CHECK: vgetmantpbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x26,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vgetmantpbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456], 123
+
+// CHECK: vgetmantpbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x26,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantpbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291], 123
+
+// CHECK: vgetmantpbf16 zmm2, word ptr [eax]{1to32}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x58,0x26,0x10,0x7b]
+ vgetmantpbf16 zmm2, word ptr [eax]{1to32}, 123
+
+// CHECK: vgetmantpbf16 zmm2, zmmword ptr [2*ebp - 2048], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x26,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vgetmantpbf16 zmm2, zmmword ptr [2*ebp - 2048], 123
+
+// CHECK: vgetmantpbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x26,0x51,0x7f,0x7b]
+ vgetmantpbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128], 123
+
+// CHECK: vgetmantpbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xdf,0x26,0x52,0x80,0x7b]
+ vgetmantpbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}, 123
+
+// CHECK: vmaxpbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5f,0xd4]
+ vmaxpbf16 ymm2, ymm3, ymm4
+
+// CHECK: vmaxpbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5f,0xd4]
+ vmaxpbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vmaxpbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5f,0xd4]
+ vmaxpbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vmaxpbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5f,0xd4]
+ vmaxpbf16 zmm2, zmm3, zmm4
+
+// CHECK: vmaxpbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5f,0xd4]
+ vmaxpbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vmaxpbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5f,0xd4]
+ vmaxpbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vmaxpbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5f,0xd4]
+ vmaxpbf16 xmm2, xmm3, xmm4
+
+// CHECK: vmaxpbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5f,0xd4]
+ vmaxpbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vmaxpbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5f,0xd4]
+ vmaxpbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vmaxpbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5f,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmaxpbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vmaxpbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5f,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmaxpbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vmaxpbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x5f,0x10]
+ vmaxpbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vmaxpbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5f,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vmaxpbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vmaxpbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5f,0x51,0x7f]
+ vmaxpbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vmaxpbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x5f,0x52,0x80]
+ vmaxpbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vmaxpbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5f,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmaxpbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vmaxpbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5f,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmaxpbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vmaxpbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x5f,0x10]
+ vmaxpbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vmaxpbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5f,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vmaxpbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vmaxpbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5f,0x51,0x7f]
+ vmaxpbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vmaxpbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x5f,0x52,0x80]
+ vmaxpbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vmaxpbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5f,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmaxpbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vmaxpbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5f,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmaxpbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vmaxpbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x5f,0x10]
+ vmaxpbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vmaxpbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5f,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vmaxpbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vmaxpbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5f,0x51,0x7f]
+ vmaxpbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vmaxpbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x5f,0x52,0x80]
+ vmaxpbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vminpbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5d,0xd4]
+ vminpbf16 ymm2, ymm3, ymm4
+
+// CHECK: vminpbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5d,0xd4]
+ vminpbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vminpbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5d,0xd4]
+ vminpbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vminpbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5d,0xd4]
+ vminpbf16 zmm2, zmm3, zmm4
+
+// CHECK: vminpbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5d,0xd4]
+ vminpbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vminpbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5d,0xd4]
+ vminpbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vminpbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5d,0xd4]
+ vminpbf16 xmm2, xmm3, xmm4
+
+// CHECK: vminpbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5d,0xd4]
+ vminpbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vminpbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5d,0xd4]
+ vminpbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vminpbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vminpbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vminpbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vminpbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vminpbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x5d,0x10]
+ vminpbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vminpbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5d,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vminpbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vminpbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5d,0x51,0x7f]
+ vminpbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vminpbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x5d,0x52,0x80]
+ vminpbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vminpbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vminpbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vminpbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vminpbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vminpbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x5d,0x10]
+ vminpbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vminpbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5d,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vminpbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vminpbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5d,0x51,0x7f]
+ vminpbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vminpbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x5d,0x52,0x80]
+ vminpbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vminpbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vminpbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vminpbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vminpbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vminpbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x5d,0x10]
+ vminpbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vminpbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5d,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vminpbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vminpbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5d,0x51,0x7f]
+ vminpbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vminpbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x5d,0x52,0x80]
+ vminpbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vmulnepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x59,0xd4]
+ vmulnepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vmulnepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x59,0xd4]
+ vmulnepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vmulnepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x59,0xd4]
+ vmulnepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vmulnepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x59,0xd4]
+ vmulnepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vmulnepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x59,0xd4]
+ vmulnepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vmulnepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x59,0xd4]
+ vmulnepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vmulnepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x59,0xd4]
+ vmulnepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vmulnepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x59,0xd4]
+ vmulnepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vmulnepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x59,0xd4]
+ vmulnepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vmulnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x59,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmulnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vmulnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x59,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmulnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vmulnepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x59,0x10]
+ vmulnepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vmulnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x59,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vmulnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vmulnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x59,0x51,0x7f]
+ vmulnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vmulnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x59,0x52,0x80]
+ vmulnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vmulnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x59,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmulnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vmulnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x59,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmulnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vmulnepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x59,0x10]
+ vmulnepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vmulnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x59,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vmulnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vmulnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x59,0x51,0x7f]
+ vmulnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vmulnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x59,0x52,0x80]
+ vmulnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vmulnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x59,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmulnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vmulnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x59,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmulnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vmulnepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x59,0x10]
+ vmulnepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vmulnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x59,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vmulnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vmulnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x59,0x51,0x7f]
+ vmulnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vmulnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x59,0x52,0x80]
+ vmulnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vrcppbf16 xmm2, xmm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4c,0xd3]
+ vrcppbf16 xmm2, xmm3
+
+// CHECK: vrcppbf16 xmm2 {k7}, xmm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0x0f,0x4c,0xd3]
+ vrcppbf16 xmm2 {k7}, xmm3
+
+// CHECK: vrcppbf16 xmm2 {k7} {z}, xmm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0x8f,0x4c,0xd3]
+ vrcppbf16 xmm2 {k7} {z}, xmm3
+
+// CHECK: vrcppbf16 zmm2, zmm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4c,0xd3]
+ vrcppbf16 zmm2, zmm3
+
+// CHECK: vrcppbf16 zmm2 {k7}, zmm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0x4f,0x4c,0xd3]
+ vrcppbf16 zmm2 {k7}, zmm3
+
+// CHECK: vrcppbf16 zmm2 {k7} {z}, zmm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0xcf,0x4c,0xd3]
+ vrcppbf16 zmm2 {k7} {z}, zmm3
+
+// CHECK: vrcppbf16 ymm2, ymm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4c,0xd3]
+ vrcppbf16 ymm2, ymm3
+
+// CHECK: vrcppbf16 ymm2 {k7}, ymm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0x2f,0x4c,0xd3]
+ vrcppbf16 ymm2 {k7}, ymm3
+
+// CHECK: vrcppbf16 ymm2 {k7} {z}, ymm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0xaf,0x4c,0xd3]
+ vrcppbf16 ymm2 {k7} {z}, ymm3
+
+// CHECK: vrcppbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vrcppbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vrcppbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x0f,0x4c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vrcppbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vrcppbf16 xmm2, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x18,0x4c,0x10]
+ vrcppbf16 xmm2, word ptr [eax]{1to8}
+
+// CHECK: vrcppbf16 xmm2, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4c,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vrcppbf16 xmm2, xmmword ptr [2*ebp - 512]
+
+// CHECK: vrcppbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x8f,0x4c,0x51,0x7f]
+ vrcppbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+
+// CHECK: vrcppbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x9f,0x4c,0x52,0x80]
+ vrcppbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}
+
+// CHECK: vrcppbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vrcppbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vrcppbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x2f,0x4c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vrcppbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vrcppbf16 ymm2, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x38,0x4c,0x10]
+ vrcppbf16 ymm2, word ptr [eax]{1to16}
+
+// CHECK: vrcppbf16 ymm2, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4c,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vrcppbf16 ymm2, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vrcppbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x7c,0xaf,0x4c,0x51,0x7f]
+ vrcppbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+
+// CHECK: vrcppbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xbf,0x4c,0x52,0x80]
+ vrcppbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}
+
+// CHECK: vrcppbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vrcppbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vrcppbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x4f,0x4c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vrcppbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vrcppbf16 zmm2, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x58,0x4c,0x10]
+ vrcppbf16 zmm2, word ptr [eax]{1to32}
+
+// CHECK: vrcppbf16 zmm2, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4c,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vrcppbf16 zmm2, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vrcppbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x7c,0xcf,0x4c,0x51,0x7f]
+ vrcppbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+
+// CHECK: vrcppbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xdf,0x4c,0x52,0x80]
+ vrcppbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}
+
+// CHECK: vreducenepbf16 zmm2, zmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x56,0xd3,0x7b]
+ vreducenepbf16 zmm2, zmm3, 123
+
+// CHECK: vreducenepbf16 zmm2 {k7}, zmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x56,0xd3,0x7b]
+ vreducenepbf16 zmm2 {k7}, zmm3, 123
+
+// CHECK: vreducenepbf16 zmm2 {k7} {z}, zmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x56,0xd3,0x7b]
+ vreducenepbf16 zmm2 {k7} {z}, zmm3, 123
+
+// CHECK: vreducenepbf16 ymm2, ymm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x56,0xd3,0x7b]
+ vreducenepbf16 ymm2, ymm3, 123
+
+// CHECK: vreducenepbf16 ymm2 {k7}, ymm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x56,0xd3,0x7b]
+ vreducenepbf16 ymm2 {k7}, ymm3, 123
+
+// CHECK: vreducenepbf16 ymm2 {k7} {z}, ymm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x56,0xd3,0x7b]
+ vreducenepbf16 ymm2 {k7} {z}, ymm3, 123
+
+// CHECK: vreducenepbf16 xmm2, xmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x56,0xd3,0x7b]
+ vreducenepbf16 xmm2, xmm3, 123
+
+// CHECK: vreducenepbf16 xmm2 {k7}, xmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x56,0xd3,0x7b]
+ vreducenepbf16 xmm2 {k7}, xmm3, 123
+
+// CHECK: vreducenepbf16 xmm2 {k7} {z}, xmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x56,0xd3,0x7b]
+ vreducenepbf16 xmm2 {k7} {z}, xmm3, 123
+
+// CHECK: vreducenepbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x56,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vreducenepbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456], 123
+
+// CHECK: vreducenepbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x56,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vreducenepbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291], 123
+
+// CHECK: vreducenepbf16 xmm2, word ptr [eax]{1to8}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x56,0x10,0x7b]
+ vreducenepbf16 xmm2, word ptr [eax]{1to8}, 123
+
+// CHECK: vreducenepbf16 xmm2, xmmword ptr [2*ebp - 512], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x56,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vreducenepbf16 xmm2, xmmword ptr [2*ebp - 512], 123
+
+// CHECK: vreducenepbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x56,0x51,0x7f,0x7b]
+ vreducenepbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032], 123
+
+// CHECK: vreducenepbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x9f,0x56,0x52,0x80,0x7b]
+ vreducenepbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}, 123
+
+// CHECK: vreducenepbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x56,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vreducenepbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456], 123
+
+// CHECK: vreducenepbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x56,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vreducenepbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291], 123
+
+// CHECK: vreducenepbf16 ymm2, word ptr [eax]{1to16}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x56,0x10,0x7b]
+ vreducenepbf16 ymm2, word ptr [eax]{1to16}, 123
+
+// CHECK: vreducenepbf16 ymm2, ymmword ptr [2*ebp - 1024], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x56,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vreducenepbf16 ymm2, ymmword ptr [2*ebp - 1024], 123
+
+// CHECK: vreducenepbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x56,0x51,0x7f,0x7b]
+ vreducenepbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064], 123
+
+// CHECK: vreducenepbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xbf,0x56,0x52,0x80,0x7b]
+ vreducenepbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}, 123
+
+// CHECK: vreducenepbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x56,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vreducenepbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456], 123
+
+// CHECK: vreducenepbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x56,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vreducenepbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291], 123
+
+// CHECK: vreducenepbf16 zmm2, word ptr [eax]{1to32}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x58,0x56,0x10,0x7b]
+ vreducenepbf16 zmm2, word ptr [eax]{1to32}, 123
+
+// CHECK: vreducenepbf16 zmm2, zmmword ptr [2*ebp - 2048], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x56,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vreducenepbf16 zmm2, zmmword ptr [2*ebp - 2048], 123
+
+// CHECK: vreducenepbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x56,0x51,0x7f,0x7b]
+ vreducenepbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128], 123
+
+// CHECK: vreducenepbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xdf,0x56,0x52,0x80,0x7b]
+ vreducenepbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}, 123
+
+// CHECK: vrndscalenepbf16 zmm2, zmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x08,0xd3,0x7b]
+ vrndscalenepbf16 zmm2, zmm3, 123
+
+// CHECK: vrndscalenepbf16 zmm2 {k7}, zmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x08,0xd3,0x7b]
+ vrndscalenepbf16 zmm2 {k7}, zmm3, 123
+
+// CHECK: vrndscalenepbf16 zmm2 {k7} {z}, zmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x08,0xd3,0x7b]
+ vrndscalenepbf16 zmm2 {k7} {z}, zmm3, 123
+
+// CHECK: vrndscalenepbf16 ymm2, ymm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x08,0xd3,0x7b]
+ vrndscalenepbf16 ymm2, ymm3, 123
+
+// CHECK: vrndscalenepbf16 ymm2 {k7}, ymm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x08,0xd3,0x7b]
+ vrndscalenepbf16 ymm2 {k7}, ymm3, 123
+
+// CHECK: vrndscalenepbf16 ymm2 {k7} {z}, ymm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x08,0xd3,0x7b]
+ vrndscalenepbf16 ymm2 {k7} {z}, ymm3, 123
+
+// CHECK: vrndscalenepbf16 xmm2, xmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x08,0xd3,0x7b]
+ vrndscalenepbf16 xmm2, xmm3, 123
+
+// CHECK: vrndscalenepbf16 xmm2 {k7}, xmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x08,0xd3,0x7b]
+ vrndscalenepbf16 xmm2 {k7}, xmm3, 123
+
+// CHECK: vrndscalenepbf16 xmm2 {k7} {z}, xmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x08,0xd3,0x7b]
+ vrndscalenepbf16 xmm2 {k7} {z}, xmm3, 123
+
+// CHECK: vrndscalenepbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x08,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vrndscalenepbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456], 123
+
+// CHECK: vrndscalenepbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x08,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vrndscalenepbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291], 123
+
+// CHECK: vrndscalenepbf16 xmm2, word ptr [eax]{1to8}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x08,0x10,0x7b]
+ vrndscalenepbf16 xmm2, word ptr [eax]{1to8}, 123
+
+// CHECK: vrndscalenepbf16 xmm2, xmmword ptr [2*ebp - 512], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x08,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vrndscalenepbf16 xmm2, xmmword ptr [2*ebp - 512], 123
+
+// CHECK: vrndscalenepbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x08,0x51,0x7f,0x7b]
+ vrndscalenepbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032], 123
+
+// CHECK: vrndscalenepbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x9f,0x08,0x52,0x80,0x7b]
+ vrndscalenepbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}, 123
+
+// CHECK: vrndscalenepbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x08,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vrndscalenepbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456], 123
+
+// CHECK: vrndscalenepbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x08,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vrndscalenepbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291], 123
+
+// CHECK: vrndscalenepbf16 ymm2, word ptr [eax]{1to16}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x08,0x10,0x7b]
+ vrndscalenepbf16 ymm2, word ptr [eax]{1to16}, 123
+
+// CHECK: vrndscalenepbf16 ymm2, ymmword ptr [2*ebp - 1024], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x08,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vrndscalenepbf16 ymm2, ymmword ptr [2*ebp - 1024], 123
+
+// CHECK: vrndscalenepbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x08,0x51,0x7f,0x7b]
+ vrndscalenepbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064], 123
+
+// CHECK: vrndscalenepbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xbf,0x08,0x52,0x80,0x7b]
+ vrndscalenepbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}, 123
+
+// CHECK: vrndscalenepbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x08,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vrndscalenepbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456], 123
+
+// CHECK: vrndscalenepbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x08,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vrndscalenepbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291], 123
+
+// CHECK: vrndscalenepbf16 zmm2, word ptr [eax]{1to32}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x58,0x08,0x10,0x7b]
+ vrndscalenepbf16 zmm2, word ptr [eax]{1to32}, 123
+
+// CHECK: vrndscalenepbf16 zmm2, zmmword ptr [2*ebp - 2048], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x08,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vrndscalenepbf16 zmm2, zmmword ptr [2*ebp - 2048], 123
+
+// CHECK: vrndscalenepbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x08,0x51,0x7f,0x7b]
+ vrndscalenepbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128], 123
+
+// CHECK: vrndscalenepbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xdf,0x08,0x52,0x80,0x7b]
+ vrndscalenepbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}, 123
+
+// CHECK: vrsqrtpbf16 xmm2, xmm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4e,0xd3]
+ vrsqrtpbf16 xmm2, xmm3
+
+// CHECK: vrsqrtpbf16 xmm2 {k7}, xmm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0x0f,0x4e,0xd3]
+ vrsqrtpbf16 xmm2 {k7}, xmm3
+
+// CHECK: vrsqrtpbf16 xmm2 {k7} {z}, xmm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0x8f,0x4e,0xd3]
+ vrsqrtpbf16 xmm2 {k7} {z}, xmm3
+
+// CHECK: vrsqrtpbf16 zmm2, zmm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4e,0xd3]
+ vrsqrtpbf16 zmm2, zmm3
+
+// CHECK: vrsqrtpbf16 zmm2 {k7}, zmm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0x4f,0x4e,0xd3]
+ vrsqrtpbf16 zmm2 {k7}, zmm3
+
+// CHECK: vrsqrtpbf16 zmm2 {k7} {z}, zmm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0xcf,0x4e,0xd3]
+ vrsqrtpbf16 zmm2 {k7} {z}, zmm3
+
+// CHECK: vrsqrtpbf16 ymm2, ymm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4e,0xd3]
+ vrsqrtpbf16 ymm2, ymm3
+
+// CHECK: vrsqrtpbf16 ymm2 {k7}, ymm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0x2f,0x4e,0xd3]
+ vrsqrtpbf16 ymm2 {k7}, ymm3
+
+// CHECK: vrsqrtpbf16 ymm2 {k7} {z}, ymm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0xaf,0x4e,0xd3]
+ vrsqrtpbf16 ymm2 {k7} {z}, ymm3
+
+// CHECK: vrsqrtpbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vrsqrtpbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vrsqrtpbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x0f,0x4e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vrsqrtpbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vrsqrtpbf16 xmm2, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x18,0x4e,0x10]
+ vrsqrtpbf16 xmm2, word ptr [eax]{1to8}
+
+// CHECK: vrsqrtpbf16 xmm2, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4e,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vrsqrtpbf16 xmm2, xmmword ptr [2*ebp - 512]
+
+// CHECK: vrsqrtpbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x8f,0x4e,0x51,0x7f]
+ vrsqrtpbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+
+// CHECK: vrsqrtpbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x9f,0x4e,0x52,0x80]
+ vrsqrtpbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}
+
+// CHECK: vrsqrtpbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vrsqrtpbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vrsqrtpbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x2f,0x4e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vrsqrtpbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vrsqrtpbf16 ymm2, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x38,0x4e,0x10]
+ vrsqrtpbf16 ymm2, word ptr [eax]{1to16}
+
+// CHECK: vrsqrtpbf16 ymm2, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4e,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vrsqrtpbf16 ymm2, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vrsqrtpbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x7c,0xaf,0x4e,0x51,0x7f]
+ vrsqrtpbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+
+// CHECK: vrsqrtpbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xbf,0x4e,0x52,0x80]
+ vrsqrtpbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}
+
+// CHECK: vrsqrtpbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vrsqrtpbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vrsqrtpbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x4f,0x4e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vrsqrtpbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vrsqrtpbf16 zmm2, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x58,0x4e,0x10]
+ vrsqrtpbf16 zmm2, word ptr [eax]{1to32}
+
+// CHECK: vrsqrtpbf16 zmm2, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4e,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vrsqrtpbf16 zmm2, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vrsqrtpbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x7c,0xcf,0x4e,0x51,0x7f]
+ vrsqrtpbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+
+// CHECK: vrsqrtpbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xdf,0x4e,0x52,0x80]
+ vrsqrtpbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}
+
+// CHECK: vscalefpbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x2c,0xd4]
+ vscalefpbf16 ymm2, ymm3, ymm4
+
+// CHECK: vscalefpbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x2c,0xd4]
+ vscalefpbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vscalefpbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x2c,0xd4]
+ vscalefpbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vscalefpbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x2c,0xd4]
+ vscalefpbf16 zmm2, zmm3, zmm4
+
+// CHECK: vscalefpbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x2c,0xd4]
+ vscalefpbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vscalefpbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x2c,0xd4]
+ vscalefpbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vscalefpbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x2c,0xd4]
+ vscalefpbf16 xmm2, xmm3, xmm4
+
+// CHECK: vscalefpbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x2c,0xd4]
+ vscalefpbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vscalefpbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x2c,0xd4]
+ vscalefpbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vscalefpbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vscalefpbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vscalefpbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x2c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vscalefpbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vscalefpbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x2c,0x10]
+ vscalefpbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vscalefpbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x2c,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vscalefpbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vscalefpbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x2c,0x51,0x7f]
+ vscalefpbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vscalefpbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x2c,0x52,0x80]
+ vscalefpbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vscalefpbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vscalefpbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vscalefpbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x2c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vscalefpbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vscalefpbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x2c,0x10]
+ vscalefpbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vscalefpbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x2c,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vscalefpbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vscalefpbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x2c,0x51,0x7f]
+ vscalefpbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vscalefpbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x2c,0x52,0x80]
+ vscalefpbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vscalefpbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vscalefpbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vscalefpbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x2c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vscalefpbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vscalefpbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x2c,0x10]
+ vscalefpbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vscalefpbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x2c,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vscalefpbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vscalefpbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x2c,0x51,0x7f]
+ vscalefpbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vscalefpbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x2c,0x52,0x80]
+ vscalefpbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vsqrtnepbf16 xmm2, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x51,0xd3]
+ vsqrtnepbf16 xmm2, xmm3
+
+// CHECK: vsqrtnepbf16 xmm2 {k7}, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x51,0xd3]
+ vsqrtnepbf16 xmm2 {k7}, xmm3
+
+// CHECK: vsqrtnepbf16 xmm2 {k7} {z}, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x51,0xd3]
+ vsqrtnepbf16 xmm2 {k7} {z}, xmm3
+
+// CHECK: vsqrtnepbf16 zmm2, zmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x51,0xd3]
+ vsqrtnepbf16 zmm2, zmm3
+
+// CHECK: vsqrtnepbf16 zmm2 {k7}, zmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x51,0xd3]
+ vsqrtnepbf16 zmm2 {k7}, zmm3
+
+// CHECK: vsqrtnepbf16 zmm2 {k7} {z}, zmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x51,0xd3]
+ vsqrtnepbf16 zmm2 {k7} {z}, zmm3
+
+// CHECK: vsqrtnepbf16 ymm2, ymm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x51,0xd3]
+ vsqrtnepbf16 ymm2, ymm3
+
+// CHECK: vsqrtnepbf16 ymm2 {k7}, ymm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x51,0xd3]
+ vsqrtnepbf16 ymm2 {k7}, ymm3
+
+// CHECK: vsqrtnepbf16 ymm2 {k7} {z}, ymm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x51,0xd3]
+ vsqrtnepbf16 ymm2 {k7} {z}, ymm3
+
+// CHECK: vsqrtnepbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x51,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsqrtnepbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vsqrtnepbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x51,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsqrtnepbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vsqrtnepbf16 xmm2, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x51,0x10]
+ vsqrtnepbf16 xmm2, word ptr [eax]{1to8}
+
+// CHECK: vsqrtnepbf16 xmm2, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x51,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vsqrtnepbf16 xmm2, xmmword ptr [2*ebp - 512]
+
+// CHECK: vsqrtnepbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x51,0x51,0x7f]
+ vsqrtnepbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+
+// CHECK: vsqrtnepbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x51,0x52,0x80]
+ vsqrtnepbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}
+
+// CHECK: vsqrtnepbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x51,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsqrtnepbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vsqrtnepbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x51,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsqrtnepbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vsqrtnepbf16 ymm2, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x51,0x10]
+ vsqrtnepbf16 ymm2, word ptr [eax]{1to16}
+
+// CHECK: vsqrtnepbf16 ymm2, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x51,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vsqrtnepbf16 ymm2, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vsqrtnepbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x51,0x51,0x7f]
+ vsqrtnepbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+
+// CHECK: vsqrtnepbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x51,0x52,0x80]
+ vsqrtnepbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}
+
+// CHECK: vsqrtnepbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x51,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsqrtnepbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vsqrtnepbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x51,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsqrtnepbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vsqrtnepbf16 zmm2, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x51,0x10]
+ vsqrtnepbf16 zmm2, word ptr [eax]{1to32}
+
+// CHECK: vsqrtnepbf16 zmm2, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x51,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vsqrtnepbf16 zmm2, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vsqrtnepbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x51,0x51,0x7f]
+ vsqrtnepbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+
+// CHECK: vsqrtnepbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x51,0x52,0x80]
+ vsqrtnepbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}
+
+// CHECK: vsubnepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5c,0xd4]
+ vsubnepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vsubnepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5c,0xd4]
+ vsubnepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vsubnepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5c,0xd4]
+ vsubnepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vsubnepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5c,0xd4]
+ vsubnepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vsubnepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5c,0xd4]
+ vsubnepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vsubnepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5c,0xd4]
+ vsubnepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vsubnepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5c,0xd4]
+ vsubnepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vsubnepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5c,0xd4]
+ vsubnepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vsubnepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5c,0xd4]
+ vsubnepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vsubnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsubnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vsubnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsubnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vsubnepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x5c,0x10]
+ vsubnepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vsubnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5c,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vsubnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vsubnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5c,0x51,0x7f]
+ vsubnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vsubnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x5c,0x52,0x80]
+ vsubnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vsubnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsubnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vsubnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsubnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vsubnepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x5c,0x10]
+ vsubnepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vsubnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5c,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vsubnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vsubnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5c,0x51,0x7f]
+ vsubnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vsubnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x5c,0x52,0x80]
+ vsubnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vsubnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsubnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vsubnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsubnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vsubnepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x5c,0x10]
+ vsubnepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vsubnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5c,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vsubnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vsubnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5c,0x51,0x7f]
+ vsubnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vsubnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x5c,0x52,0x80]
+ vsubnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
diff --git a/llvm/test/MC/X86/avx10.2-bf16-64-att.s b/llvm/test/MC/X86/avx10.2-bf16-64-att.s
new file mode 100644
index 0000000000000..85d99cfe0a704
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-bf16-64-att.s
@@ -0,0 +1,3014 @@
+// RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s
+
+// CHECK: vaddnepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x85,0x45,0x20,0x58,0xf0]
+ vaddnepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vaddnepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x27,0x58,0xf0]
+ vaddnepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vaddnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x58,0xf0]
+ vaddnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vaddnepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x85,0x45,0x40,0x58,0xf0]
+ vaddnepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vaddnepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x47,0x58,0xf0]
+ vaddnepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vaddnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x58,0xf0]
+ vaddnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vaddnepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x85,0x45,0x00,0x58,0xf0]
+ vaddnepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vaddnepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x07,0x58,0xf0]
+ vaddnepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vaddnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0x87,0x58,0xf0]
+ vaddnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vaddnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x58,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vaddnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vaddnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x58,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vaddnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vaddnepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x58,0x35,0x00,0x00,0x00,0x00]
+ vaddnepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vaddnepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x58,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vaddnepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vaddnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x58,0x71,0x7f]
+ vaddnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vaddnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x58,0x72,0x80]
+ vaddnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vaddnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x58,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vaddnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vaddnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x58,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vaddnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vaddnepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x58,0x35,0x00,0x00,0x00,0x00]
+ vaddnepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vaddnepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x58,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vaddnepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vaddnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x58,0x71,0x7f]
+ vaddnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vaddnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x58,0x72,0x80]
+ vaddnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vaddnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x58,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vaddnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vaddnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x58,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vaddnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vaddnepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x58,0x35,0x00,0x00,0x00,0x00]
+ vaddnepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vaddnepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x58,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vaddnepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vaddnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x58,0x71,0x7f]
+ vaddnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vaddnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x58,0x72,0x80]
+ vaddnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vcmppbf16 $123, %ymm24, %ymm23, %k5
+// CHECK: encoding: [0x62,0x93,0x47,0x20,0xc2,0xe8,0x7b]
+ vcmppbf16 $123, %ymm24, %ymm23, %k5
+
+// CHECK: vcmppbf16 $123, %ymm24, %ymm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0x93,0x47,0x27,0xc2,0xe8,0x7b]
+ vcmppbf16 $123, %ymm24, %ymm23, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, %xmm24, %xmm23, %k5
+// CHECK: encoding: [0x62,0x93,0x47,0x00,0xc2,0xe8,0x7b]
+ vcmppbf16 $123, %xmm24, %xmm23, %k5
+
+// CHECK: vcmppbf16 $123, %xmm24, %xmm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0x93,0x47,0x07,0xc2,0xe8,0x7b]
+ vcmppbf16 $123, %xmm24, %xmm23, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, %zmm24, %zmm23, %k5
+// CHECK: encoding: [0x62,0x93,0x47,0x40,0xc2,0xe8,0x7b]
+ vcmppbf16 $123, %zmm24, %zmm23, %k5
+
+// CHECK: vcmppbf16 $123, %zmm24, %zmm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0x93,0x47,0x47,0xc2,0xe8,0x7b]
+ vcmppbf16 $123, %zmm24, %zmm23, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, 268435456(%rbp,%r14,8), %zmm23, %k5
+// CHECK: encoding: [0x62,0xb3,0x47,0x40,0xc2,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vcmppbf16 $123, 268435456(%rbp,%r14,8), %zmm23, %k5
+
+// CHECK: vcmppbf16 $123, 291(%r8,%rax,4), %zmm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0xd3,0x47,0x47,0xc2,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vcmppbf16 $123, 291(%r8,%rax,4), %zmm23, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, (%rip){1to32}, %zmm23, %k5
+// CHECK: encoding: [0x62,0xf3,0x47,0x50,0xc2,0x2d,0x00,0x00,0x00,0x00,0x7b]
+ vcmppbf16 $123, (%rip){1to32}, %zmm23, %k5
+
+// CHECK: vcmppbf16 $123, -2048(,%rbp,2), %zmm23, %k5
+// CHECK: encoding: [0x62,0xf3,0x47,0x40,0xc2,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vcmppbf16 $123, -2048(,%rbp,2), %zmm23, %k5
+
+// CHECK: vcmppbf16 $123, 8128(%rcx), %zmm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x47,0x47,0xc2,0x69,0x7f,0x7b]
+ vcmppbf16 $123, 8128(%rcx), %zmm23, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, -256(%rdx){1to32}, %zmm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x47,0x57,0xc2,0x6a,0x80,0x7b]
+ vcmppbf16 $123, -256(%rdx){1to32}, %zmm23, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, 268435456(%rbp,%r14,8), %xmm23, %k5
+// CHECK: encoding: [0x62,0xb3,0x47,0x00,0xc2,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vcmppbf16 $123, 268435456(%rbp,%r14,8), %xmm23, %k5
+
+// CHECK: vcmppbf16 $123, 291(%r8,%rax,4), %xmm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0xd3,0x47,0x07,0xc2,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vcmppbf16 $123, 291(%r8,%rax,4), %xmm23, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, (%rip){1to8}, %xmm23, %k5
+// CHECK: encoding: [0x62,0xf3,0x47,0x10,0xc2,0x2d,0x00,0x00,0x00,0x00,0x7b]
+ vcmppbf16 $123, (%rip){1to8}, %xmm23, %k5
+
+// CHECK: vcmppbf16 $123, -512(,%rbp,2), %xmm23, %k5
+// CHECK: encoding: [0x62,0xf3,0x47,0x00,0xc2,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vcmppbf16 $123, -512(,%rbp,2), %xmm23, %k5
+
+// CHECK: vcmppbf16 $123, 2032(%rcx), %xmm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x47,0x07,0xc2,0x69,0x7f,0x7b]
+ vcmppbf16 $123, 2032(%rcx), %xmm23, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, -256(%rdx){1to8}, %xmm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x47,0x17,0xc2,0x6a,0x80,0x7b]
+ vcmppbf16 $123, -256(%rdx){1to8}, %xmm23, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, 268435456(%rbp,%r14,8), %ymm23, %k5
+// CHECK: encoding: [0x62,0xb3,0x47,0x20,0xc2,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vcmppbf16 $123, 268435456(%rbp,%r14,8), %ymm23, %k5
+
+// CHECK: vcmppbf16 $123, 291(%r8,%rax,4), %ymm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0xd3,0x47,0x27,0xc2,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vcmppbf16 $123, 291(%r8,%rax,4), %ymm23, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, (%rip){1to16}, %ymm23, %k5
+// CHECK: encoding: [0x62,0xf3,0x47,0x30,0xc2,0x2d,0x00,0x00,0x00,0x00,0x7b]
+ vcmppbf16 $123, (%rip){1to16}, %ymm23, %k5
+
+// CHECK: vcmppbf16 $123, -1024(,%rbp,2), %ymm23, %k5
+// CHECK: encoding: [0x62,0xf3,0x47,0x20,0xc2,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vcmppbf16 $123, -1024(,%rbp,2), %ymm23, %k5
+
+// CHECK: vcmppbf16 $123, 4064(%rcx), %ymm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x47,0x27,0xc2,0x69,0x7f,0x7b]
+ vcmppbf16 $123, 4064(%rcx), %ymm23, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, -256(%rdx){1to16}, %ymm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x47,0x37,0xc2,0x6a,0x80,0x7b]
+ vcmppbf16 $123, -256(%rdx){1to16}, %ymm23, %k5 {%k7}
+
+// CHECK: vcomsbf16 %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x2f,0xf7]
+ vcomsbf16 %xmm23, %xmm22
+
+// CHECK: vcomsbf16 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcomsbf16 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vcomsbf16 291(%r8,%rax,4), %xmm22
+// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcomsbf16 291(%r8,%rax,4), %xmm22
+
+// CHECK: vcomsbf16 (%rip), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x2f,0x35,0x00,0x00,0x00,0x00]
+ vcomsbf16 (%rip), %xmm22
+
+// CHECK: vcomsbf16 -64(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x2f,0x34,0x6d,0xc0,0xff,0xff,0xff]
+ vcomsbf16 -64(,%rbp,2), %xmm22
+
+// CHECK: vcomsbf16 254(%rcx), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x2f,0x71,0x7f]
+ vcomsbf16 254(%rcx), %xmm22
+
+// CHECK: vcomsbf16 -256(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x2f,0x72,0x80]
+ vcomsbf16 -256(%rdx), %xmm22
+
+// CHECK: vdivnepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x85,0x45,0x20,0x5e,0xf0]
+ vdivnepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vdivnepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x27,0x5e,0xf0]
+ vdivnepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vdivnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x5e,0xf0]
+ vdivnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vdivnepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x85,0x45,0x40,0x5e,0xf0]
+ vdivnepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vdivnepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x47,0x5e,0xf0]
+ vdivnepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vdivnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x5e,0xf0]
+ vdivnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vdivnepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x85,0x45,0x00,0x5e,0xf0]
+ vdivnepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vdivnepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x07,0x5e,0xf0]
+ vdivnepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vdivnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0x87,0x5e,0xf0]
+ vdivnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vdivnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x5e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vdivnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vdivnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x5e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vdivnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vdivnepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x5e,0x35,0x00,0x00,0x00,0x00]
+ vdivnepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vdivnepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x5e,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vdivnepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vdivnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x5e,0x71,0x7f]
+ vdivnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vdivnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x5e,0x72,0x80]
+ vdivnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vdivnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x5e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vdivnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vdivnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x5e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vdivnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vdivnepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x5e,0x35,0x00,0x00,0x00,0x00]
+ vdivnepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vdivnepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x5e,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vdivnepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vdivnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x5e,0x71,0x7f]
+ vdivnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vdivnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x5e,0x72,0x80]
+ vdivnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vdivnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x5e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vdivnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vdivnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x5e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vdivnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vdivnepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x5e,0x35,0x00,0x00,0x00,0x00]
+ vdivnepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vdivnepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x5e,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vdivnepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vdivnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x5e,0x71,0x7f]
+ vdivnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vdivnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x5e,0x72,0x80]
+ vdivnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0x98,0xf0]
+ vfmadd132nepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0x98,0xf0]
+ vfmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x98,0xf0]
+ vfmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0x98,0xf0]
+ vfmadd132nepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vfmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0x98,0xf0]
+ vfmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vfmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x98,0xf0]
+ vfmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0x98,0xf0]
+ vfmadd132nepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vfmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0x98,0xf0]
+ vfmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vfmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0x98,0xf0]
+ vfmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vfmadd132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0x98,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vfmadd132nepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x98,0x35,0x00,0x00,0x00,0x00]
+ vfmadd132nepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vfmadd132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x98,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfmadd132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vfmadd132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x98,0x71,0x7f]
+ vfmadd132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x98,0x72,0x80]
+ vfmadd132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vfmadd132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x98,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmadd132nepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x98,0x35,0x00,0x00,0x00,0x00]
+ vfmadd132nepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vfmadd132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x98,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfmadd132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vfmadd132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0x98,0x71,0x7f]
+ vfmadd132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x98,0x72,0x80]
+ vfmadd132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vfmadd132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x98,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vfmadd132nepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x98,0x35,0x00,0x00,0x00,0x00]
+ vfmadd132nepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vfmadd132nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x98,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfmadd132nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vfmadd132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x98,0x71,0x7f]
+ vfmadd132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x98,0x72,0x80]
+ vfmadd132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xa8,0xf0]
+ vfmadd213nepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xa8,0xf0]
+ vfmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xa8,0xf0]
+ vfmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xa8,0xf0]
+ vfmadd213nepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vfmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xa8,0xf0]
+ vfmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vfmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xa8,0xf0]
+ vfmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xa8,0xf0]
+ vfmadd213nepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vfmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xa8,0xf0]
+ vfmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vfmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xa8,0xf0]
+ vfmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vfmadd213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xa8,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vfmadd213nepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xa8,0x35,0x00,0x00,0x00,0x00]
+ vfmadd213nepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vfmadd213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xa8,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfmadd213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vfmadd213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xa8,0x71,0x7f]
+ vfmadd213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xa8,0x72,0x80]
+ vfmadd213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vfmadd213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xa8,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmadd213nepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xa8,0x35,0x00,0x00,0x00,0x00]
+ vfmadd213nepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vfmadd213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xa8,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfmadd213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vfmadd213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xa8,0x71,0x7f]
+ vfmadd213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xa8,0x72,0x80]
+ vfmadd213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vfmadd213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xa8,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vfmadd213nepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xa8,0x35,0x00,0x00,0x00,0x00]
+ vfmadd213nepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vfmadd213nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xa8,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfmadd213nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vfmadd213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xa8,0x71,0x7f]
+ vfmadd213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xa8,0x72,0x80]
+ vfmadd213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xb8,0xf0]
+ vfmadd231nepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xb8,0xf0]
+ vfmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xb8,0xf0]
+ vfmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xb8,0xf0]
+ vfmadd231nepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vfmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xb8,0xf0]
+ vfmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vfmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xb8,0xf0]
+ vfmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xb8,0xf0]
+ vfmadd231nepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vfmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xb8,0xf0]
+ vfmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vfmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xb8,0xf0]
+ vfmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vfmadd231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xb8,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vfmadd231nepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xb8,0x35,0x00,0x00,0x00,0x00]
+ vfmadd231nepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vfmadd231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xb8,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfmadd231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vfmadd231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xb8,0x71,0x7f]
+ vfmadd231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xb8,0x72,0x80]
+ vfmadd231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vfmadd231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xb8,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmadd231nepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xb8,0x35,0x00,0x00,0x00,0x00]
+ vfmadd231nepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vfmadd231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xb8,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfmadd231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vfmadd231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xb8,0x71,0x7f]
+ vfmadd231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xb8,0x72,0x80]
+ vfmadd231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vfmadd231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xb8,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vfmadd231nepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xb8,0x35,0x00,0x00,0x00,0x00]
+ vfmadd231nepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vfmadd231nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xb8,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfmadd231nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vfmadd231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xb8,0x71,0x7f]
+ vfmadd231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xb8,0x72,0x80]
+ vfmadd231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0x9a,0xf0]
+ vfmsub132nepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0x9a,0xf0]
+ vfmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x9a,0xf0]
+ vfmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0x9a,0xf0]
+ vfmsub132nepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vfmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0x9a,0xf0]
+ vfmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vfmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x9a,0xf0]
+ vfmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0x9a,0xf0]
+ vfmsub132nepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vfmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0x9a,0xf0]
+ vfmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vfmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0x9a,0xf0]
+ vfmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vfmsub132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0x9a,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vfmsub132nepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x9a,0x35,0x00,0x00,0x00,0x00]
+ vfmsub132nepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vfmsub132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x9a,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfmsub132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vfmsub132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x9a,0x71,0x7f]
+ vfmsub132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x9a,0x72,0x80]
+ vfmsub132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vfmsub132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x9a,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsub132nepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x9a,0x35,0x00,0x00,0x00,0x00]
+ vfmsub132nepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vfmsub132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x9a,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfmsub132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vfmsub132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0x9a,0x71,0x7f]
+ vfmsub132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x9a,0x72,0x80]
+ vfmsub132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vfmsub132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x9a,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vfmsub132nepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x9a,0x35,0x00,0x00,0x00,0x00]
+ vfmsub132nepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vfmsub132nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x9a,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfmsub132nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vfmsub132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x9a,0x71,0x7f]
+ vfmsub132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x9a,0x72,0x80]
+ vfmsub132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xaa,0xf0]
+ vfmsub213nepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xaa,0xf0]
+ vfmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xaa,0xf0]
+ vfmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xaa,0xf0]
+ vfmsub213nepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vfmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xaa,0xf0]
+ vfmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vfmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xaa,0xf0]
+ vfmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xaa,0xf0]
+ vfmsub213nepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vfmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xaa,0xf0]
+ vfmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vfmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xaa,0xf0]
+ vfmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vfmsub213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xaa,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vfmsub213nepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xaa,0x35,0x00,0x00,0x00,0x00]
+ vfmsub213nepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vfmsub213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xaa,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfmsub213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vfmsub213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xaa,0x71,0x7f]
+ vfmsub213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xaa,0x72,0x80]
+ vfmsub213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vfmsub213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xaa,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsub213nepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xaa,0x35,0x00,0x00,0x00,0x00]
+ vfmsub213nepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vfmsub213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xaa,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfmsub213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vfmsub213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xaa,0x71,0x7f]
+ vfmsub213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xaa,0x72,0x80]
+ vfmsub213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vfmsub213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xaa,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vfmsub213nepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xaa,0x35,0x00,0x00,0x00,0x00]
+ vfmsub213nepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vfmsub213nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xaa,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfmsub213nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vfmsub213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xaa,0x71,0x7f]
+ vfmsub213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xaa,0x72,0x80]
+ vfmsub213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xba,0xf0]
+ vfmsub231nepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xba,0xf0]
+ vfmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xba,0xf0]
+ vfmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xba,0xf0]
+ vfmsub231nepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vfmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xba,0xf0]
+ vfmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vfmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xba,0xf0]
+ vfmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xba,0xf0]
+ vfmsub231nepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vfmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xba,0xf0]
+ vfmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vfmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xba,0xf0]
+ vfmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vfmsub231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xba,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vfmsub231nepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xba,0x35,0x00,0x00,0x00,0x00]
+ vfmsub231nepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vfmsub231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xba,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfmsub231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vfmsub231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xba,0x71,0x7f]
+ vfmsub231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xba,0x72,0x80]
+ vfmsub231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vfmsub231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xba,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsub231nepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xba,0x35,0x00,0x00,0x00,0x00]
+ vfmsub231nepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vfmsub231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xba,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfmsub231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vfmsub231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xba,0x71,0x7f]
+ vfmsub231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xba,0x72,0x80]
+ vfmsub231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vfmsub231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xba,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vfmsub231nepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xba,0x35,0x00,0x00,0x00,0x00]
+ vfmsub231nepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vfmsub231nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xba,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfmsub231nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vfmsub231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xba,0x71,0x7f]
+ vfmsub231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xba,0x72,0x80]
+ vfmsub231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0x9c,0xf0]
+ vfnmadd132nepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0x9c,0xf0]
+ vfnmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x9c,0xf0]
+ vfnmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0x9c,0xf0]
+ vfnmadd132nepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vfnmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0x9c,0xf0]
+ vfnmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vfnmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x9c,0xf0]
+ vfnmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0x9c,0xf0]
+ vfnmadd132nepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vfnmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0x9c,0xf0]
+ vfnmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vfnmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0x9c,0xf0]
+ vfnmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vfnmadd132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0x9c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vfnmadd132nepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x9c,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd132nepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vfnmadd132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x9c,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmadd132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vfnmadd132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x9c,0x71,0x7f]
+ vfnmadd132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x9c,0x72,0x80]
+ vfnmadd132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vfnmadd132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x9c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmadd132nepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x9c,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd132nepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vfnmadd132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x9c,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmadd132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vfnmadd132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0x9c,0x71,0x7f]
+ vfnmadd132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x9c,0x72,0x80]
+ vfnmadd132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vfnmadd132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x9c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vfnmadd132nepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x9c,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd132nepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vfnmadd132nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x9c,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmadd132nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vfnmadd132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x9c,0x71,0x7f]
+ vfnmadd132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x9c,0x72,0x80]
+ vfnmadd132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xac,0xf0]
+ vfnmadd213nepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xac,0xf0]
+ vfnmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xac,0xf0]
+ vfnmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xac,0xf0]
+ vfnmadd213nepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vfnmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xac,0xf0]
+ vfnmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vfnmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xac,0xf0]
+ vfnmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xac,0xf0]
+ vfnmadd213nepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vfnmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xac,0xf0]
+ vfnmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vfnmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xac,0xf0]
+ vfnmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vfnmadd213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xac,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vfnmadd213nepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xac,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd213nepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vfnmadd213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xac,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmadd213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vfnmadd213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xac,0x71,0x7f]
+ vfnmadd213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xac,0x72,0x80]
+ vfnmadd213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vfnmadd213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xac,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmadd213nepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xac,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd213nepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vfnmadd213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xac,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmadd213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vfnmadd213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xac,0x71,0x7f]
+ vfnmadd213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xac,0x72,0x80]
+ vfnmadd213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vfnmadd213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xac,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vfnmadd213nepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xac,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd213nepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vfnmadd213nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xac,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmadd213nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vfnmadd213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xac,0x71,0x7f]
+ vfnmadd213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xac,0x72,0x80]
+ vfnmadd213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xbc,0xf0]
+ vfnmadd231nepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xbc,0xf0]
+ vfnmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xbc,0xf0]
+ vfnmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xbc,0xf0]
+ vfnmadd231nepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vfnmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xbc,0xf0]
+ vfnmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vfnmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xbc,0xf0]
+ vfnmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xbc,0xf0]
+ vfnmadd231nepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vfnmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xbc,0xf0]
+ vfnmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vfnmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xbc,0xf0]
+ vfnmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vfnmadd231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xbc,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vfnmadd231nepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xbc,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd231nepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vfnmadd231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xbc,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmadd231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vfnmadd231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xbc,0x71,0x7f]
+ vfnmadd231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xbc,0x72,0x80]
+ vfnmadd231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vfnmadd231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xbc,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmadd231nepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xbc,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd231nepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vfnmadd231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xbc,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmadd231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vfnmadd231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xbc,0x71,0x7f]
+ vfnmadd231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xbc,0x72,0x80]
+ vfnmadd231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vfnmadd231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xbc,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vfnmadd231nepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xbc,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd231nepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vfnmadd231nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xbc,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmadd231nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vfnmadd231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xbc,0x71,0x7f]
+ vfnmadd231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xbc,0x72,0x80]
+ vfnmadd231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0x9e,0xf0]
+ vfnmsub132nepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0x9e,0xf0]
+ vfnmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x9e,0xf0]
+ vfnmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0x9e,0xf0]
+ vfnmsub132nepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vfnmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0x9e,0xf0]
+ vfnmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vfnmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x9e,0xf0]
+ vfnmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0x9e,0xf0]
+ vfnmsub132nepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vfnmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0x9e,0xf0]
+ vfnmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vfnmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0x9e,0xf0]
+ vfnmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vfnmsub132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0x9e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vfnmsub132nepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x9e,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub132nepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vfnmsub132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x9e,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmsub132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vfnmsub132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x9e,0x71,0x7f]
+ vfnmsub132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x9e,0x72,0x80]
+ vfnmsub132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vfnmsub132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x9e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmsub132nepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x9e,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub132nepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vfnmsub132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x9e,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmsub132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vfnmsub132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0x9e,0x71,0x7f]
+ vfnmsub132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x9e,0x72,0x80]
+ vfnmsub132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vfnmsub132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x9e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vfnmsub132nepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x9e,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub132nepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vfnmsub132nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x9e,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmsub132nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vfnmsub132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x9e,0x71,0x7f]
+ vfnmsub132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x9e,0x72,0x80]
+ vfnmsub132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xae,0xf0]
+ vfnmsub213nepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xae,0xf0]
+ vfnmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xae,0xf0]
+ vfnmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xae,0xf0]
+ vfnmsub213nepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vfnmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xae,0xf0]
+ vfnmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vfnmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xae,0xf0]
+ vfnmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xae,0xf0]
+ vfnmsub213nepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vfnmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xae,0xf0]
+ vfnmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vfnmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xae,0xf0]
+ vfnmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vfnmsub213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xae,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vfnmsub213nepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xae,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub213nepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vfnmsub213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xae,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmsub213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vfnmsub213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xae,0x71,0x7f]
+ vfnmsub213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xae,0x72,0x80]
+ vfnmsub213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vfnmsub213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xae,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmsub213nepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xae,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub213nepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vfnmsub213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xae,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmsub213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vfnmsub213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xae,0x71,0x7f]
+ vfnmsub213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xae,0x72,0x80]
+ vfnmsub213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vfnmsub213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xae,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vfnmsub213nepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xae,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub213nepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vfnmsub213nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xae,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmsub213nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vfnmsub213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xae,0x71,0x7f]
+ vfnmsub213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xae,0x72,0x80]
+ vfnmsub213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xbe,0xf0]
+ vfnmsub231nepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xbe,0xf0]
+ vfnmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xbe,0xf0]
+ vfnmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xbe,0xf0]
+ vfnmsub231nepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vfnmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xbe,0xf0]
+ vfnmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vfnmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xbe,0xf0]
+ vfnmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xbe,0xf0]
+ vfnmsub231nepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vfnmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xbe,0xf0]
+ vfnmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vfnmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xbe,0xf0]
+ vfnmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vfnmsub231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xbe,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vfnmsub231nepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xbe,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub231nepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vfnmsub231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xbe,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmsub231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vfnmsub231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xbe,0x71,0x7f]
+ vfnmsub231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xbe,0x72,0x80]
+ vfnmsub231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vfnmsub231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xbe,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmsub231nepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xbe,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub231nepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vfnmsub231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xbe,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmsub231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vfnmsub231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xbe,0x71,0x7f]
+ vfnmsub231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xbe,0x72,0x80]
+ vfnmsub231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vfnmsub231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xbe,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vfnmsub231nepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xbe,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub231nepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vfnmsub231nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xbe,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmsub231nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vfnmsub231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xbe,0x71,0x7f]
+ vfnmsub231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xbe,0x72,0x80]
+ vfnmsub231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfpclasspbf16 $123, %zmm23, %k5
+// CHECK: encoding: [0x62,0xb3,0x7f,0x48,0x66,0xef,0x7b]
+ vfpclasspbf16 $123, %zmm23, %k5
+
+// CHECK: vfpclasspbf16 $123, %zmm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0xb3,0x7f,0x4f,0x66,0xef,0x7b]
+ vfpclasspbf16 $123, %zmm23, %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, %ymm23, %k5
+// CHECK: encoding: [0x62,0xb3,0x7f,0x28,0x66,0xef,0x7b]
+ vfpclasspbf16 $123, %ymm23, %k5
+
+// CHECK: vfpclasspbf16 $123, %ymm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0xb3,0x7f,0x2f,0x66,0xef,0x7b]
+ vfpclasspbf16 $123, %ymm23, %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, %xmm23, %k5
+// CHECK: encoding: [0x62,0xb3,0x7f,0x08,0x66,0xef,0x7b]
+ vfpclasspbf16 $123, %xmm23, %k5
+
+// CHECK: vfpclasspbf16 $123, %xmm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0xb3,0x7f,0x0f,0x66,0xef,0x7b]
+ vfpclasspbf16 $123, %xmm23, %k5 {%k7}
+
+// CHECK: vfpclasspbf16x $123, 268435456(%rbp,%r14,8), %k5
+// CHECK: encoding: [0x62,0xb3,0x7f,0x08,0x66,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vfpclasspbf16x $123, 268435456(%rbp,%r14,8), %k5
+
+// CHECK: vfpclasspbf16x $123, 291(%r8,%rax,4), %k5 {%k7}
+// CHECK: encoding: [0x62,0xd3,0x7f,0x0f,0x66,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vfpclasspbf16x $123, 291(%r8,%rax,4), %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, (%rip){1to8}, %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x66,0x2d,0x00,0x00,0x00,0x00,0x7b]
+ vfpclasspbf16 $123, (%rip){1to8}, %k5
+
+// CHECK: vfpclasspbf16x $123, -512(,%rbp,2), %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x66,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vfpclasspbf16x $123, -512(,%rbp,2), %k5
+
+// CHECK: vfpclasspbf16x $123, 2032(%rcx), %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x66,0x69,0x7f,0x7b]
+ vfpclasspbf16x $123, 2032(%rcx), %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, -256(%rdx){1to8}, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x1f,0x66,0x6a,0x80,0x7b]
+ vfpclasspbf16 $123, -256(%rdx){1to8}, %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, (%rip){1to16}, %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x66,0x2d,0x00,0x00,0x00,0x00,0x7b]
+ vfpclasspbf16 $123, (%rip){1to16}, %k5
+
+// CHECK: vfpclasspbf16y $123, -1024(,%rbp,2), %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x66,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vfpclasspbf16y $123, -1024(,%rbp,2), %k5
+
+// CHECK: vfpclasspbf16y $123, 4064(%rcx), %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x66,0x69,0x7f,0x7b]
+ vfpclasspbf16y $123, 4064(%rcx), %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, -256(%rdx){1to16}, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x3f,0x66,0x6a,0x80,0x7b]
+ vfpclasspbf16 $123, -256(%rdx){1to16}, %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, (%rip){1to32}, %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x58,0x66,0x2d,0x00,0x00,0x00,0x00,0x7b]
+ vfpclasspbf16 $123, (%rip){1to32}, %k5
+
+// CHECK: vfpclasspbf16z $123, -2048(,%rbp,2), %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x66,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vfpclasspbf16z $123, -2048(,%rbp,2), %k5
+
+// CHECK: vfpclasspbf16z $123, 8128(%rcx), %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x66,0x69,0x7f,0x7b]
+ vfpclasspbf16z $123, 8128(%rcx), %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, -256(%rdx){1to32}, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x5f,0x66,0x6a,0x80,0x7b]
+ vfpclasspbf16 $123, -256(%rdx){1to32}, %k5 {%k7}
+
+// CHECK: vgetexppbf16 %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x42,0xf7]
+ vgetexppbf16 %xmm23, %xmm22
+
+// CHECK: vgetexppbf16 %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x0f,0x42,0xf7]
+ vgetexppbf16 %xmm23, %xmm22 {%k7}
+
+// CHECK: vgetexppbf16 %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x8f,0x42,0xf7]
+ vgetexppbf16 %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vgetexppbf16 %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x42,0xf7]
+ vgetexppbf16 %zmm23, %zmm22
+
+// CHECK: vgetexppbf16 %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x4f,0x42,0xf7]
+ vgetexppbf16 %zmm23, %zmm22 {%k7}
+
+// CHECK: vgetexppbf16 %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x7d,0xcf,0x42,0xf7]
+ vgetexppbf16 %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vgetexppbf16 %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x42,0xf7]
+ vgetexppbf16 %ymm23, %ymm22
+
+// CHECK: vgetexppbf16 %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x2f,0x42,0xf7]
+ vgetexppbf16 %ymm23, %ymm22 {%k7}
+
+// CHECK: vgetexppbf16 %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x7d,0xaf,0x42,0xf7]
+ vgetexppbf16 %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vgetexppbf16 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vgetexppbf16 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vgetexppbf16 291(%r8,%rax,4), %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7d,0x0f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vgetexppbf16 291(%r8,%rax,4), %xmm22 {%k7}
+
+// CHECK: vgetexppbf16 (%rip){1to8}, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x18,0x42,0x35,0x00,0x00,0x00,0x00]
+ vgetexppbf16 (%rip){1to8}, %xmm22
+
+// CHECK: vgetexppbf16 -512(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x42,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vgetexppbf16 -512(,%rbp,2), %xmm22
+
+// CHECK: vgetexppbf16 2032(%rcx), %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x8f,0x42,0x71,0x7f]
+ vgetexppbf16 2032(%rcx), %xmm22 {%k7} {z}
+
+// CHECK: vgetexppbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x9f,0x42,0x72,0x80]
+ vgetexppbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z}
+
+// CHECK: vgetexppbf16 268435456(%rbp,%r14,8), %ymm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vgetexppbf16 268435456(%rbp,%r14,8), %ymm22
+
+// CHECK: vgetexppbf16 291(%r8,%rax,4), %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7d,0x2f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vgetexppbf16 291(%r8,%rax,4), %ymm22 {%k7}
+
+// CHECK: vgetexppbf16 (%rip){1to16}, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x38,0x42,0x35,0x00,0x00,0x00,0x00]
+ vgetexppbf16 (%rip){1to16}, %ymm22
+
+// CHECK: vgetexppbf16 -1024(,%rbp,2), %ymm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x28,0x42,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vgetexppbf16 -1024(,%rbp,2), %ymm22
+
+// CHECK: vgetexppbf16 4064(%rcx), %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xaf,0x42,0x71,0x7f]
+ vgetexppbf16 4064(%rcx), %ymm22 {%k7} {z}
+
+// CHECK: vgetexppbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xbf,0x42,0x72,0x80]
+ vgetexppbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z}
+
+// CHECK: vgetexppbf16 268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vgetexppbf16 268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vgetexppbf16 291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7d,0x4f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vgetexppbf16 291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vgetexppbf16 (%rip){1to32}, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x58,0x42,0x35,0x00,0x00,0x00,0x00]
+ vgetexppbf16 (%rip){1to32}, %zmm22
+
+// CHECK: vgetexppbf16 -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x48,0x42,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vgetexppbf16 -2048(,%rbp,2), %zmm22
+
+// CHECK: vgetexppbf16 8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xcf,0x42,0x71,0x7f]
+ vgetexppbf16 8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vgetexppbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xdf,0x42,0x72,0x80]
+ vgetexppbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x26,0xf7,0x7b]
+ vgetmantpbf16 $123, %zmm23, %zmm22
+
+// CHECK: vgetmantpbf16 $123, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xa3,0x7f,0x4f,0x26,0xf7,0x7b]
+ vgetmantpbf16 $123, %zmm23, %zmm22 {%k7}
+
+// CHECK: vgetmantpbf16 $123, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0x7f,0xcf,0x26,0xf7,0x7b]
+ vgetmantpbf16 $123, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x26,0xf7,0x7b]
+ vgetmantpbf16 $123, %ymm23, %ymm22
+
+// CHECK: vgetmantpbf16 $123, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa3,0x7f,0x2f,0x26,0xf7,0x7b]
+ vgetmantpbf16 $123, %ymm23, %ymm22 {%k7}
+
+// CHECK: vgetmantpbf16 $123, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0x7f,0xaf,0x26,0xf7,0x7b]
+ vgetmantpbf16 $123, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x26,0xf7,0x7b]
+ vgetmantpbf16 $123, %xmm23, %xmm22
+
+// CHECK: vgetmantpbf16 $123, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa3,0x7f,0x0f,0x26,0xf7,0x7b]
+ vgetmantpbf16 $123, %xmm23, %xmm22 {%k7}
+
+// CHECK: vgetmantpbf16 $123, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0x7f,0x8f,0x26,0xf7,0x7b]
+ vgetmantpbf16 $123, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x26,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vgetmantpbf16 $123, 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vgetmantpbf16 $123, 291(%r8,%rax,4), %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc3,0x7f,0x0f,0x26,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantpbf16 $123, 291(%r8,%rax,4), %xmm22 {%k7}
+
+// CHECK: vgetmantpbf16 $123, (%rip){1to8}, %xmm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x18,0x26,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vgetmantpbf16 $123, (%rip){1to8}, %xmm22
+
+// CHECK: vgetmantpbf16 $123, -512(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x08,0x26,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vgetmantpbf16 $123, -512(,%rbp,2), %xmm22
+
+// CHECK: vgetmantpbf16 $123, 2032(%rcx), %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0x8f,0x26,0x71,0x7f,0x7b]
+ vgetmantpbf16 $123, 2032(%rcx), %xmm22 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, -256(%rdx){1to8}, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0x9f,0x26,0x72,0x80,0x7b]
+ vgetmantpbf16 $123, -256(%rdx){1to8}, %xmm22 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, 268435456(%rbp,%r14,8), %ymm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x26,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vgetmantpbf16 $123, 268435456(%rbp,%r14,8), %ymm22
+
+// CHECK: vgetmantpbf16 $123, 291(%r8,%rax,4), %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc3,0x7f,0x2f,0x26,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantpbf16 $123, 291(%r8,%rax,4), %ymm22 {%k7}
+
+// CHECK: vgetmantpbf16 $123, (%rip){1to16}, %ymm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x38,0x26,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vgetmantpbf16 $123, (%rip){1to16}, %ymm22
+
+// CHECK: vgetmantpbf16 $123, -1024(,%rbp,2), %ymm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x28,0x26,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vgetmantpbf16 $123, -1024(,%rbp,2), %ymm22
+
+// CHECK: vgetmantpbf16 $123, 4064(%rcx), %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0xaf,0x26,0x71,0x7f,0x7b]
+ vgetmantpbf16 $123, 4064(%rcx), %ymm22 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, -256(%rdx){1to16}, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0xbf,0x26,0x72,0x80,0x7b]
+ vgetmantpbf16 $123, -256(%rdx){1to16}, %ymm22 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, 268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x26,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vgetmantpbf16 $123, 268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vgetmantpbf16 $123, 291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc3,0x7f,0x4f,0x26,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantpbf16 $123, 291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vgetmantpbf16 $123, (%rip){1to32}, %zmm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x58,0x26,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vgetmantpbf16 $123, (%rip){1to32}, %zmm22
+
+// CHECK: vgetmantpbf16 $123, -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x26,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vgetmantpbf16 $123, -2048(,%rbp,2), %zmm22
+
+// CHECK: vgetmantpbf16 $123, 8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0xcf,0x26,0x71,0x7f,0x7b]
+ vgetmantpbf16 $123, 8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, -256(%rdx){1to32}, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0xdf,0x26,0x72,0x80,0x7b]
+ vgetmantpbf16 $123, -256(%rdx){1to32}, %zmm22 {%k7} {z}
+
+// CHECK: vmaxpbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x85,0x45,0x20,0x5f,0xf0]
+ vmaxpbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vmaxpbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x27,0x5f,0xf0]
+ vmaxpbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vmaxpbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x5f,0xf0]
+ vmaxpbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vmaxpbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x85,0x45,0x40,0x5f,0xf0]
+ vmaxpbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vmaxpbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x47,0x5f,0xf0]
+ vmaxpbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vmaxpbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x5f,0xf0]
+ vmaxpbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vmaxpbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x85,0x45,0x00,0x5f,0xf0]
+ vmaxpbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vmaxpbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x07,0x5f,0xf0]
+ vmaxpbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vmaxpbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0x87,0x5f,0xf0]
+ vmaxpbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vmaxpbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x5f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmaxpbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vmaxpbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x5f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmaxpbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vmaxpbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x5f,0x35,0x00,0x00,0x00,0x00]
+ vmaxpbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vmaxpbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x5f,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vmaxpbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vmaxpbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x5f,0x71,0x7f]
+ vmaxpbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vmaxpbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x5f,0x72,0x80]
+ vmaxpbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vmaxpbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x5f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmaxpbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vmaxpbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x5f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmaxpbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vmaxpbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x5f,0x35,0x00,0x00,0x00,0x00]
+ vmaxpbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vmaxpbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x5f,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vmaxpbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vmaxpbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x5f,0x71,0x7f]
+ vmaxpbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vmaxpbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x5f,0x72,0x80]
+ vmaxpbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vmaxpbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x5f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmaxpbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vmaxpbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x5f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmaxpbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vmaxpbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x5f,0x35,0x00,0x00,0x00,0x00]
+ vmaxpbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vmaxpbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x5f,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vmaxpbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vmaxpbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x5f,0x71,0x7f]
+ vmaxpbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vmaxpbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x5f,0x72,0x80]
+ vmaxpbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vminpbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x85,0x45,0x20,0x5d,0xf0]
+ vminpbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vminpbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x27,0x5d,0xf0]
+ vminpbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vminpbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x5d,0xf0]
+ vminpbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vminpbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x85,0x45,0x40,0x5d,0xf0]
+ vminpbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vminpbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x47,0x5d,0xf0]
+ vminpbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vminpbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x5d,0xf0]
+ vminpbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vminpbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x85,0x45,0x00,0x5d,0xf0]
+ vminpbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vminpbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x07,0x5d,0xf0]
+ vminpbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vminpbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0x87,0x5d,0xf0]
+ vminpbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vminpbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x5d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vminpbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vminpbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x5d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vminpbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vminpbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x5d,0x35,0x00,0x00,0x00,0x00]
+ vminpbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vminpbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x5d,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vminpbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vminpbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x5d,0x71,0x7f]
+ vminpbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vminpbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x5d,0x72,0x80]
+ vminpbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vminpbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x5d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vminpbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vminpbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x5d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vminpbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vminpbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x5d,0x35,0x00,0x00,0x00,0x00]
+ vminpbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vminpbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x5d,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vminpbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vminpbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x5d,0x71,0x7f]
+ vminpbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vminpbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x5d,0x72,0x80]
+ vminpbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vminpbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x5d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vminpbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vminpbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x5d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vminpbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vminpbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x5d,0x35,0x00,0x00,0x00,0x00]
+ vminpbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vminpbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x5d,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vminpbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vminpbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x5d,0x71,0x7f]
+ vminpbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vminpbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x5d,0x72,0x80]
+ vminpbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vmulnepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x85,0x45,0x20,0x59,0xf0]
+ vmulnepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vmulnepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x27,0x59,0xf0]
+ vmulnepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vmulnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x59,0xf0]
+ vmulnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vmulnepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x85,0x45,0x40,0x59,0xf0]
+ vmulnepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vmulnepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x47,0x59,0xf0]
+ vmulnepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vmulnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x59,0xf0]
+ vmulnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vmulnepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x85,0x45,0x00,0x59,0xf0]
+ vmulnepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vmulnepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x07,0x59,0xf0]
+ vmulnepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vmulnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0x87,0x59,0xf0]
+ vmulnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vmulnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x59,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmulnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vmulnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x59,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmulnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vmulnepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x59,0x35,0x00,0x00,0x00,0x00]
+ vmulnepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vmulnepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x59,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vmulnepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vmulnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x59,0x71,0x7f]
+ vmulnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vmulnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x59,0x72,0x80]
+ vmulnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vmulnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x59,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmulnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vmulnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x59,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmulnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vmulnepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x59,0x35,0x00,0x00,0x00,0x00]
+ vmulnepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vmulnepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x59,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vmulnepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vmulnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x59,0x71,0x7f]
+ vmulnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vmulnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x59,0x72,0x80]
+ vmulnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vmulnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x59,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmulnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vmulnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x59,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmulnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vmulnepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x59,0x35,0x00,0x00,0x00,0x00]
+ vmulnepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vmulnepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x59,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vmulnepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vmulnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x59,0x71,0x7f]
+ vmulnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vmulnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x59,0x72,0x80]
+ vmulnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vrcppbf16 %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x7c,0x08,0x4c,0xf7]
+ vrcppbf16 %xmm23, %xmm22
+
+// CHECK: vrcppbf16 %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa6,0x7c,0x0f,0x4c,0xf7]
+ vrcppbf16 %xmm23, %xmm22 {%k7}
+
+// CHECK: vrcppbf16 %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa6,0x7c,0x8f,0x4c,0xf7]
+ vrcppbf16 %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vrcppbf16 %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x7c,0x48,0x4c,0xf7]
+ vrcppbf16 %zmm23, %zmm22
+
+// CHECK: vrcppbf16 %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xa6,0x7c,0x4f,0x4c,0xf7]
+ vrcppbf16 %zmm23, %zmm22 {%k7}
+
+// CHECK: vrcppbf16 %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa6,0x7c,0xcf,0x4c,0xf7]
+ vrcppbf16 %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vrcppbf16 %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x7c,0x28,0x4c,0xf7]
+ vrcppbf16 %ymm23, %ymm22
+
+// CHECK: vrcppbf16 %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa6,0x7c,0x2f,0x4c,0xf7]
+ vrcppbf16 %ymm23, %ymm22 {%k7}
+
+// CHECK: vrcppbf16 %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa6,0x7c,0xaf,0x4c,0xf7]
+ vrcppbf16 %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vrcppbf16 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa6,0x7c,0x08,0x4c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vrcppbf16 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vrcppbf16 291(%r8,%rax,4), %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x7c,0x0f,0x4c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vrcppbf16 291(%r8,%rax,4), %xmm22 {%k7}
+
+// CHECK: vrcppbf16 (%rip){1to8}, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x7c,0x18,0x4c,0x35,0x00,0x00,0x00,0x00]
+ vrcppbf16 (%rip){1to8}, %xmm22
+
+// CHECK: vrcppbf16 -512(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe6,0x7c,0x08,0x4c,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vrcppbf16 -512(,%rbp,2), %xmm22
+
+// CHECK: vrcppbf16 2032(%rcx), %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x7c,0x8f,0x4c,0x71,0x7f]
+ vrcppbf16 2032(%rcx), %xmm22 {%k7} {z}
+
+// CHECK: vrcppbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x7c,0x9f,0x4c,0x72,0x80]
+ vrcppbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z}
+
+// CHECK: vrcppbf16 268435456(%rbp,%r14,8), %ymm22
+// CHECK: encoding: [0x62,0xa6,0x7c,0x28,0x4c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vrcppbf16 268435456(%rbp,%r14,8), %ymm22
+
+// CHECK: vrcppbf16 291(%r8,%rax,4), %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x7c,0x2f,0x4c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vrcppbf16 291(%r8,%rax,4), %ymm22 {%k7}
+
+// CHECK: vrcppbf16 (%rip){1to16}, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x7c,0x38,0x4c,0x35,0x00,0x00,0x00,0x00]
+ vrcppbf16 (%rip){1to16}, %ymm22
+
+// CHECK: vrcppbf16 -1024(,%rbp,2), %ymm22
+// CHECK: encoding: [0x62,0xe6,0x7c,0x28,0x4c,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vrcppbf16 -1024(,%rbp,2), %ymm22
+
+// CHECK: vrcppbf16 4064(%rcx), %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x7c,0xaf,0x4c,0x71,0x7f]
+ vrcppbf16 4064(%rcx), %ymm22 {%k7} {z}
+
+// CHECK: vrcppbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x7c,0xbf,0x4c,0x72,0x80]
+ vrcppbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z}
+
+// CHECK: vrcppbf16 268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa6,0x7c,0x48,0x4c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vrcppbf16 268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vrcppbf16 291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x7c,0x4f,0x4c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vrcppbf16 291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vrcppbf16 (%rip){1to32}, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x7c,0x58,0x4c,0x35,0x00,0x00,0x00,0x00]
+ vrcppbf16 (%rip){1to32}, %zmm22
+
+// CHECK: vrcppbf16 -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe6,0x7c,0x48,0x4c,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vrcppbf16 -2048(,%rbp,2), %zmm22
+
+// CHECK: vrcppbf16 8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x7c,0xcf,0x4c,0x71,0x7f]
+ vrcppbf16 8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vrcppbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x7c,0xdf,0x4c,0x72,0x80]
+ vrcppbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x56,0xf7,0x7b]
+ vreducenepbf16 $123, %zmm23, %zmm22
+
+// CHECK: vreducenepbf16 $123, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xa3,0x7f,0x4f,0x56,0xf7,0x7b]
+ vreducenepbf16 $123, %zmm23, %zmm22 {%k7}
+
+// CHECK: vreducenepbf16 $123, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0x7f,0xcf,0x56,0xf7,0x7b]
+ vreducenepbf16 $123, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x56,0xf7,0x7b]
+ vreducenepbf16 $123, %ymm23, %ymm22
+
+// CHECK: vreducenepbf16 $123, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa3,0x7f,0x2f,0x56,0xf7,0x7b]
+ vreducenepbf16 $123, %ymm23, %ymm22 {%k7}
+
+// CHECK: vreducenepbf16 $123, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0x7f,0xaf,0x56,0xf7,0x7b]
+ vreducenepbf16 $123, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x56,0xf7,0x7b]
+ vreducenepbf16 $123, %xmm23, %xmm22
+
+// CHECK: vreducenepbf16 $123, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa3,0x7f,0x0f,0x56,0xf7,0x7b]
+ vreducenepbf16 $123, %xmm23, %xmm22 {%k7}
+
+// CHECK: vreducenepbf16 $123, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0x7f,0x8f,0x56,0xf7,0x7b]
+ vreducenepbf16 $123, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x56,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vreducenepbf16 $123, 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vreducenepbf16 $123, 291(%r8,%rax,4), %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc3,0x7f,0x0f,0x56,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vreducenepbf16 $123, 291(%r8,%rax,4), %xmm22 {%k7}
+
+// CHECK: vreducenepbf16 $123, (%rip){1to8}, %xmm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x18,0x56,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vreducenepbf16 $123, (%rip){1to8}, %xmm22
+
+// CHECK: vreducenepbf16 $123, -512(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x08,0x56,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vreducenepbf16 $123, -512(,%rbp,2), %xmm22
+
+// CHECK: vreducenepbf16 $123, 2032(%rcx), %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0x8f,0x56,0x71,0x7f,0x7b]
+ vreducenepbf16 $123, 2032(%rcx), %xmm22 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, -256(%rdx){1to8}, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0x9f,0x56,0x72,0x80,0x7b]
+ vreducenepbf16 $123, -256(%rdx){1to8}, %xmm22 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, 268435456(%rbp,%r14,8), %ymm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x56,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vreducenepbf16 $123, 268435456(%rbp,%r14,8), %ymm22
+
+// CHECK: vreducenepbf16 $123, 291(%r8,%rax,4), %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc3,0x7f,0x2f,0x56,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vreducenepbf16 $123, 291(%r8,%rax,4), %ymm22 {%k7}
+
+// CHECK: vreducenepbf16 $123, (%rip){1to16}, %ymm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x38,0x56,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vreducenepbf16 $123, (%rip){1to16}, %ymm22
+
+// CHECK: vreducenepbf16 $123, -1024(,%rbp,2), %ymm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x28,0x56,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vreducenepbf16 $123, -1024(,%rbp,2), %ymm22
+
+// CHECK: vreducenepbf16 $123, 4064(%rcx), %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0xaf,0x56,0x71,0x7f,0x7b]
+ vreducenepbf16 $123, 4064(%rcx), %ymm22 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, -256(%rdx){1to16}, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0xbf,0x56,0x72,0x80,0x7b]
+ vreducenepbf16 $123, -256(%rdx){1to16}, %ymm22 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, 268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x56,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vreducenepbf16 $123, 268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vreducenepbf16 $123, 291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc3,0x7f,0x4f,0x56,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vreducenepbf16 $123, 291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vreducenepbf16 $123, (%rip){1to32}, %zmm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x58,0x56,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vreducenepbf16 $123, (%rip){1to32}, %zmm22
+
+// CHECK: vreducenepbf16 $123, -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x56,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vreducenepbf16 $123, -2048(,%rbp,2), %zmm22
+
+// CHECK: vreducenepbf16 $123, 8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0xcf,0x56,0x71,0x7f,0x7b]
+ vreducenepbf16 $123, 8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, -256(%rdx){1to32}, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0xdf,0x56,0x72,0x80,0x7b]
+ vreducenepbf16 $123, -256(%rdx){1to32}, %zmm22 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x08,0xf7,0x7b]
+ vrndscalenepbf16 $123, %zmm23, %zmm22
+
+// CHECK: vrndscalenepbf16 $123, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xa3,0x7f,0x4f,0x08,0xf7,0x7b]
+ vrndscalenepbf16 $123, %zmm23, %zmm22 {%k7}
+
+// CHECK: vrndscalenepbf16 $123, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0x7f,0xcf,0x08,0xf7,0x7b]
+ vrndscalenepbf16 $123, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x08,0xf7,0x7b]
+ vrndscalenepbf16 $123, %ymm23, %ymm22
+
+// CHECK: vrndscalenepbf16 $123, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa3,0x7f,0x2f,0x08,0xf7,0x7b]
+ vrndscalenepbf16 $123, %ymm23, %ymm22 {%k7}
+
+// CHECK: vrndscalenepbf16 $123, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0x7f,0xaf,0x08,0xf7,0x7b]
+ vrndscalenepbf16 $123, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x08,0xf7,0x7b]
+ vrndscalenepbf16 $123, %xmm23, %xmm22
+
+// CHECK: vrndscalenepbf16 $123, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa3,0x7f,0x0f,0x08,0xf7,0x7b]
+ vrndscalenepbf16 $123, %xmm23, %xmm22 {%k7}
+
+// CHECK: vrndscalenepbf16 $123, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0x7f,0x8f,0x08,0xf7,0x7b]
+ vrndscalenepbf16 $123, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x08,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vrndscalenepbf16 $123, 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vrndscalenepbf16 $123, 291(%r8,%rax,4), %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc3,0x7f,0x0f,0x08,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vrndscalenepbf16 $123, 291(%r8,%rax,4), %xmm22 {%k7}
+
+// CHECK: vrndscalenepbf16 $123, (%rip){1to8}, %xmm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x18,0x08,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vrndscalenepbf16 $123, (%rip){1to8}, %xmm22
+
+// CHECK: vrndscalenepbf16 $123, -512(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x08,0x08,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vrndscalenepbf16 $123, -512(,%rbp,2), %xmm22
+
+// CHECK: vrndscalenepbf16 $123, 2032(%rcx), %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0x8f,0x08,0x71,0x7f,0x7b]
+ vrndscalenepbf16 $123, 2032(%rcx), %xmm22 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, -256(%rdx){1to8}, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0x9f,0x08,0x72,0x80,0x7b]
+ vrndscalenepbf16 $123, -256(%rdx){1to8}, %xmm22 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, 268435456(%rbp,%r14,8), %ymm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x08,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vrndscalenepbf16 $123, 268435456(%rbp,%r14,8), %ymm22
+
+// CHECK: vrndscalenepbf16 $123, 291(%r8,%rax,4), %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc3,0x7f,0x2f,0x08,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vrndscalenepbf16 $123, 291(%r8,%rax,4), %ymm22 {%k7}
+
+// CHECK: vrndscalenepbf16 $123, (%rip){1to16}, %ymm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x38,0x08,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vrndscalenepbf16 $123, (%rip){1to16}, %ymm22
+
+// CHECK: vrndscalenepbf16 $123, -1024(,%rbp,2), %ymm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x28,0x08,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vrndscalenepbf16 $123, -1024(,%rbp,2), %ymm22
+
+// CHECK: vrndscalenepbf16 $123, 4064(%rcx), %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0xaf,0x08,0x71,0x7f,0x7b]
+ vrndscalenepbf16 $123, 4064(%rcx), %ymm22 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, -256(%rdx){1to16}, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0xbf,0x08,0x72,0x80,0x7b]
+ vrndscalenepbf16 $123, -256(%rdx){1to16}, %ymm22 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, 268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x08,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vrndscalenepbf16 $123, 268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vrndscalenepbf16 $123, 291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc3,0x7f,0x4f,0x08,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vrndscalenepbf16 $123, 291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vrndscalenepbf16 $123, (%rip){1to32}, %zmm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x58,0x08,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vrndscalenepbf16 $123, (%rip){1to32}, %zmm22
+
+// CHECK: vrndscalenepbf16 $123, -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x08,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vrndscalenepbf16 $123, -2048(,%rbp,2), %zmm22
+
+// CHECK: vrndscalenepbf16 $123, 8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0xcf,0x08,0x71,0x7f,0x7b]
+ vrndscalenepbf16 $123, 8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, -256(%rdx){1to32}, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0xdf,0x08,0x72,0x80,0x7b]
+ vrndscalenepbf16 $123, -256(%rdx){1to32}, %zmm22 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x7c,0x08,0x4e,0xf7]
+ vrsqrtpbf16 %xmm23, %xmm22
+
+// CHECK: vrsqrtpbf16 %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa6,0x7c,0x0f,0x4e,0xf7]
+ vrsqrtpbf16 %xmm23, %xmm22 {%k7}
+
+// CHECK: vrsqrtpbf16 %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa6,0x7c,0x8f,0x4e,0xf7]
+ vrsqrtpbf16 %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x7c,0x48,0x4e,0xf7]
+ vrsqrtpbf16 %zmm23, %zmm22
+
+// CHECK: vrsqrtpbf16 %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xa6,0x7c,0x4f,0x4e,0xf7]
+ vrsqrtpbf16 %zmm23, %zmm22 {%k7}
+
+// CHECK: vrsqrtpbf16 %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa6,0x7c,0xcf,0x4e,0xf7]
+ vrsqrtpbf16 %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x7c,0x28,0x4e,0xf7]
+ vrsqrtpbf16 %ymm23, %ymm22
+
+// CHECK: vrsqrtpbf16 %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa6,0x7c,0x2f,0x4e,0xf7]
+ vrsqrtpbf16 %ymm23, %ymm22 {%k7}
+
+// CHECK: vrsqrtpbf16 %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa6,0x7c,0xaf,0x4e,0xf7]
+ vrsqrtpbf16 %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa6,0x7c,0x08,0x4e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vrsqrtpbf16 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vrsqrtpbf16 291(%r8,%rax,4), %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x7c,0x0f,0x4e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vrsqrtpbf16 291(%r8,%rax,4), %xmm22 {%k7}
+
+// CHECK: vrsqrtpbf16 (%rip){1to8}, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x7c,0x18,0x4e,0x35,0x00,0x00,0x00,0x00]
+ vrsqrtpbf16 (%rip){1to8}, %xmm22
+
+// CHECK: vrsqrtpbf16 -512(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe6,0x7c,0x08,0x4e,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vrsqrtpbf16 -512(,%rbp,2), %xmm22
+
+// CHECK: vrsqrtpbf16 2032(%rcx), %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x7c,0x8f,0x4e,0x71,0x7f]
+ vrsqrtpbf16 2032(%rcx), %xmm22 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x7c,0x9f,0x4e,0x72,0x80]
+ vrsqrtpbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 268435456(%rbp,%r14,8), %ymm22
+// CHECK: encoding: [0x62,0xa6,0x7c,0x28,0x4e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vrsqrtpbf16 268435456(%rbp,%r14,8), %ymm22
+
+// CHECK: vrsqrtpbf16 291(%r8,%rax,4), %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x7c,0x2f,0x4e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vrsqrtpbf16 291(%r8,%rax,4), %ymm22 {%k7}
+
+// CHECK: vrsqrtpbf16 (%rip){1to16}, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x7c,0x38,0x4e,0x35,0x00,0x00,0x00,0x00]
+ vrsqrtpbf16 (%rip){1to16}, %ymm22
+
+// CHECK: vrsqrtpbf16 -1024(,%rbp,2), %ymm22
+// CHECK: encoding: [0x62,0xe6,0x7c,0x28,0x4e,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vrsqrtpbf16 -1024(,%rbp,2), %ymm22
+
+// CHECK: vrsqrtpbf16 4064(%rcx), %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x7c,0xaf,0x4e,0x71,0x7f]
+ vrsqrtpbf16 4064(%rcx), %ymm22 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x7c,0xbf,0x4e,0x72,0x80]
+ vrsqrtpbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa6,0x7c,0x48,0x4e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vrsqrtpbf16 268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vrsqrtpbf16 291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x7c,0x4f,0x4e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vrsqrtpbf16 291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vrsqrtpbf16 (%rip){1to32}, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x7c,0x58,0x4e,0x35,0x00,0x00,0x00,0x00]
+ vrsqrtpbf16 (%rip){1to32}, %zmm22
+
+// CHECK: vrsqrtpbf16 -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe6,0x7c,0x48,0x4e,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vrsqrtpbf16 -2048(,%rbp,2), %zmm22
+
+// CHECK: vrsqrtpbf16 8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x7c,0xcf,0x4e,0x71,0x7f]
+ vrsqrtpbf16 8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x7c,0xdf,0x4e,0x72,0x80]
+ vrsqrtpbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z}
+
+// CHECK: vscalefpbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0x2c,0xf0]
+ vscalefpbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vscalefpbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0x2c,0xf0]
+ vscalefpbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vscalefpbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x2c,0xf0]
+ vscalefpbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vscalefpbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0x2c,0xf0]
+ vscalefpbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vscalefpbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0x2c,0xf0]
+ vscalefpbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vscalefpbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x2c,0xf0]
+ vscalefpbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vscalefpbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0x2c,0xf0]
+ vscalefpbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vscalefpbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0x2c,0xf0]
+ vscalefpbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vscalefpbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0x2c,0xf0]
+ vscalefpbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vscalefpbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0x2c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vscalefpbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vscalefpbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0x2c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vscalefpbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vscalefpbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x2c,0x35,0x00,0x00,0x00,0x00]
+ vscalefpbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vscalefpbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x2c,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vscalefpbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vscalefpbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x2c,0x71,0x7f]
+ vscalefpbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vscalefpbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x2c,0x72,0x80]
+ vscalefpbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vscalefpbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0x2c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vscalefpbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vscalefpbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x2c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vscalefpbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vscalefpbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x2c,0x35,0x00,0x00,0x00,0x00]
+ vscalefpbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vscalefpbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x2c,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vscalefpbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vscalefpbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0x2c,0x71,0x7f]
+ vscalefpbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vscalefpbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x2c,0x72,0x80]
+ vscalefpbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vscalefpbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0x2c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vscalefpbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vscalefpbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x2c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vscalefpbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vscalefpbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x2c,0x35,0x00,0x00,0x00,0x00]
+ vscalefpbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vscalefpbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x2c,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vscalefpbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vscalefpbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x2c,0x71,0x7f]
+ vscalefpbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vscalefpbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x2c,0x72,0x80]
+ vscalefpbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x51,0xf7]
+ vsqrtnepbf16 %xmm23, %xmm22
+
+// CHECK: vsqrtnepbf16 %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x0f,0x51,0xf7]
+ vsqrtnepbf16 %xmm23, %xmm22 {%k7}
+
+// CHECK: vsqrtnepbf16 %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x8f,0x51,0xf7]
+ vsqrtnepbf16 %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x51,0xf7]
+ vsqrtnepbf16 %zmm23, %zmm22
+
+// CHECK: vsqrtnepbf16 %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x4f,0x51,0xf7]
+ vsqrtnepbf16 %zmm23, %zmm22 {%k7}
+
+// CHECK: vsqrtnepbf16 %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x7d,0xcf,0x51,0xf7]
+ vsqrtnepbf16 %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x51,0xf7]
+ vsqrtnepbf16 %ymm23, %ymm22
+
+// CHECK: vsqrtnepbf16 %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x2f,0x51,0xf7]
+ vsqrtnepbf16 %ymm23, %ymm22 {%k7}
+
+// CHECK: vsqrtnepbf16 %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x7d,0xaf,0x51,0xf7]
+ vsqrtnepbf16 %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsqrtnepbf16 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vsqrtnepbf16 291(%r8,%rax,4), %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7d,0x0f,0x51,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsqrtnepbf16 291(%r8,%rax,4), %xmm22 {%k7}
+
+// CHECK: vsqrtnepbf16 (%rip){1to8}, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x18,0x51,0x35,0x00,0x00,0x00,0x00]
+ vsqrtnepbf16 (%rip){1to8}, %xmm22
+
+// CHECK: vsqrtnepbf16 -512(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x51,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vsqrtnepbf16 -512(,%rbp,2), %xmm22
+
+// CHECK: vsqrtnepbf16 2032(%rcx), %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x8f,0x51,0x71,0x7f]
+ vsqrtnepbf16 2032(%rcx), %xmm22 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x9f,0x51,0x72,0x80]
+ vsqrtnepbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 268435456(%rbp,%r14,8), %ymm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsqrtnepbf16 268435456(%rbp,%r14,8), %ymm22
+
+// CHECK: vsqrtnepbf16 291(%r8,%rax,4), %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7d,0x2f,0x51,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsqrtnepbf16 291(%r8,%rax,4), %ymm22 {%k7}
+
+// CHECK: vsqrtnepbf16 (%rip){1to16}, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x38,0x51,0x35,0x00,0x00,0x00,0x00]
+ vsqrtnepbf16 (%rip){1to16}, %ymm22
+
+// CHECK: vsqrtnepbf16 -1024(,%rbp,2), %ymm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x28,0x51,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vsqrtnepbf16 -1024(,%rbp,2), %ymm22
+
+// CHECK: vsqrtnepbf16 4064(%rcx), %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xaf,0x51,0x71,0x7f]
+ vsqrtnepbf16 4064(%rcx), %ymm22 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xbf,0x51,0x72,0x80]
+ vsqrtnepbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsqrtnepbf16 268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vsqrtnepbf16 291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7d,0x4f,0x51,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsqrtnepbf16 291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vsqrtnepbf16 (%rip){1to32}, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x58,0x51,0x35,0x00,0x00,0x00,0x00]
+ vsqrtnepbf16 (%rip){1to32}, %zmm22
+
+// CHECK: vsqrtnepbf16 -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x48,0x51,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vsqrtnepbf16 -2048(,%rbp,2), %zmm22
+
+// CHECK: vsqrtnepbf16 8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xcf,0x51,0x71,0x7f]
+ vsqrtnepbf16 8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xdf,0x51,0x72,0x80]
+ vsqrtnepbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z}
+
+// CHECK: vsubnepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x85,0x45,0x20,0x5c,0xf0]
+ vsubnepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vsubnepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x27,0x5c,0xf0]
+ vsubnepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vsubnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x5c,0xf0]
+ vsubnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vsubnepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x85,0x45,0x40,0x5c,0xf0]
+ vsubnepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vsubnepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x47,0x5c,0xf0]
+ vsubnepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vsubnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x5c,0xf0]
+ vsubnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vsubnepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x85,0x45,0x00,0x5c,0xf0]
+ vsubnepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vsubnepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x07,0x5c,0xf0]
+ vsubnepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vsubnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0x87,0x5c,0xf0]
+ vsubnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vsubnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x5c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsubnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vsubnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x5c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsubnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vsubnepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x5c,0x35,0x00,0x00,0x00,0x00]
+ vsubnepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vsubnepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x5c,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vsubnepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vsubnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x5c,0x71,0x7f]
+ vsubnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vsubnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x5c,0x72,0x80]
+ vsubnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vsubnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x5c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsubnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vsubnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x5c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsubnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vsubnepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x5c,0x35,0x00,0x00,0x00,0x00]
+ vsubnepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vsubnepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x5c,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vsubnepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vsubnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x5c,0x71,0x7f]
+ vsubnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vsubnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x5c,0x72,0x80]
+ vsubnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vsubnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x5c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsubnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vsubnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x5c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsubnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vsubnepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x5c,0x35,0x00,0x00,0x00,0x00]
+ vsubnepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vsubnepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x5c,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vsubnepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vsubnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x5c,0x71,0x7f]
+ vsubnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vsubnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x5c,0x72,0x80]
+ vsubnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
diff --git a/llvm/test/MC/X86/avx10.2-bf16-64-intel.s b/llvm/test/MC/X86/avx10.2-bf16-64-intel.s
new file mode 100644
index 0000000000000..5f3dc45ba7745
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-bf16-64-intel.s
@@ -0,0 +1,3014 @@
+// RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vaddnepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0x20,0x58,0xf0]
+ vaddnepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vaddnepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0x27,0x58,0xf0]
+ vaddnepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vaddnepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x58,0xf0]
+ vaddnepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vaddnepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x40,0x58,0xf0]
+ vaddnepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vaddnepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x47,0x58,0xf0]
+ vaddnepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vaddnepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x58,0xf0]
+ vaddnepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vaddnepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x00,0x58,0xf0]
+ vaddnepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vaddnepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x07,0x58,0xf0]
+ vaddnepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vaddnepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x87,0x58,0xf0]
+ vaddnepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vaddnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x58,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vaddnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vaddnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x58,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vaddnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vaddnepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x58,0x35,0x00,0x00,0x00,0x00]
+ vaddnepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vaddnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x58,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vaddnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vaddnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x58,0x71,0x7f]
+ vaddnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vaddnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x58,0x72,0x80]
+ vaddnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vaddnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x58,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vaddnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vaddnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x58,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vaddnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vaddnepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x58,0x35,0x00,0x00,0x00,0x00]
+ vaddnepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vaddnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x58,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vaddnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vaddnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x58,0x71,0x7f]
+ vaddnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vaddnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x58,0x72,0x80]
+ vaddnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vaddnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x58,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vaddnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vaddnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x58,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vaddnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vaddnepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x58,0x35,0x00,0x00,0x00,0x00]
+ vaddnepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vaddnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x58,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vaddnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vaddnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x58,0x71,0x7f]
+ vaddnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vaddnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x58,0x72,0x80]
+ vaddnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vcmppbf16 k5, ymm23, ymm24, 123
+// CHECK: encoding: [0x62,0x93,0x47,0x20,0xc2,0xe8,0x7b]
+ vcmppbf16 k5, ymm23, ymm24, 123
+
+// CHECK: vcmppbf16 k5 {k7}, ymm23, ymm24, 123
+// CHECK: encoding: [0x62,0x93,0x47,0x27,0xc2,0xe8,0x7b]
+ vcmppbf16 k5 {k7}, ymm23, ymm24, 123
+
+// CHECK: vcmppbf16 k5, xmm23, xmm24, 123
+// CHECK: encoding: [0x62,0x93,0x47,0x00,0xc2,0xe8,0x7b]
+ vcmppbf16 k5, xmm23, xmm24, 123
+
+// CHECK: vcmppbf16 k5 {k7}, xmm23, xmm24, 123
+// CHECK: encoding: [0x62,0x93,0x47,0x07,0xc2,0xe8,0x7b]
+ vcmppbf16 k5 {k7}, xmm23, xmm24, 123
+
+// CHECK: vcmppbf16 k5, zmm23, zmm24, 123
+// CHECK: encoding: [0x62,0x93,0x47,0x40,0xc2,0xe8,0x7b]
+ vcmppbf16 k5, zmm23, zmm24, 123
+
+// CHECK: vcmppbf16 k5 {k7}, zmm23, zmm24, 123
+// CHECK: encoding: [0x62,0x93,0x47,0x47,0xc2,0xe8,0x7b]
+ vcmppbf16 k5 {k7}, zmm23, zmm24, 123
+
+// CHECK: vcmppbf16 k5, zmm23, zmmword ptr [rbp + 8*r14 + 268435456], 123
+// CHECK: encoding: [0x62,0xb3,0x47,0x40,0xc2,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vcmppbf16 k5, zmm23, zmmword ptr [rbp + 8*r14 + 268435456], 123
+
+// CHECK: vcmppbf16 k5 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291], 123
+// CHECK: encoding: [0x62,0xd3,0x47,0x47,0xc2,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vcmppbf16 k5 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291], 123
+
+// CHECK: vcmppbf16 k5, zmm23, word ptr [rip]{1to32}, 123
+// CHECK: encoding: [0x62,0xf3,0x47,0x50,0xc2,0x2d,0x00,0x00,0x00,0x00,0x7b]
+ vcmppbf16 k5, zmm23, word ptr [rip]{1to32}, 123
+
+// CHECK: vcmppbf16 k5, zmm23, zmmword ptr [2*rbp - 2048], 123
+// CHECK: encoding: [0x62,0xf3,0x47,0x40,0xc2,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vcmppbf16 k5, zmm23, zmmword ptr [2*rbp - 2048], 123
+
+// CHECK: vcmppbf16 k5 {k7}, zmm23, zmmword ptr [rcx + 8128], 123
+// CHECK: encoding: [0x62,0xf3,0x47,0x47,0xc2,0x69,0x7f,0x7b]
+ vcmppbf16 k5 {k7}, zmm23, zmmword ptr [rcx + 8128], 123
+
+// CHECK: vcmppbf16 k5 {k7}, zmm23, word ptr [rdx - 256]{1to32}, 123
+// CHECK: encoding: [0x62,0xf3,0x47,0x57,0xc2,0x6a,0x80,0x7b]
+ vcmppbf16 k5 {k7}, zmm23, word ptr [rdx - 256]{1to32}, 123
+
+// CHECK: vcmppbf16 k5, xmm23, xmmword ptr [rbp + 8*r14 + 268435456], 123
+// CHECK: encoding: [0x62,0xb3,0x47,0x00,0xc2,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vcmppbf16 k5, xmm23, xmmword ptr [rbp + 8*r14 + 268435456], 123
+
+// CHECK: vcmppbf16 k5 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291], 123
+// CHECK: encoding: [0x62,0xd3,0x47,0x07,0xc2,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vcmppbf16 k5 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291], 123
+
+// CHECK: vcmppbf16 k5, xmm23, word ptr [rip]{1to8}, 123
+// CHECK: encoding: [0x62,0xf3,0x47,0x10,0xc2,0x2d,0x00,0x00,0x00,0x00,0x7b]
+ vcmppbf16 k5, xmm23, word ptr [rip]{1to8}, 123
+
+// CHECK: vcmppbf16 k5, xmm23, xmmword ptr [2*rbp - 512], 123
+// CHECK: encoding: [0x62,0xf3,0x47,0x00,0xc2,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vcmppbf16 k5, xmm23, xmmword ptr [2*rbp - 512], 123
+
+// CHECK: vcmppbf16 k5 {k7}, xmm23, xmmword ptr [rcx + 2032], 123
+// CHECK: encoding: [0x62,0xf3,0x47,0x07,0xc2,0x69,0x7f,0x7b]
+ vcmppbf16 k5 {k7}, xmm23, xmmword ptr [rcx + 2032], 123
+
+// CHECK: vcmppbf16 k5 {k7}, xmm23, word ptr [rdx - 256]{1to8}, 123
+// CHECK: encoding: [0x62,0xf3,0x47,0x17,0xc2,0x6a,0x80,0x7b]
+ vcmppbf16 k5 {k7}, xmm23, word ptr [rdx - 256]{1to8}, 123
+
+// CHECK: vcmppbf16 k5, ymm23, ymmword ptr [rbp + 8*r14 + 268435456], 123
+// CHECK: encoding: [0x62,0xb3,0x47,0x20,0xc2,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vcmppbf16 k5, ymm23, ymmword ptr [rbp + 8*r14 + 268435456], 123
+
+// CHECK: vcmppbf16 k5 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291], 123
+// CHECK: encoding: [0x62,0xd3,0x47,0x27,0xc2,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vcmppbf16 k5 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291], 123
+
+// CHECK: vcmppbf16 k5, ymm23, word ptr [rip]{1to16}, 123
+// CHECK: encoding: [0x62,0xf3,0x47,0x30,0xc2,0x2d,0x00,0x00,0x00,0x00,0x7b]
+ vcmppbf16 k5, ymm23, word ptr [rip]{1to16}, 123
+
+// CHECK: vcmppbf16 k5, ymm23, ymmword ptr [2*rbp - 1024], 123
+// CHECK: encoding: [0x62,0xf3,0x47,0x20,0xc2,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vcmppbf16 k5, ymm23, ymmword ptr [2*rbp - 1024], 123
+
+// CHECK: vcmppbf16 k5 {k7}, ymm23, ymmword ptr [rcx + 4064], 123
+// CHECK: encoding: [0x62,0xf3,0x47,0x27,0xc2,0x69,0x7f,0x7b]
+ vcmppbf16 k5 {k7}, ymm23, ymmword ptr [rcx + 4064], 123
+
+// CHECK: vcmppbf16 k5 {k7}, ymm23, word ptr [rdx - 256]{1to16}, 123
+// CHECK: encoding: [0x62,0xf3,0x47,0x37,0xc2,0x6a,0x80,0x7b]
+ vcmppbf16 k5 {k7}, ymm23, word ptr [rdx - 256]{1to16}, 123
+
+// CHECK: vcomsbf16 xmm22, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x2f,0xf7]
+ vcomsbf16 xmm22, xmm23
+
+// CHECK: vcomsbf16 xmm22, word ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcomsbf16 xmm22, word ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcomsbf16 xmm22, word ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcomsbf16 xmm22, word ptr [r8 + 4*rax + 291]
+
+// CHECK: vcomsbf16 xmm22, word ptr [rip]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x2f,0x35,0x00,0x00,0x00,0x00]
+ vcomsbf16 xmm22, word ptr [rip]
+
+// CHECK: vcomsbf16 xmm22, word ptr [2*rbp - 64]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x2f,0x34,0x6d,0xc0,0xff,0xff,0xff]
+ vcomsbf16 xmm22, word ptr [2*rbp - 64]
+
+// CHECK: vcomsbf16 xmm22, word ptr [rcx + 254]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x2f,0x71,0x7f]
+ vcomsbf16 xmm22, word ptr [rcx + 254]
+
+// CHECK: vcomsbf16 xmm22, word ptr [rdx - 256]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x2f,0x72,0x80]
+ vcomsbf16 xmm22, word ptr [rdx - 256]
+
+// CHECK: vdivnepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0x20,0x5e,0xf0]
+ vdivnepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vdivnepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0x27,0x5e,0xf0]
+ vdivnepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vdivnepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x5e,0xf0]
+ vdivnepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vdivnepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x40,0x5e,0xf0]
+ vdivnepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vdivnepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x47,0x5e,0xf0]
+ vdivnepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vdivnepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x5e,0xf0]
+ vdivnepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vdivnepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x00,0x5e,0xf0]
+ vdivnepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vdivnepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x07,0x5e,0xf0]
+ vdivnepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vdivnepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x87,0x5e,0xf0]
+ vdivnepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vdivnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x5e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vdivnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vdivnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x5e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vdivnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vdivnepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x5e,0x35,0x00,0x00,0x00,0x00]
+ vdivnepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vdivnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x5e,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vdivnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vdivnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x5e,0x71,0x7f]
+ vdivnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vdivnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x5e,0x72,0x80]
+ vdivnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vdivnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x5e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vdivnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vdivnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x5e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vdivnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vdivnepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x5e,0x35,0x00,0x00,0x00,0x00]
+ vdivnepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vdivnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x5e,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vdivnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vdivnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x5e,0x71,0x7f]
+ vdivnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vdivnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x5e,0x72,0x80]
+ vdivnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vdivnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x5e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vdivnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vdivnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x5e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vdivnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vdivnepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x5e,0x35,0x00,0x00,0x00,0x00]
+ vdivnepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vdivnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x5e,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vdivnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vdivnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x5e,0x71,0x7f]
+ vdivnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vdivnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x5e,0x72,0x80]
+ vdivnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vfmadd132nepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0x98,0xf0]
+ vfmadd132nepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vfmadd132nepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0x98,0xf0]
+ vfmadd132nepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vfmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x98,0xf0]
+ vfmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vfmadd132nepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0x98,0xf0]
+ vfmadd132nepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vfmadd132nepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0x98,0xf0]
+ vfmadd132nepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vfmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x98,0xf0]
+ vfmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vfmadd132nepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0x98,0xf0]
+ vfmadd132nepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vfmadd132nepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0x98,0xf0]
+ vfmadd132nepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vfmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0x98,0xf0]
+ vfmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vfmadd132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmadd132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0x98,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmadd132nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x98,0x35,0x00,0x00,0x00,0x00]
+ vfmadd132nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vfmadd132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x98,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfmadd132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vfmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x98,0x71,0x7f]
+ vfmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vfmadd132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x98,0x72,0x80]
+ vfmadd132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vfmadd132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmadd132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x98,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmadd132nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x98,0x35,0x00,0x00,0x00,0x00]
+ vfmadd132nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vfmadd132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x98,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfmadd132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vfmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0x98,0x71,0x7f]
+ vfmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vfmadd132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x98,0x72,0x80]
+ vfmadd132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vfmadd132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmadd132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x98,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmadd132nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x98,0x35,0x00,0x00,0x00,0x00]
+ vfmadd132nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vfmadd132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x98,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfmadd132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vfmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x98,0x71,0x7f]
+ vfmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vfmadd132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x98,0x72,0x80]
+ vfmadd132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vfmadd213nepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xa8,0xf0]
+ vfmadd213nepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vfmadd213nepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xa8,0xf0]
+ vfmadd213nepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vfmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xa8,0xf0]
+ vfmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vfmadd213nepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xa8,0xf0]
+ vfmadd213nepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vfmadd213nepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xa8,0xf0]
+ vfmadd213nepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vfmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xa8,0xf0]
+ vfmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vfmadd213nepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xa8,0xf0]
+ vfmadd213nepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vfmadd213nepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xa8,0xf0]
+ vfmadd213nepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vfmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xa8,0xf0]
+ vfmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vfmadd213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmadd213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xa8,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmadd213nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xa8,0x35,0x00,0x00,0x00,0x00]
+ vfmadd213nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vfmadd213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xa8,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfmadd213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vfmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xa8,0x71,0x7f]
+ vfmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vfmadd213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xa8,0x72,0x80]
+ vfmadd213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vfmadd213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmadd213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xa8,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmadd213nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xa8,0x35,0x00,0x00,0x00,0x00]
+ vfmadd213nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vfmadd213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xa8,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfmadd213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vfmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xa8,0x71,0x7f]
+ vfmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vfmadd213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xa8,0x72,0x80]
+ vfmadd213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vfmadd213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmadd213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xa8,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmadd213nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xa8,0x35,0x00,0x00,0x00,0x00]
+ vfmadd213nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vfmadd213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xa8,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfmadd213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vfmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xa8,0x71,0x7f]
+ vfmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vfmadd213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xa8,0x72,0x80]
+ vfmadd213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vfmadd231nepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xb8,0xf0]
+ vfmadd231nepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vfmadd231nepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xb8,0xf0]
+ vfmadd231nepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vfmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xb8,0xf0]
+ vfmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vfmadd231nepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xb8,0xf0]
+ vfmadd231nepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vfmadd231nepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xb8,0xf0]
+ vfmadd231nepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vfmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xb8,0xf0]
+ vfmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vfmadd231nepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xb8,0xf0]
+ vfmadd231nepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vfmadd231nepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xb8,0xf0]
+ vfmadd231nepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vfmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xb8,0xf0]
+ vfmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vfmadd231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmadd231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xb8,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmadd231nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xb8,0x35,0x00,0x00,0x00,0x00]
+ vfmadd231nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vfmadd231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xb8,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfmadd231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vfmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xb8,0x71,0x7f]
+ vfmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vfmadd231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xb8,0x72,0x80]
+ vfmadd231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vfmadd231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmadd231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xb8,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmadd231nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xb8,0x35,0x00,0x00,0x00,0x00]
+ vfmadd231nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vfmadd231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xb8,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfmadd231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vfmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xb8,0x71,0x7f]
+ vfmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vfmadd231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xb8,0x72,0x80]
+ vfmadd231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vfmadd231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmadd231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xb8,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmadd231nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xb8,0x35,0x00,0x00,0x00,0x00]
+ vfmadd231nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vfmadd231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xb8,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfmadd231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vfmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xb8,0x71,0x7f]
+ vfmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vfmadd231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xb8,0x72,0x80]
+ vfmadd231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vfmsub132nepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0x9a,0xf0]
+ vfmsub132nepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vfmsub132nepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0x9a,0xf0]
+ vfmsub132nepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vfmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x9a,0xf0]
+ vfmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vfmsub132nepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0x9a,0xf0]
+ vfmsub132nepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vfmsub132nepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0x9a,0xf0]
+ vfmsub132nepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vfmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x9a,0xf0]
+ vfmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vfmsub132nepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0x9a,0xf0]
+ vfmsub132nepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vfmsub132nepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0x9a,0xf0]
+ vfmsub132nepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vfmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0x9a,0xf0]
+ vfmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vfmsub132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmsub132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0x9a,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmsub132nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x9a,0x35,0x00,0x00,0x00,0x00]
+ vfmsub132nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vfmsub132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x9a,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfmsub132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vfmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x9a,0x71,0x7f]
+ vfmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vfmsub132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x9a,0x72,0x80]
+ vfmsub132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vfmsub132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmsub132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x9a,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmsub132nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x9a,0x35,0x00,0x00,0x00,0x00]
+ vfmsub132nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vfmsub132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x9a,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfmsub132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vfmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0x9a,0x71,0x7f]
+ vfmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vfmsub132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x9a,0x72,0x80]
+ vfmsub132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vfmsub132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmsub132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x9a,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmsub132nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x9a,0x35,0x00,0x00,0x00,0x00]
+ vfmsub132nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vfmsub132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x9a,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfmsub132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vfmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x9a,0x71,0x7f]
+ vfmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vfmsub132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x9a,0x72,0x80]
+ vfmsub132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vfmsub213nepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xaa,0xf0]
+ vfmsub213nepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vfmsub213nepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xaa,0xf0]
+ vfmsub213nepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vfmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xaa,0xf0]
+ vfmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vfmsub213nepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xaa,0xf0]
+ vfmsub213nepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vfmsub213nepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xaa,0xf0]
+ vfmsub213nepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vfmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xaa,0xf0]
+ vfmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vfmsub213nepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xaa,0xf0]
+ vfmsub213nepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vfmsub213nepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xaa,0xf0]
+ vfmsub213nepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vfmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xaa,0xf0]
+ vfmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vfmsub213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmsub213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xaa,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmsub213nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xaa,0x35,0x00,0x00,0x00,0x00]
+ vfmsub213nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vfmsub213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xaa,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfmsub213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vfmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xaa,0x71,0x7f]
+ vfmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vfmsub213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xaa,0x72,0x80]
+ vfmsub213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vfmsub213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmsub213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xaa,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmsub213nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xaa,0x35,0x00,0x00,0x00,0x00]
+ vfmsub213nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vfmsub213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xaa,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfmsub213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vfmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xaa,0x71,0x7f]
+ vfmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vfmsub213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xaa,0x72,0x80]
+ vfmsub213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vfmsub213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmsub213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xaa,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmsub213nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xaa,0x35,0x00,0x00,0x00,0x00]
+ vfmsub213nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vfmsub213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xaa,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfmsub213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vfmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xaa,0x71,0x7f]
+ vfmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vfmsub213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xaa,0x72,0x80]
+ vfmsub213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vfmsub231nepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xba,0xf0]
+ vfmsub231nepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vfmsub231nepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xba,0xf0]
+ vfmsub231nepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vfmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xba,0xf0]
+ vfmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vfmsub231nepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xba,0xf0]
+ vfmsub231nepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vfmsub231nepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xba,0xf0]
+ vfmsub231nepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vfmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xba,0xf0]
+ vfmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vfmsub231nepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xba,0xf0]
+ vfmsub231nepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vfmsub231nepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xba,0xf0]
+ vfmsub231nepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vfmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xba,0xf0]
+ vfmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vfmsub231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmsub231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xba,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmsub231nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xba,0x35,0x00,0x00,0x00,0x00]
+ vfmsub231nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vfmsub231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xba,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfmsub231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vfmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xba,0x71,0x7f]
+ vfmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vfmsub231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xba,0x72,0x80]
+ vfmsub231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vfmsub231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmsub231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xba,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmsub231nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xba,0x35,0x00,0x00,0x00,0x00]
+ vfmsub231nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vfmsub231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xba,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfmsub231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vfmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xba,0x71,0x7f]
+ vfmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vfmsub231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xba,0x72,0x80]
+ vfmsub231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vfmsub231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmsub231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xba,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmsub231nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xba,0x35,0x00,0x00,0x00,0x00]
+ vfmsub231nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vfmsub231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xba,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfmsub231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vfmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xba,0x71,0x7f]
+ vfmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vfmsub231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xba,0x72,0x80]
+ vfmsub231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vfnmadd132nepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0x9c,0xf0]
+ vfnmadd132nepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vfnmadd132nepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0x9c,0xf0]
+ vfnmadd132nepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vfnmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x9c,0xf0]
+ vfnmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vfnmadd132nepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0x9c,0xf0]
+ vfnmadd132nepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vfnmadd132nepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0x9c,0xf0]
+ vfnmadd132nepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vfnmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x9c,0xf0]
+ vfnmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vfnmadd132nepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0x9c,0xf0]
+ vfnmadd132nepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vfnmadd132nepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0x9c,0xf0]
+ vfnmadd132nepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vfnmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0x9c,0xf0]
+ vfnmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vfnmadd132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmadd132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0x9c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmadd132nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x9c,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd132nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vfnmadd132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x9c,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmadd132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vfnmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x9c,0x71,0x7f]
+ vfnmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vfnmadd132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x9c,0x72,0x80]
+ vfnmadd132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vfnmadd132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmadd132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x9c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmadd132nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x9c,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd132nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vfnmadd132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x9c,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmadd132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vfnmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0x9c,0x71,0x7f]
+ vfnmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vfnmadd132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x9c,0x72,0x80]
+ vfnmadd132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vfnmadd132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmadd132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x9c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmadd132nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x9c,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd132nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vfnmadd132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x9c,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmadd132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vfnmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x9c,0x71,0x7f]
+ vfnmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vfnmadd132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x9c,0x72,0x80]
+ vfnmadd132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vfnmadd213nepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xac,0xf0]
+ vfnmadd213nepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vfnmadd213nepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xac,0xf0]
+ vfnmadd213nepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vfnmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xac,0xf0]
+ vfnmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vfnmadd213nepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xac,0xf0]
+ vfnmadd213nepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vfnmadd213nepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xac,0xf0]
+ vfnmadd213nepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vfnmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xac,0xf0]
+ vfnmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vfnmadd213nepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xac,0xf0]
+ vfnmadd213nepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vfnmadd213nepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xac,0xf0]
+ vfnmadd213nepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vfnmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xac,0xf0]
+ vfnmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vfnmadd213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmadd213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xac,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmadd213nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xac,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd213nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vfnmadd213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xac,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmadd213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vfnmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xac,0x71,0x7f]
+ vfnmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vfnmadd213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xac,0x72,0x80]
+ vfnmadd213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vfnmadd213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmadd213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xac,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmadd213nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xac,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd213nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vfnmadd213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xac,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmadd213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vfnmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xac,0x71,0x7f]
+ vfnmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vfnmadd213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xac,0x72,0x80]
+ vfnmadd213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vfnmadd213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmadd213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xac,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmadd213nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xac,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd213nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vfnmadd213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xac,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmadd213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vfnmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xac,0x71,0x7f]
+ vfnmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vfnmadd213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xac,0x72,0x80]
+ vfnmadd213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vfnmadd231nepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xbc,0xf0]
+ vfnmadd231nepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vfnmadd231nepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xbc,0xf0]
+ vfnmadd231nepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vfnmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xbc,0xf0]
+ vfnmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vfnmadd231nepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xbc,0xf0]
+ vfnmadd231nepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vfnmadd231nepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xbc,0xf0]
+ vfnmadd231nepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vfnmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xbc,0xf0]
+ vfnmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vfnmadd231nepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xbc,0xf0]
+ vfnmadd231nepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vfnmadd231nepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xbc,0xf0]
+ vfnmadd231nepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vfnmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xbc,0xf0]
+ vfnmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vfnmadd231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmadd231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xbc,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmadd231nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xbc,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd231nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vfnmadd231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xbc,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmadd231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vfnmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xbc,0x71,0x7f]
+ vfnmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vfnmadd231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xbc,0x72,0x80]
+ vfnmadd231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vfnmadd231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmadd231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xbc,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmadd231nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xbc,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd231nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vfnmadd231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xbc,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmadd231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vfnmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xbc,0x71,0x7f]
+ vfnmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vfnmadd231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xbc,0x72,0x80]
+ vfnmadd231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vfnmadd231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmadd231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xbc,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmadd231nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xbc,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd231nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vfnmadd231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xbc,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmadd231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vfnmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xbc,0x71,0x7f]
+ vfnmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vfnmadd231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xbc,0x72,0x80]
+ vfnmadd231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vfnmsub132nepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0x9e,0xf0]
+ vfnmsub132nepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vfnmsub132nepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0x9e,0xf0]
+ vfnmsub132nepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vfnmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x9e,0xf0]
+ vfnmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vfnmsub132nepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0x9e,0xf0]
+ vfnmsub132nepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vfnmsub132nepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0x9e,0xf0]
+ vfnmsub132nepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vfnmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x9e,0xf0]
+ vfnmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vfnmsub132nepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0x9e,0xf0]
+ vfnmsub132nepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vfnmsub132nepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0x9e,0xf0]
+ vfnmsub132nepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vfnmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0x9e,0xf0]
+ vfnmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vfnmsub132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmsub132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0x9e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmsub132nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x9e,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub132nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vfnmsub132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x9e,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmsub132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vfnmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x9e,0x71,0x7f]
+ vfnmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vfnmsub132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x9e,0x72,0x80]
+ vfnmsub132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vfnmsub132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmsub132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x9e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmsub132nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x9e,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub132nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vfnmsub132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x9e,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmsub132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vfnmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0x9e,0x71,0x7f]
+ vfnmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vfnmsub132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x9e,0x72,0x80]
+ vfnmsub132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vfnmsub132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmsub132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x9e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmsub132nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x9e,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub132nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vfnmsub132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x9e,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmsub132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vfnmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x9e,0x71,0x7f]
+ vfnmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vfnmsub132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x9e,0x72,0x80]
+ vfnmsub132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vfnmsub213nepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xae,0xf0]
+ vfnmsub213nepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vfnmsub213nepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xae,0xf0]
+ vfnmsub213nepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vfnmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xae,0xf0]
+ vfnmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vfnmsub213nepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xae,0xf0]
+ vfnmsub213nepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vfnmsub213nepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xae,0xf0]
+ vfnmsub213nepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vfnmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xae,0xf0]
+ vfnmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vfnmsub213nepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xae,0xf0]
+ vfnmsub213nepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vfnmsub213nepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xae,0xf0]
+ vfnmsub213nepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vfnmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xae,0xf0]
+ vfnmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vfnmsub213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmsub213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xae,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmsub213nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xae,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub213nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vfnmsub213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xae,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmsub213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vfnmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xae,0x71,0x7f]
+ vfnmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vfnmsub213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xae,0x72,0x80]
+ vfnmsub213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vfnmsub213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmsub213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xae,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmsub213nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xae,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub213nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vfnmsub213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xae,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmsub213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vfnmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xae,0x71,0x7f]
+ vfnmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vfnmsub213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xae,0x72,0x80]
+ vfnmsub213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vfnmsub213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmsub213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xae,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmsub213nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xae,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub213nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vfnmsub213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xae,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmsub213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vfnmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xae,0x71,0x7f]
+ vfnmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vfnmsub213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xae,0x72,0x80]
+ vfnmsub213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vfnmsub231nepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xbe,0xf0]
+ vfnmsub231nepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vfnmsub231nepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xbe,0xf0]
+ vfnmsub231nepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vfnmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xbe,0xf0]
+ vfnmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vfnmsub231nepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xbe,0xf0]
+ vfnmsub231nepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vfnmsub231nepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xbe,0xf0]
+ vfnmsub231nepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vfnmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xbe,0xf0]
+ vfnmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vfnmsub231nepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xbe,0xf0]
+ vfnmsub231nepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vfnmsub231nepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xbe,0xf0]
+ vfnmsub231nepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vfnmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xbe,0xf0]
+ vfnmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vfnmsub231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmsub231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xbe,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmsub231nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xbe,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub231nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vfnmsub231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xbe,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmsub231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vfnmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xbe,0x71,0x7f]
+ vfnmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vfnmsub231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xbe,0x72,0x80]
+ vfnmsub231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vfnmsub231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmsub231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xbe,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmsub231nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xbe,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub231nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vfnmsub231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xbe,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmsub231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vfnmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xbe,0x71,0x7f]
+ vfnmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vfnmsub231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xbe,0x72,0x80]
+ vfnmsub231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vfnmsub231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmsub231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xbe,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmsub231nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xbe,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub231nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vfnmsub231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xbe,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmsub231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vfnmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xbe,0x71,0x7f]
+ vfnmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vfnmsub231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xbe,0x72,0x80]
+ vfnmsub231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vfpclasspbf16 k5, zmm23, 123
+// CHECK: encoding: [0x62,0xb3,0x7f,0x48,0x66,0xef,0x7b]
+ vfpclasspbf16 k5, zmm23, 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, zmm23, 123
+// CHECK: encoding: [0x62,0xb3,0x7f,0x4f,0x66,0xef,0x7b]
+ vfpclasspbf16 k5 {k7}, zmm23, 123
+
+// CHECK: vfpclasspbf16 k5, ymm23, 123
+// CHECK: encoding: [0x62,0xb3,0x7f,0x28,0x66,0xef,0x7b]
+ vfpclasspbf16 k5, ymm23, 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, ymm23, 123
+// CHECK: encoding: [0x62,0xb3,0x7f,0x2f,0x66,0xef,0x7b]
+ vfpclasspbf16 k5 {k7}, ymm23, 123
+
+// CHECK: vfpclasspbf16 k5, xmm23, 123
+// CHECK: encoding: [0x62,0xb3,0x7f,0x08,0x66,0xef,0x7b]
+ vfpclasspbf16 k5, xmm23, 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, xmm23, 123
+// CHECK: encoding: [0x62,0xb3,0x7f,0x0f,0x66,0xef,0x7b]
+ vfpclasspbf16 k5 {k7}, xmm23, 123
+
+// CHECK: vfpclasspbf16 k5, xmmword ptr [rbp + 8*r14 + 268435456], 123
+// CHECK: encoding: [0x62,0xb3,0x7f,0x08,0x66,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vfpclasspbf16 k5, xmmword ptr [rbp + 8*r14 + 268435456], 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, xmmword ptr [r8 + 4*rax + 291], 123
+// CHECK: encoding: [0x62,0xd3,0x7f,0x0f,0x66,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vfpclasspbf16 k5 {k7}, xmmword ptr [r8 + 4*rax + 291], 123
+
+// CHECK: vfpclasspbf16 k5, word ptr [rip]{1to8}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x66,0x2d,0x00,0x00,0x00,0x00,0x7b]
+ vfpclasspbf16 k5, word ptr [rip]{1to8}, 123
+
+// CHECK: vfpclasspbf16 k5, xmmword ptr [2*rbp - 512], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x66,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vfpclasspbf16 k5, xmmword ptr [2*rbp - 512], 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, xmmword ptr [rcx + 2032], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x66,0x69,0x7f,0x7b]
+ vfpclasspbf16 k5 {k7}, xmmword ptr [rcx + 2032], 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, word ptr [rdx - 256]{1to8}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x1f,0x66,0x6a,0x80,0x7b]
+ vfpclasspbf16 k5 {k7}, word ptr [rdx - 256]{1to8}, 123
+
+// CHECK: vfpclasspbf16 k5, word ptr [rip]{1to16}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x66,0x2d,0x00,0x00,0x00,0x00,0x7b]
+ vfpclasspbf16 k5, word ptr [rip]{1to16}, 123
+
+// CHECK: vfpclasspbf16 k5, ymmword ptr [2*rbp - 1024], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x66,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vfpclasspbf16 k5, ymmword ptr [2*rbp - 1024], 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, ymmword ptr [rcx + 4064], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x66,0x69,0x7f,0x7b]
+ vfpclasspbf16 k5 {k7}, ymmword ptr [rcx + 4064], 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, word ptr [rdx - 256]{1to16}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x3f,0x66,0x6a,0x80,0x7b]
+ vfpclasspbf16 k5 {k7}, word ptr [rdx - 256]{1to16}, 123
+
+// CHECK: vfpclasspbf16 k5, word ptr [rip]{1to32}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x58,0x66,0x2d,0x00,0x00,0x00,0x00,0x7b]
+ vfpclasspbf16 k5, word ptr [rip]{1to32}, 123
+
+// CHECK: vfpclasspbf16 k5, zmmword ptr [2*rbp - 2048], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x66,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vfpclasspbf16 k5, zmmword ptr [2*rbp - 2048], 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, zmmword ptr [rcx + 8128], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x66,0x69,0x7f,0x7b]
+ vfpclasspbf16 k5 {k7}, zmmword ptr [rcx + 8128], 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, word ptr [rdx - 256]{1to32}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x5f,0x66,0x6a,0x80,0x7b]
+ vfpclasspbf16 k5 {k7}, word ptr [rdx - 256]{1to32}, 123
+
+// CHECK: vgetexppbf16 xmm22, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x42,0xf7]
+ vgetexppbf16 xmm22, xmm23
+
+// CHECK: vgetexppbf16 xmm22 {k7}, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x0f,0x42,0xf7]
+ vgetexppbf16 xmm22 {k7}, xmm23
+
+// CHECK: vgetexppbf16 xmm22 {k7} {z}, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x8f,0x42,0xf7]
+ vgetexppbf16 xmm22 {k7} {z}, xmm23
+
+// CHECK: vgetexppbf16 zmm22, zmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x42,0xf7]
+ vgetexppbf16 zmm22, zmm23
+
+// CHECK: vgetexppbf16 zmm22 {k7}, zmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x4f,0x42,0xf7]
+ vgetexppbf16 zmm22 {k7}, zmm23
+
+// CHECK: vgetexppbf16 zmm22 {k7} {z}, zmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0xcf,0x42,0xf7]
+ vgetexppbf16 zmm22 {k7} {z}, zmm23
+
+// CHECK: vgetexppbf16 ymm22, ymm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x42,0xf7]
+ vgetexppbf16 ymm22, ymm23
+
+// CHECK: vgetexppbf16 ymm22 {k7}, ymm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x2f,0x42,0xf7]
+ vgetexppbf16 ymm22 {k7}, ymm23
+
+// CHECK: vgetexppbf16 ymm22 {k7} {z}, ymm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0xaf,0x42,0xf7]
+ vgetexppbf16 ymm22 {k7} {z}, ymm23
+
+// CHECK: vgetexppbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vgetexppbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vgetexppbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7d,0x0f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vgetexppbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vgetexppbf16 xmm22, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x18,0x42,0x35,0x00,0x00,0x00,0x00]
+ vgetexppbf16 xmm22, word ptr [rip]{1to8}
+
+// CHECK: vgetexppbf16 xmm22, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x42,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vgetexppbf16 xmm22, xmmword ptr [2*rbp - 512]
+
+// CHECK: vgetexppbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x8f,0x42,0x71,0x7f]
+ vgetexppbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+
+// CHECK: vgetexppbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x9f,0x42,0x72,0x80]
+ vgetexppbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vgetexppbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vgetexppbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vgetexppbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7d,0x2f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vgetexppbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vgetexppbf16 ymm22, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x38,0x42,0x35,0x00,0x00,0x00,0x00]
+ vgetexppbf16 ymm22, word ptr [rip]{1to16}
+
+// CHECK: vgetexppbf16 ymm22, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x28,0x42,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vgetexppbf16 ymm22, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vgetexppbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe5,0x7d,0xaf,0x42,0x71,0x7f]
+ vgetexppbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+
+// CHECK: vgetexppbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xbf,0x42,0x72,0x80]
+ vgetexppbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}
+
+// CHECK: vgetexppbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vgetexppbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vgetexppbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7d,0x4f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vgetexppbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vgetexppbf16 zmm22, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x58,0x42,0x35,0x00,0x00,0x00,0x00]
+ vgetexppbf16 zmm22, word ptr [rip]{1to32}
+
+// CHECK: vgetexppbf16 zmm22, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x48,0x42,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vgetexppbf16 zmm22, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vgetexppbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0x7d,0xcf,0x42,0x71,0x7f]
+ vgetexppbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+
+// CHECK: vgetexppbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xdf,0x42,0x72,0x80]
+ vgetexppbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}
+
+// CHECK: vgetmantpbf16 zmm22, zmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x26,0xf7,0x7b]
+ vgetmantpbf16 zmm22, zmm23, 123
+
+// CHECK: vgetmantpbf16 zmm22 {k7}, zmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x4f,0x26,0xf7,0x7b]
+ vgetmantpbf16 zmm22 {k7}, zmm23, 123
+
+// CHECK: vgetmantpbf16 zmm22 {k7} {z}, zmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0xcf,0x26,0xf7,0x7b]
+ vgetmantpbf16 zmm22 {k7} {z}, zmm23, 123
+
+// CHECK: vgetmantpbf16 ymm22, ymm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x26,0xf7,0x7b]
+ vgetmantpbf16 ymm22, ymm23, 123
+
+// CHECK: vgetmantpbf16 ymm22 {k7}, ymm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x2f,0x26,0xf7,0x7b]
+ vgetmantpbf16 ymm22 {k7}, ymm23, 123
+
+// CHECK: vgetmantpbf16 ymm22 {k7} {z}, ymm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0xaf,0x26,0xf7,0x7b]
+ vgetmantpbf16 ymm22 {k7} {z}, ymm23, 123
+
+// CHECK: vgetmantpbf16 xmm22, xmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x26,0xf7,0x7b]
+ vgetmantpbf16 xmm22, xmm23, 123
+
+// CHECK: vgetmantpbf16 xmm22 {k7}, xmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x0f,0x26,0xf7,0x7b]
+ vgetmantpbf16 xmm22 {k7}, xmm23, 123
+
+// CHECK: vgetmantpbf16 xmm22 {k7} {z}, xmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x8f,0x26,0xf7,0x7b]
+ vgetmantpbf16 xmm22 {k7} {z}, xmm23, 123
+
+// CHECK: vgetmantpbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456], 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x26,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vgetmantpbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456], 123
+
+// CHECK: vgetmantpbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291], 123
+// CHECK: encoding: [0x62,0xc3,0x7f,0x0f,0x26,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantpbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291], 123
+
+// CHECK: vgetmantpbf16 xmm22, word ptr [rip]{1to8}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x18,0x26,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vgetmantpbf16 xmm22, word ptr [rip]{1to8}, 123
+
+// CHECK: vgetmantpbf16 xmm22, xmmword ptr [2*rbp - 512], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x08,0x26,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vgetmantpbf16 xmm22, xmmword ptr [2*rbp - 512], 123
+
+// CHECK: vgetmantpbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x8f,0x26,0x71,0x7f,0x7b]
+ vgetmantpbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032], 123
+
+// CHECK: vgetmantpbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x9f,0x26,0x72,0x80,0x7b]
+ vgetmantpbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}, 123
+
+// CHECK: vgetmantpbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456], 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x26,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vgetmantpbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456], 123
+
+// CHECK: vgetmantpbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291], 123
+// CHECK: encoding: [0x62,0xc3,0x7f,0x2f,0x26,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantpbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291], 123
+
+// CHECK: vgetmantpbf16 ymm22, word ptr [rip]{1to16}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x38,0x26,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vgetmantpbf16 ymm22, word ptr [rip]{1to16}, 123
+
+// CHECK: vgetmantpbf16 ymm22, ymmword ptr [2*rbp - 1024], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x28,0x26,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vgetmantpbf16 ymm22, ymmword ptr [2*rbp - 1024], 123
+
+// CHECK: vgetmantpbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0xaf,0x26,0x71,0x7f,0x7b]
+ vgetmantpbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064], 123
+
+// CHECK: vgetmantpbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0xbf,0x26,0x72,0x80,0x7b]
+ vgetmantpbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}, 123
+
+// CHECK: vgetmantpbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456], 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x26,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vgetmantpbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456], 123
+
+// CHECK: vgetmantpbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291], 123
+// CHECK: encoding: [0x62,0xc3,0x7f,0x4f,0x26,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantpbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291], 123
+
+// CHECK: vgetmantpbf16 zmm22, word ptr [rip]{1to32}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x58,0x26,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vgetmantpbf16 zmm22, word ptr [rip]{1to32}, 123
+
+// CHECK: vgetmantpbf16 zmm22, zmmword ptr [2*rbp - 2048], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x26,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vgetmantpbf16 zmm22, zmmword ptr [2*rbp - 2048], 123
+
+// CHECK: vgetmantpbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0xcf,0x26,0x71,0x7f,0x7b]
+ vgetmantpbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128], 123
+
+// CHECK: vgetmantpbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0xdf,0x26,0x72,0x80,0x7b]
+ vgetmantpbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}, 123
+
+// CHECK: vmaxpbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0x20,0x5f,0xf0]
+ vmaxpbf16 ymm22, ymm23, ymm24
+
+// CHECK: vmaxpbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0x27,0x5f,0xf0]
+ vmaxpbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vmaxpbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x5f,0xf0]
+ vmaxpbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vmaxpbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x40,0x5f,0xf0]
+ vmaxpbf16 zmm22, zmm23, zmm24
+
+// CHECK: vmaxpbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x47,0x5f,0xf0]
+ vmaxpbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vmaxpbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x5f,0xf0]
+ vmaxpbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vmaxpbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x00,0x5f,0xf0]
+ vmaxpbf16 xmm22, xmm23, xmm24
+
+// CHECK: vmaxpbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x07,0x5f,0xf0]
+ vmaxpbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vmaxpbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x87,0x5f,0xf0]
+ vmaxpbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vmaxpbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x5f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmaxpbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vmaxpbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x5f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmaxpbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vmaxpbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x5f,0x35,0x00,0x00,0x00,0x00]
+ vmaxpbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vmaxpbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x5f,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vmaxpbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vmaxpbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x5f,0x71,0x7f]
+ vmaxpbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vmaxpbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x5f,0x72,0x80]
+ vmaxpbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vmaxpbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x5f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmaxpbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vmaxpbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x5f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmaxpbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vmaxpbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x5f,0x35,0x00,0x00,0x00,0x00]
+ vmaxpbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vmaxpbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x5f,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vmaxpbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vmaxpbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x5f,0x71,0x7f]
+ vmaxpbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vmaxpbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x5f,0x72,0x80]
+ vmaxpbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vmaxpbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x5f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmaxpbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vmaxpbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x5f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmaxpbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vmaxpbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x5f,0x35,0x00,0x00,0x00,0x00]
+ vmaxpbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vmaxpbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x5f,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vmaxpbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vmaxpbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x5f,0x71,0x7f]
+ vmaxpbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vmaxpbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x5f,0x72,0x80]
+ vmaxpbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vminpbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0x20,0x5d,0xf0]
+ vminpbf16 ymm22, ymm23, ymm24
+
+// CHECK: vminpbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0x27,0x5d,0xf0]
+ vminpbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vminpbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x5d,0xf0]
+ vminpbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vminpbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x40,0x5d,0xf0]
+ vminpbf16 zmm22, zmm23, zmm24
+
+// CHECK: vminpbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x47,0x5d,0xf0]
+ vminpbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vminpbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x5d,0xf0]
+ vminpbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vminpbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x00,0x5d,0xf0]
+ vminpbf16 xmm22, xmm23, xmm24
+
+// CHECK: vminpbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x07,0x5d,0xf0]
+ vminpbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vminpbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x87,0x5d,0xf0]
+ vminpbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vminpbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x5d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vminpbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vminpbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x5d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vminpbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vminpbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x5d,0x35,0x00,0x00,0x00,0x00]
+ vminpbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vminpbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x5d,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vminpbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vminpbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x5d,0x71,0x7f]
+ vminpbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vminpbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x5d,0x72,0x80]
+ vminpbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vminpbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x5d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vminpbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vminpbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x5d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vminpbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vminpbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x5d,0x35,0x00,0x00,0x00,0x00]
+ vminpbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vminpbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x5d,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vminpbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vminpbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x5d,0x71,0x7f]
+ vminpbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vminpbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x5d,0x72,0x80]
+ vminpbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vminpbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x5d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vminpbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vminpbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x5d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vminpbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vminpbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x5d,0x35,0x00,0x00,0x00,0x00]
+ vminpbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vminpbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x5d,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vminpbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vminpbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x5d,0x71,0x7f]
+ vminpbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vminpbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x5d,0x72,0x80]
+ vminpbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vmulnepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0x20,0x59,0xf0]
+ vmulnepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vmulnepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0x27,0x59,0xf0]
+ vmulnepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vmulnepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x59,0xf0]
+ vmulnepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vmulnepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x40,0x59,0xf0]
+ vmulnepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vmulnepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x47,0x59,0xf0]
+ vmulnepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vmulnepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x59,0xf0]
+ vmulnepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vmulnepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x00,0x59,0xf0]
+ vmulnepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vmulnepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x07,0x59,0xf0]
+ vmulnepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vmulnepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x87,0x59,0xf0]
+ vmulnepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vmulnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x59,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmulnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vmulnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x59,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmulnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vmulnepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x59,0x35,0x00,0x00,0x00,0x00]
+ vmulnepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vmulnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x59,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vmulnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vmulnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x59,0x71,0x7f]
+ vmulnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vmulnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x59,0x72,0x80]
+ vmulnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vmulnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x59,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmulnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vmulnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x59,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmulnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vmulnepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x59,0x35,0x00,0x00,0x00,0x00]
+ vmulnepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vmulnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x59,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vmulnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vmulnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x59,0x71,0x7f]
+ vmulnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vmulnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x59,0x72,0x80]
+ vmulnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vmulnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x59,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmulnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vmulnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x59,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmulnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vmulnepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x59,0x35,0x00,0x00,0x00,0x00]
+ vmulnepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vmulnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x59,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vmulnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vmulnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x59,0x71,0x7f]
+ vmulnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vmulnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x59,0x72,0x80]
+ vmulnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vrcppbf16 xmm22, xmm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0x08,0x4c,0xf7]
+ vrcppbf16 xmm22, xmm23
+
+// CHECK: vrcppbf16 xmm22 {k7}, xmm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0x0f,0x4c,0xf7]
+ vrcppbf16 xmm22 {k7}, xmm23
+
+// CHECK: vrcppbf16 xmm22 {k7} {z}, xmm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0x8f,0x4c,0xf7]
+ vrcppbf16 xmm22 {k7} {z}, xmm23
+
+// CHECK: vrcppbf16 zmm22, zmm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0x48,0x4c,0xf7]
+ vrcppbf16 zmm22, zmm23
+
+// CHECK: vrcppbf16 zmm22 {k7}, zmm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0x4f,0x4c,0xf7]
+ vrcppbf16 zmm22 {k7}, zmm23
+
+// CHECK: vrcppbf16 zmm22 {k7} {z}, zmm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0xcf,0x4c,0xf7]
+ vrcppbf16 zmm22 {k7} {z}, zmm23
+
+// CHECK: vrcppbf16 ymm22, ymm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0x28,0x4c,0xf7]
+ vrcppbf16 ymm22, ymm23
+
+// CHECK: vrcppbf16 ymm22 {k7}, ymm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0x2f,0x4c,0xf7]
+ vrcppbf16 ymm22 {k7}, ymm23
+
+// CHECK: vrcppbf16 ymm22 {k7} {z}, ymm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0xaf,0x4c,0xf7]
+ vrcppbf16 ymm22 {k7} {z}, ymm23
+
+// CHECK: vrcppbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x7c,0x08,0x4c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vrcppbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vrcppbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x7c,0x0f,0x4c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vrcppbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vrcppbf16 xmm22, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x7c,0x18,0x4c,0x35,0x00,0x00,0x00,0x00]
+ vrcppbf16 xmm22, word ptr [rip]{1to8}
+
+// CHECK: vrcppbf16 xmm22, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x7c,0x08,0x4c,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vrcppbf16 xmm22, xmmword ptr [2*rbp - 512]
+
+// CHECK: vrcppbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x7c,0x8f,0x4c,0x71,0x7f]
+ vrcppbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+
+// CHECK: vrcppbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x7c,0x9f,0x4c,0x72,0x80]
+ vrcppbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vrcppbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x7c,0x28,0x4c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vrcppbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vrcppbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x7c,0x2f,0x4c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vrcppbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vrcppbf16 ymm22, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x7c,0x38,0x4c,0x35,0x00,0x00,0x00,0x00]
+ vrcppbf16 ymm22, word ptr [rip]{1to16}
+
+// CHECK: vrcppbf16 ymm22, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x7c,0x28,0x4c,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vrcppbf16 ymm22, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vrcppbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x7c,0xaf,0x4c,0x71,0x7f]
+ vrcppbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+
+// CHECK: vrcppbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x7c,0xbf,0x4c,0x72,0x80]
+ vrcppbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}
+
+// CHECK: vrcppbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x7c,0x48,0x4c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vrcppbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vrcppbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x7c,0x4f,0x4c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vrcppbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vrcppbf16 zmm22, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x7c,0x58,0x4c,0x35,0x00,0x00,0x00,0x00]
+ vrcppbf16 zmm22, word ptr [rip]{1to32}
+
+// CHECK: vrcppbf16 zmm22, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x7c,0x48,0x4c,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vrcppbf16 zmm22, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vrcppbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x7c,0xcf,0x4c,0x71,0x7f]
+ vrcppbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+
+// CHECK: vrcppbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x7c,0xdf,0x4c,0x72,0x80]
+ vrcppbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}
+
+// CHECK: vreducenepbf16 zmm22, zmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x56,0xf7,0x7b]
+ vreducenepbf16 zmm22, zmm23, 123
+
+// CHECK: vreducenepbf16 zmm22 {k7}, zmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x4f,0x56,0xf7,0x7b]
+ vreducenepbf16 zmm22 {k7}, zmm23, 123
+
+// CHECK: vreducenepbf16 zmm22 {k7} {z}, zmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0xcf,0x56,0xf7,0x7b]
+ vreducenepbf16 zmm22 {k7} {z}, zmm23, 123
+
+// CHECK: vreducenepbf16 ymm22, ymm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x56,0xf7,0x7b]
+ vreducenepbf16 ymm22, ymm23, 123
+
+// CHECK: vreducenepbf16 ymm22 {k7}, ymm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x2f,0x56,0xf7,0x7b]
+ vreducenepbf16 ymm22 {k7}, ymm23, 123
+
+// CHECK: vreducenepbf16 ymm22 {k7} {z}, ymm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0xaf,0x56,0xf7,0x7b]
+ vreducenepbf16 ymm22 {k7} {z}, ymm23, 123
+
+// CHECK: vreducenepbf16 xmm22, xmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x56,0xf7,0x7b]
+ vreducenepbf16 xmm22, xmm23, 123
+
+// CHECK: vreducenepbf16 xmm22 {k7}, xmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x0f,0x56,0xf7,0x7b]
+ vreducenepbf16 xmm22 {k7}, xmm23, 123
+
+// CHECK: vreducenepbf16 xmm22 {k7} {z}, xmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x8f,0x56,0xf7,0x7b]
+ vreducenepbf16 xmm22 {k7} {z}, xmm23, 123
+
+// CHECK: vreducenepbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456], 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x56,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vreducenepbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456], 123
+
+// CHECK: vreducenepbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291], 123
+// CHECK: encoding: [0x62,0xc3,0x7f,0x0f,0x56,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vreducenepbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291], 123
+
+// CHECK: vreducenepbf16 xmm22, word ptr [rip]{1to8}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x18,0x56,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vreducenepbf16 xmm22, word ptr [rip]{1to8}, 123
+
+// CHECK: vreducenepbf16 xmm22, xmmword ptr [2*rbp - 512], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x08,0x56,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vreducenepbf16 xmm22, xmmword ptr [2*rbp - 512], 123
+
+// CHECK: vreducenepbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x8f,0x56,0x71,0x7f,0x7b]
+ vreducenepbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032], 123
+
+// CHECK: vreducenepbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x9f,0x56,0x72,0x80,0x7b]
+ vreducenepbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}, 123
+
+// CHECK: vreducenepbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456], 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x56,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vreducenepbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456], 123
+
+// CHECK: vreducenepbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291], 123
+// CHECK: encoding: [0x62,0xc3,0x7f,0x2f,0x56,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vreducenepbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291], 123
+
+// CHECK: vreducenepbf16 ymm22, word ptr [rip]{1to16}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x38,0x56,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vreducenepbf16 ymm22, word ptr [rip]{1to16}, 123
+
+// CHECK: vreducenepbf16 ymm22, ymmword ptr [2*rbp - 1024], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x28,0x56,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vreducenepbf16 ymm22, ymmword ptr [2*rbp - 1024], 123
+
+// CHECK: vreducenepbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0xaf,0x56,0x71,0x7f,0x7b]
+ vreducenepbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064], 123
+
+// CHECK: vreducenepbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0xbf,0x56,0x72,0x80,0x7b]
+ vreducenepbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}, 123
+
+// CHECK: vreducenepbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456], 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x56,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vreducenepbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456], 123
+
+// CHECK: vreducenepbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291], 123
+// CHECK: encoding: [0x62,0xc3,0x7f,0x4f,0x56,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vreducenepbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291], 123
+
+// CHECK: vreducenepbf16 zmm22, word ptr [rip]{1to32}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x58,0x56,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vreducenepbf16 zmm22, word ptr [rip]{1to32}, 123
+
+// CHECK: vreducenepbf16 zmm22, zmmword ptr [2*rbp - 2048], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x56,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vreducenepbf16 zmm22, zmmword ptr [2*rbp - 2048], 123
+
+// CHECK: vreducenepbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0xcf,0x56,0x71,0x7f,0x7b]
+ vreducenepbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128], 123
+
+// CHECK: vreducenepbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0xdf,0x56,0x72,0x80,0x7b]
+ vreducenepbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}, 123
+
+// CHECK: vrndscalenepbf16 zmm22, zmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x08,0xf7,0x7b]
+ vrndscalenepbf16 zmm22, zmm23, 123
+
+// CHECK: vrndscalenepbf16 zmm22 {k7}, zmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x4f,0x08,0xf7,0x7b]
+ vrndscalenepbf16 zmm22 {k7}, zmm23, 123
+
+// CHECK: vrndscalenepbf16 zmm22 {k7} {z}, zmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0xcf,0x08,0xf7,0x7b]
+ vrndscalenepbf16 zmm22 {k7} {z}, zmm23, 123
+
+// CHECK: vrndscalenepbf16 ymm22, ymm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x08,0xf7,0x7b]
+ vrndscalenepbf16 ymm22, ymm23, 123
+
+// CHECK: vrndscalenepbf16 ymm22 {k7}, ymm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x2f,0x08,0xf7,0x7b]
+ vrndscalenepbf16 ymm22 {k7}, ymm23, 123
+
+// CHECK: vrndscalenepbf16 ymm22 {k7} {z}, ymm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0xaf,0x08,0xf7,0x7b]
+ vrndscalenepbf16 ymm22 {k7} {z}, ymm23, 123
+
+// CHECK: vrndscalenepbf16 xmm22, xmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x08,0xf7,0x7b]
+ vrndscalenepbf16 xmm22, xmm23, 123
+
+// CHECK: vrndscalenepbf16 xmm22 {k7}, xmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x0f,0x08,0xf7,0x7b]
+ vrndscalenepbf16 xmm22 {k7}, xmm23, 123
+
+// CHECK: vrndscalenepbf16 xmm22 {k7} {z}, xmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x8f,0x08,0xf7,0x7b]
+ vrndscalenepbf16 xmm22 {k7} {z}, xmm23, 123
+
+// CHECK: vrndscalenepbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456], 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x08,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vrndscalenepbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456], 123
+
+// CHECK: vrndscalenepbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291], 123
+// CHECK: encoding: [0x62,0xc3,0x7f,0x0f,0x08,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vrndscalenepbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291], 123
+
+// CHECK: vrndscalenepbf16 xmm22, word ptr [rip]{1to8}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x18,0x08,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vrndscalenepbf16 xmm22, word ptr [rip]{1to8}, 123
+
+// CHECK: vrndscalenepbf16 xmm22, xmmword ptr [2*rbp - 512], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x08,0x08,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vrndscalenepbf16 xmm22, xmmword ptr [2*rbp - 512], 123
+
+// CHECK: vrndscalenepbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x8f,0x08,0x71,0x7f,0x7b]
+ vrndscalenepbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032], 123
+
+// CHECK: vrndscalenepbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x9f,0x08,0x72,0x80,0x7b]
+ vrndscalenepbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}, 123
+
+// CHECK: vrndscalenepbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456], 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x08,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vrndscalenepbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456], 123
+
+// CHECK: vrndscalenepbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291], 123
+// CHECK: encoding: [0x62,0xc3,0x7f,0x2f,0x08,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vrndscalenepbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291], 123
+
+// CHECK: vrndscalenepbf16 ymm22, word ptr [rip]{1to16}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x38,0x08,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vrndscalenepbf16 ymm22, word ptr [rip]{1to16}, 123
+
+// CHECK: vrndscalenepbf16 ymm22, ymmword ptr [2*rbp - 1024], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x28,0x08,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vrndscalenepbf16 ymm22, ymmword ptr [2*rbp - 1024], 123
+
+// CHECK: vrndscalenepbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0xaf,0x08,0x71,0x7f,0x7b]
+ vrndscalenepbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064], 123
+
+// CHECK: vrndscalenepbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0xbf,0x08,0x72,0x80,0x7b]
+ vrndscalenepbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}, 123
+
+// CHECK: vrndscalenepbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456], 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x08,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vrndscalenepbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456], 123
+
+// CHECK: vrndscalenepbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291], 123
+// CHECK: encoding: [0x62,0xc3,0x7f,0x4f,0x08,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vrndscalenepbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291], 123
+
+// CHECK: vrndscalenepbf16 zmm22, word ptr [rip]{1to32}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x58,0x08,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vrndscalenepbf16 zmm22, word ptr [rip]{1to32}, 123
+
+// CHECK: vrndscalenepbf16 zmm22, zmmword ptr [2*rbp - 2048], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x08,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vrndscalenepbf16 zmm22, zmmword ptr [2*rbp - 2048], 123
+
+// CHECK: vrndscalenepbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0xcf,0x08,0x71,0x7f,0x7b]
+ vrndscalenepbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128], 123
+
+// CHECK: vrndscalenepbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0xdf,0x08,0x72,0x80,0x7b]
+ vrndscalenepbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}, 123
+
+// CHECK: vrsqrtpbf16 xmm22, xmm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0x08,0x4e,0xf7]
+ vrsqrtpbf16 xmm22, xmm23
+
+// CHECK: vrsqrtpbf16 xmm22 {k7}, xmm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0x0f,0x4e,0xf7]
+ vrsqrtpbf16 xmm22 {k7}, xmm23
+
+// CHECK: vrsqrtpbf16 xmm22 {k7} {z}, xmm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0x8f,0x4e,0xf7]
+ vrsqrtpbf16 xmm22 {k7} {z}, xmm23
+
+// CHECK: vrsqrtpbf16 zmm22, zmm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0x48,0x4e,0xf7]
+ vrsqrtpbf16 zmm22, zmm23
+
+// CHECK: vrsqrtpbf16 zmm22 {k7}, zmm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0x4f,0x4e,0xf7]
+ vrsqrtpbf16 zmm22 {k7}, zmm23
+
+// CHECK: vrsqrtpbf16 zmm22 {k7} {z}, zmm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0xcf,0x4e,0xf7]
+ vrsqrtpbf16 zmm22 {k7} {z}, zmm23
+
+// CHECK: vrsqrtpbf16 ymm22, ymm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0x28,0x4e,0xf7]
+ vrsqrtpbf16 ymm22, ymm23
+
+// CHECK: vrsqrtpbf16 ymm22 {k7}, ymm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0x2f,0x4e,0xf7]
+ vrsqrtpbf16 ymm22 {k7}, ymm23
+
+// CHECK: vrsqrtpbf16 ymm22 {k7} {z}, ymm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0xaf,0x4e,0xf7]
+ vrsqrtpbf16 ymm22 {k7} {z}, ymm23
+
+// CHECK: vrsqrtpbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x7c,0x08,0x4e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vrsqrtpbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vrsqrtpbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x7c,0x0f,0x4e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vrsqrtpbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vrsqrtpbf16 xmm22, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x7c,0x18,0x4e,0x35,0x00,0x00,0x00,0x00]
+ vrsqrtpbf16 xmm22, word ptr [rip]{1to8}
+
+// CHECK: vrsqrtpbf16 xmm22, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x7c,0x08,0x4e,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vrsqrtpbf16 xmm22, xmmword ptr [2*rbp - 512]
+
+// CHECK: vrsqrtpbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x7c,0x8f,0x4e,0x71,0x7f]
+ vrsqrtpbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+
+// CHECK: vrsqrtpbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x7c,0x9f,0x4e,0x72,0x80]
+ vrsqrtpbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vrsqrtpbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x7c,0x28,0x4e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vrsqrtpbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vrsqrtpbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x7c,0x2f,0x4e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vrsqrtpbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vrsqrtpbf16 ymm22, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x7c,0x38,0x4e,0x35,0x00,0x00,0x00,0x00]
+ vrsqrtpbf16 ymm22, word ptr [rip]{1to16}
+
+// CHECK: vrsqrtpbf16 ymm22, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x7c,0x28,0x4e,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vrsqrtpbf16 ymm22, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vrsqrtpbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x7c,0xaf,0x4e,0x71,0x7f]
+ vrsqrtpbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+
+// CHECK: vrsqrtpbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x7c,0xbf,0x4e,0x72,0x80]
+ vrsqrtpbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}
+
+// CHECK: vrsqrtpbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x7c,0x48,0x4e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vrsqrtpbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vrsqrtpbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x7c,0x4f,0x4e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vrsqrtpbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vrsqrtpbf16 zmm22, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x7c,0x58,0x4e,0x35,0x00,0x00,0x00,0x00]
+ vrsqrtpbf16 zmm22, word ptr [rip]{1to32}
+
+// CHECK: vrsqrtpbf16 zmm22, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x7c,0x48,0x4e,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vrsqrtpbf16 zmm22, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vrsqrtpbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x7c,0xcf,0x4e,0x71,0x7f]
+ vrsqrtpbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+
+// CHECK: vrsqrtpbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x7c,0xdf,0x4e,0x72,0x80]
+ vrsqrtpbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}
+
+// CHECK: vscalefpbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0x2c,0xf0]
+ vscalefpbf16 ymm22, ymm23, ymm24
+
+// CHECK: vscalefpbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0x2c,0xf0]
+ vscalefpbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vscalefpbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x2c,0xf0]
+ vscalefpbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vscalefpbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0x2c,0xf0]
+ vscalefpbf16 zmm22, zmm23, zmm24
+
+// CHECK: vscalefpbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0x2c,0xf0]
+ vscalefpbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vscalefpbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x2c,0xf0]
+ vscalefpbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vscalefpbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0x2c,0xf0]
+ vscalefpbf16 xmm22, xmm23, xmm24
+
+// CHECK: vscalefpbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0x2c,0xf0]
+ vscalefpbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vscalefpbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0x2c,0xf0]
+ vscalefpbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vscalefpbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0x2c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vscalefpbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vscalefpbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0x2c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vscalefpbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vscalefpbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x2c,0x35,0x00,0x00,0x00,0x00]
+ vscalefpbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vscalefpbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x2c,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vscalefpbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vscalefpbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x2c,0x71,0x7f]
+ vscalefpbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vscalefpbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x2c,0x72,0x80]
+ vscalefpbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vscalefpbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0x2c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vscalefpbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vscalefpbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x2c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vscalefpbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vscalefpbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x2c,0x35,0x00,0x00,0x00,0x00]
+ vscalefpbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vscalefpbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x2c,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vscalefpbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vscalefpbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0x2c,0x71,0x7f]
+ vscalefpbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vscalefpbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x2c,0x72,0x80]
+ vscalefpbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vscalefpbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0x2c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vscalefpbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vscalefpbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x2c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vscalefpbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vscalefpbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x2c,0x35,0x00,0x00,0x00,0x00]
+ vscalefpbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vscalefpbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x2c,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vscalefpbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vscalefpbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x2c,0x71,0x7f]
+ vscalefpbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vscalefpbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x2c,0x72,0x80]
+ vscalefpbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vsqrtnepbf16 xmm22, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x51,0xf7]
+ vsqrtnepbf16 xmm22, xmm23
+
+// CHECK: vsqrtnepbf16 xmm22 {k7}, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x0f,0x51,0xf7]
+ vsqrtnepbf16 xmm22 {k7}, xmm23
+
+// CHECK: vsqrtnepbf16 xmm22 {k7} {z}, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x8f,0x51,0xf7]
+ vsqrtnepbf16 xmm22 {k7} {z}, xmm23
+
+// CHECK: vsqrtnepbf16 zmm22, zmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x51,0xf7]
+ vsqrtnepbf16 zmm22, zmm23
+
+// CHECK: vsqrtnepbf16 zmm22 {k7}, zmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x4f,0x51,0xf7]
+ vsqrtnepbf16 zmm22 {k7}, zmm23
+
+// CHECK: vsqrtnepbf16 zmm22 {k7} {z}, zmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0xcf,0x51,0xf7]
+ vsqrtnepbf16 zmm22 {k7} {z}, zmm23
+
+// CHECK: vsqrtnepbf16 ymm22, ymm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x51,0xf7]
+ vsqrtnepbf16 ymm22, ymm23
+
+// CHECK: vsqrtnepbf16 ymm22 {k7}, ymm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x2f,0x51,0xf7]
+ vsqrtnepbf16 ymm22 {k7}, ymm23
+
+// CHECK: vsqrtnepbf16 ymm22 {k7} {z}, ymm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0xaf,0x51,0xf7]
+ vsqrtnepbf16 ymm22 {k7} {z}, ymm23
+
+// CHECK: vsqrtnepbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsqrtnepbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vsqrtnepbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7d,0x0f,0x51,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsqrtnepbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vsqrtnepbf16 xmm22, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x18,0x51,0x35,0x00,0x00,0x00,0x00]
+ vsqrtnepbf16 xmm22, word ptr [rip]{1to8}
+
+// CHECK: vsqrtnepbf16 xmm22, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x51,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vsqrtnepbf16 xmm22, xmmword ptr [2*rbp - 512]
+
+// CHECK: vsqrtnepbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x8f,0x51,0x71,0x7f]
+ vsqrtnepbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+
+// CHECK: vsqrtnepbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x9f,0x51,0x72,0x80]
+ vsqrtnepbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vsqrtnepbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsqrtnepbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vsqrtnepbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7d,0x2f,0x51,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsqrtnepbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vsqrtnepbf16 ymm22, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x38,0x51,0x35,0x00,0x00,0x00,0x00]
+ vsqrtnepbf16 ymm22, word ptr [rip]{1to16}
+
+// CHECK: vsqrtnepbf16 ymm22, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x28,0x51,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vsqrtnepbf16 ymm22, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vsqrtnepbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe5,0x7d,0xaf,0x51,0x71,0x7f]
+ vsqrtnepbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+
+// CHECK: vsqrtnepbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xbf,0x51,0x72,0x80]
+ vsqrtnepbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}
+
+// CHECK: vsqrtnepbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsqrtnepbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vsqrtnepbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7d,0x4f,0x51,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsqrtnepbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vsqrtnepbf16 zmm22, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x58,0x51,0x35,0x00,0x00,0x00,0x00]
+ vsqrtnepbf16 zmm22, word ptr [rip]{1to32}
+
+// CHECK: vsqrtnepbf16 zmm22, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x48,0x51,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vsqrtnepbf16 zmm22, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vsqrtnepbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0x7d,0xcf,0x51,0x71,0x7f]
+ vsqrtnepbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+
+// CHECK: vsqrtnepbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xdf,0x51,0x72,0x80]
+ vsqrtnepbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}
+
+// CHECK: vsubnepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0x20,0x5c,0xf0]
+ vsubnepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vsubnepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0x27,0x5c,0xf0]
+ vsubnepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vsubnepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x5c,0xf0]
+ vsubnepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vsubnepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x40,0x5c,0xf0]
+ vsubnepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vsubnepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x47,0x5c,0xf0]
+ vsubnepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vsubnepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x5c,0xf0]
+ vsubnepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vsubnepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x00,0x5c,0xf0]
+ vsubnepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vsubnepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x07,0x5c,0xf0]
+ vsubnepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vsubnepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x87,0x5c,0xf0]
+ vsubnepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vsubnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x5c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsubnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vsubnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x5c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsubnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vsubnepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x5c,0x35,0x00,0x00,0x00,0x00]
+ vsubnepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vsubnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x5c,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vsubnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vsubnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x5c,0x71,0x7f]
+ vsubnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vsubnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x5c,0x72,0x80]
+ vsubnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vsubnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x5c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsubnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vsubnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x5c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsubnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vsubnepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x5c,0x35,0x00,0x00,0x00,0x00]
+ vsubnepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vsubnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x5c,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vsubnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vsubnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x5c,0x71,0x7f]
+ vsubnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vsubnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x5c,0x72,0x80]
+ vsubnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vsubnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x5c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsubnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vsubnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x5c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsubnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vsubnepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x5c,0x35,0x00,0x00,0x00,0x00]
+ vsubnepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vsubnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x5c,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vsubnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vsubnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x5c,0x71,0x7f]
+ vsubnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vsubnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x5c,0x72,0x80]
+ vsubnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index f31c4baada141..fbaa53712a339 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -1176,6 +1176,8 @@ static const X86FoldTableEntry Table1[] = {
{X86::VCOMISSZrr_Int, X86::VCOMISSZrm_Int, TB_NO_REVERSE},
{X86::VCOMISSrr, X86::VCOMISSrm, 0},
{X86::VCOMISSrr_Int, X86::VCOMISSrm_Int, TB_NO_REVERSE},
+ {X86::VCOMSBF16Zrr, X86::VCOMSBF16Zrm, 0},
+ {X86::VCOMSBF16Zrr_Int, X86::VCOMSBF16Zrm_Int, TB_NO_REVERSE},
{X86::VCVTDQ2PDYrr, X86::VCVTDQ2PDYrm, 0},
{X86::VCVTDQ2PDZ128rr, X86::VCVTDQ2PDZ128rm, TB_NO_REVERSE},
{X86::VCVTDQ2PDZ256rr, X86::VCVTDQ2PDZ256rm, 0},
@@ -1410,6 +1412,9 @@ static const X86FoldTableEntry Table1[] = {
{X86::VEXPANDPSZ128rr, X86::VEXPANDPSZ128rm, TB_NO_REVERSE},
{X86::VEXPANDPSZ256rr, X86::VEXPANDPSZ256rm, TB_NO_REVERSE},
{X86::VEXPANDPSZrr, X86::VEXPANDPSZrm, TB_NO_REVERSE},
+ {X86::VFPCLASSPBF16Z128rr, X86::VFPCLASSPBF16Z128rm, 0},
+ {X86::VFPCLASSPBF16Z256rr, X86::VFPCLASSPBF16Z256rm, 0},
+ {X86::VFPCLASSPBF16Zrr, X86::VFPCLASSPBF16Zrm, 0},
{X86::VFPCLASSPDZ128rr, X86::VFPCLASSPDZ128rm, 0},
{X86::VFPCLASSPDZ256rr, X86::VFPCLASSPDZ256rm, 0},
{X86::VFPCLASSPDZrr, X86::VFPCLASSPDZrm, 0},
@@ -1428,6 +1433,9 @@ static const X86FoldTableEntry Table1[] = {
{X86::VFRCZPSrr, X86::VFRCZPSrm, 0},
{X86::VFRCZSDrr, X86::VFRCZSDrm, TB_NO_REVERSE},
{X86::VFRCZSSrr, X86::VFRCZSSrm, TB_NO_REVERSE},
+ {X86::VGETEXPPBF16Z128r, X86::VGETEXPPBF16Z128m, 0},
+ {X86::VGETEXPPBF16Z256r, X86::VGETEXPPBF16Z256m, 0},
+ {X86::VGETEXPPBF16Zr, X86::VGETEXPPBF16Zm, 0},
{X86::VGETEXPPDZ128r, X86::VGETEXPPDZ128m, 0},
{X86::VGETEXPPDZ256r, X86::VGETEXPPDZ256m, 0},
{X86::VGETEXPPDZr, X86::VGETEXPPDZm, 0},
@@ -1437,6 +1445,9 @@ static const X86FoldTableEntry Table1[] = {
{X86::VGETEXPPSZ128r, X86::VGETEXPPSZ128m, 0},
{X86::VGETEXPPSZ256r, X86::VGETEXPPSZ256m, 0},
{X86::VGETEXPPSZr, X86::VGETEXPPSZm, 0},
+ {X86::VGETMANTPBF16Z128rri, X86::VGETMANTPBF16Z128rmi, 0},
+ {X86::VGETMANTPBF16Z256rri, X86::VGETMANTPBF16Z256rmi, 0},
+ {X86::VGETMANTPBF16Zrri, X86::VGETMANTPBF16Zrmi, 0},
{X86::VGETMANTPDZ128rri, X86::VGETMANTPDZ128rmi, 0},
{X86::VGETMANTPDZ256rri, X86::VGETMANTPDZ256rmi, 0},
{X86::VGETMANTPDZrri, X86::VGETMANTPDZrmi, 0},
@@ -1770,11 +1781,17 @@ static const X86FoldTableEntry Table1[] = {
{X86::VRCP14PSZr, X86::VRCP14PSZm, 0},
{X86::VRCP28PDZr, X86::VRCP28PDZm, 0},
{X86::VRCP28PSZr, X86::VRCP28PSZm, 0},
+ {X86::VRCPPBF16Z128r, X86::VRCPPBF16Z128m, 0},
+ {X86::VRCPPBF16Z256r, X86::VRCPPBF16Z256m, 0},
+ {X86::VRCPPBF16Zr, X86::VRCPPBF16Zm, 0},
{X86::VRCPPHZ128r, X86::VRCPPHZ128m, 0},
{X86::VRCPPHZ256r, X86::VRCPPHZ256m, 0},
{X86::VRCPPHZr, X86::VRCPPHZm, 0},
{X86::VRCPPSYr, X86::VRCPPSYm, 0},
{X86::VRCPPSr, X86::VRCPPSm, 0},
+ {X86::VREDUCENEPBF16Z128rri, X86::VREDUCENEPBF16Z128rmi, 0},
+ {X86::VREDUCENEPBF16Z256rri, X86::VREDUCENEPBF16Z256rmi, 0},
+ {X86::VREDUCENEPBF16Zrri, X86::VREDUCENEPBF16Zrmi, 0},
{X86::VREDUCEPDZ128rri, X86::VREDUCEPDZ128rmi, 0},
{X86::VREDUCEPDZ256rri, X86::VREDUCEPDZ256rmi, 0},
{X86::VREDUCEPDZrri, X86::VREDUCEPDZrmi, 0},
@@ -1784,6 +1801,9 @@ static const X86FoldTableEntry Table1[] = {
{X86::VREDUCEPSZ128rri, X86::VREDUCEPSZ128rmi, 0},
{X86::VREDUCEPSZ256rri, X86::VREDUCEPSZ256rmi, 0},
{X86::VREDUCEPSZrri, X86::VREDUCEPSZrmi, 0},
+ {X86::VRNDSCALENEPBF16Z128rri, X86::VRNDSCALENEPBF16Z128rmi, 0},
+ {X86::VRNDSCALENEPBF16Z256rri, X86::VRNDSCALENEPBF16Z256rmi, 0},
+ {X86::VRNDSCALENEPBF16Zrri, X86::VRNDSCALENEPBF16Zrmi, 0},
{X86::VRNDSCALEPDZ128rri, X86::VRNDSCALEPDZ128rmi, 0},
{X86::VRNDSCALEPDZ256rri, X86::VRNDSCALEPDZ256rmi, 0},
{X86::VRNDSCALEPDZrri, X86::VRNDSCALEPDZrmi, 0},
@@ -1805,11 +1825,17 @@ static const X86FoldTableEntry Table1[] = {
{X86::VRSQRT14PSZr, X86::VRSQRT14PSZm, 0},
{X86::VRSQRT28PDZr, X86::VRSQRT28PDZm, 0},
{X86::VRSQRT28PSZr, X86::VRSQRT28PSZm, 0},
+ {X86::VRSQRTPBF16Z128r, X86::VRSQRTPBF16Z128m, 0},
+ {X86::VRSQRTPBF16Z256r, X86::VRSQRTPBF16Z256m, 0},
+ {X86::VRSQRTPBF16Zr, X86::VRSQRTPBF16Zm, 0},
{X86::VRSQRTPHZ128r, X86::VRSQRTPHZ128m, 0},
{X86::VRSQRTPHZ256r, X86::VRSQRTPHZ256m, 0},
{X86::VRSQRTPHZr, X86::VRSQRTPHZm, 0},
{X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0},
{X86::VRSQRTPSr, X86::VRSQRTPSm, 0},
+ {X86::VSQRTNEPBF16Z128r, X86::VSQRTNEPBF16Z128m, 0},
+ {X86::VSQRTNEPBF16Z256r, X86::VSQRTNEPBF16Z256m, 0},
+ {X86::VSQRTNEPBF16Zr, X86::VSQRTNEPBF16Zm, 0},
{X86::VSQRTPDYr, X86::VSQRTPDYm, 0},
{X86::VSQRTPDZ128r, X86::VSQRTPDZ128m, 0},
{X86::VSQRTPDZ256r, X86::VSQRTPDZ256m, 0},
@@ -2284,6 +2310,9 @@ static const X86FoldTableEntry Table2[] = {
{X86::UNPCKHPSrr, X86::UNPCKHPSrm, TB_ALIGN_16},
{X86::UNPCKLPDrr, X86::UNPCKLPDrm, TB_ALIGN_16},
{X86::UNPCKLPSrr, X86::UNPCKLPSrm, TB_ALIGN_16},
+ {X86::VADDNEPBF16Z128rr, X86::VADDNEPBF16Z128rm, 0},
+ {X86::VADDNEPBF16Z256rr, X86::VADDNEPBF16Z256rm, 0},
+ {X86::VADDNEPBF16Zrr, X86::VADDNEPBF16Zrm, 0},
{X86::VADDPDYrr, X86::VADDPDYrm, 0},
{X86::VADDPDZ128rr, X86::VADDPDZ128rm, 0},
{X86::VADDPDZ256rr, X86::VADDPDZ256rm, 0},
@@ -2381,6 +2410,9 @@ static const X86FoldTableEntry Table2[] = {
{X86::VBROADCASTSSZ128rrkz, X86::VBROADCASTSSZ128rmkz, TB_NO_REVERSE},
{X86::VBROADCASTSSZ256rrkz, X86::VBROADCASTSSZ256rmkz, TB_NO_REVERSE},
{X86::VBROADCASTSSZrrkz, X86::VBROADCASTSSZrmkz, TB_NO_REVERSE},
+ {X86::VCMPPBF16Z128rri, X86::VCMPPBF16Z128rmi, 0},
+ {X86::VCMPPBF16Z256rri, X86::VCMPPBF16Z256rmi, 0},
+ {X86::VCMPPBF16Zrri, X86::VCMPPBF16Zrmi, 0},
{X86::VCMPPDYrri, X86::VCMPPDYrmi, 0},
{X86::VCMPPDZ128rri, X86::VCMPPDZ128rmi, 0},
{X86::VCMPPDZ256rri, X86::VCMPPDZ256rmi, 0},
@@ -2608,6 +2640,9 @@ static const X86FoldTableEntry Table2[] = {
{X86::VDBPSADBWZ128rri, X86::VDBPSADBWZ128rmi, 0},
{X86::VDBPSADBWZ256rri, X86::VDBPSADBWZ256rmi, 0},
{X86::VDBPSADBWZrri, X86::VDBPSADBWZrmi, 0},
+ {X86::VDIVNEPBF16Z128rr, X86::VDIVNEPBF16Z128rm, 0},
+ {X86::VDIVNEPBF16Z256rr, X86::VDIVNEPBF16Z256rm, 0},
+ {X86::VDIVNEPBF16Zrr, X86::VDIVNEPBF16Zrm, 0},
{X86::VDIVPDYrr, X86::VDIVPDYrm, 0},
{X86::VDIVPDZ128rr, X86::VDIVPDZ128rm, 0},
{X86::VDIVPDZ256rr, X86::VDIVPDZ256rm, 0},
@@ -2690,6 +2725,9 @@ static const X86FoldTableEntry Table2[] = {
{X86::VFNMSUBSD4rr_Int, X86::VFNMSUBSD4mr_Int, TB_NO_REVERSE},
{X86::VFNMSUBSS4rr, X86::VFNMSUBSS4mr, 0},
{X86::VFNMSUBSS4rr_Int, X86::VFNMSUBSS4mr_Int, TB_NO_REVERSE},
+ {X86::VFPCLASSPBF16Z128rrk, X86::VFPCLASSPBF16Z128rmk, 0},
+ {X86::VFPCLASSPBF16Z256rrk, X86::VFPCLASSPBF16Z256rmk, 0},
+ {X86::VFPCLASSPBF16Zrrk, X86::VFPCLASSPBF16Zrmk, 0},
{X86::VFPCLASSPDZ128rrk, X86::VFPCLASSPDZ128rmk, 0},
{X86::VFPCLASSPDZ256rrk, X86::VFPCLASSPDZ256rmk, 0},
{X86::VFPCLASSPDZrrk, X86::VFPCLASSPDZrmk, 0},
@@ -2702,6 +2740,9 @@ static const X86FoldTableEntry Table2[] = {
{X86::VFPCLASSSDZrrk, X86::VFPCLASSSDZrmk, TB_NO_REVERSE},
{X86::VFPCLASSSHZrrk, X86::VFPCLASSSHZrmk, TB_NO_REVERSE},
{X86::VFPCLASSSSZrrk, X86::VFPCLASSSSZrmk, TB_NO_REVERSE},
+ {X86::VGETEXPPBF16Z128rkz, X86::VGETEXPPBF16Z128mkz, 0},
+ {X86::VGETEXPPBF16Z256rkz, X86::VGETEXPPBF16Z256mkz, 0},
+ {X86::VGETEXPPBF16Zrkz, X86::VGETEXPPBF16Zmkz, 0},
{X86::VGETEXPPDZ128rkz, X86::VGETEXPPDZ128mkz, 0},
{X86::VGETEXPPDZ256rkz, X86::VGETEXPPDZ256mkz, 0},
{X86::VGETEXPPDZrkz, X86::VGETEXPPDZmkz, 0},
@@ -2714,6 +2755,9 @@ static const X86FoldTableEntry Table2[] = {
{X86::VGETEXPSDZr, X86::VGETEXPSDZm, TB_NO_REVERSE},
{X86::VGETEXPSHZr, X86::VGETEXPSHZm, TB_NO_REVERSE},
{X86::VGETEXPSSZr, X86::VGETEXPSSZm, TB_NO_REVERSE},
+ {X86::VGETMANTPBF16Z128rrikz, X86::VGETMANTPBF16Z128rmikz, 0},
+ {X86::VGETMANTPBF16Z256rrikz, X86::VGETMANTPBF16Z256rmikz, 0},
+ {X86::VGETMANTPBF16Zrrikz, X86::VGETMANTPBF16Zrmikz, 0},
{X86::VGETMANTPDZ128rrikz, X86::VGETMANTPDZ128rmikz, 0},
{X86::VGETMANTPDZ256rrikz, X86::VGETMANTPDZ256rmikz, 0},
{X86::VGETMANTPDZrrikz, X86::VGETMANTPDZrmikz, 0},
@@ -2781,6 +2825,9 @@ static const X86FoldTableEntry Table2[] = {
{X86::VMAXCSHZrr, X86::VMAXCSHZrm, 0},
{X86::VMAXCSSZrr, X86::VMAXCSSZrm, 0},
{X86::VMAXCSSrr, X86::VMAXCSSrm, 0},
+ {X86::VMAXPBF16Z128rr, X86::VMAXPBF16Z128rm, 0},
+ {X86::VMAXPBF16Z256rr, X86::VMAXPBF16Z256rm, 0},
+ {X86::VMAXPBF16Zrr, X86::VMAXPBF16Zrm, 0},
{X86::VMAXPDYrr, X86::VMAXPDYrm, 0},
{X86::VMAXPDZ128rr, X86::VMAXPDZ128rm, 0},
{X86::VMAXPDZ256rr, X86::VMAXPDZ256rm, 0},
@@ -2822,6 +2869,9 @@ static const X86FoldTableEntry Table2[] = {
{X86::VMINCSHZrr, X86::VMINCSHZrm, 0},
{X86::VMINCSSZrr, X86::VMINCSSZrm, 0},
{X86::VMINCSSrr, X86::VMINCSSrm, 0},
+ {X86::VMINPBF16Z128rr, X86::VMINPBF16Z128rm, 0},
+ {X86::VMINPBF16Z256rr, X86::VMINPBF16Z256rm, 0},
+ {X86::VMINPBF16Zrr, X86::VMINPBF16Zrm, 0},
{X86::VMINPDYrr, X86::VMINPDYrm, 0},
{X86::VMINPDZ128rr, X86::VMINPDZ128rm, 0},
{X86::VMINPDZ256rr, X86::VMINPDZ256rm, 0},
@@ -2893,6 +2943,9 @@ static const X86FoldTableEntry Table2[] = {
{X86::VMPSADBWZ256rri, X86::VMPSADBWZ256rmi, 0},
{X86::VMPSADBWZrri, X86::VMPSADBWZrmi, 0},
{X86::VMPSADBWrri, X86::VMPSADBWrmi, 0},
+ {X86::VMULNEPBF16Z128rr, X86::VMULNEPBF16Z128rm, 0},
+ {X86::VMULNEPBF16Z256rr, X86::VMULNEPBF16Z256rm, 0},
+ {X86::VMULNEPBF16Zrr, X86::VMULNEPBF16Zrm, 0},
{X86::VMULPDYrr, X86::VMULPDYrm, 0},
{X86::VMULPDZ128rr, X86::VMULPDZ128rm, 0},
{X86::VMULPDZ256rr, X86::VMULPDZ256rm, 0},
@@ -3743,12 +3796,18 @@ static const X86FoldTableEntry Table2[] = {
{X86::VRCP28PSZrkz, X86::VRCP28PSZmkz, 0},
{X86::VRCP28SDZr, X86::VRCP28SDZm, TB_NO_REVERSE},
{X86::VRCP28SSZr, X86::VRCP28SSZm, TB_NO_REVERSE},
+ {X86::VRCPPBF16Z128rkz, X86::VRCPPBF16Z128mkz, 0},
+ {X86::VRCPPBF16Z256rkz, X86::VRCPPBF16Z256mkz, 0},
+ {X86::VRCPPBF16Zrkz, X86::VRCPPBF16Zmkz, 0},
{X86::VRCPPHZ128rkz, X86::VRCPPHZ128mkz, 0},
{X86::VRCPPHZ256rkz, X86::VRCPPHZ256mkz, 0},
{X86::VRCPPHZrkz, X86::VRCPPHZmkz, 0},
{X86::VRCPSHZrr, X86::VRCPSHZrm, TB_NO_REVERSE},
{X86::VRCPSSr, X86::VRCPSSm, 0},
{X86::VRCPSSr_Int, X86::VRCPSSm_Int, TB_NO_REVERSE},
+ {X86::VREDUCENEPBF16Z128rrikz, X86::VREDUCENEPBF16Z128rmikz, 0},
+ {X86::VREDUCENEPBF16Z256rrikz, X86::VREDUCENEPBF16Z256rmikz, 0},
+ {X86::VREDUCENEPBF16Zrrikz, X86::VREDUCENEPBF16Zrmikz, 0},
{X86::VREDUCEPDZ128rrikz, X86::VREDUCEPDZ128rmikz, 0},
{X86::VREDUCEPDZ256rrikz, X86::VREDUCEPDZ256rmikz, 0},
{X86::VREDUCEPDZrrikz, X86::VREDUCEPDZrmikz, 0},
@@ -3761,6 +3820,9 @@ static const X86FoldTableEntry Table2[] = {
{X86::VREDUCESDZrri, X86::VREDUCESDZrmi, TB_NO_REVERSE},
{X86::VREDUCESHZrri, X86::VREDUCESHZrmi, TB_NO_REVERSE},
{X86::VREDUCESSZrri, X86::VREDUCESSZrmi, TB_NO_REVERSE},
+ {X86::VRNDSCALENEPBF16Z128rrikz, X86::VRNDSCALENEPBF16Z128rmikz, 0},
+ {X86::VRNDSCALENEPBF16Z256rrikz, X86::VRNDSCALENEPBF16Z256rmikz, 0},
+ {X86::VRNDSCALENEPBF16Zrrikz, X86::VRNDSCALENEPBF16Zrmikz, 0},
{X86::VRNDSCALEPDZ128rrikz, X86::VRNDSCALEPDZ128rmikz, 0},
{X86::VRNDSCALEPDZ256rrikz, X86::VRNDSCALEPDZ256rmikz, 0},
{X86::VRNDSCALEPDZrrikz, X86::VRNDSCALEPDZrmikz, 0},
@@ -3792,12 +3854,18 @@ static const X86FoldTableEntry Table2[] = {
{X86::VRSQRT28PSZrkz, X86::VRSQRT28PSZmkz, 0},
{X86::VRSQRT28SDZr, X86::VRSQRT28SDZm, TB_NO_REVERSE},
{X86::VRSQRT28SSZr, X86::VRSQRT28SSZm, TB_NO_REVERSE},
+ {X86::VRSQRTPBF16Z128rkz, X86::VRSQRTPBF16Z128mkz, 0},
+ {X86::VRSQRTPBF16Z256rkz, X86::VRSQRTPBF16Z256mkz, 0},
+ {X86::VRSQRTPBF16Zrkz, X86::VRSQRTPBF16Zmkz, 0},
{X86::VRSQRTPHZ128rkz, X86::VRSQRTPHZ128mkz, 0},
{X86::VRSQRTPHZ256rkz, X86::VRSQRTPHZ256mkz, 0},
{X86::VRSQRTPHZrkz, X86::VRSQRTPHZmkz, 0},
{X86::VRSQRTSHZrr, X86::VRSQRTSHZrm, TB_NO_REVERSE},
{X86::VRSQRTSSr, X86::VRSQRTSSm, 0},
{X86::VRSQRTSSr_Int, X86::VRSQRTSSm_Int, TB_NO_REVERSE},
+ {X86::VSCALEFPBF16Z128rr, X86::VSCALEFPBF16Z128rm, 0},
+ {X86::VSCALEFPBF16Z256rr, X86::VSCALEFPBF16Z256rm, 0},
+ {X86::VSCALEFPBF16Zrr, X86::VSCALEFPBF16Zrm, 0},
{X86::VSCALEFPDZ128rr, X86::VSCALEFPDZ128rm, 0},
{X86::VSCALEFPDZ256rr, X86::VSCALEFPDZ256rm, 0},
{X86::VSCALEFPDZrr, X86::VSCALEFPDZrm, 0},
@@ -3832,6 +3900,9 @@ static const X86FoldTableEntry Table2[] = {
{X86::VSM4KEY4rr, X86::VSM4KEY4rm, 0},
{X86::VSM4RNDS4Yrr, X86::VSM4RNDS4Yrm, 0},
{X86::VSM4RNDS4rr, X86::VSM4RNDS4rm, 0},
+ {X86::VSQRTNEPBF16Z128rkz, X86::VSQRTNEPBF16Z128mkz, 0},
+ {X86::VSQRTNEPBF16Z256rkz, X86::VSQRTNEPBF16Z256mkz, 0},
+ {X86::VSQRTNEPBF16Zrkz, X86::VSQRTNEPBF16Zmkz, 0},
{X86::VSQRTPDZ128rkz, X86::VSQRTPDZ128mkz, 0},
{X86::VSQRTPDZ256rkz, X86::VSQRTPDZ256mkz, 0},
{X86::VSQRTPDZrkz, X86::VSQRTPDZmkz, 0},
@@ -3851,6 +3922,9 @@ static const X86FoldTableEntry Table2[] = {
{X86::VSQRTSSZr_Int, X86::VSQRTSSZm_Int, TB_NO_REVERSE},
{X86::VSQRTSSr, X86::VSQRTSSm, 0},
{X86::VSQRTSSr_Int, X86::VSQRTSSm_Int, TB_NO_REVERSE},
+ {X86::VSUBNEPBF16Z128rr, X86::VSUBNEPBF16Z128rm, 0},
+ {X86::VSUBNEPBF16Z256rr, X86::VSUBNEPBF16Z256rm, 0},
+ {X86::VSUBNEPBF16Zrr, X86::VSUBNEPBF16Zrm, 0},
{X86::VSUBPDYrr, X86::VSUBPDYrm, 0},
{X86::VSUBPDZ128rr, X86::VSUBPDZ128rm, 0},
{X86::VSUBPDZ256rr, X86::VSUBPDZ256rm, 0},
@@ -3925,6 +3999,9 @@ static const X86FoldTableEntry Table2[] = {
};
static const X86FoldTableEntry Table3[] = {
+ {X86::VADDNEPBF16Z128rrkz, X86::VADDNEPBF16Z128rmkz, 0},
+ {X86::VADDNEPBF16Z256rrkz, X86::VADDNEPBF16Z256rmkz, 0},
+ {X86::VADDNEPBF16Zrrkz, X86::VADDNEPBF16Zrmkz, 0},
{X86::VADDPDZ128rrkz, X86::VADDPDZ128rmkz, 0},
{X86::VADDPDZ256rrkz, X86::VADDPDZ256rmkz, 0},
{X86::VADDPDZrrkz, X86::VADDPDZrmkz, 0},
@@ -3971,6 +4048,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VBROADCASTSSZ128rrk, X86::VBROADCASTSSZ128rmk, TB_NO_REVERSE},
{X86::VBROADCASTSSZ256rrk, X86::VBROADCASTSSZ256rmk, TB_NO_REVERSE},
{X86::VBROADCASTSSZrrk, X86::VBROADCASTSSZrmk, TB_NO_REVERSE},
+ {X86::VCMPPBF16Z128rrik, X86::VCMPPBF16Z128rmik, 0},
+ {X86::VCMPPBF16Z256rrik, X86::VCMPPBF16Z256rmik, 0},
+ {X86::VCMPPBF16Zrrik, X86::VCMPPBF16Zrmik, 0},
{X86::VCMPPDZ128rrik, X86::VCMPPDZ128rmik, 0},
{X86::VCMPPDZ256rrik, X86::VCMPPDZ256rmik, 0},
{X86::VCMPPDZrrik, X86::VCMPPDZrmik, 0},
@@ -4145,6 +4225,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VDBPSADBWZ128rrikz, X86::VDBPSADBWZ128rmikz, 0},
{X86::VDBPSADBWZ256rrikz, X86::VDBPSADBWZ256rmikz, 0},
{X86::VDBPSADBWZrrikz, X86::VDBPSADBWZrmikz, 0},
+ {X86::VDIVNEPBF16Z128rrkz, X86::VDIVNEPBF16Z128rmkz, 0},
+ {X86::VDIVNEPBF16Z256rrkz, X86::VDIVNEPBF16Z256rmkz, 0},
+ {X86::VDIVNEPBF16Zrrkz, X86::VDIVNEPBF16Zrmkz, 0},
{X86::VDIVPDZ128rrkz, X86::VDIVPDZ128rmkz, 0},
{X86::VDIVPDZ256rrkz, X86::VDIVPDZ256rmkz, 0},
{X86::VDIVPDZrrkz, X86::VDIVPDZrmkz, 0},
@@ -4184,6 +4267,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VFIXUPIMMPSZrri, X86::VFIXUPIMMPSZrmi, 0},
{X86::VFIXUPIMMSDZrri, X86::VFIXUPIMMSDZrmi, TB_NO_REVERSE},
{X86::VFIXUPIMMSSZrri, X86::VFIXUPIMMSSZrmi, TB_NO_REVERSE},
+ {X86::VFMADD132NEPBF16Z128r, X86::VFMADD132NEPBF16Z128m, 0},
+ {X86::VFMADD132NEPBF16Z256r, X86::VFMADD132NEPBF16Z256m, 0},
+ {X86::VFMADD132NEPBF16Zr, X86::VFMADD132NEPBF16Zm, 0},
{X86::VFMADD132PDYr, X86::VFMADD132PDYm, 0},
{X86::VFMADD132PDZ128r, X86::VFMADD132PDZ128m, 0},
{X86::VFMADD132PDZ256r, X86::VFMADD132PDZ256m, 0},
@@ -4207,6 +4293,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VFMADD132SSZr_Int, X86::VFMADD132SSZm_Int, TB_NO_REVERSE},
{X86::VFMADD132SSr, X86::VFMADD132SSm, 0},
{X86::VFMADD132SSr_Int, X86::VFMADD132SSm_Int, TB_NO_REVERSE},
+ {X86::VFMADD213NEPBF16Z128r, X86::VFMADD213NEPBF16Z128m, 0},
+ {X86::VFMADD213NEPBF16Z256r, X86::VFMADD213NEPBF16Z256m, 0},
+ {X86::VFMADD213NEPBF16Zr, X86::VFMADD213NEPBF16Zm, 0},
{X86::VFMADD213PDYr, X86::VFMADD213PDYm, 0},
{X86::VFMADD213PDZ128r, X86::VFMADD213PDZ128m, 0},
{X86::VFMADD213PDZ256r, X86::VFMADD213PDZ256m, 0},
@@ -4230,6 +4319,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VFMADD213SSZr_Int, X86::VFMADD213SSZm_Int, TB_NO_REVERSE},
{X86::VFMADD213SSr, X86::VFMADD213SSm, 0},
{X86::VFMADD213SSr_Int, X86::VFMADD213SSm_Int, TB_NO_REVERSE},
+ {X86::VFMADD231NEPBF16Z128r, X86::VFMADD231NEPBF16Z128m, 0},
+ {X86::VFMADD231NEPBF16Z256r, X86::VFMADD231NEPBF16Z256m, 0},
+ {X86::VFMADD231NEPBF16Zr, X86::VFMADD231NEPBF16Zm, 0},
{X86::VFMADD231PDYr, X86::VFMADD231PDYm, 0},
{X86::VFMADD231PDZ128r, X86::VFMADD231PDZ128m, 0},
{X86::VFMADD231PDZ256r, X86::VFMADD231PDZ256m, 0},
@@ -4308,6 +4400,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4rm, 0},
{X86::VFMADDSUBPS4Yrr, X86::VFMADDSUBPS4Yrm, 0},
{X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4rm, 0},
+ {X86::VFMSUB132NEPBF16Z128r, X86::VFMSUB132NEPBF16Z128m, 0},
+ {X86::VFMSUB132NEPBF16Z256r, X86::VFMSUB132NEPBF16Z256m, 0},
+ {X86::VFMSUB132NEPBF16Zr, X86::VFMSUB132NEPBF16Zm, 0},
{X86::VFMSUB132PDYr, X86::VFMSUB132PDYm, 0},
{X86::VFMSUB132PDZ128r, X86::VFMSUB132PDZ128m, 0},
{X86::VFMSUB132PDZ256r, X86::VFMSUB132PDZ256m, 0},
@@ -4331,6 +4426,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VFMSUB132SSZr_Int, X86::VFMSUB132SSZm_Int, TB_NO_REVERSE},
{X86::VFMSUB132SSr, X86::VFMSUB132SSm, 0},
{X86::VFMSUB132SSr_Int, X86::VFMSUB132SSm_Int, TB_NO_REVERSE},
+ {X86::VFMSUB213NEPBF16Z128r, X86::VFMSUB213NEPBF16Z128m, 0},
+ {X86::VFMSUB213NEPBF16Z256r, X86::VFMSUB213NEPBF16Z256m, 0},
+ {X86::VFMSUB213NEPBF16Zr, X86::VFMSUB213NEPBF16Zm, 0},
{X86::VFMSUB213PDYr, X86::VFMSUB213PDYm, 0},
{X86::VFMSUB213PDZ128r, X86::VFMSUB213PDZ128m, 0},
{X86::VFMSUB213PDZ256r, X86::VFMSUB213PDZ256m, 0},
@@ -4354,6 +4452,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VFMSUB213SSZr_Int, X86::VFMSUB213SSZm_Int, TB_NO_REVERSE},
{X86::VFMSUB213SSr, X86::VFMSUB213SSm, 0},
{X86::VFMSUB213SSr_Int, X86::VFMSUB213SSm_Int, TB_NO_REVERSE},
+ {X86::VFMSUB231NEPBF16Z128r, X86::VFMSUB231NEPBF16Z128m, 0},
+ {X86::VFMSUB231NEPBF16Z256r, X86::VFMSUB231NEPBF16Z256m, 0},
+ {X86::VFMSUB231NEPBF16Zr, X86::VFMSUB231NEPBF16Zm, 0},
{X86::VFMSUB231PDYr, X86::VFMSUB231PDYm, 0},
{X86::VFMSUB231PDZ128r, X86::VFMSUB231PDZ128m, 0},
{X86::VFMSUB231PDZ256r, X86::VFMSUB231PDZ256m, 0},
@@ -4432,6 +4533,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VFMULCPHZ256rrkz, X86::VFMULCPHZ256rmkz, 0},
{X86::VFMULCPHZrrkz, X86::VFMULCPHZrmkz, 0},
{X86::VFMULCSHZrrkz, X86::VFMULCSHZrmkz, TB_NO_REVERSE},
+ {X86::VFNMADD132NEPBF16Z128r, X86::VFNMADD132NEPBF16Z128m, 0},
+ {X86::VFNMADD132NEPBF16Z256r, X86::VFNMADD132NEPBF16Z256m, 0},
+ {X86::VFNMADD132NEPBF16Zr, X86::VFNMADD132NEPBF16Zm, 0},
{X86::VFNMADD132PDYr, X86::VFNMADD132PDYm, 0},
{X86::VFNMADD132PDZ128r, X86::VFNMADD132PDZ128m, 0},
{X86::VFNMADD132PDZ256r, X86::VFNMADD132PDZ256m, 0},
@@ -4455,6 +4559,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VFNMADD132SSZr_Int, X86::VFNMADD132SSZm_Int, TB_NO_REVERSE},
{X86::VFNMADD132SSr, X86::VFNMADD132SSm, 0},
{X86::VFNMADD132SSr_Int, X86::VFNMADD132SSm_Int, TB_NO_REVERSE},
+ {X86::VFNMADD213NEPBF16Z128r, X86::VFNMADD213NEPBF16Z128m, 0},
+ {X86::VFNMADD213NEPBF16Z256r, X86::VFNMADD213NEPBF16Z256m, 0},
+ {X86::VFNMADD213NEPBF16Zr, X86::VFNMADD213NEPBF16Zm, 0},
{X86::VFNMADD213PDYr, X86::VFNMADD213PDYm, 0},
{X86::VFNMADD213PDZ128r, X86::VFNMADD213PDZ128m, 0},
{X86::VFNMADD213PDZ256r, X86::VFNMADD213PDZ256m, 0},
@@ -4478,6 +4585,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VFNMADD213SSZr_Int, X86::VFNMADD213SSZm_Int, TB_NO_REVERSE},
{X86::VFNMADD213SSr, X86::VFNMADD213SSm, 0},
{X86::VFNMADD213SSr_Int, X86::VFNMADD213SSm_Int, TB_NO_REVERSE},
+ {X86::VFNMADD231NEPBF16Z128r, X86::VFNMADD231NEPBF16Z128m, 0},
+ {X86::VFNMADD231NEPBF16Z256r, X86::VFNMADD231NEPBF16Z256m, 0},
+ {X86::VFNMADD231NEPBF16Zr, X86::VFNMADD231NEPBF16Zm, 0},
{X86::VFNMADD231PDYr, X86::VFNMADD231PDYm, 0},
{X86::VFNMADD231PDZ128r, X86::VFNMADD231PDZ128m, 0},
{X86::VFNMADD231PDZ256r, X86::VFNMADD231PDZ256m, 0},
@@ -4509,6 +4619,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VFNMADDSD4rr_Int, X86::VFNMADDSD4rm_Int, TB_NO_REVERSE},
{X86::VFNMADDSS4rr, X86::VFNMADDSS4rm, 0},
{X86::VFNMADDSS4rr_Int, X86::VFNMADDSS4rm_Int, TB_NO_REVERSE},
+ {X86::VFNMSUB132NEPBF16Z128r, X86::VFNMSUB132NEPBF16Z128m, 0},
+ {X86::VFNMSUB132NEPBF16Z256r, X86::VFNMSUB132NEPBF16Z256m, 0},
+ {X86::VFNMSUB132NEPBF16Zr, X86::VFNMSUB132NEPBF16Zm, 0},
{X86::VFNMSUB132PDYr, X86::VFNMSUB132PDYm, 0},
{X86::VFNMSUB132PDZ128r, X86::VFNMSUB132PDZ128m, 0},
{X86::VFNMSUB132PDZ256r, X86::VFNMSUB132PDZ256m, 0},
@@ -4532,6 +4645,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VFNMSUB132SSZr_Int, X86::VFNMSUB132SSZm_Int, TB_NO_REVERSE},
{X86::VFNMSUB132SSr, X86::VFNMSUB132SSm, 0},
{X86::VFNMSUB132SSr_Int, X86::VFNMSUB132SSm_Int, TB_NO_REVERSE},
+ {X86::VFNMSUB213NEPBF16Z128r, X86::VFNMSUB213NEPBF16Z128m, 0},
+ {X86::VFNMSUB213NEPBF16Z256r, X86::VFNMSUB213NEPBF16Z256m, 0},
+ {X86::VFNMSUB213NEPBF16Zr, X86::VFNMSUB213NEPBF16Zm, 0},
{X86::VFNMSUB213PDYr, X86::VFNMSUB213PDYm, 0},
{X86::VFNMSUB213PDZ128r, X86::VFNMSUB213PDZ128m, 0},
{X86::VFNMSUB213PDZ256r, X86::VFNMSUB213PDZ256m, 0},
@@ -4555,6 +4671,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VFNMSUB213SSZr_Int, X86::VFNMSUB213SSZm_Int, TB_NO_REVERSE},
{X86::VFNMSUB213SSr, X86::VFNMSUB213SSm, 0},
{X86::VFNMSUB213SSr_Int, X86::VFNMSUB213SSm_Int, TB_NO_REVERSE},
+ {X86::VFNMSUB231NEPBF16Z128r, X86::VFNMSUB231NEPBF16Z128m, 0},
+ {X86::VFNMSUB231NEPBF16Z256r, X86::VFNMSUB231NEPBF16Z256m, 0},
+ {X86::VFNMSUB231NEPBF16Zr, X86::VFNMSUB231NEPBF16Zm, 0},
{X86::VFNMSUB231PDYr, X86::VFNMSUB231PDYm, 0},
{X86::VFNMSUB231PDZ128r, X86::VFNMSUB231PDZ128m, 0},
{X86::VFNMSUB231PDZ256r, X86::VFNMSUB231PDZ256m, 0},
@@ -4586,6 +4705,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VFNMSUBSD4rr_Int, X86::VFNMSUBSD4rm_Int, TB_NO_REVERSE},
{X86::VFNMSUBSS4rr, X86::VFNMSUBSS4rm, 0},
{X86::VFNMSUBSS4rr_Int, X86::VFNMSUBSS4rm_Int, TB_NO_REVERSE},
+ {X86::VGETEXPPBF16Z128rk, X86::VGETEXPPBF16Z128mk, 0},
+ {X86::VGETEXPPBF16Z256rk, X86::VGETEXPPBF16Z256mk, 0},
+ {X86::VGETEXPPBF16Zrk, X86::VGETEXPPBF16Zmk, 0},
{X86::VGETEXPPDZ128rk, X86::VGETEXPPDZ128mk, 0},
{X86::VGETEXPPDZ256rk, X86::VGETEXPPDZ256mk, 0},
{X86::VGETEXPPDZrk, X86::VGETEXPPDZmk, 0},
@@ -4598,6 +4720,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VGETEXPSDZrkz, X86::VGETEXPSDZmkz, TB_NO_REVERSE},
{X86::VGETEXPSHZrkz, X86::VGETEXPSHZmkz, TB_NO_REVERSE},
{X86::VGETEXPSSZrkz, X86::VGETEXPSSZmkz, TB_NO_REVERSE},
+ {X86::VGETMANTPBF16Z128rrik, X86::VGETMANTPBF16Z128rmik, 0},
+ {X86::VGETMANTPBF16Z256rrik, X86::VGETMANTPBF16Z256rmik, 0},
+ {X86::VGETMANTPBF16Zrrik, X86::VGETMANTPBF16Zrmik, 0},
{X86::VGETMANTPDZ128rrik, X86::VGETMANTPDZ128rmik, 0},
{X86::VGETMANTPDZ256rrik, X86::VGETMANTPDZ256rmik, 0},
{X86::VGETMANTPDZrrik, X86::VGETMANTPDZrmik, 0},
@@ -4640,6 +4765,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VMAXCPSZ128rrkz, X86::VMAXCPSZ128rmkz, 0},
{X86::VMAXCPSZ256rrkz, X86::VMAXCPSZ256rmkz, 0},
{X86::VMAXCPSZrrkz, X86::VMAXCPSZrmkz, 0},
+ {X86::VMAXPBF16Z128rrkz, X86::VMAXPBF16Z128rmkz, 0},
+ {X86::VMAXPBF16Z256rrkz, X86::VMAXPBF16Z256rmkz, 0},
+ {X86::VMAXPBF16Zrrkz, X86::VMAXPBF16Zrmkz, 0},
{X86::VMAXPDZ128rrkz, X86::VMAXPDZ128rmkz, 0},
{X86::VMAXPDZ256rrkz, X86::VMAXPDZ256rmkz, 0},
{X86::VMAXPDZrrkz, X86::VMAXPDZrmkz, 0},
@@ -4661,6 +4789,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VMINCPSZ128rrkz, X86::VMINCPSZ128rmkz, 0},
{X86::VMINCPSZ256rrkz, X86::VMINCPSZ256rmkz, 0},
{X86::VMINCPSZrrkz, X86::VMINCPSZrmkz, 0},
+ {X86::VMINPBF16Z128rrkz, X86::VMINPBF16Z128rmkz, 0},
+ {X86::VMINPBF16Z256rrkz, X86::VMINPBF16Z256rmkz, 0},
+ {X86::VMINPBF16Zrrkz, X86::VMINPBF16Zrmkz, 0},
{X86::VMINPDZ128rrkz, X86::VMINPDZ128rmkz, 0},
{X86::VMINPDZ256rrkz, X86::VMINPDZ256rmkz, 0},
{X86::VMINPDZrrkz, X86::VMINPDZrmkz, 0},
@@ -4715,6 +4846,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VMPSADBWZ128rrikz, X86::VMPSADBWZ128rmikz, 0},
{X86::VMPSADBWZ256rrikz, X86::VMPSADBWZ256rmikz, 0},
{X86::VMPSADBWZrrikz, X86::VMPSADBWZrmikz, 0},
+ {X86::VMULNEPBF16Z128rrkz, X86::VMULNEPBF16Z128rmkz, 0},
+ {X86::VMULNEPBF16Z256rrkz, X86::VMULNEPBF16Z256rmkz, 0},
+ {X86::VMULNEPBF16Zrrkz, X86::VMULNEPBF16Zrmkz, 0},
{X86::VMULPDZ128rrkz, X86::VMULPDZ128rmkz, 0},
{X86::VMULPDZ256rrkz, X86::VMULPDZ256rmkz, 0},
{X86::VMULPDZrrkz, X86::VMULPDZrmkz, 0},
@@ -5420,10 +5554,16 @@ static const X86FoldTableEntry Table3[] = {
{X86::VRCP28PSZrk, X86::VRCP28PSZmk, 0},
{X86::VRCP28SDZrkz, X86::VRCP28SDZmkz, TB_NO_REVERSE},
{X86::VRCP28SSZrkz, X86::VRCP28SSZmkz, TB_NO_REVERSE},
+ {X86::VRCPPBF16Z128rk, X86::VRCPPBF16Z128mk, 0},
+ {X86::VRCPPBF16Z256rk, X86::VRCPPBF16Z256mk, 0},
+ {X86::VRCPPBF16Zrk, X86::VRCPPBF16Zmk, 0},
{X86::VRCPPHZ128rk, X86::VRCPPHZ128mk, 0},
{X86::VRCPPHZ256rk, X86::VRCPPHZ256mk, 0},
{X86::VRCPPHZrk, X86::VRCPPHZmk, 0},
{X86::VRCPSHZrrkz, X86::VRCPSHZrmkz, TB_NO_REVERSE},
+ {X86::VREDUCENEPBF16Z128rrik, X86::VREDUCENEPBF16Z128rmik, 0},
+ {X86::VREDUCENEPBF16Z256rrik, X86::VREDUCENEPBF16Z256rmik, 0},
+ {X86::VREDUCENEPBF16Zrrik, X86::VREDUCENEPBF16Zrmik, 0},
{X86::VREDUCEPDZ128rrik, X86::VREDUCEPDZ128rmik, 0},
{X86::VREDUCEPDZ256rrik, X86::VREDUCEPDZ256rmik, 0},
{X86::VREDUCEPDZrrik, X86::VREDUCEPDZrmik, 0},
@@ -5436,6 +5576,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VREDUCESDZrrikz, X86::VREDUCESDZrmikz, TB_NO_REVERSE},
{X86::VREDUCESHZrrikz, X86::VREDUCESHZrmikz, TB_NO_REVERSE},
{X86::VREDUCESSZrrikz, X86::VREDUCESSZrmikz, TB_NO_REVERSE},
+ {X86::VRNDSCALENEPBF16Z128rrik, X86::VRNDSCALENEPBF16Z128rmik, 0},
+ {X86::VRNDSCALENEPBF16Z256rrik, X86::VRNDSCALENEPBF16Z256rmik, 0},
+ {X86::VRNDSCALENEPBF16Zrrik, X86::VRNDSCALENEPBF16Zrmik, 0},
{X86::VRNDSCALEPDZ128rrik, X86::VRNDSCALEPDZ128rmik, 0},
{X86::VRNDSCALEPDZ256rrik, X86::VRNDSCALEPDZ256rmik, 0},
{X86::VRNDSCALEPDZrrik, X86::VRNDSCALEPDZrmik, 0},
@@ -5460,10 +5603,16 @@ static const X86FoldTableEntry Table3[] = {
{X86::VRSQRT28PSZrk, X86::VRSQRT28PSZmk, 0},
{X86::VRSQRT28SDZrkz, X86::VRSQRT28SDZmkz, TB_NO_REVERSE},
{X86::VRSQRT28SSZrkz, X86::VRSQRT28SSZmkz, TB_NO_REVERSE},
+ {X86::VRSQRTPBF16Z128rk, X86::VRSQRTPBF16Z128mk, 0},
+ {X86::VRSQRTPBF16Z256rk, X86::VRSQRTPBF16Z256mk, 0},
+ {X86::VRSQRTPBF16Zrk, X86::VRSQRTPBF16Zmk, 0},
{X86::VRSQRTPHZ128rk, X86::VRSQRTPHZ128mk, 0},
{X86::VRSQRTPHZ256rk, X86::VRSQRTPHZ256mk, 0},
{X86::VRSQRTPHZrk, X86::VRSQRTPHZmk, 0},
{X86::VRSQRTSHZrrkz, X86::VRSQRTSHZrmkz, TB_NO_REVERSE},
+ {X86::VSCALEFPBF16Z128rrkz, X86::VSCALEFPBF16Z128rmkz, 0},
+ {X86::VSCALEFPBF16Z256rrkz, X86::VSCALEFPBF16Z256rmkz, 0},
+ {X86::VSCALEFPBF16Zrrkz, X86::VSCALEFPBF16Zrmkz, 0},
{X86::VSCALEFPDZ128rrkz, X86::VSCALEFPDZ128rmkz, 0},
{X86::VSCALEFPDZ256rrkz, X86::VSCALEFPDZ256rmkz, 0},
{X86::VSCALEFPDZrrkz, X86::VSCALEFPDZrmkz, 0},
@@ -5493,6 +5642,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VSM3MSG1rr, X86::VSM3MSG1rm, 0},
{X86::VSM3MSG2rr, X86::VSM3MSG2rm, 0},
{X86::VSM3RNDS2rr, X86::VSM3RNDS2rm, 0},
+ {X86::VSQRTNEPBF16Z128rk, X86::VSQRTNEPBF16Z128mk, 0},
+ {X86::VSQRTNEPBF16Z256rk, X86::VSQRTNEPBF16Z256mk, 0},
+ {X86::VSQRTNEPBF16Zrk, X86::VSQRTNEPBF16Zmk, 0},
{X86::VSQRTPDZ128rk, X86::VSQRTPDZ128mk, 0},
{X86::VSQRTPDZ256rk, X86::VSQRTPDZ256mk, 0},
{X86::VSQRTPDZrk, X86::VSQRTPDZmk, 0},
@@ -5505,6 +5657,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VSQRTSDZr_Intkz, X86::VSQRTSDZm_Intkz, TB_NO_REVERSE},
{X86::VSQRTSHZr_Intkz, X86::VSQRTSHZm_Intkz, TB_NO_REVERSE},
{X86::VSQRTSSZr_Intkz, X86::VSQRTSSZm_Intkz, TB_NO_REVERSE},
+ {X86::VSUBNEPBF16Z128rrkz, X86::VSUBNEPBF16Z128rmkz, 0},
+ {X86::VSUBNEPBF16Z256rrkz, X86::VSUBNEPBF16Z256rmkz, 0},
+ {X86::VSUBNEPBF16Zrrkz, X86::VSUBNEPBF16Zrmkz, 0},
{X86::VSUBPDZ128rrkz, X86::VSUBPDZ128rmkz, 0},
{X86::VSUBPDZ256rrkz, X86::VSUBPDZ256rmkz, 0},
{X86::VSUBPDZrrkz, X86::VSUBPDZrmkz, 0},
@@ -5538,6 +5693,9 @@ static const X86FoldTableEntry Table3[] = {
};
static const X86FoldTableEntry Table4[] = {
+ {X86::VADDNEPBF16Z128rrk, X86::VADDNEPBF16Z128rmk, 0},
+ {X86::VADDNEPBF16Z256rrk, X86::VADDNEPBF16Z256rmk, 0},
+ {X86::VADDNEPBF16Zrrk, X86::VADDNEPBF16Zrmk, 0},
{X86::VADDPDZ128rrk, X86::VADDPDZ128rmk, 0},
{X86::VADDPDZ256rrk, X86::VADDPDZ256rmk, 0},
{X86::VADDPDZrrk, X86::VADDPDZrmk, 0},
@@ -5580,6 +5738,9 @@ static const X86FoldTableEntry Table4[] = {
{X86::VDBPSADBWZ128rrik, X86::VDBPSADBWZ128rmik, 0},
{X86::VDBPSADBWZ256rrik, X86::VDBPSADBWZ256rmik, 0},
{X86::VDBPSADBWZrrik, X86::VDBPSADBWZrmik, 0},
+ {X86::VDIVNEPBF16Z128rrk, X86::VDIVNEPBF16Z128rmk, 0},
+ {X86::VDIVNEPBF16Z256rrk, X86::VDIVNEPBF16Z256rmk, 0},
+ {X86::VDIVNEPBF16Zrrk, X86::VDIVNEPBF16Zrmk, 0},
{X86::VDIVPDZ128rrk, X86::VDIVPDZ128rmk, 0},
{X86::VDIVPDZ256rrk, X86::VDIVPDZ256rmk, 0},
{X86::VDIVPDZrrk, X86::VDIVPDZrmk, 0},
@@ -5626,6 +5787,12 @@ static const X86FoldTableEntry Table4[] = {
{X86::VFIXUPIMMSDZrrikz, X86::VFIXUPIMMSDZrmikz, TB_NO_REVERSE},
{X86::VFIXUPIMMSSZrrik, X86::VFIXUPIMMSSZrmik, TB_NO_REVERSE},
{X86::VFIXUPIMMSSZrrikz, X86::VFIXUPIMMSSZrmikz, TB_NO_REVERSE},
+ {X86::VFMADD132NEPBF16Z128rk, X86::VFMADD132NEPBF16Z128mk, 0},
+ {X86::VFMADD132NEPBF16Z128rkz, X86::VFMADD132NEPBF16Z128mkz, 0},
+ {X86::VFMADD132NEPBF16Z256rk, X86::VFMADD132NEPBF16Z256mk, 0},
+ {X86::VFMADD132NEPBF16Z256rkz, X86::VFMADD132NEPBF16Z256mkz, 0},
+ {X86::VFMADD132NEPBF16Zrk, X86::VFMADD132NEPBF16Zmk, 0},
+ {X86::VFMADD132NEPBF16Zrkz, X86::VFMADD132NEPBF16Zmkz, 0},
{X86::VFMADD132PDZ128rk, X86::VFMADD132PDZ128mk, 0},
{X86::VFMADD132PDZ128rkz, X86::VFMADD132PDZ128mkz, 0},
{X86::VFMADD132PDZ256rk, X86::VFMADD132PDZ256mk, 0},
@@ -5650,6 +5817,12 @@ static const X86FoldTableEntry Table4[] = {
{X86::VFMADD132SHZr_Intkz, X86::VFMADD132SHZm_Intkz, TB_NO_REVERSE},
{X86::VFMADD132SSZr_Intk, X86::VFMADD132SSZm_Intk, TB_NO_REVERSE},
{X86::VFMADD132SSZr_Intkz, X86::VFMADD132SSZm_Intkz, TB_NO_REVERSE},
+ {X86::VFMADD213NEPBF16Z128rk, X86::VFMADD213NEPBF16Z128mk, 0},
+ {X86::VFMADD213NEPBF16Z128rkz, X86::VFMADD213NEPBF16Z128mkz, 0},
+ {X86::VFMADD213NEPBF16Z256rk, X86::VFMADD213NEPBF16Z256mk, 0},
+ {X86::VFMADD213NEPBF16Z256rkz, X86::VFMADD213NEPBF16Z256mkz, 0},
+ {X86::VFMADD213NEPBF16Zrk, X86::VFMADD213NEPBF16Zmk, 0},
+ {X86::VFMADD213NEPBF16Zrkz, X86::VFMADD213NEPBF16Zmkz, 0},
{X86::VFMADD213PDZ128rk, X86::VFMADD213PDZ128mk, 0},
{X86::VFMADD213PDZ128rkz, X86::VFMADD213PDZ128mkz, 0},
{X86::VFMADD213PDZ256rk, X86::VFMADD213PDZ256mk, 0},
@@ -5674,6 +5847,12 @@ static const X86FoldTableEntry Table4[] = {
{X86::VFMADD213SHZr_Intkz, X86::VFMADD213SHZm_Intkz, TB_NO_REVERSE},
{X86::VFMADD213SSZr_Intk, X86::VFMADD213SSZm_Intk, TB_NO_REVERSE},
{X86::VFMADD213SSZr_Intkz, X86::VFMADD213SSZm_Intkz, TB_NO_REVERSE},
+ {X86::VFMADD231NEPBF16Z128rk, X86::VFMADD231NEPBF16Z128mk, 0},
+ {X86::VFMADD231NEPBF16Z128rkz, X86::VFMADD231NEPBF16Z128mkz, 0},
+ {X86::VFMADD231NEPBF16Z256rk, X86::VFMADD231NEPBF16Z256mk, 0},
+ {X86::VFMADD231NEPBF16Z256rkz, X86::VFMADD231NEPBF16Z256mkz, 0},
+ {X86::VFMADD231NEPBF16Zrk, X86::VFMADD231NEPBF16Zmk, 0},
+ {X86::VFMADD231NEPBF16Zrkz, X86::VFMADD231NEPBF16Zmkz, 0},
{X86::VFMADD231PDZ128rk, X86::VFMADD231PDZ128mk, 0},
{X86::VFMADD231PDZ128rkz, X86::VFMADD231PDZ128mkz, 0},
{X86::VFMADD231PDZ256rk, X86::VFMADD231PDZ256mk, 0},
@@ -5760,6 +5939,12 @@ static const X86FoldTableEntry Table4[] = {
{X86::VFMADDSUB231PSZ256rkz, X86::VFMADDSUB231PSZ256mkz, 0},
{X86::VFMADDSUB231PSZrk, X86::VFMADDSUB231PSZmk, 0},
{X86::VFMADDSUB231PSZrkz, X86::VFMADDSUB231PSZmkz, 0},
+ {X86::VFMSUB132NEPBF16Z128rk, X86::VFMSUB132NEPBF16Z128mk, 0},
+ {X86::VFMSUB132NEPBF16Z128rkz, X86::VFMSUB132NEPBF16Z128mkz, 0},
+ {X86::VFMSUB132NEPBF16Z256rk, X86::VFMSUB132NEPBF16Z256mk, 0},
+ {X86::VFMSUB132NEPBF16Z256rkz, X86::VFMSUB132NEPBF16Z256mkz, 0},
+ {X86::VFMSUB132NEPBF16Zrk, X86::VFMSUB132NEPBF16Zmk, 0},
+ {X86::VFMSUB132NEPBF16Zrkz, X86::VFMSUB132NEPBF16Zmkz, 0},
{X86::VFMSUB132PDZ128rk, X86::VFMSUB132PDZ128mk, 0},
{X86::VFMSUB132PDZ128rkz, X86::VFMSUB132PDZ128mkz, 0},
{X86::VFMSUB132PDZ256rk, X86::VFMSUB132PDZ256mk, 0},
@@ -5784,6 +5969,12 @@ static const X86FoldTableEntry Table4[] = {
{X86::VFMSUB132SHZr_Intkz, X86::VFMSUB132SHZm_Intkz, TB_NO_REVERSE},
{X86::VFMSUB132SSZr_Intk, X86::VFMSUB132SSZm_Intk, TB_NO_REVERSE},
{X86::VFMSUB132SSZr_Intkz, X86::VFMSUB132SSZm_Intkz, TB_NO_REVERSE},
+ {X86::VFMSUB213NEPBF16Z128rk, X86::VFMSUB213NEPBF16Z128mk, 0},
+ {X86::VFMSUB213NEPBF16Z128rkz, X86::VFMSUB213NEPBF16Z128mkz, 0},
+ {X86::VFMSUB213NEPBF16Z256rk, X86::VFMSUB213NEPBF16Z256mk, 0},
+ {X86::VFMSUB213NEPBF16Z256rkz, X86::VFMSUB213NEPBF16Z256mkz, 0},
+ {X86::VFMSUB213NEPBF16Zrk, X86::VFMSUB213NEPBF16Zmk, 0},
+ {X86::VFMSUB213NEPBF16Zrkz, X86::VFMSUB213NEPBF16Zmkz, 0},
{X86::VFMSUB213PDZ128rk, X86::VFMSUB213PDZ128mk, 0},
{X86::VFMSUB213PDZ128rkz, X86::VFMSUB213PDZ128mkz, 0},
{X86::VFMSUB213PDZ256rk, X86::VFMSUB213PDZ256mk, 0},
@@ -5808,6 +5999,12 @@ static const X86FoldTableEntry Table4[] = {
{X86::VFMSUB213SHZr_Intkz, X86::VFMSUB213SHZm_Intkz, TB_NO_REVERSE},
{X86::VFMSUB213SSZr_Intk, X86::VFMSUB213SSZm_Intk, TB_NO_REVERSE},
{X86::VFMSUB213SSZr_Intkz, X86::VFMSUB213SSZm_Intkz, TB_NO_REVERSE},
+ {X86::VFMSUB231NEPBF16Z128rk, X86::VFMSUB231NEPBF16Z128mk, 0},
+ {X86::VFMSUB231NEPBF16Z128rkz, X86::VFMSUB231NEPBF16Z128mkz, 0},
+ {X86::VFMSUB231NEPBF16Z256rk, X86::VFMSUB231NEPBF16Z256mk, 0},
+ {X86::VFMSUB231NEPBF16Z256rkz, X86::VFMSUB231NEPBF16Z256mkz, 0},
+ {X86::VFMSUB231NEPBF16Zrk, X86::VFMSUB231NEPBF16Zmk, 0},
+ {X86::VFMSUB231NEPBF16Zrkz, X86::VFMSUB231NEPBF16Zmkz, 0},
{X86::VFMSUB231PDZ128rk, X86::VFMSUB231PDZ128mk, 0},
{X86::VFMSUB231PDZ128rkz, X86::VFMSUB231PDZ128mkz, 0},
{X86::VFMSUB231PDZ256rk, X86::VFMSUB231PDZ256mk, 0},
@@ -5890,6 +6087,12 @@ static const X86FoldTableEntry Table4[] = {
{X86::VFMULCPHZ256rrk, X86::VFMULCPHZ256rmk, 0},
{X86::VFMULCPHZrrk, X86::VFMULCPHZrmk, 0},
{X86::VFMULCSHZrrk, X86::VFMULCSHZrmk, TB_NO_REVERSE},
+ {X86::VFNMADD132NEPBF16Z128rk, X86::VFNMADD132NEPBF16Z128mk, 0},
+ {X86::VFNMADD132NEPBF16Z128rkz, X86::VFNMADD132NEPBF16Z128mkz, 0},
+ {X86::VFNMADD132NEPBF16Z256rk, X86::VFNMADD132NEPBF16Z256mk, 0},
+ {X86::VFNMADD132NEPBF16Z256rkz, X86::VFNMADD132NEPBF16Z256mkz, 0},
+ {X86::VFNMADD132NEPBF16Zrk, X86::VFNMADD132NEPBF16Zmk, 0},
+ {X86::VFNMADD132NEPBF16Zrkz, X86::VFNMADD132NEPBF16Zmkz, 0},
{X86::VFNMADD132PDZ128rk, X86::VFNMADD132PDZ128mk, 0},
{X86::VFNMADD132PDZ128rkz, X86::VFNMADD132PDZ128mkz, 0},
{X86::VFNMADD132PDZ256rk, X86::VFNMADD132PDZ256mk, 0},
@@ -5914,6 +6117,12 @@ static const X86FoldTableEntry Table4[] = {
{X86::VFNMADD132SHZr_Intkz, X86::VFNMADD132SHZm_Intkz, TB_NO_REVERSE},
{X86::VFNMADD132SSZr_Intk, X86::VFNMADD132SSZm_Intk, TB_NO_REVERSE},
{X86::VFNMADD132SSZr_Intkz, X86::VFNMADD132SSZm_Intkz, TB_NO_REVERSE},
+ {X86::VFNMADD213NEPBF16Z128rk, X86::VFNMADD213NEPBF16Z128mk, 0},
+ {X86::VFNMADD213NEPBF16Z128rkz, X86::VFNMADD213NEPBF16Z128mkz, 0},
+ {X86::VFNMADD213NEPBF16Z256rk, X86::VFNMADD213NEPBF16Z256mk, 0},
+ {X86::VFNMADD213NEPBF16Z256rkz, X86::VFNMADD213NEPBF16Z256mkz, 0},
+ {X86::VFNMADD213NEPBF16Zrk, X86::VFNMADD213NEPBF16Zmk, 0},
+ {X86::VFNMADD213NEPBF16Zrkz, X86::VFNMADD213NEPBF16Zmkz, 0},
{X86::VFNMADD213PDZ128rk, X86::VFNMADD213PDZ128mk, 0},
{X86::VFNMADD213PDZ128rkz, X86::VFNMADD213PDZ128mkz, 0},
{X86::VFNMADD213PDZ256rk, X86::VFNMADD213PDZ256mk, 0},
@@ -5938,6 +6147,12 @@ static const X86FoldTableEntry Table4[] = {
{X86::VFNMADD213SHZr_Intkz, X86::VFNMADD213SHZm_Intkz, TB_NO_REVERSE},
{X86::VFNMADD213SSZr_Intk, X86::VFNMADD213SSZm_Intk, TB_NO_REVERSE},
{X86::VFNMADD213SSZr_Intkz, X86::VFNMADD213SSZm_Intkz, TB_NO_REVERSE},
+ {X86::VFNMADD231NEPBF16Z128rk, X86::VFNMADD231NEPBF16Z128mk, 0},
+ {X86::VFNMADD231NEPBF16Z128rkz, X86::VFNMADD231NEPBF16Z128mkz, 0},
+ {X86::VFNMADD231NEPBF16Z256rk, X86::VFNMADD231NEPBF16Z256mk, 0},
+ {X86::VFNMADD231NEPBF16Z256rkz, X86::VFNMADD231NEPBF16Z256mkz, 0},
+ {X86::VFNMADD231NEPBF16Zrk, X86::VFNMADD231NEPBF16Zmk, 0},
+ {X86::VFNMADD231NEPBF16Zrkz, X86::VFNMADD231NEPBF16Zmkz, 0},
{X86::VFNMADD231PDZ128rk, X86::VFNMADD231PDZ128mk, 0},
{X86::VFNMADD231PDZ128rkz, X86::VFNMADD231PDZ128mkz, 0},
{X86::VFNMADD231PDZ256rk, X86::VFNMADD231PDZ256mk, 0},
@@ -5962,6 +6177,12 @@ static const X86FoldTableEntry Table4[] = {
{X86::VFNMADD231SHZr_Intkz, X86::VFNMADD231SHZm_Intkz, TB_NO_REVERSE},
{X86::VFNMADD231SSZr_Intk, X86::VFNMADD231SSZm_Intk, TB_NO_REVERSE},
{X86::VFNMADD231SSZr_Intkz, X86::VFNMADD231SSZm_Intkz, TB_NO_REVERSE},
+ {X86::VFNMSUB132NEPBF16Z128rk, X86::VFNMSUB132NEPBF16Z128mk, 0},
+ {X86::VFNMSUB132NEPBF16Z128rkz, X86::VFNMSUB132NEPBF16Z128mkz, 0},
+ {X86::VFNMSUB132NEPBF16Z256rk, X86::VFNMSUB132NEPBF16Z256mk, 0},
+ {X86::VFNMSUB132NEPBF16Z256rkz, X86::VFNMSUB132NEPBF16Z256mkz, 0},
+ {X86::VFNMSUB132NEPBF16Zrk, X86::VFNMSUB132NEPBF16Zmk, 0},
+ {X86::VFNMSUB132NEPBF16Zrkz, X86::VFNMSUB132NEPBF16Zmkz, 0},
{X86::VFNMSUB132PDZ128rk, X86::VFNMSUB132PDZ128mk, 0},
{X86::VFNMSUB132PDZ128rkz, X86::VFNMSUB132PDZ128mkz, 0},
{X86::VFNMSUB132PDZ256rk, X86::VFNMSUB132PDZ256mk, 0},
@@ -5986,6 +6207,12 @@ static const X86FoldTableEntry Table4[] = {
{X86::VFNMSUB132SHZr_Intkz, X86::VFNMSUB132SHZm_Intkz, TB_NO_REVERSE},
{X86::VFNMSUB132SSZr_Intk, X86::VFNMSUB132SSZm_Intk, TB_NO_REVERSE},
{X86::VFNMSUB132SSZr_Intkz, X86::VFNMSUB132SSZm_Intkz, TB_NO_REVERSE},
+ {X86::VFNMSUB213NEPBF16Z128rk, X86::VFNMSUB213NEPBF16Z128mk, 0},
+ {X86::VFNMSUB213NEPBF16Z128rkz, X86::VFNMSUB213NEPBF16Z128mkz, 0},
+ {X86::VFNMSUB213NEPBF16Z256rk, X86::VFNMSUB213NEPBF16Z256mk, 0},
+ {X86::VFNMSUB213NEPBF16Z256rkz, X86::VFNMSUB213NEPBF16Z256mkz, 0},
+ {X86::VFNMSUB213NEPBF16Zrk, X86::VFNMSUB213NEPBF16Zmk, 0},
+ {X86::VFNMSUB213NEPBF16Zrkz, X86::VFNMSUB213NEPBF16Zmkz, 0},
{X86::VFNMSUB213PDZ128rk, X86::VFNMSUB213PDZ128mk, 0},
{X86::VFNMSUB213PDZ128rkz, X86::VFNMSUB213PDZ128mkz, 0},
{X86::VFNMSUB213PDZ256rk, X86::VFNMSUB213PDZ256mk, 0},
@@ -6010,6 +6237,12 @@ static const X86FoldTableEntry Table4[] = {
{X86::VFNMSUB213SHZr_Intkz, X86::VFNMSUB213SHZm_Intkz, TB_NO_REVERSE},
{X86::VFNMSUB213SSZr_Intk, X86::VFNMSUB213SSZm_Intk, TB_NO_REVERSE},
{X86::VFNMSUB213SSZr_Intkz, X86::VFNMSUB213SSZm_Intkz, TB_NO_REVERSE},
+ {X86::VFNMSUB231NEPBF16Z128rk, X86::VFNMSUB231NEPBF16Z128mk, 0},
+ {X86::VFNMSUB231NEPBF16Z128rkz, X86::VFNMSUB231NEPBF16Z128mkz, 0},
+ {X86::VFNMSUB231NEPBF16Z256rk, X86::VFNMSUB231NEPBF16Z256mk, 0},
+ {X86::VFNMSUB231NEPBF16Z256rkz, X86::VFNMSUB231NEPBF16Z256mkz, 0},
+ {X86::VFNMSUB231NEPBF16Zrk, X86::VFNMSUB231NEPBF16Zmk, 0},
+ {X86::VFNMSUB231NEPBF16Zrkz, X86::VFNMSUB231NEPBF16Zmkz, 0},
{X86::VFNMSUB231PDZ128rk, X86::VFNMSUB231PDZ128mk, 0},
{X86::VFNMSUB231PDZ128rkz, X86::VFNMSUB231PDZ128mkz, 0},
{X86::VFNMSUB231PDZ256rk, X86::VFNMSUB231PDZ256mk, 0},
@@ -6070,6 +6303,9 @@ static const X86FoldTableEntry Table4[] = {
{X86::VMAXCPSZ128rrk, X86::VMAXCPSZ128rmk, 0},
{X86::VMAXCPSZ256rrk, X86::VMAXCPSZ256rmk, 0},
{X86::VMAXCPSZrrk, X86::VMAXCPSZrmk, 0},
+ {X86::VMAXPBF16Z128rrk, X86::VMAXPBF16Z128rmk, 0},
+ {X86::VMAXPBF16Z256rrk, X86::VMAXPBF16Z256rmk, 0},
+ {X86::VMAXPBF16Zrrk, X86::VMAXPBF16Zrmk, 0},
{X86::VMAXPDZ128rrk, X86::VMAXPDZ128rmk, 0},
{X86::VMAXPDZ256rrk, X86::VMAXPDZ256rmk, 0},
{X86::VMAXPDZrrk, X86::VMAXPDZrmk, 0},
@@ -6091,6 +6327,9 @@ static const X86FoldTableEntry Table4[] = {
{X86::VMINCPSZ128rrk, X86::VMINCPSZ128rmk, 0},
{X86::VMINCPSZ256rrk, X86::VMINCPSZ256rmk, 0},
{X86::VMINCPSZrrk, X86::VMINCPSZrmk, 0},
+ {X86::VMINPBF16Z128rrk, X86::VMINPBF16Z128rmk, 0},
+ {X86::VMINPBF16Z256rrk, X86::VMINPBF16Z256rmk, 0},
+ {X86::VMINPBF16Zrrk, X86::VMINPBF16Zrmk, 0},
{X86::VMINPDZ128rrk, X86::VMINPDZ128rmk, 0},
{X86::VMINPDZ256rrk, X86::VMINPDZ256rmk, 0},
{X86::VMINPDZrrk, X86::VMINPDZrmk, 0},
@@ -6106,6 +6345,9 @@ static const X86FoldTableEntry Table4[] = {
{X86::VMPSADBWZ128rrik, X86::VMPSADBWZ128rmik, 0},
{X86::VMPSADBWZ256rrik, X86::VMPSADBWZ256rmik, 0},
{X86::VMPSADBWZrrik, X86::VMPSADBWZrmik, 0},
+ {X86::VMULNEPBF16Z128rrk, X86::VMULNEPBF16Z128rmk, 0},
+ {X86::VMULNEPBF16Z256rrk, X86::VMULNEPBF16Z256rmk, 0},
+ {X86::VMULNEPBF16Zrrk, X86::VMULNEPBF16Zrmk, 0},
{X86::VMULPDZ128rrk, X86::VMULPDZ128rmk, 0},
{X86::VMULPDZ256rrk, X86::VMULPDZ256rmk, 0},
{X86::VMULPDZrrk, X86::VMULPDZrmk, 0},
@@ -6609,6 +6851,9 @@ static const X86FoldTableEntry Table4[] = {
{X86::VRSQRT28SDZrk, X86::VRSQRT28SDZmk, TB_NO_REVERSE},
{X86::VRSQRT28SSZrk, X86::VRSQRT28SSZmk, TB_NO_REVERSE},
{X86::VRSQRTSHZrrk, X86::VRSQRTSHZrmk, TB_NO_REVERSE},
+ {X86::VSCALEFPBF16Z128rrk, X86::VSCALEFPBF16Z128rmk, 0},
+ {X86::VSCALEFPBF16Z256rrk, X86::VSCALEFPBF16Z256rmk, 0},
+ {X86::VSCALEFPBF16Zrrk, X86::VSCALEFPBF16Zrmk, 0},
{X86::VSCALEFPDZ128rrk, X86::VSCALEFPDZ128rmk, 0},
{X86::VSCALEFPDZ256rrk, X86::VSCALEFPDZ256rmk, 0},
{X86::VSCALEFPDZrrk, X86::VSCALEFPDZrmk, 0},
@@ -6638,6 +6883,9 @@ static const X86FoldTableEntry Table4[] = {
{X86::VSQRTSDZr_Intk, X86::VSQRTSDZm_Intk, TB_NO_REVERSE},
{X86::VSQRTSHZr_Intk, X86::VSQRTSHZm_Intk, TB_NO_REVERSE},
{X86::VSQRTSSZr_Intk, X86::VSQRTSSZm_Intk, TB_NO_REVERSE},
+ {X86::VSUBNEPBF16Z128rrk, X86::VSUBNEPBF16Z128rmk, 0},
+ {X86::VSUBNEPBF16Z256rrk, X86::VSUBNEPBF16Z256rmk, 0},
+ {X86::VSUBNEPBF16Zrrk, X86::VSUBNEPBF16Zrmk, 0},
{X86::VSUBPDZ128rrk, X86::VSUBPDZ128rmk, 0},
{X86::VSUBPDZ256rrk, X86::VSUBPDZ256rmk, 0},
{X86::VSUBPDZrrk, X86::VSUBPDZrmk, 0},
@@ -6820,6 +7068,9 @@ static const X86FoldTableEntry BroadcastTable1[] = {
{X86::VCVTW2PHZrr, X86::VCVTW2PHZrmb, TB_BCAST_W},
{X86::VEXP2PDZr, X86::VEXP2PDZmb, TB_BCAST_SD},
{X86::VEXP2PSZr, X86::VEXP2PSZmb, TB_BCAST_SS},
+ {X86::VFPCLASSPBF16Z128rr, X86::VFPCLASSPBF16Z128rmb, TB_BCAST_SH},
+ {X86::VFPCLASSPBF16Z256rr, X86::VFPCLASSPBF16Z256rmb, TB_BCAST_SH},
+ {X86::VFPCLASSPBF16Zrr, X86::VFPCLASSPBF16Zrmb, TB_BCAST_SH},
{X86::VFPCLASSPDZ128rr, X86::VFPCLASSPDZ128rmb, TB_BCAST_SD},
{X86::VFPCLASSPDZ256rr, X86::VFPCLASSPDZ256rmb, TB_BCAST_SD},
{X86::VFPCLASSPDZrr, X86::VFPCLASSPDZrmb, TB_BCAST_SD},
@@ -6829,6 +7080,9 @@ static const X86FoldTableEntry BroadcastTable1[] = {
{X86::VFPCLASSPSZ128rr, X86::VFPCLASSPSZ128rmb, TB_BCAST_SS},
{X86::VFPCLASSPSZ256rr, X86::VFPCLASSPSZ256rmb, TB_BCAST_SS},
{X86::VFPCLASSPSZrr, X86::VFPCLASSPSZrmb, TB_BCAST_SS},
+ {X86::VGETEXPPBF16Z128r, X86::VGETEXPPBF16Z128mb, TB_BCAST_SH},
+ {X86::VGETEXPPBF16Z256r, X86::VGETEXPPBF16Z256mb, TB_BCAST_SH},
+ {X86::VGETEXPPBF16Zr, X86::VGETEXPPBF16Zmb, TB_BCAST_SH},
{X86::VGETEXPPDZ128r, X86::VGETEXPPDZ128mb, TB_BCAST_SD},
{X86::VGETEXPPDZ256r, X86::VGETEXPPDZ256mb, TB_BCAST_SD},
{X86::VGETEXPPDZr, X86::VGETEXPPDZmb, TB_BCAST_SD},
@@ -6838,6 +7092,9 @@ static const X86FoldTableEntry BroadcastTable1[] = {
{X86::VGETEXPPSZ128r, X86::VGETEXPPSZ128mb, TB_BCAST_SS},
{X86::VGETEXPPSZ256r, X86::VGETEXPPSZ256mb, TB_BCAST_SS},
{X86::VGETEXPPSZr, X86::VGETEXPPSZmb, TB_BCAST_SS},
+ {X86::VGETMANTPBF16Z128rri, X86::VGETMANTPBF16Z128rmbi, TB_BCAST_SH},
+ {X86::VGETMANTPBF16Z256rri, X86::VGETMANTPBF16Z256rmbi, TB_BCAST_SH},
+ {X86::VGETMANTPBF16Zrri, X86::VGETMANTPBF16Zrmbi, TB_BCAST_SH},
{X86::VGETMANTPDZ128rri, X86::VGETMANTPDZ128rmbi, TB_BCAST_SD},
{X86::VGETMANTPDZ256rri, X86::VGETMANTPDZ256rmbi, TB_BCAST_SD},
{X86::VGETMANTPDZrri, X86::VGETMANTPDZrmbi, TB_BCAST_SD},
@@ -6922,9 +7179,15 @@ static const X86FoldTableEntry BroadcastTable1[] = {
{X86::VRCP14PSZr, X86::VRCP14PSZmb, TB_BCAST_SS},
{X86::VRCP28PDZr, X86::VRCP28PDZmb, TB_BCAST_SD},
{X86::VRCP28PSZr, X86::VRCP28PSZmb, TB_BCAST_SS},
+ {X86::VRCPPBF16Z128r, X86::VRCPPBF16Z128mb, TB_BCAST_SH},
+ {X86::VRCPPBF16Z256r, X86::VRCPPBF16Z256mb, TB_BCAST_SH},
+ {X86::VRCPPBF16Zr, X86::VRCPPBF16Zmb, TB_BCAST_SH},
{X86::VRCPPHZ128r, X86::VRCPPHZ128mb, TB_BCAST_SH},
{X86::VRCPPHZ256r, X86::VRCPPHZ256mb, TB_BCAST_SH},
{X86::VRCPPHZr, X86::VRCPPHZmb, TB_BCAST_SH},
+ {X86::VREDUCENEPBF16Z128rri, X86::VREDUCENEPBF16Z128rmbi, TB_BCAST_SH},
+ {X86::VREDUCENEPBF16Z256rri, X86::VREDUCENEPBF16Z256rmbi, TB_BCAST_SH},
+ {X86::VREDUCENEPBF16Zrri, X86::VREDUCENEPBF16Zrmbi, TB_BCAST_SH},
{X86::VREDUCEPDZ128rri, X86::VREDUCEPDZ128rmbi, TB_BCAST_SD},
{X86::VREDUCEPDZ256rri, X86::VREDUCEPDZ256rmbi, TB_BCAST_SD},
{X86::VREDUCEPDZrri, X86::VREDUCEPDZrmbi, TB_BCAST_SD},
@@ -6934,6 +7197,9 @@ static const X86FoldTableEntry BroadcastTable1[] = {
{X86::VREDUCEPSZ128rri, X86::VREDUCEPSZ128rmbi, TB_BCAST_SS},
{X86::VREDUCEPSZ256rri, X86::VREDUCEPSZ256rmbi, TB_BCAST_SS},
{X86::VREDUCEPSZrri, X86::VREDUCEPSZrmbi, TB_BCAST_SS},
+ {X86::VRNDSCALENEPBF16Z128rri, X86::VRNDSCALENEPBF16Z128rmbi, TB_BCAST_SH},
+ {X86::VRNDSCALENEPBF16Z256rri, X86::VRNDSCALENEPBF16Z256rmbi, TB_BCAST_SH},
+ {X86::VRNDSCALENEPBF16Zrri, X86::VRNDSCALENEPBF16Zrmbi, TB_BCAST_SH},
{X86::VRNDSCALEPDZ128rri, X86::VRNDSCALEPDZ128rmbi, TB_BCAST_SD},
{X86::VRNDSCALEPDZ256rri, X86::VRNDSCALEPDZ256rmbi, TB_BCAST_SD},
{X86::VRNDSCALEPDZrri, X86::VRNDSCALEPDZrmbi, TB_BCAST_SD},
@@ -6951,9 +7217,15 @@ static const X86FoldTableEntry BroadcastTable1[] = {
{X86::VRSQRT14PSZr, X86::VRSQRT14PSZmb, TB_BCAST_SS},
{X86::VRSQRT28PDZr, X86::VRSQRT28PDZmb, TB_BCAST_SD},
{X86::VRSQRT28PSZr, X86::VRSQRT28PSZmb, TB_BCAST_SS},
+ {X86::VRSQRTPBF16Z128r, X86::VRSQRTPBF16Z128mb, TB_BCAST_SH},
+ {X86::VRSQRTPBF16Z256r, X86::VRSQRTPBF16Z256mb, TB_BCAST_SH},
+ {X86::VRSQRTPBF16Zr, X86::VRSQRTPBF16Zmb, TB_BCAST_SH},
{X86::VRSQRTPHZ128r, X86::VRSQRTPHZ128mb, TB_BCAST_SH},
{X86::VRSQRTPHZ256r, X86::VRSQRTPHZ256mb, TB_BCAST_SH},
{X86::VRSQRTPHZr, X86::VRSQRTPHZmb, TB_BCAST_SH},
+ {X86::VSQRTNEPBF16Z128r, X86::VSQRTNEPBF16Z128mb, TB_BCAST_SH},
+ {X86::VSQRTNEPBF16Z256r, X86::VSQRTNEPBF16Z256mb, TB_BCAST_SH},
+ {X86::VSQRTNEPBF16Zr, X86::VSQRTNEPBF16Zmb, TB_BCAST_SH},
{X86::VSQRTPDZ128r, X86::VSQRTPDZ128mb, TB_BCAST_SD},
{X86::VSQRTPDZ256r, X86::VSQRTPDZ256mb, TB_BCAST_SD},
{X86::VSQRTPDZr, X86::VSQRTPDZmb, TB_BCAST_SD},
@@ -6966,6 +7238,9 @@ static const X86FoldTableEntry BroadcastTable1[] = {
};
static const X86FoldTableEntry BroadcastTable2[] = {
+ {X86::VADDNEPBF16Z128rr, X86::VADDNEPBF16Z128rmb, TB_BCAST_SH},
+ {X86::VADDNEPBF16Z256rr, X86::VADDNEPBF16Z256rmb, TB_BCAST_SH},
+ {X86::VADDNEPBF16Zrr, X86::VADDNEPBF16Zrmb, TB_BCAST_SH},
{X86::VADDPDZ128rr, X86::VADDPDZ128rmb, TB_BCAST_SD},
{X86::VADDPDZ256rr, X86::VADDPDZ256rmb, TB_BCAST_SD},
{X86::VADDPDZrr, X86::VADDPDZrmb, TB_BCAST_SD},
@@ -6999,6 +7274,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VBLENDMPSZ128rr, X86::VBLENDMPSZ128rmb, TB_BCAST_SS},
{X86::VBLENDMPSZ256rr, X86::VBLENDMPSZ256rmb, TB_BCAST_SS},
{X86::VBLENDMPSZrr, X86::VBLENDMPSZrmb, TB_BCAST_SS},
+ {X86::VCMPPBF16Z128rri, X86::VCMPPBF16Z128rmbi, TB_BCAST_SH},
+ {X86::VCMPPBF16Z256rri, X86::VCMPPBF16Z256rmbi, TB_BCAST_SH},
+ {X86::VCMPPBF16Zrri, X86::VCMPPBF16Zrmbi, TB_BCAST_SH},
{X86::VCMPPDZ128rri, X86::VCMPPDZ128rmbi, TB_BCAST_SD},
{X86::VCMPPDZ256rri, X86::VCMPPDZ256rmbi, TB_BCAST_SD},
{X86::VCMPPDZrri, X86::VCMPPDZrmbi, TB_BCAST_SD},
@@ -7158,6 +7436,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VCVTW2PHZ128rrkz, X86::VCVTW2PHZ128rmbkz, TB_BCAST_W},
{X86::VCVTW2PHZ256rrkz, X86::VCVTW2PHZ256rmbkz, TB_BCAST_W},
{X86::VCVTW2PHZrrkz, X86::VCVTW2PHZrmbkz, TB_BCAST_W},
+ {X86::VDIVNEPBF16Z128rr, X86::VDIVNEPBF16Z128rmb, TB_BCAST_SH},
+ {X86::VDIVNEPBF16Z256rr, X86::VDIVNEPBF16Z256rmb, TB_BCAST_SH},
+ {X86::VDIVNEPBF16Zrr, X86::VDIVNEPBF16Zrmb, TB_BCAST_SH},
{X86::VDIVPDZ128rr, X86::VDIVPDZ128rmb, TB_BCAST_SD},
{X86::VDIVPDZ256rr, X86::VDIVPDZ256rmb, TB_BCAST_SD},
{X86::VDIVPDZrr, X86::VDIVPDZrmb, TB_BCAST_SD},
@@ -7175,6 +7456,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VFMULCPHZ128rr, X86::VFMULCPHZ128rmb, TB_BCAST_SS},
{X86::VFMULCPHZ256rr, X86::VFMULCPHZ256rmb, TB_BCAST_SS},
{X86::VFMULCPHZrr, X86::VFMULCPHZrmb, TB_BCAST_SS},
+ {X86::VFPCLASSPBF16Z128rrk, X86::VFPCLASSPBF16Z128rmbk, TB_BCAST_SH},
+ {X86::VFPCLASSPBF16Z256rrk, X86::VFPCLASSPBF16Z256rmbk, TB_BCAST_SH},
+ {X86::VFPCLASSPBF16Zrrk, X86::VFPCLASSPBF16Zrmbk, TB_BCAST_SH},
{X86::VFPCLASSPDZ128rrk, X86::VFPCLASSPDZ128rmbk, TB_BCAST_SD},
{X86::VFPCLASSPDZ256rrk, X86::VFPCLASSPDZ256rmbk, TB_BCAST_SD},
{X86::VFPCLASSPDZrrk, X86::VFPCLASSPDZrmbk, TB_BCAST_SD},
@@ -7184,6 +7468,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VFPCLASSPSZ128rrk, X86::VFPCLASSPSZ128rmbk, TB_BCAST_SS},
{X86::VFPCLASSPSZ256rrk, X86::VFPCLASSPSZ256rmbk, TB_BCAST_SS},
{X86::VFPCLASSPSZrrk, X86::VFPCLASSPSZrmbk, TB_BCAST_SS},
+ {X86::VGETEXPPBF16Z128rkz, X86::VGETEXPPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VGETEXPPBF16Z256rkz, X86::VGETEXPPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VGETEXPPBF16Zrkz, X86::VGETEXPPBF16Zmbkz, TB_BCAST_SH},
{X86::VGETEXPPDZ128rkz, X86::VGETEXPPDZ128mbkz, TB_BCAST_SD},
{X86::VGETEXPPDZ256rkz, X86::VGETEXPPDZ256mbkz, TB_BCAST_SD},
{X86::VGETEXPPDZrkz, X86::VGETEXPPDZmbkz, TB_BCAST_SD},
@@ -7193,6 +7480,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VGETEXPPSZ128rkz, X86::VGETEXPPSZ128mbkz, TB_BCAST_SS},
{X86::VGETEXPPSZ256rkz, X86::VGETEXPPSZ256mbkz, TB_BCAST_SS},
{X86::VGETEXPPSZrkz, X86::VGETEXPPSZmbkz, TB_BCAST_SS},
+ {X86::VGETMANTPBF16Z128rrikz, X86::VGETMANTPBF16Z128rmbikz, TB_BCAST_SH},
+ {X86::VGETMANTPBF16Z256rrikz, X86::VGETMANTPBF16Z256rmbikz, TB_BCAST_SH},
+ {X86::VGETMANTPBF16Zrrikz, X86::VGETMANTPBF16Zrmbikz, TB_BCAST_SH},
{X86::VGETMANTPDZ128rrikz, X86::VGETMANTPDZ128rmbikz, TB_BCAST_SD},
{X86::VGETMANTPDZ256rrikz, X86::VGETMANTPDZ256rmbikz, TB_BCAST_SD},
{X86::VGETMANTPDZrrikz, X86::VGETMANTPDZrmbikz, TB_BCAST_SD},
@@ -7217,6 +7507,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VMAXCPSZ128rr, X86::VMAXCPSZ128rmb, TB_BCAST_SS},
{X86::VMAXCPSZ256rr, X86::VMAXCPSZ256rmb, TB_BCAST_SS},
{X86::VMAXCPSZrr, X86::VMAXCPSZrmb, TB_BCAST_SS},
+ {X86::VMAXPBF16Z128rr, X86::VMAXPBF16Z128rmb, TB_BCAST_SH},
+ {X86::VMAXPBF16Z256rr, X86::VMAXPBF16Z256rmb, TB_BCAST_SH},
+ {X86::VMAXPBF16Zrr, X86::VMAXPBF16Zrmb, TB_BCAST_SH},
{X86::VMAXPDZ128rr, X86::VMAXPDZ128rmb, TB_BCAST_SD},
{X86::VMAXPDZ256rr, X86::VMAXPDZ256rmb, TB_BCAST_SD},
{X86::VMAXPDZrr, X86::VMAXPDZrmb, TB_BCAST_SD},
@@ -7235,6 +7528,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VMINCPSZ128rr, X86::VMINCPSZ128rmb, TB_BCAST_SS},
{X86::VMINCPSZ256rr, X86::VMINCPSZ256rmb, TB_BCAST_SS},
{X86::VMINCPSZrr, X86::VMINCPSZrmb, TB_BCAST_SS},
+ {X86::VMINPBF16Z128rr, X86::VMINPBF16Z128rmb, TB_BCAST_SH},
+ {X86::VMINPBF16Z256rr, X86::VMINPBF16Z256rmb, TB_BCAST_SH},
+ {X86::VMINPBF16Zrr, X86::VMINPBF16Zrmb, TB_BCAST_SH},
{X86::VMINPDZ128rr, X86::VMINPDZ128rmb, TB_BCAST_SD},
{X86::VMINPDZ256rr, X86::VMINPDZ256rmb, TB_BCAST_SD},
{X86::VMINPDZrr, X86::VMINPDZrmb, TB_BCAST_SD},
@@ -7244,6 +7540,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VMINPSZ128rr, X86::VMINPSZ128rmb, TB_BCAST_SS},
{X86::VMINPSZ256rr, X86::VMINPSZ256rmb, TB_BCAST_SS},
{X86::VMINPSZrr, X86::VMINPSZrmb, TB_BCAST_SS},
+ {X86::VMULNEPBF16Z128rr, X86::VMULNEPBF16Z128rmb, TB_BCAST_SH},
+ {X86::VMULNEPBF16Z256rr, X86::VMULNEPBF16Z256rmb, TB_BCAST_SH},
+ {X86::VMULNEPBF16Zrr, X86::VMULNEPBF16Zrmb, TB_BCAST_SH},
{X86::VMULPDZ128rr, X86::VMULPDZ128rmb, TB_BCAST_SD},
{X86::VMULPDZ256rr, X86::VMULPDZ256rmb, TB_BCAST_SD},
{X86::VMULPDZrr, X86::VMULPDZrmb, TB_BCAST_SD},
@@ -7537,9 +7836,15 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VRCP14PSZrkz, X86::VRCP14PSZmbkz, TB_BCAST_SS},
{X86::VRCP28PDZrkz, X86::VRCP28PDZmbkz, TB_BCAST_SD},
{X86::VRCP28PSZrkz, X86::VRCP28PSZmbkz, TB_BCAST_SS},
+ {X86::VRCPPBF16Z128rkz, X86::VRCPPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VRCPPBF16Z256rkz, X86::VRCPPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VRCPPBF16Zrkz, X86::VRCPPBF16Zmbkz, TB_BCAST_SH},
{X86::VRCPPHZ128rkz, X86::VRCPPHZ128mbkz, TB_BCAST_SH},
{X86::VRCPPHZ256rkz, X86::VRCPPHZ256mbkz, TB_BCAST_SH},
{X86::VRCPPHZrkz, X86::VRCPPHZmbkz, TB_BCAST_SH},
+ {X86::VREDUCENEPBF16Z128rrikz, X86::VREDUCENEPBF16Z128rmbikz, TB_BCAST_SH},
+ {X86::VREDUCENEPBF16Z256rrikz, X86::VREDUCENEPBF16Z256rmbikz, TB_BCAST_SH},
+ {X86::VREDUCENEPBF16Zrrikz, X86::VREDUCENEPBF16Zrmbikz, TB_BCAST_SH},
{X86::VREDUCEPDZ128rrikz, X86::VREDUCEPDZ128rmbikz, TB_BCAST_SD},
{X86::VREDUCEPDZ256rrikz, X86::VREDUCEPDZ256rmbikz, TB_BCAST_SD},
{X86::VREDUCEPDZrrikz, X86::VREDUCEPDZrmbikz, TB_BCAST_SD},
@@ -7549,6 +7854,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VREDUCEPSZ128rrikz, X86::VREDUCEPSZ128rmbikz, TB_BCAST_SS},
{X86::VREDUCEPSZ256rrikz, X86::VREDUCEPSZ256rmbikz, TB_BCAST_SS},
{X86::VREDUCEPSZrrikz, X86::VREDUCEPSZrmbikz, TB_BCAST_SS},
+ {X86::VRNDSCALENEPBF16Z128rrikz, X86::VRNDSCALENEPBF16Z128rmbikz, TB_BCAST_SH},
+ {X86::VRNDSCALENEPBF16Z256rrikz, X86::VRNDSCALENEPBF16Z256rmbikz, TB_BCAST_SH},
+ {X86::VRNDSCALENEPBF16Zrrikz, X86::VRNDSCALENEPBF16Zrmbikz, TB_BCAST_SH},
{X86::VRNDSCALEPDZ128rrikz, X86::VRNDSCALEPDZ128rmbikz, TB_BCAST_SD},
{X86::VRNDSCALEPDZ256rrikz, X86::VRNDSCALEPDZ256rmbikz, TB_BCAST_SD},
{X86::VRNDSCALEPDZrrikz, X86::VRNDSCALEPDZrmbikz, TB_BCAST_SD},
@@ -7566,9 +7874,15 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VRSQRT14PSZrkz, X86::VRSQRT14PSZmbkz, TB_BCAST_SS},
{X86::VRSQRT28PDZrkz, X86::VRSQRT28PDZmbkz, TB_BCAST_SD},
{X86::VRSQRT28PSZrkz, X86::VRSQRT28PSZmbkz, TB_BCAST_SS},
+ {X86::VRSQRTPBF16Z128rkz, X86::VRSQRTPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VRSQRTPBF16Z256rkz, X86::VRSQRTPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VRSQRTPBF16Zrkz, X86::VRSQRTPBF16Zmbkz, TB_BCAST_SH},
{X86::VRSQRTPHZ128rkz, X86::VRSQRTPHZ128mbkz, TB_BCAST_SH},
{X86::VRSQRTPHZ256rkz, X86::VRSQRTPHZ256mbkz, TB_BCAST_SH},
{X86::VRSQRTPHZrkz, X86::VRSQRTPHZmbkz, TB_BCAST_SH},
+ {X86::VSCALEFPBF16Z128rr, X86::VSCALEFPBF16Z128rmb, TB_BCAST_SH},
+ {X86::VSCALEFPBF16Z256rr, X86::VSCALEFPBF16Z256rmb, TB_BCAST_SH},
+ {X86::VSCALEFPBF16Zrr, X86::VSCALEFPBF16Zrmb, TB_BCAST_SH},
{X86::VSCALEFPDZ128rr, X86::VSCALEFPDZ128rmb, TB_BCAST_SD},
{X86::VSCALEFPDZ256rr, X86::VSCALEFPDZ256rmb, TB_BCAST_SD},
{X86::VSCALEFPDZrr, X86::VSCALEFPDZrmb, TB_BCAST_SD},
@@ -7592,6 +7906,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VSHUFPSZ128rri, X86::VSHUFPSZ128rmbi, TB_BCAST_SS},
{X86::VSHUFPSZ256rri, X86::VSHUFPSZ256rmbi, TB_BCAST_SS},
{X86::VSHUFPSZrri, X86::VSHUFPSZrmbi, TB_BCAST_SS},
+ {X86::VSQRTNEPBF16Z128rkz, X86::VSQRTNEPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VSQRTNEPBF16Z256rkz, X86::VSQRTNEPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VSQRTNEPBF16Zrkz, X86::VSQRTNEPBF16Zmbkz, TB_BCAST_SH},
{X86::VSQRTPDZ128rkz, X86::VSQRTPDZ128mbkz, TB_BCAST_SD},
{X86::VSQRTPDZ256rkz, X86::VSQRTPDZ256mbkz, TB_BCAST_SD},
{X86::VSQRTPDZrkz, X86::VSQRTPDZmbkz, TB_BCAST_SD},
@@ -7601,6 +7918,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VSQRTPSZ128rkz, X86::VSQRTPSZ128mbkz, TB_BCAST_SS},
{X86::VSQRTPSZ256rkz, X86::VSQRTPSZ256mbkz, TB_BCAST_SS},
{X86::VSQRTPSZrkz, X86::VSQRTPSZmbkz, TB_BCAST_SS},
+ {X86::VSUBNEPBF16Z128rr, X86::VSUBNEPBF16Z128rmb, TB_BCAST_SH},
+ {X86::VSUBNEPBF16Z256rr, X86::VSUBNEPBF16Z256rmb, TB_BCAST_SH},
+ {X86::VSUBNEPBF16Zrr, X86::VSUBNEPBF16Zrmb, TB_BCAST_SH},
{X86::VSUBPDZ128rr, X86::VSUBPDZ128rmb, TB_BCAST_SD},
{X86::VSUBPDZ256rr, X86::VSUBPDZ256rmb, TB_BCAST_SD},
{X86::VSUBPDZrr, X86::VSUBPDZrmb, TB_BCAST_SD},
@@ -7631,6 +7951,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
};
static const X86FoldTableEntry BroadcastTable3[] = {
+ {X86::VADDNEPBF16Z128rrkz, X86::VADDNEPBF16Z128rmbkz, TB_BCAST_SH},
+ {X86::VADDNEPBF16Z256rrkz, X86::VADDNEPBF16Z256rmbkz, TB_BCAST_SH},
+ {X86::VADDNEPBF16Zrrkz, X86::VADDNEPBF16Zrmbkz, TB_BCAST_SH},
{X86::VADDPDZ128rrkz, X86::VADDPDZ128rmbkz, TB_BCAST_SD},
{X86::VADDPDZ256rrkz, X86::VADDPDZ256rmbkz, TB_BCAST_SD},
{X86::VADDPDZrrkz, X86::VADDPDZrmbkz, TB_BCAST_SD},
@@ -7664,6 +7987,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VBLENDMPSZ128rrk, X86::VBLENDMPSZ128rmbk, TB_BCAST_SS},
{X86::VBLENDMPSZ256rrk, X86::VBLENDMPSZ256rmbk, TB_BCAST_SS},
{X86::VBLENDMPSZrrk, X86::VBLENDMPSZrmbk, TB_BCAST_SS},
+ {X86::VCMPPBF16Z128rrik, X86::VCMPPBF16Z128rmbik, TB_BCAST_SH},
+ {X86::VCMPPBF16Z256rrik, X86::VCMPPBF16Z256rmbik, TB_BCAST_SH},
+ {X86::VCMPPBF16Zrrik, X86::VCMPPBF16Zrmbik, TB_BCAST_SH},
{X86::VCMPPDZ128rrik, X86::VCMPPDZ128rmbik, TB_BCAST_SD},
{X86::VCMPPDZ256rrik, X86::VCMPPDZ256rmbik, TB_BCAST_SD},
{X86::VCMPPDZrrik, X86::VCMPPDZrmbik, TB_BCAST_SD},
@@ -7823,6 +8149,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VCVTW2PHZ128rrk, X86::VCVTW2PHZ128rmbk, TB_BCAST_W},
{X86::VCVTW2PHZ256rrk, X86::VCVTW2PHZ256rmbk, TB_BCAST_W},
{X86::VCVTW2PHZrrk, X86::VCVTW2PHZrmbk, TB_BCAST_W},
+ {X86::VDIVNEPBF16Z128rrkz, X86::VDIVNEPBF16Z128rmbkz, TB_BCAST_SH},
+ {X86::VDIVNEPBF16Z256rrkz, X86::VDIVNEPBF16Z256rmbkz, TB_BCAST_SH},
+ {X86::VDIVNEPBF16Zrrkz, X86::VDIVNEPBF16Zrmbkz, TB_BCAST_SH},
{X86::VDIVPDZ128rrkz, X86::VDIVPDZ128rmbkz, TB_BCAST_SD},
{X86::VDIVPDZ256rrkz, X86::VDIVPDZ256rmbkz, TB_BCAST_SD},
{X86::VDIVPDZrrkz, X86::VDIVPDZrmbkz, TB_BCAST_SD},
@@ -7849,6 +8178,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VFIXUPIMMPSZ128rri, X86::VFIXUPIMMPSZ128rmbi, TB_BCAST_SS},
{X86::VFIXUPIMMPSZ256rri, X86::VFIXUPIMMPSZ256rmbi, TB_BCAST_SS},
{X86::VFIXUPIMMPSZrri, X86::VFIXUPIMMPSZrmbi, TB_BCAST_SS},
+ {X86::VFMADD132NEPBF16Z128r, X86::VFMADD132NEPBF16Z128mb, TB_BCAST_SH},
+ {X86::VFMADD132NEPBF16Z256r, X86::VFMADD132NEPBF16Z256mb, TB_BCAST_SH},
+ {X86::VFMADD132NEPBF16Zr, X86::VFMADD132NEPBF16Zmb, TB_BCAST_SH},
{X86::VFMADD132PDZ128r, X86::VFMADD132PDZ128mb, TB_BCAST_SD},
{X86::VFMADD132PDZ256r, X86::VFMADD132PDZ256mb, TB_BCAST_SD},
{X86::VFMADD132PDZr, X86::VFMADD132PDZmb, TB_BCAST_SD},
@@ -7858,6 +8190,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VFMADD132PSZ128r, X86::VFMADD132PSZ128mb, TB_BCAST_SS},
{X86::VFMADD132PSZ256r, X86::VFMADD132PSZ256mb, TB_BCAST_SS},
{X86::VFMADD132PSZr, X86::VFMADD132PSZmb, TB_BCAST_SS},
+ {X86::VFMADD213NEPBF16Z128r, X86::VFMADD213NEPBF16Z128mb, TB_BCAST_SH},
+ {X86::VFMADD213NEPBF16Z256r, X86::VFMADD213NEPBF16Z256mb, TB_BCAST_SH},
+ {X86::VFMADD213NEPBF16Zr, X86::VFMADD213NEPBF16Zmb, TB_BCAST_SH},
{X86::VFMADD213PDZ128r, X86::VFMADD213PDZ128mb, TB_BCAST_SD},
{X86::VFMADD213PDZ256r, X86::VFMADD213PDZ256mb, TB_BCAST_SD},
{X86::VFMADD213PDZr, X86::VFMADD213PDZmb, TB_BCAST_SD},
@@ -7867,6 +8202,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VFMADD213PSZ128r, X86::VFMADD213PSZ128mb, TB_BCAST_SS},
{X86::VFMADD213PSZ256r, X86::VFMADD213PSZ256mb, TB_BCAST_SS},
{X86::VFMADD213PSZr, X86::VFMADD213PSZmb, TB_BCAST_SS},
+ {X86::VFMADD231NEPBF16Z128r, X86::VFMADD231NEPBF16Z128mb, TB_BCAST_SH},
+ {X86::VFMADD231NEPBF16Z256r, X86::VFMADD231NEPBF16Z256mb, TB_BCAST_SH},
+ {X86::VFMADD231NEPBF16Zr, X86::VFMADD231NEPBF16Zmb, TB_BCAST_SH},
{X86::VFMADD231PDZ128r, X86::VFMADD231PDZ128mb, TB_BCAST_SD},
{X86::VFMADD231PDZ256r, X86::VFMADD231PDZ256mb, TB_BCAST_SD},
{X86::VFMADD231PDZr, X86::VFMADD231PDZmb, TB_BCAST_SD},
@@ -7906,6 +8244,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VFMADDSUB231PSZ128r, X86::VFMADDSUB231PSZ128mb, TB_BCAST_SS},
{X86::VFMADDSUB231PSZ256r, X86::VFMADDSUB231PSZ256mb, TB_BCAST_SS},
{X86::VFMADDSUB231PSZr, X86::VFMADDSUB231PSZmb, TB_BCAST_SS},
+ {X86::VFMSUB132NEPBF16Z128r, X86::VFMSUB132NEPBF16Z128mb, TB_BCAST_SH},
+ {X86::VFMSUB132NEPBF16Z256r, X86::VFMSUB132NEPBF16Z256mb, TB_BCAST_SH},
+ {X86::VFMSUB132NEPBF16Zr, X86::VFMSUB132NEPBF16Zmb, TB_BCAST_SH},
{X86::VFMSUB132PDZ128r, X86::VFMSUB132PDZ128mb, TB_BCAST_SD},
{X86::VFMSUB132PDZ256r, X86::VFMSUB132PDZ256mb, TB_BCAST_SD},
{X86::VFMSUB132PDZr, X86::VFMSUB132PDZmb, TB_BCAST_SD},
@@ -7915,6 +8256,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VFMSUB132PSZ128r, X86::VFMSUB132PSZ128mb, TB_BCAST_SS},
{X86::VFMSUB132PSZ256r, X86::VFMSUB132PSZ256mb, TB_BCAST_SS},
{X86::VFMSUB132PSZr, X86::VFMSUB132PSZmb, TB_BCAST_SS},
+ {X86::VFMSUB213NEPBF16Z128r, X86::VFMSUB213NEPBF16Z128mb, TB_BCAST_SH},
+ {X86::VFMSUB213NEPBF16Z256r, X86::VFMSUB213NEPBF16Z256mb, TB_BCAST_SH},
+ {X86::VFMSUB213NEPBF16Zr, X86::VFMSUB213NEPBF16Zmb, TB_BCAST_SH},
{X86::VFMSUB213PDZ128r, X86::VFMSUB213PDZ128mb, TB_BCAST_SD},
{X86::VFMSUB213PDZ256r, X86::VFMSUB213PDZ256mb, TB_BCAST_SD},
{X86::VFMSUB213PDZr, X86::VFMSUB213PDZmb, TB_BCAST_SD},
@@ -7924,6 +8268,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VFMSUB213PSZ128r, X86::VFMSUB213PSZ128mb, TB_BCAST_SS},
{X86::VFMSUB213PSZ256r, X86::VFMSUB213PSZ256mb, TB_BCAST_SS},
{X86::VFMSUB213PSZr, X86::VFMSUB213PSZmb, TB_BCAST_SS},
+ {X86::VFMSUB231NEPBF16Z128r, X86::VFMSUB231NEPBF16Z128mb, TB_BCAST_SH},
+ {X86::VFMSUB231NEPBF16Z256r, X86::VFMSUB231NEPBF16Z256mb, TB_BCAST_SH},
+ {X86::VFMSUB231NEPBF16Zr, X86::VFMSUB231NEPBF16Zmb, TB_BCAST_SH},
{X86::VFMSUB231PDZ128r, X86::VFMSUB231PDZ128mb, TB_BCAST_SD},
{X86::VFMSUB231PDZ256r, X86::VFMSUB231PDZ256mb, TB_BCAST_SD},
{X86::VFMSUB231PDZr, X86::VFMSUB231PDZmb, TB_BCAST_SD},
@@ -7963,6 +8310,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VFMULCPHZ128rrkz, X86::VFMULCPHZ128rmbkz, TB_BCAST_SS},
{X86::VFMULCPHZ256rrkz, X86::VFMULCPHZ256rmbkz, TB_BCAST_SS},
{X86::VFMULCPHZrrkz, X86::VFMULCPHZrmbkz, TB_BCAST_SS},
+ {X86::VFNMADD132NEPBF16Z128r, X86::VFNMADD132NEPBF16Z128mb, TB_BCAST_SH},
+ {X86::VFNMADD132NEPBF16Z256r, X86::VFNMADD132NEPBF16Z256mb, TB_BCAST_SH},
+ {X86::VFNMADD132NEPBF16Zr, X86::VFNMADD132NEPBF16Zmb, TB_BCAST_SH},
{X86::VFNMADD132PDZ128r, X86::VFNMADD132PDZ128mb, TB_BCAST_SD},
{X86::VFNMADD132PDZ256r, X86::VFNMADD132PDZ256mb, TB_BCAST_SD},
{X86::VFNMADD132PDZr, X86::VFNMADD132PDZmb, TB_BCAST_SD},
@@ -7972,6 +8322,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VFNMADD132PSZ128r, X86::VFNMADD132PSZ128mb, TB_BCAST_SS},
{X86::VFNMADD132PSZ256r, X86::VFNMADD132PSZ256mb, TB_BCAST_SS},
{X86::VFNMADD132PSZr, X86::VFNMADD132PSZmb, TB_BCAST_SS},
+ {X86::VFNMADD213NEPBF16Z128r, X86::VFNMADD213NEPBF16Z128mb, TB_BCAST_SH},
+ {X86::VFNMADD213NEPBF16Z256r, X86::VFNMADD213NEPBF16Z256mb, TB_BCAST_SH},
+ {X86::VFNMADD213NEPBF16Zr, X86::VFNMADD213NEPBF16Zmb, TB_BCAST_SH},
{X86::VFNMADD213PDZ128r, X86::VFNMADD213PDZ128mb, TB_BCAST_SD},
{X86::VFNMADD213PDZ256r, X86::VFNMADD213PDZ256mb, TB_BCAST_SD},
{X86::VFNMADD213PDZr, X86::VFNMADD213PDZmb, TB_BCAST_SD},
@@ -7981,6 +8334,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VFNMADD213PSZ128r, X86::VFNMADD213PSZ128mb, TB_BCAST_SS},
{X86::VFNMADD213PSZ256r, X86::VFNMADD213PSZ256mb, TB_BCAST_SS},
{X86::VFNMADD213PSZr, X86::VFNMADD213PSZmb, TB_BCAST_SS},
+ {X86::VFNMADD231NEPBF16Z128r, X86::VFNMADD231NEPBF16Z128mb, TB_BCAST_SH},
+ {X86::VFNMADD231NEPBF16Z256r, X86::VFNMADD231NEPBF16Z256mb, TB_BCAST_SH},
+ {X86::VFNMADD231NEPBF16Zr, X86::VFNMADD231NEPBF16Zmb, TB_BCAST_SH},
{X86::VFNMADD231PDZ128r, X86::VFNMADD231PDZ128mb, TB_BCAST_SD},
{X86::VFNMADD231PDZ256r, X86::VFNMADD231PDZ256mb, TB_BCAST_SD},
{X86::VFNMADD231PDZr, X86::VFNMADD231PDZmb, TB_BCAST_SD},
@@ -7990,6 +8346,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VFNMADD231PSZ128r, X86::VFNMADD231PSZ128mb, TB_BCAST_SS},
{X86::VFNMADD231PSZ256r, X86::VFNMADD231PSZ256mb, TB_BCAST_SS},
{X86::VFNMADD231PSZr, X86::VFNMADD231PSZmb, TB_BCAST_SS},
+ {X86::VFNMSUB132NEPBF16Z128r, X86::VFNMSUB132NEPBF16Z128mb, TB_BCAST_SH},
+ {X86::VFNMSUB132NEPBF16Z256r, X86::VFNMSUB132NEPBF16Z256mb, TB_BCAST_SH},
+ {X86::VFNMSUB132NEPBF16Zr, X86::VFNMSUB132NEPBF16Zmb, TB_BCAST_SH},
{X86::VFNMSUB132PDZ128r, X86::VFNMSUB132PDZ128mb, TB_BCAST_SD},
{X86::VFNMSUB132PDZ256r, X86::VFNMSUB132PDZ256mb, TB_BCAST_SD},
{X86::VFNMSUB132PDZr, X86::VFNMSUB132PDZmb, TB_BCAST_SD},
@@ -7999,6 +8358,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VFNMSUB132PSZ128r, X86::VFNMSUB132PSZ128mb, TB_BCAST_SS},
{X86::VFNMSUB132PSZ256r, X86::VFNMSUB132PSZ256mb, TB_BCAST_SS},
{X86::VFNMSUB132PSZr, X86::VFNMSUB132PSZmb, TB_BCAST_SS},
+ {X86::VFNMSUB213NEPBF16Z128r, X86::VFNMSUB213NEPBF16Z128mb, TB_BCAST_SH},
+ {X86::VFNMSUB213NEPBF16Z256r, X86::VFNMSUB213NEPBF16Z256mb, TB_BCAST_SH},
+ {X86::VFNMSUB213NEPBF16Zr, X86::VFNMSUB213NEPBF16Zmb, TB_BCAST_SH},
{X86::VFNMSUB213PDZ128r, X86::VFNMSUB213PDZ128mb, TB_BCAST_SD},
{X86::VFNMSUB213PDZ256r, X86::VFNMSUB213PDZ256mb, TB_BCAST_SD},
{X86::VFNMSUB213PDZr, X86::VFNMSUB213PDZmb, TB_BCAST_SD},
@@ -8008,6 +8370,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VFNMSUB213PSZ128r, X86::VFNMSUB213PSZ128mb, TB_BCAST_SS},
{X86::VFNMSUB213PSZ256r, X86::VFNMSUB213PSZ256mb, TB_BCAST_SS},
{X86::VFNMSUB213PSZr, X86::VFNMSUB213PSZmb, TB_BCAST_SS},
+ {X86::VFNMSUB231NEPBF16Z128r, X86::VFNMSUB231NEPBF16Z128mb, TB_BCAST_SH},
+ {X86::VFNMSUB231NEPBF16Z256r, X86::VFNMSUB231NEPBF16Z256mb, TB_BCAST_SH},
+ {X86::VFNMSUB231NEPBF16Zr, X86::VFNMSUB231NEPBF16Zmb, TB_BCAST_SH},
{X86::VFNMSUB231PDZ128r, X86::VFNMSUB231PDZ128mb, TB_BCAST_SD},
{X86::VFNMSUB231PDZ256r, X86::VFNMSUB231PDZ256mb, TB_BCAST_SD},
{X86::VFNMSUB231PDZr, X86::VFNMSUB231PDZmb, TB_BCAST_SD},
@@ -8017,6 +8382,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VFNMSUB231PSZ128r, X86::VFNMSUB231PSZ128mb, TB_BCAST_SS},
{X86::VFNMSUB231PSZ256r, X86::VFNMSUB231PSZ256mb, TB_BCAST_SS},
{X86::VFNMSUB231PSZr, X86::VFNMSUB231PSZmb, TB_BCAST_SS},
+ {X86::VGETEXPPBF16Z128rk, X86::VGETEXPPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VGETEXPPBF16Z256rk, X86::VGETEXPPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VGETEXPPBF16Zrk, X86::VGETEXPPBF16Zmbk, TB_BCAST_SH},
{X86::VGETEXPPDZ128rk, X86::VGETEXPPDZ128mbk, TB_BCAST_SD},
{X86::VGETEXPPDZ256rk, X86::VGETEXPPDZ256mbk, TB_BCAST_SD},
{X86::VGETEXPPDZrk, X86::VGETEXPPDZmbk, TB_BCAST_SD},
@@ -8026,6 +8394,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VGETEXPPSZ128rk, X86::VGETEXPPSZ128mbk, TB_BCAST_SS},
{X86::VGETEXPPSZ256rk, X86::VGETEXPPSZ256mbk, TB_BCAST_SS},
{X86::VGETEXPPSZrk, X86::VGETEXPPSZmbk, TB_BCAST_SS},
+ {X86::VGETMANTPBF16Z128rrik, X86::VGETMANTPBF16Z128rmbik, TB_BCAST_SH},
+ {X86::VGETMANTPBF16Z256rrik, X86::VGETMANTPBF16Z256rmbik, TB_BCAST_SH},
+ {X86::VGETMANTPBF16Zrrik, X86::VGETMANTPBF16Zrmbik, TB_BCAST_SH},
{X86::VGETMANTPDZ128rrik, X86::VGETMANTPDZ128rmbik, TB_BCAST_SD},
{X86::VGETMANTPDZ256rrik, X86::VGETMANTPDZ256rmbik, TB_BCAST_SD},
{X86::VGETMANTPDZrrik, X86::VGETMANTPDZrmbik, TB_BCAST_SD},
@@ -8050,6 +8421,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VMAXCPSZ128rrkz, X86::VMAXCPSZ128rmbkz, TB_BCAST_SS},
{X86::VMAXCPSZ256rrkz, X86::VMAXCPSZ256rmbkz, TB_BCAST_SS},
{X86::VMAXCPSZrrkz, X86::VMAXCPSZrmbkz, TB_BCAST_SS},
+ {X86::VMAXPBF16Z128rrkz, X86::VMAXPBF16Z128rmbkz, TB_BCAST_SH},
+ {X86::VMAXPBF16Z256rrkz, X86::VMAXPBF16Z256rmbkz, TB_BCAST_SH},
+ {X86::VMAXPBF16Zrrkz, X86::VMAXPBF16Zrmbkz, TB_BCAST_SH},
{X86::VMAXPDZ128rrkz, X86::VMAXPDZ128rmbkz, TB_BCAST_SD},
{X86::VMAXPDZ256rrkz, X86::VMAXPDZ256rmbkz, TB_BCAST_SD},
{X86::VMAXPDZrrkz, X86::VMAXPDZrmbkz, TB_BCAST_SD},
@@ -8068,6 +8442,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VMINCPSZ128rrkz, X86::VMINCPSZ128rmbkz, TB_BCAST_SS},
{X86::VMINCPSZ256rrkz, X86::VMINCPSZ256rmbkz, TB_BCAST_SS},
{X86::VMINCPSZrrkz, X86::VMINCPSZrmbkz, TB_BCAST_SS},
+ {X86::VMINPBF16Z128rrkz, X86::VMINPBF16Z128rmbkz, TB_BCAST_SH},
+ {X86::VMINPBF16Z256rrkz, X86::VMINPBF16Z256rmbkz, TB_BCAST_SH},
+ {X86::VMINPBF16Zrrkz, X86::VMINPBF16Zrmbkz, TB_BCAST_SH},
{X86::VMINPDZ128rrkz, X86::VMINPDZ128rmbkz, TB_BCAST_SD},
{X86::VMINPDZ256rrkz, X86::VMINPDZ256rmbkz, TB_BCAST_SD},
{X86::VMINPDZrrkz, X86::VMINPDZrmbkz, TB_BCAST_SD},
@@ -8077,6 +8454,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VMINPSZ128rrkz, X86::VMINPSZ128rmbkz, TB_BCAST_SS},
{X86::VMINPSZ256rrkz, X86::VMINPSZ256rmbkz, TB_BCAST_SS},
{X86::VMINPSZrrkz, X86::VMINPSZrmbkz, TB_BCAST_SS},
+ {X86::VMULNEPBF16Z128rrkz, X86::VMULNEPBF16Z128rmbkz, TB_BCAST_SH},
+ {X86::VMULNEPBF16Z256rrkz, X86::VMULNEPBF16Z256rmbkz, TB_BCAST_SH},
+ {X86::VMULNEPBF16Zrrkz, X86::VMULNEPBF16Zrmbkz, TB_BCAST_SH},
{X86::VMULPDZ128rrkz, X86::VMULPDZ128rmbkz, TB_BCAST_SD},
{X86::VMULPDZ256rrkz, X86::VMULPDZ256rmbkz, TB_BCAST_SD},
{X86::VMULPDZrrkz, X86::VMULPDZrmbkz, TB_BCAST_SD},
@@ -8424,9 +8804,15 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VRCP14PSZrk, X86::VRCP14PSZmbk, TB_BCAST_SS},
{X86::VRCP28PDZrk, X86::VRCP28PDZmbk, TB_BCAST_SD},
{X86::VRCP28PSZrk, X86::VRCP28PSZmbk, TB_BCAST_SS},
+ {X86::VRCPPBF16Z128rk, X86::VRCPPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VRCPPBF16Z256rk, X86::VRCPPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VRCPPBF16Zrk, X86::VRCPPBF16Zmbk, TB_BCAST_SH},
{X86::VRCPPHZ128rk, X86::VRCPPHZ128mbk, TB_BCAST_SH},
{X86::VRCPPHZ256rk, X86::VRCPPHZ256mbk, TB_BCAST_SH},
{X86::VRCPPHZrk, X86::VRCPPHZmbk, TB_BCAST_SH},
+ {X86::VREDUCENEPBF16Z128rrik, X86::VREDUCENEPBF16Z128rmbik, TB_BCAST_SH},
+ {X86::VREDUCENEPBF16Z256rrik, X86::VREDUCENEPBF16Z256rmbik, TB_BCAST_SH},
+ {X86::VREDUCENEPBF16Zrrik, X86::VREDUCENEPBF16Zrmbik, TB_BCAST_SH},
{X86::VREDUCEPDZ128rrik, X86::VREDUCEPDZ128rmbik, TB_BCAST_SD},
{X86::VREDUCEPDZ256rrik, X86::VREDUCEPDZ256rmbik, TB_BCAST_SD},
{X86::VREDUCEPDZrrik, X86::VREDUCEPDZrmbik, TB_BCAST_SD},
@@ -8436,6 +8822,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VREDUCEPSZ128rrik, X86::VREDUCEPSZ128rmbik, TB_BCAST_SS},
{X86::VREDUCEPSZ256rrik, X86::VREDUCEPSZ256rmbik, TB_BCAST_SS},
{X86::VREDUCEPSZrrik, X86::VREDUCEPSZrmbik, TB_BCAST_SS},
+ {X86::VRNDSCALENEPBF16Z128rrik, X86::VRNDSCALENEPBF16Z128rmbik, TB_BCAST_SH},
+ {X86::VRNDSCALENEPBF16Z256rrik, X86::VRNDSCALENEPBF16Z256rmbik, TB_BCAST_SH},
+ {X86::VRNDSCALENEPBF16Zrrik, X86::VRNDSCALENEPBF16Zrmbik, TB_BCAST_SH},
{X86::VRNDSCALEPDZ128rrik, X86::VRNDSCALEPDZ128rmbik, TB_BCAST_SD},
{X86::VRNDSCALEPDZ256rrik, X86::VRNDSCALEPDZ256rmbik, TB_BCAST_SD},
{X86::VRNDSCALEPDZrrik, X86::VRNDSCALEPDZrmbik, TB_BCAST_SD},
@@ -8453,9 +8842,15 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VRSQRT14PSZrk, X86::VRSQRT14PSZmbk, TB_BCAST_SS},
{X86::VRSQRT28PDZrk, X86::VRSQRT28PDZmbk, TB_BCAST_SD},
{X86::VRSQRT28PSZrk, X86::VRSQRT28PSZmbk, TB_BCAST_SS},
+ {X86::VRSQRTPBF16Z128rk, X86::VRSQRTPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VRSQRTPBF16Z256rk, X86::VRSQRTPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VRSQRTPBF16Zrk, X86::VRSQRTPBF16Zmbk, TB_BCAST_SH},
{X86::VRSQRTPHZ128rk, X86::VRSQRTPHZ128mbk, TB_BCAST_SH},
{X86::VRSQRTPHZ256rk, X86::VRSQRTPHZ256mbk, TB_BCAST_SH},
{X86::VRSQRTPHZrk, X86::VRSQRTPHZmbk, TB_BCAST_SH},
+ {X86::VSCALEFPBF16Z128rrkz, X86::VSCALEFPBF16Z128rmbkz, TB_BCAST_SH},
+ {X86::VSCALEFPBF16Z256rrkz, X86::VSCALEFPBF16Z256rmbkz, TB_BCAST_SH},
+ {X86::VSCALEFPBF16Zrrkz, X86::VSCALEFPBF16Zrmbkz, TB_BCAST_SH},
{X86::VSCALEFPDZ128rrkz, X86::VSCALEFPDZ128rmbkz, TB_BCAST_SD},
{X86::VSCALEFPDZ256rrkz, X86::VSCALEFPDZ256rmbkz, TB_BCAST_SD},
{X86::VSCALEFPDZrrkz, X86::VSCALEFPDZrmbkz, TB_BCAST_SD},
@@ -8479,6 +8874,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VSHUFPSZ128rrikz, X86::VSHUFPSZ128rmbikz, TB_BCAST_SS},
{X86::VSHUFPSZ256rrikz, X86::VSHUFPSZ256rmbikz, TB_BCAST_SS},
{X86::VSHUFPSZrrikz, X86::VSHUFPSZrmbikz, TB_BCAST_SS},
+ {X86::VSQRTNEPBF16Z128rk, X86::VSQRTNEPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VSQRTNEPBF16Z256rk, X86::VSQRTNEPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VSQRTNEPBF16Zrk, X86::VSQRTNEPBF16Zmbk, TB_BCAST_SH},
{X86::VSQRTPDZ128rk, X86::VSQRTPDZ128mbk, TB_BCAST_SD},
{X86::VSQRTPDZ256rk, X86::VSQRTPDZ256mbk, TB_BCAST_SD},
{X86::VSQRTPDZrk, X86::VSQRTPDZmbk, TB_BCAST_SD},
@@ -8488,6 +8886,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VSQRTPSZ128rk, X86::VSQRTPSZ128mbk, TB_BCAST_SS},
{X86::VSQRTPSZ256rk, X86::VSQRTPSZ256mbk, TB_BCAST_SS},
{X86::VSQRTPSZrk, X86::VSQRTPSZmbk, TB_BCAST_SS},
+ {X86::VSUBNEPBF16Z128rrkz, X86::VSUBNEPBF16Z128rmbkz, TB_BCAST_SH},
+ {X86::VSUBNEPBF16Z256rrkz, X86::VSUBNEPBF16Z256rmbkz, TB_BCAST_SH},
+ {X86::VSUBNEPBF16Zrrkz, X86::VSUBNEPBF16Zrmbkz, TB_BCAST_SH},
{X86::VSUBPDZ128rrkz, X86::VSUBPDZ128rmbkz, TB_BCAST_SD},
{X86::VSUBPDZ256rrkz, X86::VSUBPDZ256rmbkz, TB_BCAST_SD},
{X86::VSUBPDZrrkz, X86::VSUBPDZrmbkz, TB_BCAST_SD},
@@ -8518,6 +8919,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
};
static const X86FoldTableEntry BroadcastTable4[] = {
+ {X86::VADDNEPBF16Z128rrk, X86::VADDNEPBF16Z128rmbk, TB_BCAST_SH},
+ {X86::VADDNEPBF16Z256rrk, X86::VADDNEPBF16Z256rmbk, TB_BCAST_SH},
+ {X86::VADDNEPBF16Zrrk, X86::VADDNEPBF16Zrmbk, TB_BCAST_SH},
{X86::VADDPDZ128rrk, X86::VADDPDZ128rmbk, TB_BCAST_SD},
{X86::VADDPDZ256rrk, X86::VADDPDZ256rmbk, TB_BCAST_SD},
{X86::VADDPDZrrk, X86::VADDPDZrmbk, TB_BCAST_SD},
@@ -8548,6 +8952,9 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VCVTNE2PS2BF16Z128rrk, X86::VCVTNE2PS2BF16Z128rmbk, TB_BCAST_SS},
{X86::VCVTNE2PS2BF16Z256rrk, X86::VCVTNE2PS2BF16Z256rmbk, TB_BCAST_SS},
{X86::VCVTNE2PS2BF16Zrrk, X86::VCVTNE2PS2BF16Zrmbk, TB_BCAST_SS},
+ {X86::VDIVNEPBF16Z128rrk, X86::VDIVNEPBF16Z128rmbk, TB_BCAST_SH},
+ {X86::VDIVNEPBF16Z256rrk, X86::VDIVNEPBF16Z256rmbk, TB_BCAST_SH},
+ {X86::VDIVNEPBF16Zrrk, X86::VDIVNEPBF16Zrmbk, TB_BCAST_SH},
{X86::VDIVPDZ128rrk, X86::VDIVPDZ128rmbk, TB_BCAST_SD},
{X86::VDIVPDZ256rrk, X86::VDIVPDZ256rmbk, TB_BCAST_SD},
{X86::VDIVPDZrrk, X86::VDIVPDZrmbk, TB_BCAST_SD},
@@ -8584,6 +8991,12 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VFIXUPIMMPSZ256rrikz, X86::VFIXUPIMMPSZ256rmbikz, TB_BCAST_SS},
{X86::VFIXUPIMMPSZrrik, X86::VFIXUPIMMPSZrmbik, TB_BCAST_SS},
{X86::VFIXUPIMMPSZrrikz, X86::VFIXUPIMMPSZrmbikz, TB_BCAST_SS},
+ {X86::VFMADD132NEPBF16Z128rk, X86::VFMADD132NEPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VFMADD132NEPBF16Z128rkz, X86::VFMADD132NEPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VFMADD132NEPBF16Z256rk, X86::VFMADD132NEPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VFMADD132NEPBF16Z256rkz, X86::VFMADD132NEPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VFMADD132NEPBF16Zrk, X86::VFMADD132NEPBF16Zmbk, TB_BCAST_SH},
+ {X86::VFMADD132NEPBF16Zrkz, X86::VFMADD132NEPBF16Zmbkz, TB_BCAST_SH},
{X86::VFMADD132PDZ128rk, X86::VFMADD132PDZ128mbk, TB_BCAST_SD},
{X86::VFMADD132PDZ128rkz, X86::VFMADD132PDZ128mbkz, TB_BCAST_SD},
{X86::VFMADD132PDZ256rk, X86::VFMADD132PDZ256mbk, TB_BCAST_SD},
@@ -8602,6 +9015,12 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VFMADD132PSZ256rkz, X86::VFMADD132PSZ256mbkz, TB_BCAST_SS},
{X86::VFMADD132PSZrk, X86::VFMADD132PSZmbk, TB_BCAST_SS},
{X86::VFMADD132PSZrkz, X86::VFMADD132PSZmbkz, TB_BCAST_SS},
+ {X86::VFMADD213NEPBF16Z128rk, X86::VFMADD213NEPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VFMADD213NEPBF16Z128rkz, X86::VFMADD213NEPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VFMADD213NEPBF16Z256rk, X86::VFMADD213NEPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VFMADD213NEPBF16Z256rkz, X86::VFMADD213NEPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VFMADD213NEPBF16Zrk, X86::VFMADD213NEPBF16Zmbk, TB_BCAST_SH},
+ {X86::VFMADD213NEPBF16Zrkz, X86::VFMADD213NEPBF16Zmbkz, TB_BCAST_SH},
{X86::VFMADD213PDZ128rk, X86::VFMADD213PDZ128mbk, TB_BCAST_SD},
{X86::VFMADD213PDZ128rkz, X86::VFMADD213PDZ128mbkz, TB_BCAST_SD},
{X86::VFMADD213PDZ256rk, X86::VFMADD213PDZ256mbk, TB_BCAST_SD},
@@ -8620,6 +9039,12 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VFMADD213PSZ256rkz, X86::VFMADD213PSZ256mbkz, TB_BCAST_SS},
{X86::VFMADD213PSZrk, X86::VFMADD213PSZmbk, TB_BCAST_SS},
{X86::VFMADD213PSZrkz, X86::VFMADD213PSZmbkz, TB_BCAST_SS},
+ {X86::VFMADD231NEPBF16Z128rk, X86::VFMADD231NEPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VFMADD231NEPBF16Z128rkz, X86::VFMADD231NEPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VFMADD231NEPBF16Z256rk, X86::VFMADD231NEPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VFMADD231NEPBF16Z256rkz, X86::VFMADD231NEPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VFMADD231NEPBF16Zrk, X86::VFMADD231NEPBF16Zmbk, TB_BCAST_SH},
+ {X86::VFMADD231NEPBF16Zrkz, X86::VFMADD231NEPBF16Zmbkz, TB_BCAST_SH},
{X86::VFMADD231PDZ128rk, X86::VFMADD231PDZ128mbk, TB_BCAST_SD},
{X86::VFMADD231PDZ128rkz, X86::VFMADD231PDZ128mbkz, TB_BCAST_SD},
{X86::VFMADD231PDZ256rk, X86::VFMADD231PDZ256mbk, TB_BCAST_SD},
@@ -8698,6 +9123,12 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VFMADDSUB231PSZ256rkz, X86::VFMADDSUB231PSZ256mbkz, TB_BCAST_SS},
{X86::VFMADDSUB231PSZrk, X86::VFMADDSUB231PSZmbk, TB_BCAST_SS},
{X86::VFMADDSUB231PSZrkz, X86::VFMADDSUB231PSZmbkz, TB_BCAST_SS},
+ {X86::VFMSUB132NEPBF16Z128rk, X86::VFMSUB132NEPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VFMSUB132NEPBF16Z128rkz, X86::VFMSUB132NEPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VFMSUB132NEPBF16Z256rk, X86::VFMSUB132NEPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VFMSUB132NEPBF16Z256rkz, X86::VFMSUB132NEPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VFMSUB132NEPBF16Zrk, X86::VFMSUB132NEPBF16Zmbk, TB_BCAST_SH},
+ {X86::VFMSUB132NEPBF16Zrkz, X86::VFMSUB132NEPBF16Zmbkz, TB_BCAST_SH},
{X86::VFMSUB132PDZ128rk, X86::VFMSUB132PDZ128mbk, TB_BCAST_SD},
{X86::VFMSUB132PDZ128rkz, X86::VFMSUB132PDZ128mbkz, TB_BCAST_SD},
{X86::VFMSUB132PDZ256rk, X86::VFMSUB132PDZ256mbk, TB_BCAST_SD},
@@ -8716,6 +9147,12 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VFMSUB132PSZ256rkz, X86::VFMSUB132PSZ256mbkz, TB_BCAST_SS},
{X86::VFMSUB132PSZrk, X86::VFMSUB132PSZmbk, TB_BCAST_SS},
{X86::VFMSUB132PSZrkz, X86::VFMSUB132PSZmbkz, TB_BCAST_SS},
+ {X86::VFMSUB213NEPBF16Z128rk, X86::VFMSUB213NEPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VFMSUB213NEPBF16Z128rkz, X86::VFMSUB213NEPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VFMSUB213NEPBF16Z256rk, X86::VFMSUB213NEPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VFMSUB213NEPBF16Z256rkz, X86::VFMSUB213NEPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VFMSUB213NEPBF16Zrk, X86::VFMSUB213NEPBF16Zmbk, TB_BCAST_SH},
+ {X86::VFMSUB213NEPBF16Zrkz, X86::VFMSUB213NEPBF16Zmbkz, TB_BCAST_SH},
{X86::VFMSUB213PDZ128rk, X86::VFMSUB213PDZ128mbk, TB_BCAST_SD},
{X86::VFMSUB213PDZ128rkz, X86::VFMSUB213PDZ128mbkz, TB_BCAST_SD},
{X86::VFMSUB213PDZ256rk, X86::VFMSUB213PDZ256mbk, TB_BCAST_SD},
@@ -8734,6 +9171,12 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VFMSUB213PSZ256rkz, X86::VFMSUB213PSZ256mbkz, TB_BCAST_SS},
{X86::VFMSUB213PSZrk, X86::VFMSUB213PSZmbk, TB_BCAST_SS},
{X86::VFMSUB213PSZrkz, X86::VFMSUB213PSZmbkz, TB_BCAST_SS},
+ {X86::VFMSUB231NEPBF16Z128rk, X86::VFMSUB231NEPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VFMSUB231NEPBF16Z128rkz, X86::VFMSUB231NEPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VFMSUB231NEPBF16Z256rk, X86::VFMSUB231NEPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VFMSUB231NEPBF16Z256rkz, X86::VFMSUB231NEPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VFMSUB231NEPBF16Zrk, X86::VFMSUB231NEPBF16Zmbk, TB_BCAST_SH},
+ {X86::VFMSUB231NEPBF16Zrkz, X86::VFMSUB231NEPBF16Zmbkz, TB_BCAST_SH},
{X86::VFMSUB231PDZ128rk, X86::VFMSUB231PDZ128mbk, TB_BCAST_SD},
{X86::VFMSUB231PDZ128rkz, X86::VFMSUB231PDZ128mbkz, TB_BCAST_SD},
{X86::VFMSUB231PDZ256rk, X86::VFMSUB231PDZ256mbk, TB_BCAST_SD},
@@ -8809,6 +9252,12 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VFMULCPHZ128rrk, X86::VFMULCPHZ128rmbk, TB_BCAST_SS},
{X86::VFMULCPHZ256rrk, X86::VFMULCPHZ256rmbk, TB_BCAST_SS},
{X86::VFMULCPHZrrk, X86::VFMULCPHZrmbk, TB_BCAST_SS},
+ {X86::VFNMADD132NEPBF16Z128rk, X86::VFNMADD132NEPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VFNMADD132NEPBF16Z128rkz, X86::VFNMADD132NEPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VFNMADD132NEPBF16Z256rk, X86::VFNMADD132NEPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VFNMADD132NEPBF16Z256rkz, X86::VFNMADD132NEPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VFNMADD132NEPBF16Zrk, X86::VFNMADD132NEPBF16Zmbk, TB_BCAST_SH},
+ {X86::VFNMADD132NEPBF16Zrkz, X86::VFNMADD132NEPBF16Zmbkz, TB_BCAST_SH},
{X86::VFNMADD132PDZ128rk, X86::VFNMADD132PDZ128mbk, TB_BCAST_SD},
{X86::VFNMADD132PDZ128rkz, X86::VFNMADD132PDZ128mbkz, TB_BCAST_SD},
{X86::VFNMADD132PDZ256rk, X86::VFNMADD132PDZ256mbk, TB_BCAST_SD},
@@ -8827,6 +9276,12 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VFNMADD132PSZ256rkz, X86::VFNMADD132PSZ256mbkz, TB_BCAST_SS},
{X86::VFNMADD132PSZrk, X86::VFNMADD132PSZmbk, TB_BCAST_SS},
{X86::VFNMADD132PSZrkz, X86::VFNMADD132PSZmbkz, TB_BCAST_SS},
+ {X86::VFNMADD213NEPBF16Z128rk, X86::VFNMADD213NEPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VFNMADD213NEPBF16Z128rkz, X86::VFNMADD213NEPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VFNMADD213NEPBF16Z256rk, X86::VFNMADD213NEPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VFNMADD213NEPBF16Z256rkz, X86::VFNMADD213NEPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VFNMADD213NEPBF16Zrk, X86::VFNMADD213NEPBF16Zmbk, TB_BCAST_SH},
+ {X86::VFNMADD213NEPBF16Zrkz, X86::VFNMADD213NEPBF16Zmbkz, TB_BCAST_SH},
{X86::VFNMADD213PDZ128rk, X86::VFNMADD213PDZ128mbk, TB_BCAST_SD},
{X86::VFNMADD213PDZ128rkz, X86::VFNMADD213PDZ128mbkz, TB_BCAST_SD},
{X86::VFNMADD213PDZ256rk, X86::VFNMADD213PDZ256mbk, TB_BCAST_SD},
@@ -8845,6 +9300,12 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VFNMADD213PSZ256rkz, X86::VFNMADD213PSZ256mbkz, TB_BCAST_SS},
{X86::VFNMADD213PSZrk, X86::VFNMADD213PSZmbk, TB_BCAST_SS},
{X86::VFNMADD213PSZrkz, X86::VFNMADD213PSZmbkz, TB_BCAST_SS},
+ {X86::VFNMADD231NEPBF16Z128rk, X86::VFNMADD231NEPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VFNMADD231NEPBF16Z128rkz, X86::VFNMADD231NEPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VFNMADD231NEPBF16Z256rk, X86::VFNMADD231NEPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VFNMADD231NEPBF16Z256rkz, X86::VFNMADD231NEPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VFNMADD231NEPBF16Zrk, X86::VFNMADD231NEPBF16Zmbk, TB_BCAST_SH},
+ {X86::VFNMADD231NEPBF16Zrkz, X86::VFNMADD231NEPBF16Zmbkz, TB_BCAST_SH},
{X86::VFNMADD231PDZ128rk, X86::VFNMADD231PDZ128mbk, TB_BCAST_SD},
{X86::VFNMADD231PDZ128rkz, X86::VFNMADD231PDZ128mbkz, TB_BCAST_SD},
{X86::VFNMADD231PDZ256rk, X86::VFNMADD231PDZ256mbk, TB_BCAST_SD},
@@ -8863,6 +9324,12 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VFNMADD231PSZ256rkz, X86::VFNMADD231PSZ256mbkz, TB_BCAST_SS},
{X86::VFNMADD231PSZrk, X86::VFNMADD231PSZmbk, TB_BCAST_SS},
{X86::VFNMADD231PSZrkz, X86::VFNMADD231PSZmbkz, TB_BCAST_SS},
+ {X86::VFNMSUB132NEPBF16Z128rk, X86::VFNMSUB132NEPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VFNMSUB132NEPBF16Z128rkz, X86::VFNMSUB132NEPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VFNMSUB132NEPBF16Z256rk, X86::VFNMSUB132NEPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VFNMSUB132NEPBF16Z256rkz, X86::VFNMSUB132NEPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VFNMSUB132NEPBF16Zrk, X86::VFNMSUB132NEPBF16Zmbk, TB_BCAST_SH},
+ {X86::VFNMSUB132NEPBF16Zrkz, X86::VFNMSUB132NEPBF16Zmbkz, TB_BCAST_SH},
{X86::VFNMSUB132PDZ128rk, X86::VFNMSUB132PDZ128mbk, TB_BCAST_SD},
{X86::VFNMSUB132PDZ128rkz, X86::VFNMSUB132PDZ128mbkz, TB_BCAST_SD},
{X86::VFNMSUB132PDZ256rk, X86::VFNMSUB132PDZ256mbk, TB_BCAST_SD},
@@ -8881,6 +9348,12 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VFNMSUB132PSZ256rkz, X86::VFNMSUB132PSZ256mbkz, TB_BCAST_SS},
{X86::VFNMSUB132PSZrk, X86::VFNMSUB132PSZmbk, TB_BCAST_SS},
{X86::VFNMSUB132PSZrkz, X86::VFNMSUB132PSZmbkz, TB_BCAST_SS},
+ {X86::VFNMSUB213NEPBF16Z128rk, X86::VFNMSUB213NEPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VFNMSUB213NEPBF16Z128rkz, X86::VFNMSUB213NEPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VFNMSUB213NEPBF16Z256rk, X86::VFNMSUB213NEPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VFNMSUB213NEPBF16Z256rkz, X86::VFNMSUB213NEPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VFNMSUB213NEPBF16Zrk, X86::VFNMSUB213NEPBF16Zmbk, TB_BCAST_SH},
+ {X86::VFNMSUB213NEPBF16Zrkz, X86::VFNMSUB213NEPBF16Zmbkz, TB_BCAST_SH},
{X86::VFNMSUB213PDZ128rk, X86::VFNMSUB213PDZ128mbk, TB_BCAST_SD},
{X86::VFNMSUB213PDZ128rkz, X86::VFNMSUB213PDZ128mbkz, TB_BCAST_SD},
{X86::VFNMSUB213PDZ256rk, X86::VFNMSUB213PDZ256mbk, TB_BCAST_SD},
@@ -8899,6 +9372,12 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VFNMSUB213PSZ256rkz, X86::VFNMSUB213PSZ256mbkz, TB_BCAST_SS},
{X86::VFNMSUB213PSZrk, X86::VFNMSUB213PSZmbk, TB_BCAST_SS},
{X86::VFNMSUB213PSZrkz, X86::VFNMSUB213PSZmbkz, TB_BCAST_SS},
+ {X86::VFNMSUB231NEPBF16Z128rk, X86::VFNMSUB231NEPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VFNMSUB231NEPBF16Z128rkz, X86::VFNMSUB231NEPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VFNMSUB231NEPBF16Z256rk, X86::VFNMSUB231NEPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VFNMSUB231NEPBF16Z256rkz, X86::VFNMSUB231NEPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VFNMSUB231NEPBF16Zrk, X86::VFNMSUB231NEPBF16Zmbk, TB_BCAST_SH},
+ {X86::VFNMSUB231NEPBF16Zrkz, X86::VFNMSUB231NEPBF16Zmbkz, TB_BCAST_SH},
{X86::VFNMSUB231PDZ128rk, X86::VFNMSUB231PDZ128mbk, TB_BCAST_SD},
{X86::VFNMSUB231PDZ128rkz, X86::VFNMSUB231PDZ128mbkz, TB_BCAST_SD},
{X86::VFNMSUB231PDZ256rk, X86::VFNMSUB231PDZ256mbk, TB_BCAST_SD},
@@ -8932,6 +9411,9 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VMAXCPSZ128rrk, X86::VMAXCPSZ128rmbk, TB_BCAST_SS},
{X86::VMAXCPSZ256rrk, X86::VMAXCPSZ256rmbk, TB_BCAST_SS},
{X86::VMAXCPSZrrk, X86::VMAXCPSZrmbk, TB_BCAST_SS},
+ {X86::VMAXPBF16Z128rrk, X86::VMAXPBF16Z128rmbk, TB_BCAST_SH},
+ {X86::VMAXPBF16Z256rrk, X86::VMAXPBF16Z256rmbk, TB_BCAST_SH},
+ {X86::VMAXPBF16Zrrk, X86::VMAXPBF16Zrmbk, TB_BCAST_SH},
{X86::VMAXPDZ128rrk, X86::VMAXPDZ128rmbk, TB_BCAST_SD},
{X86::VMAXPDZ256rrk, X86::VMAXPDZ256rmbk, TB_BCAST_SD},
{X86::VMAXPDZrrk, X86::VMAXPDZrmbk, TB_BCAST_SD},
@@ -8950,6 +9432,9 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VMINCPSZ128rrk, X86::VMINCPSZ128rmbk, TB_BCAST_SS},
{X86::VMINCPSZ256rrk, X86::VMINCPSZ256rmbk, TB_BCAST_SS},
{X86::VMINCPSZrrk, X86::VMINCPSZrmbk, TB_BCAST_SS},
+ {X86::VMINPBF16Z128rrk, X86::VMINPBF16Z128rmbk, TB_BCAST_SH},
+ {X86::VMINPBF16Z256rrk, X86::VMINPBF16Z256rmbk, TB_BCAST_SH},
+ {X86::VMINPBF16Zrrk, X86::VMINPBF16Zrmbk, TB_BCAST_SH},
{X86::VMINPDZ128rrk, X86::VMINPDZ128rmbk, TB_BCAST_SD},
{X86::VMINPDZ256rrk, X86::VMINPDZ256rmbk, TB_BCAST_SD},
{X86::VMINPDZrrk, X86::VMINPDZrmbk, TB_BCAST_SD},
@@ -8959,6 +9444,9 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VMINPSZ128rrk, X86::VMINPSZ128rmbk, TB_BCAST_SS},
{X86::VMINPSZ256rrk, X86::VMINPSZ256rmbk, TB_BCAST_SS},
{X86::VMINPSZrrk, X86::VMINPSZrmbk, TB_BCAST_SS},
+ {X86::VMULNEPBF16Z128rrk, X86::VMULNEPBF16Z128rmbk, TB_BCAST_SH},
+ {X86::VMULNEPBF16Z256rrk, X86::VMULNEPBF16Z256rmbk, TB_BCAST_SH},
+ {X86::VMULNEPBF16Zrrk, X86::VMULNEPBF16Zrmbk, TB_BCAST_SH},
{X86::VMULPDZ128rrk, X86::VMULPDZ128rmbk, TB_BCAST_SD},
{X86::VMULPDZ256rrk, X86::VMULPDZ256rmbk, TB_BCAST_SD},
{X86::VMULPDZrrk, X86::VMULPDZrmbk, TB_BCAST_SD},
@@ -9249,6 +9737,9 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VRANGEPSZ128rrik, X86::VRANGEPSZ128rmbik, TB_BCAST_SS},
{X86::VRANGEPSZ256rrik, X86::VRANGEPSZ256rmbik, TB_BCAST_SS},
{X86::VRANGEPSZrrik, X86::VRANGEPSZrmbik, TB_BCAST_SS},
+ {X86::VSCALEFPBF16Z128rrk, X86::VSCALEFPBF16Z128rmbk, TB_BCAST_SH},
+ {X86::VSCALEFPBF16Z256rrk, X86::VSCALEFPBF16Z256rmbk, TB_BCAST_SH},
+ {X86::VSCALEFPBF16Zrrk, X86::VSCALEFPBF16Zrmbk, TB_BCAST_SH},
{X86::VSCALEFPDZ128rrk, X86::VSCALEFPDZ128rmbk, TB_BCAST_SD},
{X86::VSCALEFPDZ256rrk, X86::VSCALEFPDZ256rmbk, TB_BCAST_SD},
{X86::VSCALEFPDZrrk, X86::VSCALEFPDZrmbk, TB_BCAST_SD},
@@ -9272,6 +9763,9 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VSHUFPSZ128rrik, X86::VSHUFPSZ128rmbik, TB_BCAST_SS},
{X86::VSHUFPSZ256rrik, X86::VSHUFPSZ256rmbik, TB_BCAST_SS},
{X86::VSHUFPSZrrik, X86::VSHUFPSZrmbik, TB_BCAST_SS},
+ {X86::VSUBNEPBF16Z128rrk, X86::VSUBNEPBF16Z128rmbk, TB_BCAST_SH},
+ {X86::VSUBNEPBF16Z256rrk, X86::VSUBNEPBF16Z256rmbk, TB_BCAST_SH},
+ {X86::VSUBNEPBF16Zrrk, X86::VSUBNEPBF16Zrmbk, TB_BCAST_SH},
{X86::VSUBPDZ128rrk, X86::VSUBPDZ128rmbk, TB_BCAST_SD},
{X86::VSUBPDZ256rrk, X86::VSUBPDZ256rmbk, TB_BCAST_SD},
{X86::VSUBPDZrrk, X86::VSUBPDZrmbk, TB_BCAST_SD},
diff --git a/llvm/utils/TableGen/X86DisassemblerTables.cpp b/llvm/utils/TableGen/X86DisassemblerTables.cpp
index b0acd4ea4224a..8d63b8dc06ca0 100644
--- a/llvm/utils/TableGen/X86DisassemblerTables.cpp
+++ b/llvm/utils/TableGen/X86DisassemblerTables.cpp
@@ -786,8 +786,8 @@ void DisassemblerTables::emitModRMDecision(raw_ostream &o1, raw_ostream &o2,
}
// We assume that the index can fit into uint16_t.
- assert(sEntryNumber < 65536U &&
- "Index into ModRMDecision is too large for uint16_t!");
+ assert(sEntryNumber < -1U &&
+ "Index into ModRMDecision is too large for unsigned int!");
(void)sEntryNumber;
}
More information about the cfe-commits mailing list