[clang] [llvm] [X86][AVX10.2] Support AVX10.2-BF16 new instructions. (PR #101603)
Freddy Ye via cfe-commits
cfe-commits at lists.llvm.org
Tue Sep 3 02:26:11 PDT 2024
https://github.com/FreddyLeaf updated https://github.com/llvm/llvm-project/pull/101603
>From 122a4829e507d7d0d59d50949d5538ad07ad243c Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Mon, 29 Jul 2024 10:51:29 +0800
Subject: [PATCH 1/5] Support AVX10.2-BF16 new instructions.
---
clang/include/clang/Basic/BuiltinsX86.def | 62 +
clang/lib/Basic/Targets/X86.cpp | 1 +
clang/lib/CodeGen/CGBuiltin.cpp | 23 +
clang/lib/Headers/CMakeLists.txt | 2 +
clang/lib/Headers/avx10_2_512bf16intrin.h | 565 +++
clang/lib/Headers/avx10_2bf16intrin.h | 1088 ++++++
clang/lib/Headers/immintrin.h | 2 +
.../CodeGen/X86/avx10_2_512bf16-builtins.c | 1054 ++++++
clang/test/CodeGen/X86/avx10_2bf16-builtins.c | 2018 +++++++++++
llvm/include/llvm/IR/IntrinsicsX86.td | 410 +++
.../lib/Target/X86/AsmParser/X86AsmParser.cpp | 8 +-
.../X86/MCTargetDesc/X86ATTInstPrinter.cpp | 12 +-
.../X86/MCTargetDesc/X86InstPrinterCommon.cpp | 11 +
.../X86/MCTargetDesc/X86IntelInstPrinter.cpp | 9 +
llvm/lib/Target/X86/X86ISelLowering.cpp | 37 +-
llvm/lib/Target/X86/X86InstrAVX10.td | 310 ++
llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 10 +
llvm/lib/Target/X86/X86InstrUtils.td | 6 +-
llvm/lib/Target/X86/X86IntrinsicsInfo.h | 54 +
.../test/CodeGen/X86/avx10_2_512bf16-arith.ll | 587 ++++
.../CodeGen/X86/avx10_2_512bf16-intrinsics.ll | 296 ++
llvm/test/CodeGen/X86/avx10_2bf16-arith.ll | 1168 +++++++
.../CodeGen/X86/avx10_2bf16-intrinsics.ll | 536 +++
.../MC/Disassembler/X86/avx10.2-bf16-32.txt | 3015 +++++++++++++++++
.../MC/Disassembler/X86/avx10.2-bf16-64.txt | 3015 +++++++++++++++++
llvm/test/MC/X86/avx10.2-bf16-32-att.s | 3014 ++++++++++++++++
llvm/test/MC/X86/avx10.2-bf16-32-intel.s | 3014 ++++++++++++++++
llvm/test/MC/X86/avx10.2-bf16-64-att.s | 3014 ++++++++++++++++
llvm/test/MC/X86/avx10.2-bf16-64-intel.s | 3014 ++++++++++++++++
llvm/test/TableGen/x86-fold-tables.inc | 494 +++
30 files changed, 26839 insertions(+), 10 deletions(-)
create mode 100644 clang/lib/Headers/avx10_2_512bf16intrin.h
create mode 100644 clang/lib/Headers/avx10_2bf16intrin.h
create mode 100644 clang/test/CodeGen/X86/avx10_2_512bf16-builtins.c
create mode 100644 clang/test/CodeGen/X86/avx10_2bf16-builtins.c
create mode 100644 llvm/test/CodeGen/X86/avx10_2_512bf16-arith.ll
create mode 100644 llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll
create mode 100644 llvm/test/CodeGen/X86/avx10_2bf16-arith.ll
create mode 100644 llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll
create mode 100644 llvm/test/MC/Disassembler/X86/avx10.2-bf16-32.txt
create mode 100644 llvm/test/MC/Disassembler/X86/avx10.2-bf16-64.txt
create mode 100644 llvm/test/MC/X86/avx10.2-bf16-32-att.s
create mode 100644 llvm/test/MC/X86/avx10.2-bf16-32-intel.s
create mode 100644 llvm/test/MC/X86/avx10.2-bf16-64-att.s
create mode 100644 llvm/test/MC/X86/avx10.2-bf16-64-intel.s
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index e4aa8661b9a806..48376ee0527980 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -2261,6 +2261,68 @@ TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8_512_mask, "V32cV32xV32cUi", "nV:512:"
TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8s_128_mask, "V16cV8xV16cUc", "nV:128:", "avx10.2-256")
TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8s_256_mask, "V16cV16xV16cUs", "nV:256:", "avx10.2-256")
TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8s_512_mask, "V32cV32xV32cUi", "nV:512:", "avx10.2-512")
+
+// AVX10.2 BF16
+TARGET_BUILTIN(__builtin_ia32_loadsbf16128_mask, "V8yV8yC*V8yUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_storesbf16128_mask, "vV8y*V8yUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vaddnepbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vaddnepbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vaddnepbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vdivnepbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vdivnepbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vdivnepbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vmaxpbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmaxpbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmaxpbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vminpbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminpbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminpbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vmulnepbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmulnepbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmulnepbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vsubnepbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vsubnepbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vsubnepbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcomsbf16eq, "iV8yV8y", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcomsbf16lt, "iV8yV8y", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcomsbf16neq, "iV8yV8y", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcomsbf16ge, "iV8yV8y", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcomsbf16gt, "iV8yV8y", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcomsbf16le, "iV8yV8y", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcmppbf16512_mask,"UiV32yV32yIiUi", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcmppbf16256_mask,"UsV16yV16yIiUs", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcmppbf16128_mask,"UcV8yV8yIiUc", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfpclasspbf16128_mask, "UcV8yIiUc", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfpclasspbf16256_mask, "UsV16yIiUs", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfpclasspbf16512_mask, "UiV32yIiUi", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vscalefpbf16128_mask, "V8yV8yV8yV8yUc", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vscalefpbf16256_mask, "V16yV16yV16yV16yUs", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vscalefpbf16512_mask, "V32yV32yV32yV32yUi", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vrcppbf16128_mask, "V8yV8yV8yUc", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vrcppbf16256_mask, "V16yV16yV16yUs", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vrcppbf16512_mask, "V32yV32yV32yUi", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vgetexppbf16128_mask, "V8yV8yV8yUc", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vgetexppbf16256_mask, "V16yV16yV16yUs", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vgetexppbf16512_mask, "V32yV32yV32yUi", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vrsqrtpbf16128_mask, "V8yV8yV8yUc", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vrsqrtpbf16256_mask, "V16yV16yV16yUs", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vrsqrtpbf16512_mask, "V32yV32yV32yUi", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vreducenepbf16128_mask, "V8yV8yIiV8yUc", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vreducenepbf16256_mask, "V16yV16yIiV16yUs", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vreducenepbf16512_mask, "V32yV32yIiV32yUi", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vrndscalenepbf16_128_mask, "V8yV8yIiV8yUc", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vrndscalenepbf16_256_mask, "V16yV16yIiV16yUs", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vrndscalenepbf16_mask, "V32yV32yIiV32yUi", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vgetmantpbf16128_mask, "V8yV8yIiV8yUc", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vgetmantpbf16256_mask, "V16yV16yIiV16yUs", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vgetmantpbf16512_mask, "V32yV32yIiV32yUi", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vsqrtnepbf16, "V8yV8y", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vsqrtnepbf16256, "V16yV16y", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vsqrtnepbf16512, "V32yV32y", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh512, "V32yV32yV32yV32y", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh256, "V16yV16yV16yV16y", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh128, "V8yV8yV8yV8y", "ncV:128:", "avx10.2-256")
+
#undef BUILTIN
#undef TARGET_BUILTIN
#undef TARGET_HEADER_BUILTIN
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index a9cbdb7b10dff8..62c382b67ad14a 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -306,6 +306,7 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasAVX10_1_512 = true;
} else if (Feature == "+avx10.2-256") {
HasAVX10_2 = true;
+ HasFullBFloat16 = true;
} else if (Feature == "+avx10.2-512") {
HasAVX10_2_512 = true;
} else if (Feature == "+avx512cd") {
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 2a733e4d834cfa..94af4e5f723c9a 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -14728,6 +14728,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_storeups512_mask:
return EmitX86MaskedStore(*this, Ops, Align(1));
+ case X86::BI__builtin_ia32_storesbf16128_mask:
case X86::BI__builtin_ia32_storesh128_mask:
case X86::BI__builtin_ia32_storess128_mask:
case X86::BI__builtin_ia32_storesd128_mask:
@@ -14836,6 +14837,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_vfmaddph512_mask:
case X86::BI__builtin_ia32_vfmaddph512_maskz:
case X86::BI__builtin_ia32_vfmaddph512_mask3:
+ case X86::BI__builtin_ia32_vfmaddnepbh128:
+ case X86::BI__builtin_ia32_vfmaddnepbh256:
+ case X86::BI__builtin_ia32_vfmaddnepbh512:
case X86::BI__builtin_ia32_vfmaddps512_mask:
case X86::BI__builtin_ia32_vfmaddps512_maskz:
case X86::BI__builtin_ia32_vfmaddps512_mask3:
@@ -14920,6 +14924,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_loaddqudi512_mask:
return EmitX86MaskedLoad(*this, Ops, Align(1));
+ case X86::BI__builtin_ia32_loadsbf16128_mask:
case X86::BI__builtin_ia32_loadsh128_mask:
case X86::BI__builtin_ia32_loadss128_mask:
case X86::BI__builtin_ia32_loadsd128_mask:
@@ -16074,6 +16079,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_sqrtph256:
case X86::BI__builtin_ia32_sqrtph:
case X86::BI__builtin_ia32_sqrtph512:
+ case X86::BI__builtin_ia32_vsqrtnepbf16256:
+ case X86::BI__builtin_ia32_vsqrtnepbf16:
+ case X86::BI__builtin_ia32_vsqrtnepbf16512:
case X86::BI__builtin_ia32_sqrtps512:
case X86::BI__builtin_ia32_sqrtpd512: {
if (Ops.size() == 2) {
@@ -16293,6 +16301,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_fpclassps128_mask:
case X86::BI__builtin_ia32_fpclassps256_mask:
case X86::BI__builtin_ia32_fpclassps512_mask:
+ case X86::BI__builtin_ia32_vfpclasspbf16128_mask:
+ case X86::BI__builtin_ia32_vfpclasspbf16256_mask:
+ case X86::BI__builtin_ia32_vfpclasspbf16512_mask:
case X86::BI__builtin_ia32_fpclassph128_mask:
case X86::BI__builtin_ia32_fpclassph256_mask:
case X86::BI__builtin_ia32_fpclassph512_mask:
@@ -16307,6 +16318,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Intrinsic::ID ID;
switch (BuiltinID) {
default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_vfpclasspbf16128_mask:
+ ID = Intrinsic::x86_avx10_fpclass_nepbf16_128;
+ break;
+ case X86::BI__builtin_ia32_vfpclasspbf16256_mask:
+ ID = Intrinsic::x86_avx10_fpclass_nepbf16_256;
+ break;
+ case X86::BI__builtin_ia32_vfpclasspbf16512_mask:
+ ID = Intrinsic::x86_avx10_fpclass_nepbf16_512;
+ break;
case X86::BI__builtin_ia32_fpclassph128_mask:
ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
break;
@@ -16465,6 +16485,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_vcmppd256_round_mask:
case X86::BI__builtin_ia32_vcmpps256_round_mask:
case X86::BI__builtin_ia32_vcmpph256_round_mask:
+ case X86::BI__builtin_ia32_vcmppbf16512_mask:
+ case X86::BI__builtin_ia32_vcmppbf16256_mask:
+ case X86::BI__builtin_ia32_vcmppbf16128_mask:
IsMaskFCmp = true;
[[fallthrough]];
case X86::BI__builtin_ia32_cmpps:
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 5a62538792f301..90d431f8627965 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -147,11 +147,13 @@ set(x86_files
amxcomplexintrin.h
amxfp16intrin.h
amxintrin.h
+ avx10_2_512bf16intrin.h
avx10_2_512convertintrin.h
avx10_2_512minmaxintrin.h
avx10_2_512niintrin.h
avx10_2_512satcvtintrin.h
avx10_2convertintrin.h
+ avx10_2bf16intrin.h
avx10_2minmaxintrin.h
avx10_2niintrin.h
avx10_2satcvtintrin.h
diff --git a/clang/lib/Headers/avx10_2_512bf16intrin.h b/clang/lib/Headers/avx10_2_512bf16intrin.h
new file mode 100644
index 00000000000000..158d5686c8f02f
--- /dev/null
+++ b/clang/lib/Headers/avx10_2_512bf16intrin.h
@@ -0,0 +1,565 @@
+/*===----------- avx10_2_512bf16intrin.h - AVX10-BF16 intrinsics ---------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error \
+ "Never use <avx10_2_512bf16intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifdef __SSE2__
+
+#ifndef __AVX10_2_512BF16INTRIN_H
+#define __AVX10_2_512BF16INTRIN_H
+
+/* 64-byte __bf16 vector with 1-byte alignment, for unaligned load/store. */
+typedef __bf16 __m512bh_u __attribute__((__vector_size__(64), __aligned__(1)));
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS512 \
+ __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"), \
+ __min_vector_width__(512)))
+
+static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_setzero_pbh(void) {
+ return __builtin_bit_cast(__m512bh, _mm512_setzero_ps());
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_undefined_pbh(void) {
+ return (__m512bh)__builtin_ia32_undef512();
+}
+
+static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_set1_pbh(__bf16 bf) {
+ return (__m512bh)(__v32bf){bf, bf, bf, bf, bf, bf, bf, bf, bf, bf, bf,
+ bf, bf, bf, bf, bf, bf, bf, bf, bf, bf, bf,
+ bf, bf, bf, bf, bf, bf, bf, bf, bf, bf};
+}
+
+static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_set_pbh(
+ __bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4, __bf16 bf5, __bf16 bf6,
+ __bf16 bf7, __bf16 bf8, __bf16 bf9, __bf16 bf10, __bf16 bf11, __bf16 bf12,
+ __bf16 bf13, __bf16 bf14, __bf16 bf15, __bf16 bf16, __bf16 bf17,
+ __bf16 bf18, __bf16 bf19, __bf16 bf20, __bf16 bf21, __bf16 bf22,
+ __bf16 bf23, __bf16 bf24, __bf16 bf25, __bf16 bf26, __bf16 bf27,
+ __bf16 bf28, __bf16 bf29, __bf16 bf30, __bf16 bf31, __bf16 bf32) {
+ return (__m512bh)(__v32bf){bf32, bf31, bf30, bf29, bf28, bf27, bf26, bf25,
+ bf24, bf23, bf22, bf21, bf20, bf19, bf18, bf17,
+ bf16, bf15, bf14, bf13, bf12, bf11, bf10, bf9,
+ bf8, bf7, bf6, bf5, bf4, bf3, bf2, bf1};
+}
+
+#define _mm512_setr_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8, bf9, bf10, \
+ bf11, bf12, bf13, bf14, bf15, bf16, bf17, bf18, bf19, \
+ bf20, bf21, bf22, bf23, bf24, bf25, bf26, bf27, bf28, \
+ bf29, bf30, bf31, bf32) \
+ _mm512_set_pbh((bf32), (bf31), (bf30), (bf29), (bf28), (bf27), (bf26), \
+ (bf25), (bf24), (bf23), (bf22), (bf21), (bf20), (bf19), \
+ (bf18), (bf17), (bf16), (bf15), (bf14), (bf13), (bf12), \
+ (bf11), (bf10), (bf9), (bf8), (bf7), (bf6), (bf5), (bf4), \
+ (bf3), (bf2), (bf1))
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
+_mm512_castpbf16_ps(__m512bh __a) {
+ return (__m512)__a;
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
+_mm512_castpbf16_pd(__m512bh __a) {
+ return (__m512d)__a;
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_castpbf16_si512(__m512bh __a) {
+ return (__m512i)__a;
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_castps_pbh(__m512 __a) {
+ return (__m512bh)__a;
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_castpd_pbh(__m512d __a) {
+ return (__m512bh)__a;
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_castsi512_pbh(__m512i __a) {
+ return (__m512bh)__a;
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS512
+_mm512_castpbf16512_pbh128(__m512bh __a) {
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS512
+_mm512_castpbf16512_pbh256(__m512bh __a) {
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+ 12, 13, 14, 15);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_castpbf16128_pbh512(__m128bh __a) {
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_castpbf16256_pbh512(__m256bh __a) {
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+ 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_zextpbf16128_pbh512(__m128bh __a) {
+ return __builtin_shufflevector(
+ __a, (__v8bf)_mm_setzero_pbh(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_zextpbf16256_pbh512(__m256bh __a) {
+ return __builtin_shufflevector(__a, (__v16bf)_mm256_setzero_pbh(), 0, 1, 2, 3,
+ 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
+ 29, 30, 31);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_abs_pbh(__m512bh __A) {
+ return (__m512bh)_mm512_and_epi32(_mm512_set1_epi32(0x7FFF7FFF),
+ (__m512i)__A);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_load_pbh(void const *__p) {
+ return *(const __m512bh *)__p;
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_loadu_pbh(void const *__p) {
+ struct __loadu_pbh {
+ __m512bh_u __v;
+ } __attribute__((__packed__, __may_alias__));
+ return ((const struct __loadu_pbh *)__p)->__v;
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_store_pbh(void *__P,
+ __m512bh __A) {
+ *(__m512bh *)__P = __A;
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_storeu_pbh(void *__P,
+ __m512bh __A) {
+ struct __storeu_pbh {
+ __m512bh_u __v;
+ } __attribute__((__packed__, __may_alias__));
+ ((struct __storeu_pbh *)__P)->__v = __A;
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_mask_blend_pbh(__mmask32 __U, __m512bh __A, __m512bh __W) {
+ return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, (__v32bf)__W,
+ (__v32bf)__A);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_permutex2var_pbh(__m512bh __A, __m512i __I, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
+ (__v32hi)__B);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_permutexvar_pbh(__m512i __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_addne_pbh(__m512bh __A, __m512bh __B) {
+ return (__m512bh)((__v32bf)__A + (__v32bf)__B);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_mask_addne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U, (__v32bf)_mm512_addne_pbh(__A, __B), (__v32bf)__W);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_maskz_addne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U, (__v32bf)_mm512_addne_pbh(__A, __B),
+ (__v32bf)_mm512_setzero_pbh());
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_subne_pbh(__m512bh __A, __m512bh __B) {
+ return (__m512bh)((__v32bf)__A - (__v32bf)__B);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_mask_subne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U, (__v32bf)_mm512_subne_pbh(__A, __B), (__v32bf)__W);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_maskz_subne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U, (__v32bf)_mm512_subne_pbh(__A, __B),
+ (__v32bf)_mm512_setzero_pbh());
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_mulne_pbh(__m512bh __A, __m512bh __B) {
+ return (__m512bh)((__v32bf)__A * (__v32bf)__B);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_mask_mulne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U, (__v32bf)_mm512_mulne_pbh(__A, __B), (__v32bf)__W);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_maskz_mulne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U, (__v32bf)_mm512_mulne_pbh(__A, __B),
+ (__v32bf)_mm512_setzero_pbh());
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_divne_pbh(__m512bh __A, __m512bh __B) {
+ return (__m512bh)((__v32bf)__A / (__v32bf)__B);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_mask_divne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U, (__v32bf)_mm512_divne_pbh(__A, __B), (__v32bf)__W);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_maskz_divne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U, (__v32bf)_mm512_divne_pbh(__A, __B),
+ (__v32bf)_mm512_setzero_pbh());
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_max_pbh(__m512bh __A,
+ __m512bh __B) {
+ return (__m512bh)__builtin_ia32_vmaxpbf16512((__v32bf)__A, (__v32bf)__B);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_mask_max_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U, (__v32bf)_mm512_max_pbh(__A, __B), (__v32bf)__W);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_maskz_max_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U, (__v32bf)_mm512_max_pbh(__A, __B),
+ (__v32bf)_mm512_setzero_pbh());
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_min_pbh(__m512bh __A,
+ __m512bh __B) {
+ return (__m512bh)__builtin_ia32_vminpbf16512((__v32bf)__A, (__v32bf)__B);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_mask_min_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U, (__v32bf)_mm512_min_pbh(__A, __B), (__v32bf)__W);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_maskz_min_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U, (__v32bf)_mm512_min_pbh(__A, __B),
+ (__v32bf)_mm512_setzero_pbh());
+}
+
+#define _mm512_cmp_pbh_mask(A, B, P) \
+ ((__mmask32)__builtin_ia32_vcmppbf16512_mask((__v32bf)(__m512bh)(A), \
+ (__v32bf)(__m512bh)(B), \
+ (int)(P), (__mmask32) - 1))
+
+#define _mm512_mask_cmp_pbh_mask(U, A, B, P) \
+ ((__mmask32)__builtin_ia32_vcmppbf16512_mask((__v32bf)(__m512bh)(A), \
+ (__v32bf)(__m512bh)(B), \
+ (int)(P), (__mmask32)(U)))
+
+#define _mm512_mask_fpclass_pbh_mask(U, A, imm) \
+ ((__mmask32)__builtin_ia32_vfpclasspbf16512_mask( \
+ (__v32bf)(__m512bh)(A), (int)(imm), (__mmask32)(U)))
+
+#define _mm512_fpclass_pbh_mask(A, imm) \
+ ((__mmask32)__builtin_ia32_vfpclasspbf16512_mask( \
+ (__v32bf)(__m512bh)(A), (int)(imm), (__mmask32) - 1))
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_scalef_pbh(__m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_vscalefpbf16512_mask(
+ (__v32bf)__A, (__v32bf)__B, (__v32bf)_mm512_undefined_pbh(),
+ (__mmask32)-1);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pbh(
+ __m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_vscalefpbf16512_mask(
+ (__v32bf)__A, (__v32bf)__B, (__v32bf)__W, (__mmask32)__U);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_maskz_scalef_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
+ return (__m512bh)__builtin_ia32_vscalefpbf16512_mask(
+ (__v32bf)__A, (__v32bf)__B, (__v32bf)_mm512_setzero_pbh(),
+ (__mmask32)__U);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_rcp_pbh(__m512bh __A) {
+ return (__m512bh)__builtin_ia32_vrcppbf16512_mask(
+ (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_mask_rcp_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
+ return (__m512bh)__builtin_ia32_vrcppbf16512_mask((__v32bf)__A, (__v32bf)__W,
+ (__mmask32)__U);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_maskz_rcp_pbh(__mmask32 __U, __m512bh __A) {
+ return (__m512bh)__builtin_ia32_vrcppbf16512_mask(
+ (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_getexp_pbh(__m512bh __A) {
+ return (__m512bh)__builtin_ia32_vgetexppbf16512_mask(
+ (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_mask_getexp_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
+ return (__m512bh)__builtin_ia32_vgetexppbf16512_mask(
+ (__v32bf)__A, (__v32bf)__W, (__mmask32)__U);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_maskz_getexp_pbh(__mmask32 __U, __m512bh __A) {
+ return (__m512bh)__builtin_ia32_vgetexppbf16512_mask(
+ (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_rsqrt_pbh(__m512bh __A) {
+ return (__m512bh)__builtin_ia32_vrsqrtpbf16512_mask(
+ (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_mask_rsqrt_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
+ return (__m512bh)__builtin_ia32_vrsqrtpbf16512_mask(
+ (__v32bf)__A, (__v32bf)__W, (__mmask32)__U);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_maskz_rsqrt_pbh(__mmask32 __U, __m512bh __A) {
+ return (__m512bh)__builtin_ia32_vrsqrtpbf16512_mask(
+ (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U);
+}
+
+#define _mm512_reducene_pbh(A, imm) \
+ ((__m512bh)__builtin_ia32_vreducenepbf16512_mask( \
+ (__v32bf)(__m512bh)(A), (int)(imm), (__v32bf)_mm512_undefined_pbh(), \
+ (__mmask32) - 1))
+
+#define _mm512_mask_reducene_pbh(W, U, A, imm) \
+ ((__m512bh)__builtin_ia32_vreducenepbf16512_mask( \
+ (__v32bf)(__m512bh)(A), (int)(imm), (__v32bf)(__m512bh)(W), \
+ (__mmask32)(U)))
+
+#define _mm512_maskz_reducene_pbh(U, A, imm) \
+ ((__m512bh)__builtin_ia32_vreducenepbf16512_mask( \
+ (__v32bf)(__m512bh)(A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \
+ (__mmask32)(U)))
+
+#define _mm512_roundscalene_pbh(A, B) \
+ ((__m512bh)__builtin_ia32_vrndscalenepbf16_mask( \
+ (__v32bf)(__m512bh)(A), (int)(B), (__v32bf)_mm512_undefined_pbh(), \
+ (__mmask32) - 1)) /* A expands once; merge operand as in _mm512_reducene_pbh */
+
+#define _mm512_mask_roundscalene_pbh(A, B, C, imm) \
+ ((__m512bh)__builtin_ia32_vrndscalenepbf16_mask( \
+ (__v32bf)(__m512bh)(C), (int)(imm), (__v32bf)(__m512bh)(A), \
+ (__mmask32)(B)))
+
+#define _mm512_maskz_roundscalene_pbh(A, B, imm) \
+ ((__m512bh)__builtin_ia32_vrndscalenepbf16_mask( \
+ (__v32bf)(__m512bh)(B), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \
+ (__mmask32)(A)))
+
+#define _mm512_getmant_pbh(A, B, C) \
+ ((__m512bh)__builtin_ia32_vgetmantpbf16512_mask( \
+ (__v32bf)(__m512bh)(A), (int)(((C) << 2) | (B)), \
+ (__v32bf)_mm512_undefined_pbh(), (__mmask32) - 1))
+
+#define _mm512_mask_getmant_pbh(W, U, A, B, C) \
+ ((__m512bh)__builtin_ia32_vgetmantpbf16512_mask( \
+ (__v32bf)(__m512bh)(A), (int)(((C) << 2) | (B)), (__v32bf)(__m512bh)(W), \
+ (__mmask32)(U)))
+
+#define _mm512_maskz_getmant_pbh(U, A, B, C) \
+ ((__m512bh)__builtin_ia32_vgetmantpbf16512_mask( \
+ (__v32bf)(__m512bh)(A), (int)(((C) << 2) | (B)), \
+ (__v32bf)_mm512_setzero_pbh(), (__mmask32)(U)))
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_sqrt_pbh(__m512bh __A) {
+ return (__m512bh)__builtin_ia32_vsqrtnepbf16512((__v32bf)__A);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_mask_sqrt_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U, (__v32bf)_mm512_sqrt_pbh(__A), (__v32bf)__W);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_maskz_sqrt_pbh(__mmask32 __U, __m512bh __A) {
+ return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U,
+ (__v32bf)_mm512_sqrt_pbh(__A),
+ (__v32bf)_mm512_setzero_pbh());
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_fmaddne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
+ return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, (__v32bf)__B,
+ (__v32bf)__C);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddne_pbh(
+ __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U,
+ _mm512_fmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
+ (__v32bf)__A);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddne_pbh(
+ __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U,
+ _mm512_fmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
+ (__v32bf)__C);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddne_pbh(
+ __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U,
+ _mm512_fmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
+ (__v32bf)_mm512_setzero_pbh());
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_fmsubne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
+ return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, (__v32bf)__B,
+ -(__v32bf)__C);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubne_pbh(
+ __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U,
+ _mm512_fmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
+ (__v32bf)__A);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubne_pbh(
+ __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U,
+ _mm512_fmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
+ (__v32bf)__C);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubne_pbh(
+ __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U,
+ _mm512_fmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
+ (__v32bf)_mm512_setzero_pbh());
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_fnmaddne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
+ return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, -(__v32bf)__B,
+ (__v32bf)__C);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmaddne_pbh(
+ __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U,
+ _mm512_fnmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
+ (__v32bf)__A);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmaddne_pbh(
+ __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U,
+ _mm512_fnmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
+ (__v32bf)__C);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmaddne_pbh(
+ __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U,
+ _mm512_fnmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
+ (__v32bf)_mm512_setzero_pbh());
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_fnmsubne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
+ return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, -(__v32bf)__B,
+ -(__v32bf)__C);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsubne_pbh(
+ __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U,
+ _mm512_fnmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
+ (__v32bf)__A);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsubne_pbh(
+ __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U,
+ _mm512_fnmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
+ (__v32bf)__C);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsubne_pbh(
+ __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
+ return (__m512bh)__builtin_ia32_selectpbf_512(
+ (__mmask32)__U,
+ _mm512_fnmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
+ (__v32bf)_mm512_setzero_pbh());
+}
+
+#undef __DEFAULT_FN_ATTRS512
+
+#endif
+#endif
diff --git a/clang/lib/Headers/avx10_2bf16intrin.h b/clang/lib/Headers/avx10_2bf16intrin.h
new file mode 100644
index 00000000000000..8c03ddbf2f71d1
--- /dev/null
+++ b/clang/lib/Headers/avx10_2bf16intrin.h
@@ -0,0 +1,1088 @@
+/*===-------------- avx10_2bf16intrin.h - AVX10-BF16 intrinsics ------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error \
+ "Never use <avx10_2bf16intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifdef __SSE2__
+
+#ifndef __AVX10_2BF16INTRIN_H
+#define __AVX10_2BF16INTRIN_H
+
+typedef __bf16 __m128bh_u __attribute__((__vector_size__(16), __aligned__(1)));
+typedef __bf16 __m256bh_u __attribute__((__vector_size__(32), __aligned__(1)));
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS256 \
+ __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \
+ __min_vector_width__(256)))
+#define __DEFAULT_FN_ATTRS128 \
+ __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \
+ __min_vector_width__(128)))
+
+static __inline __m256bh __DEFAULT_FN_ATTRS256 _mm256_setzero_pbh(void) {
+ return __builtin_bit_cast(__m256bh, _mm256_setzero_ps());
+}
+
+static __inline __m128bh __DEFAULT_FN_ATTRS128 _mm_setzero_pbh(void) {
+ return __builtin_bit_cast(__m128bh, _mm_setzero_ps());
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_castpbf16_ps(__m128bh __a) {
+ return (__m128)__a;
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
+_mm256_castpbf16_ps(__m256bh __a) {
+ return (__m256)__a;
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
+_mm256_castpbf16_pd(__m256bh __a) {
+ return (__m256d)__a;
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_castpbf16_pd(__m128bh __a) {
+ return (__m128d)__a;
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_castpbf16_si128(__m128bh __a) {
+ return (__m128i)__a;
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_castpbf16_si256(__m256bh __a) {
+ return (__m256i)__a;
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_castps_pbh(__m128 __a) {
+ return (__m128bh)__a;
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_castps_pbh(__m256 __a) {
+ return (__m256bh)__a;
+}
+
+static __inline__ __bf16 __DEFAULT_FN_ATTRS128 _mm_cvtsbh_bf16(__m128bh __a) {
+ return __a[0];
+}
+
+static __inline__ __bf16 __DEFAULT_FN_ATTRS256
+_mm256_cvtsbh_bf16(__m256bh __a) {
+ return __a[0];
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_castpd_pbh(__m128d __a) {
+ return (__m128bh)__a;
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_castpd_pbh(__m256d __a) {
+ return (__m256bh)__a;
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_castsi128_pbh(__m128i __a) {
+ return (__m128bh)__a;
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_castsi256_pbh(__m256i __a) {
+ return (__m256bh)__a;
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS256
+_mm256_castpbf16256_pbh128(__m256bh __a) {
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_castpbf16128_pbh256(__m128bh __a) {
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1,
+ -1, -1, -1, -1, -1);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_zextpbf16128_pbh256(__m128bh __a) {
+ return __builtin_shufflevector(__a, (__v8bf)_mm_setzero_pbh(), 0, 1, 2, 3, 4,
+ 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_undefined_pbh(void) {
+ return (__m256bh)__builtin_ia32_undef256();
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_load_sbh(void const *__dp) {
+ /* Only mask bit 0 is set; the pass-through operand is an all-zero vector. */
+ return (__m128bh)__builtin_ia32_loadsbf16128_mask(
+ (const __v8bf *)__dp, (__v8bf)_mm_setzero_pbh(), 1);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_load_sbh(__m128bh __W, __mmask8 __U, const void *__A) {
+ /* Pass-through: lane 0 of __W, zeros above (index 8); only bit 0 of __U is used. */
+ __m128bh __src = (__v8bf)__builtin_shufflevector(
+ (__v8bf)__W, (__v8bf)_mm_setzero_pbh(), 0, 8, 8, 8, 8, 8, 8, 8);
+ return (__m128bh)__builtin_ia32_loadsbf16128_mask((const __v8bf *)__A, __src,
+ __U & 1);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_load_sbh(__mmask8 __U, const void *__A) {
+ return (__m128bh)__builtin_ia32_loadsbf16128_mask(
+ (const __v8bf *)__A, (__v8bf)_mm_setzero_pbh(), __U & 1);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_load_pbh(void const *__p) {
+ return *(const __m256bh *)__p;
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_load_pbh(void const *__p) {
+ return *(const __m128bh *)__p;
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_loadu_pbh(void const *__p) {
+ struct __loadu_pbh {
+ __m256bh_u __v;
+ } __attribute__((__packed__, __may_alias__));
+ return ((const struct __loadu_pbh *)__p)->__v;
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_loadu_pbh(void const *__p) {
+ struct __loadu_pbh {
+ __m128bh_u __v;
+ } __attribute__((__packed__, __may_alias__));
+ return ((const struct __loadu_pbh *)__p)->__v;
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS128 _mm_store_sbh(void *__dp,
+ __m128bh __a) {
+ struct __mm_store_sbh_struct {
+ __bf16 __u;
+ } __attribute__((__packed__, __may_alias__));
+ ((struct __mm_store_sbh_struct *)__dp)->__u = __a[0];
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sbh(void *__W,
+ __mmask8 __U,
+ __m128bh __A) {
+ __builtin_ia32_storesbf16128_mask((__v8bf *)__W, __A, __U & 1);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_store_pbh(void *__P,
+ __m256bh __A) {
+ *(__m256bh *)__P = __A;
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS128 _mm_store_pbh(void *__P,
+ __m128bh __A) {
+ *(__m128bh *)__P = __A;
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_storeu_pbh(void *__P,
+ __m256bh __A) {
+ struct __storeu_pbh {
+ __m256bh_u __v;
+ } __attribute__((__packed__, __may_alias__));
+ ((struct __storeu_pbh *)__P)->__v = __A;
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS128 _mm_storeu_pbh(void *__P,
+ __m128bh __A) {
+ struct __storeu_pbh {
+ __m128bh_u __v;
+ } __attribute__((__packed__, __may_alias__));
+ ((struct __storeu_pbh *)__P)->__v = __A;
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_move_sbh(__m128bh __a,
+ __m128bh __b) {
+ __a[0] = __b[0];
+ return __a;
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_move_sbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) {
+ return __builtin_ia32_selectsbf_128(__U, _mm_move_sbh(__A, __B), __W);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_move_sbh(__mmask8 __U, __m128bh __A, __m128bh __B) {
+ return __builtin_ia32_selectsbf_128(__U, _mm_move_sbh(__A, __B),
+ _mm_setzero_pbh());
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_undefined_pbh(void) {
+ return (__m128bh)__builtin_ia32_undef128();
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_set_sbh(__bf16 __bf) {
+ return (__v8bf)__builtin_shufflevector(
+ (__v8bf){__bf, __bf, __bf, __bf, __bf, __bf, __bf, __bf},
+ (__v8bf)_mm_setzero_pbh(), 0, 8, 8, 8, 8, 8, 8, 8); /* 8 = lane 0 of zeros */
+}
+
+static __inline __m128bh __DEFAULT_FN_ATTRS128 _mm_set1_pbh(__bf16 __bf) {
+ return (__m128bh)(__v8bf){__bf, __bf, __bf, __bf, __bf, __bf, __bf, __bf}; /* same value in all 8 lanes */
+}
+
+static __inline __m256bh __DEFAULT_FN_ATTRS256 _mm256_set1_pbh(__bf16 __bf) {
+ return (__m256bh)(__v16bf){__bf, __bf, __bf, __bf, __bf, __bf, __bf, __bf,
+ __bf, __bf, __bf, __bf, __bf, __bf, __bf, __bf}; /* same value in all 16 lanes */
+}
+
+static __inline __m128bh __DEFAULT_FN_ATTRS128
+_mm_set_pbh(__bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4, __bf16 bf5,
+ __bf16 bf6, __bf16 bf7, __bf16 bf8) { /* bf1 = top lane, bf8 = lane 0 */
+ return (__m128bh)(__v8bf){bf8, bf7, bf6, bf5, bf4, bf3, bf2, bf1};
+}
+
+static __inline __m256bh __DEFAULT_FN_ATTRS256 _mm256_set_pbh(
+ __bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4, __bf16 bf5, __bf16 bf6,
+ __bf16 bf7, __bf16 bf8, __bf16 bf9, __bf16 bf10, __bf16 bf11, __bf16 bf12,
+ __bf16 bf13, __bf16 bf14, __bf16 bf15, __bf16 bf16) { /* bf16 = lane 0 */
+ return (__m256bh)(__v16bf){bf16, bf15, bf14, bf13, bf12, bf11, bf10, bf9,
+ bf8, bf7, bf6, bf5, bf4, bf3, bf2, bf1};
+}
+
+#define _mm_setr_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8) \
+ _mm_set_pbh((bf8), (bf7), (bf6), (bf5), (bf4), (bf3), (bf2), (bf1))
+
+#define _mm256_setr_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8, bf9, bf10, \
+ bf11, bf12, bf13, bf14, bf15, bf16) \
+ _mm256_set_pbh((bf16), (bf15), (bf14), (bf13), (bf12), (bf11), (bf10), \
+ (bf9), (bf8), (bf7), (bf6), (bf5), (bf4), (bf3), (bf2), \
+ (bf1))
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_abs_pbh(__m256bh __A) {
+ return (__m256bh)_mm256_and_epi32(_mm256_set1_epi32(0x7FFF7FFF),
+ (__m256i)__A);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_abs_pbh(__m128bh __A) {
+ return (__m128bh)_mm_and_epi32(_mm_set1_epi32(0x7FFF7FFF), (__m128i)__A);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_blend_pbh(__mmask8 __U, __m128bh __A, __m128bh __W) {
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, (__v8bf)__W,
+ (__v8bf)__A);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_mask_blend_pbh(__mmask16 __U, __m256bh __A, __m256bh __W) {
+ return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, (__v16bf)__W,
+ (__v16bf)__A);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_permutex2var_pbh(__m128bh __A, __m128i __I, __m128bh __B) {
+ return (__m128bh)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I,
+ (__v8hi)__B);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_permutex2var_pbh(__m256bh __A, __m256i __I, __m256bh __B) {
+ return (__m256bh)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I,
+ (__v16hi)__B);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_permutexvar_pbh(__m128i __A, __m128bh __B) {
+ return (__m128bh)__builtin_ia32_permvarhi128((__v8hi)__B, (__v8hi)__A);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_permutexvar_pbh(__m256i __A, __m256bh __B) {
+ return (__m256bh)__builtin_ia32_permvarhi256((__v16hi)__B, (__v16hi)__A);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_addne_pbh(__m256bh __A, __m256bh __B) {
+ return (__m256bh)((__v16bf)__A + (__v16bf)__B);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_mask_addne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
+ return (__m256bh)__builtin_ia32_selectpbf_256(
+ (__mmask16)__U, (__v16bf)_mm256_addne_pbh(__A, __B), (__v16bf)__W);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_maskz_addne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
+ return (__m256bh)__builtin_ia32_selectpbf_256(
+ (__mmask16)__U, (__v16bf)_mm256_addne_pbh(__A, __B),
+ (__v16bf)_mm256_setzero_pbh());
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_addne_pbh(__m128bh __A,
+ __m128bh __B) {
+ return (__m128bh)((__v8bf)__A + (__v8bf)__B);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_addne_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) {
+ return (__m128bh)__builtin_ia32_selectpbf_128(
+ (__mmask8)__U, (__v8bf)_mm_addne_pbh(__A, __B), (__v8bf)__W);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_addne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) {
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U,
+ (__v8bf)_mm_addne_pbh(__A, __B),
+ (__v8bf)_mm_setzero_pbh());
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_subne_pbh(__m256bh __A, __m256bh __B) {
+ return (__m256bh)((__v16bf)__A - (__v16bf)__B);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_mask_subne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
+ return (__m256bh)__builtin_ia32_selectpbf_256(
+ (__mmask16)__U, (__v16bf)_mm256_subne_pbh(__A, __B), (__v16bf)__W);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_maskz_subne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
+ return (__m256bh)__builtin_ia32_selectpbf_256(
+ (__mmask16)__U, (__v16bf)_mm256_subne_pbh(__A, __B),
+ (__v16bf)_mm256_setzero_pbh());
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_subne_pbh(__m128bh __A,
+ __m128bh __B) {
+ return (__m128bh)((__v8bf)__A - (__v8bf)__B);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_subne_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) {
+ return (__m128bh)__builtin_ia32_selectpbf_128(
+ (__mmask8)__U, (__v8bf)_mm_subne_pbh(__A, __B), (__v8bf)__W);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_subne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) {
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U,
+ (__v8bf)_mm_subne_pbh(__A, __B),
+ (__v8bf)_mm_setzero_pbh());
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_mulne_pbh(__m256bh __A, __m256bh __B) {
+ return (__m256bh)((__v16bf)__A * (__v16bf)__B);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_mask_mulne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
+ return (__m256bh)__builtin_ia32_selectpbf_256(
+ (__mmask16)__U, (__v16bf)_mm256_mulne_pbh(__A, __B), (__v16bf)__W);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_maskz_mulne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
+ return (__m256bh)__builtin_ia32_selectpbf_256(
+ (__mmask16)__U, (__v16bf)_mm256_mulne_pbh(__A, __B),
+ (__v16bf)_mm256_setzero_pbh());
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_mulne_pbh(__m128bh __A,
+ __m128bh __B) {
+ return (__m128bh)((__v8bf)__A * (__v8bf)__B);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_mulne_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) {
+ return (__m128bh)__builtin_ia32_selectpbf_128(
+ (__mmask8)__U, (__v8bf)_mm_mulne_pbh(__A, __B), (__v8bf)__W);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_mulne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) {
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U,
+ (__v8bf)_mm_mulne_pbh(__A, __B),
+ (__v8bf)_mm_setzero_pbh());
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_divne_pbh(__m256bh __A, __m256bh __B) {
+ return (__m256bh)((__v16bf)__A / (__v16bf)__B);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_mask_divne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
+ return (__m256bh)__builtin_ia32_selectpbf_256(
+ (__mmask16)__U, (__v16bf)_mm256_divne_pbh(__A, __B), (__v16bf)__W);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_maskz_divne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
+ return (__m256bh)__builtin_ia32_selectpbf_256(
+ (__mmask16)__U, (__v16bf)_mm256_divne_pbh(__A, __B),
+ (__v16bf)_mm256_setzero_pbh());
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_divne_pbh(__m128bh __A,
+ __m128bh __B) {
+ return (__m128bh)((__v8bf)__A / (__v8bf)__B);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_divne_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) {
+ return (__m128bh)__builtin_ia32_selectpbf_128(
+ (__mmask8)__U, (__v8bf)_mm_divne_pbh(__A, __B), (__v8bf)__W);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_divne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) {
+ return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U,
+ (__v8bf)_mm_divne_pbh(__A, __B),
+ (__v8bf)_mm_setzero_pbh());
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_max_pbh(__m256bh __A,
+ __m256bh __B) {
+ return (__m256bh)__builtin_ia32_vmaxpbf16256((__v16bf)__A, (__v16bf)__B);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_mask_max_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
+ return (__m256bh)__builtin_ia32_selectpbf_256(
+ (__mmask16)__U, (__v16bf)_mm256_max_pbh(__A, __B), (__v16bf)__W);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_maskz_max_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
+ return (__m256bh)__builtin_ia32_selectpbf_256(
+ (__mmask16)__U, (__v16bf)_mm256_max_pbh(__A, __B),
+ (__v16bf)_mm256_setzero_pbh());
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_max_pbh(__m128bh __A,
+ __m128bh __B) {
+ return (__m128bh)__builtin_ia32_vmaxpbf16128((__v8bf)__A, (__v8bf)__B);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_max_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) {
+ return (__m128bh)__builtin_ia32_selectpbf_128(
+ (__mmask8)__U, (__v8bf)_mm_max_pbh(__A, __B), (__v8bf)__W);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_max_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) {
+ return (__m128bh)__builtin_ia32_selectpbf_128(
+ (__mmask8)__U, (__v8bf)_mm_max_pbh(__A, __B), (__v8bf)_mm_setzero_pbh());
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_min_pbh(__m256bh __A,
+ __m256bh __B) {
+ return (__m256bh)__builtin_ia32_vminpbf16256((__v16bf)__A, (__v16bf)__B);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_mask_min_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
+ return (__m256bh)__builtin_ia32_selectpbf_256(
+ (__mmask16)__U, (__v16bf)_mm256_min_pbh(__A, __B), (__v16bf)__W);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_maskz_min_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
+ return (__m256bh)__builtin_ia32_selectpbf_256(
+ (__mmask16)__U, (__v16bf)_mm256_min_pbh(__A, __B),
+ (__v16bf)_mm256_setzero_pbh());
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_min_pbh(__m128bh __A,
+ __m128bh __B) {
+ return (__m128bh)__builtin_ia32_vminpbf16128((__v8bf)__A, (__v8bf)__B);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_min_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) {
+ return (__m128bh)__builtin_ia32_selectpbf_128(
+ (__mmask8)__U, (__v8bf)_mm_min_pbh(__A, __B), (__v8bf)__W);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_min_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) {
+ return (__m128bh)__builtin_ia32_selectpbf_128(
+ (__mmask8)__U, (__v8bf)_mm_min_pbh(__A, __B), (__v8bf)_mm_setzero_pbh());
+}
+
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comeqsbh(__m128bh __A,
+ __m128bh __B) { /* forwards both operands to the vcomsbf16eq builtin */
+ return __builtin_ia32_vcomsbf16eq((__v8bf)__A, (__v8bf)__B);
+}
+
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comltsbh(__m128bh __A,
+ __m128bh __B) { /* forwards both operands to the vcomsbf16lt builtin */
+ return __builtin_ia32_vcomsbf16lt((__v8bf)__A, (__v8bf)__B);
+}
+
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comlesbh(__m128bh __A,
+ __m128bh __B) { /* forwards both operands to the vcomsbf16le builtin */
+ return __builtin_ia32_vcomsbf16le((__v8bf)__A, (__v8bf)__B);
+}
+
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comgtsbh(__m128bh __A,
+ __m128bh __B) { /* forwards both operands to the vcomsbf16gt builtin */
+ return __builtin_ia32_vcomsbf16gt((__v8bf)__A, (__v8bf)__B);
+}
+
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comgesbh(__m128bh __A,
+ __m128bh __B) { /* forwards both operands to the vcomsbf16ge builtin */
+ return __builtin_ia32_vcomsbf16ge((__v8bf)__A, (__v8bf)__B);
+}
+
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comneqsbh(__m128bh __A,
+ __m128bh __B) { /* forwards both operands to the vcomsbf16neq builtin */
+ return __builtin_ia32_vcomsbf16neq((__v8bf)__A, (__v8bf)__B);
+}
+
+#define _mm256_cmp_pbh_mask(A, B, P) \
+ ((__mmask16)__builtin_ia32_vcmppbf16256_mask((__v16bf)(__m256bh)(A), \
+ (__v16bf)(__m256bh)(B), \
+ (int)(P), (__mmask16) - 1))
+
+#define _mm256_mask_cmp_pbh_mask(U, A, B, P) \
+ ((__mmask16)__builtin_ia32_vcmppbf16256_mask((__v16bf)(__m256bh)(A), \
+ (__v16bf)(__m256bh)(B), \
+ (int)(P), (__mmask16)(U)))
+
+#define _mm_cmp_pbh_mask(A, B, P) \
+ ((__mmask8)__builtin_ia32_vcmppbf16128_mask( \
+ (__v8bf)(__m128bh)(A), (__v8bf)(__m128bh)(B), (int)(P), (__mmask8) - 1))
+
+#define _mm_mask_cmp_pbh_mask(U, A, B, P) \
+ ((__mmask8)__builtin_ia32_vcmppbf16128_mask( \
+ (__v8bf)(__m128bh)(A), (__v8bf)(__m128bh)(B), (int)(P), (__mmask8)(U)))
+
+#define _mm256_mask_fpclass_pbh_mask(U, A, imm) \
+ ((__mmask16)__builtin_ia32_vfpclasspbf16256_mask( \
+ (__v16bf)(__m256bh)(A), (int)(imm), (__mmask16)(U)))
+
+#define _mm256_fpclass_pbh_mask(A, imm) \
+ ((__mmask16)__builtin_ia32_vfpclasspbf16256_mask( \
+ (__v16bf)(__m256bh)(A), (int)(imm), (__mmask16) - 1))
+
+#define _mm_mask_fpclass_pbh_mask(U, A, imm) \
+ ((__mmask8)__builtin_ia32_vfpclasspbf16128_mask((__v8bf)(__m128bh)(A), \
+ (int)(imm), (__mmask8)(U)))
+
+#define _mm_fpclass_pbh_mask(A, imm) \
+ ((__mmask8)__builtin_ia32_vfpclasspbf16128_mask((__v8bf)(__m128bh)(A), \
+ (int)(imm), (__mmask8) - 1))
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_scalef_pbh(__m256bh __A, __m256bh __B) {
+ return (__m256bh)__builtin_ia32_vscalefpbf16256_mask(
+ (__v16bf)__A, (__v16bf)__B, (__v16bf)_mm256_undefined_pbh(),
+ (__mmask16)-1);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_scalef_pbh(
+ __m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
+ return (__m256bh)__builtin_ia32_vscalefpbf16256_mask(
+ (__v16bf)__A, (__v16bf)__B, (__v16bf)__W, (__mmask16)__U);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_maskz_scalef_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
+ return (__m256bh)__builtin_ia32_vscalefpbf16256_mask(
+ (__v16bf)__A, (__v16bf)__B, (__v16bf)_mm256_setzero_pbh(),
+ (__mmask16)__U);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_scalef_pbh(__m128bh __A,
+ __m128bh __B) {
+ return (__m128bh)__builtin_ia32_vscalefpbf16128_mask(
+ (__v8bf)__A, (__v8bf)__B, (__v8bf)_mm_undefined_pbh(), (__mmask8)-1);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_scalef_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) {
+ return (__m128bh)__builtin_ia32_vscalefpbf16128_mask(
+ (__v8bf)__A, (__v8bf)__B, (__v8bf)__W, (__mmask8)__U);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_scalef_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) {
+ return (__m128bh)__builtin_ia32_vscalefpbf16128_mask(
+ (__v8bf)__A, (__v8bf)__B, (__v8bf)_mm_setzero_pbh(), (__mmask8)__U);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_rcp_pbh(__m256bh __A) {
+ return (__m256bh)__builtin_ia32_vrcppbf16256_mask(
+ (__v16bf)__A, (__v16bf)_mm256_undefined_pbh(), (__mmask16)-1);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_mask_rcp_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) {
+ return (__m256bh)__builtin_ia32_vrcppbf16256_mask((__v16bf)__A, (__v16bf)__W,
+ (__mmask16)__U);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_maskz_rcp_pbh(__mmask16 __U, __m256bh __A) {
+ return (__m256bh)__builtin_ia32_vrcppbf16256_mask(
+ (__v16bf)__A, (__v16bf)_mm256_setzero_pbh(), (__mmask16)__U);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_rcp_pbh(__m128bh __A) {
+ return (__m128bh)__builtin_ia32_vrcppbf16128_mask(
+ (__v8bf)__A, (__v8bf)_mm_undefined_pbh(), (__mmask8)-1);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_rcp_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) {
+ return (__m128bh)__builtin_ia32_vrcppbf16128_mask((__v8bf)__A, (__v8bf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_rcp_pbh(__mmask8 __U, __m128bh __A) {
+ return (__m128bh)__builtin_ia32_vrcppbf16128_mask(
+ (__v8bf)__A, (__v8bf)_mm_setzero_pbh(), (__mmask8)__U);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_getexp_pbh(__m256bh __A) {
+ return (__m256bh)__builtin_ia32_vgetexppbf16256_mask(
+ (__v16bf)__A, (__v16bf)_mm256_undefined_pbh(), (__mmask16)-1);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_mask_getexp_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) {
+ return (__m256bh)__builtin_ia32_vgetexppbf16256_mask(
+ (__v16bf)__A, (__v16bf)__W, (__mmask16)__U);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_maskz_getexp_pbh(__mmask16 __U, __m256bh __A) {
+ return (__m256bh)__builtin_ia32_vgetexppbf16256_mask(
+ (__v16bf)__A, (__v16bf)_mm256_setzero_pbh(), (__mmask16)__U);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_getexp_pbh(__m128bh __A) {
+ return (__m128bh)__builtin_ia32_vgetexppbf16128_mask(
+ (__v8bf)__A, (__v8bf)_mm_undefined_pbh(), (__mmask8)-1);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_getexp_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) {
+ return (__m128bh)__builtin_ia32_vgetexppbf16128_mask((__v8bf)__A, (__v8bf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_getexp_pbh(__mmask8 __U, __m128bh __A) {
+ return (__m128bh)__builtin_ia32_vgetexppbf16128_mask(
+ (__v8bf)__A, (__v8bf)_mm_setzero_pbh(), (__mmask8)__U);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_rsqrt_pbh(__m256bh __A) {
+ return (__m256bh)__builtin_ia32_vrsqrtpbf16256_mask(
+ (__v16bf)__A, (__v16bf)_mm256_undefined_pbh(), (__mmask16)-1);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_mask_rsqrt_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) {
+ return (__m256bh)__builtin_ia32_vrsqrtpbf16256_mask(
+ (__v16bf)__A, (__v16bf)__W, (__mmask16)__U);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_maskz_rsqrt_pbh(__mmask16 __U, __m256bh __A) {
+ return (__m256bh)__builtin_ia32_vrsqrtpbf16256_mask(
+ (__v16bf)__A, (__v16bf)_mm256_setzero_pbh(), (__mmask16)__U);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_rsqrt_pbh(__m128bh __A) {
+ return (__m128bh)__builtin_ia32_vrsqrtpbf16128_mask(
+ (__v8bf)__A, (__v8bf)_mm_undefined_pbh(), (__mmask8)-1);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_rsqrt_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) {
+ return (__m128bh)__builtin_ia32_vrsqrtpbf16128_mask((__v8bf)__A, (__v8bf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_rsqrt_pbh(__mmask8 __U, __m128bh __A) {
+ return (__m128bh)__builtin_ia32_vrsqrtpbf16128_mask(
+ (__v8bf)__A, (__v8bf)_mm_setzero_pbh(), (__mmask8)__U);
+}
+
+#define _mm256_reducene_pbh(A, imm) \
+ ((__m256bh)__builtin_ia32_vreducenepbf16256_mask( \
+ (__v16bf)(__m256bh)(A), (int)(imm), (__v16bf)_mm256_undefined_pbh(), \
+ (__mmask16) - 1))
+
+#define _mm256_mask_reducene_pbh(W, U, A, imm) \
+ ((__m256bh)__builtin_ia32_vreducenepbf16256_mask( \
+ (__v16bf)(__m256bh)(A), (int)(imm), (__v16bf)(__m256bh)(W), \
+ (__mmask16)(U)))
+
+#define _mm256_maskz_reducene_pbh(U, A, imm) \
+ ((__m256bh)__builtin_ia32_vreducenepbf16256_mask( \
+ (__v16bf)(__m256bh)(A), (int)(imm), (__v16bf)_mm256_setzero_pbh(), \
+ (__mmask16)(U)))
+
+#define _mm_reducene_pbh(A, imm) \
+ ((__m128bh)__builtin_ia32_vreducenepbf16128_mask( \
+ (__v8bf)(__m128bh)(A), (int)(imm), (__v8bf)_mm_undefined_pbh(), \
+ (__mmask8) - 1))
+
+#define _mm_mask_reducene_pbh(W, U, A, imm) \
+ ((__m128bh)__builtin_ia32_vreducenepbf16128_mask( \
+ (__v8bf)(__m128bh)(A), (int)(imm), (__v8bf)(__m128bh)(W), \
+ (__mmask8)(U)))
+
+#define _mm_maskz_reducene_pbh(U, A, imm) \
+ ((__m128bh)__builtin_ia32_vreducenepbf16128_mask( \
+ (__v8bf)(__m128bh)(A), (int)(imm), (__v8bf)_mm_setzero_pbh(), \
+ (__mmask8)(U)))
+
+#define _mm256_roundscalene_pbh(A, B) \
+ ((__m256bh)__builtin_ia32_vrndscalenepbf16_256_mask( \
+ (__v16bf)(__m256bh)(A), (int)(B), (__v16bf)_mm256_undefined_pbh(), \
+ (__mmask16) - 1)) /* A is expanded once; the mask is all ones */
+
+#define _mm256_mask_roundscalene_pbh(W, U, A, imm) \
+ ((__m256bh)__builtin_ia32_vrndscalenepbf16_256_mask( \
+ (__v16bf)(__m256bh)(A), (int)(imm), (__v16bf)(__m256bh)(W), \
+ (__mmask16)(U))) /* W, U, A named as in _mm256_mask_reducene_pbh */
+
+#define _mm256_maskz_roundscalene_pbh(U, A, imm) \
+ ((__m256bh)__builtin_ia32_vrndscalenepbf16_256_mask( \
+ (__v16bf)(__m256bh)(A), (int)(imm), (__v16bf)_mm256_setzero_pbh(), \
+ (__mmask16)(U))) /* U, A named as in _mm256_maskz_reducene_pbh */
+
+#define _mm_roundscalene_pbh(A, B) \
+ ((__m128bh)__builtin_ia32_vrndscalenepbf16_128_mask( \
+ (__v8bf)(__m128bh)(A), (int)(B), (__v8bf)_mm_undefined_pbh(), (__mmask8) - 1)) /* A expanded once */
+
+#define _mm_mask_roundscalene_pbh(W, U, A, imm) \
+ ((__m128bh)__builtin_ia32_vrndscalenepbf16_128_mask( \
+ (__v8bf)(__m128bh)(A), (int)(imm), (__v8bf)(__m128bh)(W), \
+ (__mmask8)(U))) /* W, U, A named as in _mm_mask_reducene_pbh */
+
+#define _mm_maskz_roundscalene_pbh(U, A, imm) \
+ ((__m128bh)__builtin_ia32_vrndscalenepbf16_128_mask( \
+ (__v8bf)(__m128bh)(A), (int)(imm), (__v8bf)_mm_setzero_pbh(), \
+ (__mmask8)(U))) /* U, A named as in _mm_maskz_reducene_pbh */
+
+#define _mm256_getmant_pbh(A, B, C) \
+ ((__m256bh)__builtin_ia32_vgetmantpbf16256_mask( \
+ (__v16bf)(__m256bh)(A), (int)(((C) << 2) | (B)), \
+ (__v16bf)_mm256_undefined_pbh(), (__mmask16) - 1))
+
+#define _mm256_mask_getmant_pbh(W, U, A, B, C) \
+ ((__m256bh)__builtin_ia32_vgetmantpbf16256_mask( \
+ (__v16bf)(__m256bh)(A), (int)(((C) << 2) | (B)), (__v16bf)(__m256bh)(W), \
+ (__mmask16)(U)))
+
+#define _mm256_maskz_getmant_pbh(U, A, B, C) \
+ ((__m256bh)__builtin_ia32_vgetmantpbf16256_mask( \
+ (__v16bf)(__m256bh)(A), (int)(((C) << 2) | (B)), \
+ (__v16bf)_mm256_setzero_pbh(), (__mmask16)(U)))
+
+#define _mm_getmant_pbh(A, B, C) \
+ ((__m128bh)__builtin_ia32_vgetmantpbf16128_mask( \
+ (__v8bf)(__m128bh)(A), (int)(((C) << 2) | (B)), \
+ (__v8bf)_mm_undefined_pbh(), (__mmask8) - 1))
+
+#define _mm_mask_getmant_pbh(W, U, A, B, C) \
+ ((__m128bh)__builtin_ia32_vgetmantpbf16128_mask( \
+ (__v8bf)(__m128bh)(A), (int)(((C) << 2) | (B)), (__v8bf)(__m128bh)(W), \
+ (__mmask8)(U)))
+
+#define _mm_maskz_getmant_pbh(U, A, B, C) \
+ ((__m128bh)__builtin_ia32_vgetmantpbf16128_mask( \
+ (__v8bf)(__m128bh)(A), (int)(((C) << 2) | (B)), \
+ (__v8bf)_mm_setzero_pbh(), (__mmask8)(U)))
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_sqrt_pbh(__m256bh __A) {
+ return (__m256bh)__builtin_ia32_vsqrtnepbf16256((__v16bf)__A);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_mask_sqrt_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) {
+ return (__m256bh)__builtin_ia32_selectpbf_256(
+ (__mmask16)__U, (__v16bf)_mm256_sqrt_pbh(__A), (__v16bf)__W);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_maskz_sqrt_pbh(__mmask16 __U, __m256bh __A) {
+ return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U,
+ (__v16bf)_mm256_sqrt_pbh(__A),
+ (__v16bf)_mm256_setzero_pbh());
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_sqrt_pbh(__m128bh __A) {
+ return (__m128bh)__builtin_ia32_vsqrtnepbf16((__v8bf)__A);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_sqrt_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) {
+ return (__m128bh)__builtin_ia32_selectpbf_128(
+ (__mmask8)__U, (__v8bf)_mm_sqrt_pbh(__A), (__v8bf)__W);
+}
+
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_sqrt_pbh(__mmask8 __U, __m128bh __A) {
+ return (__m128bh)__builtin_ia32_selectpbf_128(
+ (__mmask8)__U, (__v8bf)_mm_sqrt_pbh(__A), (__v8bf)_mm_setzero_pbh());
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_fmaddne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
+ return (__m256bh)__builtin_ia32_vfmaddnepbh256((__v16bf)__A, (__v16bf)__B,
+ (__v16bf)__C);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fmaddne_pbh(
+ __m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) {
+ /* Per-lane select under __U between the fmadd result and __A. */
+ return (__m256bh)__builtin_ia32_selectpbf_256(
+ (__mmask16)__U, (__v16bf)_mm256_fmaddne_pbh(__A, __B, __C),
+ (__v16bf)__A);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask3_fmaddne_pbh(
+ __m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) {
+ /* Per-lane select under __U between the fmadd result and __C. */
+ return (__m256bh)__builtin_ia32_selectpbf_256(
+ (__mmask16)__U, (__v16bf)_mm256_fmaddne_pbh(__A, __B, __C),
+ (__v16bf)__C);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmaddne_pbh(
+ __mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) {
+ /* Per-lane select under __U between the fmadd result and all zeros. */
+ return (__m256bh)__builtin_ia32_selectpbf_256(
+ (__mmask16)__U, (__v16bf)_mm256_fmaddne_pbh(__A, __B, __C),
+ (__v16bf)_mm256_setzero_pbh());
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_fmsubne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
+ return (__m256bh)__builtin_ia32_vfmaddnepbh256((__v16bf)__A, (__v16bf)__B,
+ -(__v16bf)__C); /* only the third operand (__C) is negated */
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fmsubne_pbh(
+ __m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) {
+ /* Per-lane select under __U between the fmsub result and __A. */
+ return (__m256bh)__builtin_ia32_selectpbf_256(
+ (__mmask16)__U, (__v16bf)_mm256_fmsubne_pbh(__A, __B, __C),
+ (__v16bf)__A);
+}
+
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsubne_pbh(
+ __m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) {
+ /* Per-lane select under __U between the fmsub result and __C. */
+ return (__m256bh)__builtin_ia32_selectpbf_256(
+ (__mmask16)__U, (__v16bf)_mm256_fmsubne_pbh(__A, __B, __C),
+ (__v16bf)__C);
+}
+
+/// Masked form of _mm256_fmsubne_pbh: passes __U, the unmasked result and
+/// _mm256_setzero_pbh() to __builtin_ia32_selectpbf_256.
+// NOTE(review): presumably lanes with a clear __U bit become zero -- confirm.
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsubne_pbh(
+ __mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) {
+ return (__m256bh)__builtin_ia32_selectpbf_256(
+ (__mmask16)__U,
+ _mm256_fmsubne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C),
+ (__v16bf)_mm256_setzero_pbh());
+}
+
+/// Calls __builtin_ia32_vfmaddnepbh256 with __B negated: (__A, -__B, __C).
+// NOTE(review): presumably an element-wise bf16 -(__A * __B) + __C -- confirm.
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_fnmaddne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
+ return (__m256bh)__builtin_ia32_vfmaddnepbh256((__v16bf)__A, -(__v16bf)__B,
+ (__v16bf)__C);
+}
+
+/// Masked form of _mm256_fnmaddne_pbh: passes __U, the unmasked result and
+/// __A to __builtin_ia32_selectpbf_256.
+// NOTE(review): presumably lanes with a clear __U bit keep __A -- confirm.
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fnmaddne_pbh(
+ __m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) {
+ return (__m256bh)__builtin_ia32_selectpbf_256(
+ (__mmask16)__U,
+ _mm256_fnmaddne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C),
+ (__v16bf)__A);
+}
+
+/// Masked form of _mm256_fnmaddne_pbh: passes __U, the unmasked result and
+/// __C to __builtin_ia32_selectpbf_256.
+// NOTE(review): presumably lanes with a clear __U bit keep __C -- confirm.
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmaddne_pbh(
+ __m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) {
+ return (__m256bh)__builtin_ia32_selectpbf_256(
+ (__mmask16)__U,
+ _mm256_fnmaddne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C),
+ (__v16bf)__C);
+}
+
+/// Masked form of _mm256_fnmaddne_pbh: passes __U, the unmasked result and
+/// _mm256_setzero_pbh() to __builtin_ia32_selectpbf_256.
+// NOTE(review): presumably lanes with a clear __U bit become zero -- confirm.
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmaddne_pbh(
+ __mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) {
+ return (__m256bh)__builtin_ia32_selectpbf_256(
+ (__mmask16)__U,
+ _mm256_fnmaddne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C),
+ (__v16bf)_mm256_setzero_pbh());
+}
+
+/// Calls __builtin_ia32_vfmaddnepbh256 with __B and __C both negated.
+// NOTE(review): presumably an element-wise bf16 -(__A * __B) - __C -- confirm.
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+_mm256_fnmsubne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
+ return (__m256bh)__builtin_ia32_vfmaddnepbh256((__v16bf)__A, -(__v16bf)__B,
+ -(__v16bf)__C);
+}
+
+/// Masked form of _mm256_fnmsubne_pbh: passes __U, the unmasked result and
+/// __A to __builtin_ia32_selectpbf_256.
+// NOTE(review): presumably lanes with a clear __U bit keep __A -- confirm.
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsubne_pbh(
+ __m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) {
+ return (__m256bh)__builtin_ia32_selectpbf_256(
+ (__mmask16)__U,
+ _mm256_fnmsubne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C),
+ (__v16bf)__A);
+}
+
+/// Masked form of _mm256_fnmsubne_pbh: passes __U, the unmasked result and
+/// __C to __builtin_ia32_selectpbf_256.
+// NOTE(review): presumably lanes with a clear __U bit keep __C -- confirm.
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsubne_pbh(
+ __m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) {
+ return (__m256bh)__builtin_ia32_selectpbf_256(
+ (__mmask16)__U,
+ _mm256_fnmsubne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C),
+ (__v16bf)__C);
+}
+
+/// Masked form of _mm256_fnmsubne_pbh: passes __U, the unmasked result and
+/// _mm256_setzero_pbh() to __builtin_ia32_selectpbf_256.
+// NOTE(review): presumably lanes with a clear __U bit become zero -- confirm.
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsubne_pbh(
+ __mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) {
+ return (__m256bh)__builtin_ia32_selectpbf_256(
+ (__mmask16)__U,
+ _mm256_fnmsubne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C),
+ (__v16bf)_mm256_setzero_pbh());
+}
+
+/// Forwards __A, __B and __C unchanged to __builtin_ia32_vfmaddnepbh128.
+// NOTE(review): presumably an element-wise bf16 (__A * __B) + __C -- confirm.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmaddne_pbh(__m128bh __A,
+ __m128bh __B,
+ __m128bh __C) {
+ return (__m128bh)__builtin_ia32_vfmaddnepbh128((__v8bf)__A, (__v8bf)__B,
+ (__v8bf)__C);
+}
+
+/// Masked form of _mm_fmaddne_pbh: passes __U, the unmasked result and __A
+/// to __builtin_ia32_selectpbf_128.
+// NOTE(review): presumably lanes with a clear __U bit keep __A -- confirm.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_fmaddne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) {
+ return (__m128bh)__builtin_ia32_selectpbf_128(
+ (__mmask8)__U, _mm_fmaddne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C),
+ (__v8bf)__A);
+}
+
+/// Masked form of _mm_fmaddne_pbh: passes __U, the unmasked result and __C
+/// to __builtin_ia32_selectpbf_128.
+// NOTE(review): presumably lanes with a clear __U bit keep __C -- confirm.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask3_fmaddne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) {
+ return (__m128bh)__builtin_ia32_selectpbf_128(
+ (__mmask8)__U, _mm_fmaddne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C),
+ (__v8bf)__C);
+}
+
+/// Masked form of _mm_fmaddne_pbh: passes __U, the unmasked result and
+/// _mm_setzero_pbh() to __builtin_ia32_selectpbf_128.
+// NOTE(review): presumably lanes with a clear __U bit become zero -- confirm.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_fmaddne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
+ return (__m128bh)__builtin_ia32_selectpbf_128(
+ (__mmask8)__U, _mm_fmaddne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C),
+ (__v8bf)_mm_setzero_pbh());
+}
+
+/// Calls __builtin_ia32_vfmaddnepbh128 with __C negated: (__A, __B, -__C).
+// NOTE(review): presumably an element-wise bf16 (__A * __B) - __C -- confirm.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmsubne_pbh(__m128bh __A,
+ __m128bh __B,
+ __m128bh __C) {
+ return (__m128bh)__builtin_ia32_vfmaddnepbh128((__v8bf)__A, (__v8bf)__B,
+ -(__v8bf)__C);
+}
+
+/// Masked form of _mm_fmsubne_pbh: passes __U, the unmasked result and __A
+/// to __builtin_ia32_selectpbf_128.
+// NOTE(review): presumably lanes with a clear __U bit keep __A -- confirm.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_fmsubne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) {
+ return (__m128bh)__builtin_ia32_selectpbf_128(
+ (__mmask8)__U, _mm_fmsubne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C),
+ (__v8bf)__A);
+}
+
+/// Masked form of _mm_fmsubne_pbh: passes __U, the unmasked result and __C
+/// to __builtin_ia32_selectpbf_128.
+// NOTE(review): presumably lanes with a clear __U bit keep __C -- confirm.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask3_fmsubne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) {
+ return (__m128bh)__builtin_ia32_selectpbf_128(
+ (__mmask8)__U, _mm_fmsubne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C),
+ (__v8bf)__C);
+}
+
+/// Masked form of _mm_fmsubne_pbh: passes __U, the unmasked result and
+/// _mm_setzero_pbh() to __builtin_ia32_selectpbf_128.
+// NOTE(review): presumably lanes with a clear __U bit become zero -- confirm.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_fmsubne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
+ return (__m128bh)__builtin_ia32_selectpbf_128(
+ (__mmask8)__U, _mm_fmsubne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C),
+ (__v8bf)_mm_setzero_pbh());
+}
+
+/// Calls __builtin_ia32_vfmaddnepbh128 with __B negated: (__A, -__B, __C).
+// NOTE(review): presumably an element-wise bf16 -(__A * __B) + __C -- confirm.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_fnmaddne_pbh(__m128bh __A, __m128bh __B, __m128bh __C) {
+ return (__m128bh)__builtin_ia32_vfmaddnepbh128((__v8bf)__A, -(__v8bf)__B,
+ (__v8bf)__C);
+}
+
+/// Masked form of _mm_fnmaddne_pbh: passes __U, the unmasked result and __A
+/// to __builtin_ia32_selectpbf_128.
+// NOTE(review): presumably lanes with a clear __U bit keep __A -- confirm.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_fnmaddne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) {
+ return (__m128bh)__builtin_ia32_selectpbf_128(
+ (__mmask8)__U, _mm_fnmaddne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C),
+ (__v8bf)__A);
+}
+
+/// Masked form of _mm_fnmaddne_pbh: passes __U, the unmasked result and __C
+/// to __builtin_ia32_selectpbf_128.
+// NOTE(review): presumably lanes with a clear __U bit keep __C -- confirm.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask3_fnmaddne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) {
+ return (__m128bh)__builtin_ia32_selectpbf_128(
+ (__mmask8)__U, _mm_fnmaddne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C),
+ (__v8bf)__C);
+}
+
+/// Masked form of _mm_fnmaddne_pbh: passes __U, the unmasked result and
+/// _mm_setzero_pbh() to __builtin_ia32_selectpbf_128.
+// NOTE(review): presumably lanes with a clear __U bit become zero -- confirm.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_fnmaddne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
+ return (__m128bh)__builtin_ia32_selectpbf_128(
+ (__mmask8)__U, _mm_fnmaddne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C),
+ (__v8bf)_mm_setzero_pbh());
+}
+
+/// Calls __builtin_ia32_vfmaddnepbh128 with __B and __C both negated.
+// NOTE(review): presumably an element-wise bf16 -(__A * __B) - __C -- confirm.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_fnmsubne_pbh(__m128bh __A, __m128bh __B, __m128bh __C) {
+ return (__m128bh)__builtin_ia32_vfmaddnepbh128((__v8bf)__A, -(__v8bf)__B,
+ -(__v8bf)__C);
+}
+
+/// Masked form of _mm_fnmsubne_pbh: passes __U, the unmasked result and __A
+/// to __builtin_ia32_selectpbf_128.
+// NOTE(review): presumably lanes with a clear __U bit keep __A -- confirm.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask_fnmsubne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) {
+ return (__m128bh)__builtin_ia32_selectpbf_128(
+ (__mmask8)__U, _mm_fnmsubne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C),
+ (__v8bf)__A);
+}
+
+/// Masked form of _mm_fnmsubne_pbh: passes __U, the unmasked result and __C
+/// to __builtin_ia32_selectpbf_128.
+// NOTE(review): presumably lanes with a clear __U bit keep __C -- confirm.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_mask3_fnmsubne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) {
+ return (__m128bh)__builtin_ia32_selectpbf_128(
+ (__mmask8)__U, _mm_fnmsubne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C),
+ (__v8bf)__C);
+}
+
+/// Masked form of _mm_fnmsubne_pbh: passes __U, the unmasked result and
+/// _mm_setzero_pbh() to __builtin_ia32_selectpbf_128.
+// NOTE(review): presumably lanes with a clear __U bit become zero -- confirm.
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_maskz_fnmsubne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
+ return (__m128bh)__builtin_ia32_selectpbf_128(
+ (__mmask8)__U, _mm_fnmsubne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C),
+ (__v8bf)_mm_setzero_pbh());
+}
+
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
+
+#endif
+#endif
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index a922056622e79f..30fcc028958f33 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -649,6 +649,7 @@ _storebe_i64(void * __P, long long __D) {
#endif
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2__)
+#include <avx10_2bf16intrin.h>
#include <avx10_2convertintrin.h>
#include <avx10_2minmaxintrin.h>
#include <avx10_2niintrin.h>
@@ -656,6 +657,7 @@ _storebe_i64(void * __P, long long __D) {
#endif
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2_512__)
+#include <avx10_2_512bf16intrin.h>
#include <avx10_2_512convertintrin.h>
#include <avx10_2_512minmaxintrin.h>
#include <avx10_2_512niintrin.h>
diff --git a/clang/test/CodeGen/X86/avx10_2_512bf16-builtins.c b/clang/test/CodeGen/X86/avx10_2_512bf16-builtins.c
new file mode 100644
index 00000000000000..b14ff4d1f27e2a
--- /dev/null
+++ b/clang/test/CodeGen/X86/avx10_2_512bf16-builtins.c
@@ -0,0 +1,1054 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64 -target-feature +avx10.2-512 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386 -target-feature +avx10.2-512 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+// Expects _mm512_setzero_pbh to materialise a zeroinitializer constant.
+// Declared with (void) so the definition is a real prototype, matching
+// test_mm512_undefined_pbh.
+__m512bh test_mm512_setzero_pbh(void) {
+ // CHECK-LABEL: @test_mm512_setzero_pbh
+ // CHECK: zeroinitializer
+ return _mm512_setzero_pbh();
+}
+
+// Expects _mm512_undefined_pbh to return a <32 x bfloat> zeroinitializer.
+__m512bh test_mm512_undefined_pbh(void) {
+ // CHECK-LABEL: @test_mm512_undefined_pbh
+ // CHECK: ret <32 x bfloat> zeroinitializer
+ return _mm512_undefined_pbh();
+}
+
+// Expects _mm512_set1_pbh to emit 32 insertelement instructions, one per lane
+// index 0..31 in ascending order. The inserted value itself is not matched.
+__m512bh test_mm512_set1_pbh(__bf16 h) {
+ // CHECK-LABEL: @test_mm512_set1_pbh
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 0
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 1
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 2
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 3
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 4
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 5
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 6
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 7
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 8
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 9
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 10
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 11
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 12
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 13
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 14
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 15
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 16
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 17
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 18
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 19
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 20
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 21
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 22
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 23
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 24
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 25
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 26
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 27
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 28
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 29
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 30
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 31
+ return _mm512_set1_pbh(h);
+}
+
+// Expects _mm512_set_pbh to emit 32 insertelement instructions for lane
+// indices 0..31 in ascending order. Only the lane index is matched, so which
+// argument lands in which lane is not verified here.
+__m512bh test_mm512_set_pbh(__bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4,
+ __bf16 bf5, __bf16 bf6, __bf16 bf7, __bf16 bf8,
+ __bf16 bf9, __bf16 bf10, __bf16 bf11, __bf16 bf12,
+ __bf16 bf13, __bf16 bf14, __bf16 bf15, __bf16 bf16,
+ __bf16 bf17, __bf16 bf18, __bf16 bf19, __bf16 bf20,
+ __bf16 bf21, __bf16 bf22, __bf16 bf23, __bf16 bf24,
+ __bf16 bf25, __bf16 bf26, __bf16 bf27, __bf16 bf28,
+ __bf16 bf29, __bf16 bf30, __bf16 bf31, __bf16 bf32) {
+ // CHECK-LABEL: @test_mm512_set_pbh
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 0
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 1
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 2
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 3
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 4
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 5
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 6
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 7
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 8
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 9
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 10
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 11
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 12
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 13
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 14
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 15
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 16
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 17
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 18
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 19
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 20
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 21
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 22
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 23
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 24
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 25
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 26
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 27
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 28
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 29
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 30
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 31
+ return _mm512_set_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+ bf9, bf10, bf11, bf12, bf13, bf14, bf15, bf16,
+ bf17, bf18, bf19, bf20, bf21, bf22, bf23, bf24,
+ bf25, bf26, bf27, bf28, bf29, bf30, bf31, bf32);
+}
+
+// Expects _mm512_setr_pbh to emit 32 insertelement instructions for lane
+// indices 0..31 in ascending order. Only the lane index is matched, so the
+// reversed argument order relative to _mm512_set_pbh is not verified here.
+__m512bh test_mm512_setr_pbh(__bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4,
+ __bf16 bf5, __bf16 bf6, __bf16 bf7, __bf16 bf8,
+ __bf16 bf9, __bf16 bf10, __bf16 bf11, __bf16 bf12,
+ __bf16 bf13, __bf16 bf14, __bf16 bf15, __bf16 bf16,
+ __bf16 bf17, __bf16 bf18, __bf16 bf19, __bf16 bf20,
+ __bf16 bf21, __bf16 bf22, __bf16 bf23, __bf16 bf24,
+ __bf16 bf25, __bf16 bf26, __bf16 bf27, __bf16 bf28,
+ __bf16 bf29, __bf16 bf30, __bf16 bf31, __bf16 bf32) {
+ // CHECK-LABEL: @test_mm512_setr_pbh
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 0
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 1
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 2
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 3
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 4
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 5
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 6
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 7
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 8
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 9
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 10
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 11
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 12
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 13
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 14
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 15
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 16
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 17
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 18
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 19
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 20
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 21
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 22
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 23
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 24
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 25
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 26
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 27
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 28
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 29
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 30
+ // CHECK: insertelement <32 x bfloat> {{.*}}, i32 31
+ return _mm512_setr_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+ bf9, bf10, bf11, bf12, bf13, bf14, bf15, bf16,
+ bf17, bf18, bf19, bf20, bf21, bf22, bf23, bf24,
+ bf25, bf26, bf27, bf28, bf29, bf30, bf31, bf32);
+}
+
+// Expects _mm512_castpbf16_ps to be a <32 x bfloat> to <16 x float> bitcast.
+__m512 test_mm512_castpbf16_ps(__m512bh A) {
+ // CHECK-LABEL: test_mm512_castpbf16_ps
+ // CHECK: bitcast <32 x bfloat> %{{.*}} to <16 x float>
+ return _mm512_castpbf16_ps(A);
+}
+
+// Expects _mm512_castpbf16_pd to be a <32 x bfloat> to <8 x double> bitcast.
+__m512d test_mm512_castpbf16_pd(__m512bh A) {
+ // CHECK-LABEL: test_mm512_castpbf16_pd
+ // CHECK: bitcast <32 x bfloat> %{{.*}} to <8 x double>
+ return _mm512_castpbf16_pd(A);
+}
+
+// Expects _mm512_castpbf16_si512 to be a <32 x bfloat> to <8 x i64> bitcast.
+__m512i test_mm512_castpbf16_si512(__m512bh A) {
+ // CHECK-LABEL: test_mm512_castpbf16_si512
+ // CHECK: bitcast <32 x bfloat> %{{.*}} to <8 x i64>
+ return _mm512_castpbf16_si512(A);
+}
+
+// Expects _mm512_castps_pbh to be a <16 x float> to <32 x bfloat> bitcast.
+__m512bh test_mm512_castps_pbh(__m512 A) {
+ // CHECK-LABEL: test_mm512_castps_pbh
+ // CHECK: bitcast <16 x float> %{{.*}} to <32 x bfloat>
+ return _mm512_castps_pbh(A);
+}
+
+// Expects _mm512_castpd_pbh to be a <8 x double> to <32 x bfloat> bitcast.
+__m512bh test_mm512_castpd_pbh(__m512d A) {
+ // CHECK-LABEL: test_mm512_castpd_pbh
+ // CHECK: bitcast <8 x double> %{{.*}} to <32 x bfloat>
+ return _mm512_castpd_pbh(A);
+}
+
+// Expects _mm512_castsi512_pbh to be a <8 x i64> to <32 x bfloat> bitcast.
+__m512bh test_mm512_castsi512_pbh(__m512i A) {
+ // CHECK-LABEL: test_mm512_castsi512_pbh
+ // CHECK: bitcast <8 x i64> %{{.*}} to <32 x bfloat>
+ return _mm512_castsi512_pbh(A);
+}
+
+// Expects the 512 -> 128 bit cast to be a shufflevector that keeps elements
+// 0..7 of the source.
+__m128bh test_mm512_castpbf16512_pbh128(__m512bh __a) {
+ // CHECK-LABEL: test_mm512_castpbf16512_pbh128
+ // CHECK: shufflevector <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ return _mm512_castpbf16512_pbh128(__a);
+}
+
+// Expects the 512 -> 256 bit cast to be a shufflevector that keeps elements
+// 0..15 of the source.
+__m256bh test_mm512_castpbf16512_pbh256(__m512bh __a) {
+ // CHECK-LABEL: test_mm512_castpbf16512_pbh256
+ // CHECK: shufflevector <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ return _mm512_castpbf16512_pbh256(__a);
+}
+
+// Expects the 128 -> 512 bit cast to be a shufflevector whose first 8 lanes
+// are source elements 0..7 and whose remaining 24 lanes are poison.
+__m512bh test_mm512_castpbf16128_pbh512(__m128bh __a) {
+ // CHECK-LABEL: test_mm512_castpbf16128_pbh512
+ // CHECK: shufflevector <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+ return _mm512_castpbf16128_pbh512(__a);
+}
+
+// Expects the 256 -> 512 bit cast to be a shufflevector whose first 16 lanes
+// are source elements 0..15 and whose remaining 16 lanes are poison.
+__m512bh test_mm512_castpbf16256_pbh512(__m256bh __a) {
+ // CHECK-LABEL: test_mm512_castpbf16256_pbh512
+ // CHECK: shufflevector <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+ return _mm512_castpbf16256_pbh512(__a);
+}
+
+// Expects the 128 -> 512 bit zero-extension to be a shufflevector: lanes 0..7
+// index the first operand, the other 24 lanes repeat indices 8..15, i.e. the
+// second operand (presumably the zero vector; its value is not matched).
+__m512bh test_mm512_zextpbf16128_pbh512(__m128bh __a) {
+ // CHECK-LABEL: test_mm512_zextpbf16128_pbh512
+ // CHECK: shufflevector <8 x bfloat> %{{.*}}, <8 x bfloat> {{.*}}, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ return _mm512_zextpbf16128_pbh512(__a);
+}
+
+// Expects the 256 -> 512 bit zero-extension to be a shufflevector producing 32
+// lanes. The shuffle mask itself is not matched.
+__m512bh test_mm512_zextpbf16256_pbh512(__m256bh __a) {
+ // CHECK-LABEL: test_mm512_zextpbf16256_pbh512
+ // CHECK: shufflevector <16 x bfloat> %{{.*}}, <16 x bfloat> {{.*}}, <32 x i32>
+ return _mm512_zextpbf16256_pbh512(__a);
+}
+
+// Expects _mm512_abs_pbh to be implemented as an `and` on <16 x i32>.
+// NOTE(review): presumably a sign-bit-clearing mask; the constant is not
+// matched here.
+__m512bh test_mm512_abs_pbh(__m512bh a) {
+ // CHECK-LABEL: @test_mm512_abs_pbh
+ // CHECK: and <16 x i32>
+ return _mm512_abs_pbh(a);
+}
+
+// Loads and stores
+
+// Expects _mm512_load_pbh to be a <32 x bfloat> load with 64-byte alignment.
+__m512bh test_mm512_load_pbh(void *p) {
+ // CHECK-LABEL: @test_mm512_load_pbh
+ // CHECK: load <32 x bfloat>, ptr %{{.*}}, align 64
+ return _mm512_load_pbh(p);
+}
+
+// Expects _mm512_loadu_pbh to be a <32 x bfloat> load with alignment 1; the
+// end-of-line anchor rejects larger alignments that merely start with "1".
+__m512bh test_mm512_loadu_pbh(void *p) {
+ // CHECK-LABEL: @test_mm512_loadu_pbh
+ // CHECK: load <32 x bfloat>, ptr {{.*}}, align 1{{$}}
+ return _mm512_loadu_pbh(p);
+}
+
+// Expects _mm512_store_pbh to be a <32 x bfloat> store with 64-byte alignment.
+void test_mm512_store_pbh(void *p, __m512bh a) {
+ // CHECK-LABEL: @test_mm512_store_pbh
+ // CHECK: store <32 x bfloat> %{{.*}}, ptr %{{.*}}, align 64
+ _mm512_store_pbh(p, a);
+}
+
+// Expects _mm512_storeu_pbh to be a <32 x bfloat> store with alignment 1,
+// immediately followed by the function's `ret void`.
+void test_mm512_storeu_pbh(void *p, __m512bh a) {
+ // CHECK-LABEL: @test_mm512_storeu_pbh
+ // CHECK: store <32 x bfloat> %{{.*}}, ptr %{{.*}}, align 1{{$}}
+ // CHECK-NEXT: ret void
+ _mm512_storeu_pbh(p, a);
+}
+
+// Expects _mm512_mask_blend_pbh to bitcast the i32 mask to <32 x i1> and use
+// it in a select between two <32 x bfloat> values.
+__m512bh test_mm512_mask_blend_pbh(__mmask32 __U, __m512bh __A, __m512bh __W) {
+ // CHECK-LABEL: @test_mm512_mask_blend_pbh
+ // CHECK: %{{.*}} = bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: %{{.*}} = select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask_blend_pbh(__U, __A, __W);
+}
+
+// Expects _mm512_permutex2var_pbh to reuse the 16-bit integer permute: all
+// three operands are bitcast to <32 x i16>, passed to
+// llvm.x86.avx512.vpermi2var.hi.512, and the result is bitcast back to bfloat.
+__m512bh test_mm512_permutex2var_pbh(__m512bh __A, __m512i __I, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_permutex2var_pbh
+ // CHECK: %{{.*}} = bitcast <32 x bfloat> %{{.*}} to <32 x i16>
+ // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <32 x i16>
+ // CHECK: %{{.*}} = bitcast <32 x bfloat> %{{.*}} to <32 x i16>
+ // CHECK: %{{.*}} = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
+ // CHECK: %{{.*}} = bitcast <32 x i16> %{{.*}} to <32 x bfloat>
+ return _mm512_permutex2var_pbh(__A, __I, __B);
+}
+
+// Expects _mm512_permutexvar_pbh to reuse the 16-bit integer permute: both
+// operands are bitcast to <32 x i16>, passed to llvm.x86.avx512.permvar.hi.512,
+// and the result is bitcast back to bfloat. Named after the intrinsic under
+// test, like the other tests in this file.
+__m512bh test_mm512_permutexvar_pbh(__m512i __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_permutexvar_pbh
+ // CHECK: %{{.*}} = bitcast <32 x bfloat> %{{.*}} to <32 x i16>
+ // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <32 x i16>
+ // CHECK: %{{.*}} = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %{{.*}}, <32 x i16> %{{.*}})
+ // CHECK: %{{.*}} = bitcast <32 x i16> %{{.*}} to <32 x bfloat>
+ return _mm512_permutexvar_pbh(__A, __B);
+}
+
+// Expects _mm512_addne_pbh to be a plain fadd on <32 x bfloat>.
+__m512bh test_mm512_addne_pbh(__m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_addne_pbh
+ // CHECK: %{{.*}} = fadd <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_addne_pbh(__A, __B);
+}
+
+// Expects the masked add to be a <32 x bfloat> fadd followed by a <32 x i1>
+// select. The label anchors both matches to this function's IR.
+__m512bh test_mm512_mask_addne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_mask_addne_pbh
+ // CHECK: %{{.*}} = fadd <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask_addne_pbh(__W, __U, __A, __B);
+}
+
+// Expects the zero-masked add to be a <32 x bfloat> fadd followed by a
+// <32 x i1> select. The label anchors both matches to this function's IR.
+__m512bh test_mm512_maskz_addne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_maskz_addne_pbh
+ // CHECK: %{{.*}} = fadd <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_maskz_addne_pbh(__U, __A, __B);
+}
+
+// Expects _mm512_subne_pbh to be a plain fsub on <32 x bfloat>.
+__m512bh test_mm512_subne_pbh(__m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_subne_pbh
+ // CHECK: %{{.*}} = fsub <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_subne_pbh(__A, __B);
+}
+
+// Expects the masked subtract to be a <32 x bfloat> fsub followed by a
+// <32 x i1> select. The label anchors both matches to this function's IR.
+__m512bh test_mm512_mask_subne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_mask_subne_pbh
+ // CHECK: %{{.*}} = fsub <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask_subne_pbh(__W, __U, __A, __B);
+}
+
+// Expects the zero-masked subtract to be a <32 x bfloat> fsub followed by a
+// <32 x i1> select. The label anchors both matches to this function's IR.
+__m512bh test_mm512_maskz_subne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_maskz_subne_pbh
+ // CHECK: %{{.*}} = fsub <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_maskz_subne_pbh(__U, __A, __B);
+}
+
+// Expects _mm512_mulne_pbh to be a plain fmul on <32 x bfloat>.
+__m512bh test_mm512_mulne_pbh(__m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_mulne_pbh
+ // CHECK: %{{.*}} = fmul <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mulne_pbh(__A, __B);
+}
+
+// Expects the masked multiply to be a <32 x bfloat> fmul followed by a
+// <32 x i1> select. The label anchors both matches to this function's IR.
+__m512bh test_mm512_mask_mulne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_mask_mulne_pbh
+ // CHECK: %{{.*}} = fmul <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask_mulne_pbh(__W, __U, __A, __B);
+}
+
+// Expects the zero-masked multiply to be a <32 x bfloat> fmul followed by a
+// <32 x i1> select. The label anchors both matches to this function's IR.
+__m512bh test_mm512_maskz_mulne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_maskz_mulne_pbh
+ // CHECK: %{{.*}} = fmul <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_maskz_mulne_pbh(__U, __A, __B);
+}
+
+// Expects _mm512_divne_pbh to be a plain fdiv on <32 x bfloat>.
+__m512bh test_mm512_divne_pbh(__m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_divne_pbh
+ // CHECK: %{{.*}} = fdiv <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_divne_pbh(__A, __B);
+}
+
+// Expects the masked divide to be a <32 x bfloat> fdiv followed by a
+// <32 x i1> select. The label anchors both matches to this function's IR.
+__m512bh test_mm512_mask_divne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_mask_divne_pbh
+ // CHECK: %{{.*}} = fdiv <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask_divne_pbh(__W, __U, __A, __B);
+}
+
+// Expects the zero-masked divide to be a <32 x bfloat> fdiv followed by a
+// <32 x i1> select. The label anchors both matches to this function's IR.
+__m512bh test_mm512_maskz_divne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_maskz_divne_pbh
+ // CHECK: %{{.*}} = fdiv <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_maskz_divne_pbh(__U, __A, __B);
+}
+
+// Expects _mm512_max_pbh to call the llvm.x86.avx10.vmaxpbf16512 intrinsic.
+__m512bh test_mm512_max_pbh(__m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_max_pbh
+ // CHECK: @llvm.x86.avx10.vmaxpbf16512(
+ return _mm512_max_pbh(__A, __B);
+}
+
+// Expects the masked max to reference llvm.x86.avx10.vmaxpbf16512 and then
+// emit a <32 x i1> select. The label anchors both matches to this function.
+__m512bh test_mm512_mask_max_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_mask_max_pbh
+ // CHECK: @llvm.x86.avx10.vmaxpbf16512
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask_max_pbh(__W, __U, __A, __B);
+}
+
+// Expects the zero-masked max to reference llvm.x86.avx10.vmaxpbf16512 and
+// then emit a <32 x i1> select. The label anchors both matches to this
+// function.
+__m512bh test_mm512_maskz_max_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_maskz_max_pbh
+ // CHECK: @llvm.x86.avx10.vmaxpbf16512
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_maskz_max_pbh(__U, __A, __B);
+}
+
+// Expects _mm512_min_pbh to call the llvm.x86.avx10.vminpbf16512 intrinsic.
+__m512bh test_mm512_min_pbh(__m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_min_pbh
+ // CHECK: @llvm.x86.avx10.vminpbf16512(
+ return _mm512_min_pbh(__A, __B);
+}
+
+// Expects the masked min to reference llvm.x86.avx10.vminpbf16512 and then
+// emit a <32 x i1> select. The label anchors both matches to this function.
+__m512bh test_mm512_mask_min_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_mask_min_pbh
+ // CHECK: @llvm.x86.avx10.vminpbf16512
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask_min_pbh(__W, __U, __A, __B);
+}
+
+// Expects the zero-masked min to reference llvm.x86.avx10.vminpbf16512 and
+// then emit a <32 x i1> select. The label anchors both matches to this
+// function.
+__m512bh test_mm512_maskz_min_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_maskz_min_pbh
+ // CHECK: @llvm.x86.avx10.vminpbf16512
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_maskz_min_pbh(__U, __A, __B);
+}
+
+// _CMP_EQ_OQ is expected to produce "fcmp oeq" on <32 x bfloat>.
+__mmask32 test_mm512_cmp_pbh_mask_eq_oq(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: @test_mm512_cmp_pbh_mask_eq_oq
+ // CHECK: fcmp oeq <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_EQ_OQ);
+}
+
+// _CMP_LT_OS is expected to produce "fcmp olt" on <32 x bfloat>.
+__mmask32 test_mm512_cmp_pbh_mask_lt_os(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_lt_os
+ // CHECK: fcmp olt <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_LT_OS);
+}
+
+// _CMP_LE_OS is expected to produce "fcmp ole" on <32 x bfloat>.
+__mmask32 test_mm512_cmp_pbh_mask_le_os(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_le_os
+ // CHECK: fcmp ole <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_LE_OS);
+}
+
+// _CMP_UNORD_Q is expected to produce "fcmp uno" on <32 x bfloat>.
+__mmask32 test_mm512_cmp_pbh_mask_unord_q(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_unord_q
+ // CHECK: fcmp uno <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_UNORD_Q);
+}
+
+// _CMP_NEQ_UQ is expected to produce "fcmp une" on <32 x bfloat>.
+__mmask32 test_mm512_cmp_pbh_mask_neq_uq(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_neq_uq
+ // CHECK: fcmp une <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_NEQ_UQ);
+}
+
+// _CMP_NLT_US is expected to produce "fcmp uge" on <32 x bfloat>.
+__mmask32 test_mm512_cmp_pbh_mask_nlt_us(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_nlt_us
+ // CHECK: fcmp uge <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_NLT_US);
+}
+
+// _CMP_NLE_US is expected to produce "fcmp ugt" on <32 x bfloat>.
+__mmask32 test_mm512_cmp_pbh_mask_nle_us(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_nle_us
+ // CHECK: fcmp ugt <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_NLE_US);
+}
+
+// _CMP_ORD_Q is expected to produce "fcmp ord" on <32 x bfloat>.
+__mmask32 test_mm512_cmp_pbh_mask_ord_q(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_ord_q
+ // CHECK: fcmp ord <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_ORD_Q);
+}
+
+// _CMP_EQ_UQ is expected to produce "fcmp ueq" on <32 x bfloat>.
+__mmask32 test_mm512_cmp_pbh_mask_eq_uq(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_eq_uq
+ // CHECK: fcmp ueq <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_EQ_UQ);
+}
+
+// _CMP_NGE_US is expected to produce "fcmp ult" on <32 x bfloat>.
+__mmask32 test_mm512_cmp_pbh_mask_nge_us(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_nge_us
+ // CHECK: fcmp ult <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_NGE_US);
+}
+
+// _CMP_NGT_US is expected to produce "fcmp ule" on <32 x bfloat>.
+__mmask32 test_mm512_cmp_pbh_mask_ngt_us(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_ngt_us
+ // CHECK: fcmp ule <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_NGT_US);
+}
+
+// _CMP_FALSE_OQ is expected to produce "fcmp false" on <32 x bfloat>.
+__mmask32 test_mm512_cmp_pbh_mask_false_oq(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_false_oq
+ // CHECK: fcmp false <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_FALSE_OQ);
+}
+
+// _CMP_NEQ_OQ is expected to produce "fcmp one" on <32 x bfloat>.
+__mmask32 test_mm512_cmp_pbh_mask_neq_oq(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_neq_oq
+ // CHECK: fcmp one <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_NEQ_OQ);
+}
+
+// _CMP_GE_OS is expected to produce "fcmp oge" on <32 x bfloat>.
+__mmask32 test_mm512_cmp_pbh_mask_ge_os(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_ge_os
+ // CHECK: fcmp oge <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_GE_OS);
+}
+
+// _CMP_GT_OS is expected to produce "fcmp ogt" on <32 x bfloat>.
+__mmask32 test_mm512_cmp_pbh_mask_gt_os(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_gt_os
+ // CHECK: fcmp ogt <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_GT_OS);
+}
+
+// _CMP_TRUE_UQ is expected to produce "fcmp true" on <32 x bfloat>.
+__mmask32 test_mm512_cmp_pbh_mask_true_uq(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_true_uq
+ // CHECK: fcmp true <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_TRUE_UQ);
+}
+
+// _CMP_EQ_OS is expected to produce "fcmp oeq" on <32 x bfloat>.
+__mmask32 test_mm512_cmp_pbh_mask_eq_os(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_eq_os
+ // CHECK: fcmp oeq <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_EQ_OS);
+}
+
+// _CMP_LT_OQ is expected to produce "fcmp olt" on <32 x bfloat>.
+__mmask32 test_mm512_cmp_pbh_mask_lt_oq(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_lt_oq
+ // CHECK: fcmp olt <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_LT_OQ);
+}
+
+// _CMP_LE_OQ is expected to produce "fcmp ole" on <32 x bfloat>.
+__mmask32 test_mm512_cmp_pbh_mask_le_oq(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_le_oq
+ // CHECK: fcmp ole <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_LE_OQ);
+}
+
+// _CMP_UNORD_S is expected to produce "fcmp uno" on <32 x bfloat>.
+__mmask32 test_mm512_cmp_pbh_mask_unord_s(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_unord_s
+ // CHECK: fcmp uno <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_UNORD_S);
+}
+
+// _CMP_NEQ_US is expected to produce "fcmp une" on <32 x bfloat>.
+__mmask32 test_mm512_cmp_pbh_mask_neq_us(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_neq_us
+ // CHECK: fcmp une <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_NEQ_US);
+}
+
+// _CMP_NLT_UQ is expected to produce "fcmp uge" on <32 x bfloat>.
+__mmask32 test_mm512_cmp_pbh_mask_nlt_uq(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_nlt_uq
+ // CHECK: fcmp uge <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_NLT_UQ);
+}
+
+// _CMP_NLE_UQ is expected to produce "fcmp ugt" on <32 x bfloat>.
+__mmask32 test_mm512_cmp_pbh_mask_nle_uq(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_nle_uq
+ // CHECK: fcmp ugt <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_NLE_UQ);
+}
+
+// _CMP_ORD_S is expected to produce "fcmp ord" on <32 x bfloat>.
+__mmask32 test_mm512_cmp_pbh_mask_ord_s(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_ord_s
+ // CHECK: fcmp ord <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_ORD_S);
+}
+
+// _CMP_EQ_US is expected to produce "fcmp ueq" on <32 x bfloat>.
+__mmask32 test_mm512_cmp_pbh_mask_eq_us(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_eq_us
+ // CHECK: fcmp ueq <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_EQ_US);
+}
+
+// _CMP_NGE_UQ is expected to produce "fcmp ult" on <32 x bfloat>.
+__mmask32 test_mm512_cmp_pbh_mask_nge_uq(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_nge_uq
+ // CHECK: fcmp ult <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_NGE_UQ);
+}
+
+// _CMP_NGT_UQ is expected to produce "fcmp ule" on <32 x bfloat>.
+__mmask32 test_mm512_cmp_pbh_mask_ngt_uq(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_ngt_uq
+ // CHECK: fcmp ule <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_NGT_UQ);
+}
+
+// _CMP_FALSE_OS is expected to produce "fcmp false" on <32 x bfloat>.
+__mmask32 test_mm512_cmp_pbh_mask_false_os(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_false_os
+ // CHECK: fcmp false <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_FALSE_OS);
+}
+
+// _CMP_NEQ_OS is expected to produce "fcmp one" on <32 x bfloat>.
+__mmask32 test_mm512_cmp_pbh_mask_neq_os(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_neq_os
+ // CHECK: fcmp one <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_NEQ_OS);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_ge_oq(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_ge_oq
+ // CHECK: fcmp oge <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_GE_OQ);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_gt_oq(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_gt_oq
+ // CHECK: fcmp ogt <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_GT_OQ);
+}
+
+__mmask32 test_mm512_cmp_pbh_mask_true_us(__m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_cmp_pbh_mask_true_us
+ // CHECK: fcmp true <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_cmp_pbh_mask(a, b, _CMP_TRUE_US);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_eq_oq(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: @test_mm512_mask_cmp_pbh_mask_eq_oq
+ // CHECK: fcmp oeq <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_OQ);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_lt_os(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_lt_os
+ // CHECK: fcmp olt <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_LT_OS);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_le_os(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_le_os
+ // CHECK: fcmp ole <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_LE_OS);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_unord_q(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_unord_q
+ // CHECK: fcmp uno <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_UNORD_Q);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_neq_uq(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_neq_uq
+ // CHECK: fcmp une <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_UQ);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_nlt_us(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_nlt_us
+ // CHECK: fcmp uge <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NLT_US);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_nle_us(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_nle_us
+ // CHECK: fcmp ugt <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NLE_US);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_ord_q(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_ord_q
+ // CHECK: fcmp ord <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_ORD_Q);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_eq_uq(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_eq_uq
+ // CHECK: fcmp ueq <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_UQ);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_nge_us(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_nge_us
+ // CHECK: fcmp ult <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NGE_US);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_ngt_us(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_ngt_us
+ // CHECK: fcmp ule <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NGT_US);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_false_oq(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_false_oq
+ // CHECK: fcmp false <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_FALSE_OQ);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_neq_oq(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_neq_oq
+ // CHECK: fcmp one <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_OQ);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_ge_os(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_ge_os
+ // CHECK: fcmp oge <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_GE_OS);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_gt_os(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_gt_os
+ // CHECK: fcmp ogt <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_GT_OS);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_true_uq(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_true_uq
+ // CHECK: fcmp true <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_TRUE_UQ);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_eq_os(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_eq_os
+ // CHECK: fcmp oeq <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_OS);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_lt_oq(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_lt_oq
+ // CHECK: fcmp olt <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_LT_OQ);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_le_oq(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_le_oq
+ // CHECK: fcmp ole <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_LE_OQ);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_unord_s(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_unord_s
+ // CHECK: fcmp uno <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_UNORD_S);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_neq_us(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_neq_us
+ // CHECK: fcmp une <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_US);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_nlt_uq(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_nlt_uq
+ // CHECK: fcmp uge <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NLT_UQ);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_nle_uq(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_nle_uq
+ // CHECK: fcmp ugt <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NLE_UQ);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_ord_s(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_ord_s
+ // CHECK: fcmp ord <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_ORD_S);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_eq_us(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_eq_us
+ // CHECK: fcmp ueq <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_US);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_nge_uq(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_nge_uq
+ // CHECK: fcmp ult <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NGE_UQ);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_ngt_uq(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_ngt_uq
+ // CHECK: fcmp ule <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NGT_UQ);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_false_os(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_false_os
+ // CHECK: fcmp false <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_FALSE_OS);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_neq_os(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_neq_os
+ // CHECK: fcmp one <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_OS);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_ge_oq(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_ge_oq
+ // CHECK: fcmp oge <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_GE_OQ);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_gt_oq(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_gt_oq
+ // CHECK: fcmp ogt <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_GT_OQ);
+}
+
+__mmask32 test_mm512_mask_cmp_pbh_mask_true_us(__mmask32 m, __m512bh a, __m512bh b) {
+ // CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_true_us
+ // CHECK: fcmp true <32 x bfloat> %{{.*}}, %{{.*}}
+ return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_TRUE_US);
+}
+
+__mmask32 test_mm512_mask_fpclass_pbh_mask(__mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_mask_fpclass_pbh_mask
+ // CHECK: @llvm.x86.avx10.fpclass.nepbf16.512
+ return _mm512_mask_fpclass_pbh_mask(__U, __A, 4);
+}
+
+__mmask32 test_mm512_fpclass_pbh_mask(__m512bh __A) {
+ // CHECK-LABEL: @test_mm512_fpclass_pbh_mask
+ // CHECK: @llvm.x86.avx10.fpclass.nepbf16.512
+ return _mm512_fpclass_pbh_mask(__A, 4);
+}
+
+__m512bh test_mm512_scalef_pbh(__m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_scalef_pbh
+ // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.512
+ return _mm512_scalef_pbh(__A, __B);
+}
+
+__m512bh test_mm512_mask_scalef_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_mask_scalef_pbh
+ // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.512
+ return _mm512_mask_scalef_pbh(__W, __U, __A, __B);
+}
+
+__m512bh test_mm512_maskz_scalef_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
+ // CHECK-LABEL: @test_mm512_maskz_scalef_pbh
+ // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.512
+ return _mm512_maskz_scalef_pbh(__U, __A, __B);
+}
+
+__m512bh test_mm512_rcp_pbh(__m512bh __A) {
+ // CHECK-LABEL: @test_mm512_rcp_pbh
+ // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.512
+ return _mm512_rcp_pbh(__A);
+}
+
+__m512bh test_mm512_mask_rcp_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_mask_rcp_pbh
+ // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.512
+ return _mm512_mask_rcp_pbh(__W, __U, __A);
+}
+
+__m512bh test_mm512_maskz_rcp_pbh(__mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_maskz_rcp_pbh
+ // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.512
+ return _mm512_maskz_rcp_pbh(__U, __A);
+}
+
+__m512bh test_mm512_getexp_pbh(__m512bh __A) {
+ // CHECK-LABEL: @test_mm512_getexp_pbh
+ // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.512
+ return _mm512_getexp_pbh(__A);
+}
+
+__m512bh test_mm512_mask_getexp_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_mask_getexp_pbh
+ // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.512
+ return _mm512_mask_getexp_pbh(__W, __U, __A);
+}
+
+__m512bh test_mm512_maskz_getexp_pbh(__mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_maskz_getexp_pbh
+ // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.512
+ return _mm512_maskz_getexp_pbh(__U, __A);
+}
+
+__m512bh test_mm512_rsqrt_pbh(__m512bh __A) {
+ // CHECK-LABEL: @test_mm512_rsqrt_pbh
+ // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.512
+ return _mm512_rsqrt_pbh(__A);
+}
+
+__m512bh test_mm512_mask_rsqrt_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_mask_rsqrt_pbh
+ // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.512
+ return _mm512_mask_rsqrt_pbh(__W, __U, __A);
+}
+
+__m512bh test_mm512_maskz_rsqrt_pbh(__mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_maskz_rsqrt_pbh
+ // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.512
+ return _mm512_maskz_rsqrt_pbh(__U, __A);
+}
+
+__m512bh test_mm512_reducene_pbh(__m512bh __A) {
+ // CHECK-LABEL: @test_mm512_reducene_pbh
+ // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.512
+ return _mm512_reducene_pbh(__A, 3);
+}
+
+__m512bh test_mm512_mask_reducene_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_mask_reducene_pbh
+ // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.512
+ return _mm512_mask_reducene_pbh(__W, __U, __A, 1);
+}
+
+__m512bh test_mm512_maskz_reducene_pbh(__mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_maskz_reducene_pbh
+ // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.512
+ return _mm512_maskz_reducene_pbh(__U, __A, 1);
+}
+
+__m512bh test_mm512_roundscalene_pbh(__m512bh __A) {
+ // CHECK-LABEL: @test_mm512_roundscalene_pbh
+ // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.512
+ return _mm512_roundscalene_pbh(__A, 3);
+}
+
+__m512bh test_mm512_mask_roundscalene_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_mask_roundscalene_pbh
+ // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.512
+ return _mm512_mask_roundscalene_pbh(__W, __U, __A, 1);
+}
+
+__m512bh test_mm512_maskz_roundscalene_pbh(__mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_maskz_roundscalene_pbh
+ // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.512
+ return _mm512_maskz_roundscalene_pbh(__U, __A, 1);
+}
+
+__m512bh test_mm512_getmant_pbh(__m512bh __A) {
+ // CHECK-LABEL: @test_mm512_getmant_pbh
+ // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.512
+ return _mm512_getmant_pbh(__A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan);
+}
+
+__m512bh test_mm512_mask_getmant_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_mask_getmant_pbh
+ // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.512
+ return _mm512_mask_getmant_pbh(__W, __U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan);
+}
+
+__m512bh test_mm512_maskz_getmant_pbh(__mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_maskz_getmant_pbh
+ // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.512
+ return _mm512_maskz_getmant_pbh(__U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan);
+}
+
+__m512bh test_mm512_sqrt_pbh(__m512bh __A) {
+ // CHECK-LABEL: @test_mm512_sqrt_pbh
+ // CHECK: %{{.*}} = call <32 x bfloat> @llvm.sqrt.v32bf16(<32 x bfloat> %{{.*}})
+ return _mm512_sqrt_pbh(__A);
+}
+
+__m512bh test_mm512_mask_sqrt_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_mask_sqrt_pbh
+ // CHECK: %{{.*}} = call <32 x bfloat> @llvm.sqrt.v32bf16(<32 x bfloat> %{{.*}})
+ // CHECK: bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask_sqrt_pbh(__W, __U, __A);
+}
+
+__m512bh test_mm512_maskz_sqrt_pbh(__mmask32 __U, __m512bh __A) {
+ // CHECK-LABEL: @test_mm512_maskz_sqrt_pbh
+ // CHECK: %{{.*}} = call <32 x bfloat> @llvm.sqrt.v32bf16(<32 x bfloat> %{{.*}})
+ // CHECK: bitcast i32 %{{.*}} to <32 x i1>
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_maskz_sqrt_pbh(__U, __A);
+}
+
+__m512bh test_mm512_fmaddne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
+ // CHECK-LABEL: @test_mm512_fmaddne_pbh
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ return _mm512_fmaddne_pbh(__A, __B, __C);
+}
+
+__m512bh test_mm512_mask_fmaddne_pbh(__m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
+ // CHECK-LABEL: @test_mm512_mask_fmaddne_pbh
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask_fmaddne_pbh(__A, __U, __B, __C);
+}
+
+__m512bh test_mm512_mask3_fmaddne_pbh(__m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
+ // CHECK-LABEL: @test_mm512_mask3_fmaddne_pbh
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask3_fmaddne_pbh(__A, __B, __C, __U);
+}
+
+__m512bh test_mm512_maskz_fmaddne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
+ // CHECK-LABEL: @test_mm512_maskz_fmaddne_pbh
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_maskz_fmaddne_pbh(__U, __A, __B, __C);
+}
+
+__m512bh test_mm512_fmsubne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
+ // CHECK-LABEL: @test_mm512_fmsubne_pbh
+ // CHECK: fneg
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ return _mm512_fmsubne_pbh(__A, __B, __C);
+}
+
+__m512bh test_mm512_mask_fmsubne_pbh(__m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
+ // CHECK-LABEL: @test_mm512_mask_fmsubne_pbh
+ // CHECK: fneg
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask_fmsubne_pbh(__A, __U, __B, __C);
+}
+
+__m512bh test_mm512_mask3_fmsubne_pbh(__m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
+ // CHECK-LABEL: @test_mm512_mask3_fmsubne_pbh
+ // CHECK: fneg
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask3_fmsubne_pbh(__A, __B, __C, __U);
+}
+
+__m512bh test_mm512_maskz_fmsubne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
+ // CHECK-LABEL: @test_mm512_maskz_fmsubne_pbh
+ // CHECK: fneg
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_maskz_fmsubne_pbh(__U, __A, __B, __C);
+}
+
+__m512bh test_mm512_fnmaddne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
+ // CHECK-LABEL: @test_mm512_fnmaddne_pbh
+ // CHECK: fneg
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ return _mm512_fnmaddne_pbh(__A, __B, __C);
+}
+
+__m512bh test_mm512_mask_fnmaddne_pbh(__m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
+ // CHECK-LABEL: @test_mm512_mask_fnmaddne_pbh
+ // CHECK: fneg
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask_fnmaddne_pbh(__A, __U, __B, __C);
+}
+
+__m512bh test_mm512_mask3_fnmaddne_pbh(__m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
+ // CHECK-LABEL: @test_mm512_mask3_fnmaddne_pbh
+ // CHECK: fneg
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask3_fnmaddne_pbh(__A, __B, __C, __U);
+}
+
+__m512bh test_mm512_maskz_fnmaddne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
+ // CHECK-LABEL: @test_mm512_maskz_fnmaddne_pbh
+ // CHECK: fneg
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_maskz_fnmaddne_pbh(__U, __A, __B, __C);
+}
+
+__m512bh test_mm512_fnmsubne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
+ // CHECK-LABEL: @test_mm512_fnmsubne_pbh
+ // CHECK: fneg
+ // CHECK: fneg
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ return _mm512_fnmsubne_pbh(__A, __B, __C);
+}
+
+__m512bh test_mm512_mask_fnmsubne_pbh(__m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
+ // CHECK-LABEL: @test_mm512_mask_fnmsubne_pbh
+ // CHECK: fneg
+ // CHECK: fneg
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask_fnmsubne_pbh(__A, __U, __B, __C);
+}
+
+__m512bh test_mm512_mask3_fnmsubne_pbh(__m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
+ // CHECK-LABEL: @test_mm512_mask3_fnmsubne_pbh
+ // CHECK: fneg
+ // CHECK: fneg
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_mask3_fnmsubne_pbh(__A, __B, __C, __U);
+}
+
+__m512bh test_mm512_maskz_fnmsubne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
+ // CHECK-LABEL: @test_mm512_maskz_fnmsubne_pbh
+ // CHECK: fneg
+ // CHECK: fneg
+ // CHECK: call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}})
+ // CHECK: select <32 x i1> %{{.*}}, <32 x bfloat> %{{.*}}, <32 x bfloat> %{{.*}}
+ return _mm512_maskz_fnmsubne_pbh(__U, __A, __B, __C);
+}
diff --git a/clang/test/CodeGen/X86/avx10_2bf16-builtins.c b/clang/test/CodeGen/X86/avx10_2bf16-builtins.c
new file mode 100644
index 00000000000000..84bac3e8dc63b8
--- /dev/null
+++ b/clang/test/CodeGen/X86/avx10_2bf16-builtins.c
@@ -0,0 +1,2018 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64 -target-feature +avx10.2-256 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386 -target-feature +avx10.2-256 -emit-llvm -o - -Wno-invalid-feature-combination -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+__m256bh test_mm256_setzero_pbh(void) {
+ // CHECK-LABEL: @test_mm256_setzero_pbh
+ // CHECK: zeroinitializer
+ return _mm256_setzero_pbh();
+}
+
+__m128bh test_mm_setzero_pbh(void) {
+ // CHECK-LABEL: @test_mm_setzero_pbh
+ // CHECK: zeroinitializer
+ return _mm_setzero_pbh();
+}
+
+__m256bh test_mm256_undefined_pbh(void) {
+ // CHECK-LABEL: @test_mm256_undefined_pbh
+ // CHECK: ret <16 x bfloat> zeroinitializer
+ return _mm256_undefined_pbh();
+}
+
+__m128bh test_mm_undefined_pbh(void) {
+ // CHECK-LABEL: @test_mm_undefined_pbh
+ // CHECK: ret <8 x bfloat> zeroinitializer
+ return _mm_undefined_pbh();
+}
+
+__bf16 test_mm_cvtsbh_bf16(__m128bh __A) {
+ // CHECK-LABEL: @test_mm_cvtsbh_bf16
+ // CHECK: extractelement <8 x bfloat> %{{.*}}, i32 0
+ return _mm_cvtsbh_bf16(__A);
+}
+
+__bf16 test_mm256_cvtsbh_bf16(__m256bh __A) {
+ // CHECK-LABEL: @test_mm256_cvtsbh_bf16
+ // CHECK: extractelement <16 x bfloat> %{{.*}}, i32 0
+ return _mm256_cvtsbh_bf16(__A);
+}
+
+__m128bh test_mm_set_sbh(__bf16 h) {
+ // CHECK-LABEL: @test_mm_set_sbh
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 0
+ // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 1
+ // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 2
+ // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 3
+ // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 4
+ // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 5
+ // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 6
+ // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 7
+ return _mm_set_sbh(h);
+}
+
+__m128bh test_mm_set1_pbh(__bf16 h) {
+ // CHECK-LABEL: @test_mm_set1_pbh
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 0
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 1
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 2
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 3
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 4
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 5
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 6
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 7
+ return _mm_set1_pbh(h);
+}
+
+__m256bh test_mm256_set1_pbh(__bf16 h) {
+ // CHECK-LABEL: @test_mm256_set1_pbh
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 0
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 1
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 2
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 3
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 4
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 5
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 6
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 7
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 8
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 9
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 10
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 11
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 12
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 13
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 14
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 15
+ return _mm256_set1_pbh(h);
+}
+
+__m128bh test_mm_set_pbh(__bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4,
+ __bf16 bf5, __bf16 bf6, __bf16 bf7, __bf16 bf8) {
+ // CHECK-LABEL: @test_mm_set_pbh
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 0
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 1
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 2
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 3
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 4
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 5
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 6
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 7
+ return _mm_set_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8);
+}
+
+__m256bh test_mm256_set_pbh(__bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4,
+ __bf16 bf5, __bf16 bf6, __bf16 bf7, __bf16 bf8,
+ __bf16 bf9, __bf16 bf10, __bf16 bf11, __bf16 bf12,
+ __bf16 bf13, __bf16 bf14, __bf16 bf15, __bf16 bf16) {
+ // CHECK-LABEL: @test_mm256_set_pbh
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 0
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 1
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 2
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 3
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 4
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 5
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 6
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 7
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 8
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 9
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 10
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 11
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 12
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 13
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 14
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 15
+ return _mm256_set_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+ bf9, bf10, bf11, bf12, bf13, bf14, bf15, bf16);
+}
+
+__m128bh test_mm_setr_pbh(__bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4,
+ __bf16 bf5, __bf16 bf6, __bf16 bf7, __bf16 bf8) {
+ // CHECK-LABEL: @test_mm_setr_pbh
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 0
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 1
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 2
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 3
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 4
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 5
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 6
+ // CHECK: insertelement <8 x bfloat> {{.*}}, i32 7
+ return _mm_setr_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8);
+}
+
+__m256bh test_mm256_setr_pbh(__bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4,
+ __bf16 bf5, __bf16 bf6, __bf16 bf7, __bf16 bf8,
+ __bf16 bf9, __bf16 bf10, __bf16 bf11, __bf16 bf12,
+ __bf16 bf13, __bf16 bf14, __bf16 bf15, __bf16 bf16) {
+ // CHECK-LABEL: @test_mm256_setr_pbh
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 0
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 1
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 2
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 3
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 4
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 5
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 6
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 7
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 8
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 9
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 10
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 11
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 12
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 13
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 14
+ // CHECK: insertelement <16 x bfloat> {{.*}}, i32 15
+ return _mm256_setr_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8,
+ bf9, bf10, bf11, bf12, bf13, bf14, bf15, bf16);
+}
+
+__m128 test_mm_castpbf16_ps(__m128bh A) {
+ // CHECK-LABEL: test_mm_castpbf16_ps
+ // CHECK: bitcast <8 x bfloat> %{{.*}} to <4 x float>
+ return _mm_castpbf16_ps(A);
+}
+
+__m256 test_mm256_castpbf16_ps(__m256bh A) {
+ // CHECK-LABEL: test_mm256_castpbf16_ps
+ // CHECK: bitcast <16 x bfloat> %{{.*}} to <8 x float>
+ return _mm256_castpbf16_ps(A);
+}
+
+__m128i test_mm_castpbf16_si128(__m128bh A) {
+ // CHECK-LABEL: test_mm_castpbf16_si128
+ // CHECK: bitcast <8 x bfloat> %{{.*}} to <2 x i64>
+ return _mm_castpbf16_si128(A);
+}
+
+__m256i test_mm256_castpbf16_si256(__m256bh A) {
+ // CHECK-LABEL: test_mm256_castpbf16_si256
+ // CHECK: bitcast <16 x bfloat> %{{.*}} to <4 x i64>
+ return _mm256_castpbf16_si256(A);
+}
+
+__m128bh test_mm_castps_pbh(__m128 A) {
+ // CHECK-LABEL: test_mm_castps_pbh
+ // CHECK: bitcast <4 x float> %{{.*}} to <8 x bfloat>
+ return _mm_castps_pbh(A);
+}
+
+__m256bh test_mm256_castps_pbh(__m256 A) {
+ // CHECK-LABEL: test_mm256_castps_pbh
+ // CHECK: bitcast <8 x float> %{{.*}} to <16 x bfloat>
+ return _mm256_castps_pbh(A);
+}
+
+__m128bh test_mm_castpd_pbh(__m128d A) {
+ // CHECK-LABEL: test_mm_castpd_pbh
+ // CHECK: bitcast <2 x double> %{{.*}} to <8 x bfloat>
+ return _mm_castpd_pbh(A);
+}
+
+__m256bh test_mm256_castpd_pbh(__m256d A) {
+ // CHECK-LABEL: test_mm256_castpd_pbh
+ // CHECK: bitcast <4 x double> %{{.*}} to <16 x bfloat>
+ return _mm256_castpd_pbh(A);
+}
+
+__m128bh test_mm_castsi128_pbh(__m128i A) {
+ // CHECK-LABEL: test_mm_castsi128_pbh
+ // CHECK: bitcast <2 x i64> %{{.*}} to <8 x bfloat>
+ return _mm_castsi128_pbh(A);
+}
+
+__m256bh test_mm256_castsi256_pbh(__m256i A) {
+ // CHECK-LABEL: test_mm256_castsi256_pbh
+ // CHECK: bitcast <4 x i64> %{{.*}} to <16 x bfloat>
+ return _mm256_castsi256_pbh(A);
+}
+
+__m128d test_mm_castpbf16_pd(__m128bh A) {
+ // CHECK-LABEL: test_mm_castpbf16_pd
+ // CHECK: bitcast <8 x bfloat> %{{.*}} to <2 x double>
+ return _mm_castpbf16_pd(A);
+}
+
+__m128bh test_mm256_castpbf16256_pbh128(__m256bh __a) {
+ // CHECK-LABEL: test_mm256_castpbf16256_pbh128
+ // CHECK: shufflevector <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ return _mm256_castpbf16256_pbh128(__a);
+}
+
+__m256bh test_mm256_castpbf16128_pbh256(__m128bh __a) {
+ // CHECK-LABEL: test_mm256_castpbf16128_pbh256
+ // CHECK: shufflevector <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+ return _mm256_castpbf16128_pbh256(__a);
+}
+
+__m256d test_mm256_castpbf16_pd(__m256bh A) {
+ // CHECK-LABEL: test_mm256_castpbf16_pd
+ // CHECK: bitcast <16 x bfloat> %{{.*}} to <4 x double>
+ return _mm256_castpbf16_pd(A);
+}
+
+ // Expects the zero-extending cast to emit a shufflevector with indices 0-15 over two <8 x bfloat> operands.
+ // The second operand is matched loosely (no leading '%'); presumably it is a zero constant - confirm.
+ __m256bh test_mm256_zextpbf16128_pbh256(__m128bh __a) {
+ // CHECK-LABEL: test_mm256_zextpbf16128_pbh256
+ // CHECK: shufflevector <8 x bfloat> %{{.*}}, <8 x bfloat> {{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ return _mm256_zextpbf16128_pbh256(__a);
+}
+
+ // Expects _mm_abs_pbh to be implemented with an integer `and` on <4 x i32>.
+ __m128bh test_mm_abs_pbh(__m128bh a) {
+ // CHECK-LABEL: @test_mm_abs_pbh
+ // CHECK: and <4 x i32>
+ return _mm_abs_pbh(a);
+}
+
+ // Expects _mm256_abs_pbh to be implemented with an integer `and` on <8 x i32>.
+ __m256bh test_mm256_abs_pbh(__m256bh a) {
+ // CHECK-LABEL: @test_mm256_abs_pbh
+ // CHECK: and <8 x i32>
+ return _mm256_abs_pbh(a);
+}
+
+ // Expects the unaligned load to emit a <16 x bfloat> load with exactly `align 1` (anchored at end of line).
+ __m256bh test_mm256_loadu_pbh(void *p) {
+ // CHECK-LABEL: @test_mm256_loadu_pbh
+ // CHECK: load <16 x bfloat>, ptr {{.*}}, align 1{{$}}
+ return _mm256_loadu_pbh(p);
+}
+
+ // Expects the scalar load to emit a masked <8 x bfloat> load with alignment 1 and a constant mask built from i8 1.
+ __m128bh test_mm_load_sbh(void const *A) {
+ // CHECK-LABEL: test_mm_load_sbh
+ // CHECK: %{{.*}} = call <8 x bfloat> @llvm.masked.load.v8bf16.p0(ptr %{{.*}}, i32 1, <8 x i1> bitcast (<1 x i8> <i8 1> to <8 x i1>), <8 x bfloat> %{{.*}})
+ return _mm_load_sbh(A);
+}
+
+ // Expects the aligned load to emit a <16 x bfloat> load with `align 32`.
+ __m256bh test_mm256_load_pbh(void *p) {
+ // CHECK-LABEL: @test_mm256_load_pbh
+ // CHECK: load <16 x bfloat>, ptr %{{.*}}, align 32
+ return _mm256_load_pbh(p);
+}
+
+ // Expects the aligned load to emit an <8 x bfloat> load with `align 16`.
+ __m128bh test_mm_load_pbh(void *p) {
+ // CHECK-LABEL: @test_mm_load_pbh
+ // CHECK: load <8 x bfloat>, ptr %{{.*}}, align 16
+ return _mm_load_pbh(p);
+}
+
+ // Expects the unaligned load to emit an <8 x bfloat> load with exactly `align 1` (anchored at end of line).
+ __m128bh test_mm_loadu_pbh(void *p) {
+ // CHECK-LABEL: @test_mm_loadu_pbh
+ // CHECK: load <8 x bfloat>, ptr {{.*}}, align 1{{$}}
+ return _mm_loadu_pbh(p);
+}
+
+ // Expects the scalar store to extract element 0 and store it as a single bfloat with `align 1`.
+ void test_mm_store_sbh(void *A, __m128bh B) {
+ // CHECK-LABEL: test_mm_store_sbh
+ // CHECK: extractelement <8 x bfloat> %{{.*}}, i32 0
+ // CHECK: store bfloat %{{.*}}, ptr %{{.*}}, align 1{{$}}
+ _mm_store_sbh(A, B);
+}
+
+ // Expects the masked scalar store to emit a masked <8 x bfloat> store with alignment 1 under an <8 x i1> mask.
+ void test_mm_mask_store_sbh(void *__P, __mmask8 __U, __m128bh __A) {
+ // CHECK-LABEL: @test_mm_mask_store_sbh
+ // CHECK: call void @llvm.masked.store.v8bf16.p0(<8 x bfloat> %{{.*}}, ptr %{{.*}}, i32 1, <8 x i1> %{{.*}})
+ _mm_mask_store_sbh(__P, __U, __A);
+}
+
+ // Expects the aligned store to emit a <16 x bfloat> store with `align 32`.
+ void test_mm256_store_pbh(void *p, __m256bh a) {
+ // CHECK-LABEL: @test_mm256_store_pbh
+ // CHECK: store <16 x bfloat> %{{.*}}, ptr %{{.*}}, align 32
+ _mm256_store_pbh(p, a);
+}
+
+ // Expects the aligned store to emit an <8 x bfloat> store with `align 16`.
+ void test_mm_store_pbh(void *p, __m128bh a) {
+ // CHECK-LABEL: @test_mm_store_pbh
+ // CHECK: store <8 x bfloat> %{{.*}}, ptr %{{.*}}, align 16
+ _mm_store_pbh(p, a);
+}
+
+ // Expects the merge-masked scalar load to emit a masked <8 x bfloat> load with alignment 1.
+ // The mask and pass-through operands are matched loosely.
+ __m128bh test_mm_mask_load_sbh(__m128bh __A, __mmask8 __U, const void *__W) {
+ // CHECK-LABEL: @test_mm_mask_load_sbh
+ // CHECK: %{{.*}} = call <8 x bfloat> @llvm.masked.load.v8bf16.p0(ptr %{{.*}}, i32 1, <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}})
+ return _mm_mask_load_sbh(__A, __U, __W);
+}
+
+ // Expects the zero-masked scalar load to emit a masked <8 x bfloat> load with alignment 1.
+ // The pattern is the same as for the merge-masked form; the pass-through value is not pinned.
+ __m128bh test_mm_maskz_load_sbh(__mmask8 __U, const void *__W) {
+ // CHECK-LABEL: @test_mm_maskz_load_sbh
+ // CHECK: %{{.*}} = call <8 x bfloat> @llvm.masked.load.v8bf16.p0(ptr %{{.*}}, i32 1, <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}})
+ return _mm_maskz_load_sbh(__U, __W);
+}
+
+ // Expects the unaligned store to emit a <16 x bfloat> store with exactly `align 1`, immediately followed by `ret void`.
+ void test_mm256_storeu_pbh(void *p, __m256bh a) {
+ // CHECK-LABEL: @test_mm256_storeu_pbh
+ // CHECK: store <16 x bfloat> %{{.*}}, ptr %{{.*}}, align 1{{$}}
+ // CHECK-NEXT: ret void
+ _mm256_storeu_pbh(p, a);
+}
+
+ // Expects the unaligned store to emit an <8 x bfloat> store with exactly `align 1`, immediately followed by `ret void`.
+ void test_mm_storeu_pbh(void *p, __m128bh a) {
+ // CHECK-LABEL: @test_mm_storeu_pbh
+ // CHECK: store <8 x bfloat> %{{.*}}, ptr %{{.*}}, align 1{{$}}
+ // CHECK-NEXT: ret void
+ _mm_storeu_pbh(p, a);
+}
+
+ // Expects the scalar move to extract element 0 of one vector and insert it at index 0 of another.
+ __m128bh test_mm_move_sbh(__m128bh A, __m128bh B) {
+ // CHECK-LABEL: test_mm_move_sbh
+ // CHECK: extractelement <8 x bfloat> %{{.*}}, i32 0
+ // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i32 0
+ return _mm_move_sbh(A, B);
+}
+
+ // Expects the merge-masked scalar move to emit two stages:
+ //  1. the plain move (extract element 0, insert at index 0);
+ //  2. a scalar select between two extracted element-0 values, keyed on bit 0 of the mask,
+ //     with the selected value inserted back at index 0 of the first extracted-from vector.
+ // Stage 2 is matched as one contiguous instruction sequence.
+ __m128bh test_mm_mask_move_sbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_mask_move_sbh
+ // CHECK: [[EXT:%.*]] = extractelement <8 x bfloat> %{{.*}}, i32 0
+ // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat [[EXT]], i32 0
+ // CHECK: [[A:%.*]] = extractelement <8 x bfloat> [[VEC:%.*]], i64 0
+ // CHECK-NEXT: [[B:%.*]] = extractelement <8 x bfloat> %{{.*}}, i64 0
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.*]] = select i1 %{{.*}}, bfloat [[A]], bfloat [[B]]
+ // CHECK-NEXT: insertelement <8 x bfloat> [[VEC]], bfloat [[SEL]], i64 0
+ return _mm_mask_move_sbh(__W, __U, __A, __B);
+}
+
+ // Expects the zero-masked scalar move to emit the plain move (extract/insert at index 0)
+ // followed by a contiguous sequence that selects between two extracted element-0 values
+ // on bit 0 of the mask and inserts the result back at index 0.
+ // The matched IR shape is the same as for the merge-masked form.
+ __m128bh test_mm_maskz_move_sbh(__mmask8 __U, __m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_maskz_move_sbh
+ // CHECK: [[EXT:%.*]] = extractelement <8 x bfloat> %{{.*}}, i32 0
+ // CHECK: insertelement <8 x bfloat> %{{.*}}, bfloat [[EXT]], i32 0
+ // CHECK: [[A:%.*]] = extractelement <8 x bfloat> [[VEC:%.*]], i64 0
+ // CHECK-NEXT: [[B:%.*]] = extractelement <8 x bfloat> %{{.*}}, i64 0
+ // CHECK-NEXT: bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK-NEXT: extractelement <8 x i1> %{{.*}}, i64 0
+ // CHECK-NEXT: [[SEL:%.*]] = select i1 %{{.*}}, bfloat [[A]], bfloat [[B]]
+ // CHECK-NEXT: insertelement <8 x bfloat> [[VEC]], bfloat [[SEL]], i64 0
+ return _mm_maskz_move_sbh(__U, __A, __B);
+}
+
+ // Expects the blend to bitcast the i8 mask to <8 x i1> and use it in a vector select on <8 x bfloat>.
+ __m128bh test_mm_mask_blend_pbh(__mmask8 __U, __m128bh __A, __m128bh __W) {
+ // CHECK-LABEL: @test_mm_mask_blend_pbh
+ // CHECK: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_mask_blend_pbh(__U, __A, __W);
+}
+
+ // Expects the blend to bitcast the i16 mask to <16 x i1> and use it in a vector select on <16 x bfloat>.
+ __m256bh test_mm256_mask_blend_pbh(__mmask16 __U, __m256bh __A, __m256bh __W) {
+ // CHECK-LABEL: @test_mm256_mask_blend_pbh
+ // CHECK: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_mask_blend_pbh(__U, __A, __W);
+}
+
+ // Expects the two-source permute to reuse the 16-bit integer intrinsic: all three operands are
+ // bitcast to <8 x i16>, @llvm.x86.avx512.vpermi2var.hi.128 is called, and the result is bitcast
+ // back to <8 x bfloat>.
+ __m128bh test_mm_permutex2var_pbh(__m128bh __A, __m128i __I, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_permutex2var_pbh
+ // CHECK: %{{.*}} = bitcast <8 x bfloat> %{{.*}} to <8 x i16>
+ // CHECK: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
+ // CHECK: %{{.*}} = bitcast <8 x bfloat> %{{.*}} to <8 x i16>
+ // CHECK: %{{.*}} = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+ // CHECK: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x bfloat>
+ return _mm_permutex2var_pbh(__A, __I, __B);
+}
+
+ // Expects the two-source permute to reuse the 16-bit integer intrinsic: all three operands are
+ // bitcast to <16 x i16>, @llvm.x86.avx512.vpermi2var.hi.256 is called, and the result is bitcast
+ // back to <16 x bfloat>.
+ __m256bh test_mm256_permutex2var_pbh(__m256bh __A, __m256i __I, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_permutex2var_pbh
+ // CHECK: %{{.*}} = bitcast <16 x bfloat> %{{.*}} to <16 x i16>
+ // CHECK: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16>
+ // CHECK: %{{.*}} = bitcast <16 x bfloat> %{{.*}} to <16 x i16>
+ // CHECK: %{{.*}} = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
+ // CHECK: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x bfloat>
+ return _mm256_permutex2var_pbh(__A, __I, __B);
+}
+
+ // Expects the single-source permute to reuse the 16-bit integer intrinsic: both operands are
+ // bitcast to <8 x i16>, @llvm.x86.avx512.permvar.hi.128 is called, and the result is bitcast
+ // back to <8 x bfloat>.
+ __m128bh test_mm_permutexvar_pbh(__m128i __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_permutexvar_pbh
+ // CHECK: %{{.*}} = bitcast <8 x bfloat> %{{.*}} to <8 x i16>
+ // CHECK: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
+ // CHECK: %{{.*}} = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+ // CHECK: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x bfloat>
+ return _mm_permutexvar_pbh(__A, __B);
+}
+
+ // Expects the single-source permute to reuse the 16-bit integer intrinsic: both operands are
+ // bitcast to <16 x i16>, @llvm.x86.avx512.permvar.hi.256 is called, and the result is bitcast
+ // back to <16 x bfloat>.
+ __m256bh test_mm256_permutexvar_pbh(__m256i __A, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_permutexvar_pbh
+ // CHECK: %{{.*}} = bitcast <16 x bfloat> %{{.*}} to <16 x i16>
+ // CHECK: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16>
+ // CHECK: %{{.*}} = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
+ // CHECK: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x bfloat>
+ return _mm256_permutexvar_pbh(__A, __B);
+}
+
+ // Expects _mm256_addne_pbh to emit a plain fadd on <16 x bfloat>.
+ __m256bh test_mm256_addne_pbh(__m256bh __A, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_addne_pbh
+ // CHECK: %{{.*}} = fadd <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_addne_pbh(__A, __B);
+}
+
+ // Expects the merge-masked add to emit an fadd on <16 x bfloat> followed by a select on a <16 x i1> mask.
+ // The label anchors the patterns to this function so they cannot match IR of a later test.
+ __m256bh test_mm256_mask_addne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_mask_addne_pbh
+ // CHECK: %{{.*}} = fadd <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return (__m256bh)_mm256_mask_addne_pbh(__W, __U, __A, __B);
+}
+
+ // Expects the zero-masked add to emit an fadd on <16 x bfloat> followed by a select on a <16 x i1> mask.
+ // The label anchors the patterns to this function so they cannot match IR of a later test.
+ __m256bh test_mm256_maskz_addne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_maskz_addne_pbh
+ // CHECK: %{{.*}} = fadd <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_maskz_addne_pbh(__U, __A, __B);
+}
+
+ // Expects _mm_addne_pbh to emit a plain fadd on <8 x bfloat>.
+ __m128bh test_mm_addne_pbh(__m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_addne_pbh
+ // CHECK: %{{.*}} = fadd <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_addne_pbh(__A, __B);
+}
+
+ // Expects the merge-masked add to emit an fadd on <8 x bfloat> followed by a select on an <8 x i1> mask.
+ // The label anchors the patterns to this function so they cannot match IR of a later test.
+ // NOTE(review): __U is declared __mmask16 while the select is on <8 x i1>; the intrinsic presumably takes __mmask8 - confirm.
+ __m128bh test_mm_mask_addne_pbh(__m128bh __W, __mmask16 __U, __m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_mask_addne_pbh
+ // CHECK: %{{.*}} = fadd <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return (__m128bh)_mm_mask_addne_pbh(__W, __U, __A, __B);
+}
+
+ // Expects the zero-masked add to emit an fadd on <8 x bfloat> followed by a select on an <8 x i1> mask.
+ // The label anchors the patterns to this function so they cannot match IR of a later test.
+ // NOTE(review): __U is declared __mmask16 while the select is on <8 x i1>; the intrinsic presumably takes __mmask8 - confirm.
+ __m128bh test_mm_maskz_addne_pbh(__mmask16 __U, __m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_maskz_addne_pbh
+ // CHECK: %{{.*}} = fadd <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_maskz_addne_pbh(__U, __A, __B);
+}
+
+ // Expects _mm256_subne_pbh to emit a plain fsub on <16 x bfloat>.
+ __m256bh test_mm256_subne_pbh(__m256bh __A, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_subne_pbh
+ // CHECK: %{{.*}} = fsub <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_subne_pbh(__A, __B);
+}
+
+ // Expects the merge-masked subtract to emit an fsub on <16 x bfloat> followed by a select on a <16 x i1> mask.
+ // The label anchors the patterns to this function so they cannot match IR of a later test.
+ __m256bh test_mm256_mask_subne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_mask_subne_pbh
+ // CHECK: %{{.*}} = fsub <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return (__m256bh)_mm256_mask_subne_pbh(__W, __U, __A, __B);
+}
+
+ // Expects the zero-masked subtract to emit an fsub on <16 x bfloat> followed by a select on a <16 x i1> mask.
+ // The label anchors the patterns to this function so they cannot match IR of a later test.
+ __m256bh test_mm256_maskz_subne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_maskz_subne_pbh
+ // CHECK: %{{.*}} = fsub <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_maskz_subne_pbh(__U, __A, __B);
+}
+
+ // Expects _mm_subne_pbh to emit a plain fsub on <8 x bfloat>.
+ __m128bh test_mm_subne_pbh(__m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_subne_pbh
+ // CHECK: %{{.*}} = fsub <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_subne_pbh(__A, __B);
+}
+
+ // Expects the merge-masked subtract to emit an fsub on <8 x bfloat> followed by a select on an <8 x i1> mask.
+ // The label anchors the patterns to this function so they cannot match IR of a later test.
+ // NOTE(review): __U is declared __mmask16 while the select is on <8 x i1>; the intrinsic presumably takes __mmask8 - confirm.
+ __m128bh test_mm_mask_subne_pbh(__m128bh __W, __mmask16 __U, __m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_mask_subne_pbh
+ // CHECK: %{{.*}} = fsub <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return (__m128bh)_mm_mask_subne_pbh(__W, __U, __A, __B);
+}
+
+ // Expects the zero-masked subtract to emit an fsub on <8 x bfloat> followed by a select on an <8 x i1> mask.
+ // The label anchors the patterns to this function so they cannot match IR of a later test.
+ // NOTE(review): __U is declared __mmask16 while the select is on <8 x i1>; the intrinsic presumably takes __mmask8 - confirm.
+ __m128bh test_mm_maskz_subne_pbh(__mmask16 __U, __m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_maskz_subne_pbh
+ // CHECK: %{{.*}} = fsub <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_maskz_subne_pbh(__U, __A, __B);
+}
+
+ // Expects _mm256_mulne_pbh to emit a plain fmul on <16 x bfloat>.
+ __m256bh test_mm256_mulne_pbh(__m256bh __A, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_mulne_pbh
+ // CHECK: %{{.*}} = fmul <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mulne_pbh(__A, __B);
+}
+
+ // Expects the merge-masked multiply to emit an fmul on <16 x bfloat> followed by a select on a <16 x i1> mask.
+ // The label anchors the patterns to this function so they cannot match IR of a later test.
+ __m256bh test_mm256_mask_mulne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_mask_mulne_pbh
+ // CHECK: %{{.*}} = fmul <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return (__m256bh)_mm256_mask_mulne_pbh(__W, __U, __A, __B);
+}
+
+ // Expects the zero-masked multiply to emit an fmul on <16 x bfloat> followed by a select on a <16 x i1> mask.
+ // The label anchors the patterns to this function so they cannot match IR of a later test.
+ __m256bh test_mm256_maskz_mulne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_maskz_mulne_pbh
+ // CHECK: %{{.*}} = fmul <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_maskz_mulne_pbh(__U, __A, __B);
+}
+
+ // Expects _mm_mulne_pbh to emit a plain fmul on <8 x bfloat>.
+ __m128bh test_mm_mulne_pbh(__m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_mulne_pbh
+ // CHECK: %{{.*}} = fmul <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_mulne_pbh(__A, __B);
+}
+
+ // Expects the merge-masked multiply to emit an fmul on <8 x bfloat> followed by a select on an <8 x i1> mask.
+ // The label anchors the patterns to this function so they cannot match IR of a later test.
+ // NOTE(review): __U is declared __mmask16 while the select is on <8 x i1>; the intrinsic presumably takes __mmask8 - confirm.
+ __m128bh test_mm_mask_mulne_pbh(__m128bh __W, __mmask16 __U, __m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_mask_mulne_pbh
+ // CHECK: %{{.*}} = fmul <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return (__m128bh)_mm_mask_mulne_pbh(__W, __U, __A, __B);
+}
+
+ // Expects the zero-masked multiply to emit an fmul on <8 x bfloat> followed by a select on an <8 x i1> mask.
+ // The label anchors the patterns to this function so they cannot match IR of a later test.
+ // NOTE(review): __U is declared __mmask16 while the select is on <8 x i1>; the intrinsic presumably takes __mmask8 - confirm.
+ __m128bh test_mm_maskz_mulne_pbh(__mmask16 __U, __m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_maskz_mulne_pbh
+ // CHECK: %{{.*}} = fmul <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_maskz_mulne_pbh(__U, __A, __B);
+}
+
+ // Expects _mm256_divne_pbh to emit a plain fdiv on <16 x bfloat>.
+ __m256bh test_mm256_divne_pbh(__m256bh __A, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_divne_pbh
+ // CHECK: %{{.*}} = fdiv <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_divne_pbh(__A, __B);
+}
+
+ // Expects the merge-masked divide to emit an fdiv on <16 x bfloat> followed by a select on a <16 x i1> mask.
+ // The label anchors the patterns to this function so they cannot match IR of a later test.
+ __m256bh test_mm256_mask_divne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_mask_divne_pbh
+ // CHECK: %{{.*}} = fdiv <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return (__m256bh)_mm256_mask_divne_pbh(__W, __U, __A, __B);
+}
+
+ // Expects the zero-masked divide to emit an fdiv on <16 x bfloat> followed by a select on a <16 x i1> mask.
+ // The label anchors the patterns to this function so they cannot match IR of a later test.
+ __m256bh test_mm256_maskz_divne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_maskz_divne_pbh
+ // CHECK: %{{.*}} = fdiv <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_maskz_divne_pbh(__U, __A, __B);
+}
+
+ // Expects _mm_divne_pbh to emit a plain fdiv on <8 x bfloat>.
+ __m128bh test_mm_divne_pbh(__m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_divne_pbh
+ // CHECK: %{{.*}} = fdiv <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_divne_pbh(__A, __B);
+}
+
+ // Expects the merge-masked divide to emit an fdiv on <8 x bfloat> followed by a select on an <8 x i1> mask.
+ // The label anchors the patterns to this function so they cannot match IR of a later test.
+ // NOTE(review): __U is declared __mmask16 while the select is on <8 x i1>; the intrinsic presumably takes __mmask8 - confirm.
+ __m128bh test_mm_mask_divne_pbh(__m128bh __W, __mmask16 __U, __m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_mask_divne_pbh
+ // CHECK: %{{.*}} = fdiv <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return (__m128bh)_mm_mask_divne_pbh(__W, __U, __A, __B);
+}
+
+ // Expects the zero-masked divide to emit an fdiv on <8 x bfloat> followed by a select on an <8 x i1> mask.
+ // The label anchors the patterns to this function so they cannot match IR of a later test.
+ // NOTE(review): __U is declared __mmask16 while the select is on <8 x i1>; the intrinsic presumably takes __mmask8 - confirm.
+ __m128bh test_mm_maskz_divne_pbh(__mmask16 __U, __m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_maskz_divne_pbh
+ // CHECK: %{{.*}} = fdiv <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_maskz_divne_pbh(__U, __A, __B);
+}
+
+ // Expects _mm256_max_pbh to emit a call to the @llvm.x86.avx10.vmaxpbf16256 intrinsic.
+ __m256bh test_mm256_max_pbh(__m256bh __A, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_max_pbh
+ // CHECK: @llvm.x86.avx10.vmaxpbf16256(
+ return _mm256_max_pbh(__A, __B);
+}
+
+ // Expects the merge-masked max to reference @llvm.x86.avx10.vmaxpbf16256 and then select on a <16 x i1> mask.
+ // The label anchors the patterns to this function so they cannot match IR of a later test.
+ __m256bh test_mm256_mask_max_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_mask_max_pbh
+ // CHECK: @llvm.x86.avx10.vmaxpbf16256
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return (__m256bh)_mm256_mask_max_pbh(__W, __U, __A, __B);
+}
+
+ // Expects the zero-masked max to reference @llvm.x86.avx10.vmaxpbf16256 and then select on a <16 x i1> mask.
+ // The label anchors the patterns to this function so they cannot match IR of a later test.
+ __m256bh test_mm256_maskz_max_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_maskz_max_pbh
+ // CHECK: @llvm.x86.avx10.vmaxpbf16256
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_maskz_max_pbh(__U, __A, __B);
+}
+
+ // Expects _mm_max_pbh to emit a call to the @llvm.x86.avx10.vmaxpbf16128 intrinsic.
+ __m128bh test_mm_max_pbh(__m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_max_pbh
+ // CHECK: @llvm.x86.avx10.vmaxpbf16128(
+ return _mm_max_pbh(__A, __B);
+}
+
+ // Expects the merge-masked max to reference @llvm.x86.avx10.vmaxpbf16128 and then select on an <8 x i1> mask.
+ // The label anchors the patterns to this function so they cannot match IR of a later test.
+ // NOTE(review): __U is declared __mmask16 while the select is on <8 x i1>; the intrinsic presumably takes __mmask8 - confirm.
+ __m128bh test_mm_mask_max_pbh(__m128bh __W, __mmask16 __U, __m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_mask_max_pbh
+ // CHECK: @llvm.x86.avx10.vmaxpbf16128
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return (__m128bh)_mm_mask_max_pbh(__W, __U, __A, __B);
+}
+
+ // Expects the zero-masked max to reference @llvm.x86.avx10.vmaxpbf16128 and then select on an <8 x i1> mask.
+ // The label anchors the patterns to this function so they cannot match IR of a later test.
+ // NOTE(review): __U is declared __mmask16 while the select is on <8 x i1>; the intrinsic presumably takes __mmask8 - confirm.
+ __m128bh test_mm_maskz_max_pbh(__mmask16 __U, __m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_maskz_max_pbh
+ // CHECK: @llvm.x86.avx10.vmaxpbf16128
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_maskz_max_pbh(__U, __A, __B);
+}
+
+ // Expects _mm256_min_pbh to emit a call to the @llvm.x86.avx10.vminpbf16256 intrinsic.
+ __m256bh test_mm256_min_pbh(__m256bh __A, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_min_pbh
+ // CHECK: @llvm.x86.avx10.vminpbf16256(
+ return _mm256_min_pbh(__A, __B);
+}
+
+ // Expects the merge-masked min to reference @llvm.x86.avx10.vminpbf16256 and then select on a <16 x i1> mask.
+ // The label anchors the patterns to this function so they cannot match IR of a later test.
+ __m256bh test_mm256_mask_min_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_mask_min_pbh
+ // CHECK: @llvm.x86.avx10.vminpbf16256
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return (__m256bh)_mm256_mask_min_pbh(__W, __U, __A, __B);
+}
+
+ // Expects the zero-masked min to reference @llvm.x86.avx10.vminpbf16256 and then select on a <16 x i1> mask.
+ // The label anchors the patterns to this function so they cannot match IR of a later test.
+ __m256bh test_mm256_maskz_min_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) {
+ // CHECK-LABEL: @test_mm256_maskz_min_pbh
+ // CHECK: @llvm.x86.avx10.vminpbf16256
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_maskz_min_pbh(__U, __A, __B);
+}
+
+ // Expects _mm_min_pbh to emit a call to the @llvm.x86.avx10.vminpbf16128 intrinsic.
+ __m128bh test_mm_min_pbh(__m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_min_pbh
+ // CHECK: @llvm.x86.avx10.vminpbf16128(
+ return _mm_min_pbh(__A, __B);
+}
+
+ // Expects the merge-masked min to reference @llvm.x86.avx10.vminpbf16128 and then select on an <8 x i1> mask.
+ // The label anchors the patterns to this function so they cannot match IR of a later test.
+ // NOTE(review): __U is declared __mmask16 while the select is on <8 x i1>; the intrinsic presumably takes __mmask8 - confirm.
+ __m128bh test_mm_mask_min_pbh(__m128bh __W, __mmask16 __U, __m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_mask_min_pbh
+ // CHECK: @llvm.x86.avx10.vminpbf16128
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return (__m128bh)_mm_mask_min_pbh(__W, __U, __A, __B);
+}
+
+ // Expects the zero-masked min to reference @llvm.x86.avx10.vminpbf16128 and then select on an <8 x i1> mask.
+ // The label anchors the patterns to this function so they cannot match IR of a later test.
+ // NOTE(review): __U is declared __mmask16 while the select is on <8 x i1>; the intrinsic presumably takes __mmask8 - confirm.
+ __m128bh test_mm_maskz_min_pbh(__mmask16 __U, __m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: @test_mm_maskz_min_pbh
+ // CHECK: @llvm.x86.avx10.vminpbf16128
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_maskz_min_pbh(__U, __A, __B);
+}
+
+ // Expects _mm_comeqsbh to emit a call to @llvm.x86.avx10.vcomsbf16eq returning i32.
+ // Value names are matched with `.*` so the pattern does not depend on single-character SSA names.
+ int test_mm_comeqsbh(__m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: test_mm_comeqsbh
+ // CHECK: %{{.*}} = call i32 @llvm.x86.avx10.vcomsbf16eq(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ return _mm_comeqsbh(__A, __B);
+}
+
+ // Expects _mm_comltsbh to emit a call to @llvm.x86.avx10.vcomsbf16lt returning i32.
+ // Value names are matched with `.*` so the pattern does not depend on single-character SSA names.
+ int test_mm_comltsbh(__m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: test_mm_comltsbh
+ // CHECK: %{{.*}} = call i32 @llvm.x86.avx10.vcomsbf16lt(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ return _mm_comltsbh(__A, __B);
+}
+
+ // Expects _mm_comlesbh to emit a call to @llvm.x86.avx10.vcomsbf16le returning i32.
+ // Value names are matched with `.*` so the pattern does not depend on single-character SSA names.
+ int test_mm_comlesbh(__m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: test_mm_comlesbh
+ // CHECK: %{{.*}} = call i32 @llvm.x86.avx10.vcomsbf16le(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ return _mm_comlesbh(__A, __B);
+}
+
+ // Expects _mm_comgtsbh to emit a call to @llvm.x86.avx10.vcomsbf16gt returning i32.
+ // Value names are matched with `.*` so the pattern does not depend on single-character SSA names.
+ int test_mm_comgtsbh(__m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: test_mm_comgtsbh
+ // CHECK: %{{.*}} = call i32 @llvm.x86.avx10.vcomsbf16gt(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ return _mm_comgtsbh(__A, __B);
+}
+
+ // Expects _mm_comgesbh to emit a call to @llvm.x86.avx10.vcomsbf16ge returning i32.
+ // Value names are matched with `.*` so the pattern does not depend on single-character SSA names.
+ int test_mm_comgesbh(__m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: test_mm_comgesbh
+ // CHECK: %{{.*}} = call i32 @llvm.x86.avx10.vcomsbf16ge(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ return _mm_comgesbh(__A, __B);
+}
+
+ // Expects _mm_comneqsbh to emit a call to @llvm.x86.avx10.vcomsbf16neq returning i32.
+ // Value names are matched with `.*` so the pattern does not depend on single-character SSA names.
+ int test_mm_comneqsbh(__m128bh __A, __m128bh __B) {
+ // CHECK-LABEL: test_mm_comneqsbh
+ // CHECK: %{{.*}} = call i32 @llvm.x86.avx10.vcomsbf16neq(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ return _mm_comneqsbh(__A, __B);
+}
+
+ // Expects _CMP_EQ_OQ to lower to `fcmp oeq` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_eq_oq(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: @test_mm256_cmp_pbh_mask_eq_oq
+ // CHECK: fcmp oeq <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_EQ_OQ);
+}
+
+ // Expects _CMP_LT_OS to lower to `fcmp olt` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_lt_os(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_lt_os
+ // CHECK: fcmp olt <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_LT_OS);
+}
+
+ // Expects _CMP_LE_OS to lower to `fcmp ole` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_le_os(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_le_os
+ // CHECK: fcmp ole <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_LE_OS);
+}
+
+ // Expects _CMP_UNORD_Q to lower to `fcmp uno` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_unord_q(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_unord_q
+ // CHECK: fcmp uno <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_UNORD_Q);
+}
+
+ // Expects _CMP_NEQ_UQ to lower to `fcmp une` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_neq_uq(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_neq_uq
+ // CHECK: fcmp une <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_NEQ_UQ);
+}
+
+ // Expects _CMP_NLT_US to lower to `fcmp uge` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_nlt_us(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_nlt_us
+ // CHECK: fcmp uge <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_NLT_US);
+}
+
+ // Expects _CMP_NLE_US to lower to `fcmp ugt` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_nle_us(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_nle_us
+ // CHECK: fcmp ugt <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_NLE_US);
+}
+
+ // Expects _CMP_ORD_Q to lower to `fcmp ord` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_ord_q(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_ord_q
+ // CHECK: fcmp ord <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_ORD_Q);
+}
+
+ // Expects _CMP_EQ_UQ to lower to `fcmp ueq` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_eq_uq(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_eq_uq
+ // CHECK: fcmp ueq <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_EQ_UQ);
+}
+
+ // Expects _CMP_NGE_US to lower to `fcmp ult` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_nge_us(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_nge_us
+ // CHECK: fcmp ult <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_NGE_US);
+}
+
+ // Expects _CMP_NGT_US to lower to `fcmp ule` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_ngt_us(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_ngt_us
+ // CHECK: fcmp ule <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_NGT_US);
+}
+
+ // Expects _CMP_FALSE_OQ to lower to `fcmp false` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_false_oq(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_false_oq
+ // CHECK: fcmp false <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_FALSE_OQ);
+}
+
+ // Expects _CMP_NEQ_OQ to lower to `fcmp one` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_neq_oq(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_neq_oq
+ // CHECK: fcmp one <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_NEQ_OQ);
+}
+
+ // Expects _CMP_GE_OS to lower to `fcmp oge` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_ge_os(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_ge_os
+ // CHECK: fcmp oge <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_GE_OS);
+}
+
+ // Expects _CMP_GT_OS to lower to `fcmp ogt` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_gt_os(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_gt_os
+ // CHECK: fcmp ogt <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_GT_OS);
+}
+
+ // Expects _CMP_TRUE_UQ to lower to `fcmp true` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_true_uq(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_true_uq
+ // CHECK: fcmp true <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_TRUE_UQ);
+}
+
+ // Expects _CMP_EQ_OS to lower to `fcmp oeq` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_eq_os(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_eq_os
+ // CHECK: fcmp oeq <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_EQ_OS);
+}
+
+ // Expects _CMP_LT_OQ to lower to `fcmp olt` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_lt_oq(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_lt_oq
+ // CHECK: fcmp olt <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_LT_OQ);
+}
+
+ // Expects _CMP_LE_OQ to lower to `fcmp ole` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_le_oq(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_le_oq
+ // CHECK: fcmp ole <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_LE_OQ);
+}
+
+ // Expects _CMP_UNORD_S to lower to `fcmp uno` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_unord_s(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_unord_s
+ // CHECK: fcmp uno <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_UNORD_S);
+}
+
+ // Expects _CMP_NEQ_US to lower to `fcmp une` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_neq_us(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_neq_us
+ // CHECK: fcmp une <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_NEQ_US);
+}
+
+ // Expects _CMP_NLT_UQ to lower to `fcmp uge` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_nlt_uq(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_nlt_uq
+ // CHECK: fcmp uge <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_NLT_UQ);
+}
+
+ // Expects _CMP_NLE_UQ to lower to `fcmp ugt` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_nle_uq(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_nle_uq
+ // CHECK: fcmp ugt <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_NLE_UQ);
+}
+
+ // Expects _CMP_ORD_S to lower to `fcmp ord` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_ord_s(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_ord_s
+ // CHECK: fcmp ord <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_ORD_S);
+}
+
+ // Expects _CMP_EQ_US to lower to `fcmp ueq` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_eq_us(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_eq_us
+ // CHECK: fcmp ueq <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_EQ_US);
+}
+
+ // Expects _CMP_NGE_UQ to lower to `fcmp ult` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_nge_uq(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_nge_uq
+ // CHECK: fcmp ult <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_NGE_UQ);
+}
+
+ // Expects _CMP_NGT_UQ to lower to `fcmp ule` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_ngt_uq(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_ngt_uq
+ // CHECK: fcmp ule <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_NGT_UQ);
+}
+
+ // Expects _CMP_FALSE_OS to lower to `fcmp false` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_false_os(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_false_os
+ // CHECK: fcmp false <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_FALSE_OS);
+}
+
+ // Expects _CMP_NEQ_OS to lower to `fcmp one` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_neq_os(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_neq_os
+ // CHECK: fcmp one <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_NEQ_OS);
+}
+
+ // Expects _CMP_GE_OQ to lower to `fcmp oge` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_ge_oq(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_ge_oq
+ // CHECK: fcmp oge <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_GE_OQ);
+}
+
+ // Expects _CMP_GT_OQ to lower to `fcmp ogt` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_gt_oq(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_gt_oq
+ // CHECK: fcmp ogt <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_GT_OQ);
+}
+
+ // Expects _CMP_TRUE_US to lower to `fcmp true` on <16 x bfloat>.
+ __mmask16 test_mm256_cmp_pbh_mask_true_us(__m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_cmp_pbh_mask_true_us
+ // CHECK: fcmp true <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_cmp_pbh_mask(a, b, _CMP_TRUE_US);
+}
+
+ // Masked compare with _CMP_EQ_OQ: expects `fcmp oeq` on <16 x bfloat>. Only the fcmp is matched; the use of mask m is not verified.
+ __mmask16 test_mm256_mask_cmp_pbh_mask_eq_oq(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: @test_mm256_mask_cmp_pbh_mask_eq_oq
+ // CHECK: fcmp oeq <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_OQ);
+}
+
+ // Masked compare with _CMP_LT_OS: expects `fcmp olt` on <16 x bfloat>. Only the fcmp is matched; the use of mask m is not verified.
+ __mmask16 test_mm256_mask_cmp_pbh_mask_lt_os(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_lt_os
+ // CHECK: fcmp olt <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_LT_OS);
+}
+
+ // Masked compare with _CMP_LE_OS: expects `fcmp ole` on <16 x bfloat>. Only the fcmp is matched; the use of mask m is not verified.
+ __mmask16 test_mm256_mask_cmp_pbh_mask_le_os(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_le_os
+ // CHECK: fcmp ole <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_LE_OS);
+}
+
+ // Masked compare with _CMP_UNORD_Q: expects `fcmp uno` on <16 x bfloat>. Only the fcmp is matched; the use of mask m is not verified.
+ __mmask16 test_mm256_mask_cmp_pbh_mask_unord_q(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_unord_q
+ // CHECK: fcmp uno <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_UNORD_Q);
+}
+
+ // Masked compare with _CMP_NEQ_UQ: expects `fcmp une` on <16 x bfloat>. Only the fcmp is matched; the use of mask m is not verified.
+ __mmask16 test_mm256_mask_cmp_pbh_mask_neq_uq(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_neq_uq
+ // CHECK: fcmp une <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_UQ);
+}
+
+ // Masked compare with _CMP_NLT_US: expects `fcmp uge` on <16 x bfloat>. Only the fcmp is matched; the use of mask m is not verified.
+ __mmask16 test_mm256_mask_cmp_pbh_mask_nlt_us(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_nlt_us
+ // CHECK: fcmp uge <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NLT_US);
+}
+
+ // Masked compare with _CMP_NLE_US: expects `fcmp ugt` on <16 x bfloat>. Only the fcmp is matched; the use of mask m is not verified.
+ __mmask16 test_mm256_mask_cmp_pbh_mask_nle_us(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_nle_us
+ // CHECK: fcmp ugt <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NLE_US);
+}
+
+ // Masked compare with _CMP_ORD_Q: expects `fcmp ord` on <16 x bfloat>. Only the fcmp is matched; the use of mask m is not verified.
+ __mmask16 test_mm256_mask_cmp_pbh_mask_ord_q(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ord_q
+ // CHECK: fcmp ord <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_ORD_Q);
+}
+
+ // Masked compare with _CMP_EQ_UQ: expects `fcmp ueq` on <16 x bfloat>. Only the fcmp is matched; the use of mask m is not verified.
+ __mmask16 test_mm256_mask_cmp_pbh_mask_eq_uq(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_eq_uq
+ // CHECK: fcmp ueq <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_UQ);
+}
+
+ // Masked compare with _CMP_NGE_US: expects `fcmp ult` on <16 x bfloat>. Only the fcmp is matched; the use of mask m is not verified.
+ __mmask16 test_mm256_mask_cmp_pbh_mask_nge_us(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_nge_us
+ // CHECK: fcmp ult <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NGE_US);
+}
+
+ // Masked compare with _CMP_NGT_US: expects `fcmp ule` on <16 x bfloat>. Only the fcmp is matched; the use of mask m is not verified.
+ __mmask16 test_mm256_mask_cmp_pbh_mask_ngt_us(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ngt_us
+ // CHECK: fcmp ule <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NGT_US);
+}
+
+ // Masked compare with _CMP_FALSE_OQ: expects `fcmp false` on <16 x bfloat>. Only the fcmp is matched; the use of mask m is not verified.
+ __mmask16 test_mm256_mask_cmp_pbh_mask_false_oq(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_false_oq
+ // CHECK: fcmp false <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_FALSE_OQ);
+}
+
+ // Masked compare with _CMP_NEQ_OQ: expects `fcmp one` on <16 x bfloat>. Only the fcmp is matched; the use of mask m is not verified.
+ __mmask16 test_mm256_mask_cmp_pbh_mask_neq_oq(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_neq_oq
+ // CHECK: fcmp one <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_OQ);
+}
+
+ // Masked compare with _CMP_GE_OS: expects `fcmp oge` on <16 x bfloat>. Only the fcmp is matched; the use of mask m is not verified.
+ __mmask16 test_mm256_mask_cmp_pbh_mask_ge_os(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ge_os
+ // CHECK: fcmp oge <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_GE_OS);
+}
+
+ // Masked compare with _CMP_GT_OS: expects `fcmp ogt` on <16 x bfloat>. Only the fcmp is matched; the use of mask m is not verified.
+ __mmask16 test_mm256_mask_cmp_pbh_mask_gt_os(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_gt_os
+ // CHECK: fcmp ogt <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_GT_OS);
+}
+
+ // Masked compare with _CMP_TRUE_UQ: expects `fcmp true` on <16 x bfloat>. Only the fcmp is matched; the use of mask m is not verified.
+ __mmask16 test_mm256_mask_cmp_pbh_mask_true_uq(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_true_uq
+ // CHECK: fcmp true <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_TRUE_UQ);
+}
+
+ // Masked compare with _CMP_EQ_OS: expects `fcmp oeq` on <16 x bfloat>. Only the fcmp is matched; the use of mask m is not verified.
+ __mmask16 test_mm256_mask_cmp_pbh_mask_eq_os(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_eq_os
+ // CHECK: fcmp oeq <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_OS);
+}
+
+ // Masked compare with _CMP_LT_OQ: expects `fcmp olt` on <16 x bfloat>. Only the fcmp is matched; the use of mask m is not verified.
+ __mmask16 test_mm256_mask_cmp_pbh_mask_lt_oq(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_lt_oq
+ // CHECK: fcmp olt <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_LT_OQ);
+}
+
+ // Masked compare with _CMP_LE_OQ: expects `fcmp ole` on <16 x bfloat>. Only the fcmp is matched; the use of mask m is not verified.
+ __mmask16 test_mm256_mask_cmp_pbh_mask_le_oq(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_le_oq
+ // CHECK: fcmp ole <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_LE_OQ);
+}
+
+ // Masked compare with _CMP_UNORD_S: expects `fcmp uno` on <16 x bfloat>. Only the fcmp is matched; the use of mask m is not verified.
+ __mmask16 test_mm256_mask_cmp_pbh_mask_unord_s(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_unord_s
+ // CHECK: fcmp uno <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_UNORD_S);
+}
+
+ // Masked compare with _CMP_NEQ_US: expects `fcmp une` on <16 x bfloat>. Only the fcmp is matched; the use of mask m is not verified.
+ __mmask16 test_mm256_mask_cmp_pbh_mask_neq_us(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_neq_us
+ // CHECK: fcmp une <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_US);
+}
+
+ // Masked compare with _CMP_NLT_UQ: expects `fcmp uge` on <16 x bfloat>. Only the fcmp is matched; the use of mask m is not verified.
+ __mmask16 test_mm256_mask_cmp_pbh_mask_nlt_uq(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_nlt_uq
+ // CHECK: fcmp uge <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NLT_UQ);
+}
+
+ // Masked compare with _CMP_NLE_UQ: expects `fcmp ugt` on <16 x bfloat>. Only the fcmp is matched; the use of mask m is not verified.
+ __mmask16 test_mm256_mask_cmp_pbh_mask_nle_uq(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_nle_uq
+ // CHECK: fcmp ugt <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NLE_UQ);
+}
+
+ // Masked compare with _CMP_ORD_S: expects `fcmp ord` on <16 x bfloat>. Only the fcmp is matched; the use of mask m is not verified.
+ __mmask16 test_mm256_mask_cmp_pbh_mask_ord_s(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ord_s
+ // CHECK: fcmp ord <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_ORD_S);
+}
+
+ // Masked compare with _CMP_EQ_US: expects `fcmp ueq` on <16 x bfloat>. Only the fcmp is matched; the use of mask m is not verified.
+ __mmask16 test_mm256_mask_cmp_pbh_mask_eq_us(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_eq_us
+ // CHECK: fcmp ueq <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_US);
+}
+
+ // Masked compare with _CMP_NGE_UQ: expects `fcmp ult` on <16 x bfloat>. Only the fcmp is matched; the use of mask m is not verified.
+ __mmask16 test_mm256_mask_cmp_pbh_mask_nge_uq(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_nge_uq
+ // CHECK: fcmp ult <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NGE_UQ);
+}
+
+ // Masked compare with _CMP_NGT_UQ: expects `fcmp ule` on <16 x bfloat>. Only the fcmp is matched; the use of mask m is not verified.
+ __mmask16 test_mm256_mask_cmp_pbh_mask_ngt_uq(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ngt_uq
+ // CHECK: fcmp ule <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NGT_UQ);
+}
+
+ // Masked compare with _CMP_FALSE_OS: expects `fcmp false` on <16 x bfloat>. Only the fcmp is matched; the use of mask m is not verified.
+ __mmask16 test_mm256_mask_cmp_pbh_mask_false_os(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_false_os
+ // CHECK: fcmp false <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_FALSE_OS);
+}
+
+ // Masked compare with _CMP_NEQ_OS: expects `fcmp one` on <16 x bfloat>. Only the fcmp is matched; the use of mask m is not verified.
+ __mmask16 test_mm256_mask_cmp_pbh_mask_neq_os(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_neq_os
+ // CHECK: fcmp one <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_OS);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_ge_oq(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ge_oq
+ // CHECK: fcmp oge <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_GE_OQ);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_gt_oq(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_gt_oq
+ // CHECK: fcmp ogt <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_GT_OQ);
+}
+
+__mmask16 test_mm256_mask_cmp_pbh_mask_true_us(__mmask16 m, __m256bh a, __m256bh b) {
+ // CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_true_us
+ // CHECK: fcmp true <16 x bfloat> %{{.*}}, %{{.*}}
+ return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_TRUE_US);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_eq_oq(__m128bh a, __m128bh b) { // Unmasked 128-bit compares: one test per _CMP_* predicate, each expecting the matching IR fcmp predicate.
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_eq_oq
+ // CHECK: fcmp oeq <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_EQ_OQ);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_lt_os(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_lt_os
+ // CHECK: fcmp olt <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_LT_OS);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_le_os(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_le_os
+ // CHECK: fcmp ole <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_LE_OS);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_unord_q(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_unord_q
+ // CHECK: fcmp uno <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_UNORD_Q);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_neq_uq(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_neq_uq
+ // CHECK: fcmp une <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_NEQ_UQ);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_nlt_us(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_nlt_us
+ // CHECK: fcmp uge <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_NLT_US);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_nle_us(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_nle_us
+ // CHECK: fcmp ugt <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_NLE_US);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_ord_q(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_ord_q
+ // CHECK: fcmp ord <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_ORD_Q);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_eq_uq(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_eq_uq
+ // CHECK: fcmp ueq <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_EQ_UQ);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_nge_us(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_nge_us
+ // CHECK: fcmp ult <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_NGE_US);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_ngt_us(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_ngt_us
+ // CHECK: fcmp ule <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_NGT_US);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_false_oq(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_false_oq
+ // CHECK: fcmp false <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_FALSE_OQ);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_neq_oq(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_neq_oq
+ // CHECK: fcmp one <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_NEQ_OQ);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_ge_os(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_ge_os
+ // CHECK: fcmp oge <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_GE_OS);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_gt_os(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_gt_os
+ // CHECK: fcmp ogt <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_GT_OS);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_true_uq(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_true_uq
+ // CHECK: fcmp true <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_TRUE_UQ);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_eq_os(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_eq_os
+ // CHECK: fcmp oeq <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_EQ_OS);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_lt_oq(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_lt_oq
+ // CHECK: fcmp olt <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_LT_OQ);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_le_oq(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_le_oq
+ // CHECK: fcmp ole <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_LE_OQ);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_unord_s(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_unord_s
+ // CHECK: fcmp uno <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_UNORD_S);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_neq_us(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_neq_us
+ // CHECK: fcmp une <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_NEQ_US);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_nlt_uq(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_nlt_uq
+ // CHECK: fcmp uge <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_NLT_UQ);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_nle_uq(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_nle_uq
+ // CHECK: fcmp ugt <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_NLE_UQ);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_ord_s(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_ord_s
+ // CHECK: fcmp ord <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_ORD_S);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_eq_us(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_eq_us
+ // CHECK: fcmp ueq <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_EQ_US);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_nge_uq(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_nge_uq
+ // CHECK: fcmp ult <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_NGE_UQ);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_ngt_uq(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_ngt_uq
+ // CHECK: fcmp ule <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_NGT_UQ);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_false_os(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_false_os
+ // CHECK: fcmp false <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_FALSE_OS);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_neq_os(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_neq_os
+ // CHECK: fcmp one <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_NEQ_OS);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_ge_oq(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_ge_oq
+ // CHECK: fcmp oge <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_GE_OQ);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_gt_oq(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_gt_oq
+ // CHECK: fcmp ogt <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_GT_OQ);
+}
+
+__mmask8 test_mm_cmp_pbh_mask_true_us(__m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_cmp_pbh_mask_true_us
+ // CHECK: fcmp true <8 x bfloat> %{{.*}}, %{{.*}}
+ return _mm_cmp_pbh_mask(a, b, _CMP_TRUE_US);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_eq_oq(__mmask8 m, __m128bh a, __m128bh b) { // Masked 128-bit compares: each test expects its fcmp predicate and then the AND that applies the mask m.
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_eq_oq
+ // CHECK: fcmp oeq <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_OQ);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_lt_os(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_lt_os
+ // CHECK: fcmp olt <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_LT_OS);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_le_os(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_le_os
+ // CHECK: fcmp ole <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_LE_OS);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_unord_q(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_unord_q
+ // CHECK: fcmp uno <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_UNORD_Q);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_neq_uq(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_neq_uq
+ // CHECK: fcmp une <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_UQ);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_nlt_us(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_nlt_us
+ // CHECK: fcmp uge <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NLT_US);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_nle_us(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_nle_us
+ // CHECK: fcmp ugt <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NLE_US);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_ord_q(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_ord_q
+ // CHECK: fcmp ord <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_ORD_Q);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_eq_uq(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_eq_uq
+ // CHECK: fcmp ueq <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_UQ);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_nge_us(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_nge_us
+ // CHECK: fcmp ult <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NGE_US);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_ngt_us(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_ngt_us
+ // CHECK: fcmp ule <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NGT_US);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_false_oq(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_false_oq
+ // CHECK: fcmp false <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_FALSE_OQ);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_neq_oq(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_neq_oq
+ // CHECK: fcmp one <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_OQ);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_ge_os(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_ge_os
+ // CHECK: fcmp oge <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_GE_OS);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_gt_os(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_gt_os
+ // CHECK: fcmp ogt <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_GT_OS);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_true_uq(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_true_uq
+ // CHECK: fcmp true <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_TRUE_UQ);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_eq_os(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_eq_os
+ // CHECK: fcmp oeq <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_OS);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_lt_oq(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_lt_oq
+ // CHECK: fcmp olt <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_LT_OQ);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_le_oq(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_le_oq
+ // CHECK: fcmp ole <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_LE_OQ);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_unord_s(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_unord_s
+ // CHECK: fcmp uno <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_UNORD_S);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_neq_us(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_neq_us
+ // CHECK: fcmp une <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_US);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_nlt_uq(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_nlt_uq
+ // CHECK: fcmp uge <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NLT_UQ);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_nle_uq(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_nle_uq
+ // CHECK: fcmp ugt <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NLE_UQ);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_ord_s(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_ord_s
+ // CHECK: fcmp ord <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_ORD_S);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_eq_us(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_eq_us
+ // CHECK: fcmp ueq <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_US);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_nge_uq(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_nge_uq
+ // CHECK: fcmp ult <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NGE_UQ);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_ngt_uq(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_ngt_uq
+ // CHECK: fcmp ule <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NGT_UQ);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_false_os(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_false_os
+ // CHECK: fcmp false <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_FALSE_OS);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_neq_os(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_neq_os
+ // CHECK: fcmp one <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_OS);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_ge_oq(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_ge_oq
+ // CHECK: fcmp oge <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_GE_OQ);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_gt_oq(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_gt_oq
+ // CHECK: fcmp ogt <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_GT_OQ);
+}
+
+__mmask8 test_mm_mask_cmp_pbh_mask_true_us(__mmask8 m, __m128bh a, __m128bh b) {
+ // CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_true_us
+ // CHECK: fcmp true <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
+ return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_TRUE_US);
+}
+
+
+__mmask16 test_mm256_mask_fpclass_pbh_mask(__mmask16 __U, __m256bh __A) { // Form taking mask __U, literal 4 as last argument: expects the 256-bit fpclass.nepbf16 intrinsic.
+ // CHECK-LABEL: @test_mm256_mask_fpclass_pbh_mask
+ // CHECK: @llvm.x86.avx10.fpclass.nepbf16.256
+ return _mm256_mask_fpclass_pbh_mask(__U, __A, 4);
+}
+
+__mmask16 test_mm256_fpclass_pbh_mask(__m256bh __A) { // Form without a mask argument: expects the same 256-bit intrinsic.
+ // CHECK-LABEL: @test_mm256_fpclass_pbh_mask
+ // CHECK: @llvm.x86.avx10.fpclass.nepbf16.256
+ return _mm256_fpclass_pbh_mask(__A, 4);
+}
+
+__mmask8 test_mm_mask_fpclass_pbh_mask(__mmask8 __U, __m128bh __A) { // Form taking mask __U: expects the 128-bit fpclass.nepbf16 intrinsic.
+ // CHECK-LABEL: @test_mm_mask_fpclass_pbh_mask
+ // CHECK: @llvm.x86.avx10.fpclass.nepbf16.128
+ return _mm_mask_fpclass_pbh_mask(__U, __A, 4);
+}
+
+__mmask8 test_mm_fpclass_pbh_mask(__m128bh __A) { // Form without a mask argument: expects the same 128-bit intrinsic.
+ // CHECK-LABEL: @test_mm_fpclass_pbh_mask
+ // CHECK: @llvm.x86.avx10.fpclass.nepbf16.128
+ return _mm_fpclass_pbh_mask(__A, 4);
+}
+
+__m256bh test_mm256_scalef_pbh(__m256bh __A, __m256bh __B) { // Plain form: expects the 256-bit mask.scalef.nepbf16 intrinsic.
+ // CHECK-LABEL: @test_mm256_scalef_pbh
+ // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.256
+ return _mm256_scalef_pbh(__A, __B);
+}
+
+__m256bh test_mm256_mask_scalef_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { // mask form (takes __W and __U): expects the same intrinsic.
+ // CHECK-LABEL: @test_mm256_mask_scalef_pbh
+ // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.256
+ return _mm256_mask_scalef_pbh(__W, __U, __A, __B);
+}
+
+__m256bh test_mm256_maskz_scalef_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { // maskz form (takes only __U): expects the same intrinsic.
+ // CHECK-LABEL: @test_mm256_maskz_scalef_pbh
+ // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.256
+ return _mm256_maskz_scalef_pbh(__U, __A, __B);
+}
+
+__m256bh test_mm256_rcp_pbh(__m256bh __A) { // Plain form: expects the 256-bit mask.rcp.nepbf16 intrinsic.
+ // CHECK-LABEL: @test_mm256_rcp_pbh
+ // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.256
+ return _mm256_rcp_pbh(__A);
+}
+
+__m256bh test_mm256_mask_rcp_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) { // mask form (takes __W and __U): expects the same intrinsic; result is cast to __m256bh.
+ // CHECK-LABEL: @test_mm256_mask_rcp_pbh
+ // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.256
+ return (__m256bh)_mm256_mask_rcp_pbh(__W, __U, __A);
+}
+
+__m256bh test_mm256_maskz_rcp_pbh(__mmask16 __U, __m256bh __A) { // maskz form (takes only __U): expects the same intrinsic.
+ // CHECK-LABEL: @test_mm256_maskz_rcp_pbh
+ // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.256
+ return _mm256_maskz_rcp_pbh(__U, __A);
+}
+
+__m256bh test_mm256_getexp_pbh(__m256bh __A) { // Plain form: expects the 256-bit mask.getexp.nepbf16 intrinsic.
+ // CHECK-LABEL: @test_mm256_getexp_pbh
+ // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.256
+ return _mm256_getexp_pbh(__A);
+}
+
+__m256bh test_mm256_mask_getexp_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) { // mask form (takes __W and __U): expects the same intrinsic.
+ // CHECK-LABEL: @test_mm256_mask_getexp_pbh
+ // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.256
+ return _mm256_mask_getexp_pbh(__W, __U, __A);
+}
+
+__m256bh test_mm256_maskz_getexp_pbh(__mmask16 __U, __m256bh __A) { // maskz form (takes only __U): expects the same intrinsic.
+ // CHECK-LABEL: @test_mm256_maskz_getexp_pbh
+ // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.256
+ return _mm256_maskz_getexp_pbh(__U, __A);
+}
+
+__m256bh test_mm256_rsqrt_pbh(__m256bh __A) { // Plain form: expects the 256-bit mask.rsqrt.nepbf16 intrinsic.
+ // CHECK-LABEL: @test_mm256_rsqrt_pbh
+ // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.256
+ return _mm256_rsqrt_pbh(__A);
+}
+
+__m256bh test_mm256_mask_rsqrt_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) { // mask form (takes __W and __U): expects the same intrinsic; result is cast to __m256bh.
+ // CHECK-LABEL: @test_mm256_mask_rsqrt_pbh
+ // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.256
+ return (__m256bh)_mm256_mask_rsqrt_pbh(__W, __U, __A);
+}
+
+__m256bh test_mm256_maskz_rsqrt_pbh(__mmask16 __U, __m256bh __A) { // maskz form (takes only __U): expects the same intrinsic.
+ // CHECK-LABEL: @test_mm256_maskz_rsqrt_pbh
+ // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.256
+ return _mm256_maskz_rsqrt_pbh(__U, __A);
+}
+
+__m256bh test_mm256_reducene_pbh(__m256bh __A) { // Plain form, immediate 3: expects the 256-bit mask.reduce.nepbf16 intrinsic.
+ // CHECK-LABEL: @test_mm256_reducene_pbh
+ // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.256
+ return _mm256_reducene_pbh(__A, 3);
+}
+
+__m256bh test_mm256_mask_reducene_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) { // mask form (takes __W and __U), immediate 1: expects the same intrinsic.
+ // CHECK-LABEL: @test_mm256_mask_reducene_pbh
+ // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.256
+ return _mm256_mask_reducene_pbh(__W, __U, __A, 1);
+}
+
+__m256bh test_mm256_maskz_reducene_pbh(__mmask16 __U, __m256bh __A) { // maskz form (takes only __U), immediate 1: expects the same intrinsic.
+ // CHECK-LABEL: @test_mm256_maskz_reducene_pbh
+ // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.256
+ return _mm256_maskz_reducene_pbh(__U, __A, 1);
+}
+
+__m256bh test_mm256_roundscalene_pbh(__m256bh __A) { // Plain form, immediate 3: expects the 256-bit mask.rndscale.nepbf16 intrinsic.
+ // CHECK-LABEL: @test_mm256_roundscalene_pbh
+ // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.256
+ return _mm256_roundscalene_pbh(__A, 3);
+}
+
+__m256bh test_mm256_mask_roundscalene_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) { // mask form (takes __W and __U), immediate 1: expects the same intrinsic.
+ // CHECK-LABEL: @test_mm256_mask_roundscalene_pbh
+ // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.256
+ return _mm256_mask_roundscalene_pbh(__W, __U, __A, 1);
+}
+
+__m256bh test_mm256_maskz_roundscalene_pbh(__mmask16 __U, __m256bh __A) { // maskz form (takes only __U), immediate 1: expects the same intrinsic.
+ // CHECK-LABEL: @test_mm256_maskz_roundscalene_pbh
+ // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.256
+ return _mm256_maskz_roundscalene_pbh(__U, __A, 1);
+}
+
+__m256bh test_mm256_getmant_pbh(__m256bh __A) { // Plain form with _MM_MANT_NORM_p5_2 / _MM_MANT_SIGN_nan: expects the 256-bit mask.getmant.nepbf16 intrinsic.
+ // CHECK-LABEL: @test_mm256_getmant_pbh
+ // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.256
+ return _mm256_getmant_pbh(__A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan);
+}
+
+__m256bh test_mm256_mask_getmant_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) { // mask form (takes __W and __U): expects the same intrinsic.
+ // CHECK-LABEL: @test_mm256_mask_getmant_pbh
+ // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.256
+ return _mm256_mask_getmant_pbh(__W, __U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan);
+}
+
+__m256bh test_mm256_maskz_getmant_pbh(__mmask16 __U, __m256bh __A) { // maskz form (takes only __U): expects the same intrinsic.
+ // CHECK-LABEL: @test_mm256_maskz_getmant_pbh
+ // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.256
+ return _mm256_maskz_getmant_pbh(__U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan);
+}
+
+__m256bh test_mm256_sqrt_pbh(__m256bh __A) { // Plain form: expects a call to the generic llvm.sqrt.v16bf16 intrinsic.
+ // CHECK-LABEL: @test_mm256_sqrt_pbh
+ // CHECK: call <16 x bfloat> @llvm.sqrt.v16bf16(<16 x bfloat> %{{.*}})
+ return _mm256_sqrt_pbh(__A);
+}
+
+__m256bh test_mm256_mask_sqrt_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) { // mask form (takes __W and __U): expects llvm.sqrt.v16bf16 and then a <16 x i1> select.
+ // CHECK-LABEL: @test_mm256_mask_sqrt_pbh
+ // CHECK: @llvm.sqrt.v16bf16
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return (__m256bh)_mm256_mask_sqrt_pbh(__W, __U, __A);
+}
+
+__m256bh test_mm256_maskz_sqrt_pbh(__mmask16 __U, __m256bh __A) { // maskz form (takes only __U): expects llvm.sqrt.v16bf16 and then a <16 x i1> select.
+ // CHECK-LABEL: @test_mm256_maskz_sqrt_pbh
+ // CHECK: @llvm.sqrt.v16bf16
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_maskz_sqrt_pbh(__U, __A);
+}
+
+__m128bh test_mm_scalef_pbh(__m128bh __A, __m128bh __B) { // Plain form: expects the 128-bit mask.scalef.nepbf16 intrinsic.
+ // CHECK-LABEL: @test_mm_scalef_pbh
+ // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.128
+ return _mm_scalef_pbh(__A, __B);
+}
+
+__m128bh test_mm_mask_scalef_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { // mask form (takes __W and __U): expects the same intrinsic.
+ // CHECK-LABEL: @test_mm_mask_scalef_pbh
+ // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.128
+ return _mm_mask_scalef_pbh(__W, __U, __A, __B);
+}
+
+__m128bh test_mm_maskz_scalef_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) { // maskz form (takes only __U): expects the same intrinsic.
+ // CHECK-LABEL: @test_mm_maskz_scalef_pbh
+ // CHECK: @llvm.x86.avx10.mask.scalef.nepbf16.128
+ return _mm_maskz_scalef_pbh(__U, __A, __B);
+}
+
+__m128bh test_mm_rcp_pbh(__m128bh __A) { // Plain form: expects the 128-bit mask.rcp.nepbf16 intrinsic.
+ // CHECK-LABEL: @test_mm_rcp_pbh
+ // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.128
+ return _mm_rcp_pbh(__A);
+}
+
+__m128bh test_mm_mask_rcp_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) { // mask form (takes __W and __U): expects the same intrinsic; result is cast to __m128bh.
+ // CHECK-LABEL: @test_mm_mask_rcp_pbh
+ // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.128
+ return (__m128bh)_mm_mask_rcp_pbh(__W, __U, __A);
+}
+
+__m128bh test_mm_maskz_rcp_pbh(__mmask8 __U, __m128bh __A) { // maskz form (takes only __U): expects the same intrinsic.
+ // CHECK-LABEL: @test_mm_maskz_rcp_pbh
+ // CHECK: @llvm.x86.avx10.mask.rcp.nepbf16.128
+ return _mm_maskz_rcp_pbh(__U, __A);
+}
+
+__m128bh test_mm_getexp_pbh(__m128bh __A) { // Plain form: expects the 128-bit mask.getexp.nepbf16 intrinsic.
+ // CHECK-LABEL: @test_mm_getexp_pbh
+ // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.128
+ return _mm_getexp_pbh(__A);
+}
+
+__m128bh test_mm_mask_getexp_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) { // mask form (takes __W and __U): expects the same intrinsic.
+ // CHECK-LABEL: @test_mm_mask_getexp_pbh
+ // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.128
+ return _mm_mask_getexp_pbh(__W, __U, __A);
+}
+
+__m128bh test_mm_maskz_getexp_pbh(__mmask8 __U, __m128bh __A) { // maskz form (takes only __U): expects the same intrinsic.
+ // CHECK-LABEL: @test_mm_maskz_getexp_pbh
+ // CHECK: @llvm.x86.avx10.mask.getexp.nepbf16.128
+ return _mm_maskz_getexp_pbh(__U, __A);
+}
+
+__m128bh test_mm_rsqrt_pbh(__m128bh __A) { // Plain form: expects the 128-bit mask.rsqrt.nepbf16 intrinsic.
+ // CHECK-LABEL: @test_mm_rsqrt_pbh
+ // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.128
+ return _mm_rsqrt_pbh(__A);
+}
+
+__m128bh test_mm_mask_rsqrt_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) { // mask form (takes __W and __U): expects the same intrinsic; result is cast to __m128bh.
+ // CHECK-LABEL: @test_mm_mask_rsqrt_pbh
+ // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.128
+ return (__m128bh)_mm_mask_rsqrt_pbh(__W, __U, __A);
+}
+
+__m128bh test_mm_maskz_rsqrt_pbh(__mmask8 __U, __m128bh __A) { // maskz form (takes only __U): expects the same intrinsic.
+ // CHECK-LABEL: @test_mm_maskz_rsqrt_pbh
+ // CHECK: @llvm.x86.avx10.mask.rsqrt.nepbf16.128
+ return _mm_maskz_rsqrt_pbh(__U, __A);
+}
+
+__m128bh test_mm_reducene_pbh(__m128bh __A) { // Plain form, immediate 3: expects the 128-bit mask.reduce.nepbf16 intrinsic.
+ // CHECK-LABEL: @test_mm_reducene_pbh
+ // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.128
+ return _mm_reducene_pbh(__A, 3);
+}
+
+__m128bh test_mm_mask_reducene_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) { // mask form (takes __W and __U), immediate 1: expects the same intrinsic.
+ // CHECK-LABEL: @test_mm_mask_reducene_pbh
+ // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.128
+ return _mm_mask_reducene_pbh(__W, __U, __A, 1);
+}
+
+__m128bh test_mm_maskz_reducene_pbh(__mmask8 __U, __m128bh __A) { // maskz form (takes only __U), immediate 1: expects the same intrinsic.
+ // CHECK-LABEL: @test_mm_maskz_reducene_pbh
+ // CHECK: @llvm.x86.avx10.mask.reduce.nepbf16.128
+ return _mm_maskz_reducene_pbh(__U, __A, 1);
+}
+
+__m128bh test_mm_roundscalene_pbh(__m128bh __A) { // Plain form, immediate 3: expects the 128-bit mask.rndscale.nepbf16 intrinsic.
+ // CHECK-LABEL: @test_mm_roundscalene_pbh
+ // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.128
+ return _mm_roundscalene_pbh(__A, 3);
+}
+
+__m128bh test_mm_mask_roundscalene_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) { // mask form (takes __W and __U), immediate 1: expects the same intrinsic.
+ // CHECK-LABEL: @test_mm_mask_roundscalene_pbh
+ // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.128
+ return _mm_mask_roundscalene_pbh(__W, __U, __A, 1);
+}
+
+__m128bh test_mm_maskz_roundscalene_pbh(__mmask8 __U, __m128bh __A) { // maskz form (takes only __U), immediate 1: expects the same intrinsic.
+ // CHECK-LABEL: @test_mm_maskz_roundscalene_pbh
+ // CHECK: @llvm.x86.avx10.mask.rndscale.nepbf16.128
+ return _mm_maskz_roundscalene_pbh(__U, __A, 1);
+}
+
+__m128bh test_mm_getmant_pbh(__m128bh __A) { // Plain form with _MM_MANT_NORM_p5_2 / _MM_MANT_SIGN_nan: expects the 128-bit mask.getmant.nepbf16 intrinsic.
+ // CHECK-LABEL: @test_mm_getmant_pbh
+ // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.128
+ return _mm_getmant_pbh(__A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan);
+}
+
+__m128bh test_mm_mask_getmant_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) { // mask form (takes __W and __U): expects the same intrinsic.
+ // CHECK-LABEL: @test_mm_mask_getmant_pbh
+ // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.128
+ return _mm_mask_getmant_pbh(__W, __U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan);
+}
+
+__m128bh test_mm_maskz_getmant_pbh(__mmask8 __U, __m128bh __A) { // maskz form (takes only __U): expects the same intrinsic.
+ // CHECK-LABEL: @test_mm_maskz_getmant_pbh
+ // CHECK: @llvm.x86.avx10.mask.getmant.nepbf16.128
+ return _mm_maskz_getmant_pbh(__U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan);
+}
+
+__m128bh test_mm_sqrt_pbh(__m128bh __A) { // Plain form: expects a call to the generic llvm.sqrt.v8bf16 intrinsic on an SSA value.
+ // CHECK-LABEL: @test_mm_sqrt_pbh
+ // CHECK: call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> %{{.*}})
+ return _mm_sqrt_pbh(__A);
+}
+
+__m128bh test_mm_mask_sqrt_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) { // mask form (takes __W and __U): expects llvm.sqrt.v8bf16 and then a <8 x i1> select.
+ // CHECK-LABEL: @test_mm_mask_sqrt_pbh
+ // CHECK: call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> %{{.*}})
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return (__m128bh)_mm_mask_sqrt_pbh(__W, __U, __A);
+}
+
+__m128bh test_mm_maskz_sqrt_pbh(__mmask8 __U, __m128bh __A) { // maskz form (takes only __U): expects llvm.sqrt.v8bf16 and then a <8 x i1> select.
+ // CHECK-LABEL: @test_mm_maskz_sqrt_pbh
+ // CHECK: call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> %{{.*}})
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_maskz_sqrt_pbh(__U, __A);
+}
+
+__m256bh test_mm256_fmaddne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { // Plain form: expects a call to the generic llvm.fma.v16bf16 intrinsic.
+ // CHECK-LABEL: @test_mm256_fmaddne_pbh
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ return _mm256_fmaddne_pbh(__A, __B, __C);
+}
+
+__m256bh test_mm256_mask_fmaddne_pbh(__m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) { // mask form (mask is the 2nd argument): expects the fma call and then a <16 x i1> select.
+ // CHECK-LABEL: @test_mm256_mask_fmaddne_pbh
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_mask_fmaddne_pbh(__A, __U, __B, __C);
+}
+
+__m256bh test_mm256_mask3_fmaddne_pbh(__m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) { // mask3 form (mask is the last argument): expects the fma call and then a <16 x i1> select.
+ // CHECK-LABEL: @test_mm256_mask3_fmaddne_pbh
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_mask3_fmaddne_pbh(__A, __B, __C, __U);
+}
+
+__m256bh test_mm256_maskz_fmaddne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) { // maskz form (mask is the 1st argument): expects the fma call and then a <16 x i1> select.
+ // CHECK-LABEL: @test_mm256_maskz_fmaddne_pbh
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_maskz_fmaddne_pbh(__U, __A, __B, __C);
+}
+
+__m256bh test_mm256_fmsubne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { // Plain form: expects one fneg and then a call to llvm.fma.v16bf16.
+ // CHECK-LABEL: @test_mm256_fmsubne_pbh
+ // CHECK: fneg
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ return _mm256_fmsubne_pbh(__A, __B, __C);
+}
+
+__m256bh test_mm256_mask_fmsubne_pbh(__m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) { // mask form (mask is the 2nd argument): expects fneg, the fma call, then a <16 x i1> select.
+ // CHECK-LABEL: @test_mm256_mask_fmsubne_pbh
+ // CHECK: fneg
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_mask_fmsubne_pbh(__A, __U, __B, __C);
+}
+
+__m256bh test_mm256_mask3_fmsubne_pbh(__m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) { // mask3 form (mask is the last argument): expects fneg, the fma call, then a <16 x i1> select.
+ // CHECK-LABEL: @test_mm256_mask3_fmsubne_pbh
+ // CHECK: fneg
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_mask3_fmsubne_pbh(__A, __B, __C, __U);
+}
+
+__m256bh test_mm256_maskz_fmsubne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) { // maskz form (mask is the 1st argument): expects fneg, the fma call, then a <16 x i1> select.
+ // CHECK-LABEL: @test_mm256_maskz_fmsubne_pbh
+ // CHECK: fneg
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_maskz_fmsubne_pbh(__U, __A, __B, __C);
+}
+
+__m256bh test_mm256_fnmaddne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { // Plain form: expects one fneg and then a call to llvm.fma.v16bf16.
+ // CHECK-LABEL: @test_mm256_fnmaddne_pbh
+ // CHECK: fneg
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ return _mm256_fnmaddne_pbh(__A, __B, __C);
+}
+
+__m256bh test_mm256_mask_fnmaddne_pbh(__m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) { // mask form (mask is the 2nd argument): expects fneg, the fma call, then a <16 x i1> select.
+ // CHECK-LABEL: @test_mm256_mask_fnmaddne_pbh
+ // CHECK: fneg
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_mask_fnmaddne_pbh(__A, __U, __B, __C);
+}
+
+__m256bh test_mm256_mask3_fnmaddne_pbh(__m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) { // mask3 form (mask is the last argument): expects fneg, the fma call, then a <16 x i1> select.
+ // CHECK-LABEL: @test_mm256_mask3_fnmaddne_pbh
+ // CHECK: fneg
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_mask3_fnmaddne_pbh(__A, __B, __C, __U);
+}
+
+__m256bh test_mm256_maskz_fnmaddne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) { // maskz form (mask is the 1st argument): expects fneg, the fma call, then a <16 x i1> select.
+ // CHECK-LABEL: @test_mm256_maskz_fnmaddne_pbh
+ // CHECK: fneg
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_maskz_fnmaddne_pbh(__U, __A, __B, __C);
+}
+
+__m256bh test_mm256_fnmsubne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { // Plain form: expects two fneg instructions and then a call to llvm.fma.v16bf16.
+ // CHECK-LABEL: @test_mm256_fnmsubne_pbh
+ // CHECK: fneg
+ // CHECK: fneg
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ return _mm256_fnmsubne_pbh(__A, __B, __C);
+}
+
+__m256bh test_mm256_mask_fnmsubne_pbh(__m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) { // mask form (mask is the 2nd argument): expects two fneg, the fma call, then a <16 x i1> select.
+ // CHECK-LABEL: @test_mm256_mask_fnmsubne_pbh
+ // CHECK: fneg
+ // CHECK: fneg
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_mask_fnmsubne_pbh(__A, __U, __B, __C);
+}
+
+__m256bh test_mm256_mask3_fnmsubne_pbh(__m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) { // mask3 form (mask is the last argument): expects two fneg, the fma call, then a <16 x i1> select.
+ // CHECK-LABEL: @test_mm256_mask3_fnmsubne_pbh
+ // CHECK: fneg
+ // CHECK: fneg
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_mask3_fnmsubne_pbh(__A, __B, __C, __U);
+}
+
+__m256bh test_mm256_maskz_fnmsubne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) { // maskz form (mask is the 1st argument): expects two fneg, the fma call, then a <16 x i1> select.
+ // CHECK-LABEL: @test_mm256_maskz_fnmsubne_pbh
+ // CHECK: fneg
+ // CHECK: fneg
+ // CHECK: call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}})
+ // CHECK: select <16 x i1> %{{.*}}, <16 x bfloat> %{{.*}}, <16 x bfloat> %{{.*}}
+ return _mm256_maskz_fnmsubne_pbh(__U, __A, __B, __C);
+}
+
+__m128bh test_mm_fmaddne_pbh(__m128bh __A, __m128bh __B, __m128bh __C) {
+ // CHECK-LABEL: @test_mm_fmaddne_pbh
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ return _mm_fmaddne_pbh(__A, __B, __C);
+}
+
+__m128bh test_mm_mask_fmaddne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) {
+ // CHECK-LABEL: @test_mm_mask_fmaddne_pbh
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_mask_fmaddne_pbh(__A, __U, __B, __C);
+}
+
+__m128bh test_mm_mask3_fmaddne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) {
+ // CHECK-LABEL: @test_mm_mask3_fmaddne_pbh
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_mask3_fmaddne_pbh(__A, __B, __C, __U);
+}
+
+__m128bh test_mm_maskz_fmaddne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
+ // CHECK-LABEL: @test_mm_maskz_fmaddne_pbh
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_maskz_fmaddne_pbh(__U, __A, __B, __C);
+}
+
+__m128bh test_mm_fmsubne_pbh(__m128bh __A, __m128bh __B, __m128bh __C) {
+ // CHECK-LABEL: @test_mm_fmsubne_pbh
+ // CHECK: fneg
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ return _mm_fmsubne_pbh(__A, __B, __C);
+}
+
+__m128bh test_mm_mask_fmsubne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) {
+ // CHECK-LABEL: @test_mm_mask_fmsubne_pbh
+ // CHECK: fneg
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_mask_fmsubne_pbh(__A, __U, __B, __C);
+}
+
+__m128bh test_mm_mask3_fmsubne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) {
+ // CHECK-LABEL: @test_mm_mask3_fmsubne_pbh
+ // CHECK: fneg
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_mask3_fmsubne_pbh(__A, __B, __C, __U);
+}
+
+__m128bh test_mm_maskz_fmsubne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
+ // CHECK-LABEL: @test_mm_maskz_fmsubne_pbh
+ // CHECK: fneg
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_maskz_fmsubne_pbh(__U, __A, __B, __C);
+}
+
+__m128bh test_mm_fnmaddne_pbh(__m128bh __A, __m128bh __B, __m128bh __C) {
+ // CHECK-LABEL: @test_mm_fnmaddne_pbh
+ // CHECK: fneg
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ return _mm_fnmaddne_pbh(__A, __B, __C);
+}
+
+__m128bh test_mm_mask_fnmaddne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) {
+ // CHECK-LABEL: @test_mm_mask_fnmaddne_pbh
+ // CHECK: fneg
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_mask_fnmaddne_pbh(__A, __U, __B, __C);
+}
+
+__m128bh test_mm_mask3_fnmaddne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) {
+ // CHECK-LABEL: @test_mm_mask3_fnmaddne_pbh
+ // CHECK: fneg
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_mask3_fnmaddne_pbh(__A, __B, __C, __U);
+}
+
+__m128bh test_mm_maskz_fnmaddne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
+ // CHECK-LABEL: @test_mm_maskz_fnmaddne_pbh
+ // CHECK: fneg
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_maskz_fnmaddne_pbh(__U, __A, __B, __C);
+}
+
+__m128bh test_mm_fnmsubne_pbh(__m128bh __A, __m128bh __B, __m128bh __C) {
+ // CHECK-LABEL: @test_mm_fnmsubne_pbh
+ // CHECK: fneg
+ // CHECK: fneg
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ return _mm_fnmsubne_pbh(__A, __B, __C);
+}
+
+__m128bh test_mm_mask_fnmsubne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) {
+ // CHECK-LABEL: @test_mm_mask_fnmsubne_pbh
+ // CHECK: fneg
+ // CHECK: fneg
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_mask_fnmsubne_pbh(__A, __U, __B, __C);
+}
+
+__m128bh test_mm_mask3_fnmsubne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) {
+ // CHECK-LABEL: @test_mm_mask3_fnmsubne_pbh
+ // CHECK: fneg
+ // CHECK: fneg
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_mask3_fnmsubne_pbh(__A, __B, __C, __U);
+}
+
+__m128bh test_mm_maskz_fnmsubne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
+ // CHECK-LABEL: @test_mm_maskz_fnmsubne_pbh
+ // CHECK: fneg
+ // CHECK: fneg
+ // CHECK: call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}})
+ // CHECK: select <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}}, <8 x bfloat> %{{.*}}
+ return _mm_maskz_fnmsubne_pbh(__U, __A, __B, __C);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 8d000ed1e4f859..67114399d17f86 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -7219,3 +7219,413 @@ def int_x86_avx10_mask_vcvtneph2hf8s512 : ClangBuiltin<"__builtin_ia32_vcvtneph2
DefaultAttrsIntrinsic<[llvm_v32i8_ty], [llvm_v32f16_ty, llvm_v32i8_ty, llvm_i32_ty],
[IntrNoMem]>;
}
+
+//===----------------------------------------------------------------------===//
+let TargetPrefix = "x86" in {
+ def int_x86_avx10_vaddnepbf16512
+ : ClangBuiltin<"__builtin_ia32_vaddnepbf16512">,
+ DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vaddnepbf16256
+ : ClangBuiltin<"__builtin_ia32_vaddnepbf16256">,
+ DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vaddnepbf16128
+ : ClangBuiltin<"__builtin_ia32_vaddnepbf16128">,
+ DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vsubnepbf16512
+ : ClangBuiltin<"__builtin_ia32_vsubnepbf16512">,
+ DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vsubnepbf16256
+ : ClangBuiltin<"__builtin_ia32_vsubnepbf16256">,
+ DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vsubnepbf16128
+ : ClangBuiltin<"__builtin_ia32_vsubnepbf16128">,
+ DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vmulnepbf16512
+ : ClangBuiltin<"__builtin_ia32_vmulnepbf16512">,
+ DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vmulnepbf16256
+ : ClangBuiltin<"__builtin_ia32_vmulnepbf16256">,
+ DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vmulnepbf16128
+ : ClangBuiltin<"__builtin_ia32_vmulnepbf16128">,
+ DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vdivnepbf16512
+ : ClangBuiltin<"__builtin_ia32_vdivnepbf16512">,
+ DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vdivnepbf16256
+ : ClangBuiltin<"__builtin_ia32_vdivnepbf16256">,
+ DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vdivnepbf16128
+ : ClangBuiltin<"__builtin_ia32_vdivnepbf16128">,
+ DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vmaxpbf16512
+ : ClangBuiltin<"__builtin_ia32_vmaxpbf16512">,
+ DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vmaxpbf16256
+ : ClangBuiltin<"__builtin_ia32_vmaxpbf16256">,
+ DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vmaxpbf16128
+ : ClangBuiltin<"__builtin_ia32_vmaxpbf16128">,
+ DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vminpbf16512
+ : ClangBuiltin<"__builtin_ia32_vminpbf16512">,
+ DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vminpbf16256
+ : ClangBuiltin<"__builtin_ia32_vminpbf16256">,
+ DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vminpbf16128
+ : ClangBuiltin<"__builtin_ia32_vminpbf16128">,
+ DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vcomsbf16eq
+ : ClangBuiltin<"__builtin_ia32_vcomsbf16eq">,
+ DefaultAttrsIntrinsic<[llvm_i32_ty],
+ [llvm_v8bf16_ty, llvm_v8bf16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx10_vcomsbf16lt
+ : ClangBuiltin<"__builtin_ia32_vcomsbf16lt">,
+ DefaultAttrsIntrinsic<[llvm_i32_ty],
+ [llvm_v8bf16_ty, llvm_v8bf16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx10_vcomsbf16le
+ : ClangBuiltin<"__builtin_ia32_vcomsbf16le">,
+ DefaultAttrsIntrinsic<[llvm_i32_ty],
+ [llvm_v8bf16_ty, llvm_v8bf16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx10_vcomsbf16gt
+ : ClangBuiltin<"__builtin_ia32_vcomsbf16gt">,
+ DefaultAttrsIntrinsic<[llvm_i32_ty],
+ [llvm_v8bf16_ty, llvm_v8bf16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx10_vcomsbf16ge
+ : ClangBuiltin<"__builtin_ia32_vcomsbf16ge">,
+ DefaultAttrsIntrinsic<[llvm_i32_ty],
+ [llvm_v8bf16_ty, llvm_v8bf16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx10_vcomsbf16neq
+ : ClangBuiltin<"__builtin_ia32_vcomsbf16neq">,
+ DefaultAttrsIntrinsic<[llvm_i32_ty],
+ [llvm_v8bf16_ty, llvm_v8bf16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx10_mask_rsqrt_nepbf16_128
+ : ClangBuiltin<"__builtin_ia32_vrsqrtpbf16128_mask">,
+ DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx10_mask_rsqrt_nepbf16_256
+ : ClangBuiltin<"__builtin_ia32_vrsqrtpbf16256_mask">,
+ DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_i16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_mask_rsqrt_nepbf16_512
+ : ClangBuiltin<"__builtin_ia32_vrsqrtpbf16512_mask">,
+ DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_i32_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_mask_rcp_nepbf16_128
+ : ClangBuiltin<"__builtin_ia32_vrcppbf16128_mask">,
+ DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx10_mask_rcp_nepbf16_256
+ : ClangBuiltin<"__builtin_ia32_vrcppbf16256_mask">,
+ DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_i16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_mask_rcp_nepbf16_512
+ : ClangBuiltin<"__builtin_ia32_vrcppbf16512_mask">,
+ DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_i32_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_mask_reduce_nepbf16_128
+ : ClangBuiltin<"__builtin_ia32_vreducenepbf16128_mask">,
+ DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_i32_ty, llvm_v8bf16_ty, llvm_i8_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx10_mask_reduce_nepbf16_256
+ : ClangBuiltin<"__builtin_ia32_vreducenepbf16256_mask">,
+ DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_i32_ty, llvm_v16bf16_ty, llvm_i16_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx10_mask_reduce_nepbf16_512
+ : ClangBuiltin<"__builtin_ia32_vreducenepbf16512_mask">,
+ DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_i32_ty, llvm_v32bf16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx10_fpclass_nepbf16_128
+ : DefaultAttrsIntrinsic<[ llvm_v8i1_ty ], [ llvm_v8bf16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx10_fpclass_nepbf16_256
+ : DefaultAttrsIntrinsic<[ llvm_v16i1_ty ], [ llvm_v16bf16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx10_fpclass_nepbf16_512
+ : DefaultAttrsIntrinsic<[ llvm_v32i1_ty ], [ llvm_v32bf16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx10_mask_getexp_nepbf16_128
+ : ClangBuiltin<"__builtin_ia32_vgetexppbf16128_mask">,
+ DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx10_mask_getexp_nepbf16_256
+ : ClangBuiltin<"__builtin_ia32_vgetexppbf16256_mask">,
+ DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_i16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_mask_getexp_nepbf16_512
+ : ClangBuiltin<"__builtin_ia32_vgetexppbf16512_mask">,
+ DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_i32_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_mask_getmant_nepbf16_128
+ : ClangBuiltin<"__builtin_ia32_vgetmantpbf16128_mask">,
+ DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_i32_ty, llvm_v8bf16_ty, llvm_i8_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx10_mask_getmant_nepbf16_256
+ : ClangBuiltin<"__builtin_ia32_vgetmantpbf16256_mask">,
+ DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_i32_ty, llvm_v16bf16_ty, llvm_i16_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx10_mask_getmant_nepbf16_512
+ : ClangBuiltin<"__builtin_ia32_vgetmantpbf16512_mask">,
+ DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_i32_ty, llvm_v32bf16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx10_mask_rndscale_nepbf16_128
+ : ClangBuiltin<"__builtin_ia32_vrndscalenepbf16_128_mask">,
+ DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_i32_ty, llvm_v8bf16_ty, llvm_i8_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx10_mask_rndscale_nepbf16_256
+ : ClangBuiltin<"__builtin_ia32_vrndscalenepbf16_256_mask">,
+ DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_i32_ty, llvm_v16bf16_ty, llvm_i16_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx10_mask_rndscale_nepbf16_512
+ : ClangBuiltin<"__builtin_ia32_vrndscalenepbf16_mask">,
+ DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_i32_ty, llvm_v32bf16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx10_mask_scalef_nepbf16_128
+ : ClangBuiltin<"__builtin_ia32_vscalefpbf16128_mask">,
+ DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_mask_scalef_nepbf16_256
+ : ClangBuiltin<"__builtin_ia32_vscalefpbf16256_mask">,
+ DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_i16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_mask_scalef_nepbf16_512
+ : ClangBuiltin<"__builtin_ia32_vscalefpbf16512_mask">,
+ DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_i32_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmadd213nepbf16256
+ : ClangBuiltin<"__builtin_ia32_vfmadd213nepbf16256">,
+ DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmadd213nepbf16128
+ : ClangBuiltin<"__builtin_ia32_vfmadd213nepbf16128">,
+ DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmadd132nepbf16512
+ : ClangBuiltin<"__builtin_ia32_vfmadd132nepbf16512">,
+ DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmadd132nepbf16256
+ : ClangBuiltin<"__builtin_ia32_vfmadd132nepbf16256">,
+ DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmadd132nepbf16128
+ : ClangBuiltin<"__builtin_ia32_vfmadd132nepbf16128">,
+ DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmadd231nepbf16512
+ : ClangBuiltin<"__builtin_ia32_vfmadd231nepbf16512">,
+ DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmadd231nepbf16256
+ : ClangBuiltin<"__builtin_ia32_vfmadd231nepbf16256">,
+ DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmadd231nepbf16128
+ : ClangBuiltin<"__builtin_ia32_vfmadd231nepbf16128">,
+ DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmsub213nepbf16512
+ : ClangBuiltin<"__builtin_ia32_vfmsub213nepbf16512">,
+ DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmsub213nepbf16256
+ : ClangBuiltin<"__builtin_ia32_vfmsub213nepbf16256">,
+ DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmsub213nepbf16128
+ : ClangBuiltin<"__builtin_ia32_vfmsub213nepbf16128">,
+ DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmsub132nepbf16512
+ : ClangBuiltin<"__builtin_ia32_vfmsub132nepbf16512">,
+ DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmsub132nepbf16256
+ : ClangBuiltin<"__builtin_ia32_vfmsub132nepbf16256">,
+ DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmsub132nepbf16128
+ : ClangBuiltin<"__builtin_ia32_vfmsub132nepbf16128">,
+ DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmsub231nepbf16512
+ : ClangBuiltin<"__builtin_ia32_vfmsub231nepbf16512">,
+ DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmsub231nepbf16256
+ : ClangBuiltin<"__builtin_ia32_vfmsub231nepbf16256">,
+ DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfmsub231nepbf16128
+ : ClangBuiltin<"__builtin_ia32_vfmsub231nepbf16128">,
+ DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmadd213nepbf16512
+ : ClangBuiltin<"__builtin_ia32_vfnmadd213nepbf16512">,
+ DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmadd213nepbf16256
+ : ClangBuiltin<"__builtin_ia32_vfnmadd213nepbf16256">,
+ DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmadd213nepbf16128
+ : ClangBuiltin<"__builtin_ia32_vfnmadd213nepbf16128">,
+ DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmadd132nepbf16512
+ : ClangBuiltin<"__builtin_ia32_vfnmadd132nepbf16512">,
+ DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmadd132nepbf16256
+ : ClangBuiltin<"__builtin_ia32_vfnmadd132nepbf16256">,
+ DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmadd132nepbf16128
+ : ClangBuiltin<"__builtin_ia32_vfnmadd132nepbf16128">,
+ DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmadd231nepbf16512
+ : ClangBuiltin<"__builtin_ia32_vfnmadd231nepbf16512">,
+ DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmadd231nepbf16256
+ : ClangBuiltin<"__builtin_ia32_vfnmadd231nepbf16256">,
+ DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmadd231nepbf16128
+ : ClangBuiltin<"__builtin_ia32_vfnmadd231nepbf16128">,
+ DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmsub213nepbf16512
+ : ClangBuiltin<"__builtin_ia32_vfnmsub213nepbf16512">,
+ DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmsub213nepbf16256
+ : ClangBuiltin<"__builtin_ia32_vfnmsub213nepbf16256">,
+ DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmsub213nepbf16128
+ : ClangBuiltin<"__builtin_ia32_vfnmsub213nepbf16128">,
+ DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmsub132nepbf16512
+ : ClangBuiltin<"__builtin_ia32_vfnmsub132nepbf16512">,
+ DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmsub132nepbf16256
+ : ClangBuiltin<"__builtin_ia32_vfnmsub132nepbf16256">,
+ DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmsub132nepbf16128
+ : ClangBuiltin<"__builtin_ia32_vfnmsub132nepbf16128">,
+ DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmsub231nepbf16512
+ : ClangBuiltin<"__builtin_ia32_vfnmsub231nepbf16512">,
+ DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
+ [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmsub231nepbf16256
+ : ClangBuiltin<"__builtin_ia32_vfnmsub231nepbf16256">,
+ DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
+ [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx10_vfnmsub231nepbf16128
+ : ClangBuiltin<"__builtin_ia32_vfnmsub231nepbf16128">,
+ DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
+ [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [ IntrNoMem ]>;
+}
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index eda3c9fd50bf56..c242b406f21349 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -3305,11 +3305,13 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
if ((PatchedName.starts_with("cmp") || PatchedName.starts_with("vcmp")) &&
(PatchedName.ends_with("ss") || PatchedName.ends_with("sd") ||
PatchedName.ends_with("sh") || PatchedName.ends_with("ph") ||
- PatchedName.ends_with("ps") || PatchedName.ends_with("pd"))) {
+ PatchedName.ends_with("pbf16") || PatchedName.ends_with("ps") ||
+ PatchedName.ends_with("pd"))) {
bool IsVCMP = PatchedName[0] == 'v';
unsigned CCIdx = IsVCMP ? 4 : 3;
+ unsigned suffixLength = PatchedName.ends_with("pbf16") ? 5 : 2;
unsigned CC = StringSwitch<unsigned>(
- PatchedName.slice(CCIdx, PatchedName.size() - 2))
+ PatchedName.slice(CCIdx, PatchedName.size() - suffixLength))
.Case("eq", 0x00)
.Case("eq_oq", 0x00)
.Case("lt", 0x01)
@@ -3372,6 +3374,8 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
PatchedName = "vcmpsh";
else if (PatchedName.ends_with("ph"))
PatchedName = "vcmpph";
+ else if (PatchedName.ends_with("pbf16"))
+ PatchedName = "vcmppbf16";
else
llvm_unreachable("Unexpected suffix!");
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
index 33104524c5a890..8fcc1c10d93a04 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
@@ -167,6 +167,15 @@ bool X86ATTInstPrinter::printVecCompareInstr(const MCInst *MI,
case X86::VCMPPHZrmbi: case X86::VCMPPHZrmbik:
case X86::VCMPPHZrrib: case X86::VCMPPHZrribk:
case X86::VCMPSHZrrib_Int: case X86::VCMPSHZrrib_Intk:
+ case X86::VCMPPBF16Z128rmi: case X86::VCMPPBF16Z128rri:
+ case X86::VCMPPBF16Z256rmi: case X86::VCMPPBF16Z256rri:
+ case X86::VCMPPBF16Zrmi: case X86::VCMPPBF16Zrri:
+ case X86::VCMPPBF16Z128rmik: case X86::VCMPPBF16Z128rrik:
+ case X86::VCMPPBF16Z256rmik: case X86::VCMPPBF16Z256rrik:
+ case X86::VCMPPBF16Zrmik: case X86::VCMPPBF16Zrrik:
+ case X86::VCMPPBF16Z128rmbi: case X86::VCMPPBF16Z128rmbik:
+ case X86::VCMPPBF16Z256rmbi: case X86::VCMPPBF16Z256rmbik:
+ case X86::VCMPPBF16Zrmbi: case X86::VCMPPBF16Zrmbik:
if (Imm >= 0 && Imm <= 31) {
OS << '\t';
printCMPMnemonic(MI, /*IsVCMP*/true, OS);
@@ -205,7 +214,8 @@ bool X86ATTInstPrinter::printVecCompareInstr(const MCInst *MI,
printwordmem(MI, CurOp--, OS);
else
printdwordmem(MI, CurOp--, OS);
- } else if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XD) {
+ } else if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XD &&
+ (Desc.TSFlags & X86II::OpMapMask) != X86II::TA) {
assert((Desc.TSFlags & X86II::OpMapMask) != X86II::TA &&
"Unexpected op map!");
printqwordmem(MI, CurOp--, OS);
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
index ad1f2dc532d1c2..e7ba13215feb59 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
@@ -309,6 +309,17 @@ void X86InstPrinterCommon::printCMPMnemonic(const MCInst *MI, bool IsVCmp,
case X86::VCMPSHZrmi_Intk: case X86::VCMPSHZrri_Intk:
OS << "sh\t";
break;
+ case X86::VCMPPBF16Z128rmi: case X86::VCMPPBF16Z128rri:
+ case X86::VCMPPBF16Z256rmi: case X86::VCMPPBF16Z256rri:
+ case X86::VCMPPBF16Zrmi: case X86::VCMPPBF16Zrri:
+ case X86::VCMPPBF16Z128rmik: case X86::VCMPPBF16Z128rrik:
+ case X86::VCMPPBF16Z256rmik: case X86::VCMPPBF16Z256rrik:
+ case X86::VCMPPBF16Zrmik: case X86::VCMPPBF16Zrrik:
+ case X86::VCMPPBF16Z128rmbi: case X86::VCMPPBF16Z128rmbik:
+ case X86::VCMPPBF16Z256rmbi: case X86::VCMPPBF16Z256rmbik:
+ case X86::VCMPPBF16Zrmbi: case X86::VCMPPBF16Zrmbik:
+ OS << "pbf16\t";
+ break;
}
}
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
index 7c8459a546516e..39600ffcadd8ee 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
@@ -146,6 +146,15 @@ bool X86IntelInstPrinter::printVecCompareInstr(const MCInst *MI, raw_ostream &OS
case X86::VCMPPHZrmbi: case X86::VCMPPHZrmbik:
case X86::VCMPPHZrrib: case X86::VCMPPHZrribk:
case X86::VCMPSHZrrib_Int: case X86::VCMPSHZrrib_Intk:
+ case X86::VCMPPBF16Z128rmi: case X86::VCMPPBF16Z128rri:
+ case X86::VCMPPBF16Z256rmi: case X86::VCMPPBF16Z256rri:
+ case X86::VCMPPBF16Zrmi: case X86::VCMPPBF16Zrri:
+ case X86::VCMPPBF16Z128rmik: case X86::VCMPPBF16Z128rrik:
+ case X86::VCMPPBF16Z256rmik: case X86::VCMPPBF16Z256rrik:
+ case X86::VCMPPBF16Zrmik: case X86::VCMPPBF16Zrrik:
+ case X86::VCMPPBF16Z128rmbi: case X86::VCMPPBF16Z128rmbik:
+ case X86::VCMPPBF16Z256rmbi: case X86::VCMPPBF16Z256rmbik:
+ case X86::VCMPPBF16Zrmbi: case X86::VCMPPBF16Zrmbik:
if (Imm >= 0 && Imm <= 31) {
OS << '\t';
printCMPMnemonic(MI, /*IsVCMP*/true, OS);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1a6be4eb5af1ef..5fb932e7009558 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2359,6 +2359,31 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CONCAT_VECTORS, MVT::v32bf16, Custom);
}
+ if (!Subtarget.useSoftFloat() && Subtarget.hasAVX10_2()) {
+ addRegisterClass(MVT::v8bf16, &X86::VR128XRegClass);
+ addRegisterClass(MVT::v16bf16, &X86::VR256XRegClass);
+ addRegisterClass(MVT::v32bf16, &X86::VR512RegClass);
+
+ setOperationAction(ISD::FADD, MVT::v32bf16, Legal);
+ setOperationAction(ISD::FSUB, MVT::v32bf16, Legal);
+ setOperationAction(ISD::FMUL, MVT::v32bf16, Legal);
+ setOperationAction(ISD::FDIV, MVT::v32bf16, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v32bf16, Legal);
+ setOperationAction(ISD::FMA, MVT::v32bf16, Legal);
+ setOperationAction(ISD::SETCC, MVT::v32bf16, Custom);
+ if (Subtarget.hasVLX()) {
+ for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
+ setOperationAction(ISD::FADD, VT, Legal);
+ setOperationAction(ISD::FSUB, VT, Legal);
+ setOperationAction(ISD::FMUL, VT, Legal);
+ setOperationAction(ISD::FDIV, VT, Legal);
+ setOperationAction(ISD::FSQRT, VT, Legal);
+ setOperationAction(ISD::FMA, VT, Legal);
+ setOperationAction(ISD::SETCC, VT, Custom);
+ }
+ }
+ }
+
if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
@@ -12211,7 +12236,8 @@ static bool isShuffleFoldableLoad(SDValue V) {
template<typename T>
static bool isSoftF16(T VT, const X86Subtarget &Subtarget) {
T EltVT = VT.getScalarType();
- return EltVT == MVT::bf16 || (EltVT == MVT::f16 && !Subtarget.hasFP16());
+ return (EltVT == MVT::bf16 && !Subtarget.hasAVX10_2()) ||
+ (EltVT == MVT::f16 && !Subtarget.hasFP16());
}
/// Try to lower insertion of a single element into a zero vector.
@@ -23264,7 +23290,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
if (isFP) {
MVT EltVT = Op0.getSimpleValueType().getVectorElementType();
- assert(EltVT == MVT::f16 || EltVT == MVT::f32 || EltVT == MVT::f64);
+ assert(EltVT == MVT::bf16 || EltVT == MVT::f16 || EltVT == MVT::f32 ||
+ EltVT == MVT::f64);
if (isSoftF16(EltVT, Subtarget))
return SDValue();
@@ -23281,7 +23308,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
Op0.getSimpleValueType().is512BitVector())) {
#ifndef NDEBUG
unsigned Num = VT.getVectorNumElements();
- assert(Num <= 16 || (Num == 32 && EltVT == MVT::f16));
+ assert(Num <= 16 ||
+ (Num == 32 && (EltVT == MVT::f16 || EltVT == MVT::bf16)));
#endif
Opc = IsStrict ? X86ISD::STRICT_CMPM : X86ISD::CMPM;
} else {
@@ -54154,7 +54182,8 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
EVT ScalarVT = VT.getScalarType();
if (((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) ||
!Subtarget.hasAnyFMA()) &&
- !(ScalarVT == MVT::f16 && Subtarget.hasFP16()))
+ !(ScalarVT == MVT::f16 && Subtarget.hasFP16()) &&
+ !(ScalarVT == MVT::bf16 && Subtarget.hasAVX10_2()))
return SDValue();
auto invertIfNegative = [&DAG, &TLI, &DCI](SDValue &V) {
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index a518347cfcd82e..8fc66d24b1658c 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -910,3 +910,313 @@ multiclass avx10_convert_2op_nomb<string OpcodeStr, AVX512VLVectorVTInfo _dest,
defm VCVTHF82PH : avx10_convert_2op_nomb<"vcvthf82ph", avx512vl_f16_info,
avx512vl_i8_info, 0x1e, X86vcvthf82ph>,
AVX512XDIi8Base, T_MAP5, EVEX, EVEX_CD8<16, CD8VH>;
+
+//-------------------------------------------------
+// AVX10 BF16 instructions
+//-------------------------------------------------
+
+// VADDNEPBF16, VSUBNEPBF16, VMULNEPBF16, VDIVNEPBF16, VMAXPBF16, VMINPBF16
+multiclass avx10_fp_binopne_int_pbf16<bits<8> opc, string OpcodeStr,
+ X86SchedWriteSizes sched,
+ bit IsCommutable = 0> {
+ let Predicates = [HasAVX10_2_512] in
+ defm PBF16Z : avx512_fp_packed<opc, OpcodeStr,
+ !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16512"),
+ !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16512"),
+ v32bf16_info, sched.PH.ZMM, IsCommutable>, EVEX_V512,
+ T_MAP5, PD, EVEX_CD8<16, CD8VF>;
+ let Predicates = [HasAVX10_2] in {
+ defm PBF16Z128 : avx512_fp_packed<opc, OpcodeStr,
+ !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16128"),
+ !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16128"),
+ v8bf16x_info, sched.PH.XMM, IsCommutable>, EVEX_V128,
+ T_MAP5, PD, EVEX_CD8<16, CD8VF>;
+ defm PBF16Z256 : avx512_fp_packed<opc, OpcodeStr,
+ !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16256"),
+ !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16256"),
+ v16bf16x_info, sched.PH.YMM, IsCommutable>, EVEX_V256,
+ T_MAP5, PD, EVEX_CD8<16, CD8VF>;
+ }
+}
+
+multiclass avx10_fp_binop_pbf16<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ X86SchedWriteSizes sched,
+ bit IsCommutable = 0,
+ SDPatternOperator MaskOpNode = OpNode> {
+ let Predicates = [HasAVX10_2_512] in
+ defm NEPBF16Z : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
+ v32bf16_info, sched.PH.ZMM, IsCommutable>, EVEX_V512,
+ T_MAP5, PD, EVEX_CD8<16, CD8VF>;
+ let Predicates = [HasAVX10_2] in {
+ defm NEPBF16Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
+ v8bf16x_info, sched.PH.XMM, IsCommutable>, EVEX_V128,
+ T_MAP5, PD, EVEX_CD8<16, CD8VF>;
+ defm NEPBF16Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
+ v16bf16x_info, sched.PH.YMM, IsCommutable>, EVEX_V256,
+ T_MAP5, PD, EVEX_CD8<16, CD8VF>;
+ }
+}
+
+let Uses = []<Register>, mayRaiseFPException = 0 in {
+defm VADD : avx10_fp_binop_pbf16<0x58, "vaddne", fadd, SchedWriteFAddSizes, 1>;
+defm VSUB : avx10_fp_binop_pbf16<0x5C, "vsubne", fsub, SchedWriteFAddSizes, 0>;
+defm VMUL : avx10_fp_binop_pbf16<0x59, "vmulne", fmul, SchedWriteFMulSizes, 0>;
+defm VDIV : avx10_fp_binop_pbf16<0x5E, "vdivne", fdiv, SchedWriteFDivSizes, 0>;
+defm VMIN : avx10_fp_binopne_int_pbf16<0x5D, "vmin", SchedWriteFCmpSizes, 0>;
+defm VMAX : avx10_fp_binopne_int_pbf16<0x5F, "vmax", SchedWriteFCmpSizes, 0>;
+}
+
+// VCOMSBF16 (scalar compare: gated on HasAVX10_2, no 512-bit vector needed)
+let Uses = []<Register>, mayRaiseFPException = 0,
+  Defs = [EFLAGS], Predicates = [HasAVX10_2] in {
+  defm VCOMSBF16Z : sse12_ord_cmp<0x2F, FR16X, null_frag, bf16, f16mem, loadf16,
+                                  "comsbf16", SSEPackedSingle>, T_MAP5, PD, EVEX,
+                                  VEX_LIG, EVEX_CD8<16, CD8VT1>;
+
+  let isCodeGenOnly = 1 in {
+    defm VCOMSBF16Z : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8bf16, f16mem,
+                                        sse_load_bf16, "comsbf16", SSEPackedSingle>,
+                                        T_MAP5, PD, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
+  }
+}
+
+// VCMPPBF16
+multiclass avx10_vcmp_common_bf16<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
+ let mayRaiseFPException = 0 in {
+ defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, $src2, $src1", "$src1, $src2, $cc",
+ (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
+ (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
+ 1>, Sched<[sched]>;
+
+ defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
+ (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, $src2, $src1", "$src1, $src2, $cc",
+ (X86cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
+ timm:$cc),
+ (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
+ timm:$cc)>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
+
+ defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, ${src2}"#_.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_.BroadcastStr#", $cc",
+ (X86cmpm (_.VT _.RC:$src1),
+ (_.VT (_.BroadcastLdFrag addr:$src2)),
+ timm:$cc),
+ (X86cmpm_su (_.VT _.RC:$src1),
+ (_.VT (_.BroadcastLdFrag addr:$src2)),
+ timm:$cc)>,
+ EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ }
+}
+
+multiclass avx10_vcmp_bf16<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
+ let Predicates = [HasAVX10_2_512] in
+ defm Z : avx10_vcmp_common_bf16<sched.ZMM, _.info512>, EVEX_V512;
+ let Predicates = [HasAVX10_2] in {
+ defm Z128 : avx10_vcmp_common_bf16<sched.XMM, _.info128>, EVEX_V128;
+ defm Z256 : avx10_vcmp_common_bf16<sched.YMM, _.info256>, EVEX_V256;
+ }
+}
+
+defm VCMPPBF16 : avx10_vcmp_bf16<SchedWriteFCmp, avx512vl_bf16_info>,
+ AVX512XDIi8Base, EVEX, VVVV, EVEX_CD8<16, CD8VF>, TA;
+
+
+// VSQRTNEPBF16
+multiclass avx10_sqrt_packed_bf16<bits<8> opc, string OpcodeStr,
+ X86SchedWriteSizes sched> {
+ let Predicates = [HasAVX10_2_512] in
+ defm NEPBF16Z : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pbf16"),
+ sched.PH.ZMM, v32bf16_info>,
+ EVEX_V512, PD, T_MAP5, EVEX_CD8<16, CD8VF>;
+ let Predicates = [HasAVX10_2] in {
+ defm NEPBF16Z128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pbf16"),
+ sched.PH.XMM, v8bf16x_info>,
+ EVEX_V128, PD, T_MAP5, EVEX_CD8<16, CD8VF>;
+ defm NEPBF16Z256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pbf16"),
+ sched.PH.YMM, v16bf16x_info>,
+ EVEX_V256, PD, T_MAP5, EVEX_CD8<16, CD8VF>;
+ }
+}
+
+let Uses = []<Register>, mayRaiseFPException = 0 in
+defm VSQRT : avx10_sqrt_packed_bf16<0x51, "vsqrtne", SchedWriteFSqrtSizes>;
+
+// VRSQRTPBF16, VRCPPBF16, VGETEXPPBF16
+multiclass avx10_fp14_pbf16<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasAVX10_2_512] in
+ defm PBF16Z : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pbf16"),
+ OpNode, sched.ZMM, v32bf16_info>,
+ EVEX_V512;
+ let Predicates = [HasAVX10_2] in {
+ defm PBF16Z128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pbf16"),
+ OpNode, sched.XMM, v8bf16x_info>,
+ EVEX_V128;
+ defm PBF16Z256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pbf16"),
+ OpNode, sched.YMM, v16bf16x_info>,
+ EVEX_V256;
+ }
+}
+
+defm VRSQRT : avx10_fp14_pbf16<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>,
+ T_MAP6, PS, EVEX_CD8<16, CD8VF>;
+defm VRCP : avx10_fp14_pbf16<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>,
+ T_MAP6, PS, EVEX_CD8<16, CD8VF>;
+defm VGETEXP : avx10_fp14_pbf16<0x42, "vgetexp", X86fgetexp, SchedWriteFRnd>,
+ T_MAP5, EVEX_CD8<16, CD8VF>;
+
+// VSCALEFPBF16
+multiclass avx10_fp_scalef_bf16<bits<8> opc, string OpcodeStr,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasAVX10_2_512] in
+ defm PBF16Z : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32bf16_info>,
+ EVEX_V512, T_MAP6,PS, EVEX_CD8<16, CD8VF>;
+ let Predicates = [HasAVX10_2] in {
+ defm PBF16Z128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8bf16x_info>,
+ EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6,PS;
+ defm PBF16Z256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16bf16x_info>,
+ EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6,PS;
+ }
+}
+
+let Uses = []<Register>, mayRaiseFPException = 0 in
+defm VSCALEF : avx10_fp_scalef_bf16<0x2C, "vscalef", SchedWriteFAdd>;
+
+// VREDUCENEPBF16, VRNDSCALENEPBF16, VGETMANTPBF16
+multiclass avx10_common_unary_fp_packed_imm_bf16<string OpcodeStr,
+ AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode, X86SchedWriteWidths sched> {
+ let Predicates = [HasAVX10_2_512] in
+ defm PBF16Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.ZMM, _.info512>, EVEX_V512;
+ let Predicates = [HasAVX10_2] in {
+ defm PBF16Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.XMM, _.info128>, EVEX_V128;
+ defm PBF16Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.YMM, _.info256>, EVEX_V256;
+ }
+}
+
+let Uses = []<Register>, mayRaiseFPException = 0 in {
+defm VREDUCENE : avx10_common_unary_fp_packed_imm_bf16<"vreducene", avx512vl_bf16_info, 0x56,
+ X86VReduce, X86VReduce, SchedWriteFRnd>,
+ AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
+defm VRNDSCALENE : avx10_common_unary_fp_packed_imm_bf16<"vrndscalene", avx512vl_bf16_info, 0x08,
+ X86any_VRndScale, X86VRndScale, SchedWriteFRnd>,
+ AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
+defm VGETMANT : avx10_common_unary_fp_packed_imm_bf16<"vgetmant", avx512vl_bf16_info, 0x26,
+ X86VGetMant, X86VGetMant, SchedWriteFRnd>,
+ AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
+}
+
+// VFPCLASSPBF16
+multiclass avx10_fp_fpclass_bf16<string OpcodeStr, bits<8> opcVec,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasAVX10_2_512] in
+ defm PBF16Z : avx512_vector_fpclass<opcVec, OpcodeStr, sched.ZMM,
+ avx512vl_bf16_info.info512, "z">, EVEX_V512;
+ let Predicates = [HasAVX10_2] in {
+ defm PBF16Z128 : avx512_vector_fpclass<opcVec, OpcodeStr, sched.XMM,
+ avx512vl_bf16_info.info128, "x">, EVEX_V128;
+ defm PBF16Z256 : avx512_vector_fpclass<opcVec, OpcodeStr, sched.YMM,
+ avx512vl_bf16_info.info256, "y">, EVEX_V256;
+ }
+}
+
+// FIXME: need to set Uses = []<Register> but avx512_vector_fpclass has InstAlias.
+defm VFPCLASS : avx10_fp_fpclass_bf16<"vfpclass", 0x66, SchedWriteFCmp>,
+ AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
+
+// VF[,N]M[ADD,SUB][132,213,231]NEPBF16
+multiclass avx10_fma3p_213_bf16<bits<8> opc, string OpcodeStr,
+ SDPatternOperator OpNode, SDNode MaskOpNode,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasAVX10_2_512] in
+ defm PBF16Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.ZMM, v32bf16_info>, EVEX_V512, T_MAP6, PS,
+ EVEX_CD8<16, CD8VF>;
+ let Predicates = [HasAVX10_2] in {
+ defm PBF16Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.XMM, v8bf16x_info>, EVEX_V128, T_MAP6, PS,
+ EVEX_CD8<16, CD8VF>;
+ defm PBF16Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.YMM, v16bf16x_info>, EVEX_V256, T_MAP6, PS,
+ EVEX_CD8<16, CD8VF>;
+ }
+}
+
+let Uses = []<Register>, mayRaiseFPException = 0 in {
+defm VFMADD213NE : avx10_fma3p_213_bf16<0xA8, "vfmadd213nepbf16", any_fma,
+ fma, SchedWriteFMA>;
+defm VFMSUB213NE : avx10_fma3p_213_bf16<0xAA, "vfmsub213nepbf16", X86any_Fmsub,
+ X86Fmsub, SchedWriteFMA>;
+defm VFNMADD213NE : avx10_fma3p_213_bf16<0xAC, "vfnmadd213nepbf16", X86any_Fnmadd,
+ X86Fnmadd, SchedWriteFMA>;
+defm VFNMSUB213NE : avx10_fma3p_213_bf16<0xAE, "vfnmsub213nepbf16", X86any_Fnmsub,
+ X86Fnmsub, SchedWriteFMA>;
+}
+
+multiclass avx10_fma3p_231_bf16<bits<8> opc, string OpcodeStr,
+ SDPatternOperator OpNode, SDNode MaskOpNode,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasAVX10_2_512] in
+ defm PBF16Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.ZMM, v32bf16_info>, EVEX_V512, T_MAP6, PS,
+ EVEX_CD8<16, CD8VF>;
+ let Predicates = [HasAVX10_2] in {
+ defm PBF16Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.XMM, v8bf16x_info>, EVEX_V128, T_MAP6, PS,
+ EVEX_CD8<16, CD8VF>;
+ defm PBF16Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.YMM, v16bf16x_info>, EVEX_V256, T_MAP6, PS,
+ EVEX_CD8<16, CD8VF>;
+ }
+}
+
+let Uses = []<Register>, mayRaiseFPException = 0 in {
+defm VFMADD231NE : avx10_fma3p_231_bf16<0xB8, "vfmadd231nepbf16", any_fma,
+ fma, SchedWriteFMA>;
+defm VFMSUB231NE : avx10_fma3p_231_bf16<0xBA, "vfmsub231nepbf16", X86any_Fmsub,
+ X86Fmsub, SchedWriteFMA>;
+defm VFNMADD231NE : avx10_fma3p_231_bf16<0xBC, "vfnmadd231nepbf16", X86any_Fnmadd,
+ X86Fnmadd, SchedWriteFMA>;
+defm VFNMSUB231NE : avx10_fma3p_231_bf16<0xBE, "vfnmsub231nepbf16", X86any_Fnmsub,
+ X86Fnmsub, SchedWriteFMA>;
+}
+
+multiclass avx10_fma3p_132_bf16<bits<8> opc, string OpcodeStr,
+ SDPatternOperator OpNode, SDNode MaskOpNode,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasAVX10_2_512] in
+ defm PBF16Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.ZMM, v32bf16_info>, EVEX_V512, T_MAP6, PS,
+ EVEX_CD8<16, CD8VF>;
+ let Predicates = [HasAVX10_2] in {
+ defm PBF16Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.XMM, v8bf16x_info>, EVEX_V128, T_MAP6, PS,
+ EVEX_CD8<16, CD8VF>;
+ defm PBF16Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.YMM, v16bf16x_info>, EVEX_V256, T_MAP6, PS,
+ EVEX_CD8<16, CD8VF>;
+ }
+}
+
+let Uses = []<Register>, mayRaiseFPException = 0 in {
+defm VFMADD132NE : avx10_fma3p_132_bf16<0x98, "vfmadd132nepbf16", any_fma,
+ fma, SchedWriteFMA>;
+defm VFMSUB132NE : avx10_fma3p_132_bf16<0x9A, "vfmsub132nepbf16", X86any_Fmsub,
+ X86Fmsub, SchedWriteFMA>;
+defm VFNMADD132NE : avx10_fma3p_132_bf16<0x9C, "vfnmadd132nepbf16", X86any_Fnmadd,
+ X86Fnmadd, SchedWriteFMA>;
+defm VFNMSUB132NE : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_Fnmsub,
+ X86Fnmsub, SchedWriteFMA>;
+}
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index e81f2a2fbb9512..cafb7b45a8dff5 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -211,6 +211,12 @@ def X86CmpMaskCC :
SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>,
SDTCisVec<1>, SDTCisSameAs<2, 1>,
SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i8>]>;
+
+def X86CmpMaskCC_Int :
+ SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>,
+ SDTCisVec<1>, SDTCisSameAs<2, 1>,
+ SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i32>]>;
+
def X86MaskCmpMaskCC :
SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>,
SDTCisVec<1>, SDTCisSameAs<2, 1>,
@@ -1139,6 +1145,10 @@ def X86SubVBroadcastld256 : PatFrag<(ops node:$src),
// only load a single element.
// FIXME: We should add more canolicalizing in DAGCombine. Particulary removing
// the simple_load case.
+def sse_load_bf16 : PatFrags<(ops node:$ptr),
+ [(v8bf16 (simple_load node:$ptr)),
+ (v8bf16 (X86vzload16 node:$ptr)),
+ (v8bf16 (scalar_to_vector (loadf16 node:$ptr)))]>;
def sse_load_f16 : PatFrags<(ops node:$ptr),
[(v8f16 (simple_load node:$ptr)),
(v8f16 (X86vzload16 node:$ptr)),
diff --git a/llvm/lib/Target/X86/X86InstrUtils.td b/llvm/lib/Target/X86/X86InstrUtils.td
index 208af630a352d7..531268b41da968 100644
--- a/llvm/lib/Target/X86/X86InstrUtils.td
+++ b/llvm/lib/Target/X86/X86InstrUtils.td
@@ -313,7 +313,7 @@ def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
def v32f16_info : X86VectorVTInfo<32, f16, VR512, "ph">;
-def v32bf16_info: X86VectorVTInfo<32, bf16, VR512, "pbh">;
+def v32bf16_info: X86VectorVTInfo<32, bf16, VR512, "pbf16">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">;
@@ -323,7 +323,7 @@ def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
def v16f16x_info : X86VectorVTInfo<16, f16, VR256X, "ph">;
-def v16bf16x_info: X86VectorVTInfo<16, bf16, VR256X, "pbh">;
+def v16bf16x_info: X86VectorVTInfo<16, bf16, VR256X, "pbf16">;
def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;
@@ -332,7 +332,7 @@ def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
def v8f16x_info : X86VectorVTInfo<8, f16, VR128X, "ph">;
-def v8bf16x_info : X86VectorVTInfo<8, bf16, VR128X, "pbh">;
+def v8bf16x_info : X86VectorVTInfo<8, bf16, VR128X, "pbf16">;
def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 68c1ce072549b9..4f39e66e22c238 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -389,6 +389,54 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx_vpermilvar_ps, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
X86_INTRINSIC_DATA(avx_vpermilvar_ps_256, INTR_TYPE_2OP, X86ISD::VPERMILPV,
0),
+ X86_INTRINSIC_DATA(avx10_fpclass_nepbf16_128, INTR_TYPE_2OP,
+ X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx10_fpclass_nepbf16_256, INTR_TYPE_2OP,
+ X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx10_fpclass_nepbf16_512, INTR_TYPE_2OP,
+ X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx10_mask_getexp_nepbf16_128, INTR_TYPE_1OP_MASK,
+ X86ISD::FGETEXP, 0),
+ X86_INTRINSIC_DATA(avx10_mask_getexp_nepbf16_256, INTR_TYPE_1OP_MASK,
+ X86ISD::FGETEXP, 0),
+ X86_INTRINSIC_DATA(avx10_mask_getexp_nepbf16_512, INTR_TYPE_1OP_MASK,
+ X86ISD::FGETEXP, 0),
+ X86_INTRINSIC_DATA(avx10_mask_getmant_nepbf16_128, INTR_TYPE_2OP_MASK,
+ X86ISD::VGETMANT, 0),
+ X86_INTRINSIC_DATA(avx10_mask_getmant_nepbf16_256, INTR_TYPE_2OP_MASK,
+ X86ISD::VGETMANT, 0),
+ X86_INTRINSIC_DATA(avx10_mask_getmant_nepbf16_512, INTR_TYPE_2OP_MASK,
+ X86ISD::VGETMANT, 0),
+ X86_INTRINSIC_DATA(avx10_mask_rcp_nepbf16_128, INTR_TYPE_1OP_MASK,
+ X86ISD::RCP14, 0),
+ X86_INTRINSIC_DATA(avx10_mask_rcp_nepbf16_256, INTR_TYPE_1OP_MASK,
+ X86ISD::RCP14, 0),
+ X86_INTRINSIC_DATA(avx10_mask_rcp_nepbf16_512, INTR_TYPE_1OP_MASK,
+ X86ISD::RCP14, 0),
+ X86_INTRINSIC_DATA(avx10_mask_reduce_nepbf16_128, INTR_TYPE_2OP_MASK,
+ X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx10_mask_reduce_nepbf16_256, INTR_TYPE_2OP_MASK,
+ X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx10_mask_reduce_nepbf16_512, INTR_TYPE_2OP_MASK,
+ X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx10_mask_rndscale_nepbf16_128, INTR_TYPE_2OP_MASK,
+ X86ISD::VRNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx10_mask_rndscale_nepbf16_256, INTR_TYPE_2OP_MASK,
+ X86ISD::VRNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx10_mask_rndscale_nepbf16_512, INTR_TYPE_2OP_MASK,
+ X86ISD::VRNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx10_mask_rsqrt_nepbf16_128, INTR_TYPE_1OP_MASK,
+ X86ISD::RSQRT14, 0),
+ X86_INTRINSIC_DATA(avx10_mask_rsqrt_nepbf16_256, INTR_TYPE_1OP_MASK,
+ X86ISD::RSQRT14, 0),
+ X86_INTRINSIC_DATA(avx10_mask_rsqrt_nepbf16_512, INTR_TYPE_1OP_MASK,
+ X86ISD::RSQRT14, 0),
+ X86_INTRINSIC_DATA(avx10_mask_scalef_nepbf16_128, INTR_TYPE_2OP_MASK,
+ X86ISD::SCALEF, 0),
+ X86_INTRINSIC_DATA(avx10_mask_scalef_nepbf16_256, INTR_TYPE_2OP_MASK,
+ X86ISD::SCALEF, 0),
+ X86_INTRINSIC_DATA(avx10_mask_scalef_nepbf16_512, INTR_TYPE_2OP_MASK,
+ X86ISD::SCALEF, 0),
X86_INTRINSIC_DATA(avx10_mask_vcmppd256, CMP_MASK_CC, X86ISD::CMPMM,
X86ISD::CMPMM_SAE),
X86_INTRINSIC_DATA(avx10_mask_vcmpph256, CMP_MASK_CC, X86ISD::CMPMM,
@@ -655,6 +703,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx10_vaddps256, INTR_TYPE_2OP, ISD::FADD,
X86ISD::FADD_RND),
+ X86_INTRINSIC_DATA(avx10_vcomsbf16eq, COMI, X86ISD::COMI, ISD::SETEQ),
+ X86_INTRINSIC_DATA(avx10_vcomsbf16ge, COMI, X86ISD::COMI, ISD::SETGE),
+ X86_INTRINSIC_DATA(avx10_vcomsbf16gt, COMI, X86ISD::COMI, ISD::SETGT),
+ X86_INTRINSIC_DATA(avx10_vcomsbf16le, COMI, X86ISD::COMI, ISD::SETLE),
+ X86_INTRINSIC_DATA(avx10_vcomsbf16lt, COMI, X86ISD::COMI, ISD::SETLT),
+ X86_INTRINSIC_DATA(avx10_vcomsbf16neq, COMI, X86ISD::COMI, ISD::SETNE),
X86_INTRINSIC_DATA(avx10_vcvtne2ph2bf8128, INTR_TYPE_2OP,
X86ISD::VCVTNE2PH2BF8, 0),
X86_INTRINSIC_DATA(avx10_vcvtne2ph2bf8256, INTR_TYPE_2OP,
diff --git a/llvm/test/CodeGen/X86/avx10_2_512bf16-arith.ll b/llvm/test/CodeGen/X86/avx10_2_512bf16-arith.ll
new file mode 100644
index 00000000000000..33c40ac6bb32c7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx10_2_512bf16-arith.ll
@@ -0,0 +1,587 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X86
+
+define <32 x bfloat> @test_int_x86_avx10_vaddnepbf16512(<32 x bfloat> %x1, <32 x bfloat> %x2) {
+; CHECK-LABEL: test_int_x86_avx10_vaddnepbf16512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vaddnepbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x58,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = fadd <32 x bfloat> %x1, %x2
+ ret <32 x bfloat> %res
+}
+
+define <32 x bfloat> @test_int_x86_avx10_mask_add_nepbf16_512(<32 x bfloat> %src, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_mask_add_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vaddnepbf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x49,0x58,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_mask_add_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vaddnepbf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x49,0x58,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i32 %msk to <32 x i1>
+ %res0 = fadd <32 x bfloat> %x1, %x2
+ %res = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> %src
+ ret <32 x bfloat> %res
+}
+
+define <32 x bfloat> @test_int_x86_avx10_maskz_add_nepbf16_512(<32 x bfloat> %src, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_maskz_add_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vaddnepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x58,0xc2]
+; X64-NEXT: vaddnepbf16 (%rsi), %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x58,0x0e]
+; X64-NEXT: vaddnepbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x58,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_add_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vaddnepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x58,0xc2]
+; X86-NEXT: vaddnepbf16 (%eax), %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x58,0x08]
+; X86-NEXT: vaddnepbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x58,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i32 %msk to <32 x i1>
+ %val = load <32 x bfloat>, ptr %ptr
+ %res0 = fadd <32 x bfloat> %x1, %x2
+ %res1 = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> zeroinitializer
+ %t2 = fadd <32 x bfloat> %x1, %val
+ %res2 = select <32 x i1> %mask, <32 x bfloat> %t2, <32 x bfloat> zeroinitializer
+ %res3 = fadd <32 x bfloat> %res1, %res2
+ ret <32 x bfloat> %res3
+}
+
+define <32 x bfloat> @test_int_x86_avx10_sub_nepbf16_512(<32 x bfloat> %x1, <32 x bfloat> %x2) {
+; CHECK-LABEL: test_int_x86_avx10_sub_nepbf16_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsubnepbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x5c,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = fsub <32 x bfloat> %x1, %x2
+ ret <32 x bfloat> %res
+}
+
+define <32 x bfloat> @test_int_x86_avx10_mask_sub_nepbf16_512(<32 x bfloat> %src, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_mask_sub_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vsubnepbf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x49,0x5c,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_mask_sub_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vsubnepbf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x49,0x5c,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i32 %msk to <32 x i1>
+ %res0 = fsub <32 x bfloat> %x1, %x2
+ %res = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> %src
+ ret <32 x bfloat> %res
+}
+
+define <32 x bfloat> @test_int_x86_avx10_maskz_sub_nepbf16_512(<32 x bfloat> %src, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_maskz_sub_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vsubnepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x5c,0xc2]
+; X64-NEXT: vsubnepbf16 (%rsi), %zmm1, %zmm1 # encoding: [0x62,0xf5,0x75,0x48,0x5c,0x0e]
+; X64-NEXT: vsubnepbf16 %zmm1, %zmm0, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x49,0x5c,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_sub_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vsubnepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x5c,0xc2]
+; X86-NEXT: vsubnepbf16 (%eax), %zmm1, %zmm1 # encoding: [0x62,0xf5,0x75,0x48,0x5c,0x08]
+; X86-NEXT: vsubnepbf16 %zmm1, %zmm0, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x49,0x5c,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i32 %msk to <32 x i1>
+ %val = load <32 x bfloat>, ptr %ptr
+ %res0 = fsub <32 x bfloat> %x1, %x2
+ %res1 = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> zeroinitializer
+ %t2 = fsub <32 x bfloat> %x1, %val
+ %res2 = select <32 x i1> %mask, <32 x bfloat> %t2, <32 x bfloat> zeroinitializer
+ %res3 = fsub <32 x bfloat> %res1, %res2
+ ret <32 x bfloat> %res3
+}
+
+declare <32 x bfloat> @llvm.x86.avx10.vmulnepbf16512(<32 x bfloat>, <32 x bfloat>)
+
+define <32 x bfloat> @test_int_x86_avx10_mul_nepbf16_512(<32 x bfloat> %x1, <32 x bfloat> %x2) {
+; CHECK-LABEL: test_int_x86_avx10_mul_nepbf16_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmulnepbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x59,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = fmul <32 x bfloat> %x1, %x2
+ ret <32 x bfloat> %res
+}
+
+define <32 x bfloat> @test_int_x86_avx10_mask_mul_nepbf16_512(<32 x bfloat> %src, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_mask_mul_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vmulnepbf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x49,0x59,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_mask_mul_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vmulnepbf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x49,0x59,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i32 %msk to <32 x i1>
+ %res0 = fmul <32 x bfloat> %x1, %x2
+ %res = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> %src
+ ret <32 x bfloat> %res
+}
+
+define <32 x bfloat> @test_int_x86_avx10_maskz_mul_nepbf16_512(<32 x bfloat> %src, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_maskz_mul_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vmulnepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x59,0xc2]
+; X64-NEXT: vmulnepbf16 (%rsi), %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x59,0x0e]
+; X64-NEXT: vmulnepbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x59,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_mul_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vmulnepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x59,0xc2]
+; X86-NEXT: vmulnepbf16 (%eax), %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x59,0x08]
+; X86-NEXT: vmulnepbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x59,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i32 %msk to <32 x i1>
+ %val = load <32 x bfloat>, ptr %ptr
+ %res0 = fmul <32 x bfloat> %x1, %x2
+ %res1 = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> zeroinitializer
+ %t2 = fmul <32 x bfloat> %x1, %val
+ %res2 = select <32 x i1> %mask, <32 x bfloat> %t2, <32 x bfloat> zeroinitializer
+ %res3 = fmul <32 x bfloat> %res1, %res2
+ ret <32 x bfloat> %res3
+}
+
+define <32 x bfloat> @test_int_x86_avx10_div_nepbf16_512(<32 x bfloat> %x1, <32 x bfloat> %x2) {
+; CHECK-LABEL: test_int_x86_avx10_div_nepbf16_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vdivnepbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x5e,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = fdiv <32 x bfloat> %x1, %x2
+ ret <32 x bfloat> %res
+}
+
+define <32 x bfloat> @test_int_x86_avx10_mask_div_nepbf16_512(<32 x bfloat> %src, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_mask_div_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vdivnepbf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x49,0x5e,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_mask_div_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vdivnepbf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x49,0x5e,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i32 %msk to <32 x i1>
+ %res0 = fdiv <32 x bfloat> %x1, %x2
+ %res = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> %src
+ ret <32 x bfloat> %res
+}
+
+define <32 x bfloat> @test_int_x86_avx10_maskz_div_nepbf16_512(<32 x bfloat> %src, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_maskz_div_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vdivnepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x5e,0xc2]
+; X64-NEXT: vdivnepbf16 (%rsi), %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x5e,0x0e]
+; X64-NEXT: vdivnepbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x5e,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_div_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vdivnepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x5e,0xc2]
+; X86-NEXT: vdivnepbf16 (%eax), %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xc9,0x5e,0x08]
+; X86-NEXT: vdivnepbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x5e,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i32 %msk to <32 x i1> ; zero-masked fdiv test: one mask bit per bf16 lane (%src is not referenced by the body)
+ %val = load <32 x bfloat>, ptr %ptr ; second divisor comes from memory
+ %res0 = fdiv <32 x bfloat> %x1, %x2 ; register / register quotient
+ %res1 = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> zeroinitializer ; zero-masking: lanes with a clear mask bit become 0
+ %t2 = fdiv <32 x bfloat> %x1, %val ; register / loaded quotient; the expected code folds the load into the divide
+ %res2 = select <32 x i1> %mask, <32 x bfloat> %t2, <32 x bfloat> zeroinitializer ; same zero-masking applied to the memory-operand form
+ %res3 = fdiv <32 x bfloat> %res1, %res2 ; unmasked divide that consumes both masked results
+ ret <32 x bfloat> %res3
+}
+
+define i32 @test_int_x86_avx10_vcmppbf16512(<32 x bfloat> %x1, <32 x bfloat> %x2) {
+; CHECK-LABEL: test_int_x86_avx10_vcmppbf16512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpunordpbf16 %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7f,0x48,0xc2,0xc1,0x03]
+; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %1 = fcmp uno <32 x bfloat> %x1, %x2
+ %res = bitcast <32 x i1> %1 to i32
+ ret i32 %res
+}
+
+; FIXME: _mm512_mask_cmp_p[s|h]_mask is not using {k2} but gcc does
+define i32 @test_int_x86_avx10_vcmppbf16512_mask2(<32 x bfloat> %x1, <32 x bfloat> %x2) {
+; CHECK-LABEL: test_int_x86_avx10_vcmppbf16512_mask2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpeqpbf16 %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7f,0x48,0xc2,0xc1,0x00]
+; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: andl $3, %eax # encoding: [0x83,0xe0,0x03]
+; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %1 = fcmp oeq <32 x bfloat> %x1, %x2
+ %2 = and <32 x i1> %1, <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+ %3 = bitcast <32 x i1> %2 to i32
+ ret i32 %3
+}
+
+define <32 x bfloat> @test_sqrt_nepbf16_512(<32 x bfloat> %a0) {
+; CHECK-LABEL: test_sqrt_nepbf16_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsqrtnepbf16 %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x51,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %1 = tail call <32 x bfloat> @llvm.sqrt.v32bf16(<32 x bfloat> %a0)
+ ret <32 x bfloat> %1
+}
+
+define <32 x bfloat> @test_mm512_mask_sqrt_pbh(<32 x bfloat> %__W, i32 %__U, <32 x bfloat> %__A) {
+; X64-LABEL: test_mm512_mask_sqrt_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vsqrtnepbf16 %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x49,0x51,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_sqrt_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vsqrtnepbf16 %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x49,0x51,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x bfloat> @llvm.sqrt.v32bf16(<32 x bfloat> %__A)
+ %1 = bitcast i32 %__U to <32 x i1>
+ %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> %__W
+ ret <32 x bfloat> %2
+}
+
+define <32 x bfloat> @test_mm512_maskz_sqrt_pbh(i32 %__U, <32 x bfloat>%__A) {
+; X64-LABEL: test_mm512_maskz_sqrt_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vsqrtnepbf16 %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x51,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_sqrt_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vsqrtnepbf16 %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x51,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x bfloat> @llvm.sqrt.v32bf16(<32 x bfloat> %__A)
+ %1 = bitcast i32 %__U to <32 x i1>
+ %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> zeroinitializer
+ ret <32 x bfloat> %2
+}
+
+define <32 x bfloat> @test_mm512_fmaddne_pbh(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) {
+; CHECK-LABEL: test_mm512_fmaddne_pbh:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfmadd213nepbf16 %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf6,0x74,0x48,0xa8,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C)
+ ret <32 x bfloat> %0
+}
+
+define <32 x bfloat> @test_mm512_mask_fmaddne_pbh(<32 x bfloat> %__A, i32 %__U, <32 x bfloat> %__B, <32 x bfloat> %__C) {
+; X64-LABEL: test_mm512_mask_fmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmadd132nepbf16 %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x49,0x98,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_fmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmadd132nepbf16 %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x49,0x98,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) ; per-lane A * B + C
+ %1 = bitcast i32 %__U to <32 x i1> ; one mask bit per bf16 lane
+ %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> %__A ; merge-masking into the first multiplicand: the expected 132 form overwrites %__A's register (zmm0) in place
+ ret <32 x bfloat> %2
+}
+
+define <32 x bfloat> @test_mm512_mask3_fmaddne_pbh(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C, i32 %__U) {
+; X64-LABEL: test_mm512_mask3_fmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmadd231nepbf16 %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0xb8,0xd1]
+; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask3_fmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmadd231nepbf16 %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0xb8,0xd1]
+; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) ; per-lane A * B + C
+ %1 = bitcast i32 %__U to <32 x i1> ; one mask bit per bf16 lane
+ %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> %__C ; merge-masking into the addend: the expected 231 form accumulates into %__C's register (zmm2), which is then copied to zmm0 for the return
+ ret <32 x bfloat> %2
+}
+
+define <32 x bfloat> @test_mm512_maskz_fmaddne_pbh(i32 %__U, <32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) {
+; X64-LABEL: test_mm512_maskz_fmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmadd213nepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xc9,0xa8,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_fmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmadd213nepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xc9,0xa8,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C)
+ %1 = bitcast i32 %__U to <32 x i1>
+ %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> zeroinitializer
+ ret <32 x bfloat> %2
+}
+
+define <32 x bfloat> @test_mm512_fmsubne_pbh(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) {
+; CHECK-LABEL: test_mm512_fmsubne_pbh:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfmsub213nepbf16 %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf6,0x74,0x48,0xaa,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %fneg.i = fneg <32 x bfloat> %__C
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %fneg.i)
+ ret <32 x bfloat> %0
+}
+
+define <32 x bfloat> @test_mm512_mask_fmsubne_pbh(<32 x bfloat> %__A, i32 %__U, <32 x bfloat> %__B, <32 x bfloat> %__C) {
+; X64-LABEL: test_mm512_mask_fmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmsub132nepbf16 %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x49,0x9a,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_fmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmsub132nepbf16 %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x49,0x9a,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <32 x bfloat> %__C
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %fneg.i.i)
+ %1 = bitcast i32 %__U to <32 x i1>
+ %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> %__A
+ ret <32 x bfloat> %2
+}
+
+define <32 x bfloat> @test_mm512_mask3_fmsubne_pbh(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C, i32 %__U) {
+; X64-LABEL: test_mm512_mask3_fmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmsub231nepbf16 %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0xba,0xd1]
+; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask3_fmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmsub231nepbf16 %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0xba,0xd1]
+; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <32 x bfloat> %__C
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %fneg.i.i)
+ %1 = bitcast i32 %__U to <32 x i1>
+ %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> %__C
+ ret <32 x bfloat> %2
+}
+
+define <32 x bfloat> @test_mm512_maskz_fmsubne_pbh(i32 %__U, <32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) {
+; X64-LABEL: test_mm512_maskz_fmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmsub213nepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xc9,0xaa,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_fmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmsub213nepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xc9,0xaa,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <32 x bfloat> %__C
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %fneg.i.i)
+ %1 = bitcast i32 %__U to <32 x i1>
+ %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> zeroinitializer
+ ret <32 x bfloat> %2
+}
+
+define <32 x bfloat> @test_mm512_fnmaddne_pbh(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) {
+; CHECK-LABEL: test_mm512_fnmaddne_pbh:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfnmadd213nepbf16 %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf6,0x74,0x48,0xac,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %fneg.i = fneg <32 x bfloat> %__B
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %fneg.i, <32 x bfloat> %__C)
+ ret <32 x bfloat> %0
+}
+
+define <32 x bfloat> @test_mm512_mask_fnmaddne_pbh(<32 x bfloat> %__A, i32 %__U, <32 x bfloat> %__B, <32 x bfloat> %__C) {
+; X64-LABEL: test_mm512_mask_fnmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmadd132nepbf16 %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x49,0x9c,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_fnmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmadd132nepbf16 %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x49,0x9c,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <32 x bfloat> %__B
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %fneg.i.i, <32 x bfloat> %__C)
+ %1 = bitcast i32 %__U to <32 x i1>
+ %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> %__A
+ ret <32 x bfloat> %2
+}
+
+define <32 x bfloat> @test_mm512_mask3_fnmaddne_pbh(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C, i32 %__U) {
+; X64-LABEL: test_mm512_mask3_fnmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmadd231nepbf16 %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0xbc,0xd1]
+; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask3_fnmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmadd231nepbf16 %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0xbc,0xd1]
+; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <32 x bfloat> %__B
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %fneg.i.i, <32 x bfloat> %__C)
+ %1 = bitcast i32 %__U to <32 x i1>
+ %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> %__C
+ ret <32 x bfloat> %2
+}
+
+define <32 x bfloat> @test_mm512_maskz_fnmaddne_pbh(i32 %__U, <32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) {
+; X64-LABEL: test_mm512_maskz_fnmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmadd213nepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xc9,0xac,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_fnmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmadd213nepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xc9,0xac,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <32 x bfloat> %__B
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %fneg.i.i, <32 x bfloat> %__C)
+ %1 = bitcast i32 %__U to <32 x i1>
+ %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> zeroinitializer
+ ret <32 x bfloat> %2
+}
+
+define <32 x bfloat> @test_mm512_fnmsubne_pbh(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) {
+; CHECK-LABEL: test_mm512_fnmsubne_pbh:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfnmsub213nepbf16 %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf6,0x74,0x48,0xae,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %fneg.i = fneg <32 x bfloat> %__B ; negate one multiplicand
+ %fneg1.i = fneg <32 x bfloat> %__C ; negate the addend
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %fneg.i, <32 x bfloat> %fneg1.i) ; A * (-B) + (-C); both negations are expected to fold into a single vfnmsub with no separate sign-flip instruction
+ ret <32 x bfloat> %0
+}
+
+define <32 x bfloat> @test_mm512_mask_fnmsubne_pbh(<32 x bfloat> %__A, i32 %__U, <32 x bfloat> %__B, <32 x bfloat> %__C) {
+; X64-LABEL: test_mm512_mask_fnmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmsub132nepbf16 %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x49,0x9e,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_fnmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmsub132nepbf16 %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x49,0x9e,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <32 x bfloat> %__B
+ %fneg1.i.i = fneg <32 x bfloat> %__C
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %fneg.i.i, <32 x bfloat> %fneg1.i.i)
+ %1 = bitcast i32 %__U to <32 x i1>
+ %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> %__A
+ ret <32 x bfloat> %2
+}
+
+define <32 x bfloat> @test_mm512_mask3_fnmsubne_pbh(<32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C, i32 %__U) {
+; X64-LABEL: test_mm512_mask3_fnmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmsub231nepbf16 %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0xbe,0xd1]
+; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask3_fnmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmsub231nepbf16 %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0xbe,0xd1]
+; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <32 x bfloat> %__B
+ %fneg1.i.i = fneg <32 x bfloat> %__C
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %fneg.i.i, <32 x bfloat> %fneg1.i.i)
+ %1 = bitcast i32 %__U to <32 x i1>
+ %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> %__C
+ ret <32 x bfloat> %2
+}
+
+define <32 x bfloat> @test_mm512_maskz_fnmsubne_pbh(i32 %__U, <32 x bfloat> %__A, <32 x bfloat> %__B, <32 x bfloat> %__C) {
+; X64-LABEL: test_mm512_maskz_fnmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmsub213nepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xc9,0xae,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_fnmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmsub213nepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xc9,0xae,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <32 x bfloat> %__B
+ %fneg1.i.i = fneg <32 x bfloat> %__C
+ %0 = tail call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %__A, <32 x bfloat> %fneg.i.i, <32 x bfloat> %fneg1.i.i)
+ %1 = bitcast i32 %__U to <32 x i1>
+ %2 = select <32 x i1> %1, <32 x bfloat> %0, <32 x bfloat> zeroinitializer
+ ret <32 x bfloat> %2
+}
diff --git a/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll
new file mode 100644
index 00000000000000..d574d54b9ad792
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll
@@ -0,0 +1,296 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X86
+
+declare <32 x bfloat> @llvm.x86.avx10.vminpbf16512(<32 x bfloat>, <32 x bfloat>)
+
+define <32 x bfloat> @test_int_x86_avx10_min_nepbf16_512(<32 x bfloat> %x1, <32 x bfloat> %x2) {
+; CHECK-LABEL: test_int_x86_avx10_min_nepbf16_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vminpbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x5d,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res0 = call <32 x bfloat> @llvm.x86.avx10.vminpbf16512(<32 x bfloat> %x1, <32 x bfloat> %x2)
+ ret <32 x bfloat> %res0
+}
+
+define <32 x bfloat> @test_int_x86_avx10_maskz_min_nepbf16_512(<32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk) {
+; X64-LABEL: test_int_x86_avx10_maskz_min_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vminpbf16 %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x5d,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_min_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vminpbf16 %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x5d,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i32 %msk to <32 x i1>
+ %res0 = call <32 x bfloat> @llvm.x86.avx10.vminpbf16512(<32 x bfloat> %x1, <32 x bfloat> %x2)
+ %res1 = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> zeroinitializer
+ ret <32 x bfloat> %res1
+}
+
+declare <32 x bfloat> @llvm.x86.avx10.vmaxpbf16512(<32 x bfloat>, <32 x bfloat>)
+
+define <32 x bfloat> @test_int_x86_avx10_max_nepbf16_512(<32 x bfloat> %x1, <32 x bfloat> %x2) {
+; CHECK-LABEL: test_int_x86_avx10_max_nepbf16_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmaxpbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x5f,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res0 = call <32 x bfloat> @llvm.x86.avx10.vmaxpbf16512(<32 x bfloat> %x1, <32 x bfloat> %x2)
+ ret <32 x bfloat> %res0
+}
+
+define <32 x bfloat> @test_int_x86_avx10_maskz_max_nepbf16_512(<32 x bfloat> %x1, <32 x bfloat> %x2, i32 %msk) {
+; X64-LABEL: test_int_x86_avx10_maskz_max_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vmaxpbf16 %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x5f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_max_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vmaxpbf16 %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x5f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i32 %msk to <32 x i1>
+ %res0 = call <32 x bfloat> @llvm.x86.avx10.vmaxpbf16512(<32 x bfloat> %x1, <32 x bfloat> %x2)
+ %res1 = select <32 x i1> %mask, <32 x bfloat> %res0, <32 x bfloat> zeroinitializer
+ ret <32 x bfloat> %res1
+}
+
+declare i32 @llvm.x86.avx10.vcomsbf16eq(<8 x bfloat>, <8 x bfloat>)
+declare i32 @llvm.x86.avx10.vcomsbf16lt(<8 x bfloat>, <8 x bfloat>)
+declare i32 @llvm.x86.avx10.vcomsbf16le(<8 x bfloat>, <8 x bfloat>)
+declare i32 @llvm.x86.avx10.vcomsbf16gt(<8 x bfloat>, <8 x bfloat>)
+declare i32 @llvm.x86.avx10.vcomsbf16ge(<8 x bfloat>, <8 x bfloat>)
+declare i32 @llvm.x86.avx10.vcomsbf16neq(<8 x bfloat>, <8 x bfloat>)
+
+define i32 @test_x86_avx10_com_nesbf16_eq(<8 x bfloat> %a0, <8 x bfloat> %a1) {
+; CHECK-LABEL: test_x86_avx10_com_nesbf16_eq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcomsbf16 %xmm1, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc1]
+; CHECK-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
+; CHECK-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
+; CHECK-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
+; CHECK-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call i32 @llvm.x86.avx10.vcomsbf16eq(<8 x bfloat> %a0, <8 x bfloat> %a1)
+ ret i32 %res
+}
+
+define i32 @test_x86_avx10_com_nesbf16_lt(<8 x bfloat> %a0, <8 x bfloat> %a1) {
+; CHECK-LABEL: test_x86_avx10_com_nesbf16_lt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT: vcomsbf16 %xmm0, %xmm1 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc8]
+; CHECK-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call i32 @llvm.x86.avx10.vcomsbf16lt(<8 x bfloat> %a0, <8 x bfloat> %a1)
+ ret i32 %res
+}
+
+define i32 @test_x86_avx10_com_nesbf16_le(<8 x bfloat> %a0, <8 x bfloat> %a1) {
+; CHECK-LABEL: test_x86_avx10_com_nesbf16_le:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT: vcomsbf16 %xmm0, %xmm1 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc8]
+; CHECK-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call i32 @llvm.x86.avx10.vcomsbf16le(<8 x bfloat> %a0, <8 x bfloat> %a1)
+ ret i32 %res
+}
+
+define i32 @test_x86_avx10_com_nesbf16_gt(<8 x bfloat> %a0, <8 x bfloat> %a1) {
+; CHECK-LABEL: test_x86_avx10_com_nesbf16_gt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT: vcomsbf16 %xmm1, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc1]
+; CHECK-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call i32 @llvm.x86.avx10.vcomsbf16gt(<8 x bfloat> %a0, <8 x bfloat> %a1) ; strict greater-than: operands compared in source order and tested with seta (the lt test uses the same condition with the operands swapped)
+ ret i32 %res ; NOTE(review): this test previously called vcomsbf16ge; that intrinsic now has no test in the visible part of this file - add one (same compare, setae)
+}
+
+define i32 @test_x86_avx10_com_nesbf16_neq(<8 x bfloat> %a0, <8 x bfloat> %a1) {
+; CHECK-LABEL: test_x86_avx10_com_nesbf16_neq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcomsbf16 %xmm1, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc1]
+; CHECK-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
+; CHECK-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
+; CHECK-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
+; CHECK-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call i32 @llvm.x86.avx10.vcomsbf16neq(<8 x bfloat> %a0, <8 x bfloat> %a1)
+ ret i32 %res
+}
+
+declare <32 x bfloat> @llvm.x86.avx10.mask.rsqrt.nepbf16.512(<32 x bfloat>, <32 x bfloat>, i32)
+
+define <32 x bfloat> @test_rsqrt_nepbf16_512(<32 x bfloat> %a0) {
+; CHECK-LABEL: test_rsqrt_nepbf16_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vrsqrtpbf16 %zmm0, %zmm0 # encoding: [0x62,0xf6,0x7c,0x48,0x4e,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <32 x bfloat> @llvm.x86.avx10.mask.rsqrt.nepbf16.512(<32 x bfloat> %a0, <32 x bfloat> zeroinitializer, i32 -1) ; masked intrinsic called with an all-ones mask: the expected code is the plain unmasked vrsqrtpbf16, and the zero second operand leaves no trace in it
+ ret <32 x bfloat> %res
+}
+
+declare <32 x i1> @llvm.x86.avx10.fpclass.nepbf16.512(<32 x bfloat>, i32)
+
+define i32 @test_int_x86_avx512_fpclass_nepbf16_512(<32 x bfloat> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_fpclass_nepbf16_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfpclasspbf16 $2, %zmm0, %k1 # encoding: [0x62,0xf3,0x7f,0x48,0x66,0xc8,0x02]
+; CHECK-NEXT: vfpclasspbf16 $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x66,0xc0,0x04]
+; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <32 x i1> @llvm.x86.avx10.fpclass.nepbf16.512(<32 x bfloat> %x0, i32 4) ; class test with immediate 4
+ %res1 = call <32 x i1> @llvm.x86.avx10.fpclass.nepbf16.512(<32 x bfloat> %x0, i32 2) ; class test of the same input with immediate 2
+ %1 = and <32 x i1> %res1, %res ; the AND is expected to become write-masking: the imm-2 result lands in k1 and masks the imm-4 test, so no separate mask-AND instruction appears
+ %2 = bitcast <32 x i1> %1 to i32 ; pack the 32 lane bits into the returned integer
+ ret i32 %2
+}
+
+declare <32 x bfloat> @llvm.x86.avx10.mask.rcp.nepbf16.512(<32 x bfloat>, <32 x bfloat>, i32)
+
+define <32 x bfloat> @test_rcp_nepbf16_512(<32 x bfloat> %a0, <32 x bfloat> %a1, i32 %mask) {
+; X64-LABEL: test_rcp_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vrcppbf16 %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0x4c,0xc8]
+; X64-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_rcp_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vrcppbf16 %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf6,0x7c,0x49,0x4c,0xc8]
+; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <32 x bfloat> @llvm.x86.avx10.mask.rcp.nepbf16.512(<32 x bfloat> %a0, <32 x bfloat> %a1, i32 %mask) ; merge-masked form: %a0 is the source, %a1's register (zmm1) is the masked destination, and the result is then copied to zmm0 for the return
+ ret <32 x bfloat> %res
+}
+
+declare <32 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.512(<32 x bfloat>, i32, <32 x bfloat>, i32)
+
+define <32 x bfloat>@test_int_x86_avx512_mask_reduce_nepbf16_512(<32 x bfloat> %x0, <32 x bfloat> %x2, i32 %x3) {
+; X64-LABEL: test_int_x86_avx512_mask_reduce_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vreducenepbf16 $8, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x56,0xc8,0x08]
+; X64-NEXT: vreducenepbf16 $4, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x56,0xc0,0x04]
+; X64-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_reduce_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vreducenepbf16 $8, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x56,0xc8,0x08]
+; X86-NEXT: vreducenepbf16 $4, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x56,0xc0,0x04]
+; X86-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <32 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.512(<32 x bfloat> %x0, i32 8, <32 x bfloat> %x2, i32 %x3) ; immediate 8, merge-masked by %x3 into %x2's register
+ %res1 = call <32 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.512(<32 x bfloat> %x0, i32 4, <32 x bfloat> %x2, i32 -1) ; immediate 4 with an all-ones mask: the unmasked instruction form is expected
+ %res2 = fadd <32 x bfloat> %res, %res1 ; the sum consumes both the masked and the unmasked result
+ ret <32 x bfloat> %res2
+}
+
+declare <32 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.512(<32 x bfloat>, i32, <32 x bfloat>, i32)
+
+define <32 x bfloat>@test_int_x86_avx512_mask_rndscale_nepbf16_512(<32 x bfloat> %x0, <32 x bfloat> %x2, i32 %x3) {
+; X64-LABEL: test_int_x86_avx512_mask_rndscale_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vrndscalenepbf16 $8, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x08,0xc8,0x08]
+; X64-NEXT: vrndscalenepbf16 $4, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x08,0xc0,0x04]
+; X64-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_rndscale_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vrndscalenepbf16 $8, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x08,0xc8,0x08]
+; X86-NEXT: vrndscalenepbf16 $4, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x08,0xc0,0x04]
+; X86-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <32 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.512(<32 x bfloat> %x0, i32 8, <32 x bfloat> %x2, i32 %x3)
+ %res1 = call <32 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.512(<32 x bfloat> %x0, i32 4, <32 x bfloat> %x2, i32 -1)
+ %res2 = fadd <32 x bfloat> %res, %res1
+ ret <32 x bfloat> %res2
+}
+
+declare <32 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.512(<32 x bfloat>, <32 x bfloat>, i32)
+
+define <32 x bfloat>@test_int_x86_avx512_mask_getexp_nepbf16_512(<32 x bfloat> %x0, <32 x bfloat> %x1, i32 %x2) {
+; X64-LABEL: test_int_x86_avx512_mask_getexp_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vgetexppbf16 %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x42,0xc0]
+; X64-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xc8]
+; X64-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_getexp_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vgetexppbf16 %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x42,0xc0]
+; X86-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xc8]
+; X86-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res1 = call <32 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.512(<32 x bfloat> %x0, <32 x bfloat> %x1, i32 %x2) ; getexp of %x0, merge-masked by %x2 into %x1
+ %res2 = call <32 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.512(<32 x bfloat> %x0, <32 x bfloat> zeroinitializer, i32 -1) ; same source with an all-ones mask
+ %res3 = fadd <32 x bfloat> %res1, %res2 ; the expected code computes getexp only once (unmasked) and applies the mask with a separate vmovdqu16 blend before this add
+ ret <32 x bfloat> %res3
+}
+
+declare <32 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.512(<32 x bfloat>, i32, <32 x bfloat>, i32)
+
+; Calls the 512-bit getmant intrinsic twice on %x0: once with immediate 8,
+; passthru %x2 and the runtime mask %x3, and once with immediate 4 and an
+; all-ones mask, then adds the results. The expected assembly has one
+; vgetmantpbf16 with a {%k1} write mask, one without, and a final vaddnepbf16.
+define <32 x bfloat>@test_int_x86_avx512_mask_getmant_nepbf16_512(<32 x bfloat> %x0, <32 x bfloat> %x2, i32 %x3) {
+; X64-LABEL: test_int_x86_avx512_mask_getmant_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vgetmantpbf16 $8, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x26,0xc8,0x08]
+; X64-NEXT: vgetmantpbf16 $4, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x26,0xc0,0x04]
+; X64-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_getmant_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vgetmantpbf16 $8, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x26,0xc8,0x08]
+; X86-NEXT: vgetmantpbf16 $4, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x26,0xc0,0x04]
+; X86-NEXT: vaddnepbf16 %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf5,0x75,0x48,0x58,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <32 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.512(<32 x bfloat> %x0, i32 8, <32 x bfloat> %x2, i32 %x3)
+ %res1 = call <32 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.512(<32 x bfloat> %x0, i32 4, <32 x bfloat> %x2, i32 -1)
+ %res2 = fadd <32 x bfloat> %res, %res1
+ ret <32 x bfloat> %res2
+}
+
+declare <32 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.512(<32 x bfloat>, <32 x bfloat>, <32 x bfloat>, i32)
+
+; Calls the 512-bit scalef intrinsic twice on (%x0, %x1): once with passthru %x2
+; under the runtime mask %x3, and once with a zero passthru and an all-ones
+; mask, then adds the results. The intrinsic takes the mask as a plain i32, so
+; no <32 x i1> conversion is needed in the IR. The expected assembly contains a
+; single vscalefpbf16 whose result is reused: a vmovdqu16 with a {%k1} write
+; mask merges it into %zmm2 before the final vaddnepbf16.
+define <32 x bfloat>@test_int_x86_avx512_mask_scalef_nepbf16_512(<32 x bfloat> %x0, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %x3) {
+; X64-LABEL: test_int_x86_avx512_mask_scalef_nepbf16_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vscalefpbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf6,0x7c,0x48,0x2c,0xc1]
+; X64-NEXT: vmovdqu16 %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xd0]
+; X64-NEXT: vaddnepbf16 %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf5,0x6d,0x48,0x58,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_scalef_nepbf16_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vscalefpbf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf6,0x7c,0x48,0x2c,0xc1]
+; X86-NEXT: vmovdqu16 %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xd0]
+; X86-NEXT: vaddnepbf16 %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf5,0x6d,0x48,0x58,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res1 = call <32 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.512(<32 x bfloat> %x0, <32 x bfloat> %x1, <32 x bfloat> %x2, i32 %x3)
+ %res2 = call <32 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.512(<32 x bfloat> %x0, <32 x bfloat> %x1, <32 x bfloat> zeroinitializer, i32 -1)
+ %res3 = fadd <32 x bfloat> %res1, %res2
+ ret <32 x bfloat> %res3
+}
diff --git a/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll b/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll
new file mode 100644
index 00000000000000..e0f5679e8ac96d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll
@@ -0,0 +1,1168 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X86
+
+; Unmasked 256-bit add: a plain fadd on <16 x bfloat> is expected to become a
+; single vaddnepbf16 on ymm registers under both llc invocations.
+define <16 x bfloat> @test_int_x86_avx10_add_nepbf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2) {
+; CHECK-LABEL: test_int_x86_avx10_add_nepbf16_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vaddnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x58,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = fadd <16 x bfloat> %x1, %x2
+ ret <16 x bfloat> %res
+}
+
+; Masked 256-bit add: the fadd result is selected against %src using %msk
+; viewed as <16 x i1>. The expected assembly is one vaddnepbf16 with a {%k1}
+; write mask that writes into the register holding %src. %ptr is not used by
+; the body.
+define <16 x bfloat> @test_int_x86_avx10_mask_add_nepbf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_mask_add_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vaddnepbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x58,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_mask_add_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vaddnepbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x58,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i16 %msk to <16 x i1>
+ %res0 = fadd <16 x bfloat> %x1, %x2
+ %res = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> %src
+ ret <16 x bfloat> %res
+}
+; Zero-masked 256-bit add: two sums (%x1 + %x2 and %x1 + a vector loaded from
+; %ptr) are each selected against zeroinitializer under %msk and then added
+; together. The expected assembly has two vaddnepbf16 with {%k1} {z} (the
+; second using a memory operand) followed by an unmasked vaddnepbf16. %src is
+; not used by the body.
+define <16 x bfloat> @test_int_x86_avx10_maskz_add_nepbf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_maskz_add_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vaddnepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x58,0xc2]
+; X64-NEXT: vaddnepbf16 (%rsi), %ymm1, %ymm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x58,0x0e]
+; X64-NEXT: vaddnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x58,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_add_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vaddnepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x58,0xc2]
+; X86-NEXT: vaddnepbf16 (%eax), %ymm1, %ymm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x58,0x08]
+; X86-NEXT: vaddnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x58,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i16 %msk to <16 x i1>
+ %val = load <16 x bfloat>, ptr %ptr
+ %res0 = fadd <16 x bfloat> %x1, %x2
+ %res1 = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> zeroinitializer
+ %t2 = fadd <16 x bfloat> %x1, %val
+ %res2 = select <16 x i1> %mask, <16 x bfloat> %t2, <16 x bfloat> zeroinitializer
+ %res3 = fadd <16 x bfloat> %res1, %res2
+ ret <16 x bfloat> %res3
+}
+
+; Unmasked 128-bit add: a plain fadd on <8 x bfloat> is expected to become a
+; single vaddnepbf16 on xmm registers under both llc invocations.
+define <8 x bfloat> @test_int_x86_avx10_add_nepbf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2) {
+; CHECK-LABEL: test_int_x86_avx10_add_nepbf16_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vaddnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x58,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = fadd <8 x bfloat> %x1, %x2
+ ret <8 x bfloat> %res
+}
+
+; Masked 128-bit add: the fadd result is selected against %src using %msk
+; viewed as <8 x i1>. The expected assembly is one vaddnepbf16 with a {%k1}
+; write mask that writes into the register holding %src. %ptr is not used by
+; the body.
+define <8 x bfloat> @test_int_x86_avx10_mask_add_nepbf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_mask_add_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vaddnepbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x58,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_mask_add_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vaddnepbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x58,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i8 %msk to <8 x i1>
+ %res0 = fadd <8 x bfloat> %x1, %x2
+ %res = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> %src
+ ret <8 x bfloat> %res
+}
+
+; Zero-masked 128-bit add: two sums (%x1 + %x2 and %x1 + a vector loaded from
+; %ptr) are each selected against zeroinitializer under %msk and then added
+; together. The expected assembly has two vaddnepbf16 with {%k1} {z} (the
+; second using a memory operand) followed by an unmasked vaddnepbf16. %src is
+; not used by the body.
+define <8 x bfloat> @test_int_x86_avx10_maskz_add_nepbf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_maskz_add_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vaddnepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x58,0xc2]
+; X64-NEXT: vaddnepbf16 (%rsi), %xmm1, %xmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x58,0x0e]
+; X64-NEXT: vaddnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x58,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_add_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vaddnepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x58,0xc2]
+; X86-NEXT: vaddnepbf16 (%eax), %xmm1, %xmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x58,0x08]
+; X86-NEXT: vaddnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x58,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i8 %msk to <8 x i1>
+ %val = load <8 x bfloat>, ptr %ptr
+ %res0 = fadd <8 x bfloat> %x1, %x2
+ %res1 = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> zeroinitializer
+ %t2 = fadd <8 x bfloat> %x1, %val
+ %res2 = select <8 x i1> %mask, <8 x bfloat> %t2, <8 x bfloat> zeroinitializer
+ %res3 = fadd <8 x bfloat> %res1, %res2
+ ret <8 x bfloat> %res3
+}
+
+; Unmasked 256-bit subtract: a plain fsub on <16 x bfloat> is expected to
+; become a single vsubnepbf16 on ymm registers under both llc invocations.
+define <16 x bfloat> @test_int_x86_avx10_sub_nepbf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2) {
+; CHECK-LABEL: test_int_x86_avx10_sub_nepbf16_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsubnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x5c,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = fsub <16 x bfloat> %x1, %x2
+ ret <16 x bfloat> %res
+}
+
+; Masked 256-bit subtract: the fsub result is selected against %src using %msk
+; viewed as <16 x i1>. The expected assembly is one vsubnepbf16 with a {%k1}
+; write mask that writes into the register holding %src. %ptr is not used by
+; the body.
+define <16 x bfloat> @test_int_x86_avx10_mask_sub_nepbf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_mask_sub_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vsubnepbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x5c,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_mask_sub_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vsubnepbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x5c,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i16 %msk to <16 x i1>
+ %res0 = fsub <16 x bfloat> %x1, %x2
+ %res = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> %src
+ ret <16 x bfloat> %res
+}
+
+; Zero-masked 256-bit subtract: two differences (%x1 - %x2 and %x1 - a vector
+; loaded from %ptr) are each selected against zeroinitializer under %msk and
+; then subtracted. Unlike the add/mul/div tests of the same shape in this file,
+; the expected assembly emits the memory-operand vsubnepbf16 unmasked and puts
+; the {%k1} write mask on the final vsubnepbf16 instead.
+; NOTE(review): presumably the second zeroing select is folded into the final
+; fsub because x - 0.0 == x; confirm against the responsible combine.
+; %src is not used by the body.
+define <16 x bfloat> @test_int_x86_avx10_maskz_sub_nepbf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_maskz_sub_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vsubnepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x5c,0xc2]
+; X64-NEXT: vsubnepbf16 (%rsi), %ymm1, %ymm1 # encoding: [0x62,0xf5,0x75,0x28,0x5c,0x0e]
+; X64-NEXT: vsubnepbf16 %ymm1, %ymm0, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x5c,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_sub_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vsubnepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x5c,0xc2]
+; X86-NEXT: vsubnepbf16 (%eax), %ymm1, %ymm1 # encoding: [0x62,0xf5,0x75,0x28,0x5c,0x08]
+; X86-NEXT: vsubnepbf16 %ymm1, %ymm0, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x5c,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i16 %msk to <16 x i1>
+ %val = load <16 x bfloat>, ptr %ptr
+ %res0 = fsub <16 x bfloat> %x1, %x2
+ %res1 = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> zeroinitializer
+ %t2 = fsub <16 x bfloat> %x1, %val
+ %res2 = select <16 x i1> %mask, <16 x bfloat> %t2, <16 x bfloat> zeroinitializer
+ %res3 = fsub <16 x bfloat> %res1, %res2
+ ret <16 x bfloat> %res3
+}
+
+; Unmasked 128-bit subtract: a plain fsub on <8 x bfloat> is expected to become
+; a single vsubnepbf16 on xmm registers under both llc invocations.
+define <8 x bfloat> @test_int_x86_avx10_sub_nepbf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2) {
+; CHECK-LABEL: test_int_x86_avx10_sub_nepbf16_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsubnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x5c,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = fsub <8 x bfloat> %x1, %x2
+ ret <8 x bfloat> %res
+}
+
+; Masked 128-bit subtract: the fsub result is selected against %src using %msk
+; viewed as <8 x i1>. The expected assembly is one vsubnepbf16 with a {%k1}
+; write mask that writes into the register holding %src. %ptr is not used by
+; the body.
+define <8 x bfloat> @test_int_x86_avx10_mask_sub_nepbf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_mask_sub_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vsubnepbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x5c,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_mask_sub_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vsubnepbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x5c,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i8 %msk to <8 x i1>
+ %res0 = fsub <8 x bfloat> %x1, %x2
+ %res = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> %src
+ ret <8 x bfloat> %res
+}
+
+; Zero-masked 128-bit subtract: two differences (%x1 - %x2 and %x1 - a vector
+; loaded from %ptr) are each selected against zeroinitializer under %msk and
+; then subtracted. Unlike the add/mul/div tests of the same shape in this file,
+; the expected assembly emits the memory-operand vsubnepbf16 unmasked and puts
+; the {%k1} write mask on the final vsubnepbf16 instead.
+; NOTE(review): presumably the second zeroing select is folded into the final
+; fsub because x - 0.0 == x; confirm against the responsible combine.
+; %src is not used by the body.
+define <8 x bfloat> @test_int_x86_avx10_maskz_sub_nepbf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_maskz_sub_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vsubnepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x5c,0xc2]
+; X64-NEXT: vsubnepbf16 (%rsi), %xmm1, %xmm1 # encoding: [0x62,0xf5,0x75,0x08,0x5c,0x0e]
+; X64-NEXT: vsubnepbf16 %xmm1, %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x5c,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_sub_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vsubnepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x5c,0xc2]
+; X86-NEXT: vsubnepbf16 (%eax), %xmm1, %xmm1 # encoding: [0x62,0xf5,0x75,0x08,0x5c,0x08]
+; X86-NEXT: vsubnepbf16 %xmm1, %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x5c,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i8 %msk to <8 x i1>
+ %val = load <8 x bfloat>, ptr %ptr
+ %res0 = fsub <8 x bfloat> %x1, %x2
+ %res1 = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> zeroinitializer
+ %t2 = fsub <8 x bfloat> %x1, %val
+ %res2 = select <8 x i1> %mask, <8 x bfloat> %t2, <8 x bfloat> zeroinitializer
+ %res3 = fsub <8 x bfloat> %res1, %res2
+ ret <8 x bfloat> %res3
+}
+
+; Unmasked 256-bit multiply: a plain fmul on <16 x bfloat> is expected to
+; become a single vmulnepbf16 on ymm registers under both llc invocations.
+define <16 x bfloat> @test_int_x86_avx10_mul_nepbf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2) {
+; CHECK-LABEL: test_int_x86_avx10_mul_nepbf16_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmulnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x59,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = fmul <16 x bfloat> %x1, %x2
+ ret <16 x bfloat> %res
+}
+
+; Masked 256-bit multiply: the fmul result is selected against %src using %msk
+; viewed as <16 x i1>. The expected assembly is one vmulnepbf16 with a {%k1}
+; write mask that writes into the register holding %src. %ptr is not used by
+; the body.
+define <16 x bfloat> @test_int_x86_avx10_mask_mul_nepbf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_mask_mul_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vmulnepbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x59,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_mask_mul_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vmulnepbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x59,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i16 %msk to <16 x i1>
+ %res0 = fmul <16 x bfloat> %x1, %x2
+ %res = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> %src
+ ret <16 x bfloat> %res
+}
+
+; Zero-masked 256-bit multiply: two products (%x1 * %x2 and %x1 * a vector
+; loaded from %ptr) are each selected against zeroinitializer under %msk and
+; then multiplied together. The expected assembly has two vmulnepbf16 with
+; {%k1} {z} (the second using a memory operand) followed by an unmasked
+; vmulnepbf16. %src is not used by the body.
+define <16 x bfloat> @test_int_x86_avx10_maskz_mul_nepbf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_maskz_mul_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vmulnepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x59,0xc2]
+; X64-NEXT: vmulnepbf16 (%rsi), %ymm1, %ymm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x59,0x0e]
+; X64-NEXT: vmulnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x59,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_mul_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vmulnepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x59,0xc2]
+; X86-NEXT: vmulnepbf16 (%eax), %ymm1, %ymm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x59,0x08]
+; X86-NEXT: vmulnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x59,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i16 %msk to <16 x i1>
+ %val = load <16 x bfloat>, ptr %ptr
+ %res0 = fmul <16 x bfloat> %x1, %x2
+ %res1 = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> zeroinitializer
+ %t2 = fmul <16 x bfloat> %x1, %val
+ %res2 = select <16 x i1> %mask, <16 x bfloat> %t2, <16 x bfloat> zeroinitializer
+ %res3 = fmul <16 x bfloat> %res1, %res2
+ ret <16 x bfloat> %res3
+}
+
+; Unmasked 128-bit multiply: a plain fmul on <8 x bfloat> is expected to become
+; a single vmulnepbf16 on xmm registers under both llc invocations.
+define <8 x bfloat> @test_int_x86_avx10_mul_nepbf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2) {
+; CHECK-LABEL: test_int_x86_avx10_mul_nepbf16_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmulnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x59,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = fmul <8 x bfloat> %x1, %x2
+ ret <8 x bfloat> %res
+}
+
+; Masked 128-bit multiply: the fmul result is selected against %src using %msk
+; viewed as <8 x i1>. The expected assembly is one vmulnepbf16 with a {%k1}
+; write mask that writes into the register holding %src. %ptr is not used by
+; the body.
+define <8 x bfloat> @test_int_x86_avx10_mask_mul_nepbf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_mask_mul_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vmulnepbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x59,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_mask_mul_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vmulnepbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x59,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i8 %msk to <8 x i1>
+ %res0 = fmul <8 x bfloat> %x1, %x2
+ %res = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> %src
+ ret <8 x bfloat> %res
+}
+
+; Zero-masked 128-bit multiply: two products (%x1 * %x2 and %x1 * a vector
+; loaded from %ptr) are each selected against zeroinitializer under %msk and
+; then multiplied together. The expected assembly has two vmulnepbf16 with
+; {%k1} {z} (the second using a memory operand) followed by an unmasked
+; vmulnepbf16. %src is not used by the body.
+define <8 x bfloat> @test_int_x86_avx10_maskz_mul_nepbf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_maskz_mul_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vmulnepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x59,0xc2]
+; X64-NEXT: vmulnepbf16 (%rsi), %xmm1, %xmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x59,0x0e]
+; X64-NEXT: vmulnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x59,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_mul_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vmulnepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x59,0xc2]
+; X86-NEXT: vmulnepbf16 (%eax), %xmm1, %xmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x59,0x08]
+; X86-NEXT: vmulnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x59,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i8 %msk to <8 x i1>
+ %val = load <8 x bfloat>, ptr %ptr
+ %res0 = fmul <8 x bfloat> %x1, %x2
+ %res1 = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> zeroinitializer
+ %t2 = fmul <8 x bfloat> %x1, %val
+ %res2 = select <8 x i1> %mask, <8 x bfloat> %t2, <8 x bfloat> zeroinitializer
+ %res3 = fmul <8 x bfloat> %res1, %res2
+ ret <8 x bfloat> %res3
+}
+
+; Unmasked 256-bit divide: a plain fdiv on <16 x bfloat> is expected to become
+; a single vdivnepbf16 on ymm registers under both llc invocations.
+define <16 x bfloat> @test_int_x86_avx10_div_nepbf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2) {
+; CHECK-LABEL: test_int_x86_avx10_div_nepbf16_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vdivnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x5e,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = fdiv <16 x bfloat> %x1, %x2
+ ret <16 x bfloat> %res
+}
+
+; Masked 256-bit divide: the fdiv result is selected against %src using %msk
+; viewed as <16 x i1>. The expected assembly is one vdivnepbf16 with a {%k1}
+; write mask that writes into the register holding %src. %ptr is not used by
+; the body.
+define <16 x bfloat> @test_int_x86_avx10_mask_div_nepbf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_mask_div_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vdivnepbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x5e,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_mask_div_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vdivnepbf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x75,0x29,0x5e,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i16 %msk to <16 x i1>
+ %res0 = fdiv <16 x bfloat> %x1, %x2
+ %res = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> %src
+ ret <16 x bfloat> %res
+}
+
+; FIXME: assembly order is different from fp16 ones
+; Zero-masked 256-bit divide: two quotients (%x1 / %x2 and %x1 / a vector
+; loaded from %ptr) are each selected against zeroinitializer under %msk and
+; then divided. The expected assembly has two vdivnepbf16 with {%k1} {z} (the
+; second using a memory operand) followed by an unmasked vdivnepbf16. %src is
+; not used by the body.
+define <16 x bfloat> @test_int_x86_avx10_maskz_div_nepbf16_256(<16 x bfloat> %src, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_maskz_div_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vdivnepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x5e,0xc2]
+; X64-NEXT: vdivnepbf16 (%rsi), %ymm1, %ymm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x5e,0x0e]
+; X64-NEXT: vdivnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x5e,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_div_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vdivnepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x5e,0xc2]
+; X86-NEXT: vdivnepbf16 (%eax), %ymm1, %ymm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0xa9,0x5e,0x08]
+; X86-NEXT: vdivnepbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x5e,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i16 %msk to <16 x i1>
+ %val = load <16 x bfloat>, ptr %ptr
+ %res0 = fdiv <16 x bfloat> %x1, %x2
+ %res1 = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> zeroinitializer
+ %t2 = fdiv <16 x bfloat> %x1, %val
+ %res2 = select <16 x i1> %mask, <16 x bfloat> %t2, <16 x bfloat> zeroinitializer
+ %res3 = fdiv <16 x bfloat> %res1, %res2
+ ret <16 x bfloat> %res3
+}
+
+; Unmasked 128-bit divide: a plain fdiv on <8 x bfloat> is expected to become a
+; single vdivnepbf16 on xmm registers under both llc invocations.
+define <8 x bfloat> @test_int_x86_avx10_div_nepbf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2) {
+; CHECK-LABEL: test_int_x86_avx10_div_nepbf16_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vdivnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x5e,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = fdiv <8 x bfloat> %x1, %x2
+ ret <8 x bfloat> %res
+}
+
+; Masked 128-bit divide: the fdiv result is selected against %src using %msk
+; viewed as <8 x i1>. The expected assembly is one vdivnepbf16 with a {%k1}
+; write mask that writes into the register holding %src. %ptr is not used by
+; the body.
+define <8 x bfloat> @test_int_x86_avx10_mask_div_nepbf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_mask_div_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vdivnepbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x5e,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_mask_div_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vdivnepbf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x75,0x09,0x5e,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i8 %msk to <8 x i1>
+ %res0 = fdiv <8 x bfloat> %x1, %x2
+ %res = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> %src
+ ret <8 x bfloat> %res
+}
+
+; FIXME: assembly order is different from fp16 ones
+; Zero-masked 128-bit divide: two quotients (%x1 / %x2 and %x1 / a vector
+; loaded from %ptr) are each selected against zeroinitializer under %msk and
+; then divided. The expected assembly has two vdivnepbf16 with {%k1} {z} (the
+; second using a memory operand) followed by an unmasked vdivnepbf16. %src is
+; not used by the body.
+define <8 x bfloat> @test_int_x86_avx10_maskz_div_nepbf16_128(<8 x bfloat> %src, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk, ptr %ptr) {
+; X64-LABEL: test_int_x86_avx10_maskz_div_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vdivnepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x5e,0xc2]
+; X64-NEXT: vdivnepbf16 (%rsi), %xmm1, %xmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x5e,0x0e]
+; X64-NEXT: vdivnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x5e,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_div_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vdivnepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x5e,0xc2]
+; X86-NEXT: vdivnepbf16 (%eax), %xmm1, %xmm1 {%k1} {z} # encoding: [0x62,0xf5,0x75,0x89,0x5e,0x08]
+; X86-NEXT: vdivnepbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x5e,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i8 %msk to <8 x i1>
+ %val = load <8 x bfloat>, ptr %ptr
+ %res0 = fdiv <8 x bfloat> %x1, %x2
+ %res1 = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> zeroinitializer
+ %t2 = fdiv <8 x bfloat> %x1, %val
+ %res2 = select <8 x i1> %mask, <8 x bfloat> %t2, <8 x bfloat> zeroinitializer
+ %res3 = fdiv <8 x bfloat> %res1, %res2
+ ret <8 x bfloat> %res3
+}
+
+; 256-bit unordered compare: the <16 x i1> result of fcmp uno is bitcast to
+; i16 and returned. The expected assembly is vcmpunordpbf16 into %k0, a kmovd
+; to %eax, and a vzeroupper before the return.
+define i16 @test_int_x86_avx10_vcmppbf16256(<16 x bfloat> %x1, <16 x bfloat> %x2) {
+; CHECK-LABEL: test_int_x86_avx10_vcmppbf16256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpunordpbf16 %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7f,0x28,0xc2,0xc1,0x03]
+; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %1 = fcmp uno <16 x bfloat> %x1, %x2
+ %res = bitcast <16 x i1> %1 to i16
+ ret i16 %res
+}
+
+; 256-bit ordered-equal compare whose <16 x i1> result is ANDed with a constant
+; vector that keeps only the first two lanes. The expected assembly is
+; vcmpeqpbf16 into %k0 followed by a scalar `andl $3` on the moved mask rather
+; than a mask-register operation.
+define i16 @test_int_x86_avx10_vcmppbf16256_mask2(<16 x bfloat> %x1, <16 x bfloat> %x2) {
+; CHECK-LABEL: test_int_x86_avx10_vcmppbf16256_mask2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpeqpbf16 %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7f,0x28,0xc2,0xc1,0x00]
+; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: andl $3, %eax # encoding: [0x83,0xe0,0x03]
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %1 = fcmp oeq <16 x bfloat> %x1, %x2
+ %2 = and <16 x i1> %1, <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+ %3 = bitcast <16 x i1> %2 to i16
+ ret i16 %3
+}
+
+; 128-bit unordered compare: the <8 x i1> result of fcmp uno is bitcast to i8
+; and returned. The expected assembly is vcmpunordpbf16 into %k0 followed by a
+; kmovd to %eax; no vzeroupper is expected for the xmm form.
+define i8 @test_int_x86_avx10_vcmppbf16128(<8 x bfloat> %x1, <8 x bfloat> %x2) {
+; CHECK-LABEL: test_int_x86_avx10_vcmppbf16128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpunordpbf16 %xmm1, %xmm0, %k0 # encoding: [0x62,0xf3,0x7f,0x08,0xc2,0xc1,0x03]
+; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %1 = fcmp uno <8 x bfloat> %x1, %x2
+ %res = bitcast <8 x i1> %1 to i8
+ ret i8 %res
+}
+
+; 128-bit ordered-equal compare whose <8 x i1> result is ANDed with a constant
+; vector that keeps only the first two lanes. The expected assembly is
+; vcmpeqpbf16 into %k0 followed by a scalar `andb $3` on the moved mask rather
+; than a mask-register operation.
+define i8 @test_int_x86_avx10_vcmppbf16128_mask2(<8 x bfloat> %x1, <8 x bfloat> %x2) {
+; CHECK-LABEL: test_int_x86_avx10_vcmppbf16128_mask2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpeqpbf16 %xmm1, %xmm0, %k0 # encoding: [0x62,0xf3,0x7f,0x08,0xc2,0xc1,0x00]
+; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: andb $3, %al # encoding: [0x24,0x03]
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %1 = fcmp oeq <8 x bfloat> %x1, %x2
+ %2 = and <8 x i1> %1, <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+ %3 = bitcast <8 x i1> %2 to i8
+ ret i8 %3
+}
+
+; Unmasked 256-bit square root: a call to the generic llvm.sqrt.v16bf16
+; intrinsic is expected to become a single vsqrtnepbf16 on ymm registers.
+define <16 x bfloat> @test_sqrt_nepbf16_256(<16 x bfloat> %a0) {
+; CHECK-LABEL: test_sqrt_nepbf16_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsqrtnepbf16 %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x51,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %1 = tail call <16 x bfloat> @llvm.sqrt.v16bf16(<16 x bfloat> %a0)
+ ret <16 x bfloat> %1
+}
+
+; Masked 256-bit square root: llvm.sqrt.v16bf16 of %__A is selected against
+; %__W using %__U viewed as <16 x i1>. The expected assembly is one
+; vsqrtnepbf16 with a {%k1} write mask that writes into the register holding
+; %__W.
+define <16 x bfloat> @test_mm256_mask_sqrt_pbh(<16 x bfloat> %__W, i16 %__U, <16 x bfloat> %__A) {
+; X64-LABEL: test_mm256_mask_sqrt_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vsqrtnepbf16 %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x51,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_sqrt_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vsqrtnepbf16 %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x51,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x bfloat> @llvm.sqrt.v16bf16(<16 x bfloat> %__A)
+ %1 = bitcast i16 %__U to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__W
+ ret <16 x bfloat> %2
+}
+
+; Zero-masked 256-bit square root: llvm.sqrt.v16bf16 of %__A is selected
+; against zeroinitializer using %__U viewed as <16 x i1>. The expected assembly
+; is one vsqrtnepbf16 with {%k1} {z}.
+define <16 x bfloat> @test_mm256_maskz_sqrt_pbh(i16 %__U, <16 x bfloat>%__A) {
+; X64-LABEL: test_mm256_maskz_sqrt_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vsqrtnepbf16 %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x51,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_sqrt_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vsqrtnepbf16 %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x51,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x bfloat> @llvm.sqrt.v16bf16(<16 x bfloat> %__A)
+ %1 = bitcast i16 %__U to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> zeroinitializer
+ ret <16 x bfloat> %2
+}
+
+; Unmasked 128-bit square root: a call to the generic llvm.sqrt.v8bf16
+; intrinsic is expected to become a single vsqrtnepbf16 on xmm registers.
+define <8 x bfloat> @test_sqrt_nepbf16_128(<8 x bfloat> %a0) {
+; CHECK-LABEL: test_sqrt_nepbf16_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsqrtnepbf16 %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x51,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %1 = tail call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> %a0)
+ ret <8 x bfloat> %1
+}
+
+; Masked 128-bit square root: llvm.sqrt.v8bf16 of %__A is selected against
+; %__W using %__U viewed as <8 x i1>. The expected assembly is one
+; vsqrtnepbf16 with a {%k1} write mask that writes into the register holding
+; %__W.
+define <8 x bfloat> @test_mm_mask_sqrt_pbh(<8 x bfloat> %__W, i8 %__U, <8 x bfloat> %__A) {
+; X64-LABEL: test_mm_mask_sqrt_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vsqrtnepbf16 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x51,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask_sqrt_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vsqrtnepbf16 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x51,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> %__A)
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__W
+ ret <8 x bfloat> %2
+}
+
+define <8 x bfloat> @test_mm_maskz_sqrt_pbh(i8 %__U, <8 x bfloat>%__A) {
+; X64-LABEL: test_mm_maskz_sqrt_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vsqrtnepbf16 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x51,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_maskz_sqrt_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vsqrtnepbf16 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x51,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> %__A)
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> zeroinitializer
+ ret <8 x bfloat> %2
+}
+
+define <16 x bfloat> @test_mm256_fmaddne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) {
+; CHECK-LABEL: test_mm256_fmaddne_pbh:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfmadd213nepbf16 %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf6,0x74,0x28,0xa8,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C)
+ ret <16 x bfloat> %0
+}
+
+define <16 x bfloat> @test_mm256_mask_fmaddne_pbh(<16 x bfloat> %__A, i16 zeroext %__U, <16 x bfloat> %__B, <16 x bfloat> %__C) {
+; X64-LABEL: test_mm256_mask_fmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmadd132nepbf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x98,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_fmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmadd132nepbf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x98,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C)
+ %1 = bitcast i16 %__U to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__A
+ ret <16 x bfloat> %2
+}
+
+define <16 x bfloat> @test_mm256_mask3_fmaddne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C, i16 zeroext %__U) {
+; X64-LABEL: test_mm256_mask3_fmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmadd231nepbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xb8,0xd1]
+; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask3_fmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmadd231nepbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xb8,0xd1]
+; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C)
+ %1 = bitcast i16 %__U to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__C
+ ret <16 x bfloat> %2
+}
+
+define <16 x bfloat> @test_mm256_maskz_fmaddne_pbh(i16 zeroext %__U, <16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) {
+; X64-LABEL: test_mm256_maskz_fmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmadd213nepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xa8,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_fmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmadd213nepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xa8,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C)
+ %1 = bitcast i16 %__U to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> zeroinitializer
+ ret <16 x bfloat> %2
+}
+
+define <16 x bfloat> @test_mm256_fmsubne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) {
+; CHECK-LABEL: test_mm256_fmsubne_pbh:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfmsub213nepbf16 %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf6,0x74,0x28,0xaa,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %fneg.i = fneg <16 x bfloat> %__C
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %fneg.i)
+ ret <16 x bfloat> %0
+}
+
+define <16 x bfloat> @test_mm256_mask_fmsubne_pbh(<16 x bfloat> %__A, i16 zeroext %__U, <16 x bfloat> %__B, <16 x bfloat> %__C) {
+; X64-LABEL: test_mm256_mask_fmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmsub132nepbf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x9a,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_fmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmsub132nepbf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x9a,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <16 x bfloat> %__C
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %fneg.i.i)
+ %1 = bitcast i16 %__U to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__A
+ ret <16 x bfloat> %2
+}
+
+define <16 x bfloat> @test_mm256_mask3_fmsubne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C, i16 zeroext %__U) {
+; X64-LABEL: test_mm256_mask3_fmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmsub231nepbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xba,0xd1]
+; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask3_fmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmsub231nepbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xba,0xd1]
+; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <16 x bfloat> %__C
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %fneg.i.i)
+ %1 = bitcast i16 %__U to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__C
+ ret <16 x bfloat> %2
+}
+
+define <16 x bfloat> @test_mm256_maskz_fmsubne_pbh(i16 zeroext %__U, <16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) {
+; X64-LABEL: test_mm256_maskz_fmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmsub213nepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xaa,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_fmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmsub213nepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xaa,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <16 x bfloat> %__C
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %fneg.i.i)
+ %1 = bitcast i16 %__U to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> zeroinitializer
+ ret <16 x bfloat> %2
+}
+
+define <16 x bfloat> @test_mm256_fnmaddne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) {
+; CHECK-LABEL: test_mm256_fnmaddne_pbh:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfnmadd213nepbf16 %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf6,0x74,0x28,0xac,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %fneg.i = fneg <16 x bfloat> %__B
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i, <16 x bfloat> %__C)
+ ret <16 x bfloat> %0
+}
+
+define <16 x bfloat> @test_mm256_mask_fnmaddne_pbh(<16 x bfloat> %__A, i16 zeroext %__U, <16 x bfloat> %__B, <16 x bfloat> %__C) {
+; X64-LABEL: test_mm256_mask_fnmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmadd132nepbf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x9c,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_fnmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmadd132nepbf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x9c,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <16 x bfloat> %__B
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i.i, <16 x bfloat> %__C)
+ %1 = bitcast i16 %__U to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__A
+ ret <16 x bfloat> %2
+}
+
+define <16 x bfloat> @test_mm256_mask3_fnmaddne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C, i16 zeroext %__U) {
+; X64-LABEL: test_mm256_mask3_fnmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmadd231nepbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xbc,0xd1]
+; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask3_fnmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmadd231nepbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xbc,0xd1]
+; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <16 x bfloat> %__B
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i.i, <16 x bfloat> %__C)
+ %1 = bitcast i16 %__U to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__C
+ ret <16 x bfloat> %2
+}
+
+define <16 x bfloat> @test_mm256_maskz_fnmaddne_pbh(i16 zeroext %__U, <16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) {
+; X64-LABEL: test_mm256_maskz_fnmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmadd213nepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xac,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_fnmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmadd213nepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xac,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <16 x bfloat> %__B
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i.i, <16 x bfloat> %__C)
+ %1 = bitcast i16 %__U to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> zeroinitializer
+ ret <16 x bfloat> %2
+}
+
+define <16 x bfloat> @test_mm256_fnmsubne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) {
+; CHECK-LABEL: test_mm256_fnmsubne_pbh:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfnmsub213nepbf16 %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf6,0x74,0x28,0xae,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %fneg.i = fneg <16 x bfloat> %__B
+ %fneg1.i = fneg <16 x bfloat> %__C
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i, <16 x bfloat> %fneg1.i)
+ ret <16 x bfloat> %0
+}
+
+define <16 x bfloat> @test_mm256_mask_fnmsubne_pbh(<16 x bfloat> %__A, i16 zeroext %__U, <16 x bfloat> %__B, <16 x bfloat> %__C) {
+; X64-LABEL: test_mm256_mask_fnmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmsub132nepbf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x9e,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_fnmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmsub132nepbf16 %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x29,0x9e,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <16 x bfloat> %__B
+ %fneg1.i.i = fneg <16 x bfloat> %__C
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i.i, <16 x bfloat> %fneg1.i.i)
+ %1 = bitcast i16 %__U to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__A
+ ret <16 x bfloat> %2
+}
+
+define <16 x bfloat> @test_mm256_mask3_fnmsubne_pbh(<16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C, i16 zeroext %__U) {
+; X64-LABEL: test_mm256_mask3_fnmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmsub231nepbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xbe,0xd1]
+; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask3_fnmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmsub231nepbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0xbe,0xd1]
+; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <16 x bfloat> %__B
+ %fneg1.i.i = fneg <16 x bfloat> %__C
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i.i, <16 x bfloat> %fneg1.i.i)
+ %1 = bitcast i16 %__U to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> %__C
+ ret <16 x bfloat> %2
+}
+
+define <16 x bfloat> @test_mm256_maskz_fnmsubne_pbh(i16 zeroext %__U, <16 x bfloat> %__A, <16 x bfloat> %__B, <16 x bfloat> %__C) {
+; X64-LABEL: test_mm256_maskz_fnmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmsub213nepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xae,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_fnmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmsub213nepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0xa9,0xae,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <16 x bfloat> %__B
+ %fneg1.i.i = fneg <16 x bfloat> %__C
+ %0 = tail call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %__A, <16 x bfloat> %fneg.i.i, <16 x bfloat> %fneg1.i.i)
+ %1 = bitcast i16 %__U to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x bfloat> %0, <16 x bfloat> zeroinitializer
+ ret <16 x bfloat> %2
+}
+
+define <8 x bfloat> @test_mm_fmaddne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) {
+; CHECK-LABEL: test_mm_fmaddne_pbh:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfmadd213nepbf16 %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf6,0x74,0x08,0xa8,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C)
+ ret <8 x bfloat> %0
+}
+
+define <8 x bfloat> @test_mm_mask_fmaddne_pbh(<8 x bfloat> %__A, i8 zeroext %__U, <8 x bfloat> %__B, <8 x bfloat> %__C) {
+; X64-LABEL: test_mm_mask_fmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmadd132nepbf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x98,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask_fmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmadd132nepbf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x98,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C)
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__A
+ ret <8 x bfloat> %2
+}
+
+define <8 x bfloat> @test_mm_mask3_fmaddne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C, i8 zeroext %__U) {
+; X64-LABEL: test_mm_mask3_fmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmadd231nepbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xb8,0xd1]
+; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask3_fmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmadd231nepbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xb8,0xd1]
+; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C)
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__C
+ ret <8 x bfloat> %2
+}
+
+define <8 x bfloat> @test_mm_maskz_fmaddne_pbh(i8 zeroext %__U, <8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) {
+; X64-LABEL: test_mm_maskz_fmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmadd213nepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xa8,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_maskz_fmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmadd213nepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xa8,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C)
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> zeroinitializer
+ ret <8 x bfloat> %2
+}
+
+define <8 x bfloat> @test_mm_fmsubne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) {
+; CHECK-LABEL: test_mm_fmsubne_pbh:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfmsub213nepbf16 %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf6,0x74,0x08,0xaa,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %fneg.i = fneg <8 x bfloat> %__C
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %fneg.i)
+ ret <8 x bfloat> %0
+}
+
+define <8 x bfloat> @test_mm_mask_fmsubne_pbh(<8 x bfloat> %__A, i8 zeroext %__U, <8 x bfloat> %__B, <8 x bfloat> %__C) {
+; X64-LABEL: test_mm_mask_fmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmsub132nepbf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x9a,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask_fmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmsub132nepbf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x9a,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <8 x bfloat> %__C
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %fneg.i.i)
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__A
+ ret <8 x bfloat> %2
+}
+
+define <8 x bfloat> @test_mm_mask3_fmsubne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C, i8 zeroext %__U) {
+; X64-LABEL: test_mm_mask3_fmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmsub231nepbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xba,0xd1]
+; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask3_fmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmsub231nepbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xba,0xd1]
+; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <8 x bfloat> %__C
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %fneg.i.i)
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__C
+ ret <8 x bfloat> %2
+}
+
+define <8 x bfloat> @test_mm_maskz_fmsubne_pbh(i8 zeroext %__U, <8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) {
+; X64-LABEL: test_mm_maskz_fmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfmsub213nepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xaa,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_maskz_fmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfmsub213nepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xaa,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <8 x bfloat> %__C
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %fneg.i.i)
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> zeroinitializer
+ ret <8 x bfloat> %2
+}
+
+define <8 x bfloat> @test_mm_fnmaddne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) {
+; CHECK-LABEL: test_mm_fnmaddne_pbh:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfnmadd213nepbf16 %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf6,0x74,0x08,0xac,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %fneg.i = fneg <8 x bfloat> %__B
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i, <8 x bfloat> %__C)
+ ret <8 x bfloat> %0
+}
+
+define <8 x bfloat> @test_mm_mask_fnmaddne_pbh(<8 x bfloat> %__A, i8 zeroext %__U, <8 x bfloat> %__B, <8 x bfloat> %__C) {
+; X64-LABEL: test_mm_mask_fnmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmadd132nepbf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x9c,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask_fnmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmadd132nepbf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x9c,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <8 x bfloat> %__B
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i.i, <8 x bfloat> %__C)
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__A
+ ret <8 x bfloat> %2
+}
+
+define <8 x bfloat> @test_mm_mask3_fnmaddne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C, i8 zeroext %__U) {
+; X64-LABEL: test_mm_mask3_fnmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmadd231nepbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xbc,0xd1]
+; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask3_fnmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmadd231nepbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xbc,0xd1]
+; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <8 x bfloat> %__B
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i.i, <8 x bfloat> %__C)
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__C
+ ret <8 x bfloat> %2
+}
+
+define <8 x bfloat> @test_mm_maskz_fnmaddne_pbh(i8 zeroext %__U, <8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) {
+; X64-LABEL: test_mm_maskz_fnmaddne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmadd213nepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xac,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_maskz_fnmaddne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmadd213nepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xac,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <8 x bfloat> %__B
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i.i, <8 x bfloat> %__C)
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> zeroinitializer
+ ret <8 x bfloat> %2
+}
+
+define <8 x bfloat> @test_mm_fnmsubne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) {
+; CHECK-LABEL: test_mm_fnmsubne_pbh:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfnmsub213nepbf16 %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf6,0x74,0x08,0xae,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %fneg.i = fneg <8 x bfloat> %__B
+ %fneg1.i = fneg <8 x bfloat> %__C
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i, <8 x bfloat> %fneg1.i)
+ ret <8 x bfloat> %0
+}
+
+define <8 x bfloat> @test_mm_mask_fnmsubne_pbh(<8 x bfloat> %__A, i8 zeroext %__U, <8 x bfloat> %__B, <8 x bfloat> %__C) {
+; X64-LABEL: test_mm_mask_fnmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmsub132nepbf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x9e,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask_fnmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmsub132nepbf16 %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf6,0x6c,0x09,0x9e,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <8 x bfloat> %__B
+ %fneg1.i.i = fneg <8 x bfloat> %__C
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i.i, <8 x bfloat> %fneg1.i.i)
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__A
+ ret <8 x bfloat> %2
+}
+
+define <8 x bfloat> @test_mm_mask3_fnmsubne_pbh(<8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C, i8 zeroext %__U) {
+; X64-LABEL: test_mm_mask3_fnmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmsub231nepbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xbe,0xd1]
+; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask3_fnmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmsub231nepbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0xbe,0xd1]
+; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <8 x bfloat> %__B
+ %fneg1.i.i = fneg <8 x bfloat> %__C
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i.i, <8 x bfloat> %fneg1.i.i)
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> %__C
+ ret <8 x bfloat> %2
+}
+
+define <8 x bfloat> @test_mm_maskz_fnmsubne_pbh(i8 zeroext %__U, <8 x bfloat> %__A, <8 x bfloat> %__B, <8 x bfloat> %__C) {
+; X64-LABEL: test_mm_maskz_fnmsubne_pbh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vfnmsub213nepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xae,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_maskz_fnmsubne_pbh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vfnmsub213nepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x74,0x89,0xae,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %fneg.i.i = fneg <8 x bfloat> %__B
+ %fneg1.i.i = fneg <8 x bfloat> %__C
+ %0 = tail call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %__A, <8 x bfloat> %fneg.i.i, <8 x bfloat> %fneg1.i.i)
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> zeroinitializer
+ ret <8 x bfloat> %2
+}
diff --git a/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll
new file mode 100644
index 00000000000000..f0d3ed239662f7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll
@@ -0,0 +1,536 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X86
+
+declare <16 x bfloat> @llvm.x86.avx10.vminpbf16256(<16 x bfloat>, <16 x bfloat>)
+
+define <16 x bfloat> @test_int_x86_avx10_min_nepbf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2) { ; unmasked 256-bit vminpbf16 intrinsic call
+; CHECK-LABEL: test_int_x86_avx10_min_nepbf16_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vminpbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x5d,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res0 = call <16 x bfloat> @llvm.x86.avx10.vminpbf16256(<16 x bfloat> %x1, <16 x bfloat> %x2)
+ ret <16 x bfloat> %res0
+}
+
+define <16 x bfloat> @test_int_x86_avx10_maskz_min_nepbf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk) { ; 256-bit vminpbf16 whose result is zero-masked by %msk through an IR select
+; X64-LABEL: test_int_x86_avx10_maskz_min_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vminpbf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x5d,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_min_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vminpbf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x5d,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i16 %msk to <16 x i1>
+ %res0 = call <16 x bfloat> @llvm.x86.avx10.vminpbf16256(<16 x bfloat> %x1, <16 x bfloat> %x2)
+ %res1 = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> zeroinitializer ; expected to fold into the {z} form of the instruction
+ ret <16 x bfloat> %res1
+}
+
+declare <8 x bfloat> @llvm.x86.avx10.vminpbf16128(<8 x bfloat>, <8 x bfloat>)
+
+define <8 x bfloat> @test_int_x86_avx10_min_nepbf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2) { ; unmasked 128-bit vminpbf16 intrinsic call
+; CHECK-LABEL: test_int_x86_avx10_min_nepbf16_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vminpbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x5d,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res0 = call <8 x bfloat> @llvm.x86.avx10.vminpbf16128(<8 x bfloat> %x1, <8 x bfloat> %x2)
+ ret <8 x bfloat> %res0
+}
+
+define <8 x bfloat> @test_int_x86_avx10_maskz_min_nepbf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk) { ; 128-bit vminpbf16 whose result is zero-masked by %msk through an IR select
+; X64-LABEL: test_int_x86_avx10_maskz_min_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vminpbf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x5d,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_min_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vminpbf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x5d,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i8 %msk to <8 x i1>
+ %res0 = call <8 x bfloat> @llvm.x86.avx10.vminpbf16128(<8 x bfloat> %x1, <8 x bfloat> %x2)
+ %res1 = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> zeroinitializer ; expected to fold into the {z} form of the instruction
+ ret <8 x bfloat> %res1
+}
+
+declare <16 x bfloat> @llvm.x86.avx10.vmaxpbf16256(<16 x bfloat>, <16 x bfloat>)
+
+define <16 x bfloat> @test_int_x86_avx10_max_nepbf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2) { ; unmasked 256-bit vmaxpbf16 intrinsic call
+; CHECK-LABEL: test_int_x86_avx10_max_nepbf16_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmaxpbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x5f,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res0 = call <16 x bfloat> @llvm.x86.avx10.vmaxpbf16256(<16 x bfloat> %x1, <16 x bfloat> %x2)
+ ret <16 x bfloat> %res0
+}
+
+define <16 x bfloat> @test_int_x86_avx10_maskz_max_nepbf16_256(<16 x bfloat> %x1, <16 x bfloat> %x2, i16 %msk) { ; 256-bit vmaxpbf16 whose result is zero-masked by %msk through an IR select
+; X64-LABEL: test_int_x86_avx10_maskz_max_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vmaxpbf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x5f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_max_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vmaxpbf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x5f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i16 %msk to <16 x i1>
+ %res0 = call <16 x bfloat> @llvm.x86.avx10.vmaxpbf16256(<16 x bfloat> %x1, <16 x bfloat> %x2)
+ %res1 = select <16 x i1> %mask, <16 x bfloat> %res0, <16 x bfloat> zeroinitializer ; expected to fold into the {z} form of the instruction
+ ret <16 x bfloat> %res1
+}
+
+declare <8 x bfloat> @llvm.x86.avx10.vmaxpbf16128(<8 x bfloat>, <8 x bfloat>)
+
+define <8 x bfloat> @test_int_x86_avx10_max_nepbf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2) { ; unmasked 128-bit vmaxpbf16 intrinsic call
+; CHECK-LABEL: test_int_x86_avx10_max_nepbf16_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmaxpbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x5f,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res0 = call <8 x bfloat> @llvm.x86.avx10.vmaxpbf16128(<8 x bfloat> %x1, <8 x bfloat> %x2)
+ ret <8 x bfloat> %res0
+}
+
+define <8 x bfloat> @test_int_x86_avx10_maskz_max_nepbf16_128(<8 x bfloat> %x1, <8 x bfloat> %x2, i8 %msk) { ; 128-bit vmaxpbf16 whose result is zero-masked by %msk through an IR select
+; X64-LABEL: test_int_x86_avx10_maskz_max_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vmaxpbf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x5f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx10_maskz_max_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vmaxpbf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x5f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %mask = bitcast i8 %msk to <8 x i1>
+ %res0 = call <8 x bfloat> @llvm.x86.avx10.vmaxpbf16128(<8 x bfloat> %x1, <8 x bfloat> %x2)
+ %res1 = select <8 x i1> %mask, <8 x bfloat> %res0, <8 x bfloat> zeroinitializer ; expected to fold into the {z} form of the instruction
+ ret <8 x bfloat> %res1
+}
+
+declare <8 x bfloat> @llvm.x86.avx10.mask.rsqrt.nepbf16.128(<8 x bfloat>, <8 x bfloat>, i8)
+declare <16 x bfloat> @llvm.x86.avx10.mask.rsqrt.nepbf16.256(<16 x bfloat>, <16 x bfloat>, i16)
+
+define <8 x bfloat> @test_rsqrt_nepbf16_128(<8 x bfloat> %a0) { ; 128-bit rsqrt intrinsic called with an all-ones mask
+; CHECK-LABEL: test_rsqrt_nepbf16_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vrsqrtpbf16 %xmm0, %xmm0 # encoding: [0x62,0xf6,0x7c,0x08,0x4e,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <8 x bfloat> @llvm.x86.avx10.mask.rsqrt.nepbf16.128(<8 x bfloat> %a0, <8 x bfloat> zeroinitializer, i8 -1) ; i8 -1 selects every lane, so the unmasked encoding is expected
+ ret <8 x bfloat> %res
+}
+
+define <16 x bfloat> @test_rsqrt_nepbf16_256(<16 x bfloat> %a0) { ; 256-bit rsqrt intrinsic called with an all-ones mask
+; CHECK-LABEL: test_rsqrt_nepbf16_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vrsqrtpbf16 %ymm0, %ymm0 # encoding: [0x62,0xf6,0x7c,0x28,0x4e,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <16 x bfloat> @llvm.x86.avx10.mask.rsqrt.nepbf16.256(<16 x bfloat> %a0, <16 x bfloat> zeroinitializer, i16 -1) ; i16 -1 selects every lane, so the unmasked encoding is expected
+ ret <16 x bfloat> %res
+}
+
+declare <8 x bfloat> @llvm.x86.avx10.mask.rcp.nepbf16.128(<8 x bfloat>, <8 x bfloat>, i8)
+declare <16 x bfloat> @llvm.x86.avx10.mask.rcp.nepbf16.256(<16 x bfloat>, <16 x bfloat>, i16)
+
+define <8 x bfloat> @test_rcp_nepbf16_128(<8 x bfloat> %a0, <8 x bfloat> %a1, i8 %mask) { ; 128-bit rcp intrinsic with pass-through %a1 and a variable mask
+; X64-LABEL: test_rcp_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vrcppbf16 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0x4c,0xc8]
+; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_rcp_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vrcppbf16 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0x4c,0xc8]
+; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x bfloat> @llvm.x86.avx10.mask.rcp.nepbf16.128(<8 x bfloat> %a0, <8 x bfloat> %a1, i8 %mask) ; expected to write into the register holding %a1 under {%k1}
+ ret <8 x bfloat> %res
+}
+
+define <16 x bfloat> @test_rcp_nepbf16_256(<16 x bfloat> %a0, <16 x bfloat> %a1, i16 %mask) { ; 256-bit rcp intrinsic with pass-through %a1 and a variable mask
+; X64-LABEL: test_rcp_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vrcppbf16 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0x4c,0xc8]
+; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_rcp_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vrcppbf16 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0x4c,0xc8]
+; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <16 x bfloat> @llvm.x86.avx10.mask.rcp.nepbf16.256(<16 x bfloat> %a0, <16 x bfloat> %a1, i16 %mask) ; expected to write into the register holding %a1 under {%k1}
+ ret <16 x bfloat> %res
+}
+
+declare <8 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.128(<8 x bfloat>, i32, <8 x bfloat>, i8)
+declare <16 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.256(<16 x bfloat>, i32, <16 x bfloat>, i16)
+
+define <8 x bfloat>@test_int_x86_avx512_mask_reduce_nepbf16_128(<8 x bfloat> %x0, <8 x bfloat> %x2, i8 %x3) { ; 128-bit reduce intrinsic, once masked (imm 8) and once with an all-ones mask (imm 4), results added
+; X64-LABEL: test_int_x86_avx512_mask_reduce_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vreducenepbf16 $8, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x56,0xc8,0x08]
+; X64-NEXT: vreducenepbf16 $4, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7f,0x08,0x56,0xc0,0x04]
+; X64-NEXT: vaddnepbf16 %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf5,0x75,0x08,0x58,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_reduce_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vreducenepbf16 $8, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x56,0xc8,0x08]
+; X86-NEXT: vreducenepbf16 $4, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7f,0x08,0x56,0xc0,0x04]
+; X86-NEXT: vaddnepbf16 %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf5,0x75,0x08,0x58,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.128(<8 x bfloat> %x0, i32 8, <8 x bfloat> %x2, i8 %x3) ; masked by %x3, pass-through %x2
+ %res1 = call <8 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.128(<8 x bfloat> %x0, i32 4, <8 x bfloat> %x2, i8 -1) ; all-ones mask
+ %res2 = fadd <8 x bfloat> %res, %res1 ; uses both results so both calls appear in the output
+ ret <8 x bfloat> %res2
+}
+
+define <16 x bfloat>@test_int_x86_avx512_mask_reduce_nepbf16_256(<16 x bfloat> %x0, <16 x bfloat> %x2, i16 %x3) { ; 256-bit reduce intrinsic, once masked (imm 8) and once with an all-ones mask (imm 4), results added
+; X64-LABEL: test_int_x86_avx512_mask_reduce_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vreducenepbf16 $8, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x56,0xc8,0x08]
+; X64-NEXT: vreducenepbf16 $4, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7f,0x28,0x56,0xc0,0x04]
+; X64-NEXT: vaddnepbf16 %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_reduce_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vreducenepbf16 $8, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x56,0xc8,0x08]
+; X86-NEXT: vreducenepbf16 $4, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7f,0x28,0x56,0xc0,0x04]
+; X86-NEXT: vaddnepbf16 %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <16 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.256(<16 x bfloat> %x0, i32 8, <16 x bfloat> %x2, i16 %x3) ; masked by %x3, pass-through %x2
+ %res1 = call <16 x bfloat> @llvm.x86.avx10.mask.reduce.nepbf16.256(<16 x bfloat> %x0, i32 4, <16 x bfloat> %x2, i16 -1) ; all-ones mask
+ %res2 = fadd <16 x bfloat> %res, %res1 ; uses both results so both calls appear in the output
+ ret <16 x bfloat> %res2
+}
+
+declare <8 x i1> @llvm.x86.avx10.fpclass.nepbf16.128(<8 x bfloat>, i32)
+declare <16 x i1> @llvm.x86.avx10.fpclass.nepbf16.256(<16 x bfloat>, i32)
+
+define i8 @test_int_x86_avx512_fpclass_nepbf16_128(<8 x bfloat> %x0) { ; ANDs two 128-bit fpclass results (imm 4 and imm 2) and returns the mask as i8
+; CHECK-LABEL: test_int_x86_avx512_fpclass_nepbf16_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfpclasspbf16 $2, %xmm0, %k1 # encoding: [0x62,0xf3,0x7f,0x08,0x66,0xc8,0x02]
+; CHECK-NEXT: vfpclasspbf16 $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x66,0xc0,0x04]
+; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <8 x i1> @llvm.x86.avx10.fpclass.nepbf16.128(<8 x bfloat> %x0, i32 4)
+ %res1 = call <8 x i1> @llvm.x86.avx10.fpclass.nepbf16.128(<8 x bfloat> %x0, i32 2)
+ %1 = and <8 x i1> %res1, %res ; expected to become the {%k1} predicate on the second vfpclasspbf16
+ %2 = bitcast <8 x i1> %1 to i8
+ ret i8 %2
+}
+
+define i16 @test_int_x86_avx512_fpclass_nepbf16_256(<16 x bfloat> %x0) { ; ANDs two 256-bit fpclass results (imm 4 and imm 2) and returns the mask as i16
+; CHECK-LABEL: test_int_x86_avx512_fpclass_nepbf16_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfpclasspbf16 $2, %ymm0, %k1 # encoding: [0x62,0xf3,0x7f,0x28,0x66,0xc8,0x02]
+; CHECK-NEXT: vfpclasspbf16 $4, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x66,0xc0,0x04]
+; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <16 x i1> @llvm.x86.avx10.fpclass.nepbf16.256(<16 x bfloat> %x0, i32 4)
+ %res1 = call <16 x i1> @llvm.x86.avx10.fpclass.nepbf16.256(<16 x bfloat> %x0, i32 2)
+ %1 = and <16 x i1> %res1, %res ; expected to become the {%k1} predicate on the second vfpclasspbf16
+ %2 = bitcast <16 x i1> %1 to i16
+ ret i16 %2
+}
+
+declare <8 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.128(<8 x bfloat>, <8 x bfloat>, i8)
+declare <16 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.256(<16 x bfloat>, <16 x bfloat>, i16)
+
+define <8 x bfloat>@test_int_x86_avx512_getexp_nepbf16_128(<8 x bfloat> %x0) { ; 128-bit getexp intrinsic called with an all-ones mask
+; CHECK-LABEL: test_int_x86_avx512_getexp_nepbf16_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vgetexppbf16 %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x42,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <8 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.128(<8 x bfloat> %x0, <8 x bfloat> zeroinitializer, i8 -1) ; i8 -1 selects every lane, so the unmasked encoding is expected
+ ret <8 x bfloat> %res
+}
+
+define <8 x bfloat>@test_int_x86_avx512_mask_getexp_nepbf16_128(<8 x bfloat> %x0, <8 x bfloat> %x1, i8 %x2) { ; 128-bit getexp intrinsic with pass-through %x1 and variable mask %x2
+; X64-LABEL: test_int_x86_avx512_mask_getexp_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vgetexppbf16 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x42,0xc8]
+; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_getexp_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vgetexppbf16 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x42,0xc8]
+; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.128(<8 x bfloat> %x0, <8 x bfloat> %x1, i8 %x2) ; expected to write into the register holding %x1 under {%k1}
+ ret <8 x bfloat> %res
+}
+
+define <8 x bfloat>@test_int_x86_avx512_maskz_getexp_nepbf16_128(<8 x bfloat> %x0, i8 %x2) { ; 128-bit getexp intrinsic with a zero pass-through and variable mask %x2
+; X64-LABEL: test_int_x86_avx512_maskz_getexp_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vgetexppbf16 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x42,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_maskz_getexp_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vgetexppbf16 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x42,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.128(<8 x bfloat> %x0, <8 x bfloat> zeroinitializer, i8 %x2) ; zero pass-through is expected to select the {z} form
+ ret <8 x bfloat> %res
+}
+
+define <16 x bfloat>@test_int_x86_avx512_getexp_nepbf16_256(<16 x bfloat> %x0) { ; 256-bit getexp intrinsic called with an all-ones mask
+; CHECK-LABEL: test_int_x86_avx512_getexp_nepbf16_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vgetexppbf16 %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x42,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <16 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.256(<16 x bfloat> %x0, <16 x bfloat> zeroinitializer, i16 -1) ; i16 -1 selects every lane, so the unmasked encoding is expected
+ ret <16 x bfloat> %res
+}
+
+define <16 x bfloat>@test_int_x86_avx512_mask_getexp_nepbf16_256(<16 x bfloat> %x0, <16 x bfloat> %x1, i16 %x2) { ; 256-bit getexp intrinsic with pass-through %x1 and variable mask %x2
+; X64-LABEL: test_int_x86_avx512_mask_getexp_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vgetexppbf16 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x42,0xc8]
+; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_getexp_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vgetexppbf16 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x42,0xc8]
+; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <16 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.256(<16 x bfloat> %x0, <16 x bfloat> %x1, i16 %x2) ; expected to write into the register holding %x1 under {%k1}
+ ret <16 x bfloat> %res
+}
+
+define <16 x bfloat>@test_int_x86_avx512_maskz_getexp_nepbf16_256(<16 x bfloat> %x0, i16 %x2) { ; 256-bit getexp intrinsic with a zero pass-through and variable mask %x2
+; X64-LABEL: test_int_x86_avx512_maskz_getexp_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vgetexppbf16 %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x42,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_maskz_getexp_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vgetexppbf16 %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x42,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <16 x bfloat> @llvm.x86.avx10.mask.getexp.nepbf16.256(<16 x bfloat> %x0, <16 x bfloat> zeroinitializer, i16 %x2) ; zero pass-through is expected to select the {z} form
+ ret <16 x bfloat> %res
+}
+
+declare <8 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.128(<8 x bfloat>, i32, <8 x bfloat>, i8)
+declare <16 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.256(<16 x bfloat>, i32, <16 x bfloat>, i16)
+
+define <8 x bfloat>@test_int_x86_avx512_mask_getmant_nepbf16_128(<8 x bfloat> %x0, <8 x bfloat> %x2, i8 %x3) { ; 128-bit getmant intrinsic, once masked (imm 8) and once with an all-ones mask (imm 4), results added
+; X64-LABEL: test_int_x86_avx512_mask_getmant_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vgetmantpbf16 $8, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x26,0xc8,0x08]
+; X64-NEXT: vgetmantpbf16 $4, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7f,0x08,0x26,0xc0,0x04]
+; X64-NEXT: vaddnepbf16 %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf5,0x75,0x08,0x58,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_getmant_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vgetmantpbf16 $8, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x26,0xc8,0x08]
+; X86-NEXT: vgetmantpbf16 $4, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7f,0x08,0x26,0xc0,0x04]
+; X86-NEXT: vaddnepbf16 %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf5,0x75,0x08,0x58,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.128(<8 x bfloat> %x0, i32 8, <8 x bfloat> %x2, i8 %x3) ; masked by %x3, pass-through %x2
+ %res1 = call <8 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.128(<8 x bfloat> %x0, i32 4, <8 x bfloat> %x2, i8 -1) ; all-ones mask
+ %res2 = fadd <8 x bfloat> %res, %res1 ; uses both results so both calls appear in the output
+ ret <8 x bfloat> %res2
+}
+
+define <16 x bfloat>@test_int_x86_avx512_mask_getmant_nepbf16_256(<16 x bfloat> %x0, <16 x bfloat> %x2, i16 %x3) { ; 256-bit getmant intrinsic, once masked (imm 8) and once with an all-ones mask (imm 4), results added
+; X64-LABEL: test_int_x86_avx512_mask_getmant_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vgetmantpbf16 $8, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x26,0xc8,0x08]
+; X64-NEXT: vgetmantpbf16 $4, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7f,0x28,0x26,0xc0,0x04]
+; X64-NEXT: vaddnepbf16 %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_getmant_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vgetmantpbf16 $8, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x26,0xc8,0x08]
+; X86-NEXT: vgetmantpbf16 $4, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7f,0x28,0x26,0xc0,0x04]
+; X86-NEXT: vaddnepbf16 %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <16 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.256(<16 x bfloat> %x0, i32 8, <16 x bfloat> %x2, i16 %x3) ; masked by %x3, pass-through %x2
+ %res1 = call <16 x bfloat> @llvm.x86.avx10.mask.getmant.nepbf16.256(<16 x bfloat> %x0, i32 4, <16 x bfloat> %x2, i16 -1) ; all-ones mask
+ %res2 = fadd <16 x bfloat> %res, %res1 ; uses both results so both calls appear in the output
+ ret <16 x bfloat> %res2
+}
+
+declare <8 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.128(<8 x bfloat>, i32, <8 x bfloat>, i8)
+declare <16 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.256(<16 x bfloat>, i32, <16 x bfloat>, i16)
+
+define <8 x bfloat>@test_int_x86_avx512_mask_rndscale_nepbf16_128(<8 x bfloat> %x0, <8 x bfloat> %x2, i8 %x3) { ; 128-bit rndscale intrinsic, once masked (imm 8) and once with an all-ones mask (imm 4), results added
+; X64-LABEL: test_int_x86_avx512_mask_rndscale_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vrndscalenepbf16 $8, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x08,0xc8,0x08]
+; X64-NEXT: vrndscalenepbf16 $4, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7f,0x08,0x08,0xc0,0x04]
+; X64-NEXT: vaddnepbf16 %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf5,0x75,0x08,0x58,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_rndscale_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vrndscalenepbf16 $8, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x08,0xc8,0x08]
+; X86-NEXT: vrndscalenepbf16 $4, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7f,0x08,0x08,0xc0,0x04]
+; X86-NEXT: vaddnepbf16 %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf5,0x75,0x08,0x58,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.128(<8 x bfloat> %x0, i32 8, <8 x bfloat> %x2, i8 %x3) ; masked by %x3, pass-through %x2
+ %res1 = call <8 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.128(<8 x bfloat> %x0, i32 4, <8 x bfloat> %x2, i8 -1) ; all-ones mask
+ %res2 = fadd <8 x bfloat> %res, %res1 ; uses both results so both calls appear in the output
+ ret <8 x bfloat> %res2
+}
+
+define <16 x bfloat>@test_int_x86_avx512_mask_rndscale_nepbf16_256(<16 x bfloat> %x0, <16 x bfloat> %x2, i16 %x3) { ; 256-bit rndscale intrinsic, once masked (imm 8) and once with an all-ones mask (imm 4), results added
+; X64-LABEL: test_int_x86_avx512_mask_rndscale_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vrndscalenepbf16 $8, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x08,0xc8,0x08]
+; X64-NEXT: vrndscalenepbf16 $4, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7f,0x28,0x08,0xc0,0x04]
+; X64-NEXT: vaddnepbf16 %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_rndscale_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vrndscalenepbf16 $8, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x08,0xc8,0x08]
+; X86-NEXT: vrndscalenepbf16 $4, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7f,0x28,0x08,0xc0,0x04]
+; X86-NEXT: vaddnepbf16 %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <16 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.256(<16 x bfloat> %x0, i32 8, <16 x bfloat> %x2, i16 %x3) ; masked by %x3, pass-through %x2
+ %res1 = call <16 x bfloat> @llvm.x86.avx10.mask.rndscale.nepbf16.256(<16 x bfloat> %x0, i32 4, <16 x bfloat> %x2, i16 -1) ; all-ones mask
+ %res2 = fadd <16 x bfloat> %res, %res1 ; uses both results so both calls appear in the output
+ ret <16 x bfloat> %res2
+}
+
+declare <8 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.128(<8 x bfloat>, <8 x bfloat>, <8 x bfloat>, i8)
+declare <16 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.256(<16 x bfloat>, <16 x bfloat>, <16 x bfloat>, i16)
+
+define <8 x bfloat>@test_int_x86_avx512_scalef_nepbf16_128(<8 x bfloat> %x0, <8 x bfloat> %x1) { ; 128-bit scalef intrinsic called with an all-ones mask
+; CHECK-LABEL: test_int_x86_avx512_scalef_nepbf16_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vscalefpbf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf6,0x7c,0x08,0x2c,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <8 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.128(<8 x bfloat> %x0, <8 x bfloat> %x1, <8 x bfloat> zeroinitializer, i8 -1) ; i8 -1 selects every lane, so the unmasked encoding is expected
+ ret <8 x bfloat> %res
+}
+
+define <8 x bfloat>@test_int_x86_avx512_mask_scalef_nepbf16_128(<8 x bfloat> %x0, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %x3) { ; 128-bit scalef intrinsic with pass-through %x2 and variable mask %x3
+; X64-LABEL: test_int_x86_avx512_mask_scalef_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vscalefpbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0x2c,0xd1]
+; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_scalef_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vscalefpbf16 %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x09,0x2c,0xd1]
+; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+ ; The intrinsic takes the mask as a plain i8, so %x3 is passed directly without a vector bitcast.
+ %res = call <8 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.128(<8 x bfloat> %x0, <8 x bfloat> %x1, <8 x bfloat> %x2, i8 %x3)
+ ret <8 x bfloat> %res
+}
+
+define <8 x bfloat>@test_int_x86_avx512_maskz_scalef_nepbf16_128(<8 x bfloat> %x0, <8 x bfloat> %x1, i8 %x3) { ; 128-bit scalef intrinsic with a zero pass-through and variable mask %x3
+; X64-LABEL: test_int_x86_avx512_maskz_scalef_nepbf16_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vscalefpbf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x7c,0x89,0x2c,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_maskz_scalef_nepbf16_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vscalefpbf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf6,0x7c,0x89,0x2c,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ ; The intrinsic takes the mask as a plain i8, so %x3 is passed directly without a vector bitcast.
+ %res = call <8 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.128(<8 x bfloat> %x0, <8 x bfloat> %x1, <8 x bfloat> zeroinitializer, i8 %x3)
+ ret <8 x bfloat> %res
+}
+
+define <16 x bfloat>@test_int_x86_avx512_scalef_nepbf16_256(<16 x bfloat> %x0, <16 x bfloat> %x1) { ; 256-bit scalef intrinsic called with an all-ones mask
+; CHECK-LABEL: test_int_x86_avx512_scalef_nepbf16_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vscalefpbf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf6,0x7c,0x28,0x2c,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <16 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.256(<16 x bfloat> %x0, <16 x bfloat> %x1, <16 x bfloat> zeroinitializer, i16 -1) ; i16 -1 selects every lane, so the unmasked encoding is expected
+ ret <16 x bfloat> %res
+}
+
+define <16 x bfloat>@test_int_x86_avx512_mask_scalef_nepbf16_256(<16 x bfloat> %x0, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %x3) { ; 256-bit scalef intrinsic with pass-through %x2 and variable mask %x3
+; X64-LABEL: test_int_x86_avx512_mask_scalef_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vscalefpbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0x2c,0xd1]
+; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_mask_scalef_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vscalefpbf16 %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7c,0x29,0x2c,0xd1]
+; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+ ; The intrinsic takes the mask as a plain i16, so %x3 is passed directly without a vector bitcast.
+ %res = call <16 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.256(<16 x bfloat> %x0, <16 x bfloat> %x1, <16 x bfloat> %x2, i16 %x3)
+ ret <16 x bfloat> %res
+}
+
+define <16 x bfloat>@test_int_x86_avx512_maskz_scalef_nepbf16_256(<16 x bfloat> %x0, <16 x bfloat> %x1, i16 %x3) { ; 256-bit scalef intrinsic with a zero pass-through and variable mask %x3
+; X64-LABEL: test_int_x86_avx512_maskz_scalef_nepbf16_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vscalefpbf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x7c,0xa9,0x2c,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_avx512_maskz_scalef_nepbf16_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vscalefpbf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x7c,0xa9,0x2c,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ ; The intrinsic takes the mask as a plain i16, so %x3 is passed directly without a vector bitcast.
+ %res = call <16 x bfloat> @llvm.x86.avx10.mask.scalef.nepbf16.256(<16 x bfloat> %x0, <16 x bfloat> %x1, <16 x bfloat> zeroinitializer, i16 %x3)
+ ret <16 x bfloat> %res
+}
diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-bf16-32.txt b/llvm/test/MC/Disassembler/X86/avx10.2-bf16-32.txt
new file mode 100644
index 00000000000000..8cc53db077e4f7
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx10.2-bf16-32.txt
@@ -0,0 +1,3015 @@
+# RUN: llvm-mc --disassemble %s -triple=i386 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i386 --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: vaddnepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vaddnepbf16 ymm2, ymm3, ymm4
+0x62,0xf5,0x65,0x28,0x58,0xd4
+
+# ATT: vaddnepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vaddnepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf5,0x65,0x2f,0x58,0xd4
+
+# ATT: vaddnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vaddnepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf5,0x65,0xaf,0x58,0xd4
+
+# ATT: vaddnepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vaddnepbf16 zmm2, zmm3, zmm4
+0x62,0xf5,0x65,0x48,0x58,0xd4
+
+# ATT: vaddnepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vaddnepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf5,0x65,0x4f,0x58,0xd4
+
+# ATT: vaddnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vaddnepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf5,0x65,0xcf,0x58,0xd4
+
+# ATT: vaddnepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vaddnepbf16 xmm2, xmm3, xmm4
+0x62,0xf5,0x65,0x08,0x58,0xd4
+
+# ATT: vaddnepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vaddnepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf5,0x65,0x0f,0x58,0xd4
+
+# ATT: vaddnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vaddnepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf5,0x65,0x8f,0x58,0xd4
+
+# ATT: vaddnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vaddnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x48,0x58,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vaddnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vaddnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x4f,0x58,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vaddnepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vaddnepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf5,0x65,0x58,0x58,0x10
+
+# ATT: vaddnepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vaddnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf5,0x65,0x48,0x58,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vaddnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vaddnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf5,0x65,0xcf,0x58,0x51,0x7f
+
+# ATT: vaddnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vaddnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf5,0x65,0xdf,0x58,0x52,0x80
+
+# ATT: vaddnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vaddnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x28,0x58,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vaddnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vaddnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x2f,0x58,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vaddnepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vaddnepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf5,0x65,0x38,0x58,0x10
+
+# ATT: vaddnepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vaddnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf5,0x65,0x28,0x58,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vaddnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vaddnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x65,0xaf,0x58,0x51,0x7f
+
+# ATT: vaddnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vaddnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf5,0x65,0xbf,0x58,0x52,0x80
+
+# ATT: vaddnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vaddnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x08,0x58,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vaddnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vaddnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x0f,0x58,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vaddnepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vaddnepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf5,0x65,0x18,0x58,0x10
+
+# ATT: vaddnepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vaddnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf5,0x65,0x08,0x58,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vaddnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vaddnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x65,0x8f,0x58,0x51,0x7f
+
+# ATT: vaddnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vaddnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x65,0x9f,0x58,0x52,0x80
+
+# ATT: vcmppbf16 $123, %ymm4, %ymm3, %k5
+# INTEL: vcmppbf16 k5, ymm3, ymm4, 123
+0x62,0xf3,0x67,0x28,0xc2,0xec,0x7b
+
+# ATT: vcmppbf16 $123, %ymm4, %ymm3, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, ymm3, ymm4, 123
+0x62,0xf3,0x67,0x2f,0xc2,0xec,0x7b
+
+# ATT: vcmppbf16 $123, %xmm4, %xmm3, %k5
+# INTEL: vcmppbf16 k5, xmm3, xmm4, 123
+0x62,0xf3,0x67,0x08,0xc2,0xec,0x7b
+
+# ATT: vcmppbf16 $123, %xmm4, %xmm3, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, xmm3, xmm4, 123
+0x62,0xf3,0x67,0x0f,0xc2,0xec,0x7b
+
+# ATT: vcmppbf16 $123, %zmm4, %zmm3, %k5
+# INTEL: vcmppbf16 k5, zmm3, zmm4, 123
+0x62,0xf3,0x67,0x48,0xc2,0xec,0x7b
+
+# ATT: vcmppbf16 $123, %zmm4, %zmm3, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, zmm3, zmm4, 123
+0x62,0xf3,0x67,0x4f,0xc2,0xec,0x7b
+
+# ATT: vcmppbf16 $123, 268435456(%esp,%esi,8), %zmm3, %k5
+# INTEL: vcmppbf16 k5, zmm3, zmmword ptr [esp + 8*esi + 268435456], 123
+0x62,0xf3,0x67,0x48,0xc2,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vcmppbf16 $123, 291(%edi,%eax,4), %zmm3, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291], 123
+0x62,0xf3,0x67,0x4f,0xc2,0xac,0x87,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vcmppbf16 $123, (%eax){1to32}, %zmm3, %k5
+# INTEL: vcmppbf16 k5, zmm3, word ptr [eax]{1to32}, 123
+0x62,0xf3,0x67,0x58,0xc2,0x28,0x7b
+
+# ATT: vcmppbf16 $123, -2048(,%ebp,2), %zmm3, %k5
+# INTEL: vcmppbf16 k5, zmm3, zmmword ptr [2*ebp - 2048], 123
+0x62,0xf3,0x67,0x48,0xc2,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b
+
+# ATT: vcmppbf16 $123, 8128(%ecx), %zmm3, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, zmm3, zmmword ptr [ecx + 8128], 123
+0x62,0xf3,0x67,0x4f,0xc2,0x69,0x7f,0x7b
+
+# ATT: vcmppbf16 $123, -256(%edx){1to32}, %zmm3, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, zmm3, word ptr [edx - 256]{1to32}, 123
+0x62,0xf3,0x67,0x5f,0xc2,0x6a,0x80,0x7b
+
+# ATT: vcmppbf16 $123, 268435456(%esp,%esi,8), %xmm3, %k5
+# INTEL: vcmppbf16 k5, xmm3, xmmword ptr [esp + 8*esi + 268435456], 123
+0x62,0xf3,0x67,0x08,0xc2,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vcmppbf16 $123, 291(%edi,%eax,4), %xmm3, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291], 123
+0x62,0xf3,0x67,0x0f,0xc2,0xac,0x87,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vcmppbf16 $123, (%eax){1to8}, %xmm3, %k5
+# INTEL: vcmppbf16 k5, xmm3, word ptr [eax]{1to8}, 123
+0x62,0xf3,0x67,0x18,0xc2,0x28,0x7b
+
+# ATT: vcmppbf16 $123, -512(,%ebp,2), %xmm3, %k5
+# INTEL: vcmppbf16 k5, xmm3, xmmword ptr [2*ebp - 512], 123
+0x62,0xf3,0x67,0x08,0xc2,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b
+
+# ATT: vcmppbf16 $123, 2032(%ecx), %xmm3, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, xmm3, xmmword ptr [ecx + 2032], 123
+0x62,0xf3,0x67,0x0f,0xc2,0x69,0x7f,0x7b
+
+# ATT: vcmppbf16 $123, -256(%edx){1to8}, %xmm3, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, xmm3, word ptr [edx - 256]{1to8}, 123
+0x62,0xf3,0x67,0x1f,0xc2,0x6a,0x80,0x7b
+
+# ATT: vcmppbf16 $123, 268435456(%esp,%esi,8), %ymm3, %k5
+# INTEL: vcmppbf16 k5, ymm3, ymmword ptr [esp + 8*esi + 268435456], 123
+0x62,0xf3,0x67,0x28,0xc2,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vcmppbf16 $123, 291(%edi,%eax,4), %ymm3, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291], 123
+0x62,0xf3,0x67,0x2f,0xc2,0xac,0x87,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vcmppbf16 $123, (%eax){1to16}, %ymm3, %k5
+# INTEL: vcmppbf16 k5, ymm3, word ptr [eax]{1to16}, 123
+0x62,0xf3,0x67,0x38,0xc2,0x28,0x7b
+
+# ATT: vcmppbf16 $123, -1024(,%ebp,2), %ymm3, %k5
+# INTEL: vcmppbf16 k5, ymm3, ymmword ptr [2*ebp - 1024], 123
+0x62,0xf3,0x67,0x28,0xc2,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b
+
+# ATT: vcmppbf16 $123, 4064(%ecx), %ymm3, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, ymm3, ymmword ptr [ecx + 4064], 123
+0x62,0xf3,0x67,0x2f,0xc2,0x69,0x7f,0x7b
+
+# ATT: vcmppbf16 $123, -256(%edx){1to16}, %ymm3, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, ymm3, word ptr [edx - 256]{1to16}, 123
+0x62,0xf3,0x67,0x3f,0xc2,0x6a,0x80,0x7b
+
+# ATT: vcomsbf16 %xmm3, %xmm2
+# INTEL: vcomsbf16 xmm2, xmm3
+0x62,0xf5,0x7d,0x08,0x2f,0xd3
+
+# ATT: vcomsbf16 268435456(%esp,%esi,8), %xmm2
+# INTEL: vcomsbf16 xmm2, word ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcomsbf16 291(%edi,%eax,4), %xmm2
+# INTEL: vcomsbf16 xmm2, word ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7d,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcomsbf16 (%eax), %xmm2
+# INTEL: vcomsbf16 xmm2, word ptr [eax]
+0x62,0xf5,0x7d,0x08,0x2f,0x10
+
+# ATT: vcomsbf16 -64(,%ebp,2), %xmm2
+# INTEL: vcomsbf16 xmm2, word ptr [2*ebp - 64]
+0x62,0xf5,0x7d,0x08,0x2f,0x14,0x6d,0xc0,0xff,0xff,0xff
+
+# ATT: vcomsbf16 254(%ecx), %xmm2
+# INTEL: vcomsbf16 xmm2, word ptr [ecx + 254]
+0x62,0xf5,0x7d,0x08,0x2f,0x51,0x7f
+
+# ATT: vcomsbf16 -256(%edx), %xmm2
+# INTEL: vcomsbf16 xmm2, word ptr [edx - 256]
+0x62,0xf5,0x7d,0x08,0x2f,0x52,0x80
+
+# ATT: vdivnepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vdivnepbf16 ymm2, ymm3, ymm4
+0x62,0xf5,0x65,0x28,0x5e,0xd4
+
+# ATT: vdivnepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vdivnepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf5,0x65,0x2f,0x5e,0xd4
+
+# ATT: vdivnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vdivnepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf5,0x65,0xaf,0x5e,0xd4
+
+# ATT: vdivnepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vdivnepbf16 zmm2, zmm3, zmm4
+0x62,0xf5,0x65,0x48,0x5e,0xd4
+
+# ATT: vdivnepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vdivnepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf5,0x65,0x4f,0x5e,0xd4
+
+# ATT: vdivnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vdivnepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf5,0x65,0xcf,0x5e,0xd4
+
+# ATT: vdivnepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vdivnepbf16 xmm2, xmm3, xmm4
+0x62,0xf5,0x65,0x08,0x5e,0xd4
+
+# ATT: vdivnepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vdivnepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf5,0x65,0x0f,0x5e,0xd4
+
+# ATT: vdivnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vdivnepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf5,0x65,0x8f,0x5e,0xd4
+
+# ATT: vdivnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vdivnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x48,0x5e,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vdivnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vdivnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x4f,0x5e,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vdivnepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vdivnepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf5,0x65,0x58,0x5e,0x10
+
+# ATT: vdivnepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vdivnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf5,0x65,0x48,0x5e,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vdivnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vdivnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf5,0x65,0xcf,0x5e,0x51,0x7f
+
+# ATT: vdivnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vdivnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf5,0x65,0xdf,0x5e,0x52,0x80
+
+# ATT: vdivnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vdivnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x28,0x5e,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vdivnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vdivnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x2f,0x5e,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vdivnepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vdivnepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf5,0x65,0x38,0x5e,0x10
+
+# ATT: vdivnepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vdivnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf5,0x65,0x28,0x5e,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vdivnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vdivnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x65,0xaf,0x5e,0x51,0x7f
+
+# ATT: vdivnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vdivnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf5,0x65,0xbf,0x5e,0x52,0x80
+
+# ATT: vdivnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vdivnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x08,0x5e,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vdivnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vdivnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x0f,0x5e,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vdivnepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vdivnepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf5,0x65,0x18,0x5e,0x10
+
+# ATT: vdivnepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vdivnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf5,0x65,0x08,0x5e,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vdivnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vdivnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x65,0x8f,0x5e,0x51,0x7f
+
+# ATT: vdivnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vdivnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x65,0x9f,0x5e,0x52,0x80
+
+# ATT: vfmadd132nepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vfmadd132nepbf16 ymm2, ymm3, ymm4
+0x62,0xf6,0x64,0x28,0x98,0xd4
+
+# ATT: vfmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmadd132nepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf6,0x64,0x2f,0x98,0xd4
+
+# ATT: vfmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf6,0x64,0xaf,0x98,0xd4
+
+# ATT: vfmadd132nepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vfmadd132nepbf16 zmm2, zmm3, zmm4
+0x62,0xf6,0x64,0x48,0x98,0xd4
+
+# ATT: vfmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vfmadd132nepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf6,0x64,0x4f,0x98,0xd4
+
+# ATT: vfmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf6,0x64,0xcf,0x98,0xd4
+
+# ATT: vfmadd132nepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vfmadd132nepbf16 xmm2, xmm3, xmm4
+0x62,0xf6,0x64,0x08,0x98,0xd4
+
+# ATT: vfmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vfmadd132nepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf6,0x64,0x0f,0x98,0xd4
+
+# ATT: vfmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf6,0x64,0x8f,0x98,0xd4
+
+# ATT: vfmadd132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vfmadd132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x48,0x98,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vfmadd132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x4f,0x98,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd132nepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vfmadd132nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf6,0x64,0x58,0x98,0x10
+
+# ATT: vfmadd132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vfmadd132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x64,0x48,0x98,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfmadd132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x64,0xcf,0x98,0x51,0x7f
+
+# ATT: vfmadd132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmadd132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x64,0xdf,0x98,0x52,0x80
+
+# ATT: vfmadd132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vfmadd132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x28,0x98,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vfmadd132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x2f,0x98,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd132nepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vfmadd132nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf6,0x64,0x38,0x98,0x10
+
+# ATT: vfmadd132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vfmadd132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x64,0x28,0x98,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfmadd132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x64,0xaf,0x98,0x51,0x7f
+
+# ATT: vfmadd132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmadd132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x64,0xbf,0x98,0x52,0x80
+
+# ATT: vfmadd132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vfmadd132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x08,0x98,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vfmadd132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x0f,0x98,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd132nepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vfmadd132nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf6,0x64,0x18,0x98,0x10
+
+# ATT: vfmadd132nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vfmadd132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x64,0x08,0x98,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfmadd132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x64,0x8f,0x98,0x51,0x7f
+
+# ATT: vfmadd132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmadd132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x64,0x9f,0x98,0x52,0x80
+
+# ATT: vfmadd213nepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vfmadd213nepbf16 ymm2, ymm3, ymm4
+0x62,0xf6,0x64,0x28,0xa8,0xd4
+
+# ATT: vfmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmadd213nepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf6,0x64,0x2f,0xa8,0xd4
+
+# ATT: vfmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf6,0x64,0xaf,0xa8,0xd4
+
+# ATT: vfmadd213nepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vfmadd213nepbf16 zmm2, zmm3, zmm4
+0x62,0xf6,0x64,0x48,0xa8,0xd4
+
+# ATT: vfmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vfmadd213nepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf6,0x64,0x4f,0xa8,0xd4
+
+# ATT: vfmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf6,0x64,0xcf,0xa8,0xd4
+
+# ATT: vfmadd213nepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vfmadd213nepbf16 xmm2, xmm3, xmm4
+0x62,0xf6,0x64,0x08,0xa8,0xd4
+
+# ATT: vfmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vfmadd213nepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf6,0x64,0x0f,0xa8,0xd4
+
+# ATT: vfmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf6,0x64,0x8f,0xa8,0xd4
+
+# ATT: vfmadd213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vfmadd213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x48,0xa8,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vfmadd213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x4f,0xa8,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd213nepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vfmadd213nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf6,0x64,0x58,0xa8,0x10
+
+# ATT: vfmadd213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vfmadd213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x64,0x48,0xa8,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfmadd213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x64,0xcf,0xa8,0x51,0x7f
+
+# ATT: vfmadd213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmadd213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x64,0xdf,0xa8,0x52,0x80
+
+# ATT: vfmadd213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vfmadd213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x28,0xa8,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vfmadd213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x2f,0xa8,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd213nepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vfmadd213nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf6,0x64,0x38,0xa8,0x10
+
+# ATT: vfmadd213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vfmadd213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x64,0x28,0xa8,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfmadd213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x64,0xaf,0xa8,0x51,0x7f
+
+# ATT: vfmadd213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmadd213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x64,0xbf,0xa8,0x52,0x80
+
+# ATT: vfmadd213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vfmadd213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x08,0xa8,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vfmadd213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x0f,0xa8,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd213nepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vfmadd213nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf6,0x64,0x18,0xa8,0x10
+
+# ATT: vfmadd213nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vfmadd213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x64,0x08,0xa8,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfmadd213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x64,0x8f,0xa8,0x51,0x7f
+
+# ATT: vfmadd213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmadd213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x64,0x9f,0xa8,0x52,0x80
+
+# ATT: vfmadd231nepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vfmadd231nepbf16 ymm2, ymm3, ymm4
+0x62,0xf6,0x64,0x28,0xb8,0xd4
+
+# ATT: vfmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmadd231nepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf6,0x64,0x2f,0xb8,0xd4
+
+# ATT: vfmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf6,0x64,0xaf,0xb8,0xd4
+
+# ATT: vfmadd231nepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vfmadd231nepbf16 zmm2, zmm3, zmm4
+0x62,0xf6,0x64,0x48,0xb8,0xd4
+
+# ATT: vfmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vfmadd231nepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf6,0x64,0x4f,0xb8,0xd4
+
+# ATT: vfmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf6,0x64,0xcf,0xb8,0xd4
+
+# ATT: vfmadd231nepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vfmadd231nepbf16 xmm2, xmm3, xmm4
+0x62,0xf6,0x64,0x08,0xb8,0xd4
+
+# ATT: vfmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vfmadd231nepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf6,0x64,0x0f,0xb8,0xd4
+
+# ATT: vfmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf6,0x64,0x8f,0xb8,0xd4
+
+# ATT: vfmadd231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vfmadd231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x48,0xb8,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vfmadd231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x4f,0xb8,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd231nepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vfmadd231nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf6,0x64,0x58,0xb8,0x10
+
+# ATT: vfmadd231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vfmadd231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x64,0x48,0xb8,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfmadd231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x64,0xcf,0xb8,0x51,0x7f
+
+# ATT: vfmadd231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmadd231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x64,0xdf,0xb8,0x52,0x80
+
+# ATT: vfmadd231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vfmadd231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x28,0xb8,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vfmadd231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x2f,0xb8,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd231nepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vfmadd231nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf6,0x64,0x38,0xb8,0x10
+
+# ATT: vfmadd231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vfmadd231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x64,0x28,0xb8,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfmadd231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x64,0xaf,0xb8,0x51,0x7f
+
+# ATT: vfmadd231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmadd231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x64,0xbf,0xb8,0x52,0x80
+
+# ATT: vfmadd231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vfmadd231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x08,0xb8,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vfmadd231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x0f,0xb8,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd231nepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vfmadd231nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf6,0x64,0x18,0xb8,0x10
+
+# ATT: vfmadd231nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vfmadd231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x64,0x08,0xb8,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfmadd231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x64,0x8f,0xb8,0x51,0x7f
+
+# ATT: vfmadd231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmadd231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x64,0x9f,0xb8,0x52,0x80
+
+# ATT: vfmsub132nepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vfmsub132nepbf16 ymm2, ymm3, ymm4
+0x62,0xf6,0x64,0x28,0x9a,0xd4
+
+# ATT: vfmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmsub132nepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf6,0x64,0x2f,0x9a,0xd4
+
+# ATT: vfmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf6,0x64,0xaf,0x9a,0xd4
+
+# ATT: vfmsub132nepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vfmsub132nepbf16 zmm2, zmm3, zmm4
+0x62,0xf6,0x64,0x48,0x9a,0xd4
+
+# ATT: vfmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vfmsub132nepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf6,0x64,0x4f,0x9a,0xd4
+
+# ATT: vfmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf6,0x64,0xcf,0x9a,0xd4
+
+# ATT: vfmsub132nepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vfmsub132nepbf16 xmm2, xmm3, xmm4
+0x62,0xf6,0x64,0x08,0x9a,0xd4
+
+# ATT: vfmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vfmsub132nepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf6,0x64,0x0f,0x9a,0xd4
+
+# ATT: vfmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf6,0x64,0x8f,0x9a,0xd4
+
+# ATT: vfmsub132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vfmsub132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x48,0x9a,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vfmsub132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x4f,0x9a,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub132nepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vfmsub132nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf6,0x64,0x58,0x9a,0x10
+
+# ATT: vfmsub132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vfmsub132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x64,0x48,0x9a,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfmsub132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x64,0xcf,0x9a,0x51,0x7f
+
+# ATT: vfmsub132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmsub132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x64,0xdf,0x9a,0x52,0x80
+
+# ATT: vfmsub132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vfmsub132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x28,0x9a,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vfmsub132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x2f,0x9a,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub132nepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vfmsub132nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf6,0x64,0x38,0x9a,0x10
+
+# ATT: vfmsub132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vfmsub132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x64,0x28,0x9a,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfmsub132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x64,0xaf,0x9a,0x51,0x7f
+
+# ATT: vfmsub132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsub132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x64,0xbf,0x9a,0x52,0x80
+
+# ATT: vfmsub132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vfmsub132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x08,0x9a,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vfmsub132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x0f,0x9a,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub132nepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vfmsub132nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf6,0x64,0x18,0x9a,0x10
+
+# ATT: vfmsub132nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vfmsub132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x64,0x08,0x9a,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfmsub132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x64,0x8f,0x9a,0x51,0x7f
+
+# ATT: vfmsub132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmsub132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x64,0x9f,0x9a,0x52,0x80
+
+# ATT: vfmsub213nepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vfmsub213nepbf16 ymm2, ymm3, ymm4
+0x62,0xf6,0x64,0x28,0xaa,0xd4
+
+# ATT: vfmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmsub213nepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf6,0x64,0x2f,0xaa,0xd4
+
+# ATT: vfmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf6,0x64,0xaf,0xaa,0xd4
+
+# ATT: vfmsub213nepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vfmsub213nepbf16 zmm2, zmm3, zmm4
+0x62,0xf6,0x64,0x48,0xaa,0xd4
+
+# ATT: vfmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vfmsub213nepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf6,0x64,0x4f,0xaa,0xd4
+
+# ATT: vfmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf6,0x64,0xcf,0xaa,0xd4
+
+# ATT: vfmsub213nepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vfmsub213nepbf16 xmm2, xmm3, xmm4
+0x62,0xf6,0x64,0x08,0xaa,0xd4
+
+# ATT: vfmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vfmsub213nepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf6,0x64,0x0f,0xaa,0xd4
+
+# ATT: vfmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf6,0x64,0x8f,0xaa,0xd4
+
+# ATT: vfmsub213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vfmsub213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x48,0xaa,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vfmsub213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x4f,0xaa,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub213nepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vfmsub213nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf6,0x64,0x58,0xaa,0x10
+
+# ATT: vfmsub213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vfmsub213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x64,0x48,0xaa,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfmsub213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x64,0xcf,0xaa,0x51,0x7f
+
+# ATT: vfmsub213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmsub213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x64,0xdf,0xaa,0x52,0x80
+
+# ATT: vfmsub213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vfmsub213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x28,0xaa,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vfmsub213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x2f,0xaa,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub213nepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vfmsub213nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf6,0x64,0x38,0xaa,0x10
+
+# ATT: vfmsub213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vfmsub213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x64,0x28,0xaa,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfmsub213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x64,0xaf,0xaa,0x51,0x7f
+
+# ATT: vfmsub213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsub213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x64,0xbf,0xaa,0x52,0x80
+
+# ATT: vfmsub213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vfmsub213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x08,0xaa,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vfmsub213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x0f,0xaa,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub213nepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vfmsub213nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf6,0x64,0x18,0xaa,0x10
+
+# ATT: vfmsub213nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vfmsub213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x64,0x08,0xaa,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfmsub213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x64,0x8f,0xaa,0x51,0x7f
+
+# ATT: vfmsub213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmsub213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x64,0x9f,0xaa,0x52,0x80
+
+# ATT: vfmsub231nepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vfmsub231nepbf16 ymm2, ymm3, ymm4
+0x62,0xf6,0x64,0x28,0xba,0xd4
+
+# ATT: vfmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmsub231nepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf6,0x64,0x2f,0xba,0xd4
+
+# ATT: vfmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf6,0x64,0xaf,0xba,0xd4
+
+# ATT: vfmsub231nepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vfmsub231nepbf16 zmm2, zmm3, zmm4
+0x62,0xf6,0x64,0x48,0xba,0xd4
+
+# ATT: vfmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vfmsub231nepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf6,0x64,0x4f,0xba,0xd4
+
+# ATT: vfmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf6,0x64,0xcf,0xba,0xd4
+
+# ATT: vfmsub231nepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vfmsub231nepbf16 xmm2, xmm3, xmm4
+0x62,0xf6,0x64,0x08,0xba,0xd4
+
+# ATT: vfmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vfmsub231nepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf6,0x64,0x0f,0xba,0xd4
+
+# ATT: vfmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf6,0x64,0x8f,0xba,0xd4
+
+# ATT: vfmsub231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vfmsub231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x48,0xba,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vfmsub231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x4f,0xba,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub231nepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vfmsub231nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf6,0x64,0x58,0xba,0x10
+
+# ATT: vfmsub231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vfmsub231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x64,0x48,0xba,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfmsub231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x64,0xcf,0xba,0x51,0x7f
+
+# ATT: vfmsub231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfmsub231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x64,0xdf,0xba,0x52,0x80
+
+# ATT: vfmsub231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vfmsub231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x28,0xba,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vfmsub231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x2f,0xba,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub231nepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vfmsub231nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf6,0x64,0x38,0xba,0x10
+
+# ATT: vfmsub231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vfmsub231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x64,0x28,0xba,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfmsub231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x64,0xaf,0xba,0x51,0x7f
+
+# ATT: vfmsub231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsub231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x64,0xbf,0xba,0x52,0x80
+
+# ATT: vfmsub231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vfmsub231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x08,0xba,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vfmsub231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x0f,0xba,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub231nepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vfmsub231nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf6,0x64,0x18,0xba,0x10
+
+# ATT: vfmsub231nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vfmsub231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x64,0x08,0xba,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfmsub231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x64,0x8f,0xba,0x51,0x7f
+
+# ATT: vfmsub231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfmsub231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x64,0x9f,0xba,0x52,0x80
+
+# ATT: vfnmadd132nepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vfnmadd132nepbf16 ymm2, ymm3, ymm4
+0x62,0xf6,0x64,0x28,0x9c,0xd4
+
+# ATT: vfnmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmadd132nepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf6,0x64,0x2f,0x9c,0xd4
+
+# ATT: vfnmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf6,0x64,0xaf,0x9c,0xd4
+
+# ATT: vfnmadd132nepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vfnmadd132nepbf16 zmm2, zmm3, zmm4
+0x62,0xf6,0x64,0x48,0x9c,0xd4
+
+# ATT: vfnmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vfnmadd132nepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf6,0x64,0x4f,0x9c,0xd4
+
+# ATT: vfnmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf6,0x64,0xcf,0x9c,0xd4
+
+# ATT: vfnmadd132nepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vfnmadd132nepbf16 xmm2, xmm3, xmm4
+0x62,0xf6,0x64,0x08,0x9c,0xd4
+
+# ATT: vfnmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vfnmadd132nepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf6,0x64,0x0f,0x9c,0xd4
+
+# ATT: vfnmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf6,0x64,0x8f,0x9c,0xd4
+
+# ATT: vfnmadd132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vfnmadd132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x48,0x9c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vfnmadd132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x4f,0x9c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd132nepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vfnmadd132nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf6,0x64,0x58,0x9c,0x10
+
+# ATT: vfnmadd132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vfnmadd132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x64,0x48,0x9c,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfnmadd132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x64,0xcf,0x9c,0x51,0x7f
+
+# ATT: vfnmadd132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x64,0xdf,0x9c,0x52,0x80
+
+# ATT: vfnmadd132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vfnmadd132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x28,0x9c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vfnmadd132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x2f,0x9c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd132nepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vfnmadd132nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf6,0x64,0x38,0x9c,0x10
+
+# ATT: vfnmadd132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vfnmadd132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x64,0x28,0x9c,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfnmadd132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x64,0xaf,0x9c,0x51,0x7f
+
+# ATT: vfnmadd132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x64,0xbf,0x9c,0x52,0x80
+
+# ATT: vfnmadd132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vfnmadd132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x08,0x9c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vfnmadd132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x0f,0x9c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd132nepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vfnmadd132nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf6,0x64,0x18,0x9c,0x10
+
+# ATT: vfnmadd132nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vfnmadd132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x64,0x08,0x9c,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfnmadd132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x64,0x8f,0x9c,0x51,0x7f
+
+# ATT: vfnmadd132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x64,0x9f,0x9c,0x52,0x80
+
+# ATT: vfnmadd213nepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vfnmadd213nepbf16 ymm2, ymm3, ymm4
+0x62,0xf6,0x64,0x28,0xac,0xd4
+
+# ATT: vfnmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmadd213nepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf6,0x64,0x2f,0xac,0xd4
+
+# ATT: vfnmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf6,0x64,0xaf,0xac,0xd4
+
+# ATT: vfnmadd213nepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vfnmadd213nepbf16 zmm2, zmm3, zmm4
+0x62,0xf6,0x64,0x48,0xac,0xd4
+
+# ATT: vfnmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vfnmadd213nepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf6,0x64,0x4f,0xac,0xd4
+
+# ATT: vfnmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf6,0x64,0xcf,0xac,0xd4
+
+# ATT: vfnmadd213nepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vfnmadd213nepbf16 xmm2, xmm3, xmm4
+0x62,0xf6,0x64,0x08,0xac,0xd4
+
+# ATT: vfnmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vfnmadd213nepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf6,0x64,0x0f,0xac,0xd4
+
+# ATT: vfnmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf6,0x64,0x8f,0xac,0xd4
+
+# ATT: vfnmadd213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vfnmadd213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x48,0xac,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vfnmadd213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x4f,0xac,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd213nepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vfnmadd213nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf6,0x64,0x58,0xac,0x10
+
+# ATT: vfnmadd213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vfnmadd213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x64,0x48,0xac,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfnmadd213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x64,0xcf,0xac,0x51,0x7f
+
+# ATT: vfnmadd213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x64,0xdf,0xac,0x52,0x80
+
+# ATT: vfnmadd213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vfnmadd213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x28,0xac,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vfnmadd213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x2f,0xac,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd213nepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vfnmadd213nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf6,0x64,0x38,0xac,0x10
+
+# ATT: vfnmadd213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vfnmadd213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x64,0x28,0xac,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfnmadd213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x64,0xaf,0xac,0x51,0x7f
+
+# ATT: vfnmadd213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x64,0xbf,0xac,0x52,0x80
+
+# ATT: vfnmadd213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vfnmadd213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x08,0xac,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vfnmadd213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x0f,0xac,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd213nepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vfnmadd213nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf6,0x64,0x18,0xac,0x10
+
+# ATT: vfnmadd213nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vfnmadd213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x64,0x08,0xac,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfnmadd213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x64,0x8f,0xac,0x51,0x7f
+
+# ATT: vfnmadd213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x64,0x9f,0xac,0x52,0x80
+
+# ATT: vfnmadd231nepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vfnmadd231nepbf16 ymm2, ymm3, ymm4
+0x62,0xf6,0x64,0x28,0xbc,0xd4
+
+# ATT: vfnmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmadd231nepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf6,0x64,0x2f,0xbc,0xd4
+
+# ATT: vfnmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf6,0x64,0xaf,0xbc,0xd4
+
+# ATT: vfnmadd231nepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vfnmadd231nepbf16 zmm2, zmm3, zmm4
+0x62,0xf6,0x64,0x48,0xbc,0xd4
+
+# ATT: vfnmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vfnmadd231nepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf6,0x64,0x4f,0xbc,0xd4
+
+# ATT: vfnmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf6,0x64,0xcf,0xbc,0xd4
+
+# ATT: vfnmadd231nepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vfnmadd231nepbf16 xmm2, xmm3, xmm4
+0x62,0xf6,0x64,0x08,0xbc,0xd4
+
+# ATT: vfnmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vfnmadd231nepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf6,0x64,0x0f,0xbc,0xd4
+
+# ATT: vfnmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf6,0x64,0x8f,0xbc,0xd4
+
+# ATT: vfnmadd231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vfnmadd231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x48,0xbc,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vfnmadd231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x4f,0xbc,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd231nepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vfnmadd231nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf6,0x64,0x58,0xbc,0x10
+
+# ATT: vfnmadd231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vfnmadd231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x64,0x48,0xbc,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfnmadd231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x64,0xcf,0xbc,0x51,0x7f
+
+# ATT: vfnmadd231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x64,0xdf,0xbc,0x52,0x80
+
+# ATT: vfnmadd231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vfnmadd231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x28,0xbc,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vfnmadd231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x2f,0xbc,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd231nepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vfnmadd231nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf6,0x64,0x38,0xbc,0x10
+
+# ATT: vfnmadd231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vfnmadd231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x64,0x28,0xbc,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfnmadd231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x64,0xaf,0xbc,0x51,0x7f
+
+# ATT: vfnmadd231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x64,0xbf,0xbc,0x52,0x80
+
+# ATT: vfnmadd231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vfnmadd231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x08,0xbc,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vfnmadd231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x0f,0xbc,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd231nepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vfnmadd231nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf6,0x64,0x18,0xbc,0x10
+
+# ATT: vfnmadd231nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vfnmadd231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x64,0x08,0xbc,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfnmadd231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x64,0x8f,0xbc,0x51,0x7f
+
+# ATT: vfnmadd231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x64,0x9f,0xbc,0x52,0x80
+
+# ATT: vfnmsub132nepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vfnmsub132nepbf16 ymm2, ymm3, ymm4
+0x62,0xf6,0x64,0x28,0x9e,0xd4
+
+# ATT: vfnmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmsub132nepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf6,0x64,0x2f,0x9e,0xd4
+
+# ATT: vfnmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf6,0x64,0xaf,0x9e,0xd4
+
+# ATT: vfnmsub132nepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vfnmsub132nepbf16 zmm2, zmm3, zmm4
+0x62,0xf6,0x64,0x48,0x9e,0xd4
+
+# ATT: vfnmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vfnmsub132nepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf6,0x64,0x4f,0x9e,0xd4
+
+# ATT: vfnmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf6,0x64,0xcf,0x9e,0xd4
+
+# ATT: vfnmsub132nepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vfnmsub132nepbf16 xmm2, xmm3, xmm4
+0x62,0xf6,0x64,0x08,0x9e,0xd4
+
+# ATT: vfnmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vfnmsub132nepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf6,0x64,0x0f,0x9e,0xd4
+
+# ATT: vfnmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf6,0x64,0x8f,0x9e,0xd4
+
+# ATT: vfnmsub132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vfnmsub132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x48,0x9e,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vfnmsub132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x4f,0x9e,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub132nepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vfnmsub132nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf6,0x64,0x58,0x9e,0x10
+
+# ATT: vfnmsub132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vfnmsub132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x64,0x48,0x9e,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfnmsub132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x64,0xcf,0x9e,0x51,0x7f
+
+# ATT: vfnmsub132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x64,0xdf,0x9e,0x52,0x80
+
+# ATT: vfnmsub132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vfnmsub132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x28,0x9e,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vfnmsub132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x2f,0x9e,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub132nepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vfnmsub132nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf6,0x64,0x38,0x9e,0x10
+
+# ATT: vfnmsub132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vfnmsub132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x64,0x28,0x9e,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfnmsub132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x64,0xaf,0x9e,0x51,0x7f
+
+# ATT: vfnmsub132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x64,0xbf,0x9e,0x52,0x80
+
+# ATT: vfnmsub132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vfnmsub132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x08,0x9e,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vfnmsub132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x0f,0x9e,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub132nepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vfnmsub132nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf6,0x64,0x18,0x9e,0x10
+
+# ATT: vfnmsub132nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vfnmsub132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x64,0x08,0x9e,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfnmsub132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x64,0x8f,0x9e,0x51,0x7f
+
+# ATT: vfnmsub132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x64,0x9f,0x9e,0x52,0x80
+
+# ATT: vfnmsub213nepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vfnmsub213nepbf16 ymm2, ymm3, ymm4
+0x62,0xf6,0x64,0x28,0xae,0xd4
+
+# ATT: vfnmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmsub213nepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf6,0x64,0x2f,0xae,0xd4
+
+# ATT: vfnmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf6,0x64,0xaf,0xae,0xd4
+
+# ATT: vfnmsub213nepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vfnmsub213nepbf16 zmm2, zmm3, zmm4
+0x62,0xf6,0x64,0x48,0xae,0xd4
+
+# ATT: vfnmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vfnmsub213nepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf6,0x64,0x4f,0xae,0xd4
+
+# ATT: vfnmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf6,0x64,0xcf,0xae,0xd4
+
+# ATT: vfnmsub213nepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vfnmsub213nepbf16 xmm2, xmm3, xmm4
+0x62,0xf6,0x64,0x08,0xae,0xd4
+
+# ATT: vfnmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vfnmsub213nepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf6,0x64,0x0f,0xae,0xd4
+
+# ATT: vfnmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf6,0x64,0x8f,0xae,0xd4
+
+# ATT: vfnmsub213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vfnmsub213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x48,0xae,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vfnmsub213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x4f,0xae,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub213nepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vfnmsub213nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf6,0x64,0x58,0xae,0x10
+
+# ATT: vfnmsub213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vfnmsub213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x64,0x48,0xae,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfnmsub213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x64,0xcf,0xae,0x51,0x7f
+
+# ATT: vfnmsub213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x64,0xdf,0xae,0x52,0x80
+
+# ATT: vfnmsub213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vfnmsub213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x28,0xae,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vfnmsub213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x2f,0xae,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub213nepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vfnmsub213nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf6,0x64,0x38,0xae,0x10
+
+# ATT: vfnmsub213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vfnmsub213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x64,0x28,0xae,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfnmsub213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x64,0xaf,0xae,0x51,0x7f
+
+# ATT: vfnmsub213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x64,0xbf,0xae,0x52,0x80
+
+# ATT: vfnmsub213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vfnmsub213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x08,0xae,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vfnmsub213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x0f,0xae,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub213nepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vfnmsub213nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf6,0x64,0x18,0xae,0x10
+
+# ATT: vfnmsub213nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vfnmsub213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x64,0x08,0xae,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfnmsub213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x64,0x8f,0xae,0x51,0x7f
+
+# ATT: vfnmsub213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x64,0x9f,0xae,0x52,0x80
+
+# ATT: vfnmsub231nepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vfnmsub231nepbf16 ymm2, ymm3, ymm4
+0x62,0xf6,0x64,0x28,0xbe,0xd4
+
+# ATT: vfnmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmsub231nepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf6,0x64,0x2f,0xbe,0xd4
+
+# ATT: vfnmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf6,0x64,0xaf,0xbe,0xd4
+
+# ATT: vfnmsub231nepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vfnmsub231nepbf16 zmm2, zmm3, zmm4
+0x62,0xf6,0x64,0x48,0xbe,0xd4
+
+# ATT: vfnmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vfnmsub231nepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf6,0x64,0x4f,0xbe,0xd4
+
+# ATT: vfnmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf6,0x64,0xcf,0xbe,0xd4
+
+# ATT: vfnmsub231nepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vfnmsub231nepbf16 xmm2, xmm3, xmm4
+0x62,0xf6,0x64,0x08,0xbe,0xd4
+
+# ATT: vfnmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vfnmsub231nepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf6,0x64,0x0f,0xbe,0xd4
+
+# ATT: vfnmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf6,0x64,0x8f,0xbe,0xd4
+
+# ATT: vfnmsub231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vfnmsub231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x48,0xbe,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vfnmsub231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x4f,0xbe,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub231nepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vfnmsub231nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf6,0x64,0x58,0xbe,0x10
+
+# ATT: vfnmsub231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vfnmsub231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x64,0x48,0xbe,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfnmsub231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x64,0xcf,0xbe,0x51,0x7f
+
+# ATT: vfnmsub231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x64,0xdf,0xbe,0x52,0x80
+
+# ATT: vfnmsub231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vfnmsub231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x28,0xbe,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vfnmsub231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x2f,0xbe,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub231nepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vfnmsub231nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf6,0x64,0x38,0xbe,0x10
+
+# ATT: vfnmsub231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vfnmsub231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x64,0x28,0xbe,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfnmsub231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x64,0xaf,0xbe,0x51,0x7f
+
+# ATT: vfnmsub231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x64,0xbf,0xbe,0x52,0x80
+
+# ATT: vfnmsub231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vfnmsub231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x08,0xbe,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vfnmsub231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x0f,0xbe,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub231nepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vfnmsub231nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf6,0x64,0x18,0xbe,0x10
+
+# ATT: vfnmsub231nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vfnmsub231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x64,0x08,0xbe,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfnmsub231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x64,0x8f,0xbe,0x51,0x7f
+
+# ATT: vfnmsub231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x64,0x9f,0xbe,0x52,0x80
+
+# ATT: vfpclasspbf16 $123, %zmm3, %k5
+# INTEL: vfpclasspbf16 k5, zmm3, 123
+0x62,0xf3,0x7f,0x48,0x66,0xeb,0x7b
+
+# ATT: vfpclasspbf16 $123, %zmm3, %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, zmm3, 123
+0x62,0xf3,0x7f,0x4f,0x66,0xeb,0x7b
+
+# ATT: vfpclasspbf16 $123, %ymm3, %k5
+# INTEL: vfpclasspbf16 k5, ymm3, 123
+0x62,0xf3,0x7f,0x28,0x66,0xeb,0x7b
+
+# ATT: vfpclasspbf16 $123, %ymm3, %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, ymm3, 123
+0x62,0xf3,0x7f,0x2f,0x66,0xeb,0x7b
+
+# ATT: vfpclasspbf16 $123, %xmm3, %k5
+# INTEL: vfpclasspbf16 k5, xmm3, 123
+0x62,0xf3,0x7f,0x08,0x66,0xeb,0x7b
+
+# ATT: vfpclasspbf16 $123, %xmm3, %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, xmm3, 123
+0x62,0xf3,0x7f,0x0f,0x66,0xeb,0x7b
+
+# ATT: vfpclasspbf16x $123, 268435456(%esp,%esi,8), %k5
+# INTEL: vfpclasspbf16 k5, xmmword ptr [esp + 8*esi + 268435456], 123
+0x62,0xf3,0x7f,0x08,0x66,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vfpclasspbf16x $123, 291(%edi,%eax,4), %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, xmmword ptr [edi + 4*eax + 291], 123
+0x62,0xf3,0x7f,0x0f,0x66,0xac,0x87,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vfpclasspbf16 $123, (%eax){1to8}, %k5
+# INTEL: vfpclasspbf16 k5, word ptr [eax]{1to8}, 123
+0x62,0xf3,0x7f,0x18,0x66,0x28,0x7b
+
+# ATT: vfpclasspbf16x $123, -512(,%ebp,2), %k5
+# INTEL: vfpclasspbf16 k5, xmmword ptr [2*ebp - 512], 123
+0x62,0xf3,0x7f,0x08,0x66,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b
+
+# ATT: vfpclasspbf16x $123, 2032(%ecx), %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, xmmword ptr [ecx + 2032], 123
+0x62,0xf3,0x7f,0x0f,0x66,0x69,0x7f,0x7b
+
+# ATT: vfpclasspbf16 $123, -256(%edx){1to8}, %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, word ptr [edx - 256]{1to8}, 123
+0x62,0xf3,0x7f,0x1f,0x66,0x6a,0x80,0x7b
+
+# ATT: vfpclasspbf16 $123, (%eax){1to16}, %k5
+# INTEL: vfpclasspbf16 k5, word ptr [eax]{1to16}, 123
+0x62,0xf3,0x7f,0x38,0x66,0x28,0x7b
+
+# ATT: vfpclasspbf16y $123, -1024(,%ebp,2), %k5
+# INTEL: vfpclasspbf16 k5, ymmword ptr [2*ebp - 1024], 123
+0x62,0xf3,0x7f,0x28,0x66,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b
+
+# ATT: vfpclasspbf16y $123, 4064(%ecx), %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, ymmword ptr [ecx + 4064], 123
+0x62,0xf3,0x7f,0x2f,0x66,0x69,0x7f,0x7b
+
+# ATT: vfpclasspbf16 $123, -256(%edx){1to16}, %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, word ptr [edx - 256]{1to16}, 123
+0x62,0xf3,0x7f,0x3f,0x66,0x6a,0x80,0x7b
+
+# ATT: vfpclasspbf16 $123, (%eax){1to32}, %k5
+# INTEL: vfpclasspbf16 k5, word ptr [eax]{1to32}, 123
+0x62,0xf3,0x7f,0x58,0x66,0x28,0x7b
+
+# ATT: vfpclasspbf16z $123, -2048(,%ebp,2), %k5
+# INTEL: vfpclasspbf16 k5, zmmword ptr [2*ebp - 2048], 123
+0x62,0xf3,0x7f,0x48,0x66,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b
+
+# ATT: vfpclasspbf16z $123, 8128(%ecx), %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, zmmword ptr [ecx + 8128], 123
+0x62,0xf3,0x7f,0x4f,0x66,0x69,0x7f,0x7b
+
+# ATT: vfpclasspbf16 $123, -256(%edx){1to32}, %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, word ptr [edx - 256]{1to32}, 123
+0x62,0xf3,0x7f,0x5f,0x66,0x6a,0x80,0x7b
+
+# ATT: vgetexppbf16 %xmm3, %xmm2
+# INTEL: vgetexppbf16 xmm2, xmm3
+0x62,0xf5,0x7d,0x08,0x42,0xd3
+
+# ATT: vgetexppbf16 %xmm3, %xmm2 {%k7}
+# INTEL: vgetexppbf16 xmm2 {k7}, xmm3
+0x62,0xf5,0x7d,0x0f,0x42,0xd3
+
+# ATT: vgetexppbf16 %xmm3, %xmm2 {%k7} {z}
+# INTEL: vgetexppbf16 xmm2 {k7} {z}, xmm3
+0x62,0xf5,0x7d,0x8f,0x42,0xd3
+
+# ATT: vgetexppbf16 %zmm3, %zmm2
+# INTEL: vgetexppbf16 zmm2, zmm3
+0x62,0xf5,0x7d,0x48,0x42,0xd3
+
+# ATT: vgetexppbf16 %zmm3, %zmm2 {%k7}
+# INTEL: vgetexppbf16 zmm2 {k7}, zmm3
+0x62,0xf5,0x7d,0x4f,0x42,0xd3
+
+# ATT: vgetexppbf16 %zmm3, %zmm2 {%k7} {z}
+# INTEL: vgetexppbf16 zmm2 {k7} {z}, zmm3
+0x62,0xf5,0x7d,0xcf,0x42,0xd3
+
+# ATT: vgetexppbf16 %ymm3, %ymm2
+# INTEL: vgetexppbf16 ymm2, ymm3
+0x62,0xf5,0x7d,0x28,0x42,0xd3
+
+# ATT: vgetexppbf16 %ymm3, %ymm2 {%k7}
+# INTEL: vgetexppbf16 ymm2 {k7}, ymm3
+0x62,0xf5,0x7d,0x2f,0x42,0xd3
+
+# ATT: vgetexppbf16 %ymm3, %ymm2 {%k7} {z}
+# INTEL: vgetexppbf16 ymm2 {k7} {z}, ymm3
+0x62,0xf5,0x7d,0xaf,0x42,0xd3
+
+# ATT: vgetexppbf16 268435456(%esp,%esi,8), %xmm2
+# INTEL: vgetexppbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x08,0x42,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vgetexppbf16 291(%edi,%eax,4), %xmm2 {%k7}
+# INTEL: vgetexppbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7d,0x0f,0x42,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vgetexppbf16 (%eax){1to8}, %xmm2
+# INTEL: vgetexppbf16 xmm2, word ptr [eax]{1to8}
+0x62,0xf5,0x7d,0x18,0x42,0x10
+
+# ATT: vgetexppbf16 -512(,%ebp,2), %xmm2
+# INTEL: vgetexppbf16 xmm2, xmmword ptr [2*ebp - 512]
+0x62,0xf5,0x7d,0x08,0x42,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vgetexppbf16 2032(%ecx), %xmm2 {%k7} {z}
+# INTEL: vgetexppbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7d,0x8f,0x42,0x51,0x7f
+
+# ATT: vgetexppbf16 -256(%edx){1to8}, %xmm2 {%k7} {z}
+# INTEL: vgetexppbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x7d,0x9f,0x42,0x52,0x80
+
+# ATT: vgetexppbf16 268435456(%esp,%esi,8), %ymm2
+# INTEL: vgetexppbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x28,0x42,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vgetexppbf16 291(%edi,%eax,4), %ymm2 {%k7}
+# INTEL: vgetexppbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7d,0x2f,0x42,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vgetexppbf16 (%eax){1to16}, %ymm2
+# INTEL: vgetexppbf16 ymm2, word ptr [eax]{1to16}
+0x62,0xf5,0x7d,0x38,0x42,0x10
+
+# ATT: vgetexppbf16 -1024(,%ebp,2), %ymm2
+# INTEL: vgetexppbf16 ymm2, ymmword ptr [2*ebp - 1024]
+0x62,0xf5,0x7d,0x28,0x42,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vgetexppbf16 4064(%ecx), %ymm2 {%k7} {z}
+# INTEL: vgetexppbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x7d,0xaf,0x42,0x51,0x7f
+
+# ATT: vgetexppbf16 -256(%edx){1to16}, %ymm2 {%k7} {z}
+# INTEL: vgetexppbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}
+0x62,0xf5,0x7d,0xbf,0x42,0x52,0x80
+
+# ATT: vgetexppbf16 268435456(%esp,%esi,8), %zmm2
+# INTEL: vgetexppbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x48,0x42,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vgetexppbf16 291(%edi,%eax,4), %zmm2 {%k7}
+# INTEL: vgetexppbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7d,0x4f,0x42,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vgetexppbf16 (%eax){1to32}, %zmm2
+# INTEL: vgetexppbf16 zmm2, word ptr [eax]{1to32}
+0x62,0xf5,0x7d,0x58,0x42,0x10
+
+# ATT: vgetexppbf16 -2048(,%ebp,2), %zmm2
+# INTEL: vgetexppbf16 zmm2, zmmword ptr [2*ebp - 2048]
+0x62,0xf5,0x7d,0x48,0x42,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vgetexppbf16 8128(%ecx), %zmm2 {%k7} {z}
+# INTEL: vgetexppbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+0x62,0xf5,0x7d,0xcf,0x42,0x51,0x7f
+
+# ATT: vgetexppbf16 -256(%edx){1to32}, %zmm2 {%k7} {z}
+# INTEL: vgetexppbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}
+0x62,0xf5,0x7d,0xdf,0x42,0x52,0x80
+
+# ATT: vgetmantpbf16 $123, %zmm3, %zmm2
+# INTEL: vgetmantpbf16 zmm2, zmm3, 123
+0x62,0xf3,0x7f,0x48,0x26,0xd3,0x7b
+
+# ATT: vgetmantpbf16 $123, %zmm3, %zmm2 {%k7}
+# INTEL: vgetmantpbf16 zmm2 {k7}, zmm3, 123
+0x62,0xf3,0x7f,0x4f,0x26,0xd3,0x7b
+
+# ATT: vgetmantpbf16 $123, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vgetmantpbf16 zmm2 {k7} {z}, zmm3, 123
+0x62,0xf3,0x7f,0xcf,0x26,0xd3,0x7b
+
+# ATT: vgetmantpbf16 $123, %ymm3, %ymm2
+# INTEL: vgetmantpbf16 ymm2, ymm3, 123
+0x62,0xf3,0x7f,0x28,0x26,0xd3,0x7b
+
+# ATT: vgetmantpbf16 $123, %ymm3, %ymm2 {%k7}
+# INTEL: vgetmantpbf16 ymm2 {k7}, ymm3, 123
+0x62,0xf3,0x7f,0x2f,0x26,0xd3,0x7b
+
+# ATT: vgetmantpbf16 $123, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vgetmantpbf16 ymm2 {k7} {z}, ymm3, 123
+0x62,0xf3,0x7f,0xaf,0x26,0xd3,0x7b
+
+# ATT: vgetmantpbf16 $123, %xmm3, %xmm2
+# INTEL: vgetmantpbf16 xmm2, xmm3, 123
+0x62,0xf3,0x7f,0x08,0x26,0xd3,0x7b
+
+# ATT: vgetmantpbf16 $123, %xmm3, %xmm2 {%k7}
+# INTEL: vgetmantpbf16 xmm2 {k7}, xmm3, 123
+0x62,0xf3,0x7f,0x0f,0x26,0xd3,0x7b
+
+# ATT: vgetmantpbf16 $123, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vgetmantpbf16 xmm2 {k7} {z}, xmm3, 123
+0x62,0xf3,0x7f,0x8f,0x26,0xd3,0x7b
+
+# ATT: vgetmantpbf16 $123, 268435456(%esp,%esi,8), %xmm2
+# INTEL: vgetmantpbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456], 123
+0x62,0xf3,0x7f,0x08,0x26,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vgetmantpbf16 $123, 291(%edi,%eax,4), %xmm2 {%k7}
+# INTEL: vgetmantpbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291], 123
+0x62,0xf3,0x7f,0x0f,0x26,0x94,0x87,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vgetmantpbf16 $123, (%eax){1to8}, %xmm2
+# INTEL: vgetmantpbf16 xmm2, word ptr [eax]{1to8}, 123
+0x62,0xf3,0x7f,0x18,0x26,0x10,0x7b
+
+# ATT: vgetmantpbf16 $123, -512(,%ebp,2), %xmm2
+# INTEL: vgetmantpbf16 xmm2, xmmword ptr [2*ebp - 512], 123
+0x62,0xf3,0x7f,0x08,0x26,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b
+
+# ATT: vgetmantpbf16 $123, 2032(%ecx), %xmm2 {%k7} {z}
+# INTEL: vgetmantpbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032], 123
+0x62,0xf3,0x7f,0x8f,0x26,0x51,0x7f,0x7b
+
+# ATT: vgetmantpbf16 $123, -256(%edx){1to8}, %xmm2 {%k7} {z}
+# INTEL: vgetmantpbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}, 123
+0x62,0xf3,0x7f,0x9f,0x26,0x52,0x80,0x7b
+
+# ATT: vgetmantpbf16 $123, 268435456(%esp,%esi,8), %ymm2
+# INTEL: vgetmantpbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456], 123
+0x62,0xf3,0x7f,0x28,0x26,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vgetmantpbf16 $123, 291(%edi,%eax,4), %ymm2 {%k7}
+# INTEL: vgetmantpbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291], 123
+0x62,0xf3,0x7f,0x2f,0x26,0x94,0x87,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vgetmantpbf16 $123, (%eax){1to16}, %ymm2
+# INTEL: vgetmantpbf16 ymm2, word ptr [eax]{1to16}, 123
+0x62,0xf3,0x7f,0x38,0x26,0x10,0x7b
+
+# ATT: vgetmantpbf16 $123, -1024(,%ebp,2), %ymm2
+# INTEL: vgetmantpbf16 ymm2, ymmword ptr [2*ebp - 1024], 123
+0x62,0xf3,0x7f,0x28,0x26,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b
+
+# ATT: vgetmantpbf16 $123, 4064(%ecx), %ymm2 {%k7} {z}
+# INTEL: vgetmantpbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064], 123
+0x62,0xf3,0x7f,0xaf,0x26,0x51,0x7f,0x7b
+
+# ATT: vgetmantpbf16 $123, -256(%edx){1to16}, %ymm2 {%k7} {z}
+# INTEL: vgetmantpbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}, 123
+0x62,0xf3,0x7f,0xbf,0x26,0x52,0x80,0x7b
+
+# ATT: vgetmantpbf16 $123, 268435456(%esp,%esi,8), %zmm2
+# INTEL: vgetmantpbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456], 123
+0x62,0xf3,0x7f,0x48,0x26,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vgetmantpbf16 $123, 291(%edi,%eax,4), %zmm2 {%k7}
+# INTEL: vgetmantpbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291], 123
+0x62,0xf3,0x7f,0x4f,0x26,0x94,0x87,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vgetmantpbf16 $123, (%eax){1to32}, %zmm2
+# INTEL: vgetmantpbf16 zmm2, word ptr [eax]{1to32}, 123
+0x62,0xf3,0x7f,0x58,0x26,0x10,0x7b
+
+# ATT: vgetmantpbf16 $123, -2048(,%ebp,2), %zmm2
+# INTEL: vgetmantpbf16 zmm2, zmmword ptr [2*ebp - 2048], 123
+0x62,0xf3,0x7f,0x48,0x26,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b
+
+# ATT: vgetmantpbf16 $123, 8128(%ecx), %zmm2 {%k7} {z}
+# INTEL: vgetmantpbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128], 123
+0x62,0xf3,0x7f,0xcf,0x26,0x51,0x7f,0x7b
+
+# ATT: vgetmantpbf16 $123, -256(%edx){1to32}, %zmm2 {%k7} {z}
+# INTEL: vgetmantpbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}, 123
+0x62,0xf3,0x7f,0xdf,0x26,0x52,0x80,0x7b
+
+# ATT: vmaxpbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vmaxpbf16 ymm2, ymm3, ymm4
+0x62,0xf5,0x65,0x28,0x5f,0xd4
+
+# ATT: vmaxpbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vmaxpbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf5,0x65,0x2f,0x5f,0xd4
+
+# ATT: vmaxpbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vmaxpbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf5,0x65,0xaf,0x5f,0xd4
+
+# ATT: vmaxpbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vmaxpbf16 zmm2, zmm3, zmm4
+0x62,0xf5,0x65,0x48,0x5f,0xd4
+
+# ATT: vmaxpbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vmaxpbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf5,0x65,0x4f,0x5f,0xd4
+
+# ATT: vmaxpbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vmaxpbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf5,0x65,0xcf,0x5f,0xd4
+
+# ATT: vmaxpbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vmaxpbf16 xmm2, xmm3, xmm4
+0x62,0xf5,0x65,0x08,0x5f,0xd4
+
+# ATT: vmaxpbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vmaxpbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf5,0x65,0x0f,0x5f,0xd4
+
+# ATT: vmaxpbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vmaxpbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf5,0x65,0x8f,0x5f,0xd4
+
+# ATT: vmaxpbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vmaxpbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x48,0x5f,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vmaxpbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vmaxpbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x4f,0x5f,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vmaxpbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vmaxpbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf5,0x65,0x58,0x5f,0x10
+
+# ATT: vmaxpbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vmaxpbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf5,0x65,0x48,0x5f,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vmaxpbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vmaxpbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf5,0x65,0xcf,0x5f,0x51,0x7f
+
+# ATT: vmaxpbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vmaxpbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf5,0x65,0xdf,0x5f,0x52,0x80
+
+# ATT: vmaxpbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vmaxpbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x28,0x5f,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vmaxpbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vmaxpbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x2f,0x5f,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vmaxpbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vmaxpbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf5,0x65,0x38,0x5f,0x10
+
+# ATT: vmaxpbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vmaxpbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf5,0x65,0x28,0x5f,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vmaxpbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vmaxpbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x65,0xaf,0x5f,0x51,0x7f
+
+# ATT: vmaxpbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vmaxpbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf5,0x65,0xbf,0x5f,0x52,0x80
+
+# ATT: vmaxpbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vmaxpbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x08,0x5f,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vmaxpbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vmaxpbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x0f,0x5f,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vmaxpbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vmaxpbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf5,0x65,0x18,0x5f,0x10
+
+# ATT: vmaxpbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vmaxpbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf5,0x65,0x08,0x5f,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vmaxpbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vmaxpbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x65,0x8f,0x5f,0x51,0x7f
+
+# ATT: vmaxpbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vmaxpbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x65,0x9f,0x5f,0x52,0x80
+
+# ATT: vminpbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vminpbf16 ymm2, ymm3, ymm4
+0x62,0xf5,0x65,0x28,0x5d,0xd4
+
+# ATT: vminpbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vminpbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf5,0x65,0x2f,0x5d,0xd4
+
+# ATT: vminpbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vminpbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf5,0x65,0xaf,0x5d,0xd4
+
+# ATT: vminpbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vminpbf16 zmm2, zmm3, zmm4
+0x62,0xf5,0x65,0x48,0x5d,0xd4
+
+# ATT: vminpbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vminpbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf5,0x65,0x4f,0x5d,0xd4
+
+# ATT: vminpbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vminpbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf5,0x65,0xcf,0x5d,0xd4
+
+# ATT: vminpbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vminpbf16 xmm2, xmm3, xmm4
+0x62,0xf5,0x65,0x08,0x5d,0xd4
+
+# ATT: vminpbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vminpbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf5,0x65,0x0f,0x5d,0xd4
+
+# ATT: vminpbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vminpbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf5,0x65,0x8f,0x5d,0xd4
+
+# ATT: vminpbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vminpbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x48,0x5d,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vminpbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vminpbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x4f,0x5d,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vminpbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vminpbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf5,0x65,0x58,0x5d,0x10
+
+# ATT: vminpbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vminpbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf5,0x65,0x48,0x5d,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vminpbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vminpbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf5,0x65,0xcf,0x5d,0x51,0x7f
+
+# ATT: vminpbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vminpbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf5,0x65,0xdf,0x5d,0x52,0x80
+
+# ATT: vminpbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vminpbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x28,0x5d,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vminpbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vminpbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x2f,0x5d,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vminpbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vminpbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf5,0x65,0x38,0x5d,0x10
+
+# ATT: vminpbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vminpbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf5,0x65,0x28,0x5d,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vminpbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vminpbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x65,0xaf,0x5d,0x51,0x7f
+
+# ATT: vminpbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vminpbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf5,0x65,0xbf,0x5d,0x52,0x80
+
+# ATT: vminpbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vminpbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x08,0x5d,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vminpbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vminpbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x0f,0x5d,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vminpbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vminpbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf5,0x65,0x18,0x5d,0x10
+
+# ATT: vminpbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vminpbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf5,0x65,0x08,0x5d,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vminpbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vminpbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x65,0x8f,0x5d,0x51,0x7f
+
+# ATT: vminpbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vminpbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x65,0x9f,0x5d,0x52,0x80
+
+# ATT: vmulnepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vmulnepbf16 ymm2, ymm3, ymm4
+0x62,0xf5,0x65,0x28,0x59,0xd4
+
+# ATT: vmulnepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vmulnepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf5,0x65,0x2f,0x59,0xd4
+
+# ATT: vmulnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vmulnepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf5,0x65,0xaf,0x59,0xd4
+
+# ATT: vmulnepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vmulnepbf16 zmm2, zmm3, zmm4
+0x62,0xf5,0x65,0x48,0x59,0xd4
+
+# ATT: vmulnepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vmulnepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf5,0x65,0x4f,0x59,0xd4
+
+# ATT: vmulnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vmulnepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf5,0x65,0xcf,0x59,0xd4
+
+# ATT: vmulnepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vmulnepbf16 xmm2, xmm3, xmm4
+0x62,0xf5,0x65,0x08,0x59,0xd4
+
+# ATT: vmulnepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vmulnepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf5,0x65,0x0f,0x59,0xd4
+
+# ATT: vmulnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vmulnepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf5,0x65,0x8f,0x59,0xd4
+
+# ATT: vmulnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vmulnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x48,0x59,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vmulnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vmulnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x4f,0x59,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vmulnepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vmulnepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf5,0x65,0x58,0x59,0x10
+
+# ATT: vmulnepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vmulnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf5,0x65,0x48,0x59,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vmulnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vmulnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf5,0x65,0xcf,0x59,0x51,0x7f
+
+# ATT: vmulnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vmulnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf5,0x65,0xdf,0x59,0x52,0x80
+
+# ATT: vmulnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vmulnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x28,0x59,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vmulnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vmulnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x2f,0x59,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vmulnepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vmulnepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf5,0x65,0x38,0x59,0x10
+
+# ATT: vmulnepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vmulnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf5,0x65,0x28,0x59,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vmulnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vmulnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x65,0xaf,0x59,0x51,0x7f
+
+# ATT: vmulnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vmulnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf5,0x65,0xbf,0x59,0x52,0x80
+
+# ATT: vmulnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vmulnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x08,0x59,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vmulnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vmulnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x0f,0x59,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vmulnepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vmulnepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf5,0x65,0x18,0x59,0x10
+
+# ATT: vmulnepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vmulnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf5,0x65,0x08,0x59,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vmulnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vmulnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x65,0x8f,0x59,0x51,0x7f
+
+# ATT: vmulnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vmulnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x65,0x9f,0x59,0x52,0x80
+
+# ATT: vrcppbf16 %xmm3, %xmm2
+# INTEL: vrcppbf16 xmm2, xmm3
+0x62,0xf6,0x7c,0x08,0x4c,0xd3
+
+# ATT: vrcppbf16 %xmm3, %xmm2 {%k7}
+# INTEL: vrcppbf16 xmm2 {k7}, xmm3
+0x62,0xf6,0x7c,0x0f,0x4c,0xd3
+
+# ATT: vrcppbf16 %xmm3, %xmm2 {%k7} {z}
+# INTEL: vrcppbf16 xmm2 {k7} {z}, xmm3
+0x62,0xf6,0x7c,0x8f,0x4c,0xd3
+
+# ATT: vrcppbf16 %zmm3, %zmm2
+# INTEL: vrcppbf16 zmm2, zmm3
+0x62,0xf6,0x7c,0x48,0x4c,0xd3
+
+# ATT: vrcppbf16 %zmm3, %zmm2 {%k7}
+# INTEL: vrcppbf16 zmm2 {k7}, zmm3
+0x62,0xf6,0x7c,0x4f,0x4c,0xd3
+
+# ATT: vrcppbf16 %zmm3, %zmm2 {%k7} {z}
+# INTEL: vrcppbf16 zmm2 {k7} {z}, zmm3
+0x62,0xf6,0x7c,0xcf,0x4c,0xd3
+
+# ATT: vrcppbf16 %ymm3, %ymm2
+# INTEL: vrcppbf16 ymm2, ymm3
+0x62,0xf6,0x7c,0x28,0x4c,0xd3
+
+# ATT: vrcppbf16 %ymm3, %ymm2 {%k7}
+# INTEL: vrcppbf16 ymm2 {k7}, ymm3
+0x62,0xf6,0x7c,0x2f,0x4c,0xd3
+
+# ATT: vrcppbf16 %ymm3, %ymm2 {%k7} {z}
+# INTEL: vrcppbf16 ymm2 {k7} {z}, ymm3
+0x62,0xf6,0x7c,0xaf,0x4c,0xd3
+
+# ATT: vrcppbf16 268435456(%esp,%esi,8), %xmm2
+# INTEL: vrcppbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x7c,0x08,0x4c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vrcppbf16 291(%edi,%eax,4), %xmm2 {%k7}
+# INTEL: vrcppbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x7c,0x0f,0x4c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vrcppbf16 (%eax){1to8}, %xmm2
+# INTEL: vrcppbf16 xmm2, word ptr [eax]{1to8}
+0x62,0xf6,0x7c,0x18,0x4c,0x10
+
+# ATT: vrcppbf16 -512(,%ebp,2), %xmm2
+# INTEL: vrcppbf16 xmm2, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x7c,0x08,0x4c,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vrcppbf16 2032(%ecx), %xmm2 {%k7} {z}
+# INTEL: vrcppbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x7c,0x8f,0x4c,0x51,0x7f
+
+# ATT: vrcppbf16 -256(%edx){1to8}, %xmm2 {%k7} {z}
+# INTEL: vrcppbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x7c,0x9f,0x4c,0x52,0x80
+
+# ATT: vrcppbf16 268435456(%esp,%esi,8), %ymm2
+# INTEL: vrcppbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x7c,0x28,0x4c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vrcppbf16 291(%edi,%eax,4), %ymm2 {%k7}
+# INTEL: vrcppbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x7c,0x2f,0x4c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vrcppbf16 (%eax){1to16}, %ymm2
+# INTEL: vrcppbf16 ymm2, word ptr [eax]{1to16}
+0x62,0xf6,0x7c,0x38,0x4c,0x10
+
+# ATT: vrcppbf16 -1024(,%ebp,2), %ymm2
+# INTEL: vrcppbf16 ymm2, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x7c,0x28,0x4c,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vrcppbf16 4064(%ecx), %ymm2 {%k7} {z}
+# INTEL: vrcppbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x7c,0xaf,0x4c,0x51,0x7f
+
+# ATT: vrcppbf16 -256(%edx){1to16}, %ymm2 {%k7} {z}
+# INTEL: vrcppbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x7c,0xbf,0x4c,0x52,0x80
+
+# ATT: vrcppbf16 268435456(%esp,%esi,8), %zmm2
+# INTEL: vrcppbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x7c,0x48,0x4c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vrcppbf16 291(%edi,%eax,4), %zmm2 {%k7}
+# INTEL: vrcppbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x7c,0x4f,0x4c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vrcppbf16 (%eax){1to32}, %zmm2
+# INTEL: vrcppbf16 zmm2, word ptr [eax]{1to32}
+0x62,0xf6,0x7c,0x58,0x4c,0x10
+
+# ATT: vrcppbf16 -2048(,%ebp,2), %zmm2
+# INTEL: vrcppbf16 zmm2, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x7c,0x48,0x4c,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vrcppbf16 8128(%ecx), %zmm2 {%k7} {z}
+# INTEL: vrcppbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x7c,0xcf,0x4c,0x51,0x7f
+
+# ATT: vrcppbf16 -256(%edx){1to32}, %zmm2 {%k7} {z}
+# INTEL: vrcppbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x7c,0xdf,0x4c,0x52,0x80
+
+# ATT: vreducenepbf16 $123, %zmm3, %zmm2
+# INTEL: vreducenepbf16 zmm2, zmm3, 123
+0x62,0xf3,0x7f,0x48,0x56,0xd3,0x7b
+
+# ATT: vreducenepbf16 $123, %zmm3, %zmm2 {%k7}
+# INTEL: vreducenepbf16 zmm2 {k7}, zmm3, 123
+0x62,0xf3,0x7f,0x4f,0x56,0xd3,0x7b
+
+# ATT: vreducenepbf16 $123, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vreducenepbf16 zmm2 {k7} {z}, zmm3, 123
+0x62,0xf3,0x7f,0xcf,0x56,0xd3,0x7b
+
+# ATT: vreducenepbf16 $123, %ymm3, %ymm2
+# INTEL: vreducenepbf16 ymm2, ymm3, 123
+0x62,0xf3,0x7f,0x28,0x56,0xd3,0x7b
+
+# ATT: vreducenepbf16 $123, %ymm3, %ymm2 {%k7}
+# INTEL: vreducenepbf16 ymm2 {k7}, ymm3, 123
+0x62,0xf3,0x7f,0x2f,0x56,0xd3,0x7b
+
+# ATT: vreducenepbf16 $123, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vreducenepbf16 ymm2 {k7} {z}, ymm3, 123
+0x62,0xf3,0x7f,0xaf,0x56,0xd3,0x7b
+
+# ATT: vreducenepbf16 $123, %xmm3, %xmm2
+# INTEL: vreducenepbf16 xmm2, xmm3, 123
+0x62,0xf3,0x7f,0x08,0x56,0xd3,0x7b
+
+# ATT: vreducenepbf16 $123, %xmm3, %xmm2 {%k7}
+# INTEL: vreducenepbf16 xmm2 {k7}, xmm3, 123
+0x62,0xf3,0x7f,0x0f,0x56,0xd3,0x7b
+
+# ATT: vreducenepbf16 $123, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vreducenepbf16 xmm2 {k7} {z}, xmm3, 123
+0x62,0xf3,0x7f,0x8f,0x56,0xd3,0x7b
+
+# ATT: vreducenepbf16 $123, 268435456(%esp,%esi,8), %xmm2
+# INTEL: vreducenepbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456], 123
+0x62,0xf3,0x7f,0x08,0x56,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vreducenepbf16 $123, 291(%edi,%eax,4), %xmm2 {%k7}
+# INTEL: vreducenepbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291], 123
+0x62,0xf3,0x7f,0x0f,0x56,0x94,0x87,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vreducenepbf16 $123, (%eax){1to8}, %xmm2
+# INTEL: vreducenepbf16 xmm2, word ptr [eax]{1to8}, 123
+0x62,0xf3,0x7f,0x18,0x56,0x10,0x7b
+
+# ATT: vreducenepbf16 $123, -512(,%ebp,2), %xmm2
+# INTEL: vreducenepbf16 xmm2, xmmword ptr [2*ebp - 512], 123
+0x62,0xf3,0x7f,0x08,0x56,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b
+
+# ATT: vreducenepbf16 $123, 2032(%ecx), %xmm2 {%k7} {z}
+# INTEL: vreducenepbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032], 123
+0x62,0xf3,0x7f,0x8f,0x56,0x51,0x7f,0x7b
+
+# ATT: vreducenepbf16 $123, -256(%edx){1to8}, %xmm2 {%k7} {z}
+# INTEL: vreducenepbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}, 123
+0x62,0xf3,0x7f,0x9f,0x56,0x52,0x80,0x7b
+
+# ATT: vreducenepbf16 $123, 268435456(%esp,%esi,8), %ymm2
+# INTEL: vreducenepbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456], 123
+0x62,0xf3,0x7f,0x28,0x56,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vreducenepbf16 $123, 291(%edi,%eax,4), %ymm2 {%k7}
+# INTEL: vreducenepbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291], 123
+0x62,0xf3,0x7f,0x2f,0x56,0x94,0x87,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vreducenepbf16 $123, (%eax){1to16}, %ymm2
+# INTEL: vreducenepbf16 ymm2, word ptr [eax]{1to16}, 123
+0x62,0xf3,0x7f,0x38,0x56,0x10,0x7b
+
+# ATT: vreducenepbf16 $123, -1024(,%ebp,2), %ymm2
+# INTEL: vreducenepbf16 ymm2, ymmword ptr [2*ebp - 1024], 123
+0x62,0xf3,0x7f,0x28,0x56,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b
+
+# ATT: vreducenepbf16 $123, 4064(%ecx), %ymm2 {%k7} {z}
+# INTEL: vreducenepbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064], 123
+0x62,0xf3,0x7f,0xaf,0x56,0x51,0x7f,0x7b
+
+# ATT: vreducenepbf16 $123, -256(%edx){1to16}, %ymm2 {%k7} {z}
+# INTEL: vreducenepbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}, 123
+0x62,0xf3,0x7f,0xbf,0x56,0x52,0x80,0x7b
+
+# ATT: vreducenepbf16 $123, 268435456(%esp,%esi,8), %zmm2
+# INTEL: vreducenepbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456], 123
+0x62,0xf3,0x7f,0x48,0x56,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vreducenepbf16 $123, 291(%edi,%eax,4), %zmm2 {%k7}
+# INTEL: vreducenepbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291], 123
+0x62,0xf3,0x7f,0x4f,0x56,0x94,0x87,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vreducenepbf16 $123, (%eax){1to32}, %zmm2
+# INTEL: vreducenepbf16 zmm2, word ptr [eax]{1to32}, 123
+0x62,0xf3,0x7f,0x58,0x56,0x10,0x7b
+
+# ATT: vreducenepbf16 $123, -2048(,%ebp,2), %zmm2
+# INTEL: vreducenepbf16 zmm2, zmmword ptr [2*ebp - 2048], 123
+0x62,0xf3,0x7f,0x48,0x56,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b
+
+# ATT: vreducenepbf16 $123, 8128(%ecx), %zmm2 {%k7} {z}
+# INTEL: vreducenepbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128], 123
+0x62,0xf3,0x7f,0xcf,0x56,0x51,0x7f,0x7b
+
+# ATT: vreducenepbf16 $123, -256(%edx){1to32}, %zmm2 {%k7} {z}
+# INTEL: vreducenepbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}, 123
+0x62,0xf3,0x7f,0xdf,0x56,0x52,0x80,0x7b
+
+# ATT: vrndscalenepbf16 $123, %zmm3, %zmm2
+# INTEL: vrndscalenepbf16 zmm2, zmm3, 123
+0x62,0xf3,0x7f,0x48,0x08,0xd3,0x7b
+
+# ATT: vrndscalenepbf16 $123, %zmm3, %zmm2 {%k7}
+# INTEL: vrndscalenepbf16 zmm2 {k7}, zmm3, 123
+0x62,0xf3,0x7f,0x4f,0x08,0xd3,0x7b
+
+# ATT: vrndscalenepbf16 $123, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vrndscalenepbf16 zmm2 {k7} {z}, zmm3, 123
+0x62,0xf3,0x7f,0xcf,0x08,0xd3,0x7b
+
+# ATT: vrndscalenepbf16 $123, %ymm3, %ymm2
+# INTEL: vrndscalenepbf16 ymm2, ymm3, 123
+0x62,0xf3,0x7f,0x28,0x08,0xd3,0x7b
+
+# ATT: vrndscalenepbf16 $123, %ymm3, %ymm2 {%k7}
+# INTEL: vrndscalenepbf16 ymm2 {k7}, ymm3, 123
+0x62,0xf3,0x7f,0x2f,0x08,0xd3,0x7b
+
+# ATT: vrndscalenepbf16 $123, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vrndscalenepbf16 ymm2 {k7} {z}, ymm3, 123
+0x62,0xf3,0x7f,0xaf,0x08,0xd3,0x7b
+
+# ATT: vrndscalenepbf16 $123, %xmm3, %xmm2
+# INTEL: vrndscalenepbf16 xmm2, xmm3, 123
+0x62,0xf3,0x7f,0x08,0x08,0xd3,0x7b
+
+# ATT: vrndscalenepbf16 $123, %xmm3, %xmm2 {%k7}
+# INTEL: vrndscalenepbf16 xmm2 {k7}, xmm3, 123
+0x62,0xf3,0x7f,0x0f,0x08,0xd3,0x7b
+
+# ATT: vrndscalenepbf16 $123, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vrndscalenepbf16 xmm2 {k7} {z}, xmm3, 123
+0x62,0xf3,0x7f,0x8f,0x08,0xd3,0x7b
+
+# ATT: vrndscalenepbf16 $123, 268435456(%esp,%esi,8), %xmm2
+# INTEL: vrndscalenepbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456], 123
+0x62,0xf3,0x7f,0x08,0x08,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vrndscalenepbf16 $123, 291(%edi,%eax,4), %xmm2 {%k7}
+# INTEL: vrndscalenepbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291], 123
+0x62,0xf3,0x7f,0x0f,0x08,0x94,0x87,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vrndscalenepbf16 $123, (%eax){1to8}, %xmm2
+# INTEL: vrndscalenepbf16 xmm2, word ptr [eax]{1to8}, 123
+0x62,0xf3,0x7f,0x18,0x08,0x10,0x7b
+
+# ATT: vrndscalenepbf16 $123, -512(,%ebp,2), %xmm2
+# INTEL: vrndscalenepbf16 xmm2, xmmword ptr [2*ebp - 512], 123
+0x62,0xf3,0x7f,0x08,0x08,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b
+
+# ATT: vrndscalenepbf16 $123, 2032(%ecx), %xmm2 {%k7} {z}
+# INTEL: vrndscalenepbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032], 123
+0x62,0xf3,0x7f,0x8f,0x08,0x51,0x7f,0x7b
+
+# ATT: vrndscalenepbf16 $123, -256(%edx){1to8}, %xmm2 {%k7} {z}
+# INTEL: vrndscalenepbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}, 123
+0x62,0xf3,0x7f,0x9f,0x08,0x52,0x80,0x7b
+
+# ATT: vrndscalenepbf16 $123, 268435456(%esp,%esi,8), %ymm2
+# INTEL: vrndscalenepbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456], 123
+0x62,0xf3,0x7f,0x28,0x08,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vrndscalenepbf16 $123, 291(%edi,%eax,4), %ymm2 {%k7}
+# INTEL: vrndscalenepbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291], 123
+0x62,0xf3,0x7f,0x2f,0x08,0x94,0x87,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vrndscalenepbf16 $123, (%eax){1to16}, %ymm2
+# INTEL: vrndscalenepbf16 ymm2, word ptr [eax]{1to16}, 123
+0x62,0xf3,0x7f,0x38,0x08,0x10,0x7b
+
+# ATT: vrndscalenepbf16 $123, -1024(,%ebp,2), %ymm2
+# INTEL: vrndscalenepbf16 ymm2, ymmword ptr [2*ebp - 1024], 123
+0x62,0xf3,0x7f,0x28,0x08,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b
+
+# ATT: vrndscalenepbf16 $123, 4064(%ecx), %ymm2 {%k7} {z}
+# INTEL: vrndscalenepbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064], 123
+0x62,0xf3,0x7f,0xaf,0x08,0x51,0x7f,0x7b
+
+# ATT: vrndscalenepbf16 $123, -256(%edx){1to16}, %ymm2 {%k7} {z}
+# INTEL: vrndscalenepbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}, 123
+0x62,0xf3,0x7f,0xbf,0x08,0x52,0x80,0x7b
+
+# ATT: vrndscalenepbf16 $123, 268435456(%esp,%esi,8), %zmm2
+# INTEL: vrndscalenepbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456], 123
+0x62,0xf3,0x7f,0x48,0x08,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vrndscalenepbf16 $123, 291(%edi,%eax,4), %zmm2 {%k7}
+# INTEL: vrndscalenepbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291], 123
+0x62,0xf3,0x7f,0x4f,0x08,0x94,0x87,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vrndscalenepbf16 $123, (%eax){1to32}, %zmm2
+# INTEL: vrndscalenepbf16 zmm2, word ptr [eax]{1to32}, 123
+0x62,0xf3,0x7f,0x58,0x08,0x10,0x7b
+
+# ATT: vrndscalenepbf16 $123, -2048(,%ebp,2), %zmm2
+# INTEL: vrndscalenepbf16 zmm2, zmmword ptr [2*ebp - 2048], 123
+0x62,0xf3,0x7f,0x48,0x08,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b
+
+# ATT: vrndscalenepbf16 $123, 8128(%ecx), %zmm2 {%k7} {z}
+# INTEL: vrndscalenepbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128], 123
+0x62,0xf3,0x7f,0xcf,0x08,0x51,0x7f,0x7b
+
+# ATT: vrndscalenepbf16 $123, -256(%edx){1to32}, %zmm2 {%k7} {z}
+# INTEL: vrndscalenepbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}, 123
+0x62,0xf3,0x7f,0xdf,0x08,0x52,0x80,0x7b
+
+# ATT: vrsqrtpbf16 %xmm3, %xmm2
+# INTEL: vrsqrtpbf16 xmm2, xmm3
+0x62,0xf6,0x7c,0x08,0x4e,0xd3
+
+# ATT: vrsqrtpbf16 %xmm3, %xmm2 {%k7}
+# INTEL: vrsqrtpbf16 xmm2 {k7}, xmm3
+0x62,0xf6,0x7c,0x0f,0x4e,0xd3
+
+# ATT: vrsqrtpbf16 %xmm3, %xmm2 {%k7} {z}
+# INTEL: vrsqrtpbf16 xmm2 {k7} {z}, xmm3
+0x62,0xf6,0x7c,0x8f,0x4e,0xd3
+
+# ATT: vrsqrtpbf16 %zmm3, %zmm2
+# INTEL: vrsqrtpbf16 zmm2, zmm3
+0x62,0xf6,0x7c,0x48,0x4e,0xd3
+
+# ATT: vrsqrtpbf16 %zmm3, %zmm2 {%k7}
+# INTEL: vrsqrtpbf16 zmm2 {k7}, zmm3
+0x62,0xf6,0x7c,0x4f,0x4e,0xd3
+
+# ATT: vrsqrtpbf16 %zmm3, %zmm2 {%k7} {z}
+# INTEL: vrsqrtpbf16 zmm2 {k7} {z}, zmm3
+0x62,0xf6,0x7c,0xcf,0x4e,0xd3
+
+# ATT: vrsqrtpbf16 %ymm3, %ymm2
+# INTEL: vrsqrtpbf16 ymm2, ymm3
+0x62,0xf6,0x7c,0x28,0x4e,0xd3
+
+# ATT: vrsqrtpbf16 %ymm3, %ymm2 {%k7}
+# INTEL: vrsqrtpbf16 ymm2 {k7}, ymm3
+0x62,0xf6,0x7c,0x2f,0x4e,0xd3
+
+# ATT: vrsqrtpbf16 %ymm3, %ymm2 {%k7} {z}
+# INTEL: vrsqrtpbf16 ymm2 {k7} {z}, ymm3
+0x62,0xf6,0x7c,0xaf,0x4e,0xd3
+
+# ATT: vrsqrtpbf16 268435456(%esp,%esi,8), %xmm2
+# INTEL: vrsqrtpbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x7c,0x08,0x4e,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vrsqrtpbf16 291(%edi,%eax,4), %xmm2 {%k7}
+# INTEL: vrsqrtpbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x7c,0x0f,0x4e,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vrsqrtpbf16 (%eax){1to8}, %xmm2
+# INTEL: vrsqrtpbf16 xmm2, word ptr [eax]{1to8}
+0x62,0xf6,0x7c,0x18,0x4e,0x10
+
+# ATT: vrsqrtpbf16 -512(,%ebp,2), %xmm2
+# INTEL: vrsqrtpbf16 xmm2, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x7c,0x08,0x4e,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vrsqrtpbf16 2032(%ecx), %xmm2 {%k7} {z}
+# INTEL: vrsqrtpbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x7c,0x8f,0x4e,0x51,0x7f
+
+# ATT: vrsqrtpbf16 -256(%edx){1to8}, %xmm2 {%k7} {z}
+# INTEL: vrsqrtpbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x7c,0x9f,0x4e,0x52,0x80
+
+# ATT: vrsqrtpbf16 268435456(%esp,%esi,8), %ymm2
+# INTEL: vrsqrtpbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x7c,0x28,0x4e,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vrsqrtpbf16 291(%edi,%eax,4), %ymm2 {%k7}
+# INTEL: vrsqrtpbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x7c,0x2f,0x4e,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vrsqrtpbf16 (%eax){1to16}, %ymm2
+# INTEL: vrsqrtpbf16 ymm2, word ptr [eax]{1to16}
+0x62,0xf6,0x7c,0x38,0x4e,0x10
+
+# ATT: vrsqrtpbf16 -1024(,%ebp,2), %ymm2
+# INTEL: vrsqrtpbf16 ymm2, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x7c,0x28,0x4e,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vrsqrtpbf16 4064(%ecx), %ymm2 {%k7} {z}
+# INTEL: vrsqrtpbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x7c,0xaf,0x4e,0x51,0x7f
+
+# ATT: vrsqrtpbf16 -256(%edx){1to16}, %ymm2 {%k7} {z}
+# INTEL: vrsqrtpbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x7c,0xbf,0x4e,0x52,0x80
+
+# ATT: vrsqrtpbf16 268435456(%esp,%esi,8), %zmm2
+# INTEL: vrsqrtpbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x7c,0x48,0x4e,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vrsqrtpbf16 291(%edi,%eax,4), %zmm2 {%k7}
+# INTEL: vrsqrtpbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x7c,0x4f,0x4e,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vrsqrtpbf16 (%eax){1to32}, %zmm2
+# INTEL: vrsqrtpbf16 zmm2, word ptr [eax]{1to32}
+0x62,0xf6,0x7c,0x58,0x4e,0x10
+
+# ATT: vrsqrtpbf16 -2048(,%ebp,2), %zmm2
+# INTEL: vrsqrtpbf16 zmm2, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x7c,0x48,0x4e,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vrsqrtpbf16 8128(%ecx), %zmm2 {%k7} {z}
+# INTEL: vrsqrtpbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x7c,0xcf,0x4e,0x51,0x7f
+
+# ATT: vrsqrtpbf16 -256(%edx){1to32}, %zmm2 {%k7} {z}
+# INTEL: vrsqrtpbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x7c,0xdf,0x4e,0x52,0x80
+
+# ATT: vscalefpbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vscalefpbf16 ymm2, ymm3, ymm4
+0x62,0xf6,0x64,0x28,0x2c,0xd4
+
+# ATT: vscalefpbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vscalefpbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf6,0x64,0x2f,0x2c,0xd4
+
+# ATT: vscalefpbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vscalefpbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf6,0x64,0xaf,0x2c,0xd4
+
+# ATT: vscalefpbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vscalefpbf16 zmm2, zmm3, zmm4
+0x62,0xf6,0x64,0x48,0x2c,0xd4
+
+# ATT: vscalefpbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vscalefpbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf6,0x64,0x4f,0x2c,0xd4
+
+# ATT: vscalefpbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vscalefpbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf6,0x64,0xcf,0x2c,0xd4
+
+# ATT: vscalefpbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vscalefpbf16 xmm2, xmm3, xmm4
+0x62,0xf6,0x64,0x08,0x2c,0xd4
+
+# ATT: vscalefpbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vscalefpbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf6,0x64,0x0f,0x2c,0xd4
+
+# ATT: vscalefpbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vscalefpbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf6,0x64,0x8f,0x2c,0xd4
+
+# ATT: vscalefpbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vscalefpbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x48,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vscalefpbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vscalefpbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x4f,0x2c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vscalefpbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vscalefpbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf6,0x64,0x58,0x2c,0x10
+
+# ATT: vscalefpbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vscalefpbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf6,0x64,0x48,0x2c,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vscalefpbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vscalefpbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf6,0x64,0xcf,0x2c,0x51,0x7f
+
+# ATT: vscalefpbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vscalefpbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf6,0x64,0xdf,0x2c,0x52,0x80
+
+# ATT: vscalefpbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vscalefpbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x28,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vscalefpbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vscalefpbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x2f,0x2c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vscalefpbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vscalefpbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf6,0x64,0x38,0x2c,0x10
+
+# ATT: vscalefpbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vscalefpbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf6,0x64,0x28,0x2c,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vscalefpbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vscalefpbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf6,0x64,0xaf,0x2c,0x51,0x7f
+
+# ATT: vscalefpbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vscalefpbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf6,0x64,0xbf,0x2c,0x52,0x80
+
+# ATT: vscalefpbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vscalefpbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x64,0x08,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vscalefpbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vscalefpbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf6,0x64,0x0f,0x2c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vscalefpbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vscalefpbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf6,0x64,0x18,0x2c,0x10
+
+# ATT: vscalefpbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vscalefpbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf6,0x64,0x08,0x2c,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vscalefpbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vscalefpbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x64,0x8f,0x2c,0x51,0x7f
+
+# ATT: vscalefpbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vscalefpbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x64,0x9f,0x2c,0x52,0x80
+
+# ATT: vsqrtnepbf16 %xmm3, %xmm2
+# INTEL: vsqrtnepbf16 xmm2, xmm3
+0x62,0xf5,0x7d,0x08,0x51,0xd3
+
+# ATT: vsqrtnepbf16 %xmm3, %xmm2 {%k7}
+# INTEL: vsqrtnepbf16 xmm2 {k7}, xmm3
+0x62,0xf5,0x7d,0x0f,0x51,0xd3
+
+# ATT: vsqrtnepbf16 %xmm3, %xmm2 {%k7} {z}
+# INTEL: vsqrtnepbf16 xmm2 {k7} {z}, xmm3
+0x62,0xf5,0x7d,0x8f,0x51,0xd3
+
+# ATT: vsqrtnepbf16 %zmm3, %zmm2
+# INTEL: vsqrtnepbf16 zmm2, zmm3
+0x62,0xf5,0x7d,0x48,0x51,0xd3
+
+# ATT: vsqrtnepbf16 %zmm3, %zmm2 {%k7}
+# INTEL: vsqrtnepbf16 zmm2 {k7}, zmm3
+0x62,0xf5,0x7d,0x4f,0x51,0xd3
+
+# ATT: vsqrtnepbf16 %zmm3, %zmm2 {%k7} {z}
+# INTEL: vsqrtnepbf16 zmm2 {k7} {z}, zmm3
+0x62,0xf5,0x7d,0xcf,0x51,0xd3
+
+# ATT: vsqrtnepbf16 %ymm3, %ymm2
+# INTEL: vsqrtnepbf16 ymm2, ymm3
+0x62,0xf5,0x7d,0x28,0x51,0xd3
+
+# ATT: vsqrtnepbf16 %ymm3, %ymm2 {%k7}
+# INTEL: vsqrtnepbf16 ymm2 {k7}, ymm3
+0x62,0xf5,0x7d,0x2f,0x51,0xd3
+
+# ATT: vsqrtnepbf16 %ymm3, %ymm2 {%k7} {z}
+# INTEL: vsqrtnepbf16 ymm2 {k7} {z}, ymm3
+0x62,0xf5,0x7d,0xaf,0x51,0xd3
+
+# ATT: vsqrtnepbf16 268435456(%esp,%esi,8), %xmm2
+# INTEL: vsqrtnepbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x08,0x51,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vsqrtnepbf16 291(%edi,%eax,4), %xmm2 {%k7}
+# INTEL: vsqrtnepbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7d,0x0f,0x51,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vsqrtnepbf16 (%eax){1to8}, %xmm2
+# INTEL: vsqrtnepbf16 xmm2, word ptr [eax]{1to8}
+0x62,0xf5,0x7d,0x18,0x51,0x10
+
+# ATT: vsqrtnepbf16 -512(,%ebp,2), %xmm2
+# INTEL: vsqrtnepbf16 xmm2, xmmword ptr [2*ebp - 512]
+0x62,0xf5,0x7d,0x08,0x51,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vsqrtnepbf16 2032(%ecx), %xmm2 {%k7} {z}
+# INTEL: vsqrtnepbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7d,0x8f,0x51,0x51,0x7f
+
+# ATT: vsqrtnepbf16 -256(%edx){1to8}, %xmm2 {%k7} {z}
+# INTEL: vsqrtnepbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x7d,0x9f,0x51,0x52,0x80
+
+# ATT: vsqrtnepbf16 268435456(%esp,%esi,8), %ymm2
+# INTEL: vsqrtnepbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x28,0x51,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vsqrtnepbf16 291(%edi,%eax,4), %ymm2 {%k7}
+# INTEL: vsqrtnepbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7d,0x2f,0x51,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vsqrtnepbf16 (%eax){1to16}, %ymm2
+# INTEL: vsqrtnepbf16 ymm2, word ptr [eax]{1to16}
+0x62,0xf5,0x7d,0x38,0x51,0x10
+
+# ATT: vsqrtnepbf16 -1024(,%ebp,2), %ymm2
+# INTEL: vsqrtnepbf16 ymm2, ymmword ptr [2*ebp - 1024]
+0x62,0xf5,0x7d,0x28,0x51,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vsqrtnepbf16 4064(%ecx), %ymm2 {%k7} {z}
+# INTEL: vsqrtnepbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x7d,0xaf,0x51,0x51,0x7f
+
+# ATT: vsqrtnepbf16 -256(%edx){1to16}, %ymm2 {%k7} {z}
+# INTEL: vsqrtnepbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}
+0x62,0xf5,0x7d,0xbf,0x51,0x52,0x80
+
+# ATT: vsqrtnepbf16 268435456(%esp,%esi,8), %zmm2
+# INTEL: vsqrtnepbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x48,0x51,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vsqrtnepbf16 291(%edi,%eax,4), %zmm2 {%k7}
+# INTEL: vsqrtnepbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7d,0x4f,0x51,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vsqrtnepbf16 (%eax){1to32}, %zmm2
+# INTEL: vsqrtnepbf16 zmm2, word ptr [eax]{1to32}
+0x62,0xf5,0x7d,0x58,0x51,0x10
+
+# ATT: vsqrtnepbf16 -2048(,%ebp,2), %zmm2
+# INTEL: vsqrtnepbf16 zmm2, zmmword ptr [2*ebp - 2048]
+0x62,0xf5,0x7d,0x48,0x51,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vsqrtnepbf16 8128(%ecx), %zmm2 {%k7} {z}
+# INTEL: vsqrtnepbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+0x62,0xf5,0x7d,0xcf,0x51,0x51,0x7f
+
+# ATT: vsqrtnepbf16 -256(%edx){1to32}, %zmm2 {%k7} {z}
+# INTEL: vsqrtnepbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}
+0x62,0xf5,0x7d,0xdf,0x51,0x52,0x80
+
+# ATT: vsubnepbf16 %ymm4, %ymm3, %ymm2
+# INTEL: vsubnepbf16 ymm2, ymm3, ymm4
+0x62,0xf5,0x65,0x28,0x5c,0xd4
+
+# ATT: vsubnepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vsubnepbf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf5,0x65,0x2f,0x5c,0xd4
+
+# ATT: vsubnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vsubnepbf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf5,0x65,0xaf,0x5c,0xd4
+
+# ATT: vsubnepbf16 %zmm4, %zmm3, %zmm2
+# INTEL: vsubnepbf16 zmm2, zmm3, zmm4
+0x62,0xf5,0x65,0x48,0x5c,0xd4
+
+# ATT: vsubnepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+# INTEL: vsubnepbf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf5,0x65,0x4f,0x5c,0xd4
+
+# ATT: vsubnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vsubnepbf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf5,0x65,0xcf,0x5c,0xd4
+
+# ATT: vsubnepbf16 %xmm4, %xmm3, %xmm2
+# INTEL: vsubnepbf16 xmm2, xmm3, xmm4
+0x62,0xf5,0x65,0x08,0x5c,0xd4
+
+# ATT: vsubnepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+# INTEL: vsubnepbf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf5,0x65,0x0f,0x5c,0xd4
+
+# ATT: vsubnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vsubnepbf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf5,0x65,0x8f,0x5c,0xd4
+
+# ATT: vsubnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vsubnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x48,0x5c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vsubnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+# INTEL: vsubnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x4f,0x5c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vsubnepbf16 (%eax){1to32}, %zmm3, %zmm2
+# INTEL: vsubnepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+0x62,0xf5,0x65,0x58,0x5c,0x10
+
+# ATT: vsubnepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vsubnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf5,0x65,0x48,0x5c,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vsubnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+# INTEL: vsubnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf5,0x65,0xcf,0x5c,0x51,0x7f
+
+# ATT: vsubnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vsubnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+0x62,0xf5,0x65,0xdf,0x5c,0x52,0x80
+
+# ATT: vsubnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vsubnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x28,0x5c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vsubnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+# INTEL: vsubnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x2f,0x5c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vsubnepbf16 (%eax){1to16}, %ymm3, %ymm2
+# INTEL: vsubnepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+0x62,0xf5,0x65,0x38,0x5c,0x10
+
+# ATT: vsubnepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vsubnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf5,0x65,0x28,0x5c,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vsubnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+# INTEL: vsubnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x65,0xaf,0x5c,0x51,0x7f
+
+# ATT: vsubnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vsubnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+0x62,0xf5,0x65,0xbf,0x5c,0x52,0x80
+
+# ATT: vsubnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vsubnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x65,0x08,0x5c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vsubnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+# INTEL: vsubnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x65,0x0f,0x5c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vsubnepbf16 (%eax){1to8}, %xmm3, %xmm2
+# INTEL: vsubnepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+0x62,0xf5,0x65,0x18,0x5c,0x10
+
+# ATT: vsubnepbf16 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vsubnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf5,0x65,0x08,0x5c,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vsubnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+# INTEL: vsubnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x65,0x8f,0x5c,0x51,0x7f
+
+# ATT: vsubnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+# INTEL: vsubnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x65,0x9f,0x5c,0x52,0x80
+
diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-bf16-64.txt b/llvm/test/MC/Disassembler/X86/avx10.2-bf16-64.txt
new file mode 100644
index 00000000000000..953ef8dd8a14c9
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx10.2-bf16-64.txt
@@ -0,0 +1,3015 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: vaddnepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vaddnepbf16 ymm22, ymm23, ymm24
+0x62,0x85,0x45,0x20,0x58,0xf0
+
+# ATT: vaddnepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vaddnepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x85,0x45,0x27,0x58,0xf0
+
+# ATT: vaddnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vaddnepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x85,0x45,0xa7,0x58,0xf0
+
+# ATT: vaddnepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vaddnepbf16 zmm22, zmm23, zmm24
+0x62,0x85,0x45,0x40,0x58,0xf0
+
+# ATT: vaddnepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vaddnepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x85,0x45,0x47,0x58,0xf0
+
+# ATT: vaddnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vaddnepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x85,0x45,0xc7,0x58,0xf0
+
+# ATT: vaddnepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vaddnepbf16 xmm22, xmm23, xmm24
+0x62,0x85,0x45,0x00,0x58,0xf0
+
+# ATT: vaddnepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vaddnepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x85,0x45,0x07,0x58,0xf0
+
+# ATT: vaddnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vaddnepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x85,0x45,0x87,0x58,0xf0
+
+# ATT: vaddnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vaddnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x40,0x58,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vaddnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vaddnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x47,0x58,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vaddnepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vaddnepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe5,0x45,0x50,0x58,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vaddnepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vaddnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0x45,0x40,0x58,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vaddnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vaddnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe5,0x45,0xc7,0x58,0x71,0x7f
+
+# ATT: vaddnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vaddnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe5,0x45,0xd7,0x58,0x72,0x80
+
+# ATT: vaddnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vaddnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x20,0x58,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vaddnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vaddnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x27,0x58,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vaddnepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vaddnepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe5,0x45,0x30,0x58,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vaddnepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vaddnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe5,0x45,0x20,0x58,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vaddnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vaddnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe5,0x45,0xa7,0x58,0x71,0x7f
+
+# ATT: vaddnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vaddnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe5,0x45,0xb7,0x58,0x72,0x80
+
+# ATT: vaddnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vaddnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x00,0x58,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vaddnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vaddnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x07,0x58,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vaddnepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vaddnepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe5,0x45,0x10,0x58,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vaddnepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vaddnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe5,0x45,0x00,0x58,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vaddnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vaddnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe5,0x45,0x87,0x58,0x71,0x7f
+
+# ATT: vaddnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vaddnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe5,0x45,0x97,0x58,0x72,0x80
+
+# ATT: vcmppbf16 $123, %ymm24, %ymm23, %k5
+# INTEL: vcmppbf16 k5, ymm23, ymm24, 123
+0x62,0x93,0x47,0x20,0xc2,0xe8,0x7b
+
+# ATT: vcmppbf16 $123, %ymm24, %ymm23, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, ymm23, ymm24, 123
+0x62,0x93,0x47,0x27,0xc2,0xe8,0x7b
+
+# ATT: vcmppbf16 $123, %xmm24, %xmm23, %k5
+# INTEL: vcmppbf16 k5, xmm23, xmm24, 123
+0x62,0x93,0x47,0x00,0xc2,0xe8,0x7b
+
+# ATT: vcmppbf16 $123, %xmm24, %xmm23, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, xmm23, xmm24, 123
+0x62,0x93,0x47,0x07,0xc2,0xe8,0x7b
+
+# ATT: vcmppbf16 $123, %zmm24, %zmm23, %k5
+# INTEL: vcmppbf16 k5, zmm23, zmm24, 123
+0x62,0x93,0x47,0x40,0xc2,0xe8,0x7b
+
+# ATT: vcmppbf16 $123, %zmm24, %zmm23, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, zmm23, zmm24, 123
+0x62,0x93,0x47,0x47,0xc2,0xe8,0x7b
+
+# ATT: vcmppbf16 $123, 268435456(%rbp,%r14,8), %zmm23, %k5
+# INTEL: vcmppbf16 k5, zmm23, zmmword ptr [rbp + 8*r14 + 268435456], 123
+0x62,0xb3,0x47,0x40,0xc2,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vcmppbf16 $123, 291(%r8,%rax,4), %zmm23, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291], 123
+0x62,0xd3,0x47,0x47,0xc2,0xac,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vcmppbf16 $123, (%rip){1to32}, %zmm23, %k5
+# INTEL: vcmppbf16 k5, zmm23, word ptr [rip]{1to32}, 123
+0x62,0xf3,0x47,0x50,0xc2,0x2d,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vcmppbf16 $123, -2048(,%rbp,2), %zmm23, %k5
+# INTEL: vcmppbf16 k5, zmm23, zmmword ptr [2*rbp - 2048], 123
+0x62,0xf3,0x47,0x40,0xc2,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b
+
+# ATT: vcmppbf16 $123, 8128(%rcx), %zmm23, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, zmm23, zmmword ptr [rcx + 8128], 123
+0x62,0xf3,0x47,0x47,0xc2,0x69,0x7f,0x7b
+
+# ATT: vcmppbf16 $123, -256(%rdx){1to32}, %zmm23, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, zmm23, word ptr [rdx - 256]{1to32}, 123
+0x62,0xf3,0x47,0x57,0xc2,0x6a,0x80,0x7b
+
+# ATT: vcmppbf16 $123, 268435456(%rbp,%r14,8), %xmm23, %k5
+# INTEL: vcmppbf16 k5, xmm23, xmmword ptr [rbp + 8*r14 + 268435456], 123
+0x62,0xb3,0x47,0x00,0xc2,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vcmppbf16 $123, 291(%r8,%rax,4), %xmm23, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291], 123
+0x62,0xd3,0x47,0x07,0xc2,0xac,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vcmppbf16 $123, (%rip){1to8}, %xmm23, %k5
+# INTEL: vcmppbf16 k5, xmm23, word ptr [rip]{1to8}, 123
+0x62,0xf3,0x47,0x10,0xc2,0x2d,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vcmppbf16 $123, -512(,%rbp,2), %xmm23, %k5
+# INTEL: vcmppbf16 k5, xmm23, xmmword ptr [2*rbp - 512], 123
+0x62,0xf3,0x47,0x00,0xc2,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b
+
+# ATT: vcmppbf16 $123, 2032(%rcx), %xmm23, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, xmm23, xmmword ptr [rcx + 2032], 123
+0x62,0xf3,0x47,0x07,0xc2,0x69,0x7f,0x7b
+
+# ATT: vcmppbf16 $123, -256(%rdx){1to8}, %xmm23, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, xmm23, word ptr [rdx - 256]{1to8}, 123
+0x62,0xf3,0x47,0x17,0xc2,0x6a,0x80,0x7b
+
+# ATT: vcmppbf16 $123, 268435456(%rbp,%r14,8), %ymm23, %k5
+# INTEL: vcmppbf16 k5, ymm23, ymmword ptr [rbp + 8*r14 + 268435456], 123
+0x62,0xb3,0x47,0x20,0xc2,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vcmppbf16 $123, 291(%r8,%rax,4), %ymm23, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291], 123
+0x62,0xd3,0x47,0x27,0xc2,0xac,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vcmppbf16 $123, (%rip){1to16}, %ymm23, %k5
+# INTEL: vcmppbf16 k5, ymm23, word ptr [rip]{1to16}, 123
+0x62,0xf3,0x47,0x30,0xc2,0x2d,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vcmppbf16 $123, -1024(,%rbp,2), %ymm23, %k5
+# INTEL: vcmppbf16 k5, ymm23, ymmword ptr [2*rbp - 1024], 123
+0x62,0xf3,0x47,0x20,0xc2,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b
+
+# ATT: vcmppbf16 $123, 4064(%rcx), %ymm23, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, ymm23, ymmword ptr [rcx + 4064], 123
+0x62,0xf3,0x47,0x27,0xc2,0x69,0x7f,0x7b
+
+# ATT: vcmppbf16 $123, -256(%rdx){1to16}, %ymm23, %k5 {%k7}
+# INTEL: vcmppbf16 k5 {k7}, ymm23, word ptr [rdx - 256]{1to16}, 123
+0x62,0xf3,0x47,0x37,0xc2,0x6a,0x80,0x7b
+
+# ATT: vcomsbf16 %xmm23, %xmm22
+# INTEL: vcomsbf16 xmm22, xmm23
+0x62,0xa5,0x7d,0x08,0x2f,0xf7
+
+# ATT: vcomsbf16 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vcomsbf16 xmm22, word ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7d,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcomsbf16 291(%r8,%rax,4), %xmm22
+# INTEL: vcomsbf16 xmm22, word ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7d,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcomsbf16 (%rip), %xmm22
+# INTEL: vcomsbf16 xmm22, word ptr [rip]
+0x62,0xe5,0x7d,0x08,0x2f,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vcomsbf16 -64(,%rbp,2), %xmm22
+# INTEL: vcomsbf16 xmm22, word ptr [2*rbp - 64]
+0x62,0xe5,0x7d,0x08,0x2f,0x34,0x6d,0xc0,0xff,0xff,0xff
+
+# ATT: vcomsbf16 254(%rcx), %xmm22
+# INTEL: vcomsbf16 xmm22, word ptr [rcx + 254]
+0x62,0xe5,0x7d,0x08,0x2f,0x71,0x7f
+
+# ATT: vcomsbf16 -256(%rdx), %xmm22
+# INTEL: vcomsbf16 xmm22, word ptr [rdx - 256]
+0x62,0xe5,0x7d,0x08,0x2f,0x72,0x80
+
+# ATT: vdivnepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vdivnepbf16 ymm22, ymm23, ymm24
+0x62,0x85,0x45,0x20,0x5e,0xf0
+
+# ATT: vdivnepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vdivnepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x85,0x45,0x27,0x5e,0xf0
+
+# ATT: vdivnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vdivnepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x85,0x45,0xa7,0x5e,0xf0
+
+# ATT: vdivnepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vdivnepbf16 zmm22, zmm23, zmm24
+0x62,0x85,0x45,0x40,0x5e,0xf0
+
+# ATT: vdivnepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vdivnepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x85,0x45,0x47,0x5e,0xf0
+
+# ATT: vdivnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vdivnepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x85,0x45,0xc7,0x5e,0xf0
+
+# ATT: vdivnepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vdivnepbf16 xmm22, xmm23, xmm24
+0x62,0x85,0x45,0x00,0x5e,0xf0
+
+# ATT: vdivnepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vdivnepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x85,0x45,0x07,0x5e,0xf0
+
+# ATT: vdivnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vdivnepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x85,0x45,0x87,0x5e,0xf0
+
+# ATT: vdivnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vdivnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x40,0x5e,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vdivnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vdivnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x47,0x5e,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vdivnepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vdivnepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe5,0x45,0x50,0x5e,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vdivnepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vdivnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0x45,0x40,0x5e,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vdivnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vdivnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe5,0x45,0xc7,0x5e,0x71,0x7f
+
+# ATT: vdivnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vdivnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe5,0x45,0xd7,0x5e,0x72,0x80
+
+# ATT: vdivnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vdivnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x20,0x5e,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vdivnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vdivnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x27,0x5e,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vdivnepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vdivnepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe5,0x45,0x30,0x5e,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vdivnepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vdivnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe5,0x45,0x20,0x5e,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vdivnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vdivnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe5,0x45,0xa7,0x5e,0x71,0x7f
+
+# ATT: vdivnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vdivnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe5,0x45,0xb7,0x5e,0x72,0x80
+
+# ATT: vdivnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vdivnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x00,0x5e,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vdivnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vdivnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x07,0x5e,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vdivnepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vdivnepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe5,0x45,0x10,0x5e,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vdivnepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vdivnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe5,0x45,0x00,0x5e,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vdivnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vdivnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe5,0x45,0x87,0x5e,0x71,0x7f
+
+# ATT: vdivnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vdivnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe5,0x45,0x97,0x5e,0x72,0x80
+
+# ATT: vfmadd132nepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vfmadd132nepbf16 ymm22, ymm23, ymm24
+0x62,0x86,0x44,0x20,0x98,0xf0
+
+# ATT: vfmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmadd132nepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x86,0x44,0x27,0x98,0xf0
+
+# ATT: vfmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x86,0x44,0xa7,0x98,0xf0
+
+# ATT: vfmadd132nepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vfmadd132nepbf16 zmm22, zmm23, zmm24
+0x62,0x86,0x44,0x40,0x98,0xf0
+
+# ATT: vfmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vfmadd132nepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x86,0x44,0x47,0x98,0xf0
+
+# ATT: vfmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x86,0x44,0xc7,0x98,0xf0
+
+# ATT: vfmadd132nepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vfmadd132nepbf16 xmm22, xmm23, xmm24
+0x62,0x86,0x44,0x00,0x98,0xf0
+
+# ATT: vfmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vfmadd132nepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x86,0x44,0x07,0x98,0xf0
+
+# ATT: vfmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x86,0x44,0x87,0x98,0xf0
+
+# ATT: vfmadd132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vfmadd132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x40,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vfmadd132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x47,0x98,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd132nepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vfmadd132nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe6,0x44,0x50,0x98,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmadd132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vfmadd132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x44,0x40,0x98,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfmadd132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x44,0xc7,0x98,0x71,0x7f
+
+# ATT: vfmadd132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmadd132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x44,0xd7,0x98,0x72,0x80
+
+# ATT: vfmadd132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vfmadd132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x20,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vfmadd132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x27,0x98,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd132nepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vfmadd132nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe6,0x44,0x30,0x98,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmadd132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vfmadd132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x44,0x20,0x98,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfmadd132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x44,0xa7,0x98,0x71,0x7f
+
+# ATT: vfmadd132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmadd132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x44,0xb7,0x98,0x72,0x80
+
+# ATT: vfmadd132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vfmadd132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x00,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vfmadd132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x07,0x98,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd132nepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vfmadd132nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe6,0x44,0x10,0x98,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmadd132nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vfmadd132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x44,0x00,0x98,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfmadd132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x44,0x87,0x98,0x71,0x7f
+
+# ATT: vfmadd132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmadd132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x44,0x97,0x98,0x72,0x80
+
+# ATT: vfmadd213nepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vfmadd213nepbf16 ymm22, ymm23, ymm24
+0x62,0x86,0x44,0x20,0xa8,0xf0
+
+# ATT: vfmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmadd213nepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x86,0x44,0x27,0xa8,0xf0
+
+# ATT: vfmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x86,0x44,0xa7,0xa8,0xf0
+
+# ATT: vfmadd213nepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vfmadd213nepbf16 zmm22, zmm23, zmm24
+0x62,0x86,0x44,0x40,0xa8,0xf0
+
+# ATT: vfmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vfmadd213nepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x86,0x44,0x47,0xa8,0xf0
+
+# ATT: vfmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x86,0x44,0xc7,0xa8,0xf0
+
+# ATT: vfmadd213nepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vfmadd213nepbf16 xmm22, xmm23, xmm24
+0x62,0x86,0x44,0x00,0xa8,0xf0
+
+# ATT: vfmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vfmadd213nepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x86,0x44,0x07,0xa8,0xf0
+
+# ATT: vfmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x86,0x44,0x87,0xa8,0xf0
+
+# ATT: vfmadd213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vfmadd213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x40,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vfmadd213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x47,0xa8,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd213nepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vfmadd213nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe6,0x44,0x50,0xa8,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmadd213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vfmadd213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x44,0x40,0xa8,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfmadd213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x44,0xc7,0xa8,0x71,0x7f
+
+# ATT: vfmadd213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmadd213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x44,0xd7,0xa8,0x72,0x80
+
+# ATT: vfmadd213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vfmadd213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x20,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vfmadd213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x27,0xa8,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd213nepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vfmadd213nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe6,0x44,0x30,0xa8,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmadd213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vfmadd213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x44,0x20,0xa8,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfmadd213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x44,0xa7,0xa8,0x71,0x7f
+
+# ATT: vfmadd213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmadd213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x44,0xb7,0xa8,0x72,0x80
+
+# ATT: vfmadd213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vfmadd213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x00,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vfmadd213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x07,0xa8,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd213nepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vfmadd213nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe6,0x44,0x10,0xa8,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmadd213nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vfmadd213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x44,0x00,0xa8,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfmadd213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x44,0x87,0xa8,0x71,0x7f
+
+# ATT: vfmadd213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmadd213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x44,0x97,0xa8,0x72,0x80
+
+# ATT: vfmadd231nepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vfmadd231nepbf16 ymm22, ymm23, ymm24
+0x62,0x86,0x44,0x20,0xb8,0xf0
+
+# ATT: vfmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmadd231nepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x86,0x44,0x27,0xb8,0xf0
+
+# ATT: vfmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x86,0x44,0xa7,0xb8,0xf0
+
+# ATT: vfmadd231nepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vfmadd231nepbf16 zmm22, zmm23, zmm24
+0x62,0x86,0x44,0x40,0xb8,0xf0
+
+# ATT: vfmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vfmadd231nepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x86,0x44,0x47,0xb8,0xf0
+
+# ATT: vfmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x86,0x44,0xc7,0xb8,0xf0
+
+# ATT: vfmadd231nepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vfmadd231nepbf16 xmm22, xmm23, xmm24
+0x62,0x86,0x44,0x00,0xb8,0xf0
+
+# ATT: vfmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vfmadd231nepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x86,0x44,0x07,0xb8,0xf0
+
+# ATT: vfmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x86,0x44,0x87,0xb8,0xf0
+
+# ATT: vfmadd231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vfmadd231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x40,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vfmadd231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x47,0xb8,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd231nepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vfmadd231nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe6,0x44,0x50,0xb8,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmadd231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vfmadd231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x44,0x40,0xb8,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfmadd231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x44,0xc7,0xb8,0x71,0x7f
+
+# ATT: vfmadd231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmadd231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x44,0xd7,0xb8,0x72,0x80
+
+# ATT: vfmadd231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vfmadd231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x20,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vfmadd231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x27,0xb8,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd231nepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vfmadd231nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe6,0x44,0x30,0xb8,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmadd231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vfmadd231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x44,0x20,0xb8,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfmadd231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x44,0xa7,0xb8,0x71,0x7f
+
+# ATT: vfmadd231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmadd231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x44,0xb7,0xb8,0x72,0x80
+
+# ATT: vfmadd231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vfmadd231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x00,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmadd231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vfmadd231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x07,0xb8,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmadd231nepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vfmadd231nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe6,0x44,0x10,0xb8,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmadd231nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vfmadd231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x44,0x00,0xb8,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfmadd231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x44,0x87,0xb8,0x71,0x7f
+
+# ATT: vfmadd231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmadd231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x44,0x97,0xb8,0x72,0x80
+
+# ATT: vfmsub132nepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vfmsub132nepbf16 ymm22, ymm23, ymm24
+0x62,0x86,0x44,0x20,0x9a,0xf0
+
+# ATT: vfmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmsub132nepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x86,0x44,0x27,0x9a,0xf0
+
+# ATT: vfmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x86,0x44,0xa7,0x9a,0xf0
+
+# ATT: vfmsub132nepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vfmsub132nepbf16 zmm22, zmm23, zmm24
+0x62,0x86,0x44,0x40,0x9a,0xf0
+
+# ATT: vfmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vfmsub132nepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x86,0x44,0x47,0x9a,0xf0
+
+# ATT: vfmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x86,0x44,0xc7,0x9a,0xf0
+
+# ATT: vfmsub132nepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vfmsub132nepbf16 xmm22, xmm23, xmm24
+0x62,0x86,0x44,0x00,0x9a,0xf0
+
+# ATT: vfmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vfmsub132nepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x86,0x44,0x07,0x9a,0xf0
+
+# ATT: vfmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x86,0x44,0x87,0x9a,0xf0
+
+# ATT: vfmsub132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vfmsub132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x40,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vfmsub132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x47,0x9a,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub132nepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vfmsub132nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe6,0x44,0x50,0x9a,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmsub132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vfmsub132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x44,0x40,0x9a,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfmsub132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x44,0xc7,0x9a,0x71,0x7f
+
+# ATT: vfmsub132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmsub132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x44,0xd7,0x9a,0x72,0x80
+
+# ATT: vfmsub132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vfmsub132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x20,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vfmsub132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x27,0x9a,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub132nepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vfmsub132nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe6,0x44,0x30,0x9a,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmsub132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vfmsub132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x44,0x20,0x9a,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfmsub132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x44,0xa7,0x9a,0x71,0x7f
+
+# ATT: vfmsub132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsub132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x44,0xb7,0x9a,0x72,0x80
+
+# ATT: vfmsub132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vfmsub132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x00,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vfmsub132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x07,0x9a,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub132nepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vfmsub132nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe6,0x44,0x10,0x9a,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmsub132nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vfmsub132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x44,0x00,0x9a,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfmsub132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x44,0x87,0x9a,0x71,0x7f
+
+# ATT: vfmsub132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmsub132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x44,0x97,0x9a,0x72,0x80
+
+# ATT: vfmsub213nepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vfmsub213nepbf16 ymm22, ymm23, ymm24
+0x62,0x86,0x44,0x20,0xaa,0xf0
+
+# ATT: vfmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmsub213nepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x86,0x44,0x27,0xaa,0xf0
+
+# ATT: vfmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x86,0x44,0xa7,0xaa,0xf0
+
+# ATT: vfmsub213nepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vfmsub213nepbf16 zmm22, zmm23, zmm24
+0x62,0x86,0x44,0x40,0xaa,0xf0
+
+# ATT: vfmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vfmsub213nepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x86,0x44,0x47,0xaa,0xf0
+
+# ATT: vfmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x86,0x44,0xc7,0xaa,0xf0
+
+# ATT: vfmsub213nepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vfmsub213nepbf16 xmm22, xmm23, xmm24
+0x62,0x86,0x44,0x00,0xaa,0xf0
+
+# ATT: vfmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vfmsub213nepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x86,0x44,0x07,0xaa,0xf0
+
+# ATT: vfmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x86,0x44,0x87,0xaa,0xf0
+
+# ATT: vfmsub213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vfmsub213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x40,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vfmsub213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x47,0xaa,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub213nepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vfmsub213nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe6,0x44,0x50,0xaa,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmsub213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vfmsub213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x44,0x40,0xaa,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfmsub213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x44,0xc7,0xaa,0x71,0x7f
+
+# ATT: vfmsub213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmsub213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x44,0xd7,0xaa,0x72,0x80
+
+# ATT: vfmsub213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vfmsub213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x20,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vfmsub213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x27,0xaa,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub213nepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vfmsub213nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe6,0x44,0x30,0xaa,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmsub213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vfmsub213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x44,0x20,0xaa,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfmsub213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x44,0xa7,0xaa,0x71,0x7f
+
+# ATT: vfmsub213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsub213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x44,0xb7,0xaa,0x72,0x80
+
+# ATT: vfmsub213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vfmsub213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x00,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vfmsub213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x07,0xaa,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub213nepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vfmsub213nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe6,0x44,0x10,0xaa,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmsub213nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vfmsub213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x44,0x00,0xaa,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfmsub213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x44,0x87,0xaa,0x71,0x7f
+
+# ATT: vfmsub213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmsub213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x44,0x97,0xaa,0x72,0x80
+
+# ATT: vfmsub231nepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vfmsub231nepbf16 ymm22, ymm23, ymm24
+0x62,0x86,0x44,0x20,0xba,0xf0
+
+# ATT: vfmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmsub231nepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x86,0x44,0x27,0xba,0xf0
+
+# ATT: vfmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x86,0x44,0xa7,0xba,0xf0
+
+# ATT: vfmsub231nepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vfmsub231nepbf16 zmm22, zmm23, zmm24
+0x62,0x86,0x44,0x40,0xba,0xf0
+
+# ATT: vfmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vfmsub231nepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x86,0x44,0x47,0xba,0xf0
+
+# ATT: vfmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x86,0x44,0xc7,0xba,0xf0
+
+# ATT: vfmsub231nepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vfmsub231nepbf16 xmm22, xmm23, xmm24
+0x62,0x86,0x44,0x00,0xba,0xf0
+
+# ATT: vfmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vfmsub231nepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x86,0x44,0x07,0xba,0xf0
+
+# ATT: vfmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x86,0x44,0x87,0xba,0xf0
+
+# ATT: vfmsub231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vfmsub231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x40,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vfmsub231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x47,0xba,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub231nepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vfmsub231nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe6,0x44,0x50,0xba,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmsub231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vfmsub231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x44,0x40,0xba,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfmsub231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x44,0xc7,0xba,0x71,0x7f
+
+# ATT: vfmsub231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfmsub231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x44,0xd7,0xba,0x72,0x80
+
+# ATT: vfmsub231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vfmsub231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x20,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vfmsub231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x27,0xba,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub231nepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vfmsub231nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe6,0x44,0x30,0xba,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmsub231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vfmsub231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x44,0x20,0xba,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfmsub231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x44,0xa7,0xba,0x71,0x7f
+
+# ATT: vfmsub231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsub231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x44,0xb7,0xba,0x72,0x80
+
+# ATT: vfmsub231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vfmsub231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x00,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfmsub231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vfmsub231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x07,0xba,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfmsub231nepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vfmsub231nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe6,0x44,0x10,0xba,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfmsub231nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vfmsub231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x44,0x00,0xba,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfmsub231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x44,0x87,0xba,0x71,0x7f
+
+# ATT: vfmsub231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfmsub231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x44,0x97,0xba,0x72,0x80
+
+# ATT: vfnmadd132nepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vfnmadd132nepbf16 ymm22, ymm23, ymm24
+0x62,0x86,0x44,0x20,0x9c,0xf0
+
+# ATT: vfnmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmadd132nepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x86,0x44,0x27,0x9c,0xf0
+
+# ATT: vfnmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x86,0x44,0xa7,0x9c,0xf0
+
+# ATT: vfnmadd132nepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vfnmadd132nepbf16 zmm22, zmm23, zmm24
+0x62,0x86,0x44,0x40,0x9c,0xf0
+
+# ATT: vfnmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vfnmadd132nepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x86,0x44,0x47,0x9c,0xf0
+
+# ATT: vfnmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x86,0x44,0xc7,0x9c,0xf0
+
+# ATT: vfnmadd132nepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vfnmadd132nepbf16 xmm22, xmm23, xmm24
+0x62,0x86,0x44,0x00,0x9c,0xf0
+
+# ATT: vfnmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vfnmadd132nepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x86,0x44,0x07,0x9c,0xf0
+
+# ATT: vfnmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x86,0x44,0x87,0x9c,0xf0
+
+# ATT: vfnmadd132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vfnmadd132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x40,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vfnmadd132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x47,0x9c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd132nepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vfnmadd132nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe6,0x44,0x50,0x9c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmadd132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vfnmadd132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x44,0x40,0x9c,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfnmadd132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x44,0xc7,0x9c,0x71,0x7f
+
+# ATT: vfnmadd132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x44,0xd7,0x9c,0x72,0x80
+
+# ATT: vfnmadd132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vfnmadd132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x20,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vfnmadd132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x27,0x9c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd132nepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vfnmadd132nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe6,0x44,0x30,0x9c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmadd132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vfnmadd132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x44,0x20,0x9c,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfnmadd132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x44,0xa7,0x9c,0x71,0x7f
+
+# ATT: vfnmadd132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x44,0xb7,0x9c,0x72,0x80
+
+# ATT: vfnmadd132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vfnmadd132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x00,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vfnmadd132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x07,0x9c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd132nepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vfnmadd132nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe6,0x44,0x10,0x9c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmadd132nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vfnmadd132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x44,0x00,0x9c,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfnmadd132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x44,0x87,0x9c,0x71,0x7f
+
+# ATT: vfnmadd132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmadd132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x44,0x97,0x9c,0x72,0x80
+
+# ATT: vfnmadd213nepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vfnmadd213nepbf16 ymm22, ymm23, ymm24
+0x62,0x86,0x44,0x20,0xac,0xf0
+
+# ATT: vfnmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmadd213nepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x86,0x44,0x27,0xac,0xf0
+
+# ATT: vfnmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x86,0x44,0xa7,0xac,0xf0
+
+# ATT: vfnmadd213nepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vfnmadd213nepbf16 zmm22, zmm23, zmm24
+0x62,0x86,0x44,0x40,0xac,0xf0
+
+# ATT: vfnmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vfnmadd213nepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x86,0x44,0x47,0xac,0xf0
+
+# ATT: vfnmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x86,0x44,0xc7,0xac,0xf0
+
+# ATT: vfnmadd213nepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vfnmadd213nepbf16 xmm22, xmm23, xmm24
+0x62,0x86,0x44,0x00,0xac,0xf0
+
+# ATT: vfnmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vfnmadd213nepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x86,0x44,0x07,0xac,0xf0
+
+# ATT: vfnmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x86,0x44,0x87,0xac,0xf0
+
+# ATT: vfnmadd213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vfnmadd213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x40,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vfnmadd213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x47,0xac,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd213nepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vfnmadd213nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe6,0x44,0x50,0xac,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmadd213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vfnmadd213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x44,0x40,0xac,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfnmadd213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x44,0xc7,0xac,0x71,0x7f
+
+# ATT: vfnmadd213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x44,0xd7,0xac,0x72,0x80
+
+# ATT: vfnmadd213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vfnmadd213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x20,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vfnmadd213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x27,0xac,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd213nepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vfnmadd213nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe6,0x44,0x30,0xac,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmadd213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vfnmadd213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x44,0x20,0xac,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfnmadd213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x44,0xa7,0xac,0x71,0x7f
+
+# ATT: vfnmadd213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x44,0xb7,0xac,0x72,0x80
+
+# ATT: vfnmadd213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vfnmadd213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x00,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vfnmadd213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x07,0xac,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd213nepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vfnmadd213nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe6,0x44,0x10,0xac,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmadd213nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vfnmadd213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x44,0x00,0xac,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfnmadd213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x44,0x87,0xac,0x71,0x7f
+
+# ATT: vfnmadd213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmadd213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x44,0x97,0xac,0x72,0x80
+
+# ATT: vfnmadd231nepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vfnmadd231nepbf16 ymm22, ymm23, ymm24
+0x62,0x86,0x44,0x20,0xbc,0xf0
+
+# ATT: vfnmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmadd231nepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x86,0x44,0x27,0xbc,0xf0
+
+# ATT: vfnmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x86,0x44,0xa7,0xbc,0xf0
+
+# ATT: vfnmadd231nepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vfnmadd231nepbf16 zmm22, zmm23, zmm24
+0x62,0x86,0x44,0x40,0xbc,0xf0
+
+# ATT: vfnmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vfnmadd231nepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x86,0x44,0x47,0xbc,0xf0
+
+# ATT: vfnmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x86,0x44,0xc7,0xbc,0xf0
+
+# ATT: vfnmadd231nepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vfnmadd231nepbf16 xmm22, xmm23, xmm24
+0x62,0x86,0x44,0x00,0xbc,0xf0
+
+# ATT: vfnmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vfnmadd231nepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x86,0x44,0x07,0xbc,0xf0
+
+# ATT: vfnmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x86,0x44,0x87,0xbc,0xf0
+
+# ATT: vfnmadd231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vfnmadd231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x40,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vfnmadd231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x47,0xbc,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd231nepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vfnmadd231nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe6,0x44,0x50,0xbc,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmadd231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vfnmadd231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x44,0x40,0xbc,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfnmadd231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x44,0xc7,0xbc,0x71,0x7f
+
+# ATT: vfnmadd231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x44,0xd7,0xbc,0x72,0x80
+
+# ATT: vfnmadd231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vfnmadd231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x20,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vfnmadd231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x27,0xbc,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd231nepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vfnmadd231nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe6,0x44,0x30,0xbc,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmadd231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vfnmadd231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x44,0x20,0xbc,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfnmadd231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x44,0xa7,0xbc,0x71,0x7f
+
+# ATT: vfnmadd231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x44,0xb7,0xbc,0x72,0x80
+
+# ATT: vfnmadd231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vfnmadd231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x00,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmadd231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vfnmadd231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x07,0xbc,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmadd231nepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vfnmadd231nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe6,0x44,0x10,0xbc,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmadd231nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vfnmadd231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x44,0x00,0xbc,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfnmadd231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x44,0x87,0xbc,0x71,0x7f
+
+# ATT: vfnmadd231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmadd231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x44,0x97,0xbc,0x72,0x80
+
+# ATT: vfnmsub132nepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vfnmsub132nepbf16 ymm22, ymm23, ymm24
+0x62,0x86,0x44,0x20,0x9e,0xf0
+
+# ATT: vfnmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmsub132nepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x86,0x44,0x27,0x9e,0xf0
+
+# ATT: vfnmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x86,0x44,0xa7,0x9e,0xf0
+
+# ATT: vfnmsub132nepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vfnmsub132nepbf16 zmm22, zmm23, zmm24
+0x62,0x86,0x44,0x40,0x9e,0xf0
+
+# ATT: vfnmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vfnmsub132nepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x86,0x44,0x47,0x9e,0xf0
+
+# ATT: vfnmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x86,0x44,0xc7,0x9e,0xf0
+
+# ATT: vfnmsub132nepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vfnmsub132nepbf16 xmm22, xmm23, xmm24
+0x62,0x86,0x44,0x00,0x9e,0xf0
+
+# ATT: vfnmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vfnmsub132nepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x86,0x44,0x07,0x9e,0xf0
+
+# ATT: vfnmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x86,0x44,0x87,0x9e,0xf0
+
+# ATT: vfnmsub132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vfnmsub132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x40,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vfnmsub132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x47,0x9e,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub132nepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vfnmsub132nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe6,0x44,0x50,0x9e,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmsub132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vfnmsub132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x44,0x40,0x9e,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfnmsub132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x44,0xc7,0x9e,0x71,0x7f
+
+# ATT: vfnmsub132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x44,0xd7,0x9e,0x72,0x80
+
+# ATT: vfnmsub132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vfnmsub132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x20,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vfnmsub132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x27,0x9e,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub132nepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vfnmsub132nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe6,0x44,0x30,0x9e,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmsub132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vfnmsub132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x44,0x20,0x9e,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfnmsub132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x44,0xa7,0x9e,0x71,0x7f
+
+# ATT: vfnmsub132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x44,0xb7,0x9e,0x72,0x80
+
+# ATT: vfnmsub132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vfnmsub132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x00,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vfnmsub132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x07,0x9e,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub132nepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vfnmsub132nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe6,0x44,0x10,0x9e,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmsub132nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vfnmsub132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x44,0x00,0x9e,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfnmsub132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x44,0x87,0x9e,0x71,0x7f
+
+# ATT: vfnmsub132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmsub132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x44,0x97,0x9e,0x72,0x80
+
+# ATT: vfnmsub213nepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vfnmsub213nepbf16 ymm22, ymm23, ymm24
+0x62,0x86,0x44,0x20,0xae,0xf0
+
+# ATT: vfnmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmsub213nepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x86,0x44,0x27,0xae,0xf0
+
+# ATT: vfnmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x86,0x44,0xa7,0xae,0xf0
+
+# ATT: vfnmsub213nepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vfnmsub213nepbf16 zmm22, zmm23, zmm24
+0x62,0x86,0x44,0x40,0xae,0xf0
+
+# ATT: vfnmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vfnmsub213nepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x86,0x44,0x47,0xae,0xf0
+
+# ATT: vfnmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x86,0x44,0xc7,0xae,0xf0
+
+# ATT: vfnmsub213nepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vfnmsub213nepbf16 xmm22, xmm23, xmm24
+0x62,0x86,0x44,0x00,0xae,0xf0
+
+# ATT: vfnmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vfnmsub213nepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x86,0x44,0x07,0xae,0xf0
+
+# ATT: vfnmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x86,0x44,0x87,0xae,0xf0
+
+# ATT: vfnmsub213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vfnmsub213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x40,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vfnmsub213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x47,0xae,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub213nepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vfnmsub213nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe6,0x44,0x50,0xae,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmsub213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vfnmsub213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x44,0x40,0xae,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfnmsub213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x44,0xc7,0xae,0x71,0x7f
+
+# ATT: vfnmsub213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x44,0xd7,0xae,0x72,0x80
+
+# ATT: vfnmsub213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vfnmsub213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x20,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vfnmsub213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x27,0xae,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub213nepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vfnmsub213nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe6,0x44,0x30,0xae,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmsub213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vfnmsub213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x44,0x20,0xae,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfnmsub213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x44,0xa7,0xae,0x71,0x7f
+
+# ATT: vfnmsub213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x44,0xb7,0xae,0x72,0x80
+
+# ATT: vfnmsub213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vfnmsub213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x00,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vfnmsub213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x07,0xae,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub213nepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vfnmsub213nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe6,0x44,0x10,0xae,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmsub213nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vfnmsub213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x44,0x00,0xae,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfnmsub213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x44,0x87,0xae,0x71,0x7f
+
+# ATT: vfnmsub213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmsub213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x44,0x97,0xae,0x72,0x80
+
+# ATT: vfnmsub231nepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vfnmsub231nepbf16 ymm22, ymm23, ymm24
+0x62,0x86,0x44,0x20,0xbe,0xf0
+
+# ATT: vfnmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmsub231nepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x86,0x44,0x27,0xbe,0xf0
+
+# ATT: vfnmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x86,0x44,0xa7,0xbe,0xf0
+
+# ATT: vfnmsub231nepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vfnmsub231nepbf16 zmm22, zmm23, zmm24
+0x62,0x86,0x44,0x40,0xbe,0xf0
+
+# ATT: vfnmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vfnmsub231nepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x86,0x44,0x47,0xbe,0xf0
+
+# ATT: vfnmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x86,0x44,0xc7,0xbe,0xf0
+
+# ATT: vfnmsub231nepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vfnmsub231nepbf16 xmm22, xmm23, xmm24
+0x62,0x86,0x44,0x00,0xbe,0xf0
+
+# ATT: vfnmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vfnmsub231nepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x86,0x44,0x07,0xbe,0xf0
+
+# ATT: vfnmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x86,0x44,0x87,0xbe,0xf0
+
+# ATT: vfnmsub231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vfnmsub231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x40,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vfnmsub231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x47,0xbe,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub231nepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vfnmsub231nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe6,0x44,0x50,0xbe,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmsub231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vfnmsub231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x44,0x40,0xbe,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vfnmsub231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x44,0xc7,0xbe,0x71,0x7f
+
+# ATT: vfnmsub231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x44,0xd7,0xbe,0x72,0x80
+
+# ATT: vfnmsub231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vfnmsub231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x20,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vfnmsub231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x27,0xbe,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub231nepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vfnmsub231nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe6,0x44,0x30,0xbe,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmsub231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vfnmsub231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x44,0x20,0xbe,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vfnmsub231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x44,0xa7,0xbe,0x71,0x7f
+
+# ATT: vfnmsub231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x44,0xb7,0xbe,0x72,0x80
+
+# ATT: vfnmsub231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vfnmsub231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x00,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vfnmsub231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vfnmsub231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x07,0xbe,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vfnmsub231nepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vfnmsub231nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe6,0x44,0x10,0xbe,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vfnmsub231nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vfnmsub231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x44,0x00,0xbe,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vfnmsub231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x44,0x87,0xbe,0x71,0x7f
+
+# ATT: vfnmsub231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vfnmsub231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x44,0x97,0xbe,0x72,0x80
+
+# ATT: vfpclasspbf16 $123, %zmm23, %k5
+# INTEL: vfpclasspbf16 k5, zmm23, 123
+0x62,0xb3,0x7f,0x48,0x66,0xef,0x7b
+
+# ATT: vfpclasspbf16 $123, %zmm23, %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, zmm23, 123
+0x62,0xb3,0x7f,0x4f,0x66,0xef,0x7b
+
+# ATT: vfpclasspbf16 $123, %ymm23, %k5
+# INTEL: vfpclasspbf16 k5, ymm23, 123
+0x62,0xb3,0x7f,0x28,0x66,0xef,0x7b
+
+# ATT: vfpclasspbf16 $123, %ymm23, %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, ymm23, 123
+0x62,0xb3,0x7f,0x2f,0x66,0xef,0x7b
+
+# ATT: vfpclasspbf16 $123, %xmm23, %k5
+# INTEL: vfpclasspbf16 k5, xmm23, 123
+0x62,0xb3,0x7f,0x08,0x66,0xef,0x7b
+
+# ATT: vfpclasspbf16 $123, %xmm23, %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, xmm23, 123
+0x62,0xb3,0x7f,0x0f,0x66,0xef,0x7b
+
+# ATT: vfpclasspbf16x $123, 268435456(%rbp,%r14,8), %k5
+# INTEL: vfpclasspbf16 k5, xmmword ptr [rbp + 8*r14 + 268435456], 123
+0x62,0xb3,0x7f,0x08,0x66,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vfpclasspbf16x $123, 291(%r8,%rax,4), %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, xmmword ptr [r8 + 4*rax + 291], 123
+0x62,0xd3,0x7f,0x0f,0x66,0xac,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vfpclasspbf16 $123, (%rip){1to8}, %k5
+# INTEL: vfpclasspbf16 k5, word ptr [rip]{1to8}, 123
+0x62,0xf3,0x7f,0x18,0x66,0x2d,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vfpclasspbf16x $123, -512(,%rbp,2), %k5
+# INTEL: vfpclasspbf16 k5, xmmword ptr [2*rbp - 512], 123
+0x62,0xf3,0x7f,0x08,0x66,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b
+
+# ATT: vfpclasspbf16x $123, 2032(%rcx), %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, xmmword ptr [rcx + 2032], 123
+0x62,0xf3,0x7f,0x0f,0x66,0x69,0x7f,0x7b
+
+# ATT: vfpclasspbf16 $123, -256(%rdx){1to8}, %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, word ptr [rdx - 256]{1to8}, 123
+0x62,0xf3,0x7f,0x1f,0x66,0x6a,0x80,0x7b
+
+# ATT: vfpclasspbf16 $123, (%rip){1to16}, %k5
+# INTEL: vfpclasspbf16 k5, word ptr [rip]{1to16}, 123
+0x62,0xf3,0x7f,0x38,0x66,0x2d,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vfpclasspbf16y $123, -1024(,%rbp,2), %k5
+# INTEL: vfpclasspbf16 k5, ymmword ptr [2*rbp - 1024], 123
+0x62,0xf3,0x7f,0x28,0x66,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b
+
+# ATT: vfpclasspbf16y $123, 4064(%rcx), %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, ymmword ptr [rcx + 4064], 123
+0x62,0xf3,0x7f,0x2f,0x66,0x69,0x7f,0x7b
+
+# ATT: vfpclasspbf16 $123, -256(%rdx){1to16}, %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, word ptr [rdx - 256]{1to16}, 123
+0x62,0xf3,0x7f,0x3f,0x66,0x6a,0x80,0x7b
+
+# ATT: vfpclasspbf16 $123, (%rip){1to32}, %k5
+# INTEL: vfpclasspbf16 k5, word ptr [rip]{1to32}, 123
+0x62,0xf3,0x7f,0x58,0x66,0x2d,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vfpclasspbf16z $123, -2048(,%rbp,2), %k5
+# INTEL: vfpclasspbf16 k5, zmmword ptr [2*rbp - 2048], 123
+0x62,0xf3,0x7f,0x48,0x66,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b
+
+# ATT: vfpclasspbf16z $123, 8128(%rcx), %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, zmmword ptr [rcx + 8128], 123
+0x62,0xf3,0x7f,0x4f,0x66,0x69,0x7f,0x7b
+
+# ATT: vfpclasspbf16 $123, -256(%rdx){1to32}, %k5 {%k7}
+# INTEL: vfpclasspbf16 k5 {k7}, word ptr [rdx - 256]{1to32}, 123
+0x62,0xf3,0x7f,0x5f,0x66,0x6a,0x80,0x7b
+
+# ATT: vgetexppbf16 %xmm23, %xmm22
+# INTEL: vgetexppbf16 xmm22, xmm23
+0x62,0xa5,0x7d,0x08,0x42,0xf7
+
+# ATT: vgetexppbf16 %xmm23, %xmm22 {%k7}
+# INTEL: vgetexppbf16 xmm22 {k7}, xmm23
+0x62,0xa5,0x7d,0x0f,0x42,0xf7
+
+# ATT: vgetexppbf16 %xmm23, %xmm22 {%k7} {z}
+# INTEL: vgetexppbf16 xmm22 {k7} {z}, xmm23
+0x62,0xa5,0x7d,0x8f,0x42,0xf7
+
+# ATT: vgetexppbf16 %zmm23, %zmm22
+# INTEL: vgetexppbf16 zmm22, zmm23
+0x62,0xa5,0x7d,0x48,0x42,0xf7
+
+# ATT: vgetexppbf16 %zmm23, %zmm22 {%k7}
+# INTEL: vgetexppbf16 zmm22 {k7}, zmm23
+0x62,0xa5,0x7d,0x4f,0x42,0xf7
+
+# ATT: vgetexppbf16 %zmm23, %zmm22 {%k7} {z}
+# INTEL: vgetexppbf16 zmm22 {k7} {z}, zmm23
+0x62,0xa5,0x7d,0xcf,0x42,0xf7
+
+# ATT: vgetexppbf16 %ymm23, %ymm22
+# INTEL: vgetexppbf16 ymm22, ymm23
+0x62,0xa5,0x7d,0x28,0x42,0xf7
+
+# ATT: vgetexppbf16 %ymm23, %ymm22 {%k7}
+# INTEL: vgetexppbf16 ymm22 {k7}, ymm23
+0x62,0xa5,0x7d,0x2f,0x42,0xf7
+
+# ATT: vgetexppbf16 %ymm23, %ymm22 {%k7} {z}
+# INTEL: vgetexppbf16 ymm22 {k7} {z}, ymm23
+0x62,0xa5,0x7d,0xaf,0x42,0xf7
+
+# ATT: vgetexppbf16 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vgetexppbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7d,0x08,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vgetexppbf16 291(%r8,%rax,4), %xmm22 {%k7}
+# INTEL: vgetexppbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7d,0x0f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vgetexppbf16 (%rip){1to8}, %xmm22
+# INTEL: vgetexppbf16 xmm22, word ptr [rip]{1to8}
+0x62,0xe5,0x7d,0x18,0x42,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vgetexppbf16 -512(,%rbp,2), %xmm22
+# INTEL: vgetexppbf16 xmm22, xmmword ptr [2*rbp - 512]
+0x62,0xe5,0x7d,0x08,0x42,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vgetexppbf16 2032(%rcx), %xmm22 {%k7} {z}
+# INTEL: vgetexppbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+0x62,0xe5,0x7d,0x8f,0x42,0x71,0x7f
+
+# ATT: vgetexppbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z}
+# INTEL: vgetexppbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}
+0x62,0xe5,0x7d,0x9f,0x42,0x72,0x80
+
+# ATT: vgetexppbf16 268435456(%rbp,%r14,8), %ymm22
+# INTEL: vgetexppbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7d,0x28,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vgetexppbf16 291(%r8,%rax,4), %ymm22 {%k7}
+# INTEL: vgetexppbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7d,0x2f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vgetexppbf16 (%rip){1to16}, %ymm22
+# INTEL: vgetexppbf16 ymm22, word ptr [rip]{1to16}
+0x62,0xe5,0x7d,0x38,0x42,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vgetexppbf16 -1024(,%rbp,2), %ymm22
+# INTEL: vgetexppbf16 ymm22, ymmword ptr [2*rbp - 1024]
+0x62,0xe5,0x7d,0x28,0x42,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vgetexppbf16 4064(%rcx), %ymm22 {%k7} {z}
+# INTEL: vgetexppbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+0x62,0xe5,0x7d,0xaf,0x42,0x71,0x7f
+
+# ATT: vgetexppbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z}
+# INTEL: vgetexppbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}
+0x62,0xe5,0x7d,0xbf,0x42,0x72,0x80
+
+# ATT: vgetexppbf16 268435456(%rbp,%r14,8), %zmm22
+# INTEL: vgetexppbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7d,0x48,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vgetexppbf16 291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vgetexppbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7d,0x4f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vgetexppbf16 (%rip){1to32}, %zmm22
+# INTEL: vgetexppbf16 zmm22, word ptr [rip]{1to32}
+0x62,0xe5,0x7d,0x58,0x42,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vgetexppbf16 -2048(,%rbp,2), %zmm22
+# INTEL: vgetexppbf16 zmm22, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0x7d,0x48,0x42,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vgetexppbf16 8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vgetexppbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+0x62,0xe5,0x7d,0xcf,0x42,0x71,0x7f
+
+# ATT: vgetexppbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z}
+# INTEL: vgetexppbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}
+0x62,0xe5,0x7d,0xdf,0x42,0x72,0x80
+
+# ATT: vgetmantpbf16 $123, %zmm23, %zmm22
+# INTEL: vgetmantpbf16 zmm22, zmm23, 123
+0x62,0xa3,0x7f,0x48,0x26,0xf7,0x7b
+
+# ATT: vgetmantpbf16 $123, %zmm23, %zmm22 {%k7}
+# INTEL: vgetmantpbf16 zmm22 {k7}, zmm23, 123
+0x62,0xa3,0x7f,0x4f,0x26,0xf7,0x7b
+
+# ATT: vgetmantpbf16 $123, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vgetmantpbf16 zmm22 {k7} {z}, zmm23, 123
+0x62,0xa3,0x7f,0xcf,0x26,0xf7,0x7b
+
+# ATT: vgetmantpbf16 $123, %ymm23, %ymm22
+# INTEL: vgetmantpbf16 ymm22, ymm23, 123
+0x62,0xa3,0x7f,0x28,0x26,0xf7,0x7b
+
+# ATT: vgetmantpbf16 $123, %ymm23, %ymm22 {%k7}
+# INTEL: vgetmantpbf16 ymm22 {k7}, ymm23, 123
+0x62,0xa3,0x7f,0x2f,0x26,0xf7,0x7b
+
+# ATT: vgetmantpbf16 $123, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vgetmantpbf16 ymm22 {k7} {z}, ymm23, 123
+0x62,0xa3,0x7f,0xaf,0x26,0xf7,0x7b
+
+# ATT: vgetmantpbf16 $123, %xmm23, %xmm22
+# INTEL: vgetmantpbf16 xmm22, xmm23, 123
+0x62,0xa3,0x7f,0x08,0x26,0xf7,0x7b
+
+# ATT: vgetmantpbf16 $123, %xmm23, %xmm22 {%k7}
+# INTEL: vgetmantpbf16 xmm22 {k7}, xmm23, 123
+0x62,0xa3,0x7f,0x0f,0x26,0xf7,0x7b
+
+# ATT: vgetmantpbf16 $123, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vgetmantpbf16 xmm22 {k7} {z}, xmm23, 123
+0x62,0xa3,0x7f,0x8f,0x26,0xf7,0x7b
+
+# ATT: vgetmantpbf16 $123, 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vgetmantpbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456], 123
+0x62,0xa3,0x7f,0x08,0x26,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vgetmantpbf16 $123, 291(%r8,%rax,4), %xmm22 {%k7}
+# INTEL: vgetmantpbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291], 123
+0x62,0xc3,0x7f,0x0f,0x26,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vgetmantpbf16 $123, (%rip){1to8}, %xmm22
+# INTEL: vgetmantpbf16 xmm22, word ptr [rip]{1to8}, 123
+0x62,0xe3,0x7f,0x18,0x26,0x35,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vgetmantpbf16 $123, -512(,%rbp,2), %xmm22
+# INTEL: vgetmantpbf16 xmm22, xmmword ptr [2*rbp - 512], 123
+0x62,0xe3,0x7f,0x08,0x26,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b
+
+# ATT: vgetmantpbf16 $123, 2032(%rcx), %xmm22 {%k7} {z}
+# INTEL: vgetmantpbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032], 123
+0x62,0xe3,0x7f,0x8f,0x26,0x71,0x7f,0x7b
+
+# ATT: vgetmantpbf16 $123, -256(%rdx){1to8}, %xmm22 {%k7} {z}
+# INTEL: vgetmantpbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}, 123
+0x62,0xe3,0x7f,0x9f,0x26,0x72,0x80,0x7b
+
+# ATT: vgetmantpbf16 $123, 268435456(%rbp,%r14,8), %ymm22
+# INTEL: vgetmantpbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456], 123
+0x62,0xa3,0x7f,0x28,0x26,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vgetmantpbf16 $123, 291(%r8,%rax,4), %ymm22 {%k7}
+# INTEL: vgetmantpbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291], 123
+0x62,0xc3,0x7f,0x2f,0x26,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vgetmantpbf16 $123, (%rip){1to16}, %ymm22
+# INTEL: vgetmantpbf16 ymm22, word ptr [rip]{1to16}, 123
+0x62,0xe3,0x7f,0x38,0x26,0x35,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vgetmantpbf16 $123, -1024(,%rbp,2), %ymm22
+# INTEL: vgetmantpbf16 ymm22, ymmword ptr [2*rbp - 1024], 123
+0x62,0xe3,0x7f,0x28,0x26,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b
+
+# ATT: vgetmantpbf16 $123, 4064(%rcx), %ymm22 {%k7} {z}
+# INTEL: vgetmantpbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064], 123
+0x62,0xe3,0x7f,0xaf,0x26,0x71,0x7f,0x7b
+
+# ATT: vgetmantpbf16 $123, -256(%rdx){1to16}, %ymm22 {%k7} {z}
+# INTEL: vgetmantpbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}, 123
+0x62,0xe3,0x7f,0xbf,0x26,0x72,0x80,0x7b
+
+# ATT: vgetmantpbf16 $123, 268435456(%rbp,%r14,8), %zmm22
+# INTEL: vgetmantpbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456], 123
+0x62,0xa3,0x7f,0x48,0x26,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vgetmantpbf16 $123, 291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vgetmantpbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291], 123
+0x62,0xc3,0x7f,0x4f,0x26,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vgetmantpbf16 $123, (%rip){1to32}, %zmm22
+# INTEL: vgetmantpbf16 zmm22, word ptr [rip]{1to32}, 123
+0x62,0xe3,0x7f,0x58,0x26,0x35,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vgetmantpbf16 $123, -2048(,%rbp,2), %zmm22
+# INTEL: vgetmantpbf16 zmm22, zmmword ptr [2*rbp - 2048], 123
+0x62,0xe3,0x7f,0x48,0x26,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b
+
+# ATT: vgetmantpbf16 $123, 8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vgetmantpbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128], 123
+0x62,0xe3,0x7f,0xcf,0x26,0x71,0x7f,0x7b
+
+# ATT: vgetmantpbf16 $123, -256(%rdx){1to32}, %zmm22 {%k7} {z}
+# INTEL: vgetmantpbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}, 123
+0x62,0xe3,0x7f,0xdf,0x26,0x72,0x80,0x7b
+
+# ATT: vmaxpbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vmaxpbf16 ymm22, ymm23, ymm24
+0x62,0x85,0x45,0x20,0x5f,0xf0
+
+# ATT: vmaxpbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vmaxpbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x85,0x45,0x27,0x5f,0xf0
+
+# ATT: vmaxpbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vmaxpbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x85,0x45,0xa7,0x5f,0xf0
+
+# ATT: vmaxpbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vmaxpbf16 zmm22, zmm23, zmm24
+0x62,0x85,0x45,0x40,0x5f,0xf0
+
+# ATT: vmaxpbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vmaxpbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x85,0x45,0x47,0x5f,0xf0
+
+# ATT: vmaxpbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vmaxpbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x85,0x45,0xc7,0x5f,0xf0
+
+# ATT: vmaxpbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vmaxpbf16 xmm22, xmm23, xmm24
+0x62,0x85,0x45,0x00,0x5f,0xf0
+
+# ATT: vmaxpbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vmaxpbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x85,0x45,0x07,0x5f,0xf0
+
+# ATT: vmaxpbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vmaxpbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x85,0x45,0x87,0x5f,0xf0
+
+# ATT: vmaxpbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vmaxpbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x40,0x5f,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vmaxpbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vmaxpbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x47,0x5f,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vmaxpbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vmaxpbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe5,0x45,0x50,0x5f,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vmaxpbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vmaxpbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0x45,0x40,0x5f,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vmaxpbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vmaxpbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe5,0x45,0xc7,0x5f,0x71,0x7f
+
+# ATT: vmaxpbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vmaxpbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe5,0x45,0xd7,0x5f,0x72,0x80
+
+# ATT: vmaxpbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vmaxpbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x20,0x5f,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vmaxpbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vmaxpbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x27,0x5f,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vmaxpbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vmaxpbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe5,0x45,0x30,0x5f,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vmaxpbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vmaxpbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe5,0x45,0x20,0x5f,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vmaxpbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vmaxpbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe5,0x45,0xa7,0x5f,0x71,0x7f
+
+# ATT: vmaxpbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vmaxpbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe5,0x45,0xb7,0x5f,0x72,0x80
+
+# ATT: vmaxpbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vmaxpbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x00,0x5f,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vmaxpbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vmaxpbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x07,0x5f,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vmaxpbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vmaxpbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe5,0x45,0x10,0x5f,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vmaxpbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vmaxpbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe5,0x45,0x00,0x5f,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vmaxpbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vmaxpbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe5,0x45,0x87,0x5f,0x71,0x7f
+
+# ATT: vmaxpbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vmaxpbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe5,0x45,0x97,0x5f,0x72,0x80
+
+# ATT: vminpbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vminpbf16 ymm22, ymm23, ymm24
+0x62,0x85,0x45,0x20,0x5d,0xf0
+
+# ATT: vminpbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vminpbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x85,0x45,0x27,0x5d,0xf0
+
+# ATT: vminpbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vminpbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x85,0x45,0xa7,0x5d,0xf0
+
+# ATT: vminpbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vminpbf16 zmm22, zmm23, zmm24
+0x62,0x85,0x45,0x40,0x5d,0xf0
+
+# ATT: vminpbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vminpbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x85,0x45,0x47,0x5d,0xf0
+
+# ATT: vminpbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vminpbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x85,0x45,0xc7,0x5d,0xf0
+
+# ATT: vminpbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vminpbf16 xmm22, xmm23, xmm24
+0x62,0x85,0x45,0x00,0x5d,0xf0
+
+# ATT: vminpbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vminpbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x85,0x45,0x07,0x5d,0xf0
+
+# ATT: vminpbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vminpbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x85,0x45,0x87,0x5d,0xf0
+
+# ATT: vminpbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vminpbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x40,0x5d,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vminpbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vminpbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x47,0x5d,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vminpbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vminpbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe5,0x45,0x50,0x5d,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vminpbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vminpbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0x45,0x40,0x5d,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vminpbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vminpbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe5,0x45,0xc7,0x5d,0x71,0x7f
+
+# ATT: vminpbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vminpbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe5,0x45,0xd7,0x5d,0x72,0x80
+
+# ATT: vminpbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vminpbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x20,0x5d,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vminpbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vminpbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x27,0x5d,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vminpbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vminpbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe5,0x45,0x30,0x5d,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vminpbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vminpbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe5,0x45,0x20,0x5d,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vminpbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vminpbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe5,0x45,0xa7,0x5d,0x71,0x7f
+
+# ATT: vminpbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vminpbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe5,0x45,0xb7,0x5d,0x72,0x80
+
+# ATT: vminpbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vminpbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x00,0x5d,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vminpbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vminpbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x07,0x5d,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vminpbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vminpbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe5,0x45,0x10,0x5d,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vminpbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vminpbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe5,0x45,0x00,0x5d,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vminpbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vminpbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe5,0x45,0x87,0x5d,0x71,0x7f
+
+# ATT: vminpbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vminpbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe5,0x45,0x97,0x5d,0x72,0x80
+
+# ATT: vmulnepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vmulnepbf16 ymm22, ymm23, ymm24
+0x62,0x85,0x45,0x20,0x59,0xf0
+
+# ATT: vmulnepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vmulnepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x85,0x45,0x27,0x59,0xf0
+
+# ATT: vmulnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vmulnepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x85,0x45,0xa7,0x59,0xf0
+
+# ATT: vmulnepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vmulnepbf16 zmm22, zmm23, zmm24
+0x62,0x85,0x45,0x40,0x59,0xf0
+
+# ATT: vmulnepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vmulnepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x85,0x45,0x47,0x59,0xf0
+
+# ATT: vmulnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vmulnepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x85,0x45,0xc7,0x59,0xf0
+
+# ATT: vmulnepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vmulnepbf16 xmm22, xmm23, xmm24
+0x62,0x85,0x45,0x00,0x59,0xf0
+
+# ATT: vmulnepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vmulnepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x85,0x45,0x07,0x59,0xf0
+
+# ATT: vmulnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vmulnepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x85,0x45,0x87,0x59,0xf0
+
+# ATT: vmulnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vmulnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x40,0x59,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vmulnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vmulnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x47,0x59,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vmulnepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vmulnepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe5,0x45,0x50,0x59,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vmulnepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vmulnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0x45,0x40,0x59,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vmulnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vmulnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe5,0x45,0xc7,0x59,0x71,0x7f
+
+# ATT: vmulnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vmulnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe5,0x45,0xd7,0x59,0x72,0x80
+
+# ATT: vmulnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vmulnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x20,0x59,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vmulnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vmulnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x27,0x59,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vmulnepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vmulnepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe5,0x45,0x30,0x59,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vmulnepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vmulnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe5,0x45,0x20,0x59,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vmulnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vmulnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe5,0x45,0xa7,0x59,0x71,0x7f
+
+# ATT: vmulnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vmulnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe5,0x45,0xb7,0x59,0x72,0x80
+
+# ATT: vmulnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vmulnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x00,0x59,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vmulnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vmulnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x07,0x59,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vmulnepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vmulnepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe5,0x45,0x10,0x59,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vmulnepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vmulnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe5,0x45,0x00,0x59,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vmulnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vmulnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe5,0x45,0x87,0x59,0x71,0x7f
+
+# ATT: vmulnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vmulnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe5,0x45,0x97,0x59,0x72,0x80
+
+# ATT: vrcppbf16 %xmm23, %xmm22
+# INTEL: vrcppbf16 xmm22, xmm23
+0x62,0xa6,0x7c,0x08,0x4c,0xf7
+
+# ATT: vrcppbf16 %xmm23, %xmm22 {%k7}
+# INTEL: vrcppbf16 xmm22 {k7}, xmm23
+0x62,0xa6,0x7c,0x0f,0x4c,0xf7
+
+# ATT: vrcppbf16 %xmm23, %xmm22 {%k7} {z}
+# INTEL: vrcppbf16 xmm22 {k7} {z}, xmm23
+0x62,0xa6,0x7c,0x8f,0x4c,0xf7
+
+# ATT: vrcppbf16 %zmm23, %zmm22
+# INTEL: vrcppbf16 zmm22, zmm23
+0x62,0xa6,0x7c,0x48,0x4c,0xf7
+
+# ATT: vrcppbf16 %zmm23, %zmm22 {%k7}
+# INTEL: vrcppbf16 zmm22 {k7}, zmm23
+0x62,0xa6,0x7c,0x4f,0x4c,0xf7
+
+# ATT: vrcppbf16 %zmm23, %zmm22 {%k7} {z}
+# INTEL: vrcppbf16 zmm22 {k7} {z}, zmm23
+0x62,0xa6,0x7c,0xcf,0x4c,0xf7
+
+# ATT: vrcppbf16 %ymm23, %ymm22
+# INTEL: vrcppbf16 ymm22, ymm23
+0x62,0xa6,0x7c,0x28,0x4c,0xf7
+
+# ATT: vrcppbf16 %ymm23, %ymm22 {%k7}
+# INTEL: vrcppbf16 ymm22 {k7}, ymm23
+0x62,0xa6,0x7c,0x2f,0x4c,0xf7
+
+# ATT: vrcppbf16 %ymm23, %ymm22 {%k7} {z}
+# INTEL: vrcppbf16 ymm22 {k7} {z}, ymm23
+0x62,0xa6,0x7c,0xaf,0x4c,0xf7
+
+# ATT: vrcppbf16 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vrcppbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x7c,0x08,0x4c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vrcppbf16 291(%r8,%rax,4), %xmm22 {%k7}
+# INTEL: vrcppbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x7c,0x0f,0x4c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vrcppbf16 (%rip){1to8}, %xmm22
+# INTEL: vrcppbf16 xmm22, word ptr [rip]{1to8}
+0x62,0xe6,0x7c,0x18,0x4c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vrcppbf16 -512(,%rbp,2), %xmm22
+# INTEL: vrcppbf16 xmm22, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x7c,0x08,0x4c,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vrcppbf16 2032(%rcx), %xmm22 {%k7} {z}
+# INTEL: vrcppbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x7c,0x8f,0x4c,0x71,0x7f
+
+# ATT: vrcppbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z}
+# INTEL: vrcppbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x7c,0x9f,0x4c,0x72,0x80
+
+# ATT: vrcppbf16 268435456(%rbp,%r14,8), %ymm22
+# INTEL: vrcppbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x7c,0x28,0x4c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vrcppbf16 291(%r8,%rax,4), %ymm22 {%k7}
+# INTEL: vrcppbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x7c,0x2f,0x4c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vrcppbf16 (%rip){1to16}, %ymm22
+# INTEL: vrcppbf16 ymm22, word ptr [rip]{1to16}
+0x62,0xe6,0x7c,0x38,0x4c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vrcppbf16 -1024(,%rbp,2), %ymm22
+# INTEL: vrcppbf16 ymm22, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x7c,0x28,0x4c,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vrcppbf16 4064(%rcx), %ymm22 {%k7} {z}
+# INTEL: vrcppbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x7c,0xaf,0x4c,0x71,0x7f
+
+# ATT: vrcppbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z}
+# INTEL: vrcppbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x7c,0xbf,0x4c,0x72,0x80
+
+# ATT: vrcppbf16 268435456(%rbp,%r14,8), %zmm22
+# INTEL: vrcppbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x7c,0x48,0x4c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vrcppbf16 291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vrcppbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x7c,0x4f,0x4c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vrcppbf16 (%rip){1to32}, %zmm22
+# INTEL: vrcppbf16 zmm22, word ptr [rip]{1to32}
+0x62,0xe6,0x7c,0x58,0x4c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vrcppbf16 -2048(,%rbp,2), %zmm22
+# INTEL: vrcppbf16 zmm22, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x7c,0x48,0x4c,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vrcppbf16 8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vrcppbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x7c,0xcf,0x4c,0x71,0x7f
+
+# ATT: vrcppbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z}
+# INTEL: vrcppbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x7c,0xdf,0x4c,0x72,0x80
+
+# ATT: vreducenepbf16 $123, %zmm23, %zmm22
+# INTEL: vreducenepbf16 zmm22, zmm23, 123
+0x62,0xa3,0x7f,0x48,0x56,0xf7,0x7b
+
+# ATT: vreducenepbf16 $123, %zmm23, %zmm22 {%k7}
+# INTEL: vreducenepbf16 zmm22 {k7}, zmm23, 123
+0x62,0xa3,0x7f,0x4f,0x56,0xf7,0x7b
+
+# ATT: vreducenepbf16 $123, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vreducenepbf16 zmm22 {k7} {z}, zmm23, 123
+0x62,0xa3,0x7f,0xcf,0x56,0xf7,0x7b
+
+# ATT: vreducenepbf16 $123, %ymm23, %ymm22
+# INTEL: vreducenepbf16 ymm22, ymm23, 123
+0x62,0xa3,0x7f,0x28,0x56,0xf7,0x7b
+
+# ATT: vreducenepbf16 $123, %ymm23, %ymm22 {%k7}
+# INTEL: vreducenepbf16 ymm22 {k7}, ymm23, 123
+0x62,0xa3,0x7f,0x2f,0x56,0xf7,0x7b
+
+# ATT: vreducenepbf16 $123, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vreducenepbf16 ymm22 {k7} {z}, ymm23, 123
+0x62,0xa3,0x7f,0xaf,0x56,0xf7,0x7b
+
+# ATT: vreducenepbf16 $123, %xmm23, %xmm22
+# INTEL: vreducenepbf16 xmm22, xmm23, 123
+0x62,0xa3,0x7f,0x08,0x56,0xf7,0x7b
+
+# ATT: vreducenepbf16 $123, %xmm23, %xmm22 {%k7}
+# INTEL: vreducenepbf16 xmm22 {k7}, xmm23, 123
+0x62,0xa3,0x7f,0x0f,0x56,0xf7,0x7b
+
+# ATT: vreducenepbf16 $123, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vreducenepbf16 xmm22 {k7} {z}, xmm23, 123
+0x62,0xa3,0x7f,0x8f,0x56,0xf7,0x7b
+
+# ATT: vreducenepbf16 $123, 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vreducenepbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456], 123
+0x62,0xa3,0x7f,0x08,0x56,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vreducenepbf16 $123, 291(%r8,%rax,4), %xmm22 {%k7}
+# INTEL: vreducenepbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291], 123
+0x62,0xc3,0x7f,0x0f,0x56,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vreducenepbf16 $123, (%rip){1to8}, %xmm22
+# INTEL: vreducenepbf16 xmm22, word ptr [rip]{1to8}, 123
+0x62,0xe3,0x7f,0x18,0x56,0x35,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vreducenepbf16 $123, -512(,%rbp,2), %xmm22
+# INTEL: vreducenepbf16 xmm22, xmmword ptr [2*rbp - 512], 123
+0x62,0xe3,0x7f,0x08,0x56,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b
+
+# ATT: vreducenepbf16 $123, 2032(%rcx), %xmm22 {%k7} {z}
+# INTEL: vreducenepbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032], 123
+0x62,0xe3,0x7f,0x8f,0x56,0x71,0x7f,0x7b
+
+# ATT: vreducenepbf16 $123, -256(%rdx){1to8}, %xmm22 {%k7} {z}
+# INTEL: vreducenepbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}, 123
+0x62,0xe3,0x7f,0x9f,0x56,0x72,0x80,0x7b
+
+# ATT: vreducenepbf16 $123, 268435456(%rbp,%r14,8), %ymm22
+# INTEL: vreducenepbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456], 123
+0x62,0xa3,0x7f,0x28,0x56,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vreducenepbf16 $123, 291(%r8,%rax,4), %ymm22 {%k7}
+# INTEL: vreducenepbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291], 123
+0x62,0xc3,0x7f,0x2f,0x56,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vreducenepbf16 $123, (%rip){1to16}, %ymm22
+# INTEL: vreducenepbf16 ymm22, word ptr [rip]{1to16}, 123
+0x62,0xe3,0x7f,0x38,0x56,0x35,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vreducenepbf16 $123, -1024(,%rbp,2), %ymm22
+# INTEL: vreducenepbf16 ymm22, ymmword ptr [2*rbp - 1024], 123
+0x62,0xe3,0x7f,0x28,0x56,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b
+
+# ATT: vreducenepbf16 $123, 4064(%rcx), %ymm22 {%k7} {z}
+# INTEL: vreducenepbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064], 123
+0x62,0xe3,0x7f,0xaf,0x56,0x71,0x7f,0x7b
+
+# ATT: vreducenepbf16 $123, -256(%rdx){1to16}, %ymm22 {%k7} {z}
+# INTEL: vreducenepbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}, 123
+0x62,0xe3,0x7f,0xbf,0x56,0x72,0x80,0x7b
+
+# ATT: vreducenepbf16 $123, 268435456(%rbp,%r14,8), %zmm22
+# INTEL: vreducenepbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456], 123
+0x62,0xa3,0x7f,0x48,0x56,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vreducenepbf16 $123, 291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vreducenepbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291], 123
+0x62,0xc3,0x7f,0x4f,0x56,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vreducenepbf16 $123, (%rip){1to32}, %zmm22
+# INTEL: vreducenepbf16 zmm22, word ptr [rip]{1to32}, 123
+0x62,0xe3,0x7f,0x58,0x56,0x35,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vreducenepbf16 $123, -2048(,%rbp,2), %zmm22
+# INTEL: vreducenepbf16 zmm22, zmmword ptr [2*rbp - 2048], 123
+0x62,0xe3,0x7f,0x48,0x56,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b
+
+# ATT: vreducenepbf16 $123, 8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vreducenepbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128], 123
+0x62,0xe3,0x7f,0xcf,0x56,0x71,0x7f,0x7b
+
+# ATT: vreducenepbf16 $123, -256(%rdx){1to32}, %zmm22 {%k7} {z}
+# INTEL: vreducenepbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}, 123
+0x62,0xe3,0x7f,0xdf,0x56,0x72,0x80,0x7b
+
+# ATT: vrndscalenepbf16 $123, %zmm23, %zmm22
+# INTEL: vrndscalenepbf16 zmm22, zmm23, 123
+0x62,0xa3,0x7f,0x48,0x08,0xf7,0x7b
+
+# ATT: vrndscalenepbf16 $123, %zmm23, %zmm22 {%k7}
+# INTEL: vrndscalenepbf16 zmm22 {k7}, zmm23, 123
+0x62,0xa3,0x7f,0x4f,0x08,0xf7,0x7b
+
+# ATT: vrndscalenepbf16 $123, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vrndscalenepbf16 zmm22 {k7} {z}, zmm23, 123
+0x62,0xa3,0x7f,0xcf,0x08,0xf7,0x7b
+
+# ATT: vrndscalenepbf16 $123, %ymm23, %ymm22
+# INTEL: vrndscalenepbf16 ymm22, ymm23, 123
+0x62,0xa3,0x7f,0x28,0x08,0xf7,0x7b
+
+# ATT: vrndscalenepbf16 $123, %ymm23, %ymm22 {%k7}
+# INTEL: vrndscalenepbf16 ymm22 {k7}, ymm23, 123
+0x62,0xa3,0x7f,0x2f,0x08,0xf7,0x7b
+
+# ATT: vrndscalenepbf16 $123, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vrndscalenepbf16 ymm22 {k7} {z}, ymm23, 123
+0x62,0xa3,0x7f,0xaf,0x08,0xf7,0x7b
+
+# ATT: vrndscalenepbf16 $123, %xmm23, %xmm22
+# INTEL: vrndscalenepbf16 xmm22, xmm23, 123
+0x62,0xa3,0x7f,0x08,0x08,0xf7,0x7b
+
+# ATT: vrndscalenepbf16 $123, %xmm23, %xmm22 {%k7}
+# INTEL: vrndscalenepbf16 xmm22 {k7}, xmm23, 123
+0x62,0xa3,0x7f,0x0f,0x08,0xf7,0x7b
+
+# ATT: vrndscalenepbf16 $123, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vrndscalenepbf16 xmm22 {k7} {z}, xmm23, 123
+0x62,0xa3,0x7f,0x8f,0x08,0xf7,0x7b
+
+# ATT: vrndscalenepbf16 $123, 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vrndscalenepbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456], 123
+0x62,0xa3,0x7f,0x08,0x08,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vrndscalenepbf16 $123, 291(%r8,%rax,4), %xmm22 {%k7}
+# INTEL: vrndscalenepbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291], 123
+0x62,0xc3,0x7f,0x0f,0x08,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vrndscalenepbf16 $123, (%rip){1to8}, %xmm22
+# INTEL: vrndscalenepbf16 xmm22, word ptr [rip]{1to8}, 123
+0x62,0xe3,0x7f,0x18,0x08,0x35,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vrndscalenepbf16 $123, -512(,%rbp,2), %xmm22
+# INTEL: vrndscalenepbf16 xmm22, xmmword ptr [2*rbp - 512], 123
+0x62,0xe3,0x7f,0x08,0x08,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b
+
+# ATT: vrndscalenepbf16 $123, 2032(%rcx), %xmm22 {%k7} {z}
+# INTEL: vrndscalenepbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032], 123
+0x62,0xe3,0x7f,0x8f,0x08,0x71,0x7f,0x7b
+
+# ATT: vrndscalenepbf16 $123, -256(%rdx){1to8}, %xmm22 {%k7} {z}
+# INTEL: vrndscalenepbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}, 123
+0x62,0xe3,0x7f,0x9f,0x08,0x72,0x80,0x7b
+
+# ATT: vrndscalenepbf16 $123, 268435456(%rbp,%r14,8), %ymm22
+# INTEL: vrndscalenepbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456], 123
+0x62,0xa3,0x7f,0x28,0x08,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vrndscalenepbf16 $123, 291(%r8,%rax,4), %ymm22 {%k7}
+# INTEL: vrndscalenepbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291], 123
+0x62,0xc3,0x7f,0x2f,0x08,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vrndscalenepbf16 $123, (%rip){1to16}, %ymm22
+# INTEL: vrndscalenepbf16 ymm22, word ptr [rip]{1to16}, 123
+0x62,0xe3,0x7f,0x38,0x08,0x35,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vrndscalenepbf16 $123, -1024(,%rbp,2), %ymm22
+# INTEL: vrndscalenepbf16 ymm22, ymmword ptr [2*rbp - 1024], 123
+0x62,0xe3,0x7f,0x28,0x08,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b
+
+# ATT: vrndscalenepbf16 $123, 4064(%rcx), %ymm22 {%k7} {z}
+# INTEL: vrndscalenepbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064], 123
+0x62,0xe3,0x7f,0xaf,0x08,0x71,0x7f,0x7b
+
+# ATT: vrndscalenepbf16 $123, -256(%rdx){1to16}, %ymm22 {%k7} {z}
+# INTEL: vrndscalenepbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}, 123
+0x62,0xe3,0x7f,0xbf,0x08,0x72,0x80,0x7b
+
+# ATT: vrndscalenepbf16 $123, 268435456(%rbp,%r14,8), %zmm22
+# INTEL: vrndscalenepbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456], 123
+0x62,0xa3,0x7f,0x48,0x08,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b
+
+# ATT: vrndscalenepbf16 $123, 291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vrndscalenepbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291], 123
+0x62,0xc3,0x7f,0x4f,0x08,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b
+
+# ATT: vrndscalenepbf16 $123, (%rip){1to32}, %zmm22
+# INTEL: vrndscalenepbf16 zmm22, word ptr [rip]{1to32}, 123
+0x62,0xe3,0x7f,0x58,0x08,0x35,0x00,0x00,0x00,0x00,0x7b
+
+# ATT: vrndscalenepbf16 $123, -2048(,%rbp,2), %zmm22
+# INTEL: vrndscalenepbf16 zmm22, zmmword ptr [2*rbp - 2048], 123
+0x62,0xe3,0x7f,0x48,0x08,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b
+
+# ATT: vrndscalenepbf16 $123, 8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vrndscalenepbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128], 123
+0x62,0xe3,0x7f,0xcf,0x08,0x71,0x7f,0x7b
+
+# ATT: vrndscalenepbf16 $123, -256(%rdx){1to32}, %zmm22 {%k7} {z}
+# INTEL: vrndscalenepbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}, 123
+0x62,0xe3,0x7f,0xdf,0x08,0x72,0x80,0x7b
+
+# ATT: vrsqrtpbf16 %xmm23, %xmm22
+# INTEL: vrsqrtpbf16 xmm22, xmm23
+0x62,0xa6,0x7c,0x08,0x4e,0xf7
+
+# ATT: vrsqrtpbf16 %xmm23, %xmm22 {%k7}
+# INTEL: vrsqrtpbf16 xmm22 {k7}, xmm23
+0x62,0xa6,0x7c,0x0f,0x4e,0xf7
+
+# ATT: vrsqrtpbf16 %xmm23, %xmm22 {%k7} {z}
+# INTEL: vrsqrtpbf16 xmm22 {k7} {z}, xmm23
+0x62,0xa6,0x7c,0x8f,0x4e,0xf7
+
+# ATT: vrsqrtpbf16 %zmm23, %zmm22
+# INTEL: vrsqrtpbf16 zmm22, zmm23
+0x62,0xa6,0x7c,0x48,0x4e,0xf7
+
+# ATT: vrsqrtpbf16 %zmm23, %zmm22 {%k7}
+# INTEL: vrsqrtpbf16 zmm22 {k7}, zmm23
+0x62,0xa6,0x7c,0x4f,0x4e,0xf7
+
+# ATT: vrsqrtpbf16 %zmm23, %zmm22 {%k7} {z}
+# INTEL: vrsqrtpbf16 zmm22 {k7} {z}, zmm23
+0x62,0xa6,0x7c,0xcf,0x4e,0xf7
+
+# ATT: vrsqrtpbf16 %ymm23, %ymm22
+# INTEL: vrsqrtpbf16 ymm22, ymm23
+0x62,0xa6,0x7c,0x28,0x4e,0xf7
+
+# ATT: vrsqrtpbf16 %ymm23, %ymm22 {%k7}
+# INTEL: vrsqrtpbf16 ymm22 {k7}, ymm23
+0x62,0xa6,0x7c,0x2f,0x4e,0xf7
+
+# ATT: vrsqrtpbf16 %ymm23, %ymm22 {%k7} {z}
+# INTEL: vrsqrtpbf16 ymm22 {k7} {z}, ymm23
+0x62,0xa6,0x7c,0xaf,0x4e,0xf7
+
+# ATT: vrsqrtpbf16 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vrsqrtpbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x7c,0x08,0x4e,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vrsqrtpbf16 291(%r8,%rax,4), %xmm22 {%k7}
+# INTEL: vrsqrtpbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x7c,0x0f,0x4e,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vrsqrtpbf16 (%rip){1to8}, %xmm22
+# INTEL: vrsqrtpbf16 xmm22, word ptr [rip]{1to8}
+0x62,0xe6,0x7c,0x18,0x4e,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vrsqrtpbf16 -512(,%rbp,2), %xmm22
+# INTEL: vrsqrtpbf16 xmm22, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x7c,0x08,0x4e,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vrsqrtpbf16 2032(%rcx), %xmm22 {%k7} {z}
+# INTEL: vrsqrtpbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x7c,0x8f,0x4e,0x71,0x7f
+
+# ATT: vrsqrtpbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z}
+# INTEL: vrsqrtpbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x7c,0x9f,0x4e,0x72,0x80
+
+# ATT: vrsqrtpbf16 268435456(%rbp,%r14,8), %ymm22
+# INTEL: vrsqrtpbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x7c,0x28,0x4e,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vrsqrtpbf16 291(%r8,%rax,4), %ymm22 {%k7}
+# INTEL: vrsqrtpbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x7c,0x2f,0x4e,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vrsqrtpbf16 (%rip){1to16}, %ymm22
+# INTEL: vrsqrtpbf16 ymm22, word ptr [rip]{1to16}
+0x62,0xe6,0x7c,0x38,0x4e,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vrsqrtpbf16 -1024(,%rbp,2), %ymm22
+# INTEL: vrsqrtpbf16 ymm22, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x7c,0x28,0x4e,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vrsqrtpbf16 4064(%rcx), %ymm22 {%k7} {z}
+# INTEL: vrsqrtpbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x7c,0xaf,0x4e,0x71,0x7f
+
+# ATT: vrsqrtpbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z}
+# INTEL: vrsqrtpbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x7c,0xbf,0x4e,0x72,0x80
+
+# ATT: vrsqrtpbf16 268435456(%rbp,%r14,8), %zmm22
+# INTEL: vrsqrtpbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x7c,0x48,0x4e,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vrsqrtpbf16 291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vrsqrtpbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x7c,0x4f,0x4e,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vrsqrtpbf16 (%rip){1to32}, %zmm22
+# INTEL: vrsqrtpbf16 zmm22, word ptr [rip]{1to32}
+0x62,0xe6,0x7c,0x58,0x4e,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vrsqrtpbf16 -2048(,%rbp,2), %zmm22
+# INTEL: vrsqrtpbf16 zmm22, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x7c,0x48,0x4e,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vrsqrtpbf16 8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vrsqrtpbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x7c,0xcf,0x4e,0x71,0x7f
+
+# ATT: vrsqrtpbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z}
+# INTEL: vrsqrtpbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x7c,0xdf,0x4e,0x72,0x80
+
+# ATT: vscalefpbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vscalefpbf16 ymm22, ymm23, ymm24
+0x62,0x86,0x44,0x20,0x2c,0xf0
+
+# ATT: vscalefpbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vscalefpbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x86,0x44,0x27,0x2c,0xf0
+
+# ATT: vscalefpbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vscalefpbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x86,0x44,0xa7,0x2c,0xf0
+
+# ATT: vscalefpbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vscalefpbf16 zmm22, zmm23, zmm24
+0x62,0x86,0x44,0x40,0x2c,0xf0
+
+# ATT: vscalefpbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vscalefpbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x86,0x44,0x47,0x2c,0xf0
+
+# ATT: vscalefpbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vscalefpbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x86,0x44,0xc7,0x2c,0xf0
+
+# ATT: vscalefpbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vscalefpbf16 xmm22, xmm23, xmm24
+0x62,0x86,0x44,0x00,0x2c,0xf0
+
+# ATT: vscalefpbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vscalefpbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x86,0x44,0x07,0x2c,0xf0
+
+# ATT: vscalefpbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vscalefpbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x86,0x44,0x87,0x2c,0xf0
+
+# ATT: vscalefpbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vscalefpbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x40,0x2c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vscalefpbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vscalefpbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x47,0x2c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vscalefpbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vscalefpbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe6,0x44,0x50,0x2c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vscalefpbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vscalefpbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe6,0x44,0x40,0x2c,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vscalefpbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vscalefpbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe6,0x44,0xc7,0x2c,0x71,0x7f
+
+# ATT: vscalefpbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vscalefpbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe6,0x44,0xd7,0x2c,0x72,0x80
+
+# ATT: vscalefpbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vscalefpbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x20,0x2c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vscalefpbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vscalefpbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x27,0x2c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vscalefpbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vscalefpbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe6,0x44,0x30,0x2c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vscalefpbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vscalefpbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe6,0x44,0x20,0x2c,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vscalefpbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vscalefpbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe6,0x44,0xa7,0x2c,0x71,0x7f
+
+# ATT: vscalefpbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vscalefpbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe6,0x44,0xb7,0x2c,0x72,0x80
+
+# ATT: vscalefpbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vscalefpbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa6,0x44,0x00,0x2c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vscalefpbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vscalefpbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc6,0x44,0x07,0x2c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vscalefpbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vscalefpbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe6,0x44,0x10,0x2c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vscalefpbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vscalefpbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe6,0x44,0x00,0x2c,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vscalefpbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vscalefpbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe6,0x44,0x87,0x2c,0x71,0x7f
+
+# ATT: vscalefpbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vscalefpbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe6,0x44,0x97,0x2c,0x72,0x80
+
+# ATT: vsqrtnepbf16 %xmm23, %xmm22
+# INTEL: vsqrtnepbf16 xmm22, xmm23
+0x62,0xa5,0x7d,0x08,0x51,0xf7
+
+# ATT: vsqrtnepbf16 %xmm23, %xmm22 {%k7}
+# INTEL: vsqrtnepbf16 xmm22 {k7}, xmm23
+0x62,0xa5,0x7d,0x0f,0x51,0xf7
+
+# ATT: vsqrtnepbf16 %xmm23, %xmm22 {%k7} {z}
+# INTEL: vsqrtnepbf16 xmm22 {k7} {z}, xmm23
+0x62,0xa5,0x7d,0x8f,0x51,0xf7
+
+# ATT: vsqrtnepbf16 %zmm23, %zmm22
+# INTEL: vsqrtnepbf16 zmm22, zmm23
+0x62,0xa5,0x7d,0x48,0x51,0xf7
+
+# ATT: vsqrtnepbf16 %zmm23, %zmm22 {%k7}
+# INTEL: vsqrtnepbf16 zmm22 {k7}, zmm23
+0x62,0xa5,0x7d,0x4f,0x51,0xf7
+
+# ATT: vsqrtnepbf16 %zmm23, %zmm22 {%k7} {z}
+# INTEL: vsqrtnepbf16 zmm22 {k7} {z}, zmm23
+0x62,0xa5,0x7d,0xcf,0x51,0xf7
+
+# ATT: vsqrtnepbf16 %ymm23, %ymm22
+# INTEL: vsqrtnepbf16 ymm22, ymm23
+0x62,0xa5,0x7d,0x28,0x51,0xf7
+
+# ATT: vsqrtnepbf16 %ymm23, %ymm22 {%k7}
+# INTEL: vsqrtnepbf16 ymm22 {k7}, ymm23
+0x62,0xa5,0x7d,0x2f,0x51,0xf7
+
+# ATT: vsqrtnepbf16 %ymm23, %ymm22 {%k7} {z}
+# INTEL: vsqrtnepbf16 ymm22 {k7} {z}, ymm23
+0x62,0xa5,0x7d,0xaf,0x51,0xf7
+
+# ATT: vsqrtnepbf16 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vsqrtnepbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7d,0x08,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsqrtnepbf16 291(%r8,%rax,4), %xmm22 {%k7}
+# INTEL: vsqrtnepbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7d,0x0f,0x51,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsqrtnepbf16 (%rip){1to8}, %xmm22
+# INTEL: vsqrtnepbf16 xmm22, word ptr [rip]{1to8}
+0x62,0xe5,0x7d,0x18,0x51,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsqrtnepbf16 -512(,%rbp,2), %xmm22
+# INTEL: vsqrtnepbf16 xmm22, xmmword ptr [2*rbp - 512]
+0x62,0xe5,0x7d,0x08,0x51,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vsqrtnepbf16 2032(%rcx), %xmm22 {%k7} {z}
+# INTEL: vsqrtnepbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+0x62,0xe5,0x7d,0x8f,0x51,0x71,0x7f
+
+# ATT: vsqrtnepbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z}
+# INTEL: vsqrtnepbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}
+0x62,0xe5,0x7d,0x9f,0x51,0x72,0x80
+
+# ATT: vsqrtnepbf16 268435456(%rbp,%r14,8), %ymm22
+# INTEL: vsqrtnepbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7d,0x28,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsqrtnepbf16 291(%r8,%rax,4), %ymm22 {%k7}
+# INTEL: vsqrtnepbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7d,0x2f,0x51,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsqrtnepbf16 (%rip){1to16}, %ymm22
+# INTEL: vsqrtnepbf16 ymm22, word ptr [rip]{1to16}
+0x62,0xe5,0x7d,0x38,0x51,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsqrtnepbf16 -1024(,%rbp,2), %ymm22
+# INTEL: vsqrtnepbf16 ymm22, ymmword ptr [2*rbp - 1024]
+0x62,0xe5,0x7d,0x28,0x51,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vsqrtnepbf16 4064(%rcx), %ymm22 {%k7} {z}
+# INTEL: vsqrtnepbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+0x62,0xe5,0x7d,0xaf,0x51,0x71,0x7f
+
+# ATT: vsqrtnepbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z}
+# INTEL: vsqrtnepbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}
+0x62,0xe5,0x7d,0xbf,0x51,0x72,0x80
+
+# ATT: vsqrtnepbf16 268435456(%rbp,%r14,8), %zmm22
+# INTEL: vsqrtnepbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7d,0x48,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsqrtnepbf16 291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vsqrtnepbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7d,0x4f,0x51,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsqrtnepbf16 (%rip){1to32}, %zmm22
+# INTEL: vsqrtnepbf16 zmm22, word ptr [rip]{1to32}
+0x62,0xe5,0x7d,0x58,0x51,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsqrtnepbf16 -2048(,%rbp,2), %zmm22
+# INTEL: vsqrtnepbf16 zmm22, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0x7d,0x48,0x51,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vsqrtnepbf16 8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vsqrtnepbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+0x62,0xe5,0x7d,0xcf,0x51,0x71,0x7f
+
+# ATT: vsqrtnepbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z}
+# INTEL: vsqrtnepbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}
+0x62,0xe5,0x7d,0xdf,0x51,0x72,0x80
+
+# ATT: vsubnepbf16 %ymm24, %ymm23, %ymm22
+# INTEL: vsubnepbf16 ymm22, ymm23, ymm24
+0x62,0x85,0x45,0x20,0x5c,0xf0
+
+# ATT: vsubnepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vsubnepbf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x85,0x45,0x27,0x5c,0xf0
+
+# ATT: vsubnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vsubnepbf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x85,0x45,0xa7,0x5c,0xf0
+
+# ATT: vsubnepbf16 %zmm24, %zmm23, %zmm22
+# INTEL: vsubnepbf16 zmm22, zmm23, zmm24
+0x62,0x85,0x45,0x40,0x5c,0xf0
+
+# ATT: vsubnepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+# INTEL: vsubnepbf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x85,0x45,0x47,0x5c,0xf0
+
+# ATT: vsubnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vsubnepbf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x85,0x45,0xc7,0x5c,0xf0
+
+# ATT: vsubnepbf16 %xmm24, %xmm23, %xmm22
+# INTEL: vsubnepbf16 xmm22, xmm23, xmm24
+0x62,0x85,0x45,0x00,0x5c,0xf0
+
+# ATT: vsubnepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+# INTEL: vsubnepbf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x85,0x45,0x07,0x5c,0xf0
+
+# ATT: vsubnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vsubnepbf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x85,0x45,0x87,0x5c,0xf0
+
+# ATT: vsubnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vsubnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x40,0x5c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsubnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+# INTEL: vsubnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x47,0x5c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsubnepbf16 (%rip){1to32}, %zmm23, %zmm22
+# INTEL: vsubnepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+0x62,0xe5,0x45,0x50,0x5c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsubnepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vsubnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0x45,0x40,0x5c,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vsubnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+# INTEL: vsubnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe5,0x45,0xc7,0x5c,0x71,0x7f
+
+# ATT: vsubnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vsubnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+0x62,0xe5,0x45,0xd7,0x5c,0x72,0x80
+
+# ATT: vsubnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vsubnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x20,0x5c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsubnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+# INTEL: vsubnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x27,0x5c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsubnepbf16 (%rip){1to16}, %ymm23, %ymm22
+# INTEL: vsubnepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+0x62,0xe5,0x45,0x30,0x5c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsubnepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vsubnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe5,0x45,0x20,0x5c,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vsubnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+# INTEL: vsubnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe5,0x45,0xa7,0x5c,0x71,0x7f
+
+# ATT: vsubnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vsubnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+0x62,0xe5,0x45,0xb7,0x5c,0x72,0x80
+
+# ATT: vsubnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vsubnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x45,0x00,0x5c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsubnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+# INTEL: vsubnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x45,0x07,0x5c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsubnepbf16 (%rip){1to8}, %xmm23, %xmm22
+# INTEL: vsubnepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+0x62,0xe5,0x45,0x10,0x5c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsubnepbf16 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vsubnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe5,0x45,0x00,0x5c,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vsubnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+# INTEL: vsubnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe5,0x45,0x87,0x5c,0x71,0x7f
+
+# ATT: vsubnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+# INTEL: vsubnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+0x62,0xe5,0x45,0x97,0x5c,0x72,0x80
+
diff --git a/llvm/test/MC/X86/avx10.2-bf16-32-att.s b/llvm/test/MC/X86/avx10.2-bf16-32-att.s
new file mode 100644
index 00000000000000..9f62743177c9bd
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-bf16-32-att.s
@@ -0,0 +1,3014 @@
+// RUN: llvm-mc -triple i386 --show-encoding %s | FileCheck %s
+
+// CHECK: vaddnepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x58,0xd4]
+ vaddnepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vaddnepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x58,0xd4]
+ vaddnepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vaddnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x58,0xd4]
+ vaddnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vaddnepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x58,0xd4]
+ vaddnepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vaddnepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x58,0xd4]
+ vaddnepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vaddnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x58,0xd4]
+ vaddnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vaddnepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x58,0xd4]
+ vaddnepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vaddnepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x58,0xd4]
+ vaddnepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vaddnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x58,0xd4]
+ vaddnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vaddnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x58,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vaddnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vaddnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x58,0x94,0x87,0x23,0x01,0x00,0x00]
+ vaddnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vaddnepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x58,0x10]
+ vaddnepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vaddnepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x58,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vaddnepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vaddnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x58,0x51,0x7f]
+ vaddnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vaddnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x58,0x52,0x80]
+ vaddnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vaddnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x58,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vaddnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vaddnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x58,0x94,0x87,0x23,0x01,0x00,0x00]
+ vaddnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vaddnepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x58,0x10]
+ vaddnepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vaddnepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x58,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vaddnepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vaddnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x58,0x51,0x7f]
+ vaddnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vaddnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x58,0x52,0x80]
+ vaddnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vaddnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x58,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vaddnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vaddnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x58,0x94,0x87,0x23,0x01,0x00,0x00]
+ vaddnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vaddnepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x58,0x10]
+ vaddnepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vaddnepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x58,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vaddnepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vaddnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x58,0x51,0x7f]
+ vaddnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vaddnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x58,0x52,0x80]
+ vaddnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vcmppbf16 $123, %ymm4, %ymm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x67,0x28,0xc2,0xec,0x7b]
+ vcmppbf16 $123, %ymm4, %ymm3, %k5
+
+// CHECK: vcmppbf16 $123, %ymm4, %ymm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x67,0x2f,0xc2,0xec,0x7b]
+ vcmppbf16 $123, %ymm4, %ymm3, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, %xmm4, %xmm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x67,0x08,0xc2,0xec,0x7b]
+ vcmppbf16 $123, %xmm4, %xmm3, %k5
+
+// CHECK: vcmppbf16 $123, %xmm4, %xmm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x67,0x0f,0xc2,0xec,0x7b]
+ vcmppbf16 $123, %xmm4, %xmm3, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, %zmm4, %zmm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x67,0x48,0xc2,0xec,0x7b]
+ vcmppbf16 $123, %zmm4, %zmm3, %k5
+
+// CHECK: vcmppbf16 $123, %zmm4, %zmm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x67,0x4f,0xc2,0xec,0x7b]
+ vcmppbf16 $123, %zmm4, %zmm3, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, 268435456(%esp,%esi,8), %zmm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x67,0x48,0xc2,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vcmppbf16 $123, 268435456(%esp,%esi,8), %zmm3, %k5
+
+// CHECK: vcmppbf16 $123, 291(%edi,%eax,4), %zmm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x67,0x4f,0xc2,0xac,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vcmppbf16 $123, 291(%edi,%eax,4), %zmm3, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, (%eax){1to32}, %zmm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x67,0x58,0xc2,0x28,0x7b]
+ vcmppbf16 $123, (%eax){1to32}, %zmm3, %k5
+
+// CHECK: vcmppbf16 $123, -2048(,%ebp,2), %zmm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x67,0x48,0xc2,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vcmppbf16 $123, -2048(,%ebp,2), %zmm3, %k5
+
+// CHECK: vcmppbf16 $123, 8128(%ecx), %zmm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x67,0x4f,0xc2,0x69,0x7f,0x7b]
+ vcmppbf16 $123, 8128(%ecx), %zmm3, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, -256(%edx){1to32}, %zmm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x67,0x5f,0xc2,0x6a,0x80,0x7b]
+ vcmppbf16 $123, -256(%edx){1to32}, %zmm3, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, 268435456(%esp,%esi,8), %xmm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x67,0x08,0xc2,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vcmppbf16 $123, 268435456(%esp,%esi,8), %xmm3, %k5
+
+// CHECK: vcmppbf16 $123, 291(%edi,%eax,4), %xmm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x67,0x0f,0xc2,0xac,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vcmppbf16 $123, 291(%edi,%eax,4), %xmm3, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, (%eax){1to8}, %xmm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x67,0x18,0xc2,0x28,0x7b]
+ vcmppbf16 $123, (%eax){1to8}, %xmm3, %k5
+
+// CHECK: vcmppbf16 $123, -512(,%ebp,2), %xmm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x67,0x08,0xc2,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vcmppbf16 $123, -512(,%ebp,2), %xmm3, %k5
+
+// CHECK: vcmppbf16 $123, 2032(%ecx), %xmm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x67,0x0f,0xc2,0x69,0x7f,0x7b]
+ vcmppbf16 $123, 2032(%ecx), %xmm3, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, -256(%edx){1to8}, %xmm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x67,0x1f,0xc2,0x6a,0x80,0x7b]
+ vcmppbf16 $123, -256(%edx){1to8}, %xmm3, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, 268435456(%esp,%esi,8), %ymm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x67,0x28,0xc2,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vcmppbf16 $123, 268435456(%esp,%esi,8), %ymm3, %k5
+
+// CHECK: vcmppbf16 $123, 291(%edi,%eax,4), %ymm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x67,0x2f,0xc2,0xac,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vcmppbf16 $123, 291(%edi,%eax,4), %ymm3, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, (%eax){1to16}, %ymm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x67,0x38,0xc2,0x28,0x7b]
+ vcmppbf16 $123, (%eax){1to16}, %ymm3, %k5
+
+// CHECK: vcmppbf16 $123, -1024(,%ebp,2), %ymm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x67,0x28,0xc2,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vcmppbf16 $123, -1024(,%ebp,2), %ymm3, %k5
+
+// CHECK: vcmppbf16 $123, 4064(%ecx), %ymm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x67,0x2f,0xc2,0x69,0x7f,0x7b]
+ vcmppbf16 $123, 4064(%ecx), %ymm3, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, -256(%edx){1to16}, %ymm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x67,0x3f,0xc2,0x6a,0x80,0x7b]
+ vcmppbf16 $123, -256(%edx){1to16}, %ymm3, %k5 {%k7}
+
+// CHECK: vcomsbf16 %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xd3]
+ vcomsbf16 %xmm3, %xmm2
+
+// CHECK: vcomsbf16 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcomsbf16 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vcomsbf16 291(%edi,%eax,4), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcomsbf16 291(%edi,%eax,4), %xmm2
+
+// CHECK: vcomsbf16 (%eax), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x10]
+ vcomsbf16 (%eax), %xmm2
+
+// CHECK: vcomsbf16 -64(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x14,0x6d,0xc0,0xff,0xff,0xff]
+ vcomsbf16 -64(,%ebp,2), %xmm2
+
+// CHECK: vcomsbf16 254(%ecx), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x51,0x7f]
+ vcomsbf16 254(%ecx), %xmm2
+
+// CHECK: vcomsbf16 -256(%edx), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x52,0x80]
+ vcomsbf16 -256(%edx), %xmm2
+
+// CHECK: vdivnepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5e,0xd4]
+ vdivnepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vdivnepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5e,0xd4]
+ vdivnepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vdivnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5e,0xd4]
+ vdivnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vdivnepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5e,0xd4]
+ vdivnepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vdivnepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5e,0xd4]
+ vdivnepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vdivnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5e,0xd4]
+ vdivnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vdivnepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5e,0xd4]
+ vdivnepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vdivnepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5e,0xd4]
+ vdivnepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vdivnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5e,0xd4]
+ vdivnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vdivnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vdivnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vdivnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vdivnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vdivnepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x5e,0x10]
+ vdivnepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vdivnepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5e,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vdivnepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vdivnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5e,0x51,0x7f]
+ vdivnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vdivnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x5e,0x52,0x80]
+ vdivnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vdivnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vdivnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vdivnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vdivnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vdivnepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x5e,0x10]
+ vdivnepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vdivnepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5e,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vdivnepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vdivnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5e,0x51,0x7f]
+ vdivnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vdivnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x5e,0x52,0x80]
+ vdivnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vdivnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vdivnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vdivnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vdivnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vdivnepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x5e,0x10]
+ vdivnepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vdivnepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5e,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vdivnepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vdivnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5e,0x51,0x7f]
+ vdivnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vdivnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x5e,0x52,0x80]
+ vdivnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x98,0xd4]
+ vfmadd132nepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vfmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x98,0xd4]
+ vfmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vfmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x98,0xd4]
+ vfmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x98,0xd4]
+ vfmadd132nepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vfmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x98,0xd4]
+ vfmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vfmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x98,0xd4]
+ vfmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x98,0xd4]
+ vfmadd132nepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vfmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x98,0xd4]
+ vfmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vfmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x98,0xd4]
+ vfmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x98,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vfmadd132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x98,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vfmadd132nepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x98,0x10]
+ vfmadd132nepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vfmadd132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x98,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfmadd132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vfmadd132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x98,0x51,0x7f]
+ vfmadd132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x98,0x52,0x80]
+ vfmadd132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x98,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vfmadd132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x98,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vfmadd132nepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x98,0x10]
+ vfmadd132nepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vfmadd132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x98,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfmadd132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vfmadd132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x98,0x51,0x7f]
+ vfmadd132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x98,0x52,0x80]
+ vfmadd132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x98,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vfmadd132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x98,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vfmadd132nepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x98,0x10]
+ vfmadd132nepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vfmadd132nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x98,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfmadd132nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vfmadd132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x98,0x51,0x7f]
+ vfmadd132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x98,0x52,0x80]
+ vfmadd132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xa8,0xd4]
+ vfmadd213nepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vfmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xa8,0xd4]
+ vfmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vfmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xa8,0xd4]
+ vfmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xa8,0xd4]
+ vfmadd213nepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vfmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xa8,0xd4]
+ vfmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vfmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xa8,0xd4]
+ vfmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xa8,0xd4]
+ vfmadd213nepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vfmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xa8,0xd4]
+ vfmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vfmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xa8,0xd4]
+ vfmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xa8,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vfmadd213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xa8,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vfmadd213nepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xa8,0x10]
+ vfmadd213nepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vfmadd213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xa8,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfmadd213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vfmadd213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xa8,0x51,0x7f]
+ vfmadd213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xa8,0x52,0x80]
+ vfmadd213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xa8,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vfmadd213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xa8,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vfmadd213nepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xa8,0x10]
+ vfmadd213nepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vfmadd213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xa8,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfmadd213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vfmadd213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xa8,0x51,0x7f]
+ vfmadd213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xa8,0x52,0x80]
+ vfmadd213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xa8,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vfmadd213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xa8,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vfmadd213nepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xa8,0x10]
+ vfmadd213nepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vfmadd213nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xa8,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfmadd213nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vfmadd213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xa8,0x51,0x7f]
+ vfmadd213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xa8,0x52,0x80]
+ vfmadd213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xb8,0xd4]
+ vfmadd231nepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vfmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xb8,0xd4]
+ vfmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vfmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xb8,0xd4]
+ vfmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xb8,0xd4]
+ vfmadd231nepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vfmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xb8,0xd4]
+ vfmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vfmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xb8,0xd4]
+ vfmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xb8,0xd4]
+ vfmadd231nepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vfmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xb8,0xd4]
+ vfmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vfmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xb8,0xd4]
+ vfmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xb8,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vfmadd231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xb8,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vfmadd231nepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xb8,0x10]
+ vfmadd231nepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vfmadd231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xb8,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfmadd231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vfmadd231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xb8,0x51,0x7f]
+ vfmadd231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xb8,0x52,0x80]
+ vfmadd231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xb8,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vfmadd231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xb8,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vfmadd231nepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xb8,0x10]
+ vfmadd231nepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vfmadd231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xb8,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfmadd231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vfmadd231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xb8,0x51,0x7f]
+ vfmadd231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xb8,0x52,0x80]
+ vfmadd231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xb8,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vfmadd231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xb8,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vfmadd231nepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xb8,0x10]
+ vfmadd231nepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vfmadd231nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xb8,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfmadd231nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vfmadd231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xb8,0x51,0x7f]
+ vfmadd231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xb8,0x52,0x80]
+ vfmadd231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9a,0xd4]
+ vfmsub132nepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vfmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9a,0xd4]
+ vfmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vfmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9a,0xd4]
+ vfmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9a,0xd4]
+ vfmsub132nepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vfmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9a,0xd4]
+ vfmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vfmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9a,0xd4]
+ vfmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9a,0xd4]
+ vfmsub132nepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vfmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9a,0xd4]
+ vfmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vfmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9a,0xd4]
+ vfmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9a,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vfmsub132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9a,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vfmsub132nepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x9a,0x10]
+ vfmsub132nepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vfmsub132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9a,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfmsub132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vfmsub132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9a,0x51,0x7f]
+ vfmsub132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x9a,0x52,0x80]
+ vfmsub132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9a,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vfmsub132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9a,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vfmsub132nepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x9a,0x10]
+ vfmsub132nepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vfmsub132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9a,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfmsub132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vfmsub132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9a,0x51,0x7f]
+ vfmsub132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x9a,0x52,0x80]
+ vfmsub132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9a,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vfmsub132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9a,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vfmsub132nepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x9a,0x10]
+ vfmsub132nepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vfmsub132nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9a,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfmsub132nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vfmsub132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9a,0x51,0x7f]
+ vfmsub132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x9a,0x52,0x80]
+ vfmsub132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xaa,0xd4]
+ vfmsub213nepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vfmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xaa,0xd4]
+ vfmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vfmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xaa,0xd4]
+ vfmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xaa,0xd4]
+ vfmsub213nepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vfmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xaa,0xd4]
+ vfmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vfmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xaa,0xd4]
+ vfmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xaa,0xd4]
+ vfmsub213nepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vfmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xaa,0xd4]
+ vfmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vfmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xaa,0xd4]
+ vfmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xaa,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vfmsub213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xaa,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vfmsub213nepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xaa,0x10]
+ vfmsub213nepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vfmsub213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xaa,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfmsub213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vfmsub213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xaa,0x51,0x7f]
+ vfmsub213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xaa,0x52,0x80]
+ vfmsub213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xaa,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vfmsub213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xaa,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vfmsub213nepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xaa,0x10]
+ vfmsub213nepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vfmsub213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xaa,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfmsub213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vfmsub213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xaa,0x51,0x7f]
+ vfmsub213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xaa,0x52,0x80]
+ vfmsub213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xaa,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vfmsub213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xaa,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vfmsub213nepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xaa,0x10]
+ vfmsub213nepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vfmsub213nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xaa,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfmsub213nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vfmsub213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xaa,0x51,0x7f]
+ vfmsub213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xaa,0x52,0x80]
+ vfmsub213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xba,0xd4]
+ vfmsub231nepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vfmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xba,0xd4]
+ vfmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vfmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xba,0xd4]
+ vfmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xba,0xd4]
+ vfmsub231nepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vfmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xba,0xd4]
+ vfmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vfmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xba,0xd4]
+ vfmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xba,0xd4]
+ vfmsub231nepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vfmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xba,0xd4]
+ vfmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vfmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xba,0xd4]
+ vfmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xba,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vfmsub231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xba,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vfmsub231nepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xba,0x10]
+ vfmsub231nepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vfmsub231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xba,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfmsub231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vfmsub231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xba,0x51,0x7f]
+ vfmsub231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xba,0x52,0x80]
+ vfmsub231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xba,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vfmsub231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xba,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vfmsub231nepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xba,0x10]
+ vfmsub231nepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vfmsub231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xba,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfmsub231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vfmsub231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xba,0x51,0x7f]
+ vfmsub231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xba,0x52,0x80]
+ vfmsub231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xba,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vfmsub231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xba,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vfmsub231nepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xba,0x10]
+ vfmsub231nepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vfmsub231nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xba,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfmsub231nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vfmsub231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xba,0x51,0x7f]
+ vfmsub231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xba,0x52,0x80]
+ vfmsub231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9c,0xd4]
+ vfnmadd132nepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vfnmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9c,0xd4]
+ vfnmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vfnmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9c,0xd4]
+ vfnmadd132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9c,0xd4]
+ vfnmadd132nepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vfnmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9c,0xd4]
+ vfnmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vfnmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9c,0xd4]
+ vfnmadd132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9c,0xd4]
+ vfnmadd132nepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vfnmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9c,0xd4]
+ vfnmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vfnmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9c,0xd4]
+ vfnmadd132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vfnmadd132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vfnmadd132nepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x9c,0x10]
+ vfnmadd132nepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vfnmadd132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9c,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmadd132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vfnmadd132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9c,0x51,0x7f]
+ vfnmadd132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x9c,0x52,0x80]
+ vfnmadd132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vfnmadd132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vfnmadd132nepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x9c,0x10]
+ vfnmadd132nepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vfnmadd132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9c,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmadd132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vfnmadd132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9c,0x51,0x7f]
+ vfnmadd132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x9c,0x52,0x80]
+ vfnmadd132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vfnmadd132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vfnmadd132nepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x9c,0x10]
+ vfnmadd132nepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vfnmadd132nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9c,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmadd132nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vfnmadd132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9c,0x51,0x7f]
+ vfnmadd132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x9c,0x52,0x80]
+ vfnmadd132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xac,0xd4]
+ vfnmadd213nepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vfnmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xac,0xd4]
+ vfnmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vfnmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xac,0xd4]
+ vfnmadd213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xac,0xd4]
+ vfnmadd213nepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vfnmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xac,0xd4]
+ vfnmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vfnmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xac,0xd4]
+ vfnmadd213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xac,0xd4]
+ vfnmadd213nepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vfnmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xac,0xd4]
+ vfnmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vfnmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xac,0xd4]
+ vfnmadd213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xac,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vfnmadd213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xac,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vfnmadd213nepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xac,0x10]
+ vfnmadd213nepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vfnmadd213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xac,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmadd213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vfnmadd213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xac,0x51,0x7f]
+ vfnmadd213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xac,0x52,0x80]
+ vfnmadd213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xac,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vfnmadd213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xac,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vfnmadd213nepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xac,0x10]
+ vfnmadd213nepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vfnmadd213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xac,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmadd213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vfnmadd213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xac,0x51,0x7f]
+ vfnmadd213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xac,0x52,0x80]
+ vfnmadd213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xac,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vfnmadd213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xac,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vfnmadd213nepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xac,0x10]
+ vfnmadd213nepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vfnmadd213nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xac,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmadd213nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vfnmadd213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xac,0x51,0x7f]
+ vfnmadd213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xac,0x52,0x80]
+ vfnmadd213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbc,0xd4]
+ vfnmadd231nepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vfnmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xbc,0xd4]
+ vfnmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vfnmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xbc,0xd4]
+ vfnmadd231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbc,0xd4]
+ vfnmadd231nepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vfnmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xbc,0xd4]
+ vfnmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vfnmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xbc,0xd4]
+ vfnmadd231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbc,0xd4]
+ vfnmadd231nepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vfnmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xbc,0xd4]
+ vfnmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vfnmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xbc,0xd4]
+ vfnmadd231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbc,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vfnmadd231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xbc,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vfnmadd231nepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xbc,0x10]
+ vfnmadd231nepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vfnmadd231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbc,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmadd231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vfnmadd231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xbc,0x51,0x7f]
+ vfnmadd231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xbc,0x52,0x80]
+ vfnmadd231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbc,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vfnmadd231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xbc,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vfnmadd231nepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xbc,0x10]
+ vfnmadd231nepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vfnmadd231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbc,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmadd231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vfnmadd231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xbc,0x51,0x7f]
+ vfnmadd231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xbc,0x52,0x80]
+ vfnmadd231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbc,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vfnmadd231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xbc,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vfnmadd231nepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xbc,0x10]
+ vfnmadd231nepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vfnmadd231nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbc,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmadd231nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vfnmadd231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xbc,0x51,0x7f]
+ vfnmadd231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xbc,0x52,0x80]
+ vfnmadd231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9e,0xd4]
+ vfnmsub132nepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vfnmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9e,0xd4]
+ vfnmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vfnmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9e,0xd4]
+ vfnmsub132nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9e,0xd4]
+ vfnmsub132nepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vfnmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9e,0xd4]
+ vfnmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vfnmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9e,0xd4]
+ vfnmsub132nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9e,0xd4]
+ vfnmsub132nepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vfnmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9e,0xd4]
+ vfnmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vfnmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9e,0xd4]
+ vfnmsub132nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub132nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vfnmsub132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub132nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vfnmsub132nepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x9e,0x10]
+ vfnmsub132nepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vfnmsub132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9e,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmsub132nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vfnmsub132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9e,0x51,0x7f]
+ vfnmsub132nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x9e,0x52,0x80]
+ vfnmsub132nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub132nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vfnmsub132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub132nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vfnmsub132nepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x9e,0x10]
+ vfnmsub132nepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vfnmsub132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9e,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmsub132nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vfnmsub132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9e,0x51,0x7f]
+ vfnmsub132nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x9e,0x52,0x80]
+ vfnmsub132nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub132nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vfnmsub132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub132nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vfnmsub132nepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x9e,0x10]
+ vfnmsub132nepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vfnmsub132nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9e,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmsub132nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vfnmsub132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9e,0x51,0x7f]
+ vfnmsub132nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x9e,0x52,0x80]
+ vfnmsub132nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xae,0xd4]
+ vfnmsub213nepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vfnmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xae,0xd4]
+ vfnmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vfnmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xae,0xd4]
+ vfnmsub213nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xae,0xd4]
+ vfnmsub213nepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vfnmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xae,0xd4]
+ vfnmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vfnmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xae,0xd4]
+ vfnmsub213nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xae,0xd4]
+ vfnmsub213nepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vfnmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xae,0xd4]
+ vfnmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vfnmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xae,0xd4]
+ vfnmsub213nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xae,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub213nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vfnmsub213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xae,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub213nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vfnmsub213nepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xae,0x10]
+ vfnmsub213nepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vfnmsub213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xae,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmsub213nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vfnmsub213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xae,0x51,0x7f]
+ vfnmsub213nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xae,0x52,0x80]
+ vfnmsub213nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xae,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub213nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vfnmsub213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xae,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub213nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vfnmsub213nepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xae,0x10]
+ vfnmsub213nepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vfnmsub213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xae,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmsub213nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vfnmsub213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xae,0x51,0x7f]
+ vfnmsub213nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xae,0x52,0x80]
+ vfnmsub213nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xae,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub213nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vfnmsub213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xae,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub213nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vfnmsub213nepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xae,0x10]
+ vfnmsub213nepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vfnmsub213nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xae,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmsub213nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vfnmsub213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xae,0x51,0x7f]
+ vfnmsub213nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xae,0x52,0x80]
+ vfnmsub213nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbe,0xd4]
+ vfnmsub231nepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vfnmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xbe,0xd4]
+ vfnmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vfnmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xbe,0xd4]
+ vfnmsub231nepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbe,0xd4]
+ vfnmsub231nepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vfnmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xbe,0xd4]
+ vfnmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vfnmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xbe,0xd4]
+ vfnmsub231nepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbe,0xd4]
+ vfnmsub231nepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vfnmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xbe,0xd4]
+ vfnmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vfnmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xbe,0xd4]
+ vfnmsub231nepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbe,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub231nepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vfnmsub231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xbe,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub231nepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vfnmsub231nepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xbe,0x10]
+ vfnmsub231nepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vfnmsub231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbe,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmsub231nepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vfnmsub231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xbe,0x51,0x7f]
+ vfnmsub231nepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xbe,0x52,0x80]
+ vfnmsub231nepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbe,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub231nepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vfnmsub231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xbe,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub231nepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vfnmsub231nepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xbe,0x10]
+ vfnmsub231nepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vfnmsub231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbe,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmsub231nepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vfnmsub231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xbe,0x51,0x7f]
+ vfnmsub231nepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xbe,0x52,0x80]
+ vfnmsub231nepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbe,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub231nepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vfnmsub231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xbe,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub231nepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vfnmsub231nepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xbe,0x10]
+ vfnmsub231nepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vfnmsub231nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbe,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmsub231nepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vfnmsub231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xbe,0x51,0x7f]
+ vfnmsub231nepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xbe,0x52,0x80]
+ vfnmsub231nepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vfpclasspbf16 $123, %zmm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x66,0xeb,0x7b]
+ vfpclasspbf16 $123, %zmm3, %k5
+
+// CHECK: vfpclasspbf16 $123, %zmm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x66,0xeb,0x7b]
+ vfpclasspbf16 $123, %zmm3, %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, %ymm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x66,0xeb,0x7b]
+ vfpclasspbf16 $123, %ymm3, %k5
+
+// CHECK: vfpclasspbf16 $123, %ymm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x66,0xeb,0x7b]
+ vfpclasspbf16 $123, %ymm3, %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, %xmm3, %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x66,0xeb,0x7b]
+ vfpclasspbf16 $123, %xmm3, %k5
+
+// CHECK: vfpclasspbf16 $123, %xmm3, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x66,0xeb,0x7b]
+ vfpclasspbf16 $123, %xmm3, %k5 {%k7}
+
+// CHECK: vfpclasspbf16x $123, 268435456(%esp,%esi,8), %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x66,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vfpclasspbf16x $123, 268435456(%esp,%esi,8), %k5
+
+// CHECK: vfpclasspbf16x $123, 291(%edi,%eax,4), %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x66,0xac,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vfpclasspbf16x $123, 291(%edi,%eax,4), %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, (%eax){1to8}, %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x66,0x28,0x7b]
+ vfpclasspbf16 $123, (%eax){1to8}, %k5
+
+// CHECK: vfpclasspbf16x $123, -512(,%ebp,2), %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x66,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vfpclasspbf16x $123, -512(,%ebp,2), %k5
+
+// CHECK: vfpclasspbf16x $123, 2032(%ecx), %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x66,0x69,0x7f,0x7b]
+ vfpclasspbf16x $123, 2032(%ecx), %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, -256(%edx){1to8}, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x1f,0x66,0x6a,0x80,0x7b]
+ vfpclasspbf16 $123, -256(%edx){1to8}, %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, (%eax){1to16}, %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x66,0x28,0x7b]
+ vfpclasspbf16 $123, (%eax){1to16}, %k5
+
+// CHECK: vfpclasspbf16y $123, -1024(,%ebp,2), %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x66,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vfpclasspbf16y $123, -1024(,%ebp,2), %k5
+
+// CHECK: vfpclasspbf16y $123, 4064(%ecx), %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x66,0x69,0x7f,0x7b]
+ vfpclasspbf16y $123, 4064(%ecx), %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, -256(%edx){1to16}, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x3f,0x66,0x6a,0x80,0x7b]
+ vfpclasspbf16 $123, -256(%edx){1to16}, %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, (%eax){1to32}, %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x58,0x66,0x28,0x7b]
+ vfpclasspbf16 $123, (%eax){1to32}, %k5
+
+// CHECK: vfpclasspbf16z $123, -2048(,%ebp,2), %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x66,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vfpclasspbf16z $123, -2048(,%ebp,2), %k5
+
+// CHECK: vfpclasspbf16z $123, 8128(%ecx), %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x66,0x69,0x7f,0x7b]
+ vfpclasspbf16z $123, 8128(%ecx), %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, -256(%edx){1to32}, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x5f,0x66,0x6a,0x80,0x7b]
+ vfpclasspbf16 $123, -256(%edx){1to32}, %k5 {%k7}
+
+// CHECK: vgetexppbf16 %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x42,0xd3]
+ vgetexppbf16 %xmm3, %xmm2
+
+// CHECK: vgetexppbf16 %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x42,0xd3]
+ vgetexppbf16 %xmm3, %xmm2 {%k7}
+
+// CHECK: vgetexppbf16 %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x42,0xd3]
+ vgetexppbf16 %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vgetexppbf16 %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x42,0xd3]
+ vgetexppbf16 %zmm3, %zmm2
+
+// CHECK: vgetexppbf16 %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x42,0xd3]
+ vgetexppbf16 %zmm3, %zmm2 {%k7}
+
+// CHECK: vgetexppbf16 %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x42,0xd3]
+ vgetexppbf16 %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vgetexppbf16 %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x42,0xd3]
+ vgetexppbf16 %ymm3, %ymm2
+
+// CHECK: vgetexppbf16 %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x42,0xd3]
+ vgetexppbf16 %ymm3, %ymm2 {%k7}
+
+// CHECK: vgetexppbf16 %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x42,0xd3]
+ vgetexppbf16 %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vgetexppbf16 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x42,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vgetexppbf16 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vgetexppbf16 291(%edi,%eax,4), %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x42,0x94,0x87,0x23,0x01,0x00,0x00]
+ vgetexppbf16 291(%edi,%eax,4), %xmm2 {%k7}
+
+// CHECK: vgetexppbf16 (%eax){1to8}, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x42,0x10]
+ vgetexppbf16 (%eax){1to8}, %xmm2
+
+// CHECK: vgetexppbf16 -512(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x42,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vgetexppbf16 -512(,%ebp,2), %xmm2
+
+// CHECK: vgetexppbf16 2032(%ecx), %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x42,0x51,0x7f]
+ vgetexppbf16 2032(%ecx), %xmm2 {%k7} {z}
+
+// CHECK: vgetexppbf16 -256(%edx){1to8}, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x42,0x52,0x80]
+ vgetexppbf16 -256(%edx){1to8}, %xmm2 {%k7} {z}
+
+// CHECK: vgetexppbf16 268435456(%esp,%esi,8), %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x42,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vgetexppbf16 268435456(%esp,%esi,8), %ymm2
+
+// CHECK: vgetexppbf16 291(%edi,%eax,4), %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x42,0x94,0x87,0x23,0x01,0x00,0x00]
+ vgetexppbf16 291(%edi,%eax,4), %ymm2 {%k7}
+
+// CHECK: vgetexppbf16 (%eax){1to16}, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x42,0x10]
+ vgetexppbf16 (%eax){1to16}, %ymm2
+
+// CHECK: vgetexppbf16 -1024(,%ebp,2), %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x42,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vgetexppbf16 -1024(,%ebp,2), %ymm2
+
+// CHECK: vgetexppbf16 4064(%ecx), %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x42,0x51,0x7f]
+ vgetexppbf16 4064(%ecx), %ymm2 {%k7} {z}
+
+// CHECK: vgetexppbf16 -256(%edx){1to16}, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x42,0x52,0x80]
+ vgetexppbf16 -256(%edx){1to16}, %ymm2 {%k7} {z}
+
+// CHECK: vgetexppbf16 268435456(%esp,%esi,8), %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x42,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vgetexppbf16 268435456(%esp,%esi,8), %zmm2
+
+// CHECK: vgetexppbf16 291(%edi,%eax,4), %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x42,0x94,0x87,0x23,0x01,0x00,0x00]
+ vgetexppbf16 291(%edi,%eax,4), %zmm2 {%k7}
+
+// CHECK: vgetexppbf16 (%eax){1to32}, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x42,0x10]
+ vgetexppbf16 (%eax){1to32}, %zmm2
+
+// CHECK: vgetexppbf16 -2048(,%ebp,2), %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x42,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vgetexppbf16 -2048(,%ebp,2), %zmm2
+
+// CHECK: vgetexppbf16 8128(%ecx), %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x42,0x51,0x7f]
+ vgetexppbf16 8128(%ecx), %zmm2 {%k7} {z}
+
+// CHECK: vgetexppbf16 -256(%edx){1to32}, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x42,0x52,0x80]
+ vgetexppbf16 -256(%edx){1to32}, %zmm2 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x26,0xd3,0x7b]
+ vgetmantpbf16 $123, %zmm3, %zmm2
+
+// CHECK: vgetmantpbf16 $123, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x26,0xd3,0x7b]
+ vgetmantpbf16 $123, %zmm3, %zmm2 {%k7}
+
+// CHECK: vgetmantpbf16 $123, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x26,0xd3,0x7b]
+ vgetmantpbf16 $123, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x26,0xd3,0x7b]
+ vgetmantpbf16 $123, %ymm3, %ymm2
+
+// CHECK: vgetmantpbf16 $123, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x26,0xd3,0x7b]
+ vgetmantpbf16 $123, %ymm3, %ymm2 {%k7}
+
+// CHECK: vgetmantpbf16 $123, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x26,0xd3,0x7b]
+ vgetmantpbf16 $123, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x26,0xd3,0x7b]
+ vgetmantpbf16 $123, %xmm3, %xmm2
+
+// CHECK: vgetmantpbf16 $123, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x26,0xd3,0x7b]
+ vgetmantpbf16 $123, %xmm3, %xmm2 {%k7}
+
+// CHECK: vgetmantpbf16 $123, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x26,0xd3,0x7b]
+ vgetmantpbf16 $123, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x26,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vgetmantpbf16 $123, 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vgetmantpbf16 $123, 291(%edi,%eax,4), %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x26,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantpbf16 $123, 291(%edi,%eax,4), %xmm2 {%k7}
+
+// CHECK: vgetmantpbf16 $123, (%eax){1to8}, %xmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x26,0x10,0x7b]
+ vgetmantpbf16 $123, (%eax){1to8}, %xmm2
+
+// CHECK: vgetmantpbf16 $123, -512(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x26,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vgetmantpbf16 $123, -512(,%ebp,2), %xmm2
+
+// CHECK: vgetmantpbf16 $123, 2032(%ecx), %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x26,0x51,0x7f,0x7b]
+ vgetmantpbf16 $123, 2032(%ecx), %xmm2 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, -256(%edx){1to8}, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x9f,0x26,0x52,0x80,0x7b]
+ vgetmantpbf16 $123, -256(%edx){1to8}, %xmm2 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, 268435456(%esp,%esi,8), %ymm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x26,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vgetmantpbf16 $123, 268435456(%esp,%esi,8), %ymm2
+
+// CHECK: vgetmantpbf16 $123, 291(%edi,%eax,4), %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x26,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantpbf16 $123, 291(%edi,%eax,4), %ymm2 {%k7}
+
+// CHECK: vgetmantpbf16 $123, (%eax){1to16}, %ymm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x26,0x10,0x7b]
+ vgetmantpbf16 $123, (%eax){1to16}, %ymm2
+
+// CHECK: vgetmantpbf16 $123, -1024(,%ebp,2), %ymm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x26,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vgetmantpbf16 $123, -1024(,%ebp,2), %ymm2
+
+// CHECK: vgetmantpbf16 $123, 4064(%ecx), %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x26,0x51,0x7f,0x7b]
+ vgetmantpbf16 $123, 4064(%ecx), %ymm2 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, -256(%edx){1to16}, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xbf,0x26,0x52,0x80,0x7b]
+ vgetmantpbf16 $123, -256(%edx){1to16}, %ymm2 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, 268435456(%esp,%esi,8), %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x26,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vgetmantpbf16 $123, 268435456(%esp,%esi,8), %zmm2
+
+// CHECK: vgetmantpbf16 $123, 291(%edi,%eax,4), %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x26,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantpbf16 $123, 291(%edi,%eax,4), %zmm2 {%k7}
+
+// CHECK: vgetmantpbf16 $123, (%eax){1to32}, %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x58,0x26,0x10,0x7b]
+ vgetmantpbf16 $123, (%eax){1to32}, %zmm2
+
+// CHECK: vgetmantpbf16 $123, -2048(,%ebp,2), %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x26,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vgetmantpbf16 $123, -2048(,%ebp,2), %zmm2
+
+// CHECK: vgetmantpbf16 $123, 8128(%ecx), %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x26,0x51,0x7f,0x7b]
+ vgetmantpbf16 $123, 8128(%ecx), %zmm2 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, -256(%edx){1to32}, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xdf,0x26,0x52,0x80,0x7b]
+ vgetmantpbf16 $123, -256(%edx){1to32}, %zmm2 {%k7} {z}
+
+// CHECK: vmaxpbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5f,0xd4]
+ vmaxpbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vmaxpbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5f,0xd4]
+ vmaxpbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vmaxpbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5f,0xd4]
+ vmaxpbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vmaxpbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5f,0xd4]
+ vmaxpbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vmaxpbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5f,0xd4]
+ vmaxpbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vmaxpbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5f,0xd4]
+ vmaxpbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vmaxpbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5f,0xd4]
+ vmaxpbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vmaxpbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5f,0xd4]
+ vmaxpbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vmaxpbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5f,0xd4]
+ vmaxpbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vmaxpbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5f,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmaxpbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vmaxpbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5f,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmaxpbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vmaxpbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x5f,0x10]
+ vmaxpbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vmaxpbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5f,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vmaxpbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vmaxpbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5f,0x51,0x7f]
+ vmaxpbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vmaxpbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x5f,0x52,0x80]
+ vmaxpbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vmaxpbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5f,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmaxpbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vmaxpbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5f,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmaxpbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vmaxpbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x5f,0x10]
+ vmaxpbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vmaxpbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5f,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vmaxpbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vmaxpbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5f,0x51,0x7f]
+ vmaxpbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vmaxpbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x5f,0x52,0x80]
+ vmaxpbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vmaxpbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5f,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmaxpbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vmaxpbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5f,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmaxpbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vmaxpbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x5f,0x10]
+ vmaxpbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vmaxpbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5f,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vmaxpbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vmaxpbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5f,0x51,0x7f]
+ vmaxpbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vmaxpbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x5f,0x52,0x80]
+ vmaxpbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vminpbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5d,0xd4]
+ vminpbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vminpbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5d,0xd4]
+ vminpbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vminpbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5d,0xd4]
+ vminpbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vminpbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5d,0xd4]
+ vminpbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vminpbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5d,0xd4]
+ vminpbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vminpbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5d,0xd4]
+ vminpbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vminpbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5d,0xd4]
+ vminpbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vminpbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5d,0xd4]
+ vminpbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vminpbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5d,0xd4]
+ vminpbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vminpbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vminpbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vminpbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vminpbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vminpbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x5d,0x10]
+ vminpbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vminpbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5d,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vminpbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vminpbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5d,0x51,0x7f]
+ vminpbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vminpbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x5d,0x52,0x80]
+ vminpbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vminpbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vminpbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vminpbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vminpbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vminpbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x5d,0x10]
+ vminpbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vminpbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5d,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vminpbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vminpbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5d,0x51,0x7f]
+ vminpbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vminpbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x5d,0x52,0x80]
+ vminpbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vminpbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vminpbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vminpbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vminpbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vminpbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x5d,0x10]
+ vminpbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vminpbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5d,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vminpbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vminpbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5d,0x51,0x7f]
+ vminpbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vminpbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x5d,0x52,0x80]
+ vminpbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vmulnepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x59,0xd4]
+ vmulnepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vmulnepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x59,0xd4]
+ vmulnepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vmulnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x59,0xd4]
+ vmulnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vmulnepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x59,0xd4]
+ vmulnepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vmulnepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x59,0xd4]
+ vmulnepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vmulnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x59,0xd4]
+ vmulnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vmulnepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x59,0xd4]
+ vmulnepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vmulnepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x59,0xd4]
+ vmulnepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vmulnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x59,0xd4]
+ vmulnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vmulnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x59,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmulnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vmulnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x59,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmulnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vmulnepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x59,0x10]
+ vmulnepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vmulnepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x59,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vmulnepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vmulnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x59,0x51,0x7f]
+ vmulnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vmulnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x59,0x52,0x80]
+ vmulnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vmulnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x59,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmulnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vmulnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x59,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmulnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vmulnepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x59,0x10]
+ vmulnepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vmulnepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x59,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vmulnepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vmulnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x59,0x51,0x7f]
+ vmulnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vmulnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x59,0x52,0x80]
+ vmulnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vmulnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x59,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmulnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vmulnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x59,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmulnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vmulnepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x59,0x10]
+ vmulnepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vmulnepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x59,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vmulnepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vmulnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x59,0x51,0x7f]
+ vmulnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vmulnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x59,0x52,0x80]
+ vmulnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vrcppbf16 %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4c,0xd3]
+ vrcppbf16 %xmm3, %xmm2
+
+// CHECK: vrcppbf16 %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x0f,0x4c,0xd3]
+ vrcppbf16 %xmm3, %xmm2 {%k7}
+
+// CHECK: vrcppbf16 %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x8f,0x4c,0xd3]
+ vrcppbf16 %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vrcppbf16 %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4c,0xd3]
+ vrcppbf16 %zmm3, %zmm2
+
+// CHECK: vrcppbf16 %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x4f,0x4c,0xd3]
+ vrcppbf16 %zmm3, %zmm2 {%k7}
+
+// CHECK: vrcppbf16 %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xcf,0x4c,0xd3]
+ vrcppbf16 %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vrcppbf16 %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4c,0xd3]
+ vrcppbf16 %ymm3, %ymm2
+
+// CHECK: vrcppbf16 %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x2f,0x4c,0xd3]
+ vrcppbf16 %ymm3, %ymm2 {%k7}
+
+// CHECK: vrcppbf16 %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xaf,0x4c,0xd3]
+ vrcppbf16 %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vrcppbf16 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vrcppbf16 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vrcppbf16 291(%edi,%eax,4), %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x0f,0x4c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vrcppbf16 291(%edi,%eax,4), %xmm2 {%k7}
+
+// CHECK: vrcppbf16 (%eax){1to8}, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x18,0x4c,0x10]
+ vrcppbf16 (%eax){1to8}, %xmm2
+
+// CHECK: vrcppbf16 -512(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4c,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vrcppbf16 -512(,%ebp,2), %xmm2
+
+// CHECK: vrcppbf16 2032(%ecx), %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x8f,0x4c,0x51,0x7f]
+ vrcppbf16 2032(%ecx), %xmm2 {%k7} {z}
+
+// CHECK: vrcppbf16 -256(%edx){1to8}, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x9f,0x4c,0x52,0x80]
+ vrcppbf16 -256(%edx){1to8}, %xmm2 {%k7} {z}
+
+// CHECK: vrcppbf16 268435456(%esp,%esi,8), %ymm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vrcppbf16 268435456(%esp,%esi,8), %ymm2
+
+// CHECK: vrcppbf16 291(%edi,%eax,4), %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x2f,0x4c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vrcppbf16 291(%edi,%eax,4), %ymm2 {%k7}
+
+// CHECK: vrcppbf16 (%eax){1to16}, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x38,0x4c,0x10]
+ vrcppbf16 (%eax){1to16}, %ymm2
+
+// CHECK: vrcppbf16 -1024(,%ebp,2), %ymm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4c,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vrcppbf16 -1024(,%ebp,2), %ymm2
+
+// CHECK: vrcppbf16 4064(%ecx), %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xaf,0x4c,0x51,0x7f]
+ vrcppbf16 4064(%ecx), %ymm2 {%k7} {z}
+
+// CHECK: vrcppbf16 -256(%edx){1to16}, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xbf,0x4c,0x52,0x80]
+ vrcppbf16 -256(%edx){1to16}, %ymm2 {%k7} {z}
+
+// CHECK: vrcppbf16 268435456(%esp,%esi,8), %zmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vrcppbf16 268435456(%esp,%esi,8), %zmm2
+
+// CHECK: vrcppbf16 291(%edi,%eax,4), %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x4f,0x4c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vrcppbf16 291(%edi,%eax,4), %zmm2 {%k7}
+
+// CHECK: vrcppbf16 (%eax){1to32}, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x58,0x4c,0x10]
+ vrcppbf16 (%eax){1to32}, %zmm2
+
+// CHECK: vrcppbf16 -2048(,%ebp,2), %zmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4c,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vrcppbf16 -2048(,%ebp,2), %zmm2
+
+// CHECK: vrcppbf16 8128(%ecx), %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xcf,0x4c,0x51,0x7f]
+ vrcppbf16 8128(%ecx), %zmm2 {%k7} {z}
+
+// CHECK: vrcppbf16 -256(%edx){1to32}, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xdf,0x4c,0x52,0x80]
+ vrcppbf16 -256(%edx){1to32}, %zmm2 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x56,0xd3,0x7b]
+ vreducenepbf16 $123, %zmm3, %zmm2
+
+// CHECK: vreducenepbf16 $123, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x56,0xd3,0x7b]
+ vreducenepbf16 $123, %zmm3, %zmm2 {%k7}
+
+// CHECK: vreducenepbf16 $123, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x56,0xd3,0x7b]
+ vreducenepbf16 $123, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x56,0xd3,0x7b]
+ vreducenepbf16 $123, %ymm3, %ymm2
+
+// CHECK: vreducenepbf16 $123, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x56,0xd3,0x7b]
+ vreducenepbf16 $123, %ymm3, %ymm2 {%k7}
+
+// CHECK: vreducenepbf16 $123, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x56,0xd3,0x7b]
+ vreducenepbf16 $123, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x56,0xd3,0x7b]
+ vreducenepbf16 $123, %xmm3, %xmm2
+
+// CHECK: vreducenepbf16 $123, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x56,0xd3,0x7b]
+ vreducenepbf16 $123, %xmm3, %xmm2 {%k7}
+
+// CHECK: vreducenepbf16 $123, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x56,0xd3,0x7b]
+ vreducenepbf16 $123, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x56,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vreducenepbf16 $123, 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vreducenepbf16 $123, 291(%edi,%eax,4), %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x56,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vreducenepbf16 $123, 291(%edi,%eax,4), %xmm2 {%k7}
+
+// CHECK: vreducenepbf16 $123, (%eax){1to8}, %xmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x56,0x10,0x7b]
+ vreducenepbf16 $123, (%eax){1to8}, %xmm2
+
+// CHECK: vreducenepbf16 $123, -512(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x56,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vreducenepbf16 $123, -512(,%ebp,2), %xmm2
+
+// CHECK: vreducenepbf16 $123, 2032(%ecx), %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x56,0x51,0x7f,0x7b]
+ vreducenepbf16 $123, 2032(%ecx), %xmm2 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, -256(%edx){1to8}, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x9f,0x56,0x52,0x80,0x7b]
+ vreducenepbf16 $123, -256(%edx){1to8}, %xmm2 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, 268435456(%esp,%esi,8), %ymm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x56,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vreducenepbf16 $123, 268435456(%esp,%esi,8), %ymm2
+
+// CHECK: vreducenepbf16 $123, 291(%edi,%eax,4), %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x56,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vreducenepbf16 $123, 291(%edi,%eax,4), %ymm2 {%k7}
+
+// CHECK: vreducenepbf16 $123, (%eax){1to16}, %ymm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x56,0x10,0x7b]
+ vreducenepbf16 $123, (%eax){1to16}, %ymm2
+
+// CHECK: vreducenepbf16 $123, -1024(,%ebp,2), %ymm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x56,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vreducenepbf16 $123, -1024(,%ebp,2), %ymm2
+
+// CHECK: vreducenepbf16 $123, 4064(%ecx), %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x56,0x51,0x7f,0x7b]
+ vreducenepbf16 $123, 4064(%ecx), %ymm2 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, -256(%edx){1to16}, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xbf,0x56,0x52,0x80,0x7b]
+ vreducenepbf16 $123, -256(%edx){1to16}, %ymm2 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, 268435456(%esp,%esi,8), %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x56,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vreducenepbf16 $123, 268435456(%esp,%esi,8), %zmm2
+
+// CHECK: vreducenepbf16 $123, 291(%edi,%eax,4), %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x56,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vreducenepbf16 $123, 291(%edi,%eax,4), %zmm2 {%k7}
+
+// CHECK: vreducenepbf16 $123, (%eax){1to32}, %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x58,0x56,0x10,0x7b]
+ vreducenepbf16 $123, (%eax){1to32}, %zmm2
+
+// CHECK: vreducenepbf16 $123, -2048(,%ebp,2), %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x56,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vreducenepbf16 $123, -2048(,%ebp,2), %zmm2
+
+// CHECK: vreducenepbf16 $123, 8128(%ecx), %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x56,0x51,0x7f,0x7b]
+ vreducenepbf16 $123, 8128(%ecx), %zmm2 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, -256(%edx){1to32}, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xdf,0x56,0x52,0x80,0x7b]
+ vreducenepbf16 $123, -256(%edx){1to32}, %zmm2 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x08,0xd3,0x7b]
+ vrndscalenepbf16 $123, %zmm3, %zmm2
+
+// CHECK: vrndscalenepbf16 $123, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x08,0xd3,0x7b]
+ vrndscalenepbf16 $123, %zmm3, %zmm2 {%k7}
+
+// CHECK: vrndscalenepbf16 $123, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x08,0xd3,0x7b]
+ vrndscalenepbf16 $123, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x08,0xd3,0x7b]
+ vrndscalenepbf16 $123, %ymm3, %ymm2
+
+// CHECK: vrndscalenepbf16 $123, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x08,0xd3,0x7b]
+ vrndscalenepbf16 $123, %ymm3, %ymm2 {%k7}
+
+// CHECK: vrndscalenepbf16 $123, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x08,0xd3,0x7b]
+ vrndscalenepbf16 $123, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x08,0xd3,0x7b]
+ vrndscalenepbf16 $123, %xmm3, %xmm2
+
+// CHECK: vrndscalenepbf16 $123, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x08,0xd3,0x7b]
+ vrndscalenepbf16 $123, %xmm3, %xmm2 {%k7}
+
+// CHECK: vrndscalenepbf16 $123, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x08,0xd3,0x7b]
+ vrndscalenepbf16 $123, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x08,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vrndscalenepbf16 $123, 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vrndscalenepbf16 $123, 291(%edi,%eax,4), %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x08,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vrndscalenepbf16 $123, 291(%edi,%eax,4), %xmm2 {%k7}
+
+// CHECK: vrndscalenepbf16 $123, (%eax){1to8}, %xmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x08,0x10,0x7b]
+ vrndscalenepbf16 $123, (%eax){1to8}, %xmm2
+
+// CHECK: vrndscalenepbf16 $123, -512(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x08,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vrndscalenepbf16 $123, -512(,%ebp,2), %xmm2
+
+// CHECK: vrndscalenepbf16 $123, 2032(%ecx), %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x08,0x51,0x7f,0x7b]
+ vrndscalenepbf16 $123, 2032(%ecx), %xmm2 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, -256(%edx){1to8}, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x9f,0x08,0x52,0x80,0x7b]
+ vrndscalenepbf16 $123, -256(%edx){1to8}, %xmm2 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, 268435456(%esp,%esi,8), %ymm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x08,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vrndscalenepbf16 $123, 268435456(%esp,%esi,8), %ymm2
+
+// CHECK: vrndscalenepbf16 $123, 291(%edi,%eax,4), %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x08,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vrndscalenepbf16 $123, 291(%edi,%eax,4), %ymm2 {%k7}
+
+// CHECK: vrndscalenepbf16 $123, (%eax){1to16}, %ymm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x08,0x10,0x7b]
+ vrndscalenepbf16 $123, (%eax){1to16}, %ymm2
+
+// CHECK: vrndscalenepbf16 $123, -1024(,%ebp,2), %ymm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x08,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vrndscalenepbf16 $123, -1024(,%ebp,2), %ymm2
+
+// CHECK: vrndscalenepbf16 $123, 4064(%ecx), %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x08,0x51,0x7f,0x7b]
+ vrndscalenepbf16 $123, 4064(%ecx), %ymm2 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, -256(%edx){1to16}, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xbf,0x08,0x52,0x80,0x7b]
+ vrndscalenepbf16 $123, -256(%edx){1to16}, %ymm2 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, 268435456(%esp,%esi,8), %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x08,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vrndscalenepbf16 $123, 268435456(%esp,%esi,8), %zmm2
+
+// CHECK: vrndscalenepbf16 $123, 291(%edi,%eax,4), %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x08,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vrndscalenepbf16 $123, 291(%edi,%eax,4), %zmm2 {%k7}
+
+// CHECK: vrndscalenepbf16 $123, (%eax){1to32}, %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x58,0x08,0x10,0x7b]
+ vrndscalenepbf16 $123, (%eax){1to32}, %zmm2
+
+// CHECK: vrndscalenepbf16 $123, -2048(,%ebp,2), %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x08,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vrndscalenepbf16 $123, -2048(,%ebp,2), %zmm2
+
+// CHECK: vrndscalenepbf16 $123, 8128(%ecx), %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x08,0x51,0x7f,0x7b]
+ vrndscalenepbf16 $123, 8128(%ecx), %zmm2 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, -256(%edx){1to32}, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf3,0x7f,0xdf,0x08,0x52,0x80,0x7b]
+ vrndscalenepbf16 $123, -256(%edx){1to32}, %zmm2 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4e,0xd3]
+ vrsqrtpbf16 %xmm3, %xmm2
+
+// CHECK: vrsqrtpbf16 %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x0f,0x4e,0xd3]
+ vrsqrtpbf16 %xmm3, %xmm2 {%k7}
+
+// CHECK: vrsqrtpbf16 %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x8f,0x4e,0xd3]
+ vrsqrtpbf16 %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4e,0xd3]
+ vrsqrtpbf16 %zmm3, %zmm2
+
+// CHECK: vrsqrtpbf16 %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x4f,0x4e,0xd3]
+ vrsqrtpbf16 %zmm3, %zmm2 {%k7}
+
+// CHECK: vrsqrtpbf16 %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xcf,0x4e,0xd3]
+ vrsqrtpbf16 %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4e,0xd3]
+ vrsqrtpbf16 %ymm3, %ymm2
+
+// CHECK: vrsqrtpbf16 %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x2f,0x4e,0xd3]
+ vrsqrtpbf16 %ymm3, %ymm2 {%k7}
+
+// CHECK: vrsqrtpbf16 %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xaf,0x4e,0xd3]
+ vrsqrtpbf16 %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vrsqrtpbf16 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vrsqrtpbf16 291(%edi,%eax,4), %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x0f,0x4e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vrsqrtpbf16 291(%edi,%eax,4), %xmm2 {%k7}
+
+// CHECK: vrsqrtpbf16 (%eax){1to8}, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x18,0x4e,0x10]
+ vrsqrtpbf16 (%eax){1to8}, %xmm2
+
+// CHECK: vrsqrtpbf16 -512(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4e,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vrsqrtpbf16 -512(,%ebp,2), %xmm2
+
+// CHECK: vrsqrtpbf16 2032(%ecx), %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x8f,0x4e,0x51,0x7f]
+ vrsqrtpbf16 2032(%ecx), %xmm2 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 -256(%edx){1to8}, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x9f,0x4e,0x52,0x80]
+ vrsqrtpbf16 -256(%edx){1to8}, %xmm2 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 268435456(%esp,%esi,8), %ymm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vrsqrtpbf16 268435456(%esp,%esi,8), %ymm2
+
+// CHECK: vrsqrtpbf16 291(%edi,%eax,4), %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x2f,0x4e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vrsqrtpbf16 291(%edi,%eax,4), %ymm2 {%k7}
+
+// CHECK: vrsqrtpbf16 (%eax){1to16}, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x38,0x4e,0x10]
+ vrsqrtpbf16 (%eax){1to16}, %ymm2
+
+// CHECK: vrsqrtpbf16 -1024(,%ebp,2), %ymm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4e,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vrsqrtpbf16 -1024(,%ebp,2), %ymm2
+
+// CHECK: vrsqrtpbf16 4064(%ecx), %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xaf,0x4e,0x51,0x7f]
+ vrsqrtpbf16 4064(%ecx), %ymm2 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 -256(%edx){1to16}, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xbf,0x4e,0x52,0x80]
+ vrsqrtpbf16 -256(%edx){1to16}, %ymm2 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 268435456(%esp,%esi,8), %zmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vrsqrtpbf16 268435456(%esp,%esi,8), %zmm2
+
+// CHECK: vrsqrtpbf16 291(%edi,%eax,4), %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x4f,0x4e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vrsqrtpbf16 291(%edi,%eax,4), %zmm2 {%k7}
+
+// CHECK: vrsqrtpbf16 (%eax){1to32}, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x58,0x4e,0x10]
+ vrsqrtpbf16 (%eax){1to32}, %zmm2
+
+// CHECK: vrsqrtpbf16 -2048(,%ebp,2), %zmm2
+// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4e,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vrsqrtpbf16 -2048(,%ebp,2), %zmm2
+
+// CHECK: vrsqrtpbf16 8128(%ecx), %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xcf,0x4e,0x51,0x7f]
+ vrsqrtpbf16 8128(%ecx), %zmm2 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 -256(%edx){1to32}, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xdf,0x4e,0x52,0x80]
+ vrsqrtpbf16 -256(%edx){1to32}, %zmm2 {%k7} {z}
+
+// CHECK: vscalefpbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x2c,0xd4]
+ vscalefpbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vscalefpbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x2c,0xd4]
+ vscalefpbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vscalefpbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x2c,0xd4]
+ vscalefpbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vscalefpbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x2c,0xd4]
+ vscalefpbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vscalefpbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x2c,0xd4]
+ vscalefpbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vscalefpbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x2c,0xd4]
+ vscalefpbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vscalefpbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x2c,0xd4]
+ vscalefpbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vscalefpbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x2c,0xd4]
+ vscalefpbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vscalefpbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x2c,0xd4]
+ vscalefpbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vscalefpbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vscalefpbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vscalefpbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x2c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vscalefpbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vscalefpbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x2c,0x10]
+ vscalefpbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vscalefpbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x2c,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vscalefpbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vscalefpbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x2c,0x51,0x7f]
+ vscalefpbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vscalefpbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x2c,0x52,0x80]
+ vscalefpbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vscalefpbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vscalefpbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vscalefpbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x2c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vscalefpbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vscalefpbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x2c,0x10]
+ vscalefpbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vscalefpbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x2c,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vscalefpbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vscalefpbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x2c,0x51,0x7f]
+ vscalefpbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vscalefpbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x2c,0x52,0x80]
+ vscalefpbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vscalefpbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vscalefpbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vscalefpbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x2c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vscalefpbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vscalefpbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x2c,0x10]
+ vscalefpbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vscalefpbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x2c,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vscalefpbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vscalefpbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x2c,0x51,0x7f]
+ vscalefpbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vscalefpbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x2c,0x52,0x80]
+ vscalefpbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x51,0xd3]
+ vsqrtnepbf16 %xmm3, %xmm2
+
+// CHECK: vsqrtnepbf16 %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x51,0xd3]
+ vsqrtnepbf16 %xmm3, %xmm2 {%k7}
+
+// CHECK: vsqrtnepbf16 %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x51,0xd3]
+ vsqrtnepbf16 %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x51,0xd3]
+ vsqrtnepbf16 %zmm3, %zmm2
+
+// CHECK: vsqrtnepbf16 %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x51,0xd3]
+ vsqrtnepbf16 %zmm3, %zmm2 {%k7}
+
+// CHECK: vsqrtnepbf16 %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x51,0xd3]
+ vsqrtnepbf16 %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x51,0xd3]
+ vsqrtnepbf16 %ymm3, %ymm2
+
+// CHECK: vsqrtnepbf16 %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x51,0xd3]
+ vsqrtnepbf16 %ymm3, %ymm2 {%k7}
+
+// CHECK: vsqrtnepbf16 %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x51,0xd3]
+ vsqrtnepbf16 %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x51,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsqrtnepbf16 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vsqrtnepbf16 291(%edi,%eax,4), %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x51,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsqrtnepbf16 291(%edi,%eax,4), %xmm2 {%k7}
+
+// CHECK: vsqrtnepbf16 (%eax){1to8}, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x51,0x10]
+ vsqrtnepbf16 (%eax){1to8}, %xmm2
+
+// CHECK: vsqrtnepbf16 -512(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x51,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vsqrtnepbf16 -512(,%ebp,2), %xmm2
+
+// CHECK: vsqrtnepbf16 2032(%ecx), %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x51,0x51,0x7f]
+ vsqrtnepbf16 2032(%ecx), %xmm2 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 -256(%edx){1to8}, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x51,0x52,0x80]
+ vsqrtnepbf16 -256(%edx){1to8}, %xmm2 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 268435456(%esp,%esi,8), %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x51,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsqrtnepbf16 268435456(%esp,%esi,8), %ymm2
+
+// CHECK: vsqrtnepbf16 291(%edi,%eax,4), %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x51,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsqrtnepbf16 291(%edi,%eax,4), %ymm2 {%k7}
+
+// CHECK: vsqrtnepbf16 (%eax){1to16}, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x51,0x10]
+ vsqrtnepbf16 (%eax){1to16}, %ymm2
+
+// CHECK: vsqrtnepbf16 -1024(,%ebp,2), %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x51,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vsqrtnepbf16 -1024(,%ebp,2), %ymm2
+
+// CHECK: vsqrtnepbf16 4064(%ecx), %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x51,0x51,0x7f]
+ vsqrtnepbf16 4064(%ecx), %ymm2 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 -256(%edx){1to16}, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x51,0x52,0x80]
+ vsqrtnepbf16 -256(%edx){1to16}, %ymm2 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 268435456(%esp,%esi,8), %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x51,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsqrtnepbf16 268435456(%esp,%esi,8), %zmm2
+
+// CHECK: vsqrtnepbf16 291(%edi,%eax,4), %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x51,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsqrtnepbf16 291(%edi,%eax,4), %zmm2 {%k7}
+
+// CHECK: vsqrtnepbf16 (%eax){1to32}, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x51,0x10]
+ vsqrtnepbf16 (%eax){1to32}, %zmm2
+
+// CHECK: vsqrtnepbf16 -2048(,%ebp,2), %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x51,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vsqrtnepbf16 -2048(,%ebp,2), %zmm2
+
+// CHECK: vsqrtnepbf16 8128(%ecx), %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x51,0x51,0x7f]
+ vsqrtnepbf16 8128(%ecx), %zmm2 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 -256(%edx){1to32}, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x51,0x52,0x80]
+ vsqrtnepbf16 -256(%edx){1to32}, %zmm2 {%k7} {z}
+
+// CHECK: vsubnepbf16 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5c,0xd4]
+ vsubnepbf16 %ymm4, %ymm3, %ymm2
+
+// CHECK: vsubnepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5c,0xd4]
+ vsubnepbf16 %ymm4, %ymm3, %ymm2 {%k7}
+
+// CHECK: vsubnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5c,0xd4]
+ vsubnepbf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vsubnepbf16 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5c,0xd4]
+ vsubnepbf16 %zmm4, %zmm3, %zmm2
+
+// CHECK: vsubnepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5c,0xd4]
+ vsubnepbf16 %zmm4, %zmm3, %zmm2 {%k7}
+
+// CHECK: vsubnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5c,0xd4]
+ vsubnepbf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vsubnepbf16 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5c,0xd4]
+ vsubnepbf16 %xmm4, %xmm3, %xmm2
+
+// CHECK: vsubnepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5c,0xd4]
+ vsubnepbf16 %xmm4, %xmm3, %xmm2 {%k7}
+
+// CHECK: vsubnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5c,0xd4]
+ vsubnepbf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vsubnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsubnepbf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vsubnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsubnepbf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+
+// CHECK: vsubnepbf16 (%eax){1to32}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x5c,0x10]
+ vsubnepbf16 (%eax){1to32}, %zmm3, %zmm2
+
+// CHECK: vsubnepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5c,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vsubnepbf16 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vsubnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5c,0x51,0x7f]
+ vsubnepbf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vsubnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x5c,0x52,0x80]
+ vsubnepbf16 -256(%edx){1to32}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vsubnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsubnepbf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: vsubnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsubnepbf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+
+// CHECK: vsubnepbf16 (%eax){1to16}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x5c,0x10]
+ vsubnepbf16 (%eax){1to16}, %ymm3, %ymm2
+
+// CHECK: vsubnepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5c,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vsubnepbf16 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: vsubnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5c,0x51,0x7f]
+ vsubnepbf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vsubnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x5c,0x52,0x80]
+ vsubnepbf16 -256(%edx){1to16}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vsubnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsubnepbf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: vsubnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsubnepbf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+
+// CHECK: vsubnepbf16 (%eax){1to8}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x5c,0x10]
+ vsubnepbf16 (%eax){1to8}, %xmm3, %xmm2
+
+// CHECK: vsubnepbf16 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5c,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vsubnepbf16 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: vsubnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5c,0x51,0x7f]
+ vsubnepbf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vsubnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x5c,0x52,0x80]
+ vsubnepbf16 -256(%edx){1to8}, %xmm3, %xmm2 {%k7} {z}
+
diff --git a/llvm/test/MC/X86/avx10.2-bf16-32-intel.s b/llvm/test/MC/X86/avx10.2-bf16-32-intel.s
new file mode 100644
index 00000000000000..30c2cf45297bc0
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-bf16-32-intel.s
@@ -0,0 +1,3014 @@
+// RUN: llvm-mc -triple i386 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vaddnepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x58,0xd4]
+ vaddnepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vaddnepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x58,0xd4]
+ vaddnepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vaddnepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x58,0xd4]
+ vaddnepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vaddnepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x58,0xd4]
+ vaddnepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vaddnepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x58,0xd4]
+ vaddnepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vaddnepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x58,0xd4]
+ vaddnepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vaddnepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x58,0xd4]
+ vaddnepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vaddnepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x58,0xd4]
+ vaddnepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vaddnepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x58,0xd4]
+ vaddnepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vaddnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x58,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vaddnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vaddnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x58,0x94,0x87,0x23,0x01,0x00,0x00]
+ vaddnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vaddnepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x58,0x10]
+ vaddnepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vaddnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x58,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vaddnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vaddnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x58,0x51,0x7f]
+ vaddnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vaddnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x58,0x52,0x80]
+ vaddnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vaddnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x58,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vaddnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vaddnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x58,0x94,0x87,0x23,0x01,0x00,0x00]
+ vaddnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vaddnepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x58,0x10]
+ vaddnepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vaddnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x58,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vaddnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vaddnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x58,0x51,0x7f]
+ vaddnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vaddnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x58,0x52,0x80]
+ vaddnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vaddnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x58,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vaddnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vaddnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x58,0x94,0x87,0x23,0x01,0x00,0x00]
+ vaddnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vaddnepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x58,0x10]
+ vaddnepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vaddnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x58,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vaddnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vaddnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x58,0x51,0x7f]
+ vaddnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vaddnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x58,0x52,0x80]
+ vaddnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vcmppbf16 k5, ymm3, ymm4, 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x28,0xc2,0xec,0x7b]
+ vcmppbf16 k5, ymm3, ymm4, 123
+
+// CHECK: vcmppbf16 k5 {k7}, ymm3, ymm4, 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x2f,0xc2,0xec,0x7b]
+ vcmppbf16 k5 {k7}, ymm3, ymm4, 123
+
+// CHECK: vcmppbf16 k5, xmm3, xmm4, 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x08,0xc2,0xec,0x7b]
+ vcmppbf16 k5, xmm3, xmm4, 123
+
+// CHECK: vcmppbf16 k5 {k7}, xmm3, xmm4, 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x0f,0xc2,0xec,0x7b]
+ vcmppbf16 k5 {k7}, xmm3, xmm4, 123
+
+// CHECK: vcmppbf16 k5, zmm3, zmm4, 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x48,0xc2,0xec,0x7b]
+ vcmppbf16 k5, zmm3, zmm4, 123
+
+// CHECK: vcmppbf16 k5 {k7}, zmm3, zmm4, 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x4f,0xc2,0xec,0x7b]
+ vcmppbf16 k5 {k7}, zmm3, zmm4, 123
+
+// CHECK: vcmppbf16 k5, zmm3, zmmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x48,0xc2,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vcmppbf16 k5, zmm3, zmmword ptr [esp + 8*esi + 268435456], 123
+
+// CHECK: vcmppbf16 k5 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291], 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x4f,0xc2,0xac,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vcmppbf16 k5 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291], 123
+
+// CHECK: vcmppbf16 k5, zmm3, word ptr [eax]{1to32}, 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x58,0xc2,0x28,0x7b]
+ vcmppbf16 k5, zmm3, word ptr [eax]{1to32}, 123
+
+// CHECK: vcmppbf16 k5, zmm3, zmmword ptr [2*ebp - 2048], 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x48,0xc2,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vcmppbf16 k5, zmm3, zmmword ptr [2*ebp - 2048], 123
+
+// CHECK: vcmppbf16 k5 {k7}, zmm3, zmmword ptr [ecx + 8128], 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x4f,0xc2,0x69,0x7f,0x7b]
+ vcmppbf16 k5 {k7}, zmm3, zmmword ptr [ecx + 8128], 123
+
+// CHECK: vcmppbf16 k5 {k7}, zmm3, word ptr [edx - 256]{1to32}, 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x5f,0xc2,0x6a,0x80,0x7b]
+ vcmppbf16 k5 {k7}, zmm3, word ptr [edx - 256]{1to32}, 123
+
+// CHECK: vcmppbf16 k5, xmm3, xmmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x08,0xc2,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vcmppbf16 k5, xmm3, xmmword ptr [esp + 8*esi + 268435456], 123
+
+// CHECK: vcmppbf16 k5 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291], 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x0f,0xc2,0xac,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vcmppbf16 k5 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291], 123
+
+// CHECK: vcmppbf16 k5, xmm3, word ptr [eax]{1to8}, 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x18,0xc2,0x28,0x7b]
+ vcmppbf16 k5, xmm3, word ptr [eax]{1to8}, 123
+
+// CHECK: vcmppbf16 k5, xmm3, xmmword ptr [2*ebp - 512], 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x08,0xc2,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vcmppbf16 k5, xmm3, xmmword ptr [2*ebp - 512], 123
+
+// CHECK: vcmppbf16 k5 {k7}, xmm3, xmmword ptr [ecx + 2032], 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x0f,0xc2,0x69,0x7f,0x7b]
+ vcmppbf16 k5 {k7}, xmm3, xmmword ptr [ecx + 2032], 123
+
+// CHECK: vcmppbf16 k5 {k7}, xmm3, word ptr [edx - 256]{1to8}, 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x1f,0xc2,0x6a,0x80,0x7b]
+ vcmppbf16 k5 {k7}, xmm3, word ptr [edx - 256]{1to8}, 123
+
+// CHECK: vcmppbf16 k5, ymm3, ymmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x28,0xc2,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vcmppbf16 k5, ymm3, ymmword ptr [esp + 8*esi + 268435456], 123
+
+// CHECK: vcmppbf16 k5 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291], 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x2f,0xc2,0xac,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vcmppbf16 k5 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291], 123
+
+// CHECK: vcmppbf16 k5, ymm3, word ptr [eax]{1to16}, 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x38,0xc2,0x28,0x7b]
+ vcmppbf16 k5, ymm3, word ptr [eax]{1to16}, 123
+
+// CHECK: vcmppbf16 k5, ymm3, ymmword ptr [2*ebp - 1024], 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x28,0xc2,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vcmppbf16 k5, ymm3, ymmword ptr [2*ebp - 1024], 123
+
+// CHECK: vcmppbf16 k5 {k7}, ymm3, ymmword ptr [ecx + 4064], 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x2f,0xc2,0x69,0x7f,0x7b]
+ vcmppbf16 k5 {k7}, ymm3, ymmword ptr [ecx + 4064], 123
+
+// CHECK: vcmppbf16 k5 {k7}, ymm3, word ptr [edx - 256]{1to16}, 123
+// CHECK: encoding: [0x62,0xf3,0x67,0x3f,0xc2,0x6a,0x80,0x7b]
+ vcmppbf16 k5 {k7}, ymm3, word ptr [edx - 256]{1to16}, 123
+
+// CHECK: vcomsbf16 xmm2, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xd3]
+ vcomsbf16 xmm2, xmm3
+
+// CHECK: vcomsbf16 xmm2, word ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcomsbf16 xmm2, word ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcomsbf16 xmm2, word ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcomsbf16 xmm2, word ptr [edi + 4*eax + 291]
+
+// CHECK: vcomsbf16 xmm2, word ptr [eax]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x10]
+ vcomsbf16 xmm2, word ptr [eax]
+
+// CHECK: vcomsbf16 xmm2, word ptr [2*ebp - 64]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x14,0x6d,0xc0,0xff,0xff,0xff]
+ vcomsbf16 xmm2, word ptr [2*ebp - 64]
+
+// CHECK: vcomsbf16 xmm2, word ptr [ecx + 254]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x51,0x7f]
+ vcomsbf16 xmm2, word ptr [ecx + 254]
+
+// CHECK: vcomsbf16 xmm2, word ptr [edx - 256]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x2f,0x52,0x80]
+ vcomsbf16 xmm2, word ptr [edx - 256]
+
+// CHECK: vdivnepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5e,0xd4]
+ vdivnepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vdivnepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5e,0xd4]
+ vdivnepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vdivnepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5e,0xd4]
+ vdivnepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vdivnepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5e,0xd4]
+ vdivnepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vdivnepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5e,0xd4]
+ vdivnepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vdivnepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5e,0xd4]
+ vdivnepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vdivnepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5e,0xd4]
+ vdivnepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vdivnepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5e,0xd4]
+ vdivnepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vdivnepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5e,0xd4]
+ vdivnepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vdivnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vdivnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vdivnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vdivnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vdivnepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x5e,0x10]
+ vdivnepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vdivnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5e,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vdivnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vdivnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5e,0x51,0x7f]
+ vdivnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vdivnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x5e,0x52,0x80]
+ vdivnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vdivnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vdivnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vdivnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vdivnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vdivnepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x5e,0x10]
+ vdivnepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vdivnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5e,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vdivnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vdivnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5e,0x51,0x7f]
+ vdivnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vdivnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x5e,0x52,0x80]
+ vdivnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vdivnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vdivnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vdivnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vdivnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vdivnepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x5e,0x10]
+ vdivnepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vdivnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5e,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vdivnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vdivnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5e,0x51,0x7f]
+ vdivnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vdivnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x5e,0x52,0x80]
+ vdivnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vfmadd132nepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x98,0xd4]
+ vfmadd132nepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vfmadd132nepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x98,0xd4]
+ vfmadd132nepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vfmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x98,0xd4]
+ vfmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vfmadd132nepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x98,0xd4]
+ vfmadd132nepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vfmadd132nepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x98,0xd4]
+ vfmadd132nepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vfmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x98,0xd4]
+ vfmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vfmadd132nepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x98,0xd4]
+ vfmadd132nepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vfmadd132nepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x98,0xd4]
+ vfmadd132nepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vfmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x98,0xd4]
+ vfmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vfmadd132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x98,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmadd132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x98,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmadd132nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x98,0x10]
+ vfmadd132nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vfmadd132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x98,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfmadd132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vfmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x98,0x51,0x7f]
+ vfmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vfmadd132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x98,0x52,0x80]
+ vfmadd132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vfmadd132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x98,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmadd132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x98,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmadd132nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x98,0x10]
+ vfmadd132nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vfmadd132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x98,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfmadd132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vfmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x98,0x51,0x7f]
+ vfmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vfmadd132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x98,0x52,0x80]
+ vfmadd132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vfmadd132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x98,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmadd132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x98,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmadd132nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x98,0x10]
+ vfmadd132nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vfmadd132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x98,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfmadd132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vfmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x98,0x51,0x7f]
+ vfmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vfmadd132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x98,0x52,0x80]
+ vfmadd132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vfmadd213nepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xa8,0xd4]
+ vfmadd213nepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vfmadd213nepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xa8,0xd4]
+ vfmadd213nepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vfmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xa8,0xd4]
+ vfmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vfmadd213nepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xa8,0xd4]
+ vfmadd213nepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vfmadd213nepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xa8,0xd4]
+ vfmadd213nepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vfmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xa8,0xd4]
+ vfmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vfmadd213nepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xa8,0xd4]
+ vfmadd213nepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vfmadd213nepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xa8,0xd4]
+ vfmadd213nepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vfmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xa8,0xd4]
+ vfmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vfmadd213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xa8,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmadd213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xa8,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmadd213nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xa8,0x10]
+ vfmadd213nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vfmadd213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xa8,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfmadd213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vfmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xa8,0x51,0x7f]
+ vfmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vfmadd213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xa8,0x52,0x80]
+ vfmadd213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vfmadd213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xa8,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmadd213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xa8,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmadd213nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xa8,0x10]
+ vfmadd213nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vfmadd213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xa8,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfmadd213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vfmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xa8,0x51,0x7f]
+ vfmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vfmadd213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xa8,0x52,0x80]
+ vfmadd213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vfmadd213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xa8,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmadd213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xa8,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmadd213nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xa8,0x10]
+ vfmadd213nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vfmadd213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xa8,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfmadd213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vfmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xa8,0x51,0x7f]
+ vfmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vfmadd213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xa8,0x52,0x80]
+ vfmadd213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vfmadd231nepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xb8,0xd4]
+ vfmadd231nepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vfmadd231nepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xb8,0xd4]
+ vfmadd231nepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vfmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xb8,0xd4]
+ vfmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vfmadd231nepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xb8,0xd4]
+ vfmadd231nepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vfmadd231nepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xb8,0xd4]
+ vfmadd231nepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vfmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xb8,0xd4]
+ vfmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vfmadd231nepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xb8,0xd4]
+ vfmadd231nepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vfmadd231nepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xb8,0xd4]
+ vfmadd231nepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vfmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xb8,0xd4]
+ vfmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vfmadd231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xb8,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmadd231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xb8,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmadd231nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xb8,0x10]
+ vfmadd231nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vfmadd231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xb8,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfmadd231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vfmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xb8,0x51,0x7f]
+ vfmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vfmadd231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xb8,0x52,0x80]
+ vfmadd231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vfmadd231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xb8,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmadd231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xb8,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmadd231nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xb8,0x10]
+ vfmadd231nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vfmadd231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xb8,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfmadd231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vfmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xb8,0x51,0x7f]
+ vfmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vfmadd231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xb8,0x52,0x80]
+ vfmadd231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vfmadd231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xb8,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmadd231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmadd231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xb8,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmadd231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmadd231nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xb8,0x10]
+ vfmadd231nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vfmadd231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xb8,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfmadd231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vfmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xb8,0x51,0x7f]
+ vfmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vfmadd231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xb8,0x52,0x80]
+ vfmadd231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vfmsub132nepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9a,0xd4]
+ vfmsub132nepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vfmsub132nepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9a,0xd4]
+ vfmsub132nepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vfmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9a,0xd4]
+ vfmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vfmsub132nepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9a,0xd4]
+ vfmsub132nepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vfmsub132nepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9a,0xd4]
+ vfmsub132nepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vfmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9a,0xd4]
+ vfmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vfmsub132nepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9a,0xd4]
+ vfmsub132nepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vfmsub132nepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9a,0xd4]
+ vfmsub132nepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vfmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9a,0xd4]
+ vfmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vfmsub132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9a,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmsub132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9a,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmsub132nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x9a,0x10]
+ vfmsub132nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vfmsub132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9a,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfmsub132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vfmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9a,0x51,0x7f]
+ vfmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vfmsub132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x9a,0x52,0x80]
+ vfmsub132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vfmsub132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9a,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmsub132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9a,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmsub132nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x9a,0x10]
+ vfmsub132nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vfmsub132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9a,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfmsub132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vfmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9a,0x51,0x7f]
+ vfmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vfmsub132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x9a,0x52,0x80]
+ vfmsub132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vfmsub132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9a,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmsub132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9a,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmsub132nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x9a,0x10]
+ vfmsub132nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vfmsub132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9a,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfmsub132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vfmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9a,0x51,0x7f]
+ vfmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vfmsub132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x9a,0x52,0x80]
+ vfmsub132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vfmsub213nepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xaa,0xd4]
+ vfmsub213nepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vfmsub213nepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xaa,0xd4]
+ vfmsub213nepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vfmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xaa,0xd4]
+ vfmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vfmsub213nepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xaa,0xd4]
+ vfmsub213nepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vfmsub213nepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xaa,0xd4]
+ vfmsub213nepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vfmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xaa,0xd4]
+ vfmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vfmsub213nepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xaa,0xd4]
+ vfmsub213nepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vfmsub213nepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xaa,0xd4]
+ vfmsub213nepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vfmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xaa,0xd4]
+ vfmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vfmsub213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xaa,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmsub213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xaa,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmsub213nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xaa,0x10]
+ vfmsub213nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vfmsub213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xaa,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfmsub213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vfmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xaa,0x51,0x7f]
+ vfmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vfmsub213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xaa,0x52,0x80]
+ vfmsub213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vfmsub213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xaa,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmsub213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xaa,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmsub213nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xaa,0x10]
+ vfmsub213nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vfmsub213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xaa,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfmsub213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vfmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xaa,0x51,0x7f]
+ vfmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vfmsub213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xaa,0x52,0x80]
+ vfmsub213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vfmsub213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xaa,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmsub213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xaa,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmsub213nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xaa,0x10]
+ vfmsub213nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vfmsub213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xaa,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfmsub213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vfmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xaa,0x51,0x7f]
+ vfmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vfmsub213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xaa,0x52,0x80]
+ vfmsub213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vfmsub231nepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xba,0xd4]
+ vfmsub231nepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vfmsub231nepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xba,0xd4]
+ vfmsub231nepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vfmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xba,0xd4]
+ vfmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vfmsub231nepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xba,0xd4]
+ vfmsub231nepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vfmsub231nepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xba,0xd4]
+ vfmsub231nepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vfmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xba,0xd4]
+ vfmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vfmsub231nepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xba,0xd4]
+ vfmsub231nepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vfmsub231nepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xba,0xd4]
+ vfmsub231nepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vfmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xba,0xd4]
+ vfmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vfmsub231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xba,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmsub231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xba,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmsub231nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xba,0x10]
+ vfmsub231nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vfmsub231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xba,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfmsub231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vfmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xba,0x51,0x7f]
+ vfmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vfmsub231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xba,0x52,0x80]
+ vfmsub231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vfmsub231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xba,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmsub231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xba,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmsub231nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xba,0x10]
+ vfmsub231nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vfmsub231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xba,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfmsub231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vfmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xba,0x51,0x7f]
+ vfmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vfmsub231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xba,0x52,0x80]
+ vfmsub231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vfmsub231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xba,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfmsub231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfmsub231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xba,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfmsub231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfmsub231nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xba,0x10]
+ vfmsub231nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vfmsub231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xba,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfmsub231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vfmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xba,0x51,0x7f]
+ vfmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vfmsub231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xba,0x52,0x80]
+ vfmsub231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vfnmadd132nepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9c,0xd4]
+ vfnmadd132nepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vfnmadd132nepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9c,0xd4]
+ vfnmadd132nepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vfnmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9c,0xd4]
+ vfnmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vfnmadd132nepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9c,0xd4]
+ vfnmadd132nepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vfnmadd132nepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9c,0xd4]
+ vfnmadd132nepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vfnmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9c,0xd4]
+ vfnmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vfnmadd132nepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9c,0xd4]
+ vfnmadd132nepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vfnmadd132nepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9c,0xd4]
+ vfnmadd132nepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vfnmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9c,0xd4]
+ vfnmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vfnmadd132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmadd132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmadd132nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x9c,0x10]
+ vfnmadd132nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vfnmadd132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9c,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmadd132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vfnmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9c,0x51,0x7f]
+ vfnmadd132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vfnmadd132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x9c,0x52,0x80]
+ vfnmadd132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vfnmadd132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmadd132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmadd132nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x9c,0x10]
+ vfnmadd132nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vfnmadd132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9c,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmadd132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vfnmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9c,0x51,0x7f]
+ vfnmadd132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vfnmadd132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x9c,0x52,0x80]
+ vfnmadd132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vfnmadd132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmadd132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmadd132nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x9c,0x10]
+ vfnmadd132nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vfnmadd132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9c,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmadd132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vfnmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9c,0x51,0x7f]
+ vfnmadd132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vfnmadd132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x9c,0x52,0x80]
+ vfnmadd132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vfnmadd213nepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xac,0xd4]
+ vfnmadd213nepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vfnmadd213nepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xac,0xd4]
+ vfnmadd213nepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vfnmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xac,0xd4]
+ vfnmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vfnmadd213nepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xac,0xd4]
+ vfnmadd213nepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vfnmadd213nepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xac,0xd4]
+ vfnmadd213nepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vfnmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xac,0xd4]
+ vfnmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vfnmadd213nepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xac,0xd4]
+ vfnmadd213nepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vfnmadd213nepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xac,0xd4]
+ vfnmadd213nepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vfnmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xac,0xd4]
+ vfnmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vfnmadd213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xac,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmadd213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xac,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmadd213nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xac,0x10]
+ vfnmadd213nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vfnmadd213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xac,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmadd213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vfnmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xac,0x51,0x7f]
+ vfnmadd213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vfnmadd213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xac,0x52,0x80]
+ vfnmadd213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vfnmadd213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xac,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmadd213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xac,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmadd213nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xac,0x10]
+ vfnmadd213nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vfnmadd213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xac,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmadd213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vfnmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xac,0x51,0x7f]
+ vfnmadd213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vfnmadd213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xac,0x52,0x80]
+ vfnmadd213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vfnmadd213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xac,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmadd213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xac,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmadd213nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xac,0x10]
+ vfnmadd213nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vfnmadd213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xac,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmadd213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vfnmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xac,0x51,0x7f]
+ vfnmadd213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vfnmadd213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xac,0x52,0x80]
+ vfnmadd213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vfnmadd231nepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbc,0xd4]
+ vfnmadd231nepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vfnmadd231nepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xbc,0xd4]
+ vfnmadd231nepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vfnmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xbc,0xd4]
+ vfnmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vfnmadd231nepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbc,0xd4]
+ vfnmadd231nepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vfnmadd231nepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xbc,0xd4]
+ vfnmadd231nepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vfnmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xbc,0xd4]
+ vfnmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vfnmadd231nepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbc,0xd4]
+ vfnmadd231nepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vfnmadd231nepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xbc,0xd4]
+ vfnmadd231nepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vfnmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xbc,0xd4]
+ vfnmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vfnmadd231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbc,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmadd231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xbc,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmadd231nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xbc,0x10]
+ vfnmadd231nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vfnmadd231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbc,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmadd231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vfnmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xbc,0x51,0x7f]
+ vfnmadd231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vfnmadd231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xbc,0x52,0x80]
+ vfnmadd231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vfnmadd231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbc,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmadd231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xbc,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmadd231nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xbc,0x10]
+ vfnmadd231nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vfnmadd231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbc,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmadd231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vfnmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xbc,0x51,0x7f]
+ vfnmadd231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vfnmadd231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xbc,0x52,0x80]
+ vfnmadd231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vfnmadd231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbc,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmadd231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmadd231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xbc,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmadd231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmadd231nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xbc,0x10]
+ vfnmadd231nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vfnmadd231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbc,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmadd231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vfnmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xbc,0x51,0x7f]
+ vfnmadd231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vfnmadd231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xbc,0x52,0x80]
+ vfnmadd231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vfnmsub132nepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9e,0xd4]
+ vfnmsub132nepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vfnmsub132nepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9e,0xd4]
+ vfnmsub132nepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vfnmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9e,0xd4]
+ vfnmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vfnmsub132nepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9e,0xd4]
+ vfnmsub132nepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vfnmsub132nepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9e,0xd4]
+ vfnmsub132nepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vfnmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9e,0xd4]
+ vfnmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vfnmsub132nepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9e,0xd4]
+ vfnmsub132nepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vfnmsub132nepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9e,0xd4]
+ vfnmsub132nepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vfnmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9e,0xd4]
+ vfnmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vfnmsub132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub132nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmsub132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x9e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub132nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmsub132nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x9e,0x10]
+ vfnmsub132nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vfnmsub132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x9e,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmsub132nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vfnmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x9e,0x51,0x7f]
+ vfnmsub132nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vfnmsub132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x9e,0x52,0x80]
+ vfnmsub132nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vfnmsub132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub132nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmsub132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x9e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub132nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmsub132nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x9e,0x10]
+ vfnmsub132nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vfnmsub132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x9e,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmsub132nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vfnmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x9e,0x51,0x7f]
+ vfnmsub132nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vfnmsub132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x9e,0x52,0x80]
+ vfnmsub132nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vfnmsub132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub132nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmsub132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x9e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub132nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmsub132nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x9e,0x10]
+ vfnmsub132nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vfnmsub132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x9e,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmsub132nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vfnmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x9e,0x51,0x7f]
+ vfnmsub132nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vfnmsub132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x9e,0x52,0x80]
+ vfnmsub132nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vfnmsub213nepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xae,0xd4]
+ vfnmsub213nepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vfnmsub213nepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xae,0xd4]
+ vfnmsub213nepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vfnmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xae,0xd4]
+ vfnmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vfnmsub213nepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xae,0xd4]
+ vfnmsub213nepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vfnmsub213nepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xae,0xd4]
+ vfnmsub213nepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vfnmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xae,0xd4]
+ vfnmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vfnmsub213nepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xae,0xd4]
+ vfnmsub213nepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vfnmsub213nepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xae,0xd4]
+ vfnmsub213nepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vfnmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xae,0xd4]
+ vfnmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vfnmsub213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xae,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub213nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmsub213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xae,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub213nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmsub213nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xae,0x10]
+ vfnmsub213nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vfnmsub213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xae,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmsub213nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vfnmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xae,0x51,0x7f]
+ vfnmsub213nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vfnmsub213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xae,0x52,0x80]
+ vfnmsub213nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vfnmsub213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xae,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub213nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmsub213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xae,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub213nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmsub213nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xae,0x10]
+ vfnmsub213nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vfnmsub213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xae,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmsub213nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vfnmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xae,0x51,0x7f]
+ vfnmsub213nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vfnmsub213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xae,0x52,0x80]
+ vfnmsub213nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vfnmsub213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xae,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub213nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmsub213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xae,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub213nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmsub213nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xae,0x10]
+ vfnmsub213nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vfnmsub213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xae,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmsub213nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vfnmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xae,0x51,0x7f]
+ vfnmsub213nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vfnmsub213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xae,0x52,0x80]
+ vfnmsub213nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vfnmsub231nepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbe,0xd4]
+ vfnmsub231nepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vfnmsub231nepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xbe,0xd4]
+ vfnmsub231nepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vfnmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xbe,0xd4]
+ vfnmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vfnmsub231nepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbe,0xd4]
+ vfnmsub231nepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vfnmsub231nepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xbe,0xd4]
+ vfnmsub231nepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vfnmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xbe,0xd4]
+ vfnmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vfnmsub231nepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbe,0xd4]
+ vfnmsub231nepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vfnmsub231nepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xbe,0xd4]
+ vfnmsub231nepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vfnmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xbe,0xd4]
+ vfnmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vfnmsub231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbe,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub231nepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmsub231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0xbe,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub231nepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmsub231nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0xbe,0x10]
+ vfnmsub231nepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vfnmsub231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0xbe,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmsub231nepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vfnmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0xbe,0x51,0x7f]
+ vfnmsub231nepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vfnmsub231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0xbe,0x52,0x80]
+ vfnmsub231nepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vfnmsub231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbe,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub231nepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmsub231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0xbe,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub231nepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmsub231nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0xbe,0x10]
+ vfnmsub231nepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vfnmsub231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0xbe,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmsub231nepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vfnmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0xbe,0x51,0x7f]
+ vfnmsub231nepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vfnmsub231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0xbe,0x52,0x80]
+ vfnmsub231nepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vfnmsub231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbe,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vfnmsub231nepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vfnmsub231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0xbe,0x94,0x87,0x23,0x01,0x00,0x00]
+ vfnmsub231nepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vfnmsub231nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0xbe,0x10]
+ vfnmsub231nepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vfnmsub231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0xbe,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmsub231nepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vfnmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0xbe,0x51,0x7f]
+ vfnmsub231nepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vfnmsub231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0xbe,0x52,0x80]
+ vfnmsub231nepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vfpclasspbf16 k5, zmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x66,0xeb,0x7b]
+ vfpclasspbf16 k5, zmm3, 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, zmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x66,0xeb,0x7b]
+ vfpclasspbf16 k5 {k7}, zmm3, 123
+
+// CHECK: vfpclasspbf16 k5, ymm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x66,0xeb,0x7b]
+ vfpclasspbf16 k5, ymm3, 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, ymm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x66,0xeb,0x7b]
+ vfpclasspbf16 k5 {k7}, ymm3, 123
+
+// CHECK: vfpclasspbf16 k5, xmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x66,0xeb,0x7b]
+ vfpclasspbf16 k5, xmm3, 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, xmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x66,0xeb,0x7b]
+ vfpclasspbf16 k5 {k7}, xmm3, 123
+
+// CHECK: vfpclasspbf16 k5, xmmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x66,0xac,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vfpclasspbf16 k5, xmmword ptr [esp + 8*esi + 268435456], 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, xmmword ptr [edi + 4*eax + 291], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x66,0xac,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vfpclasspbf16 k5 {k7}, xmmword ptr [edi + 4*eax + 291], 123
+
+// CHECK: vfpclasspbf16 k5, word ptr [eax]{1to8}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x66,0x28,0x7b]
+ vfpclasspbf16 k5, word ptr [eax]{1to8}, 123
+
+// CHECK: vfpclasspbf16 k5, xmmword ptr [2*ebp - 512], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x66,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vfpclasspbf16 k5, xmmword ptr [2*ebp - 512], 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, xmmword ptr [ecx + 2032], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x66,0x69,0x7f,0x7b]
+ vfpclasspbf16 k5 {k7}, xmmword ptr [ecx + 2032], 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, word ptr [edx - 256]{1to8}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x1f,0x66,0x6a,0x80,0x7b]
+ vfpclasspbf16 k5 {k7}, word ptr [edx - 256]{1to8}, 123
+
+// CHECK: vfpclasspbf16 k5, word ptr [eax]{1to16}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x66,0x28,0x7b]
+ vfpclasspbf16 k5, word ptr [eax]{1to16}, 123
+
+// CHECK: vfpclasspbf16 k5, ymmword ptr [2*ebp - 1024], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x66,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vfpclasspbf16 k5, ymmword ptr [2*ebp - 1024], 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, ymmword ptr [ecx + 4064], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x66,0x69,0x7f,0x7b]
+ vfpclasspbf16 k5 {k7}, ymmword ptr [ecx + 4064], 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, word ptr [edx - 256]{1to16}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x3f,0x66,0x6a,0x80,0x7b]
+ vfpclasspbf16 k5 {k7}, word ptr [edx - 256]{1to16}, 123
+
+// CHECK: vfpclasspbf16 k5, word ptr [eax]{1to32}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x58,0x66,0x28,0x7b]
+ vfpclasspbf16 k5, word ptr [eax]{1to32}, 123
+
+// CHECK: vfpclasspbf16 k5, zmmword ptr [2*ebp - 2048], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x66,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vfpclasspbf16 k5, zmmword ptr [2*ebp - 2048], 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, zmmword ptr [ecx + 8128], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x66,0x69,0x7f,0x7b]
+ vfpclasspbf16 k5 {k7}, zmmword ptr [ecx + 8128], 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, word ptr [edx - 256]{1to32}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x5f,0x66,0x6a,0x80,0x7b]
+ vfpclasspbf16 k5 {k7}, word ptr [edx - 256]{1to32}, 123
+
+// CHECK: vgetexppbf16 xmm2, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x42,0xd3]
+ vgetexppbf16 xmm2, xmm3
+
+// CHECK: vgetexppbf16 xmm2 {k7}, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x42,0xd3]
+ vgetexppbf16 xmm2 {k7}, xmm3
+
+// CHECK: vgetexppbf16 xmm2 {k7} {z}, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x42,0xd3]
+ vgetexppbf16 xmm2 {k7} {z}, xmm3
+
+// CHECK: vgetexppbf16 zmm2, zmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x42,0xd3]
+ vgetexppbf16 zmm2, zmm3
+
+// CHECK: vgetexppbf16 zmm2 {k7}, zmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x42,0xd3]
+ vgetexppbf16 zmm2 {k7}, zmm3
+
+// CHECK: vgetexppbf16 zmm2 {k7} {z}, zmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x42,0xd3]
+ vgetexppbf16 zmm2 {k7} {z}, zmm3
+
+// CHECK: vgetexppbf16 ymm2, ymm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x42,0xd3]
+ vgetexppbf16 ymm2, ymm3
+
+// CHECK: vgetexppbf16 ymm2 {k7}, ymm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x42,0xd3]
+ vgetexppbf16 ymm2 {k7}, ymm3
+
+// CHECK: vgetexppbf16 ymm2 {k7} {z}, ymm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x42,0xd3]
+ vgetexppbf16 ymm2 {k7} {z}, ymm3
+
+// CHECK: vgetexppbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x42,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vgetexppbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vgetexppbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x42,0x94,0x87,0x23,0x01,0x00,0x00]
+ vgetexppbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vgetexppbf16 xmm2, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x42,0x10]
+ vgetexppbf16 xmm2, word ptr [eax]{1to8}
+
+// CHECK: vgetexppbf16 xmm2, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x42,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vgetexppbf16 xmm2, xmmword ptr [2*ebp - 512]
+
+// CHECK: vgetexppbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x42,0x51,0x7f]
+ vgetexppbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+
+// CHECK: vgetexppbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x42,0x52,0x80]
+ vgetexppbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}
+
+// CHECK: vgetexppbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x42,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vgetexppbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vgetexppbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x42,0x94,0x87,0x23,0x01,0x00,0x00]
+ vgetexppbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vgetexppbf16 ymm2, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x42,0x10]
+ vgetexppbf16 ymm2, word ptr [eax]{1to16}
+
+// CHECK: vgetexppbf16 ymm2, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x42,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vgetexppbf16 ymm2, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vgetexppbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x42,0x51,0x7f]
+ vgetexppbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+
+// CHECK: vgetexppbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x42,0x52,0x80]
+ vgetexppbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}
+
+// CHECK: vgetexppbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x42,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vgetexppbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vgetexppbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x42,0x94,0x87,0x23,0x01,0x00,0x00]
+ vgetexppbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vgetexppbf16 zmm2, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x42,0x10]
+ vgetexppbf16 zmm2, word ptr [eax]{1to32}
+
+// CHECK: vgetexppbf16 zmm2, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x42,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vgetexppbf16 zmm2, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vgetexppbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x42,0x51,0x7f]
+ vgetexppbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+
+// CHECK: vgetexppbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x42,0x52,0x80]
+ vgetexppbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}
+
+// CHECK: vgetmantpbf16 zmm2, zmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x26,0xd3,0x7b]
+ vgetmantpbf16 zmm2, zmm3, 123
+
+// CHECK: vgetmantpbf16 zmm2 {k7}, zmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x26,0xd3,0x7b]
+ vgetmantpbf16 zmm2 {k7}, zmm3, 123
+
+// CHECK: vgetmantpbf16 zmm2 {k7} {z}, zmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x26,0xd3,0x7b]
+ vgetmantpbf16 zmm2 {k7} {z}, zmm3, 123
+
+// CHECK: vgetmantpbf16 ymm2, ymm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x26,0xd3,0x7b]
+ vgetmantpbf16 ymm2, ymm3, 123
+
+// CHECK: vgetmantpbf16 ymm2 {k7}, ymm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x26,0xd3,0x7b]
+ vgetmantpbf16 ymm2 {k7}, ymm3, 123
+
+// CHECK: vgetmantpbf16 ymm2 {k7} {z}, ymm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x26,0xd3,0x7b]
+ vgetmantpbf16 ymm2 {k7} {z}, ymm3, 123
+
+// CHECK: vgetmantpbf16 xmm2, xmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x26,0xd3,0x7b]
+ vgetmantpbf16 xmm2, xmm3, 123
+
+// CHECK: vgetmantpbf16 xmm2 {k7}, xmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x26,0xd3,0x7b]
+ vgetmantpbf16 xmm2 {k7}, xmm3, 123
+
+// CHECK: vgetmantpbf16 xmm2 {k7} {z}, xmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x26,0xd3,0x7b]
+ vgetmantpbf16 xmm2 {k7} {z}, xmm3, 123
+
+// CHECK: vgetmantpbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x26,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vgetmantpbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456], 123
+
+// CHECK: vgetmantpbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x26,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantpbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291], 123
+
+// CHECK: vgetmantpbf16 xmm2, word ptr [eax]{1to8}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x26,0x10,0x7b]
+ vgetmantpbf16 xmm2, word ptr [eax]{1to8}, 123
+
+// CHECK: vgetmantpbf16 xmm2, xmmword ptr [2*ebp - 512], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x26,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vgetmantpbf16 xmm2, xmmword ptr [2*ebp - 512], 123
+
+// CHECK: vgetmantpbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x26,0x51,0x7f,0x7b]
+ vgetmantpbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032], 123
+
+// CHECK: vgetmantpbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x9f,0x26,0x52,0x80,0x7b]
+ vgetmantpbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}, 123
+
+// CHECK: vgetmantpbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x26,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vgetmantpbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456], 123
+
+// CHECK: vgetmantpbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x26,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantpbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291], 123
+
+// CHECK: vgetmantpbf16 ymm2, word ptr [eax]{1to16}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x26,0x10,0x7b]
+ vgetmantpbf16 ymm2, word ptr [eax]{1to16}, 123
+
+// CHECK: vgetmantpbf16 ymm2, ymmword ptr [2*ebp - 1024], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x26,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vgetmantpbf16 ymm2, ymmword ptr [2*ebp - 1024], 123
+
+// CHECK: vgetmantpbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x26,0x51,0x7f,0x7b]
+ vgetmantpbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064], 123
+
+// CHECK: vgetmantpbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xbf,0x26,0x52,0x80,0x7b]
+ vgetmantpbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}, 123
+
+// CHECK: vgetmantpbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x26,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vgetmantpbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456], 123
+
+// CHECK: vgetmantpbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x26,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantpbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291], 123
+
+// CHECK: vgetmantpbf16 zmm2, word ptr [eax]{1to32}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x58,0x26,0x10,0x7b]
+ vgetmantpbf16 zmm2, word ptr [eax]{1to32}, 123
+
+// CHECK: vgetmantpbf16 zmm2, zmmword ptr [2*ebp - 2048], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x26,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vgetmantpbf16 zmm2, zmmword ptr [2*ebp - 2048], 123
+
+// CHECK: vgetmantpbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x26,0x51,0x7f,0x7b]
+ vgetmantpbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128], 123
+
+// CHECK: vgetmantpbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xdf,0x26,0x52,0x80,0x7b]
+ vgetmantpbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}, 123
+
+// CHECK: vmaxpbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5f,0xd4]
+ vmaxpbf16 ymm2, ymm3, ymm4
+
+// CHECK: vmaxpbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5f,0xd4]
+ vmaxpbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vmaxpbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5f,0xd4]
+ vmaxpbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vmaxpbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5f,0xd4]
+ vmaxpbf16 zmm2, zmm3, zmm4
+
+// CHECK: vmaxpbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5f,0xd4]
+ vmaxpbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vmaxpbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5f,0xd4]
+ vmaxpbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vmaxpbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5f,0xd4]
+ vmaxpbf16 xmm2, xmm3, xmm4
+
+// CHECK: vmaxpbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5f,0xd4]
+ vmaxpbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vmaxpbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5f,0xd4]
+ vmaxpbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vmaxpbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5f,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmaxpbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vmaxpbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5f,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmaxpbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vmaxpbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x5f,0x10]
+ vmaxpbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vmaxpbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5f,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vmaxpbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vmaxpbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5f,0x51,0x7f]
+ vmaxpbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vmaxpbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x5f,0x52,0x80]
+ vmaxpbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vmaxpbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5f,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmaxpbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vmaxpbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5f,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmaxpbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vmaxpbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x5f,0x10]
+ vmaxpbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vmaxpbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5f,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vmaxpbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vmaxpbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5f,0x51,0x7f]
+ vmaxpbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vmaxpbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x5f,0x52,0x80]
+ vmaxpbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vmaxpbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5f,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmaxpbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vmaxpbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5f,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmaxpbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vmaxpbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x5f,0x10]
+ vmaxpbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vmaxpbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5f,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vmaxpbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vmaxpbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5f,0x51,0x7f]
+ vmaxpbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vmaxpbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x5f,0x52,0x80]
+ vmaxpbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vminpbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5d,0xd4]
+ vminpbf16 ymm2, ymm3, ymm4
+
+// CHECK: vminpbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5d,0xd4]
+ vminpbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vminpbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5d,0xd4]
+ vminpbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vminpbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5d,0xd4]
+ vminpbf16 zmm2, zmm3, zmm4
+
+// CHECK: vminpbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5d,0xd4]
+ vminpbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vminpbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5d,0xd4]
+ vminpbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vminpbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5d,0xd4]
+ vminpbf16 xmm2, xmm3, xmm4
+
+// CHECK: vminpbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5d,0xd4]
+ vminpbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vminpbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5d,0xd4]
+ vminpbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vminpbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vminpbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vminpbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vminpbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vminpbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x5d,0x10]
+ vminpbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vminpbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5d,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vminpbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vminpbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5d,0x51,0x7f]
+ vminpbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vminpbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x5d,0x52,0x80]
+ vminpbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vminpbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vminpbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vminpbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vminpbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vminpbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x5d,0x10]
+ vminpbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vminpbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5d,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vminpbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vminpbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5d,0x51,0x7f]
+ vminpbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vminpbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x5d,0x52,0x80]
+ vminpbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vminpbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vminpbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vminpbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vminpbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vminpbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x5d,0x10]
+ vminpbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vminpbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5d,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vminpbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vminpbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5d,0x51,0x7f]
+ vminpbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vminpbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x5d,0x52,0x80]
+ vminpbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vmulnepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x59,0xd4]
+ vmulnepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vmulnepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x59,0xd4]
+ vmulnepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vmulnepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x59,0xd4]
+ vmulnepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vmulnepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x59,0xd4]
+ vmulnepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vmulnepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x59,0xd4]
+ vmulnepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vmulnepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x59,0xd4]
+ vmulnepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vmulnepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x59,0xd4]
+ vmulnepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vmulnepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x59,0xd4]
+ vmulnepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vmulnepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x59,0xd4]
+ vmulnepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vmulnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x59,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmulnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vmulnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x59,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmulnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vmulnepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x59,0x10]
+ vmulnepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vmulnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x59,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vmulnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vmulnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x59,0x51,0x7f]
+ vmulnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vmulnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x59,0x52,0x80]
+ vmulnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vmulnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x59,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmulnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vmulnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x59,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmulnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vmulnepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x59,0x10]
+ vmulnepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vmulnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x59,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vmulnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vmulnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x59,0x51,0x7f]
+ vmulnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vmulnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x59,0x52,0x80]
+ vmulnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vmulnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x59,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmulnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vmulnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x59,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmulnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vmulnepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x59,0x10]
+ vmulnepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vmulnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x59,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vmulnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vmulnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x59,0x51,0x7f]
+ vmulnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vmulnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x59,0x52,0x80]
+ vmulnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vrcppbf16 xmm2, xmm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4c,0xd3]
+ vrcppbf16 xmm2, xmm3
+
+// CHECK: vrcppbf16 xmm2 {k7}, xmm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0x0f,0x4c,0xd3]
+ vrcppbf16 xmm2 {k7}, xmm3
+
+// CHECK: vrcppbf16 xmm2 {k7} {z}, xmm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0x8f,0x4c,0xd3]
+ vrcppbf16 xmm2 {k7} {z}, xmm3
+
+// CHECK: vrcppbf16 zmm2, zmm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4c,0xd3]
+ vrcppbf16 zmm2, zmm3
+
+// CHECK: vrcppbf16 zmm2 {k7}, zmm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0x4f,0x4c,0xd3]
+ vrcppbf16 zmm2 {k7}, zmm3
+
+// CHECK: vrcppbf16 zmm2 {k7} {z}, zmm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0xcf,0x4c,0xd3]
+ vrcppbf16 zmm2 {k7} {z}, zmm3
+
+// CHECK: vrcppbf16 ymm2, ymm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4c,0xd3]
+ vrcppbf16 ymm2, ymm3
+
+// CHECK: vrcppbf16 ymm2 {k7}, ymm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0x2f,0x4c,0xd3]
+ vrcppbf16 ymm2 {k7}, ymm3
+
+// CHECK: vrcppbf16 ymm2 {k7} {z}, ymm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0xaf,0x4c,0xd3]
+ vrcppbf16 ymm2 {k7} {z}, ymm3
+
+// CHECK: vrcppbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vrcppbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vrcppbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x0f,0x4c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vrcppbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vrcppbf16 xmm2, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x18,0x4c,0x10]
+ vrcppbf16 xmm2, word ptr [eax]{1to8}
+
+// CHECK: vrcppbf16 xmm2, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4c,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vrcppbf16 xmm2, xmmword ptr [2*ebp - 512]
+
+// CHECK: vrcppbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x8f,0x4c,0x51,0x7f]
+ vrcppbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+
+// CHECK: vrcppbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x9f,0x4c,0x52,0x80]
+ vrcppbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}
+
+// CHECK: vrcppbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vrcppbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vrcppbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x2f,0x4c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vrcppbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vrcppbf16 ymm2, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x38,0x4c,0x10]
+ vrcppbf16 ymm2, word ptr [eax]{1to16}
+
+// CHECK: vrcppbf16 ymm2, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4c,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vrcppbf16 ymm2, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vrcppbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x7c,0xaf,0x4c,0x51,0x7f]
+ vrcppbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+
+// CHECK: vrcppbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xbf,0x4c,0x52,0x80]
+ vrcppbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}
+
+// CHECK: vrcppbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vrcppbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vrcppbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x4f,0x4c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vrcppbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vrcppbf16 zmm2, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x58,0x4c,0x10]
+ vrcppbf16 zmm2, word ptr [eax]{1to32}
+
+// CHECK: vrcppbf16 zmm2, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4c,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vrcppbf16 zmm2, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vrcppbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x7c,0xcf,0x4c,0x51,0x7f]
+ vrcppbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+
+// CHECK: vrcppbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xdf,0x4c,0x52,0x80]
+ vrcppbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}
+
+// CHECK: vreducenepbf16 zmm2, zmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x56,0xd3,0x7b]
+ vreducenepbf16 zmm2, zmm3, 123
+
+// CHECK: vreducenepbf16 zmm2 {k7}, zmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x56,0xd3,0x7b]
+ vreducenepbf16 zmm2 {k7}, zmm3, 123
+
+// CHECK: vreducenepbf16 zmm2 {k7} {z}, zmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x56,0xd3,0x7b]
+ vreducenepbf16 zmm2 {k7} {z}, zmm3, 123
+
+// CHECK: vreducenepbf16 ymm2, ymm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x56,0xd3,0x7b]
+ vreducenepbf16 ymm2, ymm3, 123
+
+// CHECK: vreducenepbf16 ymm2 {k7}, ymm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x56,0xd3,0x7b]
+ vreducenepbf16 ymm2 {k7}, ymm3, 123
+
+// CHECK: vreducenepbf16 ymm2 {k7} {z}, ymm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x56,0xd3,0x7b]
+ vreducenepbf16 ymm2 {k7} {z}, ymm3, 123
+
+// CHECK: vreducenepbf16 xmm2, xmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x56,0xd3,0x7b]
+ vreducenepbf16 xmm2, xmm3, 123
+
+// CHECK: vreducenepbf16 xmm2 {k7}, xmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x56,0xd3,0x7b]
+ vreducenepbf16 xmm2 {k7}, xmm3, 123
+
+// CHECK: vreducenepbf16 xmm2 {k7} {z}, xmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x56,0xd3,0x7b]
+ vreducenepbf16 xmm2 {k7} {z}, xmm3, 123
+
+// CHECK: vreducenepbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x56,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vreducenepbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456], 123
+
+// CHECK: vreducenepbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x56,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vreducenepbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291], 123
+
+// CHECK: vreducenepbf16 xmm2, word ptr [eax]{1to8}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x56,0x10,0x7b]
+ vreducenepbf16 xmm2, word ptr [eax]{1to8}, 123
+
+// CHECK: vreducenepbf16 xmm2, xmmword ptr [2*ebp - 512], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x56,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vreducenepbf16 xmm2, xmmword ptr [2*ebp - 512], 123
+
+// CHECK: vreducenepbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x56,0x51,0x7f,0x7b]
+ vreducenepbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032], 123
+
+// CHECK: vreducenepbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x9f,0x56,0x52,0x80,0x7b]
+ vreducenepbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}, 123
+
+// CHECK: vreducenepbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x56,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vreducenepbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456], 123
+
+// CHECK: vreducenepbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x56,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vreducenepbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291], 123
+
+// CHECK: vreducenepbf16 ymm2, word ptr [eax]{1to16}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x56,0x10,0x7b]
+ vreducenepbf16 ymm2, word ptr [eax]{1to16}, 123
+
+// CHECK: vreducenepbf16 ymm2, ymmword ptr [2*ebp - 1024], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x56,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vreducenepbf16 ymm2, ymmword ptr [2*ebp - 1024], 123
+
+// CHECK: vreducenepbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x56,0x51,0x7f,0x7b]
+ vreducenepbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064], 123
+
+// CHECK: vreducenepbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xbf,0x56,0x52,0x80,0x7b]
+ vreducenepbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}, 123
+
+// CHECK: vreducenepbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x56,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vreducenepbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456], 123
+
+// CHECK: vreducenepbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x56,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vreducenepbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291], 123
+
+// CHECK: vreducenepbf16 zmm2, word ptr [eax]{1to32}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x58,0x56,0x10,0x7b]
+ vreducenepbf16 zmm2, word ptr [eax]{1to32}, 123
+
+// CHECK: vreducenepbf16 zmm2, zmmword ptr [2*ebp - 2048], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x56,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vreducenepbf16 zmm2, zmmword ptr [2*ebp - 2048], 123
+
+// CHECK: vreducenepbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x56,0x51,0x7f,0x7b]
+ vreducenepbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128], 123
+
+// CHECK: vreducenepbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xdf,0x56,0x52,0x80,0x7b]
+ vreducenepbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}, 123
+
+// CHECK: vrndscalenepbf16 zmm2, zmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x08,0xd3,0x7b]
+ vrndscalenepbf16 zmm2, zmm3, 123
+
+// CHECK: vrndscalenepbf16 zmm2 {k7}, zmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x08,0xd3,0x7b]
+ vrndscalenepbf16 zmm2 {k7}, zmm3, 123
+
+// CHECK: vrndscalenepbf16 zmm2 {k7} {z}, zmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x08,0xd3,0x7b]
+ vrndscalenepbf16 zmm2 {k7} {z}, zmm3, 123
+
+// CHECK: vrndscalenepbf16 ymm2, ymm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x08,0xd3,0x7b]
+ vrndscalenepbf16 ymm2, ymm3, 123
+
+// CHECK: vrndscalenepbf16 ymm2 {k7}, ymm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x08,0xd3,0x7b]
+ vrndscalenepbf16 ymm2 {k7}, ymm3, 123
+
+// CHECK: vrndscalenepbf16 ymm2 {k7} {z}, ymm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x08,0xd3,0x7b]
+ vrndscalenepbf16 ymm2 {k7} {z}, ymm3, 123
+
+// CHECK: vrndscalenepbf16 xmm2, xmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x08,0xd3,0x7b]
+ vrndscalenepbf16 xmm2, xmm3, 123
+
+// CHECK: vrndscalenepbf16 xmm2 {k7}, xmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x08,0xd3,0x7b]
+ vrndscalenepbf16 xmm2 {k7}, xmm3, 123
+
+// CHECK: vrndscalenepbf16 xmm2 {k7} {z}, xmm3, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x08,0xd3,0x7b]
+ vrndscalenepbf16 xmm2 {k7} {z}, xmm3, 123
+
+// CHECK: vrndscalenepbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x08,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vrndscalenepbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456], 123
+
+// CHECK: vrndscalenepbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x08,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vrndscalenepbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291], 123
+
+// CHECK: vrndscalenepbf16 xmm2, word ptr [eax]{1to8}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x08,0x10,0x7b]
+ vrndscalenepbf16 xmm2, word ptr [eax]{1to8}, 123
+
+// CHECK: vrndscalenepbf16 xmm2, xmmword ptr [2*ebp - 512], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x08,0x14,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vrndscalenepbf16 xmm2, xmmword ptr [2*ebp - 512], 123
+
+// CHECK: vrndscalenepbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x8f,0x08,0x51,0x7f,0x7b]
+ vrndscalenepbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032], 123
+
+// CHECK: vrndscalenepbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x9f,0x08,0x52,0x80,0x7b]
+ vrndscalenepbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}, 123
+
+// CHECK: vrndscalenepbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x08,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vrndscalenepbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456], 123
+
+// CHECK: vrndscalenepbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x08,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vrndscalenepbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291], 123
+
+// CHECK: vrndscalenepbf16 ymm2, word ptr [eax]{1to16}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x08,0x10,0x7b]
+ vrndscalenepbf16 ymm2, word ptr [eax]{1to16}, 123
+
+// CHECK: vrndscalenepbf16 ymm2, ymmword ptr [2*ebp - 1024], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x08,0x14,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vrndscalenepbf16 ymm2, ymmword ptr [2*ebp - 1024], 123
+
+// CHECK: vrndscalenepbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xaf,0x08,0x51,0x7f,0x7b]
+ vrndscalenepbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064], 123
+
+// CHECK: vrndscalenepbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xbf,0x08,0x52,0x80,0x7b]
+ vrndscalenepbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}, 123
+
+// CHECK: vrndscalenepbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x08,0x94,0xf4,0x00,0x00,0x00,0x10,0x7b]
+ vrndscalenepbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456], 123
+
+// CHECK: vrndscalenepbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x08,0x94,0x87,0x23,0x01,0x00,0x00,0x7b]
+ vrndscalenepbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291], 123
+
+// CHECK: vrndscalenepbf16 zmm2, word ptr [eax]{1to32}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x58,0x08,0x10,0x7b]
+ vrndscalenepbf16 zmm2, word ptr [eax]{1to32}, 123
+
+// CHECK: vrndscalenepbf16 zmm2, zmmword ptr [2*ebp - 2048], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x08,0x14,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vrndscalenepbf16 zmm2, zmmword ptr [2*ebp - 2048], 123
+
+// CHECK: vrndscalenepbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xcf,0x08,0x51,0x7f,0x7b]
+ vrndscalenepbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128], 123
+
+// CHECK: vrndscalenepbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0xdf,0x08,0x52,0x80,0x7b]
+ vrndscalenepbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}, 123
+
+// CHECK: vrsqrtpbf16 xmm2, xmm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4e,0xd3]
+ vrsqrtpbf16 xmm2, xmm3
+
+// CHECK: vrsqrtpbf16 xmm2 {k7}, xmm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0x0f,0x4e,0xd3]
+ vrsqrtpbf16 xmm2 {k7}, xmm3
+
+// CHECK: vrsqrtpbf16 xmm2 {k7} {z}, xmm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0x8f,0x4e,0xd3]
+ vrsqrtpbf16 xmm2 {k7} {z}, xmm3
+
+// CHECK: vrsqrtpbf16 zmm2, zmm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4e,0xd3]
+ vrsqrtpbf16 zmm2, zmm3
+
+// CHECK: vrsqrtpbf16 zmm2 {k7}, zmm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0x4f,0x4e,0xd3]
+ vrsqrtpbf16 zmm2 {k7}, zmm3
+
+// CHECK: vrsqrtpbf16 zmm2 {k7} {z}, zmm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0xcf,0x4e,0xd3]
+ vrsqrtpbf16 zmm2 {k7} {z}, zmm3
+
+// CHECK: vrsqrtpbf16 ymm2, ymm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4e,0xd3]
+ vrsqrtpbf16 ymm2, ymm3
+
+// CHECK: vrsqrtpbf16 ymm2 {k7}, ymm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0x2f,0x4e,0xd3]
+ vrsqrtpbf16 ymm2 {k7}, ymm3
+
+// CHECK: vrsqrtpbf16 ymm2 {k7} {z}, ymm3
+// CHECK: encoding: [0x62,0xf6,0x7c,0xaf,0x4e,0xd3]
+ vrsqrtpbf16 ymm2 {k7} {z}, ymm3
+
+// CHECK: vrsqrtpbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vrsqrtpbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vrsqrtpbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x0f,0x4e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vrsqrtpbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vrsqrtpbf16 xmm2, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x18,0x4e,0x10]
+ vrsqrtpbf16 xmm2, word ptr [eax]{1to8}
+
+// CHECK: vrsqrtpbf16 xmm2, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x08,0x4e,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vrsqrtpbf16 xmm2, xmmword ptr [2*ebp - 512]
+
+// CHECK: vrsqrtpbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x8f,0x4e,0x51,0x7f]
+ vrsqrtpbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+
+// CHECK: vrsqrtpbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x9f,0x4e,0x52,0x80]
+ vrsqrtpbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}
+
+// CHECK: vrsqrtpbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vrsqrtpbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vrsqrtpbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x2f,0x4e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vrsqrtpbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vrsqrtpbf16 ymm2, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x38,0x4e,0x10]
+ vrsqrtpbf16 ymm2, word ptr [eax]{1to16}
+
+// CHECK: vrsqrtpbf16 ymm2, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x28,0x4e,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vrsqrtpbf16 ymm2, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vrsqrtpbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x7c,0xaf,0x4e,0x51,0x7f]
+ vrsqrtpbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+
+// CHECK: vrsqrtpbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xbf,0x4e,0x52,0x80]
+ vrsqrtpbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}
+
+// CHECK: vrsqrtpbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vrsqrtpbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vrsqrtpbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x4f,0x4e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vrsqrtpbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vrsqrtpbf16 zmm2, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x7c,0x58,0x4e,0x10]
+ vrsqrtpbf16 zmm2, word ptr [eax]{1to32}
+
+// CHECK: vrsqrtpbf16 zmm2, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x7c,0x48,0x4e,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vrsqrtpbf16 zmm2, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vrsqrtpbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x7c,0xcf,0x4e,0x51,0x7f]
+ vrsqrtpbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+
+// CHECK: vrsqrtpbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x7c,0xdf,0x4e,0x52,0x80]
+ vrsqrtpbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}
+
+// CHECK: vscalefpbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x2c,0xd4]
+ vscalefpbf16 ymm2, ymm3, ymm4
+
+// CHECK: vscalefpbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x2c,0xd4]
+ vscalefpbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vscalefpbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x2c,0xd4]
+ vscalefpbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vscalefpbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x2c,0xd4]
+ vscalefpbf16 zmm2, zmm3, zmm4
+
+// CHECK: vscalefpbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x2c,0xd4]
+ vscalefpbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vscalefpbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x2c,0xd4]
+ vscalefpbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vscalefpbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x2c,0xd4]
+ vscalefpbf16 xmm2, xmm3, xmm4
+
+// CHECK: vscalefpbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x2c,0xd4]
+ vscalefpbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vscalefpbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x2c,0xd4]
+ vscalefpbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vscalefpbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vscalefpbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vscalefpbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x4f,0x2c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vscalefpbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vscalefpbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0x58,0x2c,0x10]
+ vscalefpbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vscalefpbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf6,0x64,0x48,0x2c,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vscalefpbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vscalefpbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf6,0x64,0xcf,0x2c,0x51,0x7f]
+ vscalefpbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vscalefpbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf6,0x64,0xdf,0x2c,0x52,0x80]
+ vscalefpbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vscalefpbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vscalefpbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vscalefpbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x2f,0x2c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vscalefpbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vscalefpbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0x38,0x2c,0x10]
+ vscalefpbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vscalefpbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf6,0x64,0x28,0x2c,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vscalefpbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vscalefpbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x64,0xaf,0x2c,0x51,0x7f]
+ vscalefpbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vscalefpbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x64,0xbf,0x2c,0x52,0x80]
+ vscalefpbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vscalefpbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vscalefpbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vscalefpbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf6,0x64,0x0f,0x2c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vscalefpbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vscalefpbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x18,0x2c,0x10]
+ vscalefpbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vscalefpbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf6,0x64,0x08,0x2c,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vscalefpbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vscalefpbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf6,0x64,0x8f,0x2c,0x51,0x7f]
+ vscalefpbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vscalefpbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf6,0x64,0x9f,0x2c,0x52,0x80]
+ vscalefpbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
+// CHECK: vsqrtnepbf16 xmm2, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x51,0xd3]
+ vsqrtnepbf16 xmm2, xmm3
+
+// CHECK: vsqrtnepbf16 xmm2 {k7}, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x51,0xd3]
+ vsqrtnepbf16 xmm2 {k7}, xmm3
+
+// CHECK: vsqrtnepbf16 xmm2 {k7} {z}, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x51,0xd3]
+ vsqrtnepbf16 xmm2 {k7} {z}, xmm3
+
+// CHECK: vsqrtnepbf16 zmm2, zmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x51,0xd3]
+ vsqrtnepbf16 zmm2, zmm3
+
+// CHECK: vsqrtnepbf16 zmm2 {k7}, zmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x51,0xd3]
+ vsqrtnepbf16 zmm2 {k7}, zmm3
+
+// CHECK: vsqrtnepbf16 zmm2 {k7} {z}, zmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x51,0xd3]
+ vsqrtnepbf16 zmm2 {k7} {z}, zmm3
+
+// CHECK: vsqrtnepbf16 ymm2, ymm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x51,0xd3]
+ vsqrtnepbf16 ymm2, ymm3
+
+// CHECK: vsqrtnepbf16 ymm2 {k7}, ymm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x51,0xd3]
+ vsqrtnepbf16 ymm2 {k7}, ymm3
+
+// CHECK: vsqrtnepbf16 ymm2 {k7} {z}, ymm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x51,0xd3]
+ vsqrtnepbf16 ymm2 {k7} {z}, ymm3
+
+// CHECK: vsqrtnepbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x51,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsqrtnepbf16 xmm2, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vsqrtnepbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x51,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsqrtnepbf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vsqrtnepbf16 xmm2, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x51,0x10]
+ vsqrtnepbf16 xmm2, word ptr [eax]{1to8}
+
+// CHECK: vsqrtnepbf16 xmm2, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x51,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vsqrtnepbf16 xmm2, xmmword ptr [2*ebp - 512]
+
+// CHECK: vsqrtnepbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x51,0x51,0x7f]
+ vsqrtnepbf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+
+// CHECK: vsqrtnepbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x51,0x52,0x80]
+ vsqrtnepbf16 xmm2 {k7} {z}, word ptr [edx - 256]{1to8}
+
+// CHECK: vsqrtnepbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x51,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsqrtnepbf16 ymm2, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vsqrtnepbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x51,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsqrtnepbf16 ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vsqrtnepbf16 ymm2, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x51,0x10]
+ vsqrtnepbf16 ymm2, word ptr [eax]{1to16}
+
+// CHECK: vsqrtnepbf16 ymm2, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x51,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vsqrtnepbf16 ymm2, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vsqrtnepbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x51,0x51,0x7f]
+ vsqrtnepbf16 ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+
+// CHECK: vsqrtnepbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x51,0x52,0x80]
+ vsqrtnepbf16 ymm2 {k7} {z}, word ptr [edx - 256]{1to16}
+
+// CHECK: vsqrtnepbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x51,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsqrtnepbf16 zmm2, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vsqrtnepbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x51,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsqrtnepbf16 zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vsqrtnepbf16 zmm2, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x51,0x10]
+ vsqrtnepbf16 zmm2, word ptr [eax]{1to32}
+
+// CHECK: vsqrtnepbf16 zmm2, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x51,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vsqrtnepbf16 zmm2, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vsqrtnepbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x51,0x51,0x7f]
+ vsqrtnepbf16 zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+
+// CHECK: vsqrtnepbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x51,0x52,0x80]
+ vsqrtnepbf16 zmm2 {k7} {z}, word ptr [edx - 256]{1to32}
+
+// CHECK: vsubnepbf16 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5c,0xd4]
+ vsubnepbf16 ymm2, ymm3, ymm4
+
+// CHECK: vsubnepbf16 ymm2 {k7}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5c,0xd4]
+ vsubnepbf16 ymm2 {k7}, ymm3, ymm4
+
+// CHECK: vsubnepbf16 ymm2 {k7} {z}, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5c,0xd4]
+ vsubnepbf16 ymm2 {k7} {z}, ymm3, ymm4
+
+// CHECK: vsubnepbf16 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5c,0xd4]
+ vsubnepbf16 zmm2, zmm3, zmm4
+
+// CHECK: vsubnepbf16 zmm2 {k7}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5c,0xd4]
+ vsubnepbf16 zmm2 {k7}, zmm3, zmm4
+
+// CHECK: vsubnepbf16 zmm2 {k7} {z}, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5c,0xd4]
+ vsubnepbf16 zmm2 {k7} {z}, zmm3, zmm4
+
+// CHECK: vsubnepbf16 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5c,0xd4]
+ vsubnepbf16 xmm2, xmm3, xmm4
+
+// CHECK: vsubnepbf16 xmm2 {k7}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5c,0xd4]
+ vsubnepbf16 xmm2 {k7}, xmm3, xmm4
+
+// CHECK: vsubnepbf16 xmm2 {k7} {z}, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5c,0xd4]
+ vsubnepbf16 xmm2 {k7} {z}, xmm3, xmm4
+
+// CHECK: vsubnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsubnepbf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vsubnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x4f,0x5c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsubnepbf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vsubnepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x65,0x58,0x5c,0x10]
+ vsubnepbf16 zmm2, zmm3, word ptr [eax]{1to32}
+
+// CHECK: vsubnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf5,0x65,0x48,0x5c,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vsubnepbf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vsubnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x65,0xcf,0x5c,0x51,0x7f]
+ vsubnepbf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vsubnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x65,0xdf,0x5c,0x52,0x80]
+ vsubnepbf16 zmm2 {k7} {z}, zmm3, word ptr [edx - 256]{1to32}
+
+// CHECK: vsubnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsubnepbf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vsubnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x2f,0x5c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsubnepbf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vsubnepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x65,0x38,0x5c,0x10]
+ vsubnepbf16 ymm2, ymm3, word ptr [eax]{1to16}
+
+// CHECK: vsubnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf5,0x65,0x28,0x5c,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vsubnepbf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vsubnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0x65,0xaf,0x5c,0x51,0x7f]
+ vsubnepbf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vsubnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x65,0xbf,0x5c,0x52,0x80]
+ vsubnepbf16 ymm2 {k7} {z}, ymm3, word ptr [edx - 256]{1to16}
+
+// CHECK: vsubnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsubnepbf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vsubnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x65,0x0f,0x5c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsubnepbf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vsubnepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x65,0x18,0x5c,0x10]
+ vsubnepbf16 xmm2, xmm3, word ptr [eax]{1to8}
+
+// CHECK: vsubnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf5,0x65,0x08,0x5c,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vsubnepbf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vsubnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x65,0x8f,0x5c,0x51,0x7f]
+ vsubnepbf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vsubnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x65,0x9f,0x5c,0x52,0x80]
+ vsubnepbf16 xmm2 {k7} {z}, xmm3, word ptr [edx - 256]{1to8}
+
diff --git a/llvm/test/MC/X86/avx10.2-bf16-64-att.s b/llvm/test/MC/X86/avx10.2-bf16-64-att.s
new file mode 100644
index 00000000000000..85d99cfe0a7043
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-bf16-64-att.s
@@ -0,0 +1,3014 @@
+// RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s
+
+// CHECK: vaddnepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x85,0x45,0x20,0x58,0xf0]
+ vaddnepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vaddnepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x27,0x58,0xf0]
+ vaddnepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vaddnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x58,0xf0]
+ vaddnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vaddnepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x85,0x45,0x40,0x58,0xf0]
+ vaddnepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vaddnepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x47,0x58,0xf0]
+ vaddnepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vaddnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x58,0xf0]
+ vaddnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vaddnepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x85,0x45,0x00,0x58,0xf0]
+ vaddnepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vaddnepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x07,0x58,0xf0]
+ vaddnepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vaddnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0x87,0x58,0xf0]
+ vaddnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vaddnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x58,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vaddnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vaddnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x58,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vaddnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vaddnepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x58,0x35,0x00,0x00,0x00,0x00]
+ vaddnepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vaddnepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x58,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vaddnepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vaddnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x58,0x71,0x7f]
+ vaddnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vaddnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x58,0x72,0x80]
+ vaddnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vaddnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x58,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vaddnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vaddnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x58,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vaddnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vaddnepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x58,0x35,0x00,0x00,0x00,0x00]
+ vaddnepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vaddnepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x58,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vaddnepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vaddnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x58,0x71,0x7f]
+ vaddnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vaddnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x58,0x72,0x80]
+ vaddnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vaddnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x58,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vaddnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vaddnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x58,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vaddnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vaddnepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x58,0x35,0x00,0x00,0x00,0x00]
+ vaddnepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vaddnepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x58,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vaddnepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vaddnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x58,0x71,0x7f]
+ vaddnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vaddnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x58,0x72,0x80]
+ vaddnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vcmppbf16 $123, %ymm24, %ymm23, %k5
+// CHECK: encoding: [0x62,0x93,0x47,0x20,0xc2,0xe8,0x7b]
+ vcmppbf16 $123, %ymm24, %ymm23, %k5
+
+// CHECK: vcmppbf16 $123, %ymm24, %ymm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0x93,0x47,0x27,0xc2,0xe8,0x7b]
+ vcmppbf16 $123, %ymm24, %ymm23, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, %xmm24, %xmm23, %k5
+// CHECK: encoding: [0x62,0x93,0x47,0x00,0xc2,0xe8,0x7b]
+ vcmppbf16 $123, %xmm24, %xmm23, %k5
+
+// CHECK: vcmppbf16 $123, %xmm24, %xmm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0x93,0x47,0x07,0xc2,0xe8,0x7b]
+ vcmppbf16 $123, %xmm24, %xmm23, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, %zmm24, %zmm23, %k5
+// CHECK: encoding: [0x62,0x93,0x47,0x40,0xc2,0xe8,0x7b]
+ vcmppbf16 $123, %zmm24, %zmm23, %k5
+
+// CHECK: vcmppbf16 $123, %zmm24, %zmm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0x93,0x47,0x47,0xc2,0xe8,0x7b]
+ vcmppbf16 $123, %zmm24, %zmm23, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, 268435456(%rbp,%r14,8), %zmm23, %k5
+// CHECK: encoding: [0x62,0xb3,0x47,0x40,0xc2,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vcmppbf16 $123, 268435456(%rbp,%r14,8), %zmm23, %k5
+
+// CHECK: vcmppbf16 $123, 291(%r8,%rax,4), %zmm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0xd3,0x47,0x47,0xc2,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vcmppbf16 $123, 291(%r8,%rax,4), %zmm23, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, (%rip){1to32}, %zmm23, %k5
+// CHECK: encoding: [0x62,0xf3,0x47,0x50,0xc2,0x2d,0x00,0x00,0x00,0x00,0x7b]
+ vcmppbf16 $123, (%rip){1to32}, %zmm23, %k5
+
+// CHECK: vcmppbf16 $123, -2048(,%rbp,2), %zmm23, %k5
+// CHECK: encoding: [0x62,0xf3,0x47,0x40,0xc2,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vcmppbf16 $123, -2048(,%rbp,2), %zmm23, %k5
+
+// CHECK: vcmppbf16 $123, 8128(%rcx), %zmm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x47,0x47,0xc2,0x69,0x7f,0x7b]
+ vcmppbf16 $123, 8128(%rcx), %zmm23, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, -256(%rdx){1to32}, %zmm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x47,0x57,0xc2,0x6a,0x80,0x7b]
+ vcmppbf16 $123, -256(%rdx){1to32}, %zmm23, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, 268435456(%rbp,%r14,8), %xmm23, %k5
+// CHECK: encoding: [0x62,0xb3,0x47,0x00,0xc2,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vcmppbf16 $123, 268435456(%rbp,%r14,8), %xmm23, %k5
+
+// CHECK: vcmppbf16 $123, 291(%r8,%rax,4), %xmm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0xd3,0x47,0x07,0xc2,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vcmppbf16 $123, 291(%r8,%rax,4), %xmm23, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, (%rip){1to8}, %xmm23, %k5
+// CHECK: encoding: [0x62,0xf3,0x47,0x10,0xc2,0x2d,0x00,0x00,0x00,0x00,0x7b]
+ vcmppbf16 $123, (%rip){1to8}, %xmm23, %k5
+
+// CHECK: vcmppbf16 $123, -512(,%rbp,2), %xmm23, %k5
+// CHECK: encoding: [0x62,0xf3,0x47,0x00,0xc2,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vcmppbf16 $123, -512(,%rbp,2), %xmm23, %k5
+
+// CHECK: vcmppbf16 $123, 2032(%rcx), %xmm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x47,0x07,0xc2,0x69,0x7f,0x7b]
+ vcmppbf16 $123, 2032(%rcx), %xmm23, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, -256(%rdx){1to8}, %xmm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x47,0x17,0xc2,0x6a,0x80,0x7b]
+ vcmppbf16 $123, -256(%rdx){1to8}, %xmm23, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, 268435456(%rbp,%r14,8), %ymm23, %k5
+// CHECK: encoding: [0x62,0xb3,0x47,0x20,0xc2,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vcmppbf16 $123, 268435456(%rbp,%r14,8), %ymm23, %k5
+
+// CHECK: vcmppbf16 $123, 291(%r8,%rax,4), %ymm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0xd3,0x47,0x27,0xc2,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vcmppbf16 $123, 291(%r8,%rax,4), %ymm23, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, (%rip){1to16}, %ymm23, %k5
+// CHECK: encoding: [0x62,0xf3,0x47,0x30,0xc2,0x2d,0x00,0x00,0x00,0x00,0x7b]
+ vcmppbf16 $123, (%rip){1to16}, %ymm23, %k5
+
+// CHECK: vcmppbf16 $123, -1024(,%rbp,2), %ymm23, %k5
+// CHECK: encoding: [0x62,0xf3,0x47,0x20,0xc2,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vcmppbf16 $123, -1024(,%rbp,2), %ymm23, %k5
+
+// CHECK: vcmppbf16 $123, 4064(%rcx), %ymm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x47,0x27,0xc2,0x69,0x7f,0x7b]
+ vcmppbf16 $123, 4064(%rcx), %ymm23, %k5 {%k7}
+
+// CHECK: vcmppbf16 $123, -256(%rdx){1to16}, %ymm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x47,0x37,0xc2,0x6a,0x80,0x7b]
+ vcmppbf16 $123, -256(%rdx){1to16}, %ymm23, %k5 {%k7}
+
+// CHECK: vcomsbf16 %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x2f,0xf7]
+ vcomsbf16 %xmm23, %xmm22
+
+// CHECK: vcomsbf16 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcomsbf16 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vcomsbf16 291(%r8,%rax,4), %xmm22
+// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcomsbf16 291(%r8,%rax,4), %xmm22
+
+// CHECK: vcomsbf16 (%rip), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x2f,0x35,0x00,0x00,0x00,0x00]
+ vcomsbf16 (%rip), %xmm22
+
+// CHECK: vcomsbf16 -64(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x2f,0x34,0x6d,0xc0,0xff,0xff,0xff]
+ vcomsbf16 -64(,%rbp,2), %xmm22
+
+// CHECK: vcomsbf16 254(%rcx), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x2f,0x71,0x7f]
+ vcomsbf16 254(%rcx), %xmm22
+
+// CHECK: vcomsbf16 -256(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x2f,0x72,0x80]
+ vcomsbf16 -256(%rdx), %xmm22
+
+// CHECK: vdivnepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x85,0x45,0x20,0x5e,0xf0]
+ vdivnepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vdivnepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x27,0x5e,0xf0]
+ vdivnepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vdivnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x5e,0xf0]
+ vdivnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vdivnepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x85,0x45,0x40,0x5e,0xf0]
+ vdivnepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vdivnepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x47,0x5e,0xf0]
+ vdivnepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vdivnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x5e,0xf0]
+ vdivnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vdivnepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x85,0x45,0x00,0x5e,0xf0]
+ vdivnepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vdivnepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x07,0x5e,0xf0]
+ vdivnepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vdivnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0x87,0x5e,0xf0]
+ vdivnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vdivnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x5e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vdivnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vdivnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x5e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vdivnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vdivnepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x5e,0x35,0x00,0x00,0x00,0x00]
+ vdivnepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vdivnepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x5e,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vdivnepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vdivnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x5e,0x71,0x7f]
+ vdivnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vdivnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x5e,0x72,0x80]
+ vdivnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vdivnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x5e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vdivnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vdivnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x5e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vdivnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vdivnepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x5e,0x35,0x00,0x00,0x00,0x00]
+ vdivnepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vdivnepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x5e,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vdivnepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vdivnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x5e,0x71,0x7f]
+ vdivnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vdivnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x5e,0x72,0x80]
+ vdivnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vdivnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x5e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vdivnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vdivnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x5e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vdivnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vdivnepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x5e,0x35,0x00,0x00,0x00,0x00]
+ vdivnepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vdivnepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x5e,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vdivnepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vdivnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x5e,0x71,0x7f]
+ vdivnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vdivnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x5e,0x72,0x80]
+ vdivnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0x98,0xf0]
+ vfmadd132nepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0x98,0xf0]
+ vfmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x98,0xf0]
+ vfmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0x98,0xf0]
+ vfmadd132nepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vfmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0x98,0xf0]
+ vfmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vfmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x98,0xf0]
+ vfmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0x98,0xf0]
+ vfmadd132nepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vfmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0x98,0xf0]
+ vfmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vfmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0x98,0xf0]
+ vfmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vfmadd132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0x98,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vfmadd132nepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x98,0x35,0x00,0x00,0x00,0x00]
+ vfmadd132nepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vfmadd132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x98,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfmadd132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vfmadd132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x98,0x71,0x7f]
+ vfmadd132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x98,0x72,0x80]
+ vfmadd132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vfmadd132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x98,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmadd132nepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x98,0x35,0x00,0x00,0x00,0x00]
+ vfmadd132nepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vfmadd132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x98,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfmadd132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vfmadd132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0x98,0x71,0x7f]
+ vfmadd132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x98,0x72,0x80]
+ vfmadd132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vfmadd132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x98,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vfmadd132nepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x98,0x35,0x00,0x00,0x00,0x00]
+ vfmadd132nepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vfmadd132nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x98,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfmadd132nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vfmadd132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x98,0x71,0x7f]
+ vfmadd132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmadd132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x98,0x72,0x80]
+ vfmadd132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xa8,0xf0]
+ vfmadd213nepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xa8,0xf0]
+ vfmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xa8,0xf0]
+ vfmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xa8,0xf0]
+ vfmadd213nepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vfmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xa8,0xf0]
+ vfmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vfmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xa8,0xf0]
+ vfmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xa8,0xf0]
+ vfmadd213nepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vfmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xa8,0xf0]
+ vfmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vfmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xa8,0xf0]
+ vfmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vfmadd213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xa8,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vfmadd213nepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xa8,0x35,0x00,0x00,0x00,0x00]
+ vfmadd213nepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vfmadd213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xa8,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfmadd213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vfmadd213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xa8,0x71,0x7f]
+ vfmadd213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xa8,0x72,0x80]
+ vfmadd213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vfmadd213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xa8,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmadd213nepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xa8,0x35,0x00,0x00,0x00,0x00]
+ vfmadd213nepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vfmadd213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xa8,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfmadd213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vfmadd213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xa8,0x71,0x7f]
+ vfmadd213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xa8,0x72,0x80]
+ vfmadd213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vfmadd213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xa8,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vfmadd213nepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xa8,0x35,0x00,0x00,0x00,0x00]
+ vfmadd213nepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vfmadd213nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xa8,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfmadd213nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vfmadd213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xa8,0x71,0x7f]
+ vfmadd213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmadd213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xa8,0x72,0x80]
+ vfmadd213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xb8,0xf0]
+ vfmadd231nepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xb8,0xf0]
+ vfmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xb8,0xf0]
+ vfmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xb8,0xf0]
+ vfmadd231nepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vfmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xb8,0xf0]
+ vfmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vfmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xb8,0xf0]
+ vfmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xb8,0xf0]
+ vfmadd231nepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vfmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xb8,0xf0]
+ vfmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vfmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xb8,0xf0]
+ vfmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vfmadd231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xb8,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vfmadd231nepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xb8,0x35,0x00,0x00,0x00,0x00]
+ vfmadd231nepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vfmadd231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xb8,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfmadd231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vfmadd231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xb8,0x71,0x7f]
+ vfmadd231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xb8,0x72,0x80]
+ vfmadd231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vfmadd231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xb8,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmadd231nepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xb8,0x35,0x00,0x00,0x00,0x00]
+ vfmadd231nepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vfmadd231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xb8,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfmadd231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vfmadd231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xb8,0x71,0x7f]
+ vfmadd231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xb8,0x72,0x80]
+ vfmadd231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vfmadd231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xb8,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vfmadd231nepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xb8,0x35,0x00,0x00,0x00,0x00]
+ vfmadd231nepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vfmadd231nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xb8,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfmadd231nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vfmadd231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xb8,0x71,0x7f]
+ vfmadd231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmadd231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xb8,0x72,0x80]
+ vfmadd231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0x9a,0xf0]
+ vfmsub132nepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0x9a,0xf0]
+ vfmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x9a,0xf0]
+ vfmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0x9a,0xf0]
+ vfmsub132nepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vfmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0x9a,0xf0]
+ vfmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vfmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x9a,0xf0]
+ vfmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0x9a,0xf0]
+ vfmsub132nepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vfmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0x9a,0xf0]
+ vfmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vfmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0x9a,0xf0]
+ vfmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vfmsub132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0x9a,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vfmsub132nepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x9a,0x35,0x00,0x00,0x00,0x00]
+ vfmsub132nepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vfmsub132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x9a,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfmsub132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vfmsub132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x9a,0x71,0x7f]
+ vfmsub132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x9a,0x72,0x80]
+ vfmsub132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vfmsub132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x9a,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsub132nepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x9a,0x35,0x00,0x00,0x00,0x00]
+ vfmsub132nepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vfmsub132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x9a,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfmsub132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vfmsub132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0x9a,0x71,0x7f]
+ vfmsub132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x9a,0x72,0x80]
+ vfmsub132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vfmsub132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x9a,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vfmsub132nepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x9a,0x35,0x00,0x00,0x00,0x00]
+ vfmsub132nepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vfmsub132nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x9a,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfmsub132nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vfmsub132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x9a,0x71,0x7f]
+ vfmsub132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmsub132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x9a,0x72,0x80]
+ vfmsub132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xaa,0xf0]
+ vfmsub213nepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xaa,0xf0]
+ vfmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xaa,0xf0]
+ vfmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xaa,0xf0]
+ vfmsub213nepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vfmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xaa,0xf0]
+ vfmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vfmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xaa,0xf0]
+ vfmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xaa,0xf0]
+ vfmsub213nepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vfmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xaa,0xf0]
+ vfmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vfmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xaa,0xf0]
+ vfmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vfmsub213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xaa,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vfmsub213nepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xaa,0x35,0x00,0x00,0x00,0x00]
+ vfmsub213nepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vfmsub213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xaa,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfmsub213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vfmsub213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xaa,0x71,0x7f]
+ vfmsub213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xaa,0x72,0x80]
+ vfmsub213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vfmsub213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xaa,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsub213nepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xaa,0x35,0x00,0x00,0x00,0x00]
+ vfmsub213nepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vfmsub213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xaa,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfmsub213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vfmsub213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xaa,0x71,0x7f]
+ vfmsub213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xaa,0x72,0x80]
+ vfmsub213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vfmsub213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xaa,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vfmsub213nepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xaa,0x35,0x00,0x00,0x00,0x00]
+ vfmsub213nepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vfmsub213nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xaa,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfmsub213nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vfmsub213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xaa,0x71,0x7f]
+ vfmsub213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmsub213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xaa,0x72,0x80]
+ vfmsub213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xba,0xf0]
+ vfmsub231nepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xba,0xf0]
+ vfmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xba,0xf0]
+ vfmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xba,0xf0]
+ vfmsub231nepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vfmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xba,0xf0]
+ vfmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vfmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xba,0xf0]
+ vfmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xba,0xf0]
+ vfmsub231nepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vfmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xba,0xf0]
+ vfmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vfmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xba,0xf0]
+ vfmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vfmsub231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xba,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vfmsub231nepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xba,0x35,0x00,0x00,0x00,0x00]
+ vfmsub231nepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vfmsub231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xba,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfmsub231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vfmsub231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xba,0x71,0x7f]
+ vfmsub231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xba,0x72,0x80]
+ vfmsub231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vfmsub231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xba,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsub231nepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xba,0x35,0x00,0x00,0x00,0x00]
+ vfmsub231nepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vfmsub231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xba,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfmsub231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vfmsub231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xba,0x71,0x7f]
+ vfmsub231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xba,0x72,0x80]
+ vfmsub231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vfmsub231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xba,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vfmsub231nepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xba,0x35,0x00,0x00,0x00,0x00]
+ vfmsub231nepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vfmsub231nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xba,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfmsub231nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vfmsub231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xba,0x71,0x7f]
+ vfmsub231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfmsub231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xba,0x72,0x80]
+ vfmsub231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0x9c,0xf0]
+ vfnmadd132nepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0x9c,0xf0]
+ vfnmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x9c,0xf0]
+ vfnmadd132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0x9c,0xf0]
+ vfnmadd132nepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vfnmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0x9c,0xf0]
+ vfnmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vfnmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x9c,0xf0]
+ vfnmadd132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0x9c,0xf0]
+ vfnmadd132nepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vfnmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0x9c,0xf0]
+ vfnmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vfnmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0x9c,0xf0]
+ vfnmadd132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vfnmadd132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0x9c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vfnmadd132nepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x9c,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd132nepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vfnmadd132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x9c,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmadd132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vfnmadd132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x9c,0x71,0x7f]
+ vfnmadd132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x9c,0x72,0x80]
+ vfnmadd132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vfnmadd132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x9c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmadd132nepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x9c,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd132nepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vfnmadd132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x9c,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmadd132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vfnmadd132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0x9c,0x71,0x7f]
+ vfnmadd132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x9c,0x72,0x80]
+ vfnmadd132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vfnmadd132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x9c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vfnmadd132nepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x9c,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd132nepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vfnmadd132nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x9c,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmadd132nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vfnmadd132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x9c,0x71,0x7f]
+ vfnmadd132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmadd132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x9c,0x72,0x80]
+ vfnmadd132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xac,0xf0]
+ vfnmadd213nepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xac,0xf0]
+ vfnmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xac,0xf0]
+ vfnmadd213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xac,0xf0]
+ vfnmadd213nepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vfnmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xac,0xf0]
+ vfnmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vfnmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xac,0xf0]
+ vfnmadd213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xac,0xf0]
+ vfnmadd213nepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vfnmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xac,0xf0]
+ vfnmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vfnmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xac,0xf0]
+ vfnmadd213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vfnmadd213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xac,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vfnmadd213nepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xac,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd213nepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vfnmadd213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xac,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmadd213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vfnmadd213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xac,0x71,0x7f]
+ vfnmadd213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xac,0x72,0x80]
+ vfnmadd213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vfnmadd213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xac,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmadd213nepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xac,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd213nepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vfnmadd213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xac,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmadd213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vfnmadd213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xac,0x71,0x7f]
+ vfnmadd213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xac,0x72,0x80]
+ vfnmadd213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vfnmadd213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xac,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vfnmadd213nepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xac,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd213nepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vfnmadd213nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xac,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmadd213nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vfnmadd213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xac,0x71,0x7f]
+ vfnmadd213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmadd213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xac,0x72,0x80]
+ vfnmadd213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xbc,0xf0]
+ vfnmadd231nepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xbc,0xf0]
+ vfnmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xbc,0xf0]
+ vfnmadd231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xbc,0xf0]
+ vfnmadd231nepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vfnmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xbc,0xf0]
+ vfnmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vfnmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xbc,0xf0]
+ vfnmadd231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xbc,0xf0]
+ vfnmadd231nepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vfnmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xbc,0xf0]
+ vfnmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vfnmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xbc,0xf0]
+ vfnmadd231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vfnmadd231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xbc,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vfnmadd231nepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xbc,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd231nepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vfnmadd231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xbc,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmadd231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vfnmadd231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xbc,0x71,0x7f]
+ vfnmadd231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xbc,0x72,0x80]
+ vfnmadd231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vfnmadd231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xbc,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmadd231nepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xbc,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd231nepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vfnmadd231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xbc,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmadd231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vfnmadd231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xbc,0x71,0x7f]
+ vfnmadd231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xbc,0x72,0x80]
+ vfnmadd231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vfnmadd231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xbc,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vfnmadd231nepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xbc,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd231nepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vfnmadd231nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xbc,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmadd231nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vfnmadd231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xbc,0x71,0x7f]
+ vfnmadd231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmadd231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xbc,0x72,0x80]
+ vfnmadd231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0x9e,0xf0]
+ vfnmsub132nepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0x9e,0xf0]
+ vfnmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x9e,0xf0]
+ vfnmsub132nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0x9e,0xf0]
+ vfnmsub132nepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vfnmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0x9e,0xf0]
+ vfnmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vfnmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x9e,0xf0]
+ vfnmsub132nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0x9e,0xf0]
+ vfnmsub132nepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vfnmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0x9e,0xf0]
+ vfnmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vfnmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0x9e,0xf0]
+ vfnmsub132nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub132nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vfnmsub132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0x9e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub132nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vfnmsub132nepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x9e,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub132nepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vfnmsub132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x9e,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmsub132nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vfnmsub132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x9e,0x71,0x7f]
+ vfnmsub132nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x9e,0x72,0x80]
+ vfnmsub132nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub132nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vfnmsub132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x9e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub132nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmsub132nepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x9e,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub132nepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vfnmsub132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x9e,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmsub132nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vfnmsub132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0x9e,0x71,0x7f]
+ vfnmsub132nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x9e,0x72,0x80]
+ vfnmsub132nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub132nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vfnmsub132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x9e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub132nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vfnmsub132nepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x9e,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub132nepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vfnmsub132nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x9e,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmsub132nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vfnmsub132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x9e,0x71,0x7f]
+ vfnmsub132nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmsub132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x9e,0x72,0x80]
+ vfnmsub132nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xae,0xf0]
+ vfnmsub213nepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xae,0xf0]
+ vfnmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xae,0xf0]
+ vfnmsub213nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xae,0xf0]
+ vfnmsub213nepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vfnmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xae,0xf0]
+ vfnmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vfnmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xae,0xf0]
+ vfnmsub213nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xae,0xf0]
+ vfnmsub213nepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vfnmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xae,0xf0]
+ vfnmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vfnmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xae,0xf0]
+ vfnmsub213nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub213nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vfnmsub213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xae,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub213nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vfnmsub213nepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xae,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub213nepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vfnmsub213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xae,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmsub213nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vfnmsub213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xae,0x71,0x7f]
+ vfnmsub213nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xae,0x72,0x80]
+ vfnmsub213nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub213nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vfnmsub213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xae,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub213nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmsub213nepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xae,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub213nepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vfnmsub213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xae,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmsub213nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vfnmsub213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xae,0x71,0x7f]
+ vfnmsub213nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xae,0x72,0x80]
+ vfnmsub213nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub213nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vfnmsub213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xae,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub213nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vfnmsub213nepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xae,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub213nepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vfnmsub213nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xae,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmsub213nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vfnmsub213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xae,0x71,0x7f]
+ vfnmsub213nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmsub213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xae,0x72,0x80]
+ vfnmsub213nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xbe,0xf0]
+ vfnmsub231nepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xbe,0xf0]
+ vfnmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xbe,0xf0]
+ vfnmsub231nepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xbe,0xf0]
+ vfnmsub231nepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vfnmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xbe,0xf0]
+ vfnmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vfnmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xbe,0xf0]
+ vfnmsub231nepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xbe,0xf0]
+ vfnmsub231nepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vfnmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xbe,0xf0]
+ vfnmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vfnmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xbe,0xf0]
+ vfnmsub231nepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub231nepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vfnmsub231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xbe,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub231nepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vfnmsub231nepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xbe,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub231nepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vfnmsub231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xbe,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmsub231nepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vfnmsub231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xbe,0x71,0x7f]
+ vfnmsub231nepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xbe,0x72,0x80]
+ vfnmsub231nepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub231nepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vfnmsub231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xbe,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub231nepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmsub231nepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xbe,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub231nepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vfnmsub231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xbe,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmsub231nepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vfnmsub231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xbe,0x71,0x7f]
+ vfnmsub231nepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xbe,0x72,0x80]
+ vfnmsub231nepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub231nepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vfnmsub231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xbe,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub231nepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vfnmsub231nepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xbe,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub231nepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vfnmsub231nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xbe,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmsub231nepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vfnmsub231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xbe,0x71,0x7f]
+ vfnmsub231nepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfnmsub231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xbe,0x72,0x80]
+ vfnmsub231nepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vfpclasspbf16 $123, %zmm23, %k5
+// CHECK: encoding: [0x62,0xb3,0x7f,0x48,0x66,0xef,0x7b]
+ vfpclasspbf16 $123, %zmm23, %k5
+
+// CHECK: vfpclasspbf16 $123, %zmm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0xb3,0x7f,0x4f,0x66,0xef,0x7b]
+ vfpclasspbf16 $123, %zmm23, %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, %ymm23, %k5
+// CHECK: encoding: [0x62,0xb3,0x7f,0x28,0x66,0xef,0x7b]
+ vfpclasspbf16 $123, %ymm23, %k5
+
+// CHECK: vfpclasspbf16 $123, %ymm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0xb3,0x7f,0x2f,0x66,0xef,0x7b]
+ vfpclasspbf16 $123, %ymm23, %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, %xmm23, %k5
+// CHECK: encoding: [0x62,0xb3,0x7f,0x08,0x66,0xef,0x7b]
+ vfpclasspbf16 $123, %xmm23, %k5
+
+// CHECK: vfpclasspbf16 $123, %xmm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0xb3,0x7f,0x0f,0x66,0xef,0x7b]
+ vfpclasspbf16 $123, %xmm23, %k5 {%k7}
+
+// CHECK: vfpclasspbf16x $123, 268435456(%rbp,%r14,8), %k5
+// CHECK: encoding: [0x62,0xb3,0x7f,0x08,0x66,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vfpclasspbf16x $123, 268435456(%rbp,%r14,8), %k5
+
+// CHECK: vfpclasspbf16x $123, 291(%r8,%rax,4), %k5 {%k7}
+// CHECK: encoding: [0x62,0xd3,0x7f,0x0f,0x66,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vfpclasspbf16x $123, 291(%r8,%rax,4), %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, (%rip){1to8}, %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x66,0x2d,0x00,0x00,0x00,0x00,0x7b]
+ vfpclasspbf16 $123, (%rip){1to8}, %k5
+
+// CHECK: vfpclasspbf16x $123, -512(,%rbp,2), %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x66,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vfpclasspbf16x $123, -512(,%rbp,2), %k5
+
+// CHECK: vfpclasspbf16x $123, 2032(%rcx), %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x66,0x69,0x7f,0x7b]
+ vfpclasspbf16x $123, 2032(%rcx), %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, -256(%rdx){1to8}, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x1f,0x66,0x6a,0x80,0x7b]
+ vfpclasspbf16 $123, -256(%rdx){1to8}, %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, (%rip){1to16}, %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x66,0x2d,0x00,0x00,0x00,0x00,0x7b]
+ vfpclasspbf16 $123, (%rip){1to16}, %k5
+
+// CHECK: vfpclasspbf16y $123, -1024(,%rbp,2), %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x66,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vfpclasspbf16y $123, -1024(,%rbp,2), %k5
+
+// CHECK: vfpclasspbf16y $123, 4064(%rcx), %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x66,0x69,0x7f,0x7b]
+ vfpclasspbf16y $123, 4064(%rcx), %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, -256(%rdx){1to16}, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x3f,0x66,0x6a,0x80,0x7b]
+ vfpclasspbf16 $123, -256(%rdx){1to16}, %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, (%rip){1to32}, %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x58,0x66,0x2d,0x00,0x00,0x00,0x00,0x7b]
+ vfpclasspbf16 $123, (%rip){1to32}, %k5
+
+// CHECK: vfpclasspbf16z $123, -2048(,%rbp,2), %k5
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x66,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vfpclasspbf16z $123, -2048(,%rbp,2), %k5
+
+// CHECK: vfpclasspbf16z $123, 8128(%rcx), %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x66,0x69,0x7f,0x7b]
+ vfpclasspbf16z $123, 8128(%rcx), %k5 {%k7}
+
+// CHECK: vfpclasspbf16 $123, -256(%rdx){1to32}, %k5 {%k7}
+// CHECK: encoding: [0x62,0xf3,0x7f,0x5f,0x66,0x6a,0x80,0x7b]
+ vfpclasspbf16 $123, -256(%rdx){1to32}, %k5 {%k7}
+
+// CHECK: vgetexppbf16 %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x42,0xf7]
+ vgetexppbf16 %xmm23, %xmm22
+
+// CHECK: vgetexppbf16 %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x0f,0x42,0xf7]
+ vgetexppbf16 %xmm23, %xmm22 {%k7}
+
+// CHECK: vgetexppbf16 %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x8f,0x42,0xf7]
+ vgetexppbf16 %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vgetexppbf16 %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x42,0xf7]
+ vgetexppbf16 %zmm23, %zmm22
+
+// CHECK: vgetexppbf16 %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x4f,0x42,0xf7]
+ vgetexppbf16 %zmm23, %zmm22 {%k7}
+
+// CHECK: vgetexppbf16 %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x7d,0xcf,0x42,0xf7]
+ vgetexppbf16 %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vgetexppbf16 %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x42,0xf7]
+ vgetexppbf16 %ymm23, %ymm22
+
+// CHECK: vgetexppbf16 %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x2f,0x42,0xf7]
+ vgetexppbf16 %ymm23, %ymm22 {%k7}
+
+// CHECK: vgetexppbf16 %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x7d,0xaf,0x42,0xf7]
+ vgetexppbf16 %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vgetexppbf16 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vgetexppbf16 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vgetexppbf16 291(%r8,%rax,4), %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7d,0x0f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vgetexppbf16 291(%r8,%rax,4), %xmm22 {%k7}
+
+// CHECK: vgetexppbf16 (%rip){1to8}, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x18,0x42,0x35,0x00,0x00,0x00,0x00]
+ vgetexppbf16 (%rip){1to8}, %xmm22
+
+// CHECK: vgetexppbf16 -512(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x42,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vgetexppbf16 -512(,%rbp,2), %xmm22
+
+// CHECK: vgetexppbf16 2032(%rcx), %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x8f,0x42,0x71,0x7f]
+ vgetexppbf16 2032(%rcx), %xmm22 {%k7} {z}
+
+// CHECK: vgetexppbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x9f,0x42,0x72,0x80]
+ vgetexppbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z}
+
+// CHECK: vgetexppbf16 268435456(%rbp,%r14,8), %ymm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vgetexppbf16 268435456(%rbp,%r14,8), %ymm22
+
+// CHECK: vgetexppbf16 291(%r8,%rax,4), %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7d,0x2f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vgetexppbf16 291(%r8,%rax,4), %ymm22 {%k7}
+
+// CHECK: vgetexppbf16 (%rip){1to16}, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x38,0x42,0x35,0x00,0x00,0x00,0x00]
+ vgetexppbf16 (%rip){1to16}, %ymm22
+
+// CHECK: vgetexppbf16 -1024(,%rbp,2), %ymm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x28,0x42,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vgetexppbf16 -1024(,%rbp,2), %ymm22
+
+// CHECK: vgetexppbf16 4064(%rcx), %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xaf,0x42,0x71,0x7f]
+ vgetexppbf16 4064(%rcx), %ymm22 {%k7} {z}
+
+// CHECK: vgetexppbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xbf,0x42,0x72,0x80]
+ vgetexppbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z}
+
+// CHECK: vgetexppbf16 268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vgetexppbf16 268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vgetexppbf16 291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7d,0x4f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vgetexppbf16 291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vgetexppbf16 (%rip){1to32}, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x58,0x42,0x35,0x00,0x00,0x00,0x00]
+ vgetexppbf16 (%rip){1to32}, %zmm22
+
+// CHECK: vgetexppbf16 -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x48,0x42,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vgetexppbf16 -2048(,%rbp,2), %zmm22
+
+// CHECK: vgetexppbf16 8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xcf,0x42,0x71,0x7f]
+ vgetexppbf16 8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vgetexppbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xdf,0x42,0x72,0x80]
+ vgetexppbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x26,0xf7,0x7b]
+ vgetmantpbf16 $123, %zmm23, %zmm22
+
+// CHECK: vgetmantpbf16 $123, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xa3,0x7f,0x4f,0x26,0xf7,0x7b]
+ vgetmantpbf16 $123, %zmm23, %zmm22 {%k7}
+
+// CHECK: vgetmantpbf16 $123, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0x7f,0xcf,0x26,0xf7,0x7b]
+ vgetmantpbf16 $123, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x26,0xf7,0x7b]
+ vgetmantpbf16 $123, %ymm23, %ymm22
+
+// CHECK: vgetmantpbf16 $123, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa3,0x7f,0x2f,0x26,0xf7,0x7b]
+ vgetmantpbf16 $123, %ymm23, %ymm22 {%k7}
+
+// CHECK: vgetmantpbf16 $123, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0x7f,0xaf,0x26,0xf7,0x7b]
+ vgetmantpbf16 $123, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x26,0xf7,0x7b]
+ vgetmantpbf16 $123, %xmm23, %xmm22
+
+// CHECK: vgetmantpbf16 $123, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa3,0x7f,0x0f,0x26,0xf7,0x7b]
+ vgetmantpbf16 $123, %xmm23, %xmm22 {%k7}
+
+// CHECK: vgetmantpbf16 $123, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0x7f,0x8f,0x26,0xf7,0x7b]
+ vgetmantpbf16 $123, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x26,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vgetmantpbf16 $123, 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vgetmantpbf16 $123, 291(%r8,%rax,4), %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc3,0x7f,0x0f,0x26,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantpbf16 $123, 291(%r8,%rax,4), %xmm22 {%k7}
+
+// CHECK: vgetmantpbf16 $123, (%rip){1to8}, %xmm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x18,0x26,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vgetmantpbf16 $123, (%rip){1to8}, %xmm22
+
+// CHECK: vgetmantpbf16 $123, -512(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x08,0x26,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vgetmantpbf16 $123, -512(,%rbp,2), %xmm22
+
+// CHECK: vgetmantpbf16 $123, 2032(%rcx), %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0x8f,0x26,0x71,0x7f,0x7b]
+ vgetmantpbf16 $123, 2032(%rcx), %xmm22 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, -256(%rdx){1to8}, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0x9f,0x26,0x72,0x80,0x7b]
+ vgetmantpbf16 $123, -256(%rdx){1to8}, %xmm22 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, 268435456(%rbp,%r14,8), %ymm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x26,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vgetmantpbf16 $123, 268435456(%rbp,%r14,8), %ymm22
+
+// CHECK: vgetmantpbf16 $123, 291(%r8,%rax,4), %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc3,0x7f,0x2f,0x26,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantpbf16 $123, 291(%r8,%rax,4), %ymm22 {%k7}
+
+// CHECK: vgetmantpbf16 $123, (%rip){1to16}, %ymm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x38,0x26,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vgetmantpbf16 $123, (%rip){1to16}, %ymm22
+
+// CHECK: vgetmantpbf16 $123, -1024(,%rbp,2), %ymm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x28,0x26,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vgetmantpbf16 $123, -1024(,%rbp,2), %ymm22
+
+// CHECK: vgetmantpbf16 $123, 4064(%rcx), %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0xaf,0x26,0x71,0x7f,0x7b]
+ vgetmantpbf16 $123, 4064(%rcx), %ymm22 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, -256(%rdx){1to16}, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0xbf,0x26,0x72,0x80,0x7b]
+ vgetmantpbf16 $123, -256(%rdx){1to16}, %ymm22 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, 268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x26,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vgetmantpbf16 $123, 268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vgetmantpbf16 $123, 291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc3,0x7f,0x4f,0x26,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantpbf16 $123, 291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vgetmantpbf16 $123, (%rip){1to32}, %zmm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x58,0x26,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vgetmantpbf16 $123, (%rip){1to32}, %zmm22
+
+// CHECK: vgetmantpbf16 $123, -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x26,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vgetmantpbf16 $123, -2048(,%rbp,2), %zmm22
+
+// CHECK: vgetmantpbf16 $123, 8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0xcf,0x26,0x71,0x7f,0x7b]
+ vgetmantpbf16 $123, 8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vgetmantpbf16 $123, -256(%rdx){1to32}, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0xdf,0x26,0x72,0x80,0x7b]
+ vgetmantpbf16 $123, -256(%rdx){1to32}, %zmm22 {%k7} {z}
+
+// CHECK: vmaxpbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x85,0x45,0x20,0x5f,0xf0]
+ vmaxpbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vmaxpbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x27,0x5f,0xf0]
+ vmaxpbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vmaxpbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x5f,0xf0]
+ vmaxpbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vmaxpbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x85,0x45,0x40,0x5f,0xf0]
+ vmaxpbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vmaxpbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x47,0x5f,0xf0]
+ vmaxpbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vmaxpbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x5f,0xf0]
+ vmaxpbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vmaxpbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x85,0x45,0x00,0x5f,0xf0]
+ vmaxpbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vmaxpbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x07,0x5f,0xf0]
+ vmaxpbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vmaxpbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0x87,0x5f,0xf0]
+ vmaxpbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vmaxpbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x5f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmaxpbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vmaxpbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x5f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmaxpbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vmaxpbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x5f,0x35,0x00,0x00,0x00,0x00]
+ vmaxpbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vmaxpbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x5f,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vmaxpbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vmaxpbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x5f,0x71,0x7f]
+ vmaxpbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vmaxpbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x5f,0x72,0x80]
+ vmaxpbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vmaxpbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x5f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmaxpbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vmaxpbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x5f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmaxpbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vmaxpbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x5f,0x35,0x00,0x00,0x00,0x00]
+ vmaxpbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vmaxpbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x5f,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vmaxpbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vmaxpbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x5f,0x71,0x7f]
+ vmaxpbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vmaxpbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x5f,0x72,0x80]
+ vmaxpbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vmaxpbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x5f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmaxpbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vmaxpbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x5f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmaxpbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vmaxpbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x5f,0x35,0x00,0x00,0x00,0x00]
+ vmaxpbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vmaxpbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x5f,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vmaxpbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vmaxpbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x5f,0x71,0x7f]
+ vmaxpbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vmaxpbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x5f,0x72,0x80]
+ vmaxpbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vminpbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x85,0x45,0x20,0x5d,0xf0]
+ vminpbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vminpbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x27,0x5d,0xf0]
+ vminpbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vminpbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x5d,0xf0]
+ vminpbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vminpbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x85,0x45,0x40,0x5d,0xf0]
+ vminpbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vminpbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x47,0x5d,0xf0]
+ vminpbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vminpbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x5d,0xf0]
+ vminpbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vminpbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x85,0x45,0x00,0x5d,0xf0]
+ vminpbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vminpbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x07,0x5d,0xf0]
+ vminpbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vminpbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0x87,0x5d,0xf0]
+ vminpbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vminpbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x5d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vminpbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vminpbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x5d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vminpbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vminpbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x5d,0x35,0x00,0x00,0x00,0x00]
+ vminpbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vminpbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x5d,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vminpbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vminpbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x5d,0x71,0x7f]
+ vminpbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vminpbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x5d,0x72,0x80]
+ vminpbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vminpbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x5d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vminpbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vminpbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x5d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vminpbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vminpbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x5d,0x35,0x00,0x00,0x00,0x00]
+ vminpbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vminpbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x5d,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vminpbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vminpbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x5d,0x71,0x7f]
+ vminpbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vminpbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x5d,0x72,0x80]
+ vminpbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vminpbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x5d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vminpbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vminpbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x5d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vminpbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vminpbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x5d,0x35,0x00,0x00,0x00,0x00]
+ vminpbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vminpbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x5d,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vminpbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vminpbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x5d,0x71,0x7f]
+ vminpbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vminpbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x5d,0x72,0x80]
+ vminpbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vmulnepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x85,0x45,0x20,0x59,0xf0]
+ vmulnepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vmulnepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x27,0x59,0xf0]
+ vmulnepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vmulnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x59,0xf0]
+ vmulnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vmulnepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x85,0x45,0x40,0x59,0xf0]
+ vmulnepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vmulnepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x47,0x59,0xf0]
+ vmulnepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vmulnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x59,0xf0]
+ vmulnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vmulnepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x85,0x45,0x00,0x59,0xf0]
+ vmulnepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vmulnepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x07,0x59,0xf0]
+ vmulnepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vmulnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0x87,0x59,0xf0]
+ vmulnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vmulnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x59,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmulnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vmulnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x59,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmulnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vmulnepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x59,0x35,0x00,0x00,0x00,0x00]
+ vmulnepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vmulnepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x59,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vmulnepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vmulnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x59,0x71,0x7f]
+ vmulnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vmulnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x59,0x72,0x80]
+ vmulnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vmulnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x59,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmulnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vmulnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x59,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmulnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vmulnepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x59,0x35,0x00,0x00,0x00,0x00]
+ vmulnepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vmulnepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x59,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vmulnepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vmulnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x59,0x71,0x7f]
+ vmulnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vmulnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x59,0x72,0x80]
+ vmulnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vmulnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x59,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmulnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vmulnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x59,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmulnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vmulnepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x59,0x35,0x00,0x00,0x00,0x00]
+ vmulnepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vmulnepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x59,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vmulnepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vmulnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x59,0x71,0x7f]
+ vmulnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vmulnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x59,0x72,0x80]
+ vmulnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vrcppbf16 %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x7c,0x08,0x4c,0xf7]
+ vrcppbf16 %xmm23, %xmm22
+
+// CHECK: vrcppbf16 %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa6,0x7c,0x0f,0x4c,0xf7]
+ vrcppbf16 %xmm23, %xmm22 {%k7}
+
+// CHECK: vrcppbf16 %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa6,0x7c,0x8f,0x4c,0xf7]
+ vrcppbf16 %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vrcppbf16 %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x7c,0x48,0x4c,0xf7]
+ vrcppbf16 %zmm23, %zmm22
+
+// CHECK: vrcppbf16 %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xa6,0x7c,0x4f,0x4c,0xf7]
+ vrcppbf16 %zmm23, %zmm22 {%k7}
+
+// CHECK: vrcppbf16 %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa6,0x7c,0xcf,0x4c,0xf7]
+ vrcppbf16 %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vrcppbf16 %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x7c,0x28,0x4c,0xf7]
+ vrcppbf16 %ymm23, %ymm22
+
+// CHECK: vrcppbf16 %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa6,0x7c,0x2f,0x4c,0xf7]
+ vrcppbf16 %ymm23, %ymm22 {%k7}
+
+// CHECK: vrcppbf16 %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa6,0x7c,0xaf,0x4c,0xf7]
+ vrcppbf16 %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vrcppbf16 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa6,0x7c,0x08,0x4c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vrcppbf16 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vrcppbf16 291(%r8,%rax,4), %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x7c,0x0f,0x4c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vrcppbf16 291(%r8,%rax,4), %xmm22 {%k7}
+
+// CHECK: vrcppbf16 (%rip){1to8}, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x7c,0x18,0x4c,0x35,0x00,0x00,0x00,0x00]
+ vrcppbf16 (%rip){1to8}, %xmm22
+
+// CHECK: vrcppbf16 -512(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe6,0x7c,0x08,0x4c,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vrcppbf16 -512(,%rbp,2), %xmm22
+
+// CHECK: vrcppbf16 2032(%rcx), %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x7c,0x8f,0x4c,0x71,0x7f]
+ vrcppbf16 2032(%rcx), %xmm22 {%k7} {z}
+
+// CHECK: vrcppbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x7c,0x9f,0x4c,0x72,0x80]
+ vrcppbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z}
+
+// CHECK: vrcppbf16 268435456(%rbp,%r14,8), %ymm22
+// CHECK: encoding: [0x62,0xa6,0x7c,0x28,0x4c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vrcppbf16 268435456(%rbp,%r14,8), %ymm22
+
+// CHECK: vrcppbf16 291(%r8,%rax,4), %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x7c,0x2f,0x4c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vrcppbf16 291(%r8,%rax,4), %ymm22 {%k7}
+
+// CHECK: vrcppbf16 (%rip){1to16}, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x7c,0x38,0x4c,0x35,0x00,0x00,0x00,0x00]
+ vrcppbf16 (%rip){1to16}, %ymm22
+
+// CHECK: vrcppbf16 -1024(,%rbp,2), %ymm22
+// CHECK: encoding: [0x62,0xe6,0x7c,0x28,0x4c,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vrcppbf16 -1024(,%rbp,2), %ymm22
+
+// CHECK: vrcppbf16 4064(%rcx), %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x7c,0xaf,0x4c,0x71,0x7f]
+ vrcppbf16 4064(%rcx), %ymm22 {%k7} {z}
+
+// CHECK: vrcppbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x7c,0xbf,0x4c,0x72,0x80]
+ vrcppbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z}
+
+// CHECK: vrcppbf16 268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa6,0x7c,0x48,0x4c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vrcppbf16 268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vrcppbf16 291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x7c,0x4f,0x4c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vrcppbf16 291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vrcppbf16 (%rip){1to32}, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x7c,0x58,0x4c,0x35,0x00,0x00,0x00,0x00]
+ vrcppbf16 (%rip){1to32}, %zmm22
+
+// CHECK: vrcppbf16 -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe6,0x7c,0x48,0x4c,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vrcppbf16 -2048(,%rbp,2), %zmm22
+
+// CHECK: vrcppbf16 8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x7c,0xcf,0x4c,0x71,0x7f]
+ vrcppbf16 8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vrcppbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x7c,0xdf,0x4c,0x72,0x80]
+ vrcppbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x56,0xf7,0x7b]
+ vreducenepbf16 $123, %zmm23, %zmm22
+
+// CHECK: vreducenepbf16 $123, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xa3,0x7f,0x4f,0x56,0xf7,0x7b]
+ vreducenepbf16 $123, %zmm23, %zmm22 {%k7}
+
+// CHECK: vreducenepbf16 $123, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0x7f,0xcf,0x56,0xf7,0x7b]
+ vreducenepbf16 $123, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x56,0xf7,0x7b]
+ vreducenepbf16 $123, %ymm23, %ymm22
+
+// CHECK: vreducenepbf16 $123, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa3,0x7f,0x2f,0x56,0xf7,0x7b]
+ vreducenepbf16 $123, %ymm23, %ymm22 {%k7}
+
+// CHECK: vreducenepbf16 $123, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0x7f,0xaf,0x56,0xf7,0x7b]
+ vreducenepbf16 $123, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x56,0xf7,0x7b]
+ vreducenepbf16 $123, %xmm23, %xmm22
+
+// CHECK: vreducenepbf16 $123, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa3,0x7f,0x0f,0x56,0xf7,0x7b]
+ vreducenepbf16 $123, %xmm23, %xmm22 {%k7}
+
+// CHECK: vreducenepbf16 $123, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0x7f,0x8f,0x56,0xf7,0x7b]
+ vreducenepbf16 $123, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x56,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vreducenepbf16 $123, 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vreducenepbf16 $123, 291(%r8,%rax,4), %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc3,0x7f,0x0f,0x56,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vreducenepbf16 $123, 291(%r8,%rax,4), %xmm22 {%k7}
+
+// CHECK: vreducenepbf16 $123, (%rip){1to8}, %xmm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x18,0x56,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vreducenepbf16 $123, (%rip){1to8}, %xmm22
+
+// CHECK: vreducenepbf16 $123, -512(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x08,0x56,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vreducenepbf16 $123, -512(,%rbp,2), %xmm22
+
+// CHECK: vreducenepbf16 $123, 2032(%rcx), %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0x8f,0x56,0x71,0x7f,0x7b]
+ vreducenepbf16 $123, 2032(%rcx), %xmm22 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, -256(%rdx){1to8}, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0x9f,0x56,0x72,0x80,0x7b]
+ vreducenepbf16 $123, -256(%rdx){1to8}, %xmm22 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, 268435456(%rbp,%r14,8), %ymm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x56,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vreducenepbf16 $123, 268435456(%rbp,%r14,8), %ymm22
+
+// CHECK: vreducenepbf16 $123, 291(%r8,%rax,4), %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc3,0x7f,0x2f,0x56,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vreducenepbf16 $123, 291(%r8,%rax,4), %ymm22 {%k7}
+
+// CHECK: vreducenepbf16 $123, (%rip){1to16}, %ymm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x38,0x56,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vreducenepbf16 $123, (%rip){1to16}, %ymm22
+
+// CHECK: vreducenepbf16 $123, -1024(,%rbp,2), %ymm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x28,0x56,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vreducenepbf16 $123, -1024(,%rbp,2), %ymm22
+
+// CHECK: vreducenepbf16 $123, 4064(%rcx), %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0xaf,0x56,0x71,0x7f,0x7b]
+ vreducenepbf16 $123, 4064(%rcx), %ymm22 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, -256(%rdx){1to16}, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0xbf,0x56,0x72,0x80,0x7b]
+ vreducenepbf16 $123, -256(%rdx){1to16}, %ymm22 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, 268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x56,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vreducenepbf16 $123, 268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vreducenepbf16 $123, 291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc3,0x7f,0x4f,0x56,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vreducenepbf16 $123, 291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vreducenepbf16 $123, (%rip){1to32}, %zmm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x58,0x56,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vreducenepbf16 $123, (%rip){1to32}, %zmm22
+
+// CHECK: vreducenepbf16 $123, -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x56,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vreducenepbf16 $123, -2048(,%rbp,2), %zmm22
+
+// CHECK: vreducenepbf16 $123, 8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0xcf,0x56,0x71,0x7f,0x7b]
+ vreducenepbf16 $123, 8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vreducenepbf16 $123, -256(%rdx){1to32}, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0xdf,0x56,0x72,0x80,0x7b]
+ vreducenepbf16 $123, -256(%rdx){1to32}, %zmm22 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x08,0xf7,0x7b]
+ vrndscalenepbf16 $123, %zmm23, %zmm22
+
+// CHECK: vrndscalenepbf16 $123, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xa3,0x7f,0x4f,0x08,0xf7,0x7b]
+ vrndscalenepbf16 $123, %zmm23, %zmm22 {%k7}
+
+// CHECK: vrndscalenepbf16 $123, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0x7f,0xcf,0x08,0xf7,0x7b]
+ vrndscalenepbf16 $123, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x08,0xf7,0x7b]
+ vrndscalenepbf16 $123, %ymm23, %ymm22
+
+// CHECK: vrndscalenepbf16 $123, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa3,0x7f,0x2f,0x08,0xf7,0x7b]
+ vrndscalenepbf16 $123, %ymm23, %ymm22 {%k7}
+
+// CHECK: vrndscalenepbf16 $123, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0x7f,0xaf,0x08,0xf7,0x7b]
+ vrndscalenepbf16 $123, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x08,0xf7,0x7b]
+ vrndscalenepbf16 $123, %xmm23, %xmm22
+
+// CHECK: vrndscalenepbf16 $123, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa3,0x7f,0x0f,0x08,0xf7,0x7b]
+ vrndscalenepbf16 $123, %xmm23, %xmm22 {%k7}
+
+// CHECK: vrndscalenepbf16 $123, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0x7f,0x8f,0x08,0xf7,0x7b]
+ vrndscalenepbf16 $123, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x08,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vrndscalenepbf16 $123, 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vrndscalenepbf16 $123, 291(%r8,%rax,4), %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc3,0x7f,0x0f,0x08,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vrndscalenepbf16 $123, 291(%r8,%rax,4), %xmm22 {%k7}
+
+// CHECK: vrndscalenepbf16 $123, (%rip){1to8}, %xmm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x18,0x08,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vrndscalenepbf16 $123, (%rip){1to8}, %xmm22
+
+// CHECK: vrndscalenepbf16 $123, -512(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x08,0x08,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vrndscalenepbf16 $123, -512(,%rbp,2), %xmm22
+
+// CHECK: vrndscalenepbf16 $123, 2032(%rcx), %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0x8f,0x08,0x71,0x7f,0x7b]
+ vrndscalenepbf16 $123, 2032(%rcx), %xmm22 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, -256(%rdx){1to8}, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0x9f,0x08,0x72,0x80,0x7b]
+ vrndscalenepbf16 $123, -256(%rdx){1to8}, %xmm22 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, 268435456(%rbp,%r14,8), %ymm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x08,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vrndscalenepbf16 $123, 268435456(%rbp,%r14,8), %ymm22
+
+// CHECK: vrndscalenepbf16 $123, 291(%r8,%rax,4), %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc3,0x7f,0x2f,0x08,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vrndscalenepbf16 $123, 291(%r8,%rax,4), %ymm22 {%k7}
+
+// CHECK: vrndscalenepbf16 $123, (%rip){1to16}, %ymm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x38,0x08,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vrndscalenepbf16 $123, (%rip){1to16}, %ymm22
+
+// CHECK: vrndscalenepbf16 $123, -1024(,%rbp,2), %ymm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x28,0x08,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vrndscalenepbf16 $123, -1024(,%rbp,2), %ymm22
+
+// CHECK: vrndscalenepbf16 $123, 4064(%rcx), %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0xaf,0x08,0x71,0x7f,0x7b]
+ vrndscalenepbf16 $123, 4064(%rcx), %ymm22 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, -256(%rdx){1to16}, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0xbf,0x08,0x72,0x80,0x7b]
+ vrndscalenepbf16 $123, -256(%rdx){1to16}, %ymm22 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, 268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x08,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vrndscalenepbf16 $123, 268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vrndscalenepbf16 $123, 291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc3,0x7f,0x4f,0x08,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vrndscalenepbf16 $123, 291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vrndscalenepbf16 $123, (%rip){1to32}, %zmm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x58,0x08,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vrndscalenepbf16 $123, (%rip){1to32}, %zmm22
+
+// CHECK: vrndscalenepbf16 $123, -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x08,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vrndscalenepbf16 $123, -2048(,%rbp,2), %zmm22
+
+// CHECK: vrndscalenepbf16 $123, 8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0xcf,0x08,0x71,0x7f,0x7b]
+ vrndscalenepbf16 $123, 8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vrndscalenepbf16 $123, -256(%rdx){1to32}, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe3,0x7f,0xdf,0x08,0x72,0x80,0x7b]
+ vrndscalenepbf16 $123, -256(%rdx){1to32}, %zmm22 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x7c,0x08,0x4e,0xf7]
+ vrsqrtpbf16 %xmm23, %xmm22
+
+// CHECK: vrsqrtpbf16 %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa6,0x7c,0x0f,0x4e,0xf7]
+ vrsqrtpbf16 %xmm23, %xmm22 {%k7}
+
+// CHECK: vrsqrtpbf16 %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa6,0x7c,0x8f,0x4e,0xf7]
+ vrsqrtpbf16 %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x7c,0x48,0x4e,0xf7]
+ vrsqrtpbf16 %zmm23, %zmm22
+
+// CHECK: vrsqrtpbf16 %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xa6,0x7c,0x4f,0x4e,0xf7]
+ vrsqrtpbf16 %zmm23, %zmm22 {%k7}
+
+// CHECK: vrsqrtpbf16 %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa6,0x7c,0xcf,0x4e,0xf7]
+ vrsqrtpbf16 %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x7c,0x28,0x4e,0xf7]
+ vrsqrtpbf16 %ymm23, %ymm22
+
+// CHECK: vrsqrtpbf16 %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa6,0x7c,0x2f,0x4e,0xf7]
+ vrsqrtpbf16 %ymm23, %ymm22 {%k7}
+
+// CHECK: vrsqrtpbf16 %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa6,0x7c,0xaf,0x4e,0xf7]
+ vrsqrtpbf16 %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa6,0x7c,0x08,0x4e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vrsqrtpbf16 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vrsqrtpbf16 291(%r8,%rax,4), %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x7c,0x0f,0x4e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vrsqrtpbf16 291(%r8,%rax,4), %xmm22 {%k7}
+
+// CHECK: vrsqrtpbf16 (%rip){1to8}, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x7c,0x18,0x4e,0x35,0x00,0x00,0x00,0x00]
+ vrsqrtpbf16 (%rip){1to8}, %xmm22
+
+// CHECK: vrsqrtpbf16 -512(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe6,0x7c,0x08,0x4e,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vrsqrtpbf16 -512(,%rbp,2), %xmm22
+
+// CHECK: vrsqrtpbf16 2032(%rcx), %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x7c,0x8f,0x4e,0x71,0x7f]
+ vrsqrtpbf16 2032(%rcx), %xmm22 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x7c,0x9f,0x4e,0x72,0x80]
+ vrsqrtpbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 268435456(%rbp,%r14,8), %ymm22
+// CHECK: encoding: [0x62,0xa6,0x7c,0x28,0x4e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vrsqrtpbf16 268435456(%rbp,%r14,8), %ymm22
+
+// CHECK: vrsqrtpbf16 291(%r8,%rax,4), %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x7c,0x2f,0x4e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vrsqrtpbf16 291(%r8,%rax,4), %ymm22 {%k7}
+
+// CHECK: vrsqrtpbf16 (%rip){1to16}, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x7c,0x38,0x4e,0x35,0x00,0x00,0x00,0x00]
+ vrsqrtpbf16 (%rip){1to16}, %ymm22
+
+// CHECK: vrsqrtpbf16 -1024(,%rbp,2), %ymm22
+// CHECK: encoding: [0x62,0xe6,0x7c,0x28,0x4e,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vrsqrtpbf16 -1024(,%rbp,2), %ymm22
+
+// CHECK: vrsqrtpbf16 4064(%rcx), %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x7c,0xaf,0x4e,0x71,0x7f]
+ vrsqrtpbf16 4064(%rcx), %ymm22 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x7c,0xbf,0x4e,0x72,0x80]
+ vrsqrtpbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa6,0x7c,0x48,0x4e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vrsqrtpbf16 268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vrsqrtpbf16 291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x7c,0x4f,0x4e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vrsqrtpbf16 291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vrsqrtpbf16 (%rip){1to32}, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x7c,0x58,0x4e,0x35,0x00,0x00,0x00,0x00]
+ vrsqrtpbf16 (%rip){1to32}, %zmm22
+
+// CHECK: vrsqrtpbf16 -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe6,0x7c,0x48,0x4e,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vrsqrtpbf16 -2048(,%rbp,2), %zmm22
+
+// CHECK: vrsqrtpbf16 8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x7c,0xcf,0x4e,0x71,0x7f]
+ vrsqrtpbf16 8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vrsqrtpbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x7c,0xdf,0x4e,0x72,0x80]
+ vrsqrtpbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z}
+
+// CHECK: vscalefpbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0x2c,0xf0]
+ vscalefpbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vscalefpbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0x2c,0xf0]
+ vscalefpbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vscalefpbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x2c,0xf0]
+ vscalefpbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vscalefpbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0x2c,0xf0]
+ vscalefpbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vscalefpbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0x2c,0xf0]
+ vscalefpbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vscalefpbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x2c,0xf0]
+ vscalefpbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vscalefpbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0x2c,0xf0]
+ vscalefpbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vscalefpbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0x2c,0xf0]
+ vscalefpbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vscalefpbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0x2c,0xf0]
+ vscalefpbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vscalefpbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0x2c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vscalefpbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vscalefpbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0x2c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vscalefpbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vscalefpbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x2c,0x35,0x00,0x00,0x00,0x00]
+ vscalefpbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vscalefpbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x2c,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vscalefpbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vscalefpbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x2c,0x71,0x7f]
+ vscalefpbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vscalefpbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x2c,0x72,0x80]
+ vscalefpbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vscalefpbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0x2c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vscalefpbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vscalefpbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x2c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vscalefpbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vscalefpbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x2c,0x35,0x00,0x00,0x00,0x00]
+ vscalefpbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vscalefpbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x2c,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vscalefpbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vscalefpbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0x2c,0x71,0x7f]
+ vscalefpbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vscalefpbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x2c,0x72,0x80]
+ vscalefpbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vscalefpbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0x2c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vscalefpbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vscalefpbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x2c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vscalefpbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vscalefpbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x2c,0x35,0x00,0x00,0x00,0x00]
+ vscalefpbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vscalefpbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x2c,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vscalefpbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vscalefpbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x2c,0x71,0x7f]
+ vscalefpbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vscalefpbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x2c,0x72,0x80]
+ vscalefpbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x51,0xf7]
+ vsqrtnepbf16 %xmm23, %xmm22
+
+// CHECK: vsqrtnepbf16 %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x0f,0x51,0xf7]
+ vsqrtnepbf16 %xmm23, %xmm22 {%k7}
+
+// CHECK: vsqrtnepbf16 %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x8f,0x51,0xf7]
+ vsqrtnepbf16 %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x51,0xf7]
+ vsqrtnepbf16 %zmm23, %zmm22
+
+// CHECK: vsqrtnepbf16 %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x4f,0x51,0xf7]
+ vsqrtnepbf16 %zmm23, %zmm22 {%k7}
+
+// CHECK: vsqrtnepbf16 %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x7d,0xcf,0x51,0xf7]
+ vsqrtnepbf16 %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x51,0xf7]
+ vsqrtnepbf16 %ymm23, %ymm22
+
+// CHECK: vsqrtnepbf16 %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x2f,0x51,0xf7]
+ vsqrtnepbf16 %ymm23, %ymm22 {%k7}
+
+// CHECK: vsqrtnepbf16 %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x7d,0xaf,0x51,0xf7]
+ vsqrtnepbf16 %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsqrtnepbf16 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vsqrtnepbf16 291(%r8,%rax,4), %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7d,0x0f,0x51,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsqrtnepbf16 291(%r8,%rax,4), %xmm22 {%k7}
+
+// CHECK: vsqrtnepbf16 (%rip){1to8}, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x18,0x51,0x35,0x00,0x00,0x00,0x00]
+ vsqrtnepbf16 (%rip){1to8}, %xmm22
+
+// CHECK: vsqrtnepbf16 -512(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x51,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vsqrtnepbf16 -512(,%rbp,2), %xmm22
+
+// CHECK: vsqrtnepbf16 2032(%rcx), %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x8f,0x51,0x71,0x7f]
+ vsqrtnepbf16 2032(%rcx), %xmm22 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x9f,0x51,0x72,0x80]
+ vsqrtnepbf16 -256(%rdx){1to8}, %xmm22 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 268435456(%rbp,%r14,8), %ymm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsqrtnepbf16 268435456(%rbp,%r14,8), %ymm22
+
+// CHECK: vsqrtnepbf16 291(%r8,%rax,4), %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7d,0x2f,0x51,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsqrtnepbf16 291(%r8,%rax,4), %ymm22 {%k7}
+
+// CHECK: vsqrtnepbf16 (%rip){1to16}, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x38,0x51,0x35,0x00,0x00,0x00,0x00]
+ vsqrtnepbf16 (%rip){1to16}, %ymm22
+
+// CHECK: vsqrtnepbf16 -1024(,%rbp,2), %ymm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x28,0x51,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vsqrtnepbf16 -1024(,%rbp,2), %ymm22
+
+// CHECK: vsqrtnepbf16 4064(%rcx), %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xaf,0x51,0x71,0x7f]
+ vsqrtnepbf16 4064(%rcx), %ymm22 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xbf,0x51,0x72,0x80]
+ vsqrtnepbf16 -256(%rdx){1to16}, %ymm22 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsqrtnepbf16 268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vsqrtnepbf16 291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7d,0x4f,0x51,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsqrtnepbf16 291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vsqrtnepbf16 (%rip){1to32}, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x58,0x51,0x35,0x00,0x00,0x00,0x00]
+ vsqrtnepbf16 (%rip){1to32}, %zmm22
+
+// CHECK: vsqrtnepbf16 -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x48,0x51,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vsqrtnepbf16 -2048(,%rbp,2), %zmm22
+
+// CHECK: vsqrtnepbf16 8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xcf,0x51,0x71,0x7f]
+ vsqrtnepbf16 8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vsqrtnepbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xdf,0x51,0x72,0x80]
+ vsqrtnepbf16 -256(%rdx){1to32}, %zmm22 {%k7} {z}
+
+// CHECK: vsubnepbf16 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x85,0x45,0x20,0x5c,0xf0]
+ vsubnepbf16 %ymm24, %ymm23, %ymm22
+
+// CHECK: vsubnepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x27,0x5c,0xf0]
+ vsubnepbf16 %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vsubnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x5c,0xf0]
+ vsubnepbf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vsubnepbf16 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x85,0x45,0x40,0x5c,0xf0]
+ vsubnepbf16 %zmm24, %zmm23, %zmm22
+
+// CHECK: vsubnepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x47,0x5c,0xf0]
+ vsubnepbf16 %zmm24, %zmm23, %zmm22 {%k7}
+
+// CHECK: vsubnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x5c,0xf0]
+ vsubnepbf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vsubnepbf16 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x85,0x45,0x00,0x5c,0xf0]
+ vsubnepbf16 %xmm24, %xmm23, %xmm22
+
+// CHECK: vsubnepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x45,0x07,0x5c,0xf0]
+ vsubnepbf16 %xmm24, %xmm23, %xmm22 {%k7}
+
+// CHECK: vsubnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x45,0x87,0x5c,0xf0]
+ vsubnepbf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vsubnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x5c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsubnepbf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vsubnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x5c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsubnepbf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+
+// CHECK: vsubnepbf16 (%rip){1to32}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x5c,0x35,0x00,0x00,0x00,0x00]
+ vsubnepbf16 (%rip){1to32}, %zmm23, %zmm22
+
+// CHECK: vsubnepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x5c,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vsubnepbf16 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vsubnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x5c,0x71,0x7f]
+ vsubnepbf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vsubnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x5c,0x72,0x80]
+ vsubnepbf16 -256(%rdx){1to32}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vsubnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x5c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsubnepbf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vsubnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x5c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsubnepbf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+
+// CHECK: vsubnepbf16 (%rip){1to16}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x5c,0x35,0x00,0x00,0x00,0x00]
+ vsubnepbf16 (%rip){1to16}, %ymm23, %ymm22
+
+// CHECK: vsubnepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x5c,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vsubnepbf16 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vsubnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x5c,0x71,0x7f]
+ vsubnepbf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vsubnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x5c,0x72,0x80]
+ vsubnepbf16 -256(%rdx){1to16}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vsubnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x5c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsubnepbf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vsubnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x5c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsubnepbf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+
+// CHECK: vsubnepbf16 (%rip){1to8}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x5c,0x35,0x00,0x00,0x00,0x00]
+ vsubnepbf16 (%rip){1to8}, %xmm23, %xmm22
+
+// CHECK: vsubnepbf16 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x5c,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vsubnepbf16 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vsubnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x5c,0x71,0x7f]
+ vsubnepbf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vsubnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x5c,0x72,0x80]
+ vsubnepbf16 -256(%rdx){1to8}, %xmm23, %xmm22 {%k7} {z}
+
diff --git a/llvm/test/MC/X86/avx10.2-bf16-64-intel.s b/llvm/test/MC/X86/avx10.2-bf16-64-intel.s
new file mode 100644
index 00000000000000..5f3dc45ba77458
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-bf16-64-intel.s
@@ -0,0 +1,3014 @@
+// RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vaddnepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0x20,0x58,0xf0]
+ vaddnepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vaddnepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0x27,0x58,0xf0]
+ vaddnepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vaddnepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x58,0xf0]
+ vaddnepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vaddnepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x40,0x58,0xf0]
+ vaddnepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vaddnepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x47,0x58,0xf0]
+ vaddnepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vaddnepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x58,0xf0]
+ vaddnepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vaddnepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x00,0x58,0xf0]
+ vaddnepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vaddnepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x07,0x58,0xf0]
+ vaddnepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vaddnepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x87,0x58,0xf0]
+ vaddnepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vaddnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x58,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vaddnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vaddnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x58,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vaddnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vaddnepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x58,0x35,0x00,0x00,0x00,0x00]
+ vaddnepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vaddnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x58,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vaddnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vaddnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x58,0x71,0x7f]
+ vaddnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vaddnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x58,0x72,0x80]
+ vaddnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vaddnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x58,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vaddnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vaddnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x58,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vaddnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vaddnepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x58,0x35,0x00,0x00,0x00,0x00]
+ vaddnepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vaddnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x58,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vaddnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vaddnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x58,0x71,0x7f]
+ vaddnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vaddnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x58,0x72,0x80]
+ vaddnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vaddnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x58,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vaddnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vaddnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x58,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vaddnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vaddnepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x58,0x35,0x00,0x00,0x00,0x00]
+ vaddnepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vaddnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x58,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vaddnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vaddnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x58,0x71,0x7f]
+ vaddnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vaddnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x58,0x72,0x80]
+ vaddnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vcmppbf16 k5, ymm23, ymm24, 123
+// CHECK: encoding: [0x62,0x93,0x47,0x20,0xc2,0xe8,0x7b]
+ vcmppbf16 k5, ymm23, ymm24, 123
+
+// CHECK: vcmppbf16 k5 {k7}, ymm23, ymm24, 123
+// CHECK: encoding: [0x62,0x93,0x47,0x27,0xc2,0xe8,0x7b]
+ vcmppbf16 k5 {k7}, ymm23, ymm24, 123
+
+// CHECK: vcmppbf16 k5, xmm23, xmm24, 123
+// CHECK: encoding: [0x62,0x93,0x47,0x00,0xc2,0xe8,0x7b]
+ vcmppbf16 k5, xmm23, xmm24, 123
+
+// CHECK: vcmppbf16 k5 {k7}, xmm23, xmm24, 123
+// CHECK: encoding: [0x62,0x93,0x47,0x07,0xc2,0xe8,0x7b]
+ vcmppbf16 k5 {k7}, xmm23, xmm24, 123
+
+// CHECK: vcmppbf16 k5, zmm23, zmm24, 123
+// CHECK: encoding: [0x62,0x93,0x47,0x40,0xc2,0xe8,0x7b]
+ vcmppbf16 k5, zmm23, zmm24, 123
+
+// CHECK: vcmppbf16 k5 {k7}, zmm23, zmm24, 123
+// CHECK: encoding: [0x62,0x93,0x47,0x47,0xc2,0xe8,0x7b]
+ vcmppbf16 k5 {k7}, zmm23, zmm24, 123
+
+// CHECK: vcmppbf16 k5, zmm23, zmmword ptr [rbp + 8*r14 + 268435456], 123
+// CHECK: encoding: [0x62,0xb3,0x47,0x40,0xc2,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vcmppbf16 k5, zmm23, zmmword ptr [rbp + 8*r14 + 268435456], 123
+
+// CHECK: vcmppbf16 k5 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291], 123
+// CHECK: encoding: [0x62,0xd3,0x47,0x47,0xc2,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vcmppbf16 k5 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291], 123
+
+// CHECK: vcmppbf16 k5, zmm23, word ptr [rip]{1to32}, 123
+// CHECK: encoding: [0x62,0xf3,0x47,0x50,0xc2,0x2d,0x00,0x00,0x00,0x00,0x7b]
+ vcmppbf16 k5, zmm23, word ptr [rip]{1to32}, 123
+
+// CHECK: vcmppbf16 k5, zmm23, zmmword ptr [2*rbp - 2048], 123
+// CHECK: encoding: [0x62,0xf3,0x47,0x40,0xc2,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vcmppbf16 k5, zmm23, zmmword ptr [2*rbp - 2048], 123
+
+// CHECK: vcmppbf16 k5 {k7}, zmm23, zmmword ptr [rcx + 8128], 123
+// CHECK: encoding: [0x62,0xf3,0x47,0x47,0xc2,0x69,0x7f,0x7b]
+ vcmppbf16 k5 {k7}, zmm23, zmmword ptr [rcx + 8128], 123
+
+// CHECK: vcmppbf16 k5 {k7}, zmm23, word ptr [rdx - 256]{1to32}, 123
+// CHECK: encoding: [0x62,0xf3,0x47,0x57,0xc2,0x6a,0x80,0x7b]
+ vcmppbf16 k5 {k7}, zmm23, word ptr [rdx - 256]{1to32}, 123
+
+// CHECK: vcmppbf16 k5, xmm23, xmmword ptr [rbp + 8*r14 + 268435456], 123
+// CHECK: encoding: [0x62,0xb3,0x47,0x00,0xc2,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vcmppbf16 k5, xmm23, xmmword ptr [rbp + 8*r14 + 268435456], 123
+
+// CHECK: vcmppbf16 k5 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291], 123
+// CHECK: encoding: [0x62,0xd3,0x47,0x07,0xc2,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vcmppbf16 k5 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291], 123
+
+// CHECK: vcmppbf16 k5, xmm23, word ptr [rip]{1to8}, 123
+// CHECK: encoding: [0x62,0xf3,0x47,0x10,0xc2,0x2d,0x00,0x00,0x00,0x00,0x7b]
+ vcmppbf16 k5, xmm23, word ptr [rip]{1to8}, 123
+
+// CHECK: vcmppbf16 k5, xmm23, xmmword ptr [2*rbp - 512], 123
+// CHECK: encoding: [0x62,0xf3,0x47,0x00,0xc2,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vcmppbf16 k5, xmm23, xmmword ptr [2*rbp - 512], 123
+
+// CHECK: vcmppbf16 k5 {k7}, xmm23, xmmword ptr [rcx + 2032], 123
+// CHECK: encoding: [0x62,0xf3,0x47,0x07,0xc2,0x69,0x7f,0x7b]
+ vcmppbf16 k5 {k7}, xmm23, xmmword ptr [rcx + 2032], 123
+
+// CHECK: vcmppbf16 k5 {k7}, xmm23, word ptr [rdx - 256]{1to8}, 123
+// CHECK: encoding: [0x62,0xf3,0x47,0x17,0xc2,0x6a,0x80,0x7b]
+ vcmppbf16 k5 {k7}, xmm23, word ptr [rdx - 256]{1to8}, 123
+
+// CHECK: vcmppbf16 k5, ymm23, ymmword ptr [rbp + 8*r14 + 268435456], 123
+// CHECK: encoding: [0x62,0xb3,0x47,0x20,0xc2,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vcmppbf16 k5, ymm23, ymmword ptr [rbp + 8*r14 + 268435456], 123
+
+// CHECK: vcmppbf16 k5 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291], 123
+// CHECK: encoding: [0x62,0xd3,0x47,0x27,0xc2,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vcmppbf16 k5 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291], 123
+
+// CHECK: vcmppbf16 k5, ymm23, word ptr [rip]{1to16}, 123
+// CHECK: encoding: [0x62,0xf3,0x47,0x30,0xc2,0x2d,0x00,0x00,0x00,0x00,0x7b]
+ vcmppbf16 k5, ymm23, word ptr [rip]{1to16}, 123
+
+// CHECK: vcmppbf16 k5, ymm23, ymmword ptr [2*rbp - 1024], 123
+// CHECK: encoding: [0x62,0xf3,0x47,0x20,0xc2,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vcmppbf16 k5, ymm23, ymmword ptr [2*rbp - 1024], 123
+
+// CHECK: vcmppbf16 k5 {k7}, ymm23, ymmword ptr [rcx + 4064], 123
+// CHECK: encoding: [0x62,0xf3,0x47,0x27,0xc2,0x69,0x7f,0x7b]
+ vcmppbf16 k5 {k7}, ymm23, ymmword ptr [rcx + 4064], 123
+
+// CHECK: vcmppbf16 k5 {k7}, ymm23, word ptr [rdx - 256]{1to16}, 123
+// CHECK: encoding: [0x62,0xf3,0x47,0x37,0xc2,0x6a,0x80,0x7b]
+ vcmppbf16 k5 {k7}, ymm23, word ptr [rdx - 256]{1to16}, 123
+
+// CHECK: vcomsbf16 xmm22, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x2f,0xf7]
+ vcomsbf16 xmm22, xmm23
+
+// CHECK: vcomsbf16 xmm22, word ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcomsbf16 xmm22, word ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcomsbf16 xmm22, word ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcomsbf16 xmm22, word ptr [r8 + 4*rax + 291]
+
+// CHECK: vcomsbf16 xmm22, word ptr [rip]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x2f,0x35,0x00,0x00,0x00,0x00]
+ vcomsbf16 xmm22, word ptr [rip]
+
+// CHECK: vcomsbf16 xmm22, word ptr [2*rbp - 64]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x2f,0x34,0x6d,0xc0,0xff,0xff,0xff]
+ vcomsbf16 xmm22, word ptr [2*rbp - 64]
+
+// CHECK: vcomsbf16 xmm22, word ptr [rcx + 254]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x2f,0x71,0x7f]
+ vcomsbf16 xmm22, word ptr [rcx + 254]
+
+// CHECK: vcomsbf16 xmm22, word ptr [rdx - 256]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x2f,0x72,0x80]
+ vcomsbf16 xmm22, word ptr [rdx - 256]
+
+// CHECK: vdivnepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0x20,0x5e,0xf0]
+ vdivnepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vdivnepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0x27,0x5e,0xf0]
+ vdivnepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vdivnepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x5e,0xf0]
+ vdivnepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vdivnepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x40,0x5e,0xf0]
+ vdivnepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vdivnepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x47,0x5e,0xf0]
+ vdivnepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vdivnepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x5e,0xf0]
+ vdivnepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vdivnepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x00,0x5e,0xf0]
+ vdivnepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vdivnepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x07,0x5e,0xf0]
+ vdivnepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vdivnepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x87,0x5e,0xf0]
+ vdivnepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vdivnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x5e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vdivnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vdivnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x5e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vdivnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vdivnepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x5e,0x35,0x00,0x00,0x00,0x00]
+ vdivnepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vdivnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x5e,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vdivnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vdivnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x5e,0x71,0x7f]
+ vdivnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vdivnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x5e,0x72,0x80]
+ vdivnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vdivnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x5e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vdivnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vdivnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x5e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vdivnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vdivnepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x5e,0x35,0x00,0x00,0x00,0x00]
+ vdivnepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vdivnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x5e,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vdivnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vdivnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x5e,0x71,0x7f]
+ vdivnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vdivnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x5e,0x72,0x80]
+ vdivnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vdivnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x5e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vdivnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vdivnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x5e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vdivnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vdivnepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x5e,0x35,0x00,0x00,0x00,0x00]
+ vdivnepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vdivnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x5e,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vdivnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vdivnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x5e,0x71,0x7f]
+ vdivnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vdivnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x5e,0x72,0x80]
+ vdivnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vfmadd132nepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0x98,0xf0]
+ vfmadd132nepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vfmadd132nepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0x98,0xf0]
+ vfmadd132nepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vfmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x98,0xf0]
+ vfmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vfmadd132nepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0x98,0xf0]
+ vfmadd132nepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vfmadd132nepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0x98,0xf0]
+ vfmadd132nepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vfmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x98,0xf0]
+ vfmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vfmadd132nepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0x98,0xf0]
+ vfmadd132nepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vfmadd132nepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0x98,0xf0]
+ vfmadd132nepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vfmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0x98,0xf0]
+ vfmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vfmadd132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmadd132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0x98,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmadd132nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x98,0x35,0x00,0x00,0x00,0x00]
+ vfmadd132nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vfmadd132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x98,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfmadd132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vfmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x98,0x71,0x7f]
+ vfmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vfmadd132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x98,0x72,0x80]
+ vfmadd132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vfmadd132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmadd132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x98,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmadd132nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x98,0x35,0x00,0x00,0x00,0x00]
+ vfmadd132nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vfmadd132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x98,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfmadd132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vfmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0x98,0x71,0x7f]
+ vfmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vfmadd132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x98,0x72,0x80]
+ vfmadd132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vfmadd132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0x98,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmadd132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x98,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmadd132nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x98,0x35,0x00,0x00,0x00,0x00]
+ vfmadd132nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vfmadd132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x98,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfmadd132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vfmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x98,0x71,0x7f]
+ vfmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vfmadd132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x98,0x72,0x80]
+ vfmadd132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vfmadd213nepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xa8,0xf0]
+ vfmadd213nepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vfmadd213nepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xa8,0xf0]
+ vfmadd213nepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vfmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xa8,0xf0]
+ vfmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vfmadd213nepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xa8,0xf0]
+ vfmadd213nepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vfmadd213nepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xa8,0xf0]
+ vfmadd213nepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vfmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xa8,0xf0]
+ vfmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vfmadd213nepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xa8,0xf0]
+ vfmadd213nepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vfmadd213nepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xa8,0xf0]
+ vfmadd213nepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vfmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xa8,0xf0]
+ vfmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vfmadd213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmadd213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xa8,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmadd213nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xa8,0x35,0x00,0x00,0x00,0x00]
+ vfmadd213nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vfmadd213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xa8,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfmadd213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vfmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xa8,0x71,0x7f]
+ vfmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vfmadd213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xa8,0x72,0x80]
+ vfmadd213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vfmadd213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmadd213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xa8,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmadd213nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xa8,0x35,0x00,0x00,0x00,0x00]
+ vfmadd213nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vfmadd213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xa8,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfmadd213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vfmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xa8,0x71,0x7f]
+ vfmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vfmadd213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xa8,0x72,0x80]
+ vfmadd213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vfmadd213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xa8,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmadd213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xa8,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmadd213nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xa8,0x35,0x00,0x00,0x00,0x00]
+ vfmadd213nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vfmadd213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xa8,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfmadd213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vfmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xa8,0x71,0x7f]
+ vfmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vfmadd213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xa8,0x72,0x80]
+ vfmadd213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vfmadd231nepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xb8,0xf0]
+ vfmadd231nepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vfmadd231nepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xb8,0xf0]
+ vfmadd231nepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vfmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xb8,0xf0]
+ vfmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vfmadd231nepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xb8,0xf0]
+ vfmadd231nepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vfmadd231nepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xb8,0xf0]
+ vfmadd231nepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vfmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xb8,0xf0]
+ vfmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vfmadd231nepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xb8,0xf0]
+ vfmadd231nepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vfmadd231nepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xb8,0xf0]
+ vfmadd231nepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vfmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xb8,0xf0]
+ vfmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vfmadd231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmadd231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xb8,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmadd231nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xb8,0x35,0x00,0x00,0x00,0x00]
+ vfmadd231nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vfmadd231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xb8,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfmadd231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vfmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xb8,0x71,0x7f]
+ vfmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vfmadd231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xb8,0x72,0x80]
+ vfmadd231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vfmadd231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmadd231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xb8,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmadd231nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xb8,0x35,0x00,0x00,0x00,0x00]
+ vfmadd231nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vfmadd231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xb8,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfmadd231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vfmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xb8,0x71,0x7f]
+ vfmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vfmadd231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xb8,0x72,0x80]
+ vfmadd231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vfmadd231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xb8,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmadd231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmadd231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xb8,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmadd231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmadd231nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xb8,0x35,0x00,0x00,0x00,0x00]
+ vfmadd231nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vfmadd231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xb8,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfmadd231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vfmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xb8,0x71,0x7f]
+ vfmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vfmadd231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xb8,0x72,0x80]
+ vfmadd231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vfmsub132nepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0x9a,0xf0]
+ vfmsub132nepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vfmsub132nepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0x9a,0xf0]
+ vfmsub132nepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vfmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x9a,0xf0]
+ vfmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vfmsub132nepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0x9a,0xf0]
+ vfmsub132nepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vfmsub132nepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0x9a,0xf0]
+ vfmsub132nepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vfmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x9a,0xf0]
+ vfmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vfmsub132nepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0x9a,0xf0]
+ vfmsub132nepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vfmsub132nepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0x9a,0xf0]
+ vfmsub132nepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vfmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0x9a,0xf0]
+ vfmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vfmsub132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmsub132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0x9a,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmsub132nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x9a,0x35,0x00,0x00,0x00,0x00]
+ vfmsub132nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vfmsub132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x9a,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfmsub132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vfmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x9a,0x71,0x7f]
+ vfmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vfmsub132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x9a,0x72,0x80]
+ vfmsub132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vfmsub132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmsub132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x9a,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmsub132nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x9a,0x35,0x00,0x00,0x00,0x00]
+ vfmsub132nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vfmsub132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x9a,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfmsub132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vfmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0x9a,0x71,0x7f]
+ vfmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vfmsub132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x9a,0x72,0x80]
+ vfmsub132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vfmsub132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0x9a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmsub132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x9a,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmsub132nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x9a,0x35,0x00,0x00,0x00,0x00]
+ vfmsub132nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vfmsub132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x9a,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfmsub132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vfmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x9a,0x71,0x7f]
+ vfmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vfmsub132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x9a,0x72,0x80]
+ vfmsub132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vfmsub213nepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xaa,0xf0]
+ vfmsub213nepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vfmsub213nepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xaa,0xf0]
+ vfmsub213nepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vfmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xaa,0xf0]
+ vfmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vfmsub213nepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xaa,0xf0]
+ vfmsub213nepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vfmsub213nepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xaa,0xf0]
+ vfmsub213nepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vfmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xaa,0xf0]
+ vfmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vfmsub213nepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xaa,0xf0]
+ vfmsub213nepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vfmsub213nepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xaa,0xf0]
+ vfmsub213nepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vfmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xaa,0xf0]
+ vfmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vfmsub213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmsub213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xaa,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmsub213nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xaa,0x35,0x00,0x00,0x00,0x00]
+ vfmsub213nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vfmsub213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xaa,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfmsub213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vfmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xaa,0x71,0x7f]
+ vfmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vfmsub213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xaa,0x72,0x80]
+ vfmsub213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vfmsub213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmsub213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xaa,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmsub213nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xaa,0x35,0x00,0x00,0x00,0x00]
+ vfmsub213nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vfmsub213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xaa,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfmsub213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vfmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xaa,0x71,0x7f]
+ vfmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vfmsub213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xaa,0x72,0x80]
+ vfmsub213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vfmsub213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xaa,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmsub213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xaa,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmsub213nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xaa,0x35,0x00,0x00,0x00,0x00]
+ vfmsub213nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vfmsub213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xaa,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfmsub213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vfmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xaa,0x71,0x7f]
+ vfmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vfmsub213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xaa,0x72,0x80]
+ vfmsub213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vfmsub231nepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xba,0xf0]
+ vfmsub231nepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vfmsub231nepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xba,0xf0]
+ vfmsub231nepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vfmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xba,0xf0]
+ vfmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vfmsub231nepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xba,0xf0]
+ vfmsub231nepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vfmsub231nepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xba,0xf0]
+ vfmsub231nepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vfmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xba,0xf0]
+ vfmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vfmsub231nepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xba,0xf0]
+ vfmsub231nepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vfmsub231nepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xba,0xf0]
+ vfmsub231nepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vfmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xba,0xf0]
+ vfmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vfmsub231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmsub231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xba,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmsub231nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xba,0x35,0x00,0x00,0x00,0x00]
+ vfmsub231nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vfmsub231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xba,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfmsub231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vfmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xba,0x71,0x7f]
+ vfmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vfmsub231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xba,0x72,0x80]
+ vfmsub231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vfmsub231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmsub231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xba,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmsub231nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xba,0x35,0x00,0x00,0x00,0x00]
+ vfmsub231nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vfmsub231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xba,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfmsub231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vfmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xba,0x71,0x7f]
+ vfmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vfmsub231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xba,0x72,0x80]
+ vfmsub231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vfmsub231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xba,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfmsub231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfmsub231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xba,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfmsub231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfmsub231nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xba,0x35,0x00,0x00,0x00,0x00]
+ vfmsub231nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vfmsub231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xba,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfmsub231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vfmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xba,0x71,0x7f]
+ vfmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vfmsub231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xba,0x72,0x80]
+ vfmsub231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vfnmadd132nepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0x9c,0xf0]
+ vfnmadd132nepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vfnmadd132nepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0x9c,0xf0]
+ vfnmadd132nepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vfnmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x9c,0xf0]
+ vfnmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vfnmadd132nepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0x9c,0xf0]
+ vfnmadd132nepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vfnmadd132nepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0x9c,0xf0]
+ vfnmadd132nepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vfnmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x9c,0xf0]
+ vfnmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vfnmadd132nepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0x9c,0xf0]
+ vfnmadd132nepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vfnmadd132nepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0x9c,0xf0]
+ vfnmadd132nepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vfnmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0x9c,0xf0]
+ vfnmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vfnmadd132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmadd132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0x9c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmadd132nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x9c,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd132nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vfnmadd132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x9c,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmadd132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vfnmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x9c,0x71,0x7f]
+ vfnmadd132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vfnmadd132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x9c,0x72,0x80]
+ vfnmadd132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vfnmadd132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmadd132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x9c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmadd132nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x9c,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd132nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vfnmadd132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x9c,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmadd132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vfnmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0x9c,0x71,0x7f]
+ vfnmadd132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vfnmadd132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x9c,0x72,0x80]
+ vfnmadd132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vfnmadd132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0x9c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmadd132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x9c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmadd132nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x9c,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd132nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vfnmadd132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x9c,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmadd132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vfnmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x9c,0x71,0x7f]
+ vfnmadd132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vfnmadd132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x9c,0x72,0x80]
+ vfnmadd132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vfnmadd213nepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xac,0xf0]
+ vfnmadd213nepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vfnmadd213nepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xac,0xf0]
+ vfnmadd213nepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vfnmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xac,0xf0]
+ vfnmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vfnmadd213nepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xac,0xf0]
+ vfnmadd213nepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vfnmadd213nepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xac,0xf0]
+ vfnmadd213nepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vfnmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xac,0xf0]
+ vfnmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vfnmadd213nepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xac,0xf0]
+ vfnmadd213nepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vfnmadd213nepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xac,0xf0]
+ vfnmadd213nepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vfnmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xac,0xf0]
+ vfnmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vfnmadd213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmadd213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xac,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmadd213nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xac,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd213nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vfnmadd213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xac,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmadd213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vfnmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xac,0x71,0x7f]
+ vfnmadd213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vfnmadd213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xac,0x72,0x80]
+ vfnmadd213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vfnmadd213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmadd213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xac,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmadd213nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xac,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd213nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vfnmadd213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xac,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmadd213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vfnmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xac,0x71,0x7f]
+ vfnmadd213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vfnmadd213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xac,0x72,0x80]
+ vfnmadd213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vfnmadd213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xac,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmadd213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xac,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmadd213nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xac,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd213nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vfnmadd213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xac,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmadd213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vfnmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xac,0x71,0x7f]
+ vfnmadd213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vfnmadd213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xac,0x72,0x80]
+ vfnmadd213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vfnmadd231nepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xbc,0xf0]
+ vfnmadd231nepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vfnmadd231nepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xbc,0xf0]
+ vfnmadd231nepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vfnmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xbc,0xf0]
+ vfnmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vfnmadd231nepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xbc,0xf0]
+ vfnmadd231nepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vfnmadd231nepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xbc,0xf0]
+ vfnmadd231nepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vfnmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xbc,0xf0]
+ vfnmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vfnmadd231nepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xbc,0xf0]
+ vfnmadd231nepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vfnmadd231nepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xbc,0xf0]
+ vfnmadd231nepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vfnmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xbc,0xf0]
+ vfnmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vfnmadd231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmadd231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xbc,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmadd231nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xbc,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd231nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vfnmadd231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xbc,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmadd231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vfnmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xbc,0x71,0x7f]
+ vfnmadd231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vfnmadd231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xbc,0x72,0x80]
+ vfnmadd231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vfnmadd231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmadd231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xbc,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmadd231nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xbc,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd231nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vfnmadd231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xbc,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmadd231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vfnmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xbc,0x71,0x7f]
+ vfnmadd231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vfnmadd231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xbc,0x72,0x80]
+ vfnmadd231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vfnmadd231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xbc,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmadd231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmadd231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xbc,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmadd231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmadd231nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xbc,0x35,0x00,0x00,0x00,0x00]
+ vfnmadd231nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vfnmadd231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xbc,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmadd231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vfnmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xbc,0x71,0x7f]
+ vfnmadd231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vfnmadd231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xbc,0x72,0x80]
+ vfnmadd231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vfnmsub132nepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0x9e,0xf0]
+ vfnmsub132nepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vfnmsub132nepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0x9e,0xf0]
+ vfnmsub132nepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vfnmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x9e,0xf0]
+ vfnmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vfnmsub132nepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0x9e,0xf0]
+ vfnmsub132nepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vfnmsub132nepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0x9e,0xf0]
+ vfnmsub132nepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vfnmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x9e,0xf0]
+ vfnmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vfnmsub132nepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0x9e,0xf0]
+ vfnmsub132nepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vfnmsub132nepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0x9e,0xf0]
+ vfnmsub132nepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vfnmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0x9e,0xf0]
+ vfnmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vfnmsub132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub132nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmsub132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0x9e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub132nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmsub132nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x9e,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub132nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vfnmsub132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x9e,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmsub132nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vfnmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x9e,0x71,0x7f]
+ vfnmsub132nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vfnmsub132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x9e,0x72,0x80]
+ vfnmsub132nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vfnmsub132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub132nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmsub132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x9e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub132nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmsub132nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x9e,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub132nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vfnmsub132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x9e,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmsub132nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vfnmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0x9e,0x71,0x7f]
+ vfnmsub132nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vfnmsub132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x9e,0x72,0x80]
+ vfnmsub132nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vfnmsub132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0x9e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub132nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmsub132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x9e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub132nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmsub132nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x9e,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub132nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vfnmsub132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x9e,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmsub132nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vfnmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x9e,0x71,0x7f]
+ vfnmsub132nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vfnmsub132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x9e,0x72,0x80]
+ vfnmsub132nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vfnmsub213nepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xae,0xf0]
+ vfnmsub213nepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vfnmsub213nepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xae,0xf0]
+ vfnmsub213nepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vfnmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xae,0xf0]
+ vfnmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vfnmsub213nepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xae,0xf0]
+ vfnmsub213nepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vfnmsub213nepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xae,0xf0]
+ vfnmsub213nepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vfnmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xae,0xf0]
+ vfnmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vfnmsub213nepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xae,0xf0]
+ vfnmsub213nepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vfnmsub213nepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xae,0xf0]
+ vfnmsub213nepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vfnmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xae,0xf0]
+ vfnmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vfnmsub213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub213nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmsub213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xae,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub213nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmsub213nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xae,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub213nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vfnmsub213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xae,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmsub213nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vfnmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xae,0x71,0x7f]
+ vfnmsub213nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vfnmsub213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xae,0x72,0x80]
+ vfnmsub213nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vfnmsub213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub213nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmsub213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xae,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub213nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmsub213nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xae,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub213nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vfnmsub213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xae,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmsub213nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vfnmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xae,0x71,0x7f]
+ vfnmsub213nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vfnmsub213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xae,0x72,0x80]
+ vfnmsub213nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vfnmsub213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xae,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub213nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmsub213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xae,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub213nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmsub213nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xae,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub213nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vfnmsub213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xae,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmsub213nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vfnmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xae,0x71,0x7f]
+ vfnmsub213nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vfnmsub213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xae,0x72,0x80]
+ vfnmsub213nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vfnmsub231nepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0xbe,0xf0]
+ vfnmsub231nepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vfnmsub231nepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0xbe,0xf0]
+ vfnmsub231nepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vfnmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0xbe,0xf0]
+ vfnmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vfnmsub231nepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0xbe,0xf0]
+ vfnmsub231nepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vfnmsub231nepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0xbe,0xf0]
+ vfnmsub231nepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vfnmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0xbe,0xf0]
+ vfnmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vfnmsub231nepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0xbe,0xf0]
+ vfnmsub231nepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vfnmsub231nepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0xbe,0xf0]
+ vfnmsub231nepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vfnmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0xbe,0xf0]
+ vfnmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vfnmsub231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub231nepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmsub231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0xbe,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub231nepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmsub231nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0xbe,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub231nepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vfnmsub231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0xbe,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vfnmsub231nepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vfnmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0xbe,0x71,0x7f]
+ vfnmsub231nepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vfnmsub231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0xbe,0x72,0x80]
+ vfnmsub231nepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vfnmsub231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub231nepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmsub231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0xbe,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub231nepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmsub231nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0xbe,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub231nepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vfnmsub231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0xbe,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vfnmsub231nepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vfnmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0xbe,0x71,0x7f]
+ vfnmsub231nepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vfnmsub231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0xbe,0x72,0x80]
+ vfnmsub231nepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vfnmsub231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0xbe,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vfnmsub231nepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vfnmsub231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0xbe,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vfnmsub231nepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vfnmsub231nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0xbe,0x35,0x00,0x00,0x00,0x00]
+ vfnmsub231nepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vfnmsub231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0xbe,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vfnmsub231nepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vfnmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0xbe,0x71,0x7f]
+ vfnmsub231nepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vfnmsub231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0xbe,0x72,0x80]
+ vfnmsub231nepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vfpclasspbf16 k5, zmm23, 123
+// CHECK: encoding: [0x62,0xb3,0x7f,0x48,0x66,0xef,0x7b]
+ vfpclasspbf16 k5, zmm23, 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, zmm23, 123
+// CHECK: encoding: [0x62,0xb3,0x7f,0x4f,0x66,0xef,0x7b]
+ vfpclasspbf16 k5 {k7}, zmm23, 123
+
+// CHECK: vfpclasspbf16 k5, ymm23, 123
+// CHECK: encoding: [0x62,0xb3,0x7f,0x28,0x66,0xef,0x7b]
+ vfpclasspbf16 k5, ymm23, 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, ymm23, 123
+// CHECK: encoding: [0x62,0xb3,0x7f,0x2f,0x66,0xef,0x7b]
+ vfpclasspbf16 k5 {k7}, ymm23, 123
+
+// CHECK: vfpclasspbf16 k5, xmm23, 123
+// CHECK: encoding: [0x62,0xb3,0x7f,0x08,0x66,0xef,0x7b]
+ vfpclasspbf16 k5, xmm23, 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, xmm23, 123
+// CHECK: encoding: [0x62,0xb3,0x7f,0x0f,0x66,0xef,0x7b]
+ vfpclasspbf16 k5 {k7}, xmm23, 123
+
+// CHECK: vfpclasspbf16 k5, xmmword ptr [rbp + 8*r14 + 268435456], 123
+// CHECK: encoding: [0x62,0xb3,0x7f,0x08,0x66,0xac,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vfpclasspbf16 k5, xmmword ptr [rbp + 8*r14 + 268435456], 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, xmmword ptr [r8 + 4*rax + 291], 123
+// CHECK: encoding: [0x62,0xd3,0x7f,0x0f,0x66,0xac,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vfpclasspbf16 k5 {k7}, xmmword ptr [r8 + 4*rax + 291], 123
+
+// CHECK: vfpclasspbf16 k5, word ptr [rip]{1to8}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x18,0x66,0x2d,0x00,0x00,0x00,0x00,0x7b]
+ vfpclasspbf16 k5, word ptr [rip]{1to8}, 123
+
+// CHECK: vfpclasspbf16 k5, xmmword ptr [2*rbp - 512], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x08,0x66,0x2c,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vfpclasspbf16 k5, xmmword ptr [2*rbp - 512], 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, xmmword ptr [rcx + 2032], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x0f,0x66,0x69,0x7f,0x7b]
+ vfpclasspbf16 k5 {k7}, xmmword ptr [rcx + 2032], 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, word ptr [rdx - 256]{1to8}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x1f,0x66,0x6a,0x80,0x7b]
+ vfpclasspbf16 k5 {k7}, word ptr [rdx - 256]{1to8}, 123
+
+// CHECK: vfpclasspbf16 k5, word ptr [rip]{1to16}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x38,0x66,0x2d,0x00,0x00,0x00,0x00,0x7b]
+ vfpclasspbf16 k5, word ptr [rip]{1to16}, 123
+
+// CHECK: vfpclasspbf16 k5, ymmword ptr [2*rbp - 1024], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x28,0x66,0x2c,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vfpclasspbf16 k5, ymmword ptr [2*rbp - 1024], 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, ymmword ptr [rcx + 4064], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x2f,0x66,0x69,0x7f,0x7b]
+ vfpclasspbf16 k5 {k7}, ymmword ptr [rcx + 4064], 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, word ptr [rdx - 256]{1to16}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x3f,0x66,0x6a,0x80,0x7b]
+ vfpclasspbf16 k5 {k7}, word ptr [rdx - 256]{1to16}, 123
+
+// CHECK: vfpclasspbf16 k5, word ptr [rip]{1to32}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x58,0x66,0x2d,0x00,0x00,0x00,0x00,0x7b]
+ vfpclasspbf16 k5, word ptr [rip]{1to32}, 123
+
+// CHECK: vfpclasspbf16 k5, zmmword ptr [2*rbp - 2048], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x48,0x66,0x2c,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vfpclasspbf16 k5, zmmword ptr [2*rbp - 2048], 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, zmmword ptr [rcx + 8128], 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x4f,0x66,0x69,0x7f,0x7b]
+ vfpclasspbf16 k5 {k7}, zmmword ptr [rcx + 8128], 123
+
+// CHECK: vfpclasspbf16 k5 {k7}, word ptr [rdx - 256]{1to32}, 123
+// CHECK: encoding: [0x62,0xf3,0x7f,0x5f,0x66,0x6a,0x80,0x7b]
+ vfpclasspbf16 k5 {k7}, word ptr [rdx - 256]{1to32}, 123
+
+// CHECK: vgetexppbf16 xmm22, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x42,0xf7]
+ vgetexppbf16 xmm22, xmm23
+
+// CHECK: vgetexppbf16 xmm22 {k7}, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x0f,0x42,0xf7]
+ vgetexppbf16 xmm22 {k7}, xmm23
+
+// CHECK: vgetexppbf16 xmm22 {k7} {z}, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x8f,0x42,0xf7]
+ vgetexppbf16 xmm22 {k7} {z}, xmm23
+
+// CHECK: vgetexppbf16 zmm22, zmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x42,0xf7]
+ vgetexppbf16 zmm22, zmm23
+
+// CHECK: vgetexppbf16 zmm22 {k7}, zmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x4f,0x42,0xf7]
+ vgetexppbf16 zmm22 {k7}, zmm23
+
+// CHECK: vgetexppbf16 zmm22 {k7} {z}, zmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0xcf,0x42,0xf7]
+ vgetexppbf16 zmm22 {k7} {z}, zmm23
+
+// CHECK: vgetexppbf16 ymm22, ymm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x42,0xf7]
+ vgetexppbf16 ymm22, ymm23
+
+// CHECK: vgetexppbf16 ymm22 {k7}, ymm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x2f,0x42,0xf7]
+ vgetexppbf16 ymm22 {k7}, ymm23
+
+// CHECK: vgetexppbf16 ymm22 {k7} {z}, ymm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0xaf,0x42,0xf7]
+ vgetexppbf16 ymm22 {k7} {z}, ymm23
+
+// CHECK: vgetexppbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vgetexppbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vgetexppbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7d,0x0f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vgetexppbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vgetexppbf16 xmm22, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x18,0x42,0x35,0x00,0x00,0x00,0x00]
+ vgetexppbf16 xmm22, word ptr [rip]{1to8}
+
+// CHECK: vgetexppbf16 xmm22, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x42,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vgetexppbf16 xmm22, xmmword ptr [2*rbp - 512]
+
+// CHECK: vgetexppbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x8f,0x42,0x71,0x7f]
+ vgetexppbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+
+// CHECK: vgetexppbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x9f,0x42,0x72,0x80]
+ vgetexppbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vgetexppbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vgetexppbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vgetexppbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7d,0x2f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vgetexppbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vgetexppbf16 ymm22, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x38,0x42,0x35,0x00,0x00,0x00,0x00]
+ vgetexppbf16 ymm22, word ptr [rip]{1to16}
+
+// CHECK: vgetexppbf16 ymm22, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x28,0x42,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vgetexppbf16 ymm22, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vgetexppbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe5,0x7d,0xaf,0x42,0x71,0x7f]
+ vgetexppbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+
+// CHECK: vgetexppbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xbf,0x42,0x72,0x80]
+ vgetexppbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}
+
+// CHECK: vgetexppbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x42,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vgetexppbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vgetexppbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7d,0x4f,0x42,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vgetexppbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vgetexppbf16 zmm22, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x58,0x42,0x35,0x00,0x00,0x00,0x00]
+ vgetexppbf16 zmm22, word ptr [rip]{1to32}
+
+// CHECK: vgetexppbf16 zmm22, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x48,0x42,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vgetexppbf16 zmm22, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vgetexppbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0x7d,0xcf,0x42,0x71,0x7f]
+ vgetexppbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+
+// CHECK: vgetexppbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xdf,0x42,0x72,0x80]
+ vgetexppbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}
+
+// CHECK: vgetmantpbf16 zmm22, zmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x26,0xf7,0x7b]
+ vgetmantpbf16 zmm22, zmm23, 123
+
+// CHECK: vgetmantpbf16 zmm22 {k7}, zmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x4f,0x26,0xf7,0x7b]
+ vgetmantpbf16 zmm22 {k7}, zmm23, 123
+
+// CHECK: vgetmantpbf16 zmm22 {k7} {z}, zmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0xcf,0x26,0xf7,0x7b]
+ vgetmantpbf16 zmm22 {k7} {z}, zmm23, 123
+
+// CHECK: vgetmantpbf16 ymm22, ymm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x26,0xf7,0x7b]
+ vgetmantpbf16 ymm22, ymm23, 123
+
+// CHECK: vgetmantpbf16 ymm22 {k7}, ymm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x2f,0x26,0xf7,0x7b]
+ vgetmantpbf16 ymm22 {k7}, ymm23, 123
+
+// CHECK: vgetmantpbf16 ymm22 {k7} {z}, ymm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0xaf,0x26,0xf7,0x7b]
+ vgetmantpbf16 ymm22 {k7} {z}, ymm23, 123
+
+// CHECK: vgetmantpbf16 xmm22, xmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x26,0xf7,0x7b]
+ vgetmantpbf16 xmm22, xmm23, 123
+
+// CHECK: vgetmantpbf16 xmm22 {k7}, xmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x0f,0x26,0xf7,0x7b]
+ vgetmantpbf16 xmm22 {k7}, xmm23, 123
+
+// CHECK: vgetmantpbf16 xmm22 {k7} {z}, xmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x8f,0x26,0xf7,0x7b]
+ vgetmantpbf16 xmm22 {k7} {z}, xmm23, 123
+
+// CHECK: vgetmantpbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456], 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x26,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vgetmantpbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456], 123
+
+// CHECK: vgetmantpbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291], 123
+// CHECK: encoding: [0x62,0xc3,0x7f,0x0f,0x26,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantpbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291], 123
+
+// CHECK: vgetmantpbf16 xmm22, word ptr [rip]{1to8}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x18,0x26,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vgetmantpbf16 xmm22, word ptr [rip]{1to8}, 123
+
+// CHECK: vgetmantpbf16 xmm22, xmmword ptr [2*rbp - 512], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x08,0x26,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vgetmantpbf16 xmm22, xmmword ptr [2*rbp - 512], 123
+
+// CHECK: vgetmantpbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x8f,0x26,0x71,0x7f,0x7b]
+ vgetmantpbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032], 123
+
+// CHECK: vgetmantpbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x9f,0x26,0x72,0x80,0x7b]
+ vgetmantpbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}, 123
+
+// CHECK: vgetmantpbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456], 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x26,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vgetmantpbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456], 123
+
+// CHECK: vgetmantpbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291], 123
+// CHECK: encoding: [0x62,0xc3,0x7f,0x2f,0x26,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantpbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291], 123
+
+// CHECK: vgetmantpbf16 ymm22, word ptr [rip]{1to16}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x38,0x26,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vgetmantpbf16 ymm22, word ptr [rip]{1to16}, 123
+
+// CHECK: vgetmantpbf16 ymm22, ymmword ptr [2*rbp - 1024], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x28,0x26,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vgetmantpbf16 ymm22, ymmword ptr [2*rbp - 1024], 123
+
+// CHECK: vgetmantpbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0xaf,0x26,0x71,0x7f,0x7b]
+ vgetmantpbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064], 123
+
+// CHECK: vgetmantpbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0xbf,0x26,0x72,0x80,0x7b]
+ vgetmantpbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}, 123
+
+// CHECK: vgetmantpbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456], 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x26,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vgetmantpbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456], 123
+
+// CHECK: vgetmantpbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291], 123
+// CHECK: encoding: [0x62,0xc3,0x7f,0x4f,0x26,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantpbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291], 123
+
+// CHECK: vgetmantpbf16 zmm22, word ptr [rip]{1to32}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x58,0x26,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vgetmantpbf16 zmm22, word ptr [rip]{1to32}, 123
+
+// CHECK: vgetmantpbf16 zmm22, zmmword ptr [2*rbp - 2048], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x26,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vgetmantpbf16 zmm22, zmmword ptr [2*rbp - 2048], 123
+
+// CHECK: vgetmantpbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0xcf,0x26,0x71,0x7f,0x7b]
+ vgetmantpbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128], 123
+
+// CHECK: vgetmantpbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0xdf,0x26,0x72,0x80,0x7b]
+ vgetmantpbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}, 123
+
+// CHECK: vmaxpbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0x20,0x5f,0xf0]
+ vmaxpbf16 ymm22, ymm23, ymm24
+
+// CHECK: vmaxpbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0x27,0x5f,0xf0]
+ vmaxpbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vmaxpbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x5f,0xf0]
+ vmaxpbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vmaxpbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x40,0x5f,0xf0]
+ vmaxpbf16 zmm22, zmm23, zmm24
+
+// CHECK: vmaxpbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x47,0x5f,0xf0]
+ vmaxpbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vmaxpbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x5f,0xf0]
+ vmaxpbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vmaxpbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x00,0x5f,0xf0]
+ vmaxpbf16 xmm22, xmm23, xmm24
+
+// CHECK: vmaxpbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x07,0x5f,0xf0]
+ vmaxpbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vmaxpbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x87,0x5f,0xf0]
+ vmaxpbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vmaxpbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x5f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmaxpbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vmaxpbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x5f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmaxpbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vmaxpbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x5f,0x35,0x00,0x00,0x00,0x00]
+ vmaxpbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vmaxpbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x5f,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vmaxpbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vmaxpbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x5f,0x71,0x7f]
+ vmaxpbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vmaxpbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x5f,0x72,0x80]
+ vmaxpbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vmaxpbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x5f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmaxpbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vmaxpbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x5f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmaxpbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vmaxpbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x5f,0x35,0x00,0x00,0x00,0x00]
+ vmaxpbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vmaxpbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x5f,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vmaxpbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vmaxpbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x5f,0x71,0x7f]
+ vmaxpbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vmaxpbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x5f,0x72,0x80]
+ vmaxpbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vmaxpbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x5f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmaxpbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vmaxpbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x5f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmaxpbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vmaxpbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x5f,0x35,0x00,0x00,0x00,0x00]
+ vmaxpbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vmaxpbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x5f,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vmaxpbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vmaxpbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x5f,0x71,0x7f]
+ vmaxpbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vmaxpbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x5f,0x72,0x80]
+ vmaxpbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vminpbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0x20,0x5d,0xf0]
+ vminpbf16 ymm22, ymm23, ymm24
+
+// CHECK: vminpbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0x27,0x5d,0xf0]
+ vminpbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vminpbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x5d,0xf0]
+ vminpbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vminpbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x40,0x5d,0xf0]
+ vminpbf16 zmm22, zmm23, zmm24
+
+// CHECK: vminpbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x47,0x5d,0xf0]
+ vminpbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vminpbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x5d,0xf0]
+ vminpbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vminpbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x00,0x5d,0xf0]
+ vminpbf16 xmm22, xmm23, xmm24
+
+// CHECK: vminpbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x07,0x5d,0xf0]
+ vminpbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vminpbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x87,0x5d,0xf0]
+ vminpbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vminpbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x5d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vminpbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vminpbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x5d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vminpbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vminpbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x5d,0x35,0x00,0x00,0x00,0x00]
+ vminpbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vminpbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x5d,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vminpbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vminpbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x5d,0x71,0x7f]
+ vminpbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vminpbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x5d,0x72,0x80]
+ vminpbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vminpbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x5d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vminpbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vminpbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x5d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vminpbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vminpbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x5d,0x35,0x00,0x00,0x00,0x00]
+ vminpbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vminpbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x5d,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vminpbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vminpbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x5d,0x71,0x7f]
+ vminpbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vminpbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x5d,0x72,0x80]
+ vminpbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vminpbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x5d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vminpbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vminpbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x5d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vminpbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vminpbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x5d,0x35,0x00,0x00,0x00,0x00]
+ vminpbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vminpbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x5d,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vminpbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vminpbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x5d,0x71,0x7f]
+ vminpbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vminpbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x5d,0x72,0x80]
+ vminpbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vmulnepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0x20,0x59,0xf0]
+ vmulnepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vmulnepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0x27,0x59,0xf0]
+ vmulnepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vmulnepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x59,0xf0]
+ vmulnepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vmulnepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x40,0x59,0xf0]
+ vmulnepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vmulnepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x47,0x59,0xf0]
+ vmulnepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vmulnepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x59,0xf0]
+ vmulnepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vmulnepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x00,0x59,0xf0]
+ vmulnepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vmulnepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x07,0x59,0xf0]
+ vmulnepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vmulnepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x87,0x59,0xf0]
+ vmulnepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vmulnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x59,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmulnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vmulnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x59,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmulnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vmulnepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x59,0x35,0x00,0x00,0x00,0x00]
+ vmulnepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vmulnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x59,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vmulnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vmulnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x59,0x71,0x7f]
+ vmulnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vmulnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x59,0x72,0x80]
+ vmulnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vmulnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x59,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmulnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vmulnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x59,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmulnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vmulnepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x59,0x35,0x00,0x00,0x00,0x00]
+ vmulnepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vmulnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x59,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vmulnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vmulnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x59,0x71,0x7f]
+ vmulnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vmulnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x59,0x72,0x80]
+ vmulnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vmulnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x59,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmulnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vmulnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x59,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmulnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vmulnepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x59,0x35,0x00,0x00,0x00,0x00]
+ vmulnepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vmulnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x59,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vmulnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vmulnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x59,0x71,0x7f]
+ vmulnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vmulnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x59,0x72,0x80]
+ vmulnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vrcppbf16 xmm22, xmm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0x08,0x4c,0xf7]
+ vrcppbf16 xmm22, xmm23
+
+// CHECK: vrcppbf16 xmm22 {k7}, xmm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0x0f,0x4c,0xf7]
+ vrcppbf16 xmm22 {k7}, xmm23
+
+// CHECK: vrcppbf16 xmm22 {k7} {z}, xmm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0x8f,0x4c,0xf7]
+ vrcppbf16 xmm22 {k7} {z}, xmm23
+
+// CHECK: vrcppbf16 zmm22, zmm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0x48,0x4c,0xf7]
+ vrcppbf16 zmm22, zmm23
+
+// CHECK: vrcppbf16 zmm22 {k7}, zmm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0x4f,0x4c,0xf7]
+ vrcppbf16 zmm22 {k7}, zmm23
+
+// CHECK: vrcppbf16 zmm22 {k7} {z}, zmm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0xcf,0x4c,0xf7]
+ vrcppbf16 zmm22 {k7} {z}, zmm23
+
+// CHECK: vrcppbf16 ymm22, ymm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0x28,0x4c,0xf7]
+ vrcppbf16 ymm22, ymm23
+
+// CHECK: vrcppbf16 ymm22 {k7}, ymm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0x2f,0x4c,0xf7]
+ vrcppbf16 ymm22 {k7}, ymm23
+
+// CHECK: vrcppbf16 ymm22 {k7} {z}, ymm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0xaf,0x4c,0xf7]
+ vrcppbf16 ymm22 {k7} {z}, ymm23
+
+// CHECK: vrcppbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x7c,0x08,0x4c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vrcppbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vrcppbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x7c,0x0f,0x4c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vrcppbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vrcppbf16 xmm22, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x7c,0x18,0x4c,0x35,0x00,0x00,0x00,0x00]
+ vrcppbf16 xmm22, word ptr [rip]{1to8}
+
+// CHECK: vrcppbf16 xmm22, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x7c,0x08,0x4c,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vrcppbf16 xmm22, xmmword ptr [2*rbp - 512]
+
+// CHECK: vrcppbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x7c,0x8f,0x4c,0x71,0x7f]
+ vrcppbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+
+// CHECK: vrcppbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x7c,0x9f,0x4c,0x72,0x80]
+ vrcppbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vrcppbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x7c,0x28,0x4c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vrcppbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vrcppbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x7c,0x2f,0x4c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vrcppbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vrcppbf16 ymm22, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x7c,0x38,0x4c,0x35,0x00,0x00,0x00,0x00]
+ vrcppbf16 ymm22, word ptr [rip]{1to16}
+
+// CHECK: vrcppbf16 ymm22, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x7c,0x28,0x4c,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vrcppbf16 ymm22, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vrcppbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x7c,0xaf,0x4c,0x71,0x7f]
+ vrcppbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+
+// CHECK: vrcppbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x7c,0xbf,0x4c,0x72,0x80]
+ vrcppbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}
+
+// CHECK: vrcppbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x7c,0x48,0x4c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vrcppbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vrcppbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x7c,0x4f,0x4c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vrcppbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vrcppbf16 zmm22, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x7c,0x58,0x4c,0x35,0x00,0x00,0x00,0x00]
+ vrcppbf16 zmm22, word ptr [rip]{1to32}
+
+// CHECK: vrcppbf16 zmm22, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x7c,0x48,0x4c,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vrcppbf16 zmm22, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vrcppbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x7c,0xcf,0x4c,0x71,0x7f]
+ vrcppbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+
+// CHECK: vrcppbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x7c,0xdf,0x4c,0x72,0x80]
+ vrcppbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}
+
+// CHECK: vreducenepbf16 zmm22, zmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x56,0xf7,0x7b]
+ vreducenepbf16 zmm22, zmm23, 123
+
+// CHECK: vreducenepbf16 zmm22 {k7}, zmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x4f,0x56,0xf7,0x7b]
+ vreducenepbf16 zmm22 {k7}, zmm23, 123
+
+// CHECK: vreducenepbf16 zmm22 {k7} {z}, zmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0xcf,0x56,0xf7,0x7b]
+ vreducenepbf16 zmm22 {k7} {z}, zmm23, 123
+
+// CHECK: vreducenepbf16 ymm22, ymm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x56,0xf7,0x7b]
+ vreducenepbf16 ymm22, ymm23, 123
+
+// CHECK: vreducenepbf16 ymm22 {k7}, ymm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x2f,0x56,0xf7,0x7b]
+ vreducenepbf16 ymm22 {k7}, ymm23, 123
+
+// CHECK: vreducenepbf16 ymm22 {k7} {z}, ymm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0xaf,0x56,0xf7,0x7b]
+ vreducenepbf16 ymm22 {k7} {z}, ymm23, 123
+
+// CHECK: vreducenepbf16 xmm22, xmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x56,0xf7,0x7b]
+ vreducenepbf16 xmm22, xmm23, 123
+
+// CHECK: vreducenepbf16 xmm22 {k7}, xmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x0f,0x56,0xf7,0x7b]
+ vreducenepbf16 xmm22 {k7}, xmm23, 123
+
+// CHECK: vreducenepbf16 xmm22 {k7} {z}, xmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x8f,0x56,0xf7,0x7b]
+ vreducenepbf16 xmm22 {k7} {z}, xmm23, 123
+
+// CHECK: vreducenepbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456], 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x56,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vreducenepbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456], 123
+
+// CHECK: vreducenepbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291], 123
+// CHECK: encoding: [0x62,0xc3,0x7f,0x0f,0x56,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vreducenepbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291], 123
+
+// CHECK: vreducenepbf16 xmm22, word ptr [rip]{1to8}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x18,0x56,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vreducenepbf16 xmm22, word ptr [rip]{1to8}, 123
+
+// CHECK: vreducenepbf16 xmm22, xmmword ptr [2*rbp - 512], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x08,0x56,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vreducenepbf16 xmm22, xmmword ptr [2*rbp - 512], 123
+
+// CHECK: vreducenepbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x8f,0x56,0x71,0x7f,0x7b]
+ vreducenepbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032], 123
+
+// CHECK: vreducenepbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x9f,0x56,0x72,0x80,0x7b]
+ vreducenepbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}, 123
+
+// CHECK: vreducenepbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456], 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x56,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vreducenepbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456], 123
+
+// CHECK: vreducenepbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291], 123
+// CHECK: encoding: [0x62,0xc3,0x7f,0x2f,0x56,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vreducenepbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291], 123
+
+// CHECK: vreducenepbf16 ymm22, word ptr [rip]{1to16}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x38,0x56,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vreducenepbf16 ymm22, word ptr [rip]{1to16}, 123
+
+// CHECK: vreducenepbf16 ymm22, ymmword ptr [2*rbp - 1024], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x28,0x56,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vreducenepbf16 ymm22, ymmword ptr [2*rbp - 1024], 123
+
+// CHECK: vreducenepbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0xaf,0x56,0x71,0x7f,0x7b]
+ vreducenepbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064], 123
+
+// CHECK: vreducenepbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0xbf,0x56,0x72,0x80,0x7b]
+ vreducenepbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}, 123
+
+// CHECK: vreducenepbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456], 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x56,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vreducenepbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456], 123
+
+// CHECK: vreducenepbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291], 123
+// CHECK: encoding: [0x62,0xc3,0x7f,0x4f,0x56,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vreducenepbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291], 123
+
+// CHECK: vreducenepbf16 zmm22, word ptr [rip]{1to32}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x58,0x56,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vreducenepbf16 zmm22, word ptr [rip]{1to32}, 123
+
+// CHECK: vreducenepbf16 zmm22, zmmword ptr [2*rbp - 2048], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x56,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vreducenepbf16 zmm22, zmmword ptr [2*rbp - 2048], 123
+
+// CHECK: vreducenepbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0xcf,0x56,0x71,0x7f,0x7b]
+ vreducenepbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128], 123
+
+// CHECK: vreducenepbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0xdf,0x56,0x72,0x80,0x7b]
+ vreducenepbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}, 123
+
+// CHECK: vrndscalenepbf16 zmm22, zmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x08,0xf7,0x7b]
+ vrndscalenepbf16 zmm22, zmm23, 123
+
+// CHECK: vrndscalenepbf16 zmm22 {k7}, zmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x4f,0x08,0xf7,0x7b]
+ vrndscalenepbf16 zmm22 {k7}, zmm23, 123
+
+// CHECK: vrndscalenepbf16 zmm22 {k7} {z}, zmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0xcf,0x08,0xf7,0x7b]
+ vrndscalenepbf16 zmm22 {k7} {z}, zmm23, 123
+
+// CHECK: vrndscalenepbf16 ymm22, ymm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x08,0xf7,0x7b]
+ vrndscalenepbf16 ymm22, ymm23, 123
+
+// CHECK: vrndscalenepbf16 ymm22 {k7}, ymm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x2f,0x08,0xf7,0x7b]
+ vrndscalenepbf16 ymm22 {k7}, ymm23, 123
+
+// CHECK: vrndscalenepbf16 ymm22 {k7} {z}, ymm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0xaf,0x08,0xf7,0x7b]
+ vrndscalenepbf16 ymm22 {k7} {z}, ymm23, 123
+
+// CHECK: vrndscalenepbf16 xmm22, xmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x08,0xf7,0x7b]
+ vrndscalenepbf16 xmm22, xmm23, 123
+
+// CHECK: vrndscalenepbf16 xmm22 {k7}, xmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x0f,0x08,0xf7,0x7b]
+ vrndscalenepbf16 xmm22 {k7}, xmm23, 123
+
+// CHECK: vrndscalenepbf16 xmm22 {k7} {z}, xmm23, 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x8f,0x08,0xf7,0x7b]
+ vrndscalenepbf16 xmm22 {k7} {z}, xmm23, 123
+
+// CHECK: vrndscalenepbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456], 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x08,0x08,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vrndscalenepbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456], 123
+
+// CHECK: vrndscalenepbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291], 123
+// CHECK: encoding: [0x62,0xc3,0x7f,0x0f,0x08,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vrndscalenepbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291], 123
+
+// CHECK: vrndscalenepbf16 xmm22, word ptr [rip]{1to8}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x18,0x08,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vrndscalenepbf16 xmm22, word ptr [rip]{1to8}, 123
+
+// CHECK: vrndscalenepbf16 xmm22, xmmword ptr [2*rbp - 512], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x08,0x08,0x34,0x6d,0x00,0xfe,0xff,0xff,0x7b]
+ vrndscalenepbf16 xmm22, xmmword ptr [2*rbp - 512], 123
+
+// CHECK: vrndscalenepbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x8f,0x08,0x71,0x7f,0x7b]
+ vrndscalenepbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032], 123
+
+// CHECK: vrndscalenepbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x9f,0x08,0x72,0x80,0x7b]
+ vrndscalenepbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}, 123
+
+// CHECK: vrndscalenepbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456], 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x28,0x08,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vrndscalenepbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456], 123
+
+// CHECK: vrndscalenepbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291], 123
+// CHECK: encoding: [0x62,0xc3,0x7f,0x2f,0x08,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vrndscalenepbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291], 123
+
+// CHECK: vrndscalenepbf16 ymm22, word ptr [rip]{1to16}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x38,0x08,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vrndscalenepbf16 ymm22, word ptr [rip]{1to16}, 123
+
+// CHECK: vrndscalenepbf16 ymm22, ymmword ptr [2*rbp - 1024], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x28,0x08,0x34,0x6d,0x00,0xfc,0xff,0xff,0x7b]
+ vrndscalenepbf16 ymm22, ymmword ptr [2*rbp - 1024], 123
+
+// CHECK: vrndscalenepbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0xaf,0x08,0x71,0x7f,0x7b]
+ vrndscalenepbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064], 123
+
+// CHECK: vrndscalenepbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0xbf,0x08,0x72,0x80,0x7b]
+ vrndscalenepbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}, 123
+
+// CHECK: vrndscalenepbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456], 123
+// CHECK: encoding: [0x62,0xa3,0x7f,0x48,0x08,0xb4,0xf5,0x00,0x00,0x00,0x10,0x7b]
+ vrndscalenepbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456], 123
+
+// CHECK: vrndscalenepbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291], 123
+// CHECK: encoding: [0x62,0xc3,0x7f,0x4f,0x08,0xb4,0x80,0x23,0x01,0x00,0x00,0x7b]
+ vrndscalenepbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291], 123
+
+// CHECK: vrndscalenepbf16 zmm22, word ptr [rip]{1to32}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x58,0x08,0x35,0x00,0x00,0x00,0x00,0x7b]
+ vrndscalenepbf16 zmm22, word ptr [rip]{1to32}, 123
+
+// CHECK: vrndscalenepbf16 zmm22, zmmword ptr [2*rbp - 2048], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x08,0x34,0x6d,0x00,0xf8,0xff,0xff,0x7b]
+ vrndscalenepbf16 zmm22, zmmword ptr [2*rbp - 2048], 123
+
+// CHECK: vrndscalenepbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128], 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0xcf,0x08,0x71,0x7f,0x7b]
+ vrndscalenepbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128], 123
+
+// CHECK: vrndscalenepbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}, 123
+// CHECK: encoding: [0x62,0xe3,0x7f,0xdf,0x08,0x72,0x80,0x7b]
+ vrndscalenepbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}, 123
+
+// CHECK: vrsqrtpbf16 xmm22, xmm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0x08,0x4e,0xf7]
+ vrsqrtpbf16 xmm22, xmm23
+
+// CHECK: vrsqrtpbf16 xmm22 {k7}, xmm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0x0f,0x4e,0xf7]
+ vrsqrtpbf16 xmm22 {k7}, xmm23
+
+// CHECK: vrsqrtpbf16 xmm22 {k7} {z}, xmm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0x8f,0x4e,0xf7]
+ vrsqrtpbf16 xmm22 {k7} {z}, xmm23
+
+// CHECK: vrsqrtpbf16 zmm22, zmm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0x48,0x4e,0xf7]
+ vrsqrtpbf16 zmm22, zmm23
+
+// CHECK: vrsqrtpbf16 zmm22 {k7}, zmm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0x4f,0x4e,0xf7]
+ vrsqrtpbf16 zmm22 {k7}, zmm23
+
+// CHECK: vrsqrtpbf16 zmm22 {k7} {z}, zmm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0xcf,0x4e,0xf7]
+ vrsqrtpbf16 zmm22 {k7} {z}, zmm23
+
+// CHECK: vrsqrtpbf16 ymm22, ymm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0x28,0x4e,0xf7]
+ vrsqrtpbf16 ymm22, ymm23
+
+// CHECK: vrsqrtpbf16 ymm22 {k7}, ymm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0x2f,0x4e,0xf7]
+ vrsqrtpbf16 ymm22 {k7}, ymm23
+
+// CHECK: vrsqrtpbf16 ymm22 {k7} {z}, ymm23
+// CHECK: encoding: [0x62,0xa6,0x7c,0xaf,0x4e,0xf7]
+ vrsqrtpbf16 ymm22 {k7} {z}, ymm23
+
+// CHECK: vrsqrtpbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x7c,0x08,0x4e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vrsqrtpbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vrsqrtpbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x7c,0x0f,0x4e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vrsqrtpbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vrsqrtpbf16 xmm22, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x7c,0x18,0x4e,0x35,0x00,0x00,0x00,0x00]
+ vrsqrtpbf16 xmm22, word ptr [rip]{1to8}
+
+// CHECK: vrsqrtpbf16 xmm22, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x7c,0x08,0x4e,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vrsqrtpbf16 xmm22, xmmword ptr [2*rbp - 512]
+
+// CHECK: vrsqrtpbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x7c,0x8f,0x4e,0x71,0x7f]
+ vrsqrtpbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+
+// CHECK: vrsqrtpbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x7c,0x9f,0x4e,0x72,0x80]
+ vrsqrtpbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vrsqrtpbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x7c,0x28,0x4e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vrsqrtpbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vrsqrtpbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x7c,0x2f,0x4e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vrsqrtpbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vrsqrtpbf16 ymm22, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x7c,0x38,0x4e,0x35,0x00,0x00,0x00,0x00]
+ vrsqrtpbf16 ymm22, word ptr [rip]{1to16}
+
+// CHECK: vrsqrtpbf16 ymm22, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x7c,0x28,0x4e,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vrsqrtpbf16 ymm22, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vrsqrtpbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x7c,0xaf,0x4e,0x71,0x7f]
+ vrsqrtpbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+
+// CHECK: vrsqrtpbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x7c,0xbf,0x4e,0x72,0x80]
+ vrsqrtpbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}
+
+// CHECK: vrsqrtpbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x7c,0x48,0x4e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vrsqrtpbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vrsqrtpbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x7c,0x4f,0x4e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vrsqrtpbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vrsqrtpbf16 zmm22, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x7c,0x58,0x4e,0x35,0x00,0x00,0x00,0x00]
+ vrsqrtpbf16 zmm22, word ptr [rip]{1to32}
+
+// CHECK: vrsqrtpbf16 zmm22, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x7c,0x48,0x4e,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vrsqrtpbf16 zmm22, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vrsqrtpbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x7c,0xcf,0x4e,0x71,0x7f]
+ vrsqrtpbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+
+// CHECK: vrsqrtpbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x7c,0xdf,0x4e,0x72,0x80]
+ vrsqrtpbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}
+
+// CHECK: vscalefpbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x20,0x2c,0xf0]
+ vscalefpbf16 ymm22, ymm23, ymm24
+
+// CHECK: vscalefpbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0x27,0x2c,0xf0]
+ vscalefpbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vscalefpbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x86,0x44,0xa7,0x2c,0xf0]
+ vscalefpbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vscalefpbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x40,0x2c,0xf0]
+ vscalefpbf16 zmm22, zmm23, zmm24
+
+// CHECK: vscalefpbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x47,0x2c,0xf0]
+ vscalefpbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vscalefpbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x86,0x44,0xc7,0x2c,0xf0]
+ vscalefpbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vscalefpbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x00,0x2c,0xf0]
+ vscalefpbf16 xmm22, xmm23, xmm24
+
+// CHECK: vscalefpbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x07,0x2c,0xf0]
+ vscalefpbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vscalefpbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x86,0x44,0x87,0x2c,0xf0]
+ vscalefpbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vscalefpbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x40,0x2c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vscalefpbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vscalefpbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x47,0x2c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vscalefpbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vscalefpbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0x50,0x2c,0x35,0x00,0x00,0x00,0x00]
+ vscalefpbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vscalefpbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe6,0x44,0x40,0x2c,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vscalefpbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vscalefpbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe6,0x44,0xc7,0x2c,0x71,0x7f]
+ vscalefpbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vscalefpbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe6,0x44,0xd7,0x2c,0x72,0x80]
+ vscalefpbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vscalefpbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x20,0x2c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vscalefpbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vscalefpbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x27,0x2c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vscalefpbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vscalefpbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0x30,0x2c,0x35,0x00,0x00,0x00,0x00]
+ vscalefpbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vscalefpbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe6,0x44,0x20,0x2c,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vscalefpbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vscalefpbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe6,0x44,0xa7,0x2c,0x71,0x7f]
+ vscalefpbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vscalefpbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe6,0x44,0xb7,0x2c,0x72,0x80]
+ vscalefpbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vscalefpbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa6,0x44,0x00,0x2c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vscalefpbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vscalefpbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc6,0x44,0x07,0x2c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vscalefpbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vscalefpbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x10,0x2c,0x35,0x00,0x00,0x00,0x00]
+ vscalefpbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vscalefpbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe6,0x44,0x00,0x2c,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vscalefpbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vscalefpbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe6,0x44,0x87,0x2c,0x71,0x7f]
+ vscalefpbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vscalefpbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe6,0x44,0x97,0x2c,0x72,0x80]
+ vscalefpbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
+// CHECK: vsqrtnepbf16 xmm22, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x51,0xf7]
+ vsqrtnepbf16 xmm22, xmm23
+
+// CHECK: vsqrtnepbf16 xmm22 {k7}, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x0f,0x51,0xf7]
+ vsqrtnepbf16 xmm22 {k7}, xmm23
+
+// CHECK: vsqrtnepbf16 xmm22 {k7} {z}, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x8f,0x51,0xf7]
+ vsqrtnepbf16 xmm22 {k7} {z}, xmm23
+
+// CHECK: vsqrtnepbf16 zmm22, zmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x51,0xf7]
+ vsqrtnepbf16 zmm22, zmm23
+
+// CHECK: vsqrtnepbf16 zmm22 {k7}, zmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x4f,0x51,0xf7]
+ vsqrtnepbf16 zmm22 {k7}, zmm23
+
+// CHECK: vsqrtnepbf16 zmm22 {k7} {z}, zmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0xcf,0x51,0xf7]
+ vsqrtnepbf16 zmm22 {k7} {z}, zmm23
+
+// CHECK: vsqrtnepbf16 ymm22, ymm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x51,0xf7]
+ vsqrtnepbf16 ymm22, ymm23
+
+// CHECK: vsqrtnepbf16 ymm22 {k7}, ymm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x2f,0x51,0xf7]
+ vsqrtnepbf16 ymm22 {k7}, ymm23
+
+// CHECK: vsqrtnepbf16 ymm22 {k7} {z}, ymm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0xaf,0x51,0xf7]
+ vsqrtnepbf16 ymm22 {k7} {z}, ymm23
+
+// CHECK: vsqrtnepbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsqrtnepbf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vsqrtnepbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7d,0x0f,0x51,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsqrtnepbf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vsqrtnepbf16 xmm22, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x18,0x51,0x35,0x00,0x00,0x00,0x00]
+ vsqrtnepbf16 xmm22, word ptr [rip]{1to8}
+
+// CHECK: vsqrtnepbf16 xmm22, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x51,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vsqrtnepbf16 xmm22, xmmword ptr [2*rbp - 512]
+
+// CHECK: vsqrtnepbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x8f,0x51,0x71,0x7f]
+ vsqrtnepbf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+
+// CHECK: vsqrtnepbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x9f,0x51,0x72,0x80]
+ vsqrtnepbf16 xmm22 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vsqrtnepbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsqrtnepbf16 ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vsqrtnepbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7d,0x2f,0x51,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsqrtnepbf16 ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vsqrtnepbf16 ymm22, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x38,0x51,0x35,0x00,0x00,0x00,0x00]
+ vsqrtnepbf16 ymm22, word ptr [rip]{1to16}
+
+// CHECK: vsqrtnepbf16 ymm22, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x28,0x51,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vsqrtnepbf16 ymm22, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vsqrtnepbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe5,0x7d,0xaf,0x51,0x71,0x7f]
+ vsqrtnepbf16 ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+
+// CHECK: vsqrtnepbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xbf,0x51,0x72,0x80]
+ vsqrtnepbf16 ymm22 {k7} {z}, word ptr [rdx - 256]{1to16}
+
+// CHECK: vsqrtnepbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsqrtnepbf16 zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vsqrtnepbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7d,0x4f,0x51,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsqrtnepbf16 zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vsqrtnepbf16 zmm22, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x58,0x51,0x35,0x00,0x00,0x00,0x00]
+ vsqrtnepbf16 zmm22, word ptr [rip]{1to32}
+
+// CHECK: vsqrtnepbf16 zmm22, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x48,0x51,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vsqrtnepbf16 zmm22, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vsqrtnepbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0x7d,0xcf,0x51,0x71,0x7f]
+ vsqrtnepbf16 zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+
+// CHECK: vsqrtnepbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xdf,0x51,0x72,0x80]
+ vsqrtnepbf16 zmm22 {k7} {z}, word ptr [rdx - 256]{1to32}
+
+// CHECK: vsubnepbf16 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0x20,0x5c,0xf0]
+ vsubnepbf16 ymm22, ymm23, ymm24
+
+// CHECK: vsubnepbf16 ymm22 {k7}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0x27,0x5c,0xf0]
+ vsubnepbf16 ymm22 {k7}, ymm23, ymm24
+
+// CHECK: vsubnepbf16 ymm22 {k7} {z}, ymm23, ymm24
+// CHECK: encoding: [0x62,0x85,0x45,0xa7,0x5c,0xf0]
+ vsubnepbf16 ymm22 {k7} {z}, ymm23, ymm24
+
+// CHECK: vsubnepbf16 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x40,0x5c,0xf0]
+ vsubnepbf16 zmm22, zmm23, zmm24
+
+// CHECK: vsubnepbf16 zmm22 {k7}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x47,0x5c,0xf0]
+ vsubnepbf16 zmm22 {k7}, zmm23, zmm24
+
+// CHECK: vsubnepbf16 zmm22 {k7} {z}, zmm23, zmm24
+// CHECK: encoding: [0x62,0x85,0x45,0xc7,0x5c,0xf0]
+ vsubnepbf16 zmm22 {k7} {z}, zmm23, zmm24
+
+// CHECK: vsubnepbf16 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x00,0x5c,0xf0]
+ vsubnepbf16 xmm22, xmm23, xmm24
+
+// CHECK: vsubnepbf16 xmm22 {k7}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x07,0x5c,0xf0]
+ vsubnepbf16 xmm22 {k7}, xmm23, xmm24
+
+// CHECK: vsubnepbf16 xmm22 {k7} {z}, xmm23, xmm24
+// CHECK: encoding: [0x62,0x85,0x45,0x87,0x5c,0xf0]
+ vsubnepbf16 xmm22 {k7} {z}, xmm23, xmm24
+
+// CHECK: vsubnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x40,0x5c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsubnepbf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vsubnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x47,0x5c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsubnepbf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vsubnepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x45,0x50,0x5c,0x35,0x00,0x00,0x00,0x00]
+ vsubnepbf16 zmm22, zmm23, word ptr [rip]{1to32}
+
+// CHECK: vsubnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0x45,0x40,0x5c,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vsubnepbf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vsubnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0x45,0xc7,0x5c,0x71,0x7f]
+ vsubnepbf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vsubnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+// CHECK: encoding: [0x62,0xe5,0x45,0xd7,0x5c,0x72,0x80]
+ vsubnepbf16 zmm22 {k7} {z}, zmm23, word ptr [rdx - 256]{1to32}
+
+// CHECK: vsubnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x20,0x5c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsubnepbf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vsubnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x27,0x5c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsubnepbf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vsubnepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x45,0x30,0x5c,0x35,0x00,0x00,0x00,0x00]
+ vsubnepbf16 ymm22, ymm23, word ptr [rip]{1to16}
+
+// CHECK: vsubnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe5,0x45,0x20,0x5c,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vsubnepbf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vsubnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe5,0x45,0xa7,0x5c,0x71,0x7f]
+ vsubnepbf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vsubnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x45,0xb7,0x5c,0x72,0x80]
+ vsubnepbf16 ymm22 {k7} {z}, ymm23, word ptr [rdx - 256]{1to16}
+
+// CHECK: vsubnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x45,0x00,0x5c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsubnepbf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vsubnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x45,0x07,0x5c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsubnepbf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vsubnepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x45,0x10,0x5c,0x35,0x00,0x00,0x00,0x00]
+ vsubnepbf16 xmm22, xmm23, word ptr [rip]{1to8}
+
+// CHECK: vsubnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe5,0x45,0x00,0x5c,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vsubnepbf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vsubnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe5,0x45,0x87,0x5c,0x71,0x7f]
+ vsubnepbf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vsubnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x45,0x97,0x5c,0x72,0x80]
+ vsubnepbf16 xmm22 {k7} {z}, xmm23, word ptr [rdx - 256]{1to8}
+
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index b88abbb461d087..286fb4904870c2 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -1176,6 +1176,8 @@ static const X86FoldTableEntry Table1[] = {
{X86::VCOMISSZrr_Int, X86::VCOMISSZrm_Int, TB_NO_REVERSE},
{X86::VCOMISSrr, X86::VCOMISSrm, 0},
{X86::VCOMISSrr_Int, X86::VCOMISSrm_Int, TB_NO_REVERSE},
+ {X86::VCOMSBF16Zrr, X86::VCOMSBF16Zrm, 0},
+ {X86::VCOMSBF16Zrr_Int, X86::VCOMSBF16Zrm_Int, TB_NO_REVERSE},
{X86::VCVTDQ2PDYrr, X86::VCVTDQ2PDYrm, 0},
{X86::VCVTDQ2PDZ128rr, X86::VCVTDQ2PDZ128rm, TB_NO_REVERSE},
{X86::VCVTDQ2PDZ256rr, X86::VCVTDQ2PDZ256rm, 0},
@@ -1461,6 +1463,9 @@ static const X86FoldTableEntry Table1[] = {
{X86::VEXPANDPSZ128rr, X86::VEXPANDPSZ128rm, TB_NO_REVERSE},
{X86::VEXPANDPSZ256rr, X86::VEXPANDPSZ256rm, TB_NO_REVERSE},
{X86::VEXPANDPSZrr, X86::VEXPANDPSZrm, TB_NO_REVERSE},
+ {X86::VFPCLASSPBF16Z128rr, X86::VFPCLASSPBF16Z128rm, 0},
+ {X86::VFPCLASSPBF16Z256rr, X86::VFPCLASSPBF16Z256rm, 0},
+ {X86::VFPCLASSPBF16Zrr, X86::VFPCLASSPBF16Zrm, 0},
{X86::VFPCLASSPDZ128rr, X86::VFPCLASSPDZ128rm, 0},
{X86::VFPCLASSPDZ256rr, X86::VFPCLASSPDZ256rm, 0},
{X86::VFPCLASSPDZrr, X86::VFPCLASSPDZrm, 0},
@@ -1479,6 +1484,9 @@ static const X86FoldTableEntry Table1[] = {
{X86::VFRCZPSrr, X86::VFRCZPSrm, 0},
{X86::VFRCZSDrr, X86::VFRCZSDrm, TB_NO_REVERSE},
{X86::VFRCZSSrr, X86::VFRCZSSrm, TB_NO_REVERSE},
+ {X86::VGETEXPPBF16Z128r, X86::VGETEXPPBF16Z128m, 0},
+ {X86::VGETEXPPBF16Z256r, X86::VGETEXPPBF16Z256m, 0},
+ {X86::VGETEXPPBF16Zr, X86::VGETEXPPBF16Zm, 0},
{X86::VGETEXPPDZ128r, X86::VGETEXPPDZ128m, 0},
{X86::VGETEXPPDZ256r, X86::VGETEXPPDZ256m, 0},
{X86::VGETEXPPDZr, X86::VGETEXPPDZm, 0},
@@ -1488,6 +1496,9 @@ static const X86FoldTableEntry Table1[] = {
{X86::VGETEXPPSZ128r, X86::VGETEXPPSZ128m, 0},
{X86::VGETEXPPSZ256r, X86::VGETEXPPSZ256m, 0},
{X86::VGETEXPPSZr, X86::VGETEXPPSZm, 0},
+ {X86::VGETMANTPBF16Z128rri, X86::VGETMANTPBF16Z128rmi, 0},
+ {X86::VGETMANTPBF16Z256rri, X86::VGETMANTPBF16Z256rmi, 0},
+ {X86::VGETMANTPBF16Zrri, X86::VGETMANTPBF16Zrmi, 0},
{X86::VGETMANTPDZ128rri, X86::VGETMANTPDZ128rmi, 0},
{X86::VGETMANTPDZ256rri, X86::VGETMANTPDZ256rmi, 0},
{X86::VGETMANTPDZrri, X86::VGETMANTPDZrmi, 0},
@@ -1821,11 +1832,17 @@ static const X86FoldTableEntry Table1[] = {
{X86::VRCP14PSZr, X86::VRCP14PSZm, 0},
{X86::VRCP28PDZr, X86::VRCP28PDZm, 0},
{X86::VRCP28PSZr, X86::VRCP28PSZm, 0},
+ {X86::VRCPPBF16Z128r, X86::VRCPPBF16Z128m, 0},
+ {X86::VRCPPBF16Z256r, X86::VRCPPBF16Z256m, 0},
+ {X86::VRCPPBF16Zr, X86::VRCPPBF16Zm, 0},
{X86::VRCPPHZ128r, X86::VRCPPHZ128m, 0},
{X86::VRCPPHZ256r, X86::VRCPPHZ256m, 0},
{X86::VRCPPHZr, X86::VRCPPHZm, 0},
{X86::VRCPPSYr, X86::VRCPPSYm, 0},
{X86::VRCPPSr, X86::VRCPPSm, 0},
+ {X86::VREDUCENEPBF16Z128rri, X86::VREDUCENEPBF16Z128rmi, 0},
+ {X86::VREDUCENEPBF16Z256rri, X86::VREDUCENEPBF16Z256rmi, 0},
+ {X86::VREDUCENEPBF16Zrri, X86::VREDUCENEPBF16Zrmi, 0},
{X86::VREDUCEPDZ128rri, X86::VREDUCEPDZ128rmi, 0},
{X86::VREDUCEPDZ256rri, X86::VREDUCEPDZ256rmi, 0},
{X86::VREDUCEPDZrri, X86::VREDUCEPDZrmi, 0},
@@ -1835,6 +1852,9 @@ static const X86FoldTableEntry Table1[] = {
{X86::VREDUCEPSZ128rri, X86::VREDUCEPSZ128rmi, 0},
{X86::VREDUCEPSZ256rri, X86::VREDUCEPSZ256rmi, 0},
{X86::VREDUCEPSZrri, X86::VREDUCEPSZrmi, 0},
+ {X86::VRNDSCALENEPBF16Z128rri, X86::VRNDSCALENEPBF16Z128rmi, 0},
+ {X86::VRNDSCALENEPBF16Z256rri, X86::VRNDSCALENEPBF16Z256rmi, 0},
+ {X86::VRNDSCALENEPBF16Zrri, X86::VRNDSCALENEPBF16Zrmi, 0},
{X86::VRNDSCALEPDZ128rri, X86::VRNDSCALEPDZ128rmi, 0},
{X86::VRNDSCALEPDZ256rri, X86::VRNDSCALEPDZ256rmi, 0},
{X86::VRNDSCALEPDZrri, X86::VRNDSCALEPDZrmi, 0},
@@ -1856,11 +1876,17 @@ static const X86FoldTableEntry Table1[] = {
{X86::VRSQRT14PSZr, X86::VRSQRT14PSZm, 0},
{X86::VRSQRT28PDZr, X86::VRSQRT28PDZm, 0},
{X86::VRSQRT28PSZr, X86::VRSQRT28PSZm, 0},
+ {X86::VRSQRTPBF16Z128r, X86::VRSQRTPBF16Z128m, 0},
+ {X86::VRSQRTPBF16Z256r, X86::VRSQRTPBF16Z256m, 0},
+ {X86::VRSQRTPBF16Zr, X86::VRSQRTPBF16Zm, 0},
{X86::VRSQRTPHZ128r, X86::VRSQRTPHZ128m, 0},
{X86::VRSQRTPHZ256r, X86::VRSQRTPHZ256m, 0},
{X86::VRSQRTPHZr, X86::VRSQRTPHZm, 0},
{X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0},
{X86::VRSQRTPSr, X86::VRSQRTPSm, 0},
+ {X86::VSQRTNEPBF16Z128r, X86::VSQRTNEPBF16Z128m, 0},
+ {X86::VSQRTNEPBF16Z256r, X86::VSQRTNEPBF16Z256m, 0},
+ {X86::VSQRTNEPBF16Zr, X86::VSQRTNEPBF16Zm, 0},
{X86::VSQRTPDYr, X86::VSQRTPDYm, 0},
{X86::VSQRTPDZ128r, X86::VSQRTPDZ128m, 0},
{X86::VSQRTPDZ256r, X86::VSQRTPDZ256m, 0},
@@ -2335,6 +2361,9 @@ static const X86FoldTableEntry Table2[] = {
{X86::UNPCKHPSrr, X86::UNPCKHPSrm, TB_ALIGN_16},
{X86::UNPCKLPDrr, X86::UNPCKLPDrm, TB_ALIGN_16},
{X86::UNPCKLPSrr, X86::UNPCKLPSrm, TB_ALIGN_16},
+ {X86::VADDNEPBF16Z128rr, X86::VADDNEPBF16Z128rm, 0},
+ {X86::VADDNEPBF16Z256rr, X86::VADDNEPBF16Z256rm, 0},
+ {X86::VADDNEPBF16Zrr, X86::VADDNEPBF16Zrm, 0},
{X86::VADDPDYrr, X86::VADDPDYrm, 0},
{X86::VADDPDZ128rr, X86::VADDPDZ128rm, 0},
{X86::VADDPDZ256rr, X86::VADDPDZ256rm, 0},
@@ -2432,6 +2461,9 @@ static const X86FoldTableEntry Table2[] = {
{X86::VBROADCASTSSZ128rrkz, X86::VBROADCASTSSZ128rmkz, TB_NO_REVERSE},
{X86::VBROADCASTSSZ256rrkz, X86::VBROADCASTSSZ256rmkz, TB_NO_REVERSE},
{X86::VBROADCASTSSZrrkz, X86::VBROADCASTSSZrmkz, TB_NO_REVERSE},
+ {X86::VCMPPBF16Z128rri, X86::VCMPPBF16Z128rmi, 0},
+ {X86::VCMPPBF16Z256rri, X86::VCMPPBF16Z256rmi, 0},
+ {X86::VCMPPBF16Zrri, X86::VCMPPBF16Zrmi, 0},
{X86::VCMPPDYrri, X86::VCMPPDYrmi, 0},
{X86::VCMPPDZ128rri, X86::VCMPPDZ128rmi, 0},
{X86::VCMPPDZ256rri, X86::VCMPPDZ256rmi, 0},
@@ -2737,6 +2769,9 @@ static const X86FoldTableEntry Table2[] = {
{X86::VDBPSADBWZ128rri, X86::VDBPSADBWZ128rmi, 0},
{X86::VDBPSADBWZ256rri, X86::VDBPSADBWZ256rmi, 0},
{X86::VDBPSADBWZrri, X86::VDBPSADBWZrmi, 0},
+ {X86::VDIVNEPBF16Z128rr, X86::VDIVNEPBF16Z128rm, 0},
+ {X86::VDIVNEPBF16Z256rr, X86::VDIVNEPBF16Z256rm, 0},
+ {X86::VDIVNEPBF16Zrr, X86::VDIVNEPBF16Zrm, 0},
{X86::VDIVPDYrr, X86::VDIVPDYrm, 0},
{X86::VDIVPDZ128rr, X86::VDIVPDZ128rm, 0},
{X86::VDIVPDZ256rr, X86::VDIVPDZ256rm, 0},
@@ -2819,6 +2854,9 @@ static const X86FoldTableEntry Table2[] = {
{X86::VFNMSUBSD4rr_Int, X86::VFNMSUBSD4mr_Int, TB_NO_REVERSE},
{X86::VFNMSUBSS4rr, X86::VFNMSUBSS4mr, 0},
{X86::VFNMSUBSS4rr_Int, X86::VFNMSUBSS4mr_Int, TB_NO_REVERSE},
+ {X86::VFPCLASSPBF16Z128rrk, X86::VFPCLASSPBF16Z128rmk, 0},
+ {X86::VFPCLASSPBF16Z256rrk, X86::VFPCLASSPBF16Z256rmk, 0},
+ {X86::VFPCLASSPBF16Zrrk, X86::VFPCLASSPBF16Zrmk, 0},
{X86::VFPCLASSPDZ128rrk, X86::VFPCLASSPDZ128rmk, 0},
{X86::VFPCLASSPDZ256rrk, X86::VFPCLASSPDZ256rmk, 0},
{X86::VFPCLASSPDZrrk, X86::VFPCLASSPDZrmk, 0},
@@ -2831,6 +2869,9 @@ static const X86FoldTableEntry Table2[] = {
{X86::VFPCLASSSDZrrk, X86::VFPCLASSSDZrmk, TB_NO_REVERSE},
{X86::VFPCLASSSHZrrk, X86::VFPCLASSSHZrmk, TB_NO_REVERSE},
{X86::VFPCLASSSSZrrk, X86::VFPCLASSSSZrmk, TB_NO_REVERSE},
+ {X86::VGETEXPPBF16Z128rkz, X86::VGETEXPPBF16Z128mkz, 0},
+ {X86::VGETEXPPBF16Z256rkz, X86::VGETEXPPBF16Z256mkz, 0},
+ {X86::VGETEXPPBF16Zrkz, X86::VGETEXPPBF16Zmkz, 0},
{X86::VGETEXPPDZ128rkz, X86::VGETEXPPDZ128mkz, 0},
{X86::VGETEXPPDZ256rkz, X86::VGETEXPPDZ256mkz, 0},
{X86::VGETEXPPDZrkz, X86::VGETEXPPDZmkz, 0},
@@ -2843,6 +2884,9 @@ static const X86FoldTableEntry Table2[] = {
{X86::VGETEXPSDZr, X86::VGETEXPSDZm, TB_NO_REVERSE},
{X86::VGETEXPSHZr, X86::VGETEXPSHZm, TB_NO_REVERSE},
{X86::VGETEXPSSZr, X86::VGETEXPSSZm, TB_NO_REVERSE},
+ {X86::VGETMANTPBF16Z128rrikz, X86::VGETMANTPBF16Z128rmikz, 0},
+ {X86::VGETMANTPBF16Z256rrikz, X86::VGETMANTPBF16Z256rmikz, 0},
+ {X86::VGETMANTPBF16Zrrikz, X86::VGETMANTPBF16Zrmikz, 0},
{X86::VGETMANTPDZ128rrikz, X86::VGETMANTPDZ128rmikz, 0},
{X86::VGETMANTPDZ256rrikz, X86::VGETMANTPDZ256rmikz, 0},
{X86::VGETMANTPDZrrikz, X86::VGETMANTPDZrmikz, 0},
@@ -2910,6 +2954,9 @@ static const X86FoldTableEntry Table2[] = {
{X86::VMAXCSHZrr, X86::VMAXCSHZrm, 0},
{X86::VMAXCSSZrr, X86::VMAXCSSZrm, 0},
{X86::VMAXCSSrr, X86::VMAXCSSrm, 0},
+ {X86::VMAXPBF16Z128rr, X86::VMAXPBF16Z128rm, 0},
+ {X86::VMAXPBF16Z256rr, X86::VMAXPBF16Z256rm, 0},
+ {X86::VMAXPBF16Zrr, X86::VMAXPBF16Zrm, 0},
{X86::VMAXPDYrr, X86::VMAXPDYrm, 0},
{X86::VMAXPDZ128rr, X86::VMAXPDZ128rm, 0},
{X86::VMAXPDZ256rr, X86::VMAXPDZ256rm, 0},
@@ -2966,6 +3013,9 @@ static const X86FoldTableEntry Table2[] = {
{X86::VMINMAXSDrri, X86::VMINMAXSDrmi, TB_NO_REVERSE},
{X86::VMINMAXSHrri, X86::VMINMAXSHrmi, TB_NO_REVERSE},
{X86::VMINMAXSSrri, X86::VMINMAXSSrmi, TB_NO_REVERSE},
+ {X86::VMINPBF16Z128rr, X86::VMINPBF16Z128rm, 0},
+ {X86::VMINPBF16Z256rr, X86::VMINPBF16Z256rm, 0},
+ {X86::VMINPBF16Zrr, X86::VMINPBF16Zrm, 0},
{X86::VMINPDYrr, X86::VMINPDYrm, 0},
{X86::VMINPDZ128rr, X86::VMINPDZ128rm, 0},
{X86::VMINPDZ256rr, X86::VMINPDZ256rm, 0},
@@ -3037,6 +3087,9 @@ static const X86FoldTableEntry Table2[] = {
{X86::VMPSADBWZ256rri, X86::VMPSADBWZ256rmi, 0},
{X86::VMPSADBWZrri, X86::VMPSADBWZrmi, 0},
{X86::VMPSADBWrri, X86::VMPSADBWrmi, 0},
+ {X86::VMULNEPBF16Z128rr, X86::VMULNEPBF16Z128rm, 0},
+ {X86::VMULNEPBF16Z256rr, X86::VMULNEPBF16Z256rm, 0},
+ {X86::VMULNEPBF16Zrr, X86::VMULNEPBF16Zrm, 0},
{X86::VMULPDYrr, X86::VMULPDYrm, 0},
{X86::VMULPDZ128rr, X86::VMULPDZ128rm, 0},
{X86::VMULPDZ256rr, X86::VMULPDZ256rm, 0},
@@ -3887,12 +3940,18 @@ static const X86FoldTableEntry Table2[] = {
{X86::VRCP28PSZrkz, X86::VRCP28PSZmkz, 0},
{X86::VRCP28SDZr, X86::VRCP28SDZm, TB_NO_REVERSE},
{X86::VRCP28SSZr, X86::VRCP28SSZm, TB_NO_REVERSE},
+ {X86::VRCPPBF16Z128rkz, X86::VRCPPBF16Z128mkz, 0},
+ {X86::VRCPPBF16Z256rkz, X86::VRCPPBF16Z256mkz, 0},
+ {X86::VRCPPBF16Zrkz, X86::VRCPPBF16Zmkz, 0},
{X86::VRCPPHZ128rkz, X86::VRCPPHZ128mkz, 0},
{X86::VRCPPHZ256rkz, X86::VRCPPHZ256mkz, 0},
{X86::VRCPPHZrkz, X86::VRCPPHZmkz, 0},
{X86::VRCPSHZrr, X86::VRCPSHZrm, TB_NO_REVERSE},
{X86::VRCPSSr, X86::VRCPSSm, 0},
{X86::VRCPSSr_Int, X86::VRCPSSm_Int, TB_NO_REVERSE},
+ {X86::VREDUCENEPBF16Z128rrikz, X86::VREDUCENEPBF16Z128rmikz, 0},
+ {X86::VREDUCENEPBF16Z256rrikz, X86::VREDUCENEPBF16Z256rmikz, 0},
+ {X86::VREDUCENEPBF16Zrrikz, X86::VREDUCENEPBF16Zrmikz, 0},
{X86::VREDUCEPDZ128rrikz, X86::VREDUCEPDZ128rmikz, 0},
{X86::VREDUCEPDZ256rrikz, X86::VREDUCEPDZ256rmikz, 0},
{X86::VREDUCEPDZrrikz, X86::VREDUCEPDZrmikz, 0},
@@ -3905,6 +3964,9 @@ static const X86FoldTableEntry Table2[] = {
{X86::VREDUCESDZrri, X86::VREDUCESDZrmi, TB_NO_REVERSE},
{X86::VREDUCESHZrri, X86::VREDUCESHZrmi, TB_NO_REVERSE},
{X86::VREDUCESSZrri, X86::VREDUCESSZrmi, TB_NO_REVERSE},
+ {X86::VRNDSCALENEPBF16Z128rrikz, X86::VRNDSCALENEPBF16Z128rmikz, 0},
+ {X86::VRNDSCALENEPBF16Z256rrikz, X86::VRNDSCALENEPBF16Z256rmikz, 0},
+ {X86::VRNDSCALENEPBF16Zrrikz, X86::VRNDSCALENEPBF16Zrmikz, 0},
{X86::VRNDSCALEPDZ128rrikz, X86::VRNDSCALEPDZ128rmikz, 0},
{X86::VRNDSCALEPDZ256rrikz, X86::VRNDSCALEPDZ256rmikz, 0},
{X86::VRNDSCALEPDZrrikz, X86::VRNDSCALEPDZrmikz, 0},
@@ -3936,12 +3998,18 @@ static const X86FoldTableEntry Table2[] = {
{X86::VRSQRT28PSZrkz, X86::VRSQRT28PSZmkz, 0},
{X86::VRSQRT28SDZr, X86::VRSQRT28SDZm, TB_NO_REVERSE},
{X86::VRSQRT28SSZr, X86::VRSQRT28SSZm, TB_NO_REVERSE},
+ {X86::VRSQRTPBF16Z128rkz, X86::VRSQRTPBF16Z128mkz, 0},
+ {X86::VRSQRTPBF16Z256rkz, X86::VRSQRTPBF16Z256mkz, 0},
+ {X86::VRSQRTPBF16Zrkz, X86::VRSQRTPBF16Zmkz, 0},
{X86::VRSQRTPHZ128rkz, X86::VRSQRTPHZ128mkz, 0},
{X86::VRSQRTPHZ256rkz, X86::VRSQRTPHZ256mkz, 0},
{X86::VRSQRTPHZrkz, X86::VRSQRTPHZmkz, 0},
{X86::VRSQRTSHZrr, X86::VRSQRTSHZrm, TB_NO_REVERSE},
{X86::VRSQRTSSr, X86::VRSQRTSSm, 0},
{X86::VRSQRTSSr_Int, X86::VRSQRTSSm_Int, TB_NO_REVERSE},
+ {X86::VSCALEFPBF16Z128rr, X86::VSCALEFPBF16Z128rm, 0},
+ {X86::VSCALEFPBF16Z256rr, X86::VSCALEFPBF16Z256rm, 0},
+ {X86::VSCALEFPBF16Zrr, X86::VSCALEFPBF16Zrm, 0},
{X86::VSCALEFPDZ128rr, X86::VSCALEFPDZ128rm, 0},
{X86::VSCALEFPDZ256rr, X86::VSCALEFPDZ256rm, 0},
{X86::VSCALEFPDZrr, X86::VSCALEFPDZrm, 0},
@@ -3976,6 +4044,9 @@ static const X86FoldTableEntry Table2[] = {
{X86::VSM4KEY4rr, X86::VSM4KEY4rm, 0},
{X86::VSM4RNDS4Yrr, X86::VSM4RNDS4Yrm, 0},
{X86::VSM4RNDS4rr, X86::VSM4RNDS4rm, 0},
+ {X86::VSQRTNEPBF16Z128rkz, X86::VSQRTNEPBF16Z128mkz, 0},
+ {X86::VSQRTNEPBF16Z256rkz, X86::VSQRTNEPBF16Z256mkz, 0},
+ {X86::VSQRTNEPBF16Zrkz, X86::VSQRTNEPBF16Zmkz, 0},
{X86::VSQRTPDZ128rkz, X86::VSQRTPDZ128mkz, 0},
{X86::VSQRTPDZ256rkz, X86::VSQRTPDZ256mkz, 0},
{X86::VSQRTPDZrkz, X86::VSQRTPDZmkz, 0},
@@ -3995,6 +4066,9 @@ static const X86FoldTableEntry Table2[] = {
{X86::VSQRTSSZr_Int, X86::VSQRTSSZm_Int, TB_NO_REVERSE},
{X86::VSQRTSSr, X86::VSQRTSSm, 0},
{X86::VSQRTSSr_Int, X86::VSQRTSSm_Int, TB_NO_REVERSE},
+ {X86::VSUBNEPBF16Z128rr, X86::VSUBNEPBF16Z128rm, 0},
+ {X86::VSUBNEPBF16Z256rr, X86::VSUBNEPBF16Z256rm, 0},
+ {X86::VSUBNEPBF16Zrr, X86::VSUBNEPBF16Zrm, 0},
{X86::VSUBPDYrr, X86::VSUBPDYrm, 0},
{X86::VSUBPDZ128rr, X86::VSUBPDZ128rm, 0},
{X86::VSUBPDZ256rr, X86::VSUBPDZ256rm, 0},
@@ -4069,6 +4143,9 @@ static const X86FoldTableEntry Table2[] = {
};
static const X86FoldTableEntry Table3[] = {
+ {X86::VADDNEPBF16Z128rrkz, X86::VADDNEPBF16Z128rmkz, 0},
+ {X86::VADDNEPBF16Z256rrkz, X86::VADDNEPBF16Z256rmkz, 0},
+ {X86::VADDNEPBF16Zrrkz, X86::VADDNEPBF16Zrmkz, 0},
{X86::VADDPDZ128rrkz, X86::VADDPDZ128rmkz, 0},
{X86::VADDPDZ256rrkz, X86::VADDPDZ256rmkz, 0},
{X86::VADDPDZrrkz, X86::VADDPDZrmkz, 0},
@@ -4115,6 +4192,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VBROADCASTSSZ128rrk, X86::VBROADCASTSSZ128rmk, TB_NO_REVERSE},
{X86::VBROADCASTSSZ256rrk, X86::VBROADCASTSSZ256rmk, TB_NO_REVERSE},
{X86::VBROADCASTSSZrrk, X86::VBROADCASTSSZrmk, TB_NO_REVERSE},
+ {X86::VCMPPBF16Z128rrik, X86::VCMPPBF16Z128rmik, 0},
+ {X86::VCMPPBF16Z256rrik, X86::VCMPPBF16Z256rmik, 0},
+ {X86::VCMPPBF16Zrrik, X86::VCMPPBF16Zrmik, 0},
{X86::VCMPPDZ128rrik, X86::VCMPPDZ128rmik, 0},
{X86::VCMPPDZ256rrik, X86::VCMPPDZ256rmik, 0},
{X86::VCMPPDZrrik, X86::VCMPPDZrmik, 0},
@@ -4367,6 +4447,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VDBPSADBWZ128rrikz, X86::VDBPSADBWZ128rmikz, 0},
{X86::VDBPSADBWZ256rrikz, X86::VDBPSADBWZ256rmikz, 0},
{X86::VDBPSADBWZrrikz, X86::VDBPSADBWZrmikz, 0},
+ {X86::VDIVNEPBF16Z128rrkz, X86::VDIVNEPBF16Z128rmkz, 0},
+ {X86::VDIVNEPBF16Z256rrkz, X86::VDIVNEPBF16Z256rmkz, 0},
+ {X86::VDIVNEPBF16Zrrkz, X86::VDIVNEPBF16Zrmkz, 0},
{X86::VDIVPDZ128rrkz, X86::VDIVPDZ128rmkz, 0},
{X86::VDIVPDZ256rrkz, X86::VDIVPDZ256rmkz, 0},
{X86::VDIVPDZrrkz, X86::VDIVPDZrmkz, 0},
@@ -4409,6 +4492,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VFIXUPIMMPSZrri, X86::VFIXUPIMMPSZrmi, 0},
{X86::VFIXUPIMMSDZrri, X86::VFIXUPIMMSDZrmi, TB_NO_REVERSE},
{X86::VFIXUPIMMSSZrri, X86::VFIXUPIMMSSZrmi, TB_NO_REVERSE},
+ {X86::VFMADD132NEPBF16Z128r, X86::VFMADD132NEPBF16Z128m, 0},
+ {X86::VFMADD132NEPBF16Z256r, X86::VFMADD132NEPBF16Z256m, 0},
+ {X86::VFMADD132NEPBF16Zr, X86::VFMADD132NEPBF16Zm, 0},
{X86::VFMADD132PDYr, X86::VFMADD132PDYm, 0},
{X86::VFMADD132PDZ128r, X86::VFMADD132PDZ128m, 0},
{X86::VFMADD132PDZ256r, X86::VFMADD132PDZ256m, 0},
@@ -4432,6 +4518,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VFMADD132SSZr_Int, X86::VFMADD132SSZm_Int, TB_NO_REVERSE},
{X86::VFMADD132SSr, X86::VFMADD132SSm, 0},
{X86::VFMADD132SSr_Int, X86::VFMADD132SSm_Int, TB_NO_REVERSE},
+ {X86::VFMADD213NEPBF16Z128r, X86::VFMADD213NEPBF16Z128m, 0},
+ {X86::VFMADD213NEPBF16Z256r, X86::VFMADD213NEPBF16Z256m, 0},
+ {X86::VFMADD213NEPBF16Zr, X86::VFMADD213NEPBF16Zm, 0},
{X86::VFMADD213PDYr, X86::VFMADD213PDYm, 0},
{X86::VFMADD213PDZ128r, X86::VFMADD213PDZ128m, 0},
{X86::VFMADD213PDZ256r, X86::VFMADD213PDZ256m, 0},
@@ -4455,6 +4544,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VFMADD213SSZr_Int, X86::VFMADD213SSZm_Int, TB_NO_REVERSE},
{X86::VFMADD213SSr, X86::VFMADD213SSm, 0},
{X86::VFMADD213SSr_Int, X86::VFMADD213SSm_Int, TB_NO_REVERSE},
+ {X86::VFMADD231NEPBF16Z128r, X86::VFMADD231NEPBF16Z128m, 0},
+ {X86::VFMADD231NEPBF16Z256r, X86::VFMADD231NEPBF16Z256m, 0},
+ {X86::VFMADD231NEPBF16Zr, X86::VFMADD231NEPBF16Zm, 0},
{X86::VFMADD231PDYr, X86::VFMADD231PDYm, 0},
{X86::VFMADD231PDZ128r, X86::VFMADD231PDZ128m, 0},
{X86::VFMADD231PDZ256r, X86::VFMADD231PDZ256m, 0},
@@ -4533,6 +4625,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4rm, 0},
{X86::VFMADDSUBPS4Yrr, X86::VFMADDSUBPS4Yrm, 0},
{X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4rm, 0},
+ {X86::VFMSUB132NEPBF16Z128r, X86::VFMSUB132NEPBF16Z128m, 0},
+ {X86::VFMSUB132NEPBF16Z256r, X86::VFMSUB132NEPBF16Z256m, 0},
+ {X86::VFMSUB132NEPBF16Zr, X86::VFMSUB132NEPBF16Zm, 0},
{X86::VFMSUB132PDYr, X86::VFMSUB132PDYm, 0},
{X86::VFMSUB132PDZ128r, X86::VFMSUB132PDZ128m, 0},
{X86::VFMSUB132PDZ256r, X86::VFMSUB132PDZ256m, 0},
@@ -4556,6 +4651,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VFMSUB132SSZr_Int, X86::VFMSUB132SSZm_Int, TB_NO_REVERSE},
{X86::VFMSUB132SSr, X86::VFMSUB132SSm, 0},
{X86::VFMSUB132SSr_Int, X86::VFMSUB132SSm_Int, TB_NO_REVERSE},
+ {X86::VFMSUB213NEPBF16Z128r, X86::VFMSUB213NEPBF16Z128m, 0},
+ {X86::VFMSUB213NEPBF16Z256r, X86::VFMSUB213NEPBF16Z256m, 0},
+ {X86::VFMSUB213NEPBF16Zr, X86::VFMSUB213NEPBF16Zm, 0},
{X86::VFMSUB213PDYr, X86::VFMSUB213PDYm, 0},
{X86::VFMSUB213PDZ128r, X86::VFMSUB213PDZ128m, 0},
{X86::VFMSUB213PDZ256r, X86::VFMSUB213PDZ256m, 0},
@@ -4579,6 +4677,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VFMSUB213SSZr_Int, X86::VFMSUB213SSZm_Int, TB_NO_REVERSE},
{X86::VFMSUB213SSr, X86::VFMSUB213SSm, 0},
{X86::VFMSUB213SSr_Int, X86::VFMSUB213SSm_Int, TB_NO_REVERSE},
+ {X86::VFMSUB231NEPBF16Z128r, X86::VFMSUB231NEPBF16Z128m, 0},
+ {X86::VFMSUB231NEPBF16Z256r, X86::VFMSUB231NEPBF16Z256m, 0},
+ {X86::VFMSUB231NEPBF16Zr, X86::VFMSUB231NEPBF16Zm, 0},
{X86::VFMSUB231PDYr, X86::VFMSUB231PDYm, 0},
{X86::VFMSUB231PDZ128r, X86::VFMSUB231PDZ128m, 0},
{X86::VFMSUB231PDZ256r, X86::VFMSUB231PDZ256m, 0},
@@ -4657,6 +4758,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VFMULCPHZ256rrkz, X86::VFMULCPHZ256rmkz, 0},
{X86::VFMULCPHZrrkz, X86::VFMULCPHZrmkz, 0},
{X86::VFMULCSHZrrkz, X86::VFMULCSHZrmkz, TB_NO_REVERSE},
+ {X86::VFNMADD132NEPBF16Z128r, X86::VFNMADD132NEPBF16Z128m, 0},
+ {X86::VFNMADD132NEPBF16Z256r, X86::VFNMADD132NEPBF16Z256m, 0},
+ {X86::VFNMADD132NEPBF16Zr, X86::VFNMADD132NEPBF16Zm, 0},
{X86::VFNMADD132PDYr, X86::VFNMADD132PDYm, 0},
{X86::VFNMADD132PDZ128r, X86::VFNMADD132PDZ128m, 0},
{X86::VFNMADD132PDZ256r, X86::VFNMADD132PDZ256m, 0},
@@ -4680,6 +4784,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VFNMADD132SSZr_Int, X86::VFNMADD132SSZm_Int, TB_NO_REVERSE},
{X86::VFNMADD132SSr, X86::VFNMADD132SSm, 0},
{X86::VFNMADD132SSr_Int, X86::VFNMADD132SSm_Int, TB_NO_REVERSE},
+ {X86::VFNMADD213NEPBF16Z128r, X86::VFNMADD213NEPBF16Z128m, 0},
+ {X86::VFNMADD213NEPBF16Z256r, X86::VFNMADD213NEPBF16Z256m, 0},
+ {X86::VFNMADD213NEPBF16Zr, X86::VFNMADD213NEPBF16Zm, 0},
{X86::VFNMADD213PDYr, X86::VFNMADD213PDYm, 0},
{X86::VFNMADD213PDZ128r, X86::VFNMADD213PDZ128m, 0},
{X86::VFNMADD213PDZ256r, X86::VFNMADD213PDZ256m, 0},
@@ -4703,6 +4810,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VFNMADD213SSZr_Int, X86::VFNMADD213SSZm_Int, TB_NO_REVERSE},
{X86::VFNMADD213SSr, X86::VFNMADD213SSm, 0},
{X86::VFNMADD213SSr_Int, X86::VFNMADD213SSm_Int, TB_NO_REVERSE},
+ {X86::VFNMADD231NEPBF16Z128r, X86::VFNMADD231NEPBF16Z128m, 0},
+ {X86::VFNMADD231NEPBF16Z256r, X86::VFNMADD231NEPBF16Z256m, 0},
+ {X86::VFNMADD231NEPBF16Zr, X86::VFNMADD231NEPBF16Zm, 0},
{X86::VFNMADD231PDYr, X86::VFNMADD231PDYm, 0},
{X86::VFNMADD231PDZ128r, X86::VFNMADD231PDZ128m, 0},
{X86::VFNMADD231PDZ256r, X86::VFNMADD231PDZ256m, 0},
@@ -4734,6 +4844,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VFNMADDSD4rr_Int, X86::VFNMADDSD4rm_Int, TB_NO_REVERSE},
{X86::VFNMADDSS4rr, X86::VFNMADDSS4rm, 0},
{X86::VFNMADDSS4rr_Int, X86::VFNMADDSS4rm_Int, TB_NO_REVERSE},
+ {X86::VFNMSUB132NEPBF16Z128r, X86::VFNMSUB132NEPBF16Z128m, 0},
+ {X86::VFNMSUB132NEPBF16Z256r, X86::VFNMSUB132NEPBF16Z256m, 0},
+ {X86::VFNMSUB132NEPBF16Zr, X86::VFNMSUB132NEPBF16Zm, 0},
{X86::VFNMSUB132PDYr, X86::VFNMSUB132PDYm, 0},
{X86::VFNMSUB132PDZ128r, X86::VFNMSUB132PDZ128m, 0},
{X86::VFNMSUB132PDZ256r, X86::VFNMSUB132PDZ256m, 0},
@@ -4757,6 +4870,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VFNMSUB132SSZr_Int, X86::VFNMSUB132SSZm_Int, TB_NO_REVERSE},
{X86::VFNMSUB132SSr, X86::VFNMSUB132SSm, 0},
{X86::VFNMSUB132SSr_Int, X86::VFNMSUB132SSm_Int, TB_NO_REVERSE},
+ {X86::VFNMSUB213NEPBF16Z128r, X86::VFNMSUB213NEPBF16Z128m, 0},
+ {X86::VFNMSUB213NEPBF16Z256r, X86::VFNMSUB213NEPBF16Z256m, 0},
+ {X86::VFNMSUB213NEPBF16Zr, X86::VFNMSUB213NEPBF16Zm, 0},
{X86::VFNMSUB213PDYr, X86::VFNMSUB213PDYm, 0},
{X86::VFNMSUB213PDZ128r, X86::VFNMSUB213PDZ128m, 0},
{X86::VFNMSUB213PDZ256r, X86::VFNMSUB213PDZ256m, 0},
@@ -4780,6 +4896,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VFNMSUB213SSZr_Int, X86::VFNMSUB213SSZm_Int, TB_NO_REVERSE},
{X86::VFNMSUB213SSr, X86::VFNMSUB213SSm, 0},
{X86::VFNMSUB213SSr_Int, X86::VFNMSUB213SSm_Int, TB_NO_REVERSE},
+ {X86::VFNMSUB231NEPBF16Z128r, X86::VFNMSUB231NEPBF16Z128m, 0},
+ {X86::VFNMSUB231NEPBF16Z256r, X86::VFNMSUB231NEPBF16Z256m, 0},
+ {X86::VFNMSUB231NEPBF16Zr, X86::VFNMSUB231NEPBF16Zm, 0},
{X86::VFNMSUB231PDYr, X86::VFNMSUB231PDYm, 0},
{X86::VFNMSUB231PDZ128r, X86::VFNMSUB231PDZ128m, 0},
{X86::VFNMSUB231PDZ256r, X86::VFNMSUB231PDZ256m, 0},
@@ -4811,6 +4930,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VFNMSUBSD4rr_Int, X86::VFNMSUBSD4rm_Int, TB_NO_REVERSE},
{X86::VFNMSUBSS4rr, X86::VFNMSUBSS4rm, 0},
{X86::VFNMSUBSS4rr_Int, X86::VFNMSUBSS4rm_Int, TB_NO_REVERSE},
+ {X86::VGETEXPPBF16Z128rk, X86::VGETEXPPBF16Z128mk, 0},
+ {X86::VGETEXPPBF16Z256rk, X86::VGETEXPPBF16Z256mk, 0},
+ {X86::VGETEXPPBF16Zrk, X86::VGETEXPPBF16Zmk, 0},
{X86::VGETEXPPDZ128rk, X86::VGETEXPPDZ128mk, 0},
{X86::VGETEXPPDZ256rk, X86::VGETEXPPDZ256mk, 0},
{X86::VGETEXPPDZrk, X86::VGETEXPPDZmk, 0},
@@ -4823,6 +4945,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VGETEXPSDZrkz, X86::VGETEXPSDZmkz, TB_NO_REVERSE},
{X86::VGETEXPSHZrkz, X86::VGETEXPSHZmkz, TB_NO_REVERSE},
{X86::VGETEXPSSZrkz, X86::VGETEXPSSZmkz, TB_NO_REVERSE},
+ {X86::VGETMANTPBF16Z128rrik, X86::VGETMANTPBF16Z128rmik, 0},
+ {X86::VGETMANTPBF16Z256rrik, X86::VGETMANTPBF16Z256rmik, 0},
+ {X86::VGETMANTPBF16Zrrik, X86::VGETMANTPBF16Zrmik, 0},
{X86::VGETMANTPDZ128rrik, X86::VGETMANTPDZ128rmik, 0},
{X86::VGETMANTPDZ256rrik, X86::VGETMANTPDZ256rmik, 0},
{X86::VGETMANTPDZrrik, X86::VGETMANTPDZrmik, 0},
@@ -4865,6 +4990,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VMAXCPSZ128rrkz, X86::VMAXCPSZ128rmkz, 0},
{X86::VMAXCPSZ256rrkz, X86::VMAXCPSZ256rmkz, 0},
{X86::VMAXCPSZrrkz, X86::VMAXCPSZrmkz, 0},
+ {X86::VMAXPBF16Z128rrkz, X86::VMAXPBF16Z128rmkz, 0},
+ {X86::VMAXPBF16Z256rrkz, X86::VMAXPBF16Z256rmkz, 0},
+ {X86::VMAXPBF16Zrrkz, X86::VMAXPBF16Zrmkz, 0},
{X86::VMAXPDZ128rrkz, X86::VMAXPDZ128rmkz, 0},
{X86::VMAXPDZ256rrkz, X86::VMAXPDZ256rmkz, 0},
{X86::VMAXPDZrrkz, X86::VMAXPDZrmkz, 0},
@@ -4901,6 +5029,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VMINMAXSDrrikz, X86::VMINMAXSDrmikz, TB_NO_REVERSE},
{X86::VMINMAXSHrrikz, X86::VMINMAXSHrmikz, TB_NO_REVERSE},
{X86::VMINMAXSSrrikz, X86::VMINMAXSSrmikz, TB_NO_REVERSE},
+ {X86::VMINPBF16Z128rrkz, X86::VMINPBF16Z128rmkz, 0},
+ {X86::VMINPBF16Z256rrkz, X86::VMINPBF16Z256rmkz, 0},
+ {X86::VMINPBF16Zrrkz, X86::VMINPBF16Zrmkz, 0},
{X86::VMINPDZ128rrkz, X86::VMINPDZ128rmkz, 0},
{X86::VMINPDZ256rrkz, X86::VMINPDZ256rmkz, 0},
{X86::VMINPDZrrkz, X86::VMINPDZrmkz, 0},
@@ -4955,6 +5086,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VMPSADBWZ128rrikz, X86::VMPSADBWZ128rmikz, 0},
{X86::VMPSADBWZ256rrikz, X86::VMPSADBWZ256rmikz, 0},
{X86::VMPSADBWZrrikz, X86::VMPSADBWZrmikz, 0},
+ {X86::VMULNEPBF16Z128rrkz, X86::VMULNEPBF16Z128rmkz, 0},
+ {X86::VMULNEPBF16Z256rrkz, X86::VMULNEPBF16Z256rmkz, 0},
+ {X86::VMULNEPBF16Zrrkz, X86::VMULNEPBF16Zrmkz, 0},
{X86::VMULPDZ128rrkz, X86::VMULPDZ128rmkz, 0},
{X86::VMULPDZ256rrkz, X86::VMULPDZ256rmkz, 0},
{X86::VMULPDZrrkz, X86::VMULPDZrmkz, 0},
@@ -5696,10 +5830,16 @@ static const X86FoldTableEntry Table3[] = {
{X86::VRCP28PSZrk, X86::VRCP28PSZmk, 0},
{X86::VRCP28SDZrkz, X86::VRCP28SDZmkz, TB_NO_REVERSE},
{X86::VRCP28SSZrkz, X86::VRCP28SSZmkz, TB_NO_REVERSE},
+ {X86::VRCPPBF16Z128rk, X86::VRCPPBF16Z128mk, 0},
+ {X86::VRCPPBF16Z256rk, X86::VRCPPBF16Z256mk, 0},
+ {X86::VRCPPBF16Zrk, X86::VRCPPBF16Zmk, 0},
{X86::VRCPPHZ128rk, X86::VRCPPHZ128mk, 0},
{X86::VRCPPHZ256rk, X86::VRCPPHZ256mk, 0},
{X86::VRCPPHZrk, X86::VRCPPHZmk, 0},
{X86::VRCPSHZrrkz, X86::VRCPSHZrmkz, TB_NO_REVERSE},
+ {X86::VREDUCENEPBF16Z128rrik, X86::VREDUCENEPBF16Z128rmik, 0},
+ {X86::VREDUCENEPBF16Z256rrik, X86::VREDUCENEPBF16Z256rmik, 0},
+ {X86::VREDUCENEPBF16Zrrik, X86::VREDUCENEPBF16Zrmik, 0},
{X86::VREDUCEPDZ128rrik, X86::VREDUCEPDZ128rmik, 0},
{X86::VREDUCEPDZ256rrik, X86::VREDUCEPDZ256rmik, 0},
{X86::VREDUCEPDZrrik, X86::VREDUCEPDZrmik, 0},
@@ -5712,6 +5852,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VREDUCESDZrrikz, X86::VREDUCESDZrmikz, TB_NO_REVERSE},
{X86::VREDUCESHZrrikz, X86::VREDUCESHZrmikz, TB_NO_REVERSE},
{X86::VREDUCESSZrrikz, X86::VREDUCESSZrmikz, TB_NO_REVERSE},
+ {X86::VRNDSCALENEPBF16Z128rrik, X86::VRNDSCALENEPBF16Z128rmik, 0},
+ {X86::VRNDSCALENEPBF16Z256rrik, X86::VRNDSCALENEPBF16Z256rmik, 0},
+ {X86::VRNDSCALENEPBF16Zrrik, X86::VRNDSCALENEPBF16Zrmik, 0},
{X86::VRNDSCALEPDZ128rrik, X86::VRNDSCALEPDZ128rmik, 0},
{X86::VRNDSCALEPDZ256rrik, X86::VRNDSCALEPDZ256rmik, 0},
{X86::VRNDSCALEPDZrrik, X86::VRNDSCALEPDZrmik, 0},
@@ -5736,10 +5879,16 @@ static const X86FoldTableEntry Table3[] = {
{X86::VRSQRT28PSZrk, X86::VRSQRT28PSZmk, 0},
{X86::VRSQRT28SDZrkz, X86::VRSQRT28SDZmkz, TB_NO_REVERSE},
{X86::VRSQRT28SSZrkz, X86::VRSQRT28SSZmkz, TB_NO_REVERSE},
+ {X86::VRSQRTPBF16Z128rk, X86::VRSQRTPBF16Z128mk, 0},
+ {X86::VRSQRTPBF16Z256rk, X86::VRSQRTPBF16Z256mk, 0},
+ {X86::VRSQRTPBF16Zrk, X86::VRSQRTPBF16Zmk, 0},
{X86::VRSQRTPHZ128rk, X86::VRSQRTPHZ128mk, 0},
{X86::VRSQRTPHZ256rk, X86::VRSQRTPHZ256mk, 0},
{X86::VRSQRTPHZrk, X86::VRSQRTPHZmk, 0},
{X86::VRSQRTSHZrrkz, X86::VRSQRTSHZrmkz, TB_NO_REVERSE},
+ {X86::VSCALEFPBF16Z128rrkz, X86::VSCALEFPBF16Z128rmkz, 0},
+ {X86::VSCALEFPBF16Z256rrkz, X86::VSCALEFPBF16Z256rmkz, 0},
+ {X86::VSCALEFPBF16Zrrkz, X86::VSCALEFPBF16Zrmkz, 0},
{X86::VSCALEFPDZ128rrkz, X86::VSCALEFPDZ128rmkz, 0},
{X86::VSCALEFPDZ256rrkz, X86::VSCALEFPDZ256rmkz, 0},
{X86::VSCALEFPDZrrkz, X86::VSCALEFPDZrmkz, 0},
@@ -5769,6 +5918,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VSM3MSG1rr, X86::VSM3MSG1rm, 0},
{X86::VSM3MSG2rr, X86::VSM3MSG2rm, 0},
{X86::VSM3RNDS2rr, X86::VSM3RNDS2rm, 0},
+ {X86::VSQRTNEPBF16Z128rk, X86::VSQRTNEPBF16Z128mk, 0},
+ {X86::VSQRTNEPBF16Z256rk, X86::VSQRTNEPBF16Z256mk, 0},
+ {X86::VSQRTNEPBF16Zrk, X86::VSQRTNEPBF16Zmk, 0},
{X86::VSQRTPDZ128rk, X86::VSQRTPDZ128mk, 0},
{X86::VSQRTPDZ256rk, X86::VSQRTPDZ256mk, 0},
{X86::VSQRTPDZrk, X86::VSQRTPDZmk, 0},
@@ -5781,6 +5933,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VSQRTSDZr_Intkz, X86::VSQRTSDZm_Intkz, TB_NO_REVERSE},
{X86::VSQRTSHZr_Intkz, X86::VSQRTSHZm_Intkz, TB_NO_REVERSE},
{X86::VSQRTSSZr_Intkz, X86::VSQRTSSZm_Intkz, TB_NO_REVERSE},
+ {X86::VSUBNEPBF16Z128rrkz, X86::VSUBNEPBF16Z128rmkz, 0},
+ {X86::VSUBNEPBF16Z256rrkz, X86::VSUBNEPBF16Z256rmkz, 0},
+ {X86::VSUBNEPBF16Zrrkz, X86::VSUBNEPBF16Zrmkz, 0},
{X86::VSUBPDZ128rrkz, X86::VSUBPDZ128rmkz, 0},
{X86::VSUBPDZ256rrkz, X86::VSUBPDZ256rmkz, 0},
{X86::VSUBPDZrrkz, X86::VSUBPDZrmkz, 0},
@@ -5814,6 +5969,9 @@ static const X86FoldTableEntry Table3[] = {
};
static const X86FoldTableEntry Table4[] = {
+ {X86::VADDNEPBF16Z128rrk, X86::VADDNEPBF16Z128rmk, 0},
+ {X86::VADDNEPBF16Z256rrk, X86::VADDNEPBF16Z256rmk, 0},
+ {X86::VADDNEPBF16Zrrk, X86::VADDNEPBF16Zrmk, 0},
{X86::VADDPDZ128rrk, X86::VADDPDZ128rmk, 0},
{X86::VADDPDZ256rrk, X86::VADDPDZ256rmk, 0},
{X86::VADDPDZrrk, X86::VADDPDZrmk, 0},
@@ -5883,6 +6041,9 @@ static const X86FoldTableEntry Table4[] = {
{X86::VDBPSADBWZ128rrik, X86::VDBPSADBWZ128rmik, 0},
{X86::VDBPSADBWZ256rrik, X86::VDBPSADBWZ256rmik, 0},
{X86::VDBPSADBWZrrik, X86::VDBPSADBWZrmik, 0},
+ {X86::VDIVNEPBF16Z128rrk, X86::VDIVNEPBF16Z128rmk, 0},
+ {X86::VDIVNEPBF16Z256rrk, X86::VDIVNEPBF16Z256rmk, 0},
+ {X86::VDIVNEPBF16Zrrk, X86::VDIVNEPBF16Zrmk, 0},
{X86::VDIVPDZ128rrk, X86::VDIVPDZ128rmk, 0},
{X86::VDIVPDZ256rrk, X86::VDIVPDZ256rmk, 0},
{X86::VDIVPDZrrk, X86::VDIVPDZrmk, 0},
@@ -5935,6 +6096,12 @@ static const X86FoldTableEntry Table4[] = {
{X86::VFIXUPIMMSDZrrikz, X86::VFIXUPIMMSDZrmikz, TB_NO_REVERSE},
{X86::VFIXUPIMMSSZrrik, X86::VFIXUPIMMSSZrmik, TB_NO_REVERSE},
{X86::VFIXUPIMMSSZrrikz, X86::VFIXUPIMMSSZrmikz, TB_NO_REVERSE},
+ {X86::VFMADD132NEPBF16Z128rk, X86::VFMADD132NEPBF16Z128mk, 0},
+ {X86::VFMADD132NEPBF16Z128rkz, X86::VFMADD132NEPBF16Z128mkz, 0},
+ {X86::VFMADD132NEPBF16Z256rk, X86::VFMADD132NEPBF16Z256mk, 0},
+ {X86::VFMADD132NEPBF16Z256rkz, X86::VFMADD132NEPBF16Z256mkz, 0},
+ {X86::VFMADD132NEPBF16Zrk, X86::VFMADD132NEPBF16Zmk, 0},
+ {X86::VFMADD132NEPBF16Zrkz, X86::VFMADD132NEPBF16Zmkz, 0},
{X86::VFMADD132PDZ128rk, X86::VFMADD132PDZ128mk, 0},
{X86::VFMADD132PDZ128rkz, X86::VFMADD132PDZ128mkz, 0},
{X86::VFMADD132PDZ256rk, X86::VFMADD132PDZ256mk, 0},
@@ -5959,6 +6126,12 @@ static const X86FoldTableEntry Table4[] = {
{X86::VFMADD132SHZr_Intkz, X86::VFMADD132SHZm_Intkz, TB_NO_REVERSE},
{X86::VFMADD132SSZr_Intk, X86::VFMADD132SSZm_Intk, TB_NO_REVERSE},
{X86::VFMADD132SSZr_Intkz, X86::VFMADD132SSZm_Intkz, TB_NO_REVERSE},
+ {X86::VFMADD213NEPBF16Z128rk, X86::VFMADD213NEPBF16Z128mk, 0},
+ {X86::VFMADD213NEPBF16Z128rkz, X86::VFMADD213NEPBF16Z128mkz, 0},
+ {X86::VFMADD213NEPBF16Z256rk, X86::VFMADD213NEPBF16Z256mk, 0},
+ {X86::VFMADD213NEPBF16Z256rkz, X86::VFMADD213NEPBF16Z256mkz, 0},
+ {X86::VFMADD213NEPBF16Zrk, X86::VFMADD213NEPBF16Zmk, 0},
+ {X86::VFMADD213NEPBF16Zrkz, X86::VFMADD213NEPBF16Zmkz, 0},
{X86::VFMADD213PDZ128rk, X86::VFMADD213PDZ128mk, 0},
{X86::VFMADD213PDZ128rkz, X86::VFMADD213PDZ128mkz, 0},
{X86::VFMADD213PDZ256rk, X86::VFMADD213PDZ256mk, 0},
@@ -5983,6 +6156,12 @@ static const X86FoldTableEntry Table4[] = {
{X86::VFMADD213SHZr_Intkz, X86::VFMADD213SHZm_Intkz, TB_NO_REVERSE},
{X86::VFMADD213SSZr_Intk, X86::VFMADD213SSZm_Intk, TB_NO_REVERSE},
{X86::VFMADD213SSZr_Intkz, X86::VFMADD213SSZm_Intkz, TB_NO_REVERSE},
+ {X86::VFMADD231NEPBF16Z128rk, X86::VFMADD231NEPBF16Z128mk, 0},
+ {X86::VFMADD231NEPBF16Z128rkz, X86::VFMADD231NEPBF16Z128mkz, 0},
+ {X86::VFMADD231NEPBF16Z256rk, X86::VFMADD231NEPBF16Z256mk, 0},
+ {X86::VFMADD231NEPBF16Z256rkz, X86::VFMADD231NEPBF16Z256mkz, 0},
+ {X86::VFMADD231NEPBF16Zrk, X86::VFMADD231NEPBF16Zmk, 0},
+ {X86::VFMADD231NEPBF16Zrkz, X86::VFMADD231NEPBF16Zmkz, 0},
{X86::VFMADD231PDZ128rk, X86::VFMADD231PDZ128mk, 0},
{X86::VFMADD231PDZ128rkz, X86::VFMADD231PDZ128mkz, 0},
{X86::VFMADD231PDZ256rk, X86::VFMADD231PDZ256mk, 0},
@@ -6069,6 +6248,12 @@ static const X86FoldTableEntry Table4[] = {
{X86::VFMADDSUB231PSZ256rkz, X86::VFMADDSUB231PSZ256mkz, 0},
{X86::VFMADDSUB231PSZrk, X86::VFMADDSUB231PSZmk, 0},
{X86::VFMADDSUB231PSZrkz, X86::VFMADDSUB231PSZmkz, 0},
+ {X86::VFMSUB132NEPBF16Z128rk, X86::VFMSUB132NEPBF16Z128mk, 0},
+ {X86::VFMSUB132NEPBF16Z128rkz, X86::VFMSUB132NEPBF16Z128mkz, 0},
+ {X86::VFMSUB132NEPBF16Z256rk, X86::VFMSUB132NEPBF16Z256mk, 0},
+ {X86::VFMSUB132NEPBF16Z256rkz, X86::VFMSUB132NEPBF16Z256mkz, 0},
+ {X86::VFMSUB132NEPBF16Zrk, X86::VFMSUB132NEPBF16Zmk, 0},
+ {X86::VFMSUB132NEPBF16Zrkz, X86::VFMSUB132NEPBF16Zmkz, 0},
{X86::VFMSUB132PDZ128rk, X86::VFMSUB132PDZ128mk, 0},
{X86::VFMSUB132PDZ128rkz, X86::VFMSUB132PDZ128mkz, 0},
{X86::VFMSUB132PDZ256rk, X86::VFMSUB132PDZ256mk, 0},
@@ -6093,6 +6278,12 @@ static const X86FoldTableEntry Table4[] = {
{X86::VFMSUB132SHZr_Intkz, X86::VFMSUB132SHZm_Intkz, TB_NO_REVERSE},
{X86::VFMSUB132SSZr_Intk, X86::VFMSUB132SSZm_Intk, TB_NO_REVERSE},
{X86::VFMSUB132SSZr_Intkz, X86::VFMSUB132SSZm_Intkz, TB_NO_REVERSE},
+ {X86::VFMSUB213NEPBF16Z128rk, X86::VFMSUB213NEPBF16Z128mk, 0},
+ {X86::VFMSUB213NEPBF16Z128rkz, X86::VFMSUB213NEPBF16Z128mkz, 0},
+ {X86::VFMSUB213NEPBF16Z256rk, X86::VFMSUB213NEPBF16Z256mk, 0},
+ {X86::VFMSUB213NEPBF16Z256rkz, X86::VFMSUB213NEPBF16Z256mkz, 0},
+ {X86::VFMSUB213NEPBF16Zrk, X86::VFMSUB213NEPBF16Zmk, 0},
+ {X86::VFMSUB213NEPBF16Zrkz, X86::VFMSUB213NEPBF16Zmkz, 0},
{X86::VFMSUB213PDZ128rk, X86::VFMSUB213PDZ128mk, 0},
{X86::VFMSUB213PDZ128rkz, X86::VFMSUB213PDZ128mkz, 0},
{X86::VFMSUB213PDZ256rk, X86::VFMSUB213PDZ256mk, 0},
@@ -6117,6 +6308,12 @@ static const X86FoldTableEntry Table4[] = {
{X86::VFMSUB213SHZr_Intkz, X86::VFMSUB213SHZm_Intkz, TB_NO_REVERSE},
{X86::VFMSUB213SSZr_Intk, X86::VFMSUB213SSZm_Intk, TB_NO_REVERSE},
{X86::VFMSUB213SSZr_Intkz, X86::VFMSUB213SSZm_Intkz, TB_NO_REVERSE},
+ {X86::VFMSUB231NEPBF16Z128rk, X86::VFMSUB231NEPBF16Z128mk, 0},
+ {X86::VFMSUB231NEPBF16Z128rkz, X86::VFMSUB231NEPBF16Z128mkz, 0},
+ {X86::VFMSUB231NEPBF16Z256rk, X86::VFMSUB231NEPBF16Z256mk, 0},
+ {X86::VFMSUB231NEPBF16Z256rkz, X86::VFMSUB231NEPBF16Z256mkz, 0},
+ {X86::VFMSUB231NEPBF16Zrk, X86::VFMSUB231NEPBF16Zmk, 0},
+ {X86::VFMSUB231NEPBF16Zrkz, X86::VFMSUB231NEPBF16Zmkz, 0},
{X86::VFMSUB231PDZ128rk, X86::VFMSUB231PDZ128mk, 0},
{X86::VFMSUB231PDZ128rkz, X86::VFMSUB231PDZ128mkz, 0},
{X86::VFMSUB231PDZ256rk, X86::VFMSUB231PDZ256mk, 0},
@@ -6199,6 +6396,12 @@ static const X86FoldTableEntry Table4[] = {
{X86::VFMULCPHZ256rrk, X86::VFMULCPHZ256rmk, 0},
{X86::VFMULCPHZrrk, X86::VFMULCPHZrmk, 0},
{X86::VFMULCSHZrrk, X86::VFMULCSHZrmk, TB_NO_REVERSE},
+ {X86::VFNMADD132NEPBF16Z128rk, X86::VFNMADD132NEPBF16Z128mk, 0},
+ {X86::VFNMADD132NEPBF16Z128rkz, X86::VFNMADD132NEPBF16Z128mkz, 0},
+ {X86::VFNMADD132NEPBF16Z256rk, X86::VFNMADD132NEPBF16Z256mk, 0},
+ {X86::VFNMADD132NEPBF16Z256rkz, X86::VFNMADD132NEPBF16Z256mkz, 0},
+ {X86::VFNMADD132NEPBF16Zrk, X86::VFNMADD132NEPBF16Zmk, 0},
+ {X86::VFNMADD132NEPBF16Zrkz, X86::VFNMADD132NEPBF16Zmkz, 0},
{X86::VFNMADD132PDZ128rk, X86::VFNMADD132PDZ128mk, 0},
{X86::VFNMADD132PDZ128rkz, X86::VFNMADD132PDZ128mkz, 0},
{X86::VFNMADD132PDZ256rk, X86::VFNMADD132PDZ256mk, 0},
@@ -6223,6 +6426,12 @@ static const X86FoldTableEntry Table4[] = {
{X86::VFNMADD132SHZr_Intkz, X86::VFNMADD132SHZm_Intkz, TB_NO_REVERSE},
{X86::VFNMADD132SSZr_Intk, X86::VFNMADD132SSZm_Intk, TB_NO_REVERSE},
{X86::VFNMADD132SSZr_Intkz, X86::VFNMADD132SSZm_Intkz, TB_NO_REVERSE},
+ {X86::VFNMADD213NEPBF16Z128rk, X86::VFNMADD213NEPBF16Z128mk, 0},
+ {X86::VFNMADD213NEPBF16Z128rkz, X86::VFNMADD213NEPBF16Z128mkz, 0},
+ {X86::VFNMADD213NEPBF16Z256rk, X86::VFNMADD213NEPBF16Z256mk, 0},
+ {X86::VFNMADD213NEPBF16Z256rkz, X86::VFNMADD213NEPBF16Z256mkz, 0},
+ {X86::VFNMADD213NEPBF16Zrk, X86::VFNMADD213NEPBF16Zmk, 0},
+ {X86::VFNMADD213NEPBF16Zrkz, X86::VFNMADD213NEPBF16Zmkz, 0},
{X86::VFNMADD213PDZ128rk, X86::VFNMADD213PDZ128mk, 0},
{X86::VFNMADD213PDZ128rkz, X86::VFNMADD213PDZ128mkz, 0},
{X86::VFNMADD213PDZ256rk, X86::VFNMADD213PDZ256mk, 0},
@@ -6247,6 +6456,12 @@ static const X86FoldTableEntry Table4[] = {
{X86::VFNMADD213SHZr_Intkz, X86::VFNMADD213SHZm_Intkz, TB_NO_REVERSE},
{X86::VFNMADD213SSZr_Intk, X86::VFNMADD213SSZm_Intk, TB_NO_REVERSE},
{X86::VFNMADD213SSZr_Intkz, X86::VFNMADD213SSZm_Intkz, TB_NO_REVERSE},
+ {X86::VFNMADD231NEPBF16Z128rk, X86::VFNMADD231NEPBF16Z128mk, 0},
+ {X86::VFNMADD231NEPBF16Z128rkz, X86::VFNMADD231NEPBF16Z128mkz, 0},
+ {X86::VFNMADD231NEPBF16Z256rk, X86::VFNMADD231NEPBF16Z256mk, 0},
+ {X86::VFNMADD231NEPBF16Z256rkz, X86::VFNMADD231NEPBF16Z256mkz, 0},
+ {X86::VFNMADD231NEPBF16Zrk, X86::VFNMADD231NEPBF16Zmk, 0},
+ {X86::VFNMADD231NEPBF16Zrkz, X86::VFNMADD231NEPBF16Zmkz, 0},
{X86::VFNMADD231PDZ128rk, X86::VFNMADD231PDZ128mk, 0},
{X86::VFNMADD231PDZ128rkz, X86::VFNMADD231PDZ128mkz, 0},
{X86::VFNMADD231PDZ256rk, X86::VFNMADD231PDZ256mk, 0},
@@ -6271,6 +6486,12 @@ static const X86FoldTableEntry Table4[] = {
{X86::VFNMADD231SHZr_Intkz, X86::VFNMADD231SHZm_Intkz, TB_NO_REVERSE},
{X86::VFNMADD231SSZr_Intk, X86::VFNMADD231SSZm_Intk, TB_NO_REVERSE},
{X86::VFNMADD231SSZr_Intkz, X86::VFNMADD231SSZm_Intkz, TB_NO_REVERSE},
+ {X86::VFNMSUB132NEPBF16Z128rk, X86::VFNMSUB132NEPBF16Z128mk, 0},
+ {X86::VFNMSUB132NEPBF16Z128rkz, X86::VFNMSUB132NEPBF16Z128mkz, 0},
+ {X86::VFNMSUB132NEPBF16Z256rk, X86::VFNMSUB132NEPBF16Z256mk, 0},
+ {X86::VFNMSUB132NEPBF16Z256rkz, X86::VFNMSUB132NEPBF16Z256mkz, 0},
+ {X86::VFNMSUB132NEPBF16Zrk, X86::VFNMSUB132NEPBF16Zmk, 0},
+ {X86::VFNMSUB132NEPBF16Zrkz, X86::VFNMSUB132NEPBF16Zmkz, 0},
{X86::VFNMSUB132PDZ128rk, X86::VFNMSUB132PDZ128mk, 0},
{X86::VFNMSUB132PDZ128rkz, X86::VFNMSUB132PDZ128mkz, 0},
{X86::VFNMSUB132PDZ256rk, X86::VFNMSUB132PDZ256mk, 0},
@@ -6295,6 +6516,12 @@ static const X86FoldTableEntry Table4[] = {
{X86::VFNMSUB132SHZr_Intkz, X86::VFNMSUB132SHZm_Intkz, TB_NO_REVERSE},
{X86::VFNMSUB132SSZr_Intk, X86::VFNMSUB132SSZm_Intk, TB_NO_REVERSE},
{X86::VFNMSUB132SSZr_Intkz, X86::VFNMSUB132SSZm_Intkz, TB_NO_REVERSE},
+ {X86::VFNMSUB213NEPBF16Z128rk, X86::VFNMSUB213NEPBF16Z128mk, 0},
+ {X86::VFNMSUB213NEPBF16Z128rkz, X86::VFNMSUB213NEPBF16Z128mkz, 0},
+ {X86::VFNMSUB213NEPBF16Z256rk, X86::VFNMSUB213NEPBF16Z256mk, 0},
+ {X86::VFNMSUB213NEPBF16Z256rkz, X86::VFNMSUB213NEPBF16Z256mkz, 0},
+ {X86::VFNMSUB213NEPBF16Zrk, X86::VFNMSUB213NEPBF16Zmk, 0},
+ {X86::VFNMSUB213NEPBF16Zrkz, X86::VFNMSUB213NEPBF16Zmkz, 0},
{X86::VFNMSUB213PDZ128rk, X86::VFNMSUB213PDZ128mk, 0},
{X86::VFNMSUB213PDZ128rkz, X86::VFNMSUB213PDZ128mkz, 0},
{X86::VFNMSUB213PDZ256rk, X86::VFNMSUB213PDZ256mk, 0},
@@ -6319,6 +6546,12 @@ static const X86FoldTableEntry Table4[] = {
{X86::VFNMSUB213SHZr_Intkz, X86::VFNMSUB213SHZm_Intkz, TB_NO_REVERSE},
{X86::VFNMSUB213SSZr_Intk, X86::VFNMSUB213SSZm_Intk, TB_NO_REVERSE},
{X86::VFNMSUB213SSZr_Intkz, X86::VFNMSUB213SSZm_Intkz, TB_NO_REVERSE},
+ {X86::VFNMSUB231NEPBF16Z128rk, X86::VFNMSUB231NEPBF16Z128mk, 0},
+ {X86::VFNMSUB231NEPBF16Z128rkz, X86::VFNMSUB231NEPBF16Z128mkz, 0},
+ {X86::VFNMSUB231NEPBF16Z256rk, X86::VFNMSUB231NEPBF16Z256mk, 0},
+ {X86::VFNMSUB231NEPBF16Z256rkz, X86::VFNMSUB231NEPBF16Z256mkz, 0},
+ {X86::VFNMSUB231NEPBF16Zrk, X86::VFNMSUB231NEPBF16Zmk, 0},
+ {X86::VFNMSUB231NEPBF16Zrkz, X86::VFNMSUB231NEPBF16Zmkz, 0},
{X86::VFNMSUB231PDZ128rk, X86::VFNMSUB231PDZ128mk, 0},
{X86::VFNMSUB231PDZ128rkz, X86::VFNMSUB231PDZ128mkz, 0},
{X86::VFNMSUB231PDZ256rk, X86::VFNMSUB231PDZ256mk, 0},
@@ -6379,6 +6612,9 @@ static const X86FoldTableEntry Table4[] = {
{X86::VMAXCPSZ128rrk, X86::VMAXCPSZ128rmk, 0},
{X86::VMAXCPSZ256rrk, X86::VMAXCPSZ256rmk, 0},
{X86::VMAXCPSZrrk, X86::VMAXCPSZrmk, 0},
+ {X86::VMAXPBF16Z128rrk, X86::VMAXPBF16Z128rmk, 0},
+ {X86::VMAXPBF16Z256rrk, X86::VMAXPBF16Z256rmk, 0},
+ {X86::VMAXPBF16Zrrk, X86::VMAXPBF16Zrmk, 0},
{X86::VMAXPDZ128rrk, X86::VMAXPDZ128rmk, 0},
{X86::VMAXPDZ256rrk, X86::VMAXPDZ256rmk, 0},
{X86::VMAXPDZrrk, X86::VMAXPDZrmk, 0},
@@ -6415,6 +6651,9 @@ static const X86FoldTableEntry Table4[] = {
{X86::VMINMAXSDrrik, X86::VMINMAXSDrmik, TB_NO_REVERSE},
{X86::VMINMAXSHrrik, X86::VMINMAXSHrmik, TB_NO_REVERSE},
{X86::VMINMAXSSrrik, X86::VMINMAXSSrmik, TB_NO_REVERSE},
+ {X86::VMINPBF16Z128rrk, X86::VMINPBF16Z128rmk, 0},
+ {X86::VMINPBF16Z256rrk, X86::VMINPBF16Z256rmk, 0},
+ {X86::VMINPBF16Zrrk, X86::VMINPBF16Zrmk, 0},
{X86::VMINPDZ128rrk, X86::VMINPDZ128rmk, 0},
{X86::VMINPDZ256rrk, X86::VMINPDZ256rmk, 0},
{X86::VMINPDZrrk, X86::VMINPDZrmk, 0},
@@ -6430,6 +6669,9 @@ static const X86FoldTableEntry Table4[] = {
{X86::VMPSADBWZ128rrik, X86::VMPSADBWZ128rmik, 0},
{X86::VMPSADBWZ256rrik, X86::VMPSADBWZ256rmik, 0},
{X86::VMPSADBWZrrik, X86::VMPSADBWZrmik, 0},
+ {X86::VMULNEPBF16Z128rrk, X86::VMULNEPBF16Z128rmk, 0},
+ {X86::VMULNEPBF16Z256rrk, X86::VMULNEPBF16Z256rmk, 0},
+ {X86::VMULNEPBF16Zrrk, X86::VMULNEPBF16Zrmk, 0},
{X86::VMULPDZ128rrk, X86::VMULPDZ128rmk, 0},
{X86::VMULPDZ256rrk, X86::VMULPDZ256rmk, 0},
{X86::VMULPDZrrk, X86::VMULPDZrmk, 0},
@@ -7005,6 +7247,9 @@ static const X86FoldTableEntry Table4[] = {
{X86::VRSQRT28SDZrk, X86::VRSQRT28SDZmk, TB_NO_REVERSE},
{X86::VRSQRT28SSZrk, X86::VRSQRT28SSZmk, TB_NO_REVERSE},
{X86::VRSQRTSHZrrk, X86::VRSQRTSHZrmk, TB_NO_REVERSE},
+ {X86::VSCALEFPBF16Z128rrk, X86::VSCALEFPBF16Z128rmk, 0},
+ {X86::VSCALEFPBF16Z256rrk, X86::VSCALEFPBF16Z256rmk, 0},
+ {X86::VSCALEFPBF16Zrrk, X86::VSCALEFPBF16Zrmk, 0},
{X86::VSCALEFPDZ128rrk, X86::VSCALEFPDZ128rmk, 0},
{X86::VSCALEFPDZ256rrk, X86::VSCALEFPDZ256rmk, 0},
{X86::VSCALEFPDZrrk, X86::VSCALEFPDZrmk, 0},
@@ -7034,6 +7279,9 @@ static const X86FoldTableEntry Table4[] = {
{X86::VSQRTSDZr_Intk, X86::VSQRTSDZm_Intk, TB_NO_REVERSE},
{X86::VSQRTSHZr_Intk, X86::VSQRTSHZm_Intk, TB_NO_REVERSE},
{X86::VSQRTSSZr_Intk, X86::VSQRTSSZm_Intk, TB_NO_REVERSE},
+ {X86::VSUBNEPBF16Z128rrk, X86::VSUBNEPBF16Z128rmk, 0},
+ {X86::VSUBNEPBF16Z256rrk, X86::VSUBNEPBF16Z256rmk, 0},
+ {X86::VSUBNEPBF16Zrrk, X86::VSUBNEPBF16Zrmk, 0},
{X86::VSUBPDZ128rrk, X86::VSUBPDZ128rmk, 0},
{X86::VSUBPDZ256rrk, X86::VSUBPDZ256rmk, 0},
{X86::VSUBPDZrrk, X86::VSUBPDZrmk, 0},
@@ -7264,6 +7512,9 @@ static const X86FoldTableEntry BroadcastTable1[] = {
{X86::VCVTW2PHZrr, X86::VCVTW2PHZrmb, TB_BCAST_W},
{X86::VEXP2PDZr, X86::VEXP2PDZmb, TB_BCAST_SD},
{X86::VEXP2PSZr, X86::VEXP2PSZmb, TB_BCAST_SS},
+ {X86::VFPCLASSPBF16Z128rr, X86::VFPCLASSPBF16Z128rmb, TB_BCAST_SH},
+ {X86::VFPCLASSPBF16Z256rr, X86::VFPCLASSPBF16Z256rmb, TB_BCAST_SH},
+ {X86::VFPCLASSPBF16Zrr, X86::VFPCLASSPBF16Zrmb, TB_BCAST_SH},
{X86::VFPCLASSPDZ128rr, X86::VFPCLASSPDZ128rmb, TB_BCAST_SD},
{X86::VFPCLASSPDZ256rr, X86::VFPCLASSPDZ256rmb, TB_BCAST_SD},
{X86::VFPCLASSPDZrr, X86::VFPCLASSPDZrmb, TB_BCAST_SD},
@@ -7273,6 +7524,9 @@ static const X86FoldTableEntry BroadcastTable1[] = {
{X86::VFPCLASSPSZ128rr, X86::VFPCLASSPSZ128rmb, TB_BCAST_SS},
{X86::VFPCLASSPSZ256rr, X86::VFPCLASSPSZ256rmb, TB_BCAST_SS},
{X86::VFPCLASSPSZrr, X86::VFPCLASSPSZrmb, TB_BCAST_SS},
+ {X86::VGETEXPPBF16Z128r, X86::VGETEXPPBF16Z128mb, TB_BCAST_SH},
+ {X86::VGETEXPPBF16Z256r, X86::VGETEXPPBF16Z256mb, TB_BCAST_SH},
+ {X86::VGETEXPPBF16Zr, X86::VGETEXPPBF16Zmb, TB_BCAST_SH},
{X86::VGETEXPPDZ128r, X86::VGETEXPPDZ128mb, TB_BCAST_SD},
{X86::VGETEXPPDZ256r, X86::VGETEXPPDZ256mb, TB_BCAST_SD},
{X86::VGETEXPPDZr, X86::VGETEXPPDZmb, TB_BCAST_SD},
@@ -7282,6 +7536,9 @@ static const X86FoldTableEntry BroadcastTable1[] = {
{X86::VGETEXPPSZ128r, X86::VGETEXPPSZ128mb, TB_BCAST_SS},
{X86::VGETEXPPSZ256r, X86::VGETEXPPSZ256mb, TB_BCAST_SS},
{X86::VGETEXPPSZr, X86::VGETEXPPSZmb, TB_BCAST_SS},
+ {X86::VGETMANTPBF16Z128rri, X86::VGETMANTPBF16Z128rmbi, TB_BCAST_SH},
+ {X86::VGETMANTPBF16Z256rri, X86::VGETMANTPBF16Z256rmbi, TB_BCAST_SH},
+ {X86::VGETMANTPBF16Zrri, X86::VGETMANTPBF16Zrmbi, TB_BCAST_SH},
{X86::VGETMANTPDZ128rri, X86::VGETMANTPDZ128rmbi, TB_BCAST_SD},
{X86::VGETMANTPDZ256rri, X86::VGETMANTPDZ256rmbi, TB_BCAST_SD},
{X86::VGETMANTPDZrri, X86::VGETMANTPDZrmbi, TB_BCAST_SD},
@@ -7366,9 +7623,15 @@ static const X86FoldTableEntry BroadcastTable1[] = {
{X86::VRCP14PSZr, X86::VRCP14PSZmb, TB_BCAST_SS},
{X86::VRCP28PDZr, X86::VRCP28PDZmb, TB_BCAST_SD},
{X86::VRCP28PSZr, X86::VRCP28PSZmb, TB_BCAST_SS},
+ {X86::VRCPPBF16Z128r, X86::VRCPPBF16Z128mb, TB_BCAST_SH},
+ {X86::VRCPPBF16Z256r, X86::VRCPPBF16Z256mb, TB_BCAST_SH},
+ {X86::VRCPPBF16Zr, X86::VRCPPBF16Zmb, TB_BCAST_SH},
{X86::VRCPPHZ128r, X86::VRCPPHZ128mb, TB_BCAST_SH},
{X86::VRCPPHZ256r, X86::VRCPPHZ256mb, TB_BCAST_SH},
{X86::VRCPPHZr, X86::VRCPPHZmb, TB_BCAST_SH},
+ {X86::VREDUCENEPBF16Z128rri, X86::VREDUCENEPBF16Z128rmbi, TB_BCAST_SH},
+ {X86::VREDUCENEPBF16Z256rri, X86::VREDUCENEPBF16Z256rmbi, TB_BCAST_SH},
+ {X86::VREDUCENEPBF16Zrri, X86::VREDUCENEPBF16Zrmbi, TB_BCAST_SH},
{X86::VREDUCEPDZ128rri, X86::VREDUCEPDZ128rmbi, TB_BCAST_SD},
{X86::VREDUCEPDZ256rri, X86::VREDUCEPDZ256rmbi, TB_BCAST_SD},
{X86::VREDUCEPDZrri, X86::VREDUCEPDZrmbi, TB_BCAST_SD},
@@ -7378,6 +7641,9 @@ static const X86FoldTableEntry BroadcastTable1[] = {
{X86::VREDUCEPSZ128rri, X86::VREDUCEPSZ128rmbi, TB_BCAST_SS},
{X86::VREDUCEPSZ256rri, X86::VREDUCEPSZ256rmbi, TB_BCAST_SS},
{X86::VREDUCEPSZrri, X86::VREDUCEPSZrmbi, TB_BCAST_SS},
+ {X86::VRNDSCALENEPBF16Z128rri, X86::VRNDSCALENEPBF16Z128rmbi, TB_BCAST_SH},
+ {X86::VRNDSCALENEPBF16Z256rri, X86::VRNDSCALENEPBF16Z256rmbi, TB_BCAST_SH},
+ {X86::VRNDSCALENEPBF16Zrri, X86::VRNDSCALENEPBF16Zrmbi, TB_BCAST_SH},
{X86::VRNDSCALEPDZ128rri, X86::VRNDSCALEPDZ128rmbi, TB_BCAST_SD},
{X86::VRNDSCALEPDZ256rri, X86::VRNDSCALEPDZ256rmbi, TB_BCAST_SD},
{X86::VRNDSCALEPDZrri, X86::VRNDSCALEPDZrmbi, TB_BCAST_SD},
@@ -7395,9 +7661,15 @@ static const X86FoldTableEntry BroadcastTable1[] = {
{X86::VRSQRT14PSZr, X86::VRSQRT14PSZmb, TB_BCAST_SS},
{X86::VRSQRT28PDZr, X86::VRSQRT28PDZmb, TB_BCAST_SD},
{X86::VRSQRT28PSZr, X86::VRSQRT28PSZmb, TB_BCAST_SS},
+ {X86::VRSQRTPBF16Z128r, X86::VRSQRTPBF16Z128mb, TB_BCAST_SH},
+ {X86::VRSQRTPBF16Z256r, X86::VRSQRTPBF16Z256mb, TB_BCAST_SH},
+ {X86::VRSQRTPBF16Zr, X86::VRSQRTPBF16Zmb, TB_BCAST_SH},
{X86::VRSQRTPHZ128r, X86::VRSQRTPHZ128mb, TB_BCAST_SH},
{X86::VRSQRTPHZ256r, X86::VRSQRTPHZ256mb, TB_BCAST_SH},
{X86::VRSQRTPHZr, X86::VRSQRTPHZmb, TB_BCAST_SH},
+ {X86::VSQRTNEPBF16Z128r, X86::VSQRTNEPBF16Z128mb, TB_BCAST_SH},
+ {X86::VSQRTNEPBF16Z256r, X86::VSQRTNEPBF16Z256mb, TB_BCAST_SH},
+ {X86::VSQRTNEPBF16Zr, X86::VSQRTNEPBF16Zmb, TB_BCAST_SH},
{X86::VSQRTPDZ128r, X86::VSQRTPDZ128mb, TB_BCAST_SD},
{X86::VSQRTPDZ256r, X86::VSQRTPDZ256mb, TB_BCAST_SD},
{X86::VSQRTPDZr, X86::VSQRTPDZmb, TB_BCAST_SD},
@@ -7410,6 +7682,9 @@ static const X86FoldTableEntry BroadcastTable1[] = {
};
static const X86FoldTableEntry BroadcastTable2[] = {
+ {X86::VADDNEPBF16Z128rr, X86::VADDNEPBF16Z128rmb, TB_BCAST_SH},
+ {X86::VADDNEPBF16Z256rr, X86::VADDNEPBF16Z256rmb, TB_BCAST_SH},
+ {X86::VADDNEPBF16Zrr, X86::VADDNEPBF16Zrmb, TB_BCAST_SH},
{X86::VADDPDZ128rr, X86::VADDPDZ128rmb, TB_BCAST_SD},
{X86::VADDPDZ256rr, X86::VADDPDZ256rmb, TB_BCAST_SD},
{X86::VADDPDZrr, X86::VADDPDZrmb, TB_BCAST_SD},
@@ -7443,6 +7718,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VBLENDMPSZ128rr, X86::VBLENDMPSZ128rmb, TB_BCAST_SS},
{X86::VBLENDMPSZ256rr, X86::VBLENDMPSZ256rmb, TB_BCAST_SS},
{X86::VBLENDMPSZrr, X86::VBLENDMPSZrmb, TB_BCAST_SS},
+ {X86::VCMPPBF16Z128rri, X86::VCMPPBF16Z128rmbi, TB_BCAST_SH},
+ {X86::VCMPPBF16Z256rri, X86::VCMPPBF16Z256rmbi, TB_BCAST_SH},
+ {X86::VCMPPBF16Zrri, X86::VCMPPBF16Zrmbi, TB_BCAST_SH},
{X86::VCMPPDZ128rri, X86::VCMPPDZ128rmbi, TB_BCAST_SD},
{X86::VCMPPDZ256rri, X86::VCMPPDZ256rmbi, TB_BCAST_SD},
{X86::VCMPPDZrri, X86::VCMPPDZrmbi, TB_BCAST_SD},
@@ -7677,6 +7955,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VCVTW2PHZ128rrkz, X86::VCVTW2PHZ128rmbkz, TB_BCAST_W},
{X86::VCVTW2PHZ256rrkz, X86::VCVTW2PHZ256rmbkz, TB_BCAST_W},
{X86::VCVTW2PHZrrkz, X86::VCVTW2PHZrmbkz, TB_BCAST_W},
+ {X86::VDIVNEPBF16Z128rr, X86::VDIVNEPBF16Z128rmb, TB_BCAST_SH},
+ {X86::VDIVNEPBF16Z256rr, X86::VDIVNEPBF16Z256rmb, TB_BCAST_SH},
+ {X86::VDIVNEPBF16Zrr, X86::VDIVNEPBF16Zrmb, TB_BCAST_SH},
{X86::VDIVPDZ128rr, X86::VDIVPDZ128rmb, TB_BCAST_SD},
{X86::VDIVPDZ256rr, X86::VDIVPDZ256rmb, TB_BCAST_SD},
{X86::VDIVPDZrr, X86::VDIVPDZrmb, TB_BCAST_SD},
@@ -7694,6 +7975,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VFMULCPHZ128rr, X86::VFMULCPHZ128rmb, TB_BCAST_SS},
{X86::VFMULCPHZ256rr, X86::VFMULCPHZ256rmb, TB_BCAST_SS},
{X86::VFMULCPHZrr, X86::VFMULCPHZrmb, TB_BCAST_SS},
+ {X86::VFPCLASSPBF16Z128rrk, X86::VFPCLASSPBF16Z128rmbk, TB_BCAST_SH},
+ {X86::VFPCLASSPBF16Z256rrk, X86::VFPCLASSPBF16Z256rmbk, TB_BCAST_SH},
+ {X86::VFPCLASSPBF16Zrrk, X86::VFPCLASSPBF16Zrmbk, TB_BCAST_SH},
{X86::VFPCLASSPDZ128rrk, X86::VFPCLASSPDZ128rmbk, TB_BCAST_SD},
{X86::VFPCLASSPDZ256rrk, X86::VFPCLASSPDZ256rmbk, TB_BCAST_SD},
{X86::VFPCLASSPDZrrk, X86::VFPCLASSPDZrmbk, TB_BCAST_SD},
@@ -7703,6 +7987,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VFPCLASSPSZ128rrk, X86::VFPCLASSPSZ128rmbk, TB_BCAST_SS},
{X86::VFPCLASSPSZ256rrk, X86::VFPCLASSPSZ256rmbk, TB_BCAST_SS},
{X86::VFPCLASSPSZrrk, X86::VFPCLASSPSZrmbk, TB_BCAST_SS},
+ {X86::VGETEXPPBF16Z128rkz, X86::VGETEXPPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VGETEXPPBF16Z256rkz, X86::VGETEXPPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VGETEXPPBF16Zrkz, X86::VGETEXPPBF16Zmbkz, TB_BCAST_SH},
{X86::VGETEXPPDZ128rkz, X86::VGETEXPPDZ128mbkz, TB_BCAST_SD},
{X86::VGETEXPPDZ256rkz, X86::VGETEXPPDZ256mbkz, TB_BCAST_SD},
{X86::VGETEXPPDZrkz, X86::VGETEXPPDZmbkz, TB_BCAST_SD},
@@ -7712,6 +7999,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VGETEXPPSZ128rkz, X86::VGETEXPPSZ128mbkz, TB_BCAST_SS},
{X86::VGETEXPPSZ256rkz, X86::VGETEXPPSZ256mbkz, TB_BCAST_SS},
{X86::VGETEXPPSZrkz, X86::VGETEXPPSZmbkz, TB_BCAST_SS},
+ {X86::VGETMANTPBF16Z128rrikz, X86::VGETMANTPBF16Z128rmbikz, TB_BCAST_SH},
+ {X86::VGETMANTPBF16Z256rrikz, X86::VGETMANTPBF16Z256rmbikz, TB_BCAST_SH},
+ {X86::VGETMANTPBF16Zrrikz, X86::VGETMANTPBF16Zrmbikz, TB_BCAST_SH},
{X86::VGETMANTPDZ128rrikz, X86::VGETMANTPDZ128rmbikz, TB_BCAST_SD},
{X86::VGETMANTPDZ256rrikz, X86::VGETMANTPDZ256rmbikz, TB_BCAST_SD},
{X86::VGETMANTPDZrrikz, X86::VGETMANTPDZrmbikz, TB_BCAST_SD},
@@ -7736,6 +8026,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VMAXCPSZ128rr, X86::VMAXCPSZ128rmb, TB_BCAST_SS},
{X86::VMAXCPSZ256rr, X86::VMAXCPSZ256rmb, TB_BCAST_SS},
{X86::VMAXCPSZrr, X86::VMAXCPSZrmb, TB_BCAST_SS},
+ {X86::VMAXPBF16Z128rr, X86::VMAXPBF16Z128rmb, TB_BCAST_SH},
+ {X86::VMAXPBF16Z256rr, X86::VMAXPBF16Z256rmb, TB_BCAST_SH},
+ {X86::VMAXPBF16Zrr, X86::VMAXPBF16Zrmb, TB_BCAST_SH},
{X86::VMAXPDZ128rr, X86::VMAXPDZ128rmb, TB_BCAST_SD},
{X86::VMAXPDZ256rr, X86::VMAXPDZ256rmb, TB_BCAST_SD},
{X86::VMAXPDZrr, X86::VMAXPDZrmb, TB_BCAST_SD},
@@ -7766,6 +8059,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VMINMAXPSZ128rri, X86::VMINMAXPSZ128rmbi, TB_BCAST_SS},
{X86::VMINMAXPSZ256rri, X86::VMINMAXPSZ256rmbi, TB_BCAST_SS},
{X86::VMINMAXPSZrri, X86::VMINMAXPSZrmbi, TB_BCAST_SS},
+ {X86::VMINPBF16Z128rr, X86::VMINPBF16Z128rmb, TB_BCAST_SH},
+ {X86::VMINPBF16Z256rr, X86::VMINPBF16Z256rmb, TB_BCAST_SH},
+ {X86::VMINPBF16Zrr, X86::VMINPBF16Zrmb, TB_BCAST_SH},
{X86::VMINPDZ128rr, X86::VMINPDZ128rmb, TB_BCAST_SD},
{X86::VMINPDZ256rr, X86::VMINPDZ256rmb, TB_BCAST_SD},
{X86::VMINPDZrr, X86::VMINPDZrmb, TB_BCAST_SD},
@@ -7775,6 +8071,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VMINPSZ128rr, X86::VMINPSZ128rmb, TB_BCAST_SS},
{X86::VMINPSZ256rr, X86::VMINPSZ256rmb, TB_BCAST_SS},
{X86::VMINPSZrr, X86::VMINPSZrmb, TB_BCAST_SS},
+ {X86::VMULNEPBF16Z128rr, X86::VMULNEPBF16Z128rmb, TB_BCAST_SH},
+ {X86::VMULNEPBF16Z256rr, X86::VMULNEPBF16Z256rmb, TB_BCAST_SH},
+ {X86::VMULNEPBF16Zrr, X86::VMULNEPBF16Zrmb, TB_BCAST_SH},
{X86::VMULPDZ128rr, X86::VMULPDZ128rmb, TB_BCAST_SD},
{X86::VMULPDZ256rr, X86::VMULPDZ256rmb, TB_BCAST_SD},
{X86::VMULPDZrr, X86::VMULPDZrmb, TB_BCAST_SD},
@@ -8068,9 +8367,15 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VRCP14PSZrkz, X86::VRCP14PSZmbkz, TB_BCAST_SS},
{X86::VRCP28PDZrkz, X86::VRCP28PDZmbkz, TB_BCAST_SD},
{X86::VRCP28PSZrkz, X86::VRCP28PSZmbkz, TB_BCAST_SS},
+ {X86::VRCPPBF16Z128rkz, X86::VRCPPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VRCPPBF16Z256rkz, X86::VRCPPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VRCPPBF16Zrkz, X86::VRCPPBF16Zmbkz, TB_BCAST_SH},
{X86::VRCPPHZ128rkz, X86::VRCPPHZ128mbkz, TB_BCAST_SH},
{X86::VRCPPHZ256rkz, X86::VRCPPHZ256mbkz, TB_BCAST_SH},
{X86::VRCPPHZrkz, X86::VRCPPHZmbkz, TB_BCAST_SH},
+ {X86::VREDUCENEPBF16Z128rrikz, X86::VREDUCENEPBF16Z128rmbikz, TB_BCAST_SH},
+ {X86::VREDUCENEPBF16Z256rrikz, X86::VREDUCENEPBF16Z256rmbikz, TB_BCAST_SH},
+ {X86::VREDUCENEPBF16Zrrikz, X86::VREDUCENEPBF16Zrmbikz, TB_BCAST_SH},
{X86::VREDUCEPDZ128rrikz, X86::VREDUCEPDZ128rmbikz, TB_BCAST_SD},
{X86::VREDUCEPDZ256rrikz, X86::VREDUCEPDZ256rmbikz, TB_BCAST_SD},
{X86::VREDUCEPDZrrikz, X86::VREDUCEPDZrmbikz, TB_BCAST_SD},
@@ -8080,6 +8385,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VREDUCEPSZ128rrikz, X86::VREDUCEPSZ128rmbikz, TB_BCAST_SS},
{X86::VREDUCEPSZ256rrikz, X86::VREDUCEPSZ256rmbikz, TB_BCAST_SS},
{X86::VREDUCEPSZrrikz, X86::VREDUCEPSZrmbikz, TB_BCAST_SS},
+ {X86::VRNDSCALENEPBF16Z128rrikz, X86::VRNDSCALENEPBF16Z128rmbikz, TB_BCAST_SH},
+ {X86::VRNDSCALENEPBF16Z256rrikz, X86::VRNDSCALENEPBF16Z256rmbikz, TB_BCAST_SH},
+ {X86::VRNDSCALENEPBF16Zrrikz, X86::VRNDSCALENEPBF16Zrmbikz, TB_BCAST_SH},
{X86::VRNDSCALEPDZ128rrikz, X86::VRNDSCALEPDZ128rmbikz, TB_BCAST_SD},
{X86::VRNDSCALEPDZ256rrikz, X86::VRNDSCALEPDZ256rmbikz, TB_BCAST_SD},
{X86::VRNDSCALEPDZrrikz, X86::VRNDSCALEPDZrmbikz, TB_BCAST_SD},
@@ -8097,9 +8405,15 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VRSQRT14PSZrkz, X86::VRSQRT14PSZmbkz, TB_BCAST_SS},
{X86::VRSQRT28PDZrkz, X86::VRSQRT28PDZmbkz, TB_BCAST_SD},
{X86::VRSQRT28PSZrkz, X86::VRSQRT28PSZmbkz, TB_BCAST_SS},
+ {X86::VRSQRTPBF16Z128rkz, X86::VRSQRTPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VRSQRTPBF16Z256rkz, X86::VRSQRTPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VRSQRTPBF16Zrkz, X86::VRSQRTPBF16Zmbkz, TB_BCAST_SH},
{X86::VRSQRTPHZ128rkz, X86::VRSQRTPHZ128mbkz, TB_BCAST_SH},
{X86::VRSQRTPHZ256rkz, X86::VRSQRTPHZ256mbkz, TB_BCAST_SH},
{X86::VRSQRTPHZrkz, X86::VRSQRTPHZmbkz, TB_BCAST_SH},
+ {X86::VSCALEFPBF16Z128rr, X86::VSCALEFPBF16Z128rmb, TB_BCAST_SH},
+ {X86::VSCALEFPBF16Z256rr, X86::VSCALEFPBF16Z256rmb, TB_BCAST_SH},
+ {X86::VSCALEFPBF16Zrr, X86::VSCALEFPBF16Zrmb, TB_BCAST_SH},
{X86::VSCALEFPDZ128rr, X86::VSCALEFPDZ128rmb, TB_BCAST_SD},
{X86::VSCALEFPDZ256rr, X86::VSCALEFPDZ256rmb, TB_BCAST_SD},
{X86::VSCALEFPDZrr, X86::VSCALEFPDZrmb, TB_BCAST_SD},
@@ -8123,6 +8437,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VSHUFPSZ128rri, X86::VSHUFPSZ128rmbi, TB_BCAST_SS},
{X86::VSHUFPSZ256rri, X86::VSHUFPSZ256rmbi, TB_BCAST_SS},
{X86::VSHUFPSZrri, X86::VSHUFPSZrmbi, TB_BCAST_SS},
+ {X86::VSQRTNEPBF16Z128rkz, X86::VSQRTNEPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VSQRTNEPBF16Z256rkz, X86::VSQRTNEPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VSQRTNEPBF16Zrkz, X86::VSQRTNEPBF16Zmbkz, TB_BCAST_SH},
{X86::VSQRTPDZ128rkz, X86::VSQRTPDZ128mbkz, TB_BCAST_SD},
{X86::VSQRTPDZ256rkz, X86::VSQRTPDZ256mbkz, TB_BCAST_SD},
{X86::VSQRTPDZrkz, X86::VSQRTPDZmbkz, TB_BCAST_SD},
@@ -8132,6 +8449,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VSQRTPSZ128rkz, X86::VSQRTPSZ128mbkz, TB_BCAST_SS},
{X86::VSQRTPSZ256rkz, X86::VSQRTPSZ256mbkz, TB_BCAST_SS},
{X86::VSQRTPSZrkz, X86::VSQRTPSZmbkz, TB_BCAST_SS},
+ {X86::VSUBNEPBF16Z128rr, X86::VSUBNEPBF16Z128rmb, TB_BCAST_SH},
+ {X86::VSUBNEPBF16Z256rr, X86::VSUBNEPBF16Z256rmb, TB_BCAST_SH},
+ {X86::VSUBNEPBF16Zrr, X86::VSUBNEPBF16Zrmb, TB_BCAST_SH},
{X86::VSUBPDZ128rr, X86::VSUBPDZ128rmb, TB_BCAST_SD},
{X86::VSUBPDZ256rr, X86::VSUBPDZ256rmb, TB_BCAST_SD},
{X86::VSUBPDZrr, X86::VSUBPDZrmb, TB_BCAST_SD},
@@ -8162,6 +8482,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
};
static const X86FoldTableEntry BroadcastTable3[] = {
+ {X86::VADDNEPBF16Z128rrkz, X86::VADDNEPBF16Z128rmbkz, TB_BCAST_SH},
+ {X86::VADDNEPBF16Z256rrkz, X86::VADDNEPBF16Z256rmbkz, TB_BCAST_SH},
+ {X86::VADDNEPBF16Zrrkz, X86::VADDNEPBF16Zrmbkz, TB_BCAST_SH},
{X86::VADDPDZ128rrkz, X86::VADDPDZ128rmbkz, TB_BCAST_SD},
{X86::VADDPDZ256rrkz, X86::VADDPDZ256rmbkz, TB_BCAST_SD},
{X86::VADDPDZrrkz, X86::VADDPDZrmbkz, TB_BCAST_SD},
@@ -8195,6 +8518,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VBLENDMPSZ128rrk, X86::VBLENDMPSZ128rmbk, TB_BCAST_SS},
{X86::VBLENDMPSZ256rrk, X86::VBLENDMPSZ256rmbk, TB_BCAST_SS},
{X86::VBLENDMPSZrrk, X86::VBLENDMPSZrmbk, TB_BCAST_SS},
+ {X86::VCMPPBF16Z128rrik, X86::VCMPPBF16Z128rmbik, TB_BCAST_SH},
+ {X86::VCMPPBF16Z256rrik, X86::VCMPPBF16Z256rmbik, TB_BCAST_SH},
+ {X86::VCMPPBF16Zrrik, X86::VCMPPBF16Zrmbik, TB_BCAST_SH},
{X86::VCMPPDZ128rrik, X86::VCMPPDZ128rmbik, TB_BCAST_SD},
{X86::VCMPPDZ256rrik, X86::VCMPPDZ256rmbik, TB_BCAST_SD},
{X86::VCMPPDZrrik, X86::VCMPPDZrmbik, TB_BCAST_SD},
@@ -8429,6 +8755,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VCVTW2PHZ128rrk, X86::VCVTW2PHZ128rmbk, TB_BCAST_W},
{X86::VCVTW2PHZ256rrk, X86::VCVTW2PHZ256rmbk, TB_BCAST_W},
{X86::VCVTW2PHZrrk, X86::VCVTW2PHZrmbk, TB_BCAST_W},
+ {X86::VDIVNEPBF16Z128rrkz, X86::VDIVNEPBF16Z128rmbkz, TB_BCAST_SH},
+ {X86::VDIVNEPBF16Z256rrkz, X86::VDIVNEPBF16Z256rmbkz, TB_BCAST_SH},
+ {X86::VDIVNEPBF16Zrrkz, X86::VDIVNEPBF16Zrmbkz, TB_BCAST_SH},
{X86::VDIVPDZ128rrkz, X86::VDIVPDZ128rmbkz, TB_BCAST_SD},
{X86::VDIVPDZ256rrkz, X86::VDIVPDZ256rmbkz, TB_BCAST_SD},
{X86::VDIVPDZrrkz, X86::VDIVPDZrmbkz, TB_BCAST_SD},
@@ -8458,6 +8787,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VFIXUPIMMPSZ128rri, X86::VFIXUPIMMPSZ128rmbi, TB_BCAST_SS},
{X86::VFIXUPIMMPSZ256rri, X86::VFIXUPIMMPSZ256rmbi, TB_BCAST_SS},
{X86::VFIXUPIMMPSZrri, X86::VFIXUPIMMPSZrmbi, TB_BCAST_SS},
+ {X86::VFMADD132NEPBF16Z128r, X86::VFMADD132NEPBF16Z128mb, TB_BCAST_SH},
+ {X86::VFMADD132NEPBF16Z256r, X86::VFMADD132NEPBF16Z256mb, TB_BCAST_SH},
+ {X86::VFMADD132NEPBF16Zr, X86::VFMADD132NEPBF16Zmb, TB_BCAST_SH},
{X86::VFMADD132PDZ128r, X86::VFMADD132PDZ128mb, TB_BCAST_SD},
{X86::VFMADD132PDZ256r, X86::VFMADD132PDZ256mb, TB_BCAST_SD},
{X86::VFMADD132PDZr, X86::VFMADD132PDZmb, TB_BCAST_SD},
@@ -8467,6 +8799,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VFMADD132PSZ128r, X86::VFMADD132PSZ128mb, TB_BCAST_SS},
{X86::VFMADD132PSZ256r, X86::VFMADD132PSZ256mb, TB_BCAST_SS},
{X86::VFMADD132PSZr, X86::VFMADD132PSZmb, TB_BCAST_SS},
+ {X86::VFMADD213NEPBF16Z128r, X86::VFMADD213NEPBF16Z128mb, TB_BCAST_SH},
+ {X86::VFMADD213NEPBF16Z256r, X86::VFMADD213NEPBF16Z256mb, TB_BCAST_SH},
+ {X86::VFMADD213NEPBF16Zr, X86::VFMADD213NEPBF16Zmb, TB_BCAST_SH},
{X86::VFMADD213PDZ128r, X86::VFMADD213PDZ128mb, TB_BCAST_SD},
{X86::VFMADD213PDZ256r, X86::VFMADD213PDZ256mb, TB_BCAST_SD},
{X86::VFMADD213PDZr, X86::VFMADD213PDZmb, TB_BCAST_SD},
@@ -8476,6 +8811,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VFMADD213PSZ128r, X86::VFMADD213PSZ128mb, TB_BCAST_SS},
{X86::VFMADD213PSZ256r, X86::VFMADD213PSZ256mb, TB_BCAST_SS},
{X86::VFMADD213PSZr, X86::VFMADD213PSZmb, TB_BCAST_SS},
+ {X86::VFMADD231NEPBF16Z128r, X86::VFMADD231NEPBF16Z128mb, TB_BCAST_SH},
+ {X86::VFMADD231NEPBF16Z256r, X86::VFMADD231NEPBF16Z256mb, TB_BCAST_SH},
+ {X86::VFMADD231NEPBF16Zr, X86::VFMADD231NEPBF16Zmb, TB_BCAST_SH},
{X86::VFMADD231PDZ128r, X86::VFMADD231PDZ128mb, TB_BCAST_SD},
{X86::VFMADD231PDZ256r, X86::VFMADD231PDZ256mb, TB_BCAST_SD},
{X86::VFMADD231PDZr, X86::VFMADD231PDZmb, TB_BCAST_SD},
@@ -8515,6 +8853,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VFMADDSUB231PSZ128r, X86::VFMADDSUB231PSZ128mb, TB_BCAST_SS},
{X86::VFMADDSUB231PSZ256r, X86::VFMADDSUB231PSZ256mb, TB_BCAST_SS},
{X86::VFMADDSUB231PSZr, X86::VFMADDSUB231PSZmb, TB_BCAST_SS},
+ {X86::VFMSUB132NEPBF16Z128r, X86::VFMSUB132NEPBF16Z128mb, TB_BCAST_SH},
+ {X86::VFMSUB132NEPBF16Z256r, X86::VFMSUB132NEPBF16Z256mb, TB_BCAST_SH},
+ {X86::VFMSUB132NEPBF16Zr, X86::VFMSUB132NEPBF16Zmb, TB_BCAST_SH},
{X86::VFMSUB132PDZ128r, X86::VFMSUB132PDZ128mb, TB_BCAST_SD},
{X86::VFMSUB132PDZ256r, X86::VFMSUB132PDZ256mb, TB_BCAST_SD},
{X86::VFMSUB132PDZr, X86::VFMSUB132PDZmb, TB_BCAST_SD},
@@ -8524,6 +8865,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VFMSUB132PSZ128r, X86::VFMSUB132PSZ128mb, TB_BCAST_SS},
{X86::VFMSUB132PSZ256r, X86::VFMSUB132PSZ256mb, TB_BCAST_SS},
{X86::VFMSUB132PSZr, X86::VFMSUB132PSZmb, TB_BCAST_SS},
+ {X86::VFMSUB213NEPBF16Z128r, X86::VFMSUB213NEPBF16Z128mb, TB_BCAST_SH},
+ {X86::VFMSUB213NEPBF16Z256r, X86::VFMSUB213NEPBF16Z256mb, TB_BCAST_SH},
+ {X86::VFMSUB213NEPBF16Zr, X86::VFMSUB213NEPBF16Zmb, TB_BCAST_SH},
{X86::VFMSUB213PDZ128r, X86::VFMSUB213PDZ128mb, TB_BCAST_SD},
{X86::VFMSUB213PDZ256r, X86::VFMSUB213PDZ256mb, TB_BCAST_SD},
{X86::VFMSUB213PDZr, X86::VFMSUB213PDZmb, TB_BCAST_SD},
@@ -8533,6 +8877,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VFMSUB213PSZ128r, X86::VFMSUB213PSZ128mb, TB_BCAST_SS},
{X86::VFMSUB213PSZ256r, X86::VFMSUB213PSZ256mb, TB_BCAST_SS},
{X86::VFMSUB213PSZr, X86::VFMSUB213PSZmb, TB_BCAST_SS},
+ {X86::VFMSUB231NEPBF16Z128r, X86::VFMSUB231NEPBF16Z128mb, TB_BCAST_SH},
+ {X86::VFMSUB231NEPBF16Z256r, X86::VFMSUB231NEPBF16Z256mb, TB_BCAST_SH},
+ {X86::VFMSUB231NEPBF16Zr, X86::VFMSUB231NEPBF16Zmb, TB_BCAST_SH},
{X86::VFMSUB231PDZ128r, X86::VFMSUB231PDZ128mb, TB_BCAST_SD},
{X86::VFMSUB231PDZ256r, X86::VFMSUB231PDZ256mb, TB_BCAST_SD},
{X86::VFMSUB231PDZr, X86::VFMSUB231PDZmb, TB_BCAST_SD},
@@ -8572,6 +8919,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VFMULCPHZ128rrkz, X86::VFMULCPHZ128rmbkz, TB_BCAST_SS},
{X86::VFMULCPHZ256rrkz, X86::VFMULCPHZ256rmbkz, TB_BCAST_SS},
{X86::VFMULCPHZrrkz, X86::VFMULCPHZrmbkz, TB_BCAST_SS},
+ {X86::VFNMADD132NEPBF16Z128r, X86::VFNMADD132NEPBF16Z128mb, TB_BCAST_SH},
+ {X86::VFNMADD132NEPBF16Z256r, X86::VFNMADD132NEPBF16Z256mb, TB_BCAST_SH},
+ {X86::VFNMADD132NEPBF16Zr, X86::VFNMADD132NEPBF16Zmb, TB_BCAST_SH},
{X86::VFNMADD132PDZ128r, X86::VFNMADD132PDZ128mb, TB_BCAST_SD},
{X86::VFNMADD132PDZ256r, X86::VFNMADD132PDZ256mb, TB_BCAST_SD},
{X86::VFNMADD132PDZr, X86::VFNMADD132PDZmb, TB_BCAST_SD},
@@ -8581,6 +8931,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VFNMADD132PSZ128r, X86::VFNMADD132PSZ128mb, TB_BCAST_SS},
{X86::VFNMADD132PSZ256r, X86::VFNMADD132PSZ256mb, TB_BCAST_SS},
{X86::VFNMADD132PSZr, X86::VFNMADD132PSZmb, TB_BCAST_SS},
+ {X86::VFNMADD213NEPBF16Z128r, X86::VFNMADD213NEPBF16Z128mb, TB_BCAST_SH},
+ {X86::VFNMADD213NEPBF16Z256r, X86::VFNMADD213NEPBF16Z256mb, TB_BCAST_SH},
+ {X86::VFNMADD213NEPBF16Zr, X86::VFNMADD213NEPBF16Zmb, TB_BCAST_SH},
{X86::VFNMADD213PDZ128r, X86::VFNMADD213PDZ128mb, TB_BCAST_SD},
{X86::VFNMADD213PDZ256r, X86::VFNMADD213PDZ256mb, TB_BCAST_SD},
{X86::VFNMADD213PDZr, X86::VFNMADD213PDZmb, TB_BCAST_SD},
@@ -8590,6 +8943,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VFNMADD213PSZ128r, X86::VFNMADD213PSZ128mb, TB_BCAST_SS},
{X86::VFNMADD213PSZ256r, X86::VFNMADD213PSZ256mb, TB_BCAST_SS},
{X86::VFNMADD213PSZr, X86::VFNMADD213PSZmb, TB_BCAST_SS},
+ {X86::VFNMADD231NEPBF16Z128r, X86::VFNMADD231NEPBF16Z128mb, TB_BCAST_SH},
+ {X86::VFNMADD231NEPBF16Z256r, X86::VFNMADD231NEPBF16Z256mb, TB_BCAST_SH},
+ {X86::VFNMADD231NEPBF16Zr, X86::VFNMADD231NEPBF16Zmb, TB_BCAST_SH},
{X86::VFNMADD231PDZ128r, X86::VFNMADD231PDZ128mb, TB_BCAST_SD},
{X86::VFNMADD231PDZ256r, X86::VFNMADD231PDZ256mb, TB_BCAST_SD},
{X86::VFNMADD231PDZr, X86::VFNMADD231PDZmb, TB_BCAST_SD},
@@ -8599,6 +8955,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VFNMADD231PSZ128r, X86::VFNMADD231PSZ128mb, TB_BCAST_SS},
{X86::VFNMADD231PSZ256r, X86::VFNMADD231PSZ256mb, TB_BCAST_SS},
{X86::VFNMADD231PSZr, X86::VFNMADD231PSZmb, TB_BCAST_SS},
+ {X86::VFNMSUB132NEPBF16Z128r, X86::VFNMSUB132NEPBF16Z128mb, TB_BCAST_SH},
+ {X86::VFNMSUB132NEPBF16Z256r, X86::VFNMSUB132NEPBF16Z256mb, TB_BCAST_SH},
+ {X86::VFNMSUB132NEPBF16Zr, X86::VFNMSUB132NEPBF16Zmb, TB_BCAST_SH},
{X86::VFNMSUB132PDZ128r, X86::VFNMSUB132PDZ128mb, TB_BCAST_SD},
{X86::VFNMSUB132PDZ256r, X86::VFNMSUB132PDZ256mb, TB_BCAST_SD},
{X86::VFNMSUB132PDZr, X86::VFNMSUB132PDZmb, TB_BCAST_SD},
@@ -8608,6 +8967,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VFNMSUB132PSZ128r, X86::VFNMSUB132PSZ128mb, TB_BCAST_SS},
{X86::VFNMSUB132PSZ256r, X86::VFNMSUB132PSZ256mb, TB_BCAST_SS},
{X86::VFNMSUB132PSZr, X86::VFNMSUB132PSZmb, TB_BCAST_SS},
+ {X86::VFNMSUB213NEPBF16Z128r, X86::VFNMSUB213NEPBF16Z128mb, TB_BCAST_SH},
+ {X86::VFNMSUB213NEPBF16Z256r, X86::VFNMSUB213NEPBF16Z256mb, TB_BCAST_SH},
+ {X86::VFNMSUB213NEPBF16Zr, X86::VFNMSUB213NEPBF16Zmb, TB_BCAST_SH},
{X86::VFNMSUB213PDZ128r, X86::VFNMSUB213PDZ128mb, TB_BCAST_SD},
{X86::VFNMSUB213PDZ256r, X86::VFNMSUB213PDZ256mb, TB_BCAST_SD},
{X86::VFNMSUB213PDZr, X86::VFNMSUB213PDZmb, TB_BCAST_SD},
@@ -8617,6 +8979,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VFNMSUB213PSZ128r, X86::VFNMSUB213PSZ128mb, TB_BCAST_SS},
{X86::VFNMSUB213PSZ256r, X86::VFNMSUB213PSZ256mb, TB_BCAST_SS},
{X86::VFNMSUB213PSZr, X86::VFNMSUB213PSZmb, TB_BCAST_SS},
+ {X86::VFNMSUB231NEPBF16Z128r, X86::VFNMSUB231NEPBF16Z128mb, TB_BCAST_SH},
+ {X86::VFNMSUB231NEPBF16Z256r, X86::VFNMSUB231NEPBF16Z256mb, TB_BCAST_SH},
+ {X86::VFNMSUB231NEPBF16Zr, X86::VFNMSUB231NEPBF16Zmb, TB_BCAST_SH},
{X86::VFNMSUB231PDZ128r, X86::VFNMSUB231PDZ128mb, TB_BCAST_SD},
{X86::VFNMSUB231PDZ256r, X86::VFNMSUB231PDZ256mb, TB_BCAST_SD},
{X86::VFNMSUB231PDZr, X86::VFNMSUB231PDZmb, TB_BCAST_SD},
@@ -8626,6 +8991,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VFNMSUB231PSZ128r, X86::VFNMSUB231PSZ128mb, TB_BCAST_SS},
{X86::VFNMSUB231PSZ256r, X86::VFNMSUB231PSZ256mb, TB_BCAST_SS},
{X86::VFNMSUB231PSZr, X86::VFNMSUB231PSZmb, TB_BCAST_SS},
+ {X86::VGETEXPPBF16Z128rk, X86::VGETEXPPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VGETEXPPBF16Z256rk, X86::VGETEXPPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VGETEXPPBF16Zrk, X86::VGETEXPPBF16Zmbk, TB_BCAST_SH},
{X86::VGETEXPPDZ128rk, X86::VGETEXPPDZ128mbk, TB_BCAST_SD},
{X86::VGETEXPPDZ256rk, X86::VGETEXPPDZ256mbk, TB_BCAST_SD},
{X86::VGETEXPPDZrk, X86::VGETEXPPDZmbk, TB_BCAST_SD},
@@ -8635,6 +9003,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VGETEXPPSZ128rk, X86::VGETEXPPSZ128mbk, TB_BCAST_SS},
{X86::VGETEXPPSZ256rk, X86::VGETEXPPSZ256mbk, TB_BCAST_SS},
{X86::VGETEXPPSZrk, X86::VGETEXPPSZmbk, TB_BCAST_SS},
+ {X86::VGETMANTPBF16Z128rrik, X86::VGETMANTPBF16Z128rmbik, TB_BCAST_SH},
+ {X86::VGETMANTPBF16Z256rrik, X86::VGETMANTPBF16Z256rmbik, TB_BCAST_SH},
+ {X86::VGETMANTPBF16Zrrik, X86::VGETMANTPBF16Zrmbik, TB_BCAST_SH},
{X86::VGETMANTPDZ128rrik, X86::VGETMANTPDZ128rmbik, TB_BCAST_SD},
{X86::VGETMANTPDZ256rrik, X86::VGETMANTPDZ256rmbik, TB_BCAST_SD},
{X86::VGETMANTPDZrrik, X86::VGETMANTPDZrmbik, TB_BCAST_SD},
@@ -8659,6 +9030,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VMAXCPSZ128rrkz, X86::VMAXCPSZ128rmbkz, TB_BCAST_SS},
{X86::VMAXCPSZ256rrkz, X86::VMAXCPSZ256rmbkz, TB_BCAST_SS},
{X86::VMAXCPSZrrkz, X86::VMAXCPSZrmbkz, TB_BCAST_SS},
+ {X86::VMAXPBF16Z128rrkz, X86::VMAXPBF16Z128rmbkz, TB_BCAST_SH},
+ {X86::VMAXPBF16Z256rrkz, X86::VMAXPBF16Z256rmbkz, TB_BCAST_SH},
+ {X86::VMAXPBF16Zrrkz, X86::VMAXPBF16Zrmbkz, TB_BCAST_SH},
{X86::VMAXPDZ128rrkz, X86::VMAXPDZ128rmbkz, TB_BCAST_SD},
{X86::VMAXPDZ256rrkz, X86::VMAXPDZ256rmbkz, TB_BCAST_SD},
{X86::VMAXPDZrrkz, X86::VMAXPDZrmbkz, TB_BCAST_SD},
@@ -8689,6 +9063,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VMINMAXPSZ128rrikz, X86::VMINMAXPSZ128rmbikz, TB_BCAST_SS},
{X86::VMINMAXPSZ256rrikz, X86::VMINMAXPSZ256rmbikz, TB_BCAST_SS},
{X86::VMINMAXPSZrrikz, X86::VMINMAXPSZrmbikz, TB_BCAST_SS},
+ {X86::VMINPBF16Z128rrkz, X86::VMINPBF16Z128rmbkz, TB_BCAST_SH},
+ {X86::VMINPBF16Z256rrkz, X86::VMINPBF16Z256rmbkz, TB_BCAST_SH},
+ {X86::VMINPBF16Zrrkz, X86::VMINPBF16Zrmbkz, TB_BCAST_SH},
{X86::VMINPDZ128rrkz, X86::VMINPDZ128rmbkz, TB_BCAST_SD},
{X86::VMINPDZ256rrkz, X86::VMINPDZ256rmbkz, TB_BCAST_SD},
{X86::VMINPDZrrkz, X86::VMINPDZrmbkz, TB_BCAST_SD},
@@ -8698,6 +9075,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VMINPSZ128rrkz, X86::VMINPSZ128rmbkz, TB_BCAST_SS},
{X86::VMINPSZ256rrkz, X86::VMINPSZ256rmbkz, TB_BCAST_SS},
{X86::VMINPSZrrkz, X86::VMINPSZrmbkz, TB_BCAST_SS},
+ {X86::VMULNEPBF16Z128rrkz, X86::VMULNEPBF16Z128rmbkz, TB_BCAST_SH},
+ {X86::VMULNEPBF16Z256rrkz, X86::VMULNEPBF16Z256rmbkz, TB_BCAST_SH},
+ {X86::VMULNEPBF16Zrrkz, X86::VMULNEPBF16Zrmbkz, TB_BCAST_SH},
{X86::VMULPDZ128rrkz, X86::VMULPDZ128rmbkz, TB_BCAST_SD},
{X86::VMULPDZ256rrkz, X86::VMULPDZ256rmbkz, TB_BCAST_SD},
{X86::VMULPDZrrkz, X86::VMULPDZrmbkz, TB_BCAST_SD},
@@ -9081,9 +9461,15 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VRCP14PSZrk, X86::VRCP14PSZmbk, TB_BCAST_SS},
{X86::VRCP28PDZrk, X86::VRCP28PDZmbk, TB_BCAST_SD},
{X86::VRCP28PSZrk, X86::VRCP28PSZmbk, TB_BCAST_SS},
+ {X86::VRCPPBF16Z128rk, X86::VRCPPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VRCPPBF16Z256rk, X86::VRCPPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VRCPPBF16Zrk, X86::VRCPPBF16Zmbk, TB_BCAST_SH},
{X86::VRCPPHZ128rk, X86::VRCPPHZ128mbk, TB_BCAST_SH},
{X86::VRCPPHZ256rk, X86::VRCPPHZ256mbk, TB_BCAST_SH},
{X86::VRCPPHZrk, X86::VRCPPHZmbk, TB_BCAST_SH},
+ {X86::VREDUCENEPBF16Z128rrik, X86::VREDUCENEPBF16Z128rmbik, TB_BCAST_SH},
+ {X86::VREDUCENEPBF16Z256rrik, X86::VREDUCENEPBF16Z256rmbik, TB_BCAST_SH},
+ {X86::VREDUCENEPBF16Zrrik, X86::VREDUCENEPBF16Zrmbik, TB_BCAST_SH},
{X86::VREDUCEPDZ128rrik, X86::VREDUCEPDZ128rmbik, TB_BCAST_SD},
{X86::VREDUCEPDZ256rrik, X86::VREDUCEPDZ256rmbik, TB_BCAST_SD},
{X86::VREDUCEPDZrrik, X86::VREDUCEPDZrmbik, TB_BCAST_SD},
@@ -9093,6 +9479,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VREDUCEPSZ128rrik, X86::VREDUCEPSZ128rmbik, TB_BCAST_SS},
{X86::VREDUCEPSZ256rrik, X86::VREDUCEPSZ256rmbik, TB_BCAST_SS},
{X86::VREDUCEPSZrrik, X86::VREDUCEPSZrmbik, TB_BCAST_SS},
+ {X86::VRNDSCALENEPBF16Z128rrik, X86::VRNDSCALENEPBF16Z128rmbik, TB_BCAST_SH},
+ {X86::VRNDSCALENEPBF16Z256rrik, X86::VRNDSCALENEPBF16Z256rmbik, TB_BCAST_SH},
+ {X86::VRNDSCALENEPBF16Zrrik, X86::VRNDSCALENEPBF16Zrmbik, TB_BCAST_SH},
{X86::VRNDSCALEPDZ128rrik, X86::VRNDSCALEPDZ128rmbik, TB_BCAST_SD},
{X86::VRNDSCALEPDZ256rrik, X86::VRNDSCALEPDZ256rmbik, TB_BCAST_SD},
{X86::VRNDSCALEPDZrrik, X86::VRNDSCALEPDZrmbik, TB_BCAST_SD},
@@ -9110,9 +9499,15 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VRSQRT14PSZrk, X86::VRSQRT14PSZmbk, TB_BCAST_SS},
{X86::VRSQRT28PDZrk, X86::VRSQRT28PDZmbk, TB_BCAST_SD},
{X86::VRSQRT28PSZrk, X86::VRSQRT28PSZmbk, TB_BCAST_SS},
+ {X86::VRSQRTPBF16Z128rk, X86::VRSQRTPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VRSQRTPBF16Z256rk, X86::VRSQRTPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VRSQRTPBF16Zrk, X86::VRSQRTPBF16Zmbk, TB_BCAST_SH},
{X86::VRSQRTPHZ128rk, X86::VRSQRTPHZ128mbk, TB_BCAST_SH},
{X86::VRSQRTPHZ256rk, X86::VRSQRTPHZ256mbk, TB_BCAST_SH},
{X86::VRSQRTPHZrk, X86::VRSQRTPHZmbk, TB_BCAST_SH},
+ {X86::VSCALEFPBF16Z128rrkz, X86::VSCALEFPBF16Z128rmbkz, TB_BCAST_SH},
+ {X86::VSCALEFPBF16Z256rrkz, X86::VSCALEFPBF16Z256rmbkz, TB_BCAST_SH},
+ {X86::VSCALEFPBF16Zrrkz, X86::VSCALEFPBF16Zrmbkz, TB_BCAST_SH},
{X86::VSCALEFPDZ128rrkz, X86::VSCALEFPDZ128rmbkz, TB_BCAST_SD},
{X86::VSCALEFPDZ256rrkz, X86::VSCALEFPDZ256rmbkz, TB_BCAST_SD},
{X86::VSCALEFPDZrrkz, X86::VSCALEFPDZrmbkz, TB_BCAST_SD},
@@ -9136,6 +9531,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VSHUFPSZ128rrikz, X86::VSHUFPSZ128rmbikz, TB_BCAST_SS},
{X86::VSHUFPSZ256rrikz, X86::VSHUFPSZ256rmbikz, TB_BCAST_SS},
{X86::VSHUFPSZrrikz, X86::VSHUFPSZrmbikz, TB_BCAST_SS},
+ {X86::VSQRTNEPBF16Z128rk, X86::VSQRTNEPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VSQRTNEPBF16Z256rk, X86::VSQRTNEPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VSQRTNEPBF16Zrk, X86::VSQRTNEPBF16Zmbk, TB_BCAST_SH},
{X86::VSQRTPDZ128rk, X86::VSQRTPDZ128mbk, TB_BCAST_SD},
{X86::VSQRTPDZ256rk, X86::VSQRTPDZ256mbk, TB_BCAST_SD},
{X86::VSQRTPDZrk, X86::VSQRTPDZmbk, TB_BCAST_SD},
@@ -9145,6 +9543,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VSQRTPSZ128rk, X86::VSQRTPSZ128mbk, TB_BCAST_SS},
{X86::VSQRTPSZ256rk, X86::VSQRTPSZ256mbk, TB_BCAST_SS},
{X86::VSQRTPSZrk, X86::VSQRTPSZmbk, TB_BCAST_SS},
+ {X86::VSUBNEPBF16Z128rrkz, X86::VSUBNEPBF16Z128rmbkz, TB_BCAST_SH},
+ {X86::VSUBNEPBF16Z256rrkz, X86::VSUBNEPBF16Z256rmbkz, TB_BCAST_SH},
+ {X86::VSUBNEPBF16Zrrkz, X86::VSUBNEPBF16Zrmbkz, TB_BCAST_SH},
{X86::VSUBPDZ128rrkz, X86::VSUBPDZ128rmbkz, TB_BCAST_SD},
{X86::VSUBPDZ256rrkz, X86::VSUBPDZ256rmbkz, TB_BCAST_SD},
{X86::VSUBPDZrrkz, X86::VSUBPDZrmbkz, TB_BCAST_SD},
@@ -9175,6 +9576,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
};
static const X86FoldTableEntry BroadcastTable4[] = {
+ {X86::VADDNEPBF16Z128rrk, X86::VADDNEPBF16Z128rmbk, TB_BCAST_SH},
+ {X86::VADDNEPBF16Z256rrk, X86::VADDNEPBF16Z256rmbk, TB_BCAST_SH},
+ {X86::VADDNEPBF16Zrrk, X86::VADDNEPBF16Zrmbk, TB_BCAST_SH},
{X86::VADDPDZ128rrk, X86::VADDPDZ128rmbk, TB_BCAST_SD},
{X86::VADDPDZ256rrk, X86::VADDPDZ256rmbk, TB_BCAST_SD},
{X86::VADDPDZrrk, X86::VADDPDZrmbk, TB_BCAST_SD},
@@ -9232,6 +9636,9 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VCVTNE2PS2BF16Z128rrk, X86::VCVTNE2PS2BF16Z128rmbk, TB_BCAST_SS},
{X86::VCVTNE2PS2BF16Z256rrk, X86::VCVTNE2PS2BF16Z256rmbk, TB_BCAST_SS},
{X86::VCVTNE2PS2BF16Zrrk, X86::VCVTNE2PS2BF16Zrmbk, TB_BCAST_SS},
+ {X86::VDIVNEPBF16Z128rrk, X86::VDIVNEPBF16Z128rmbk, TB_BCAST_SH},
+ {X86::VDIVNEPBF16Z256rrk, X86::VDIVNEPBF16Z256rmbk, TB_BCAST_SH},
+ {X86::VDIVNEPBF16Zrrk, X86::VDIVNEPBF16Zrmbk, TB_BCAST_SH},
{X86::VDIVPDZ128rrk, X86::VDIVPDZ128rmbk, TB_BCAST_SD},
{X86::VDIVPDZ256rrk, X86::VDIVPDZ256rmbk, TB_BCAST_SD},
{X86::VDIVPDZrrk, X86::VDIVPDZrmbk, TB_BCAST_SD},
@@ -9274,6 +9681,12 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VFIXUPIMMPSZ256rrikz, X86::VFIXUPIMMPSZ256rmbikz, TB_BCAST_SS},
{X86::VFIXUPIMMPSZrrik, X86::VFIXUPIMMPSZrmbik, TB_BCAST_SS},
{X86::VFIXUPIMMPSZrrikz, X86::VFIXUPIMMPSZrmbikz, TB_BCAST_SS},
+ {X86::VFMADD132NEPBF16Z128rk, X86::VFMADD132NEPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VFMADD132NEPBF16Z128rkz, X86::VFMADD132NEPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VFMADD132NEPBF16Z256rk, X86::VFMADD132NEPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VFMADD132NEPBF16Z256rkz, X86::VFMADD132NEPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VFMADD132NEPBF16Zrk, X86::VFMADD132NEPBF16Zmbk, TB_BCAST_SH},
+ {X86::VFMADD132NEPBF16Zrkz, X86::VFMADD132NEPBF16Zmbkz, TB_BCAST_SH},
{X86::VFMADD132PDZ128rk, X86::VFMADD132PDZ128mbk, TB_BCAST_SD},
{X86::VFMADD132PDZ128rkz, X86::VFMADD132PDZ128mbkz, TB_BCAST_SD},
{X86::VFMADD132PDZ256rk, X86::VFMADD132PDZ256mbk, TB_BCAST_SD},
@@ -9292,6 +9705,12 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VFMADD132PSZ256rkz, X86::VFMADD132PSZ256mbkz, TB_BCAST_SS},
{X86::VFMADD132PSZrk, X86::VFMADD132PSZmbk, TB_BCAST_SS},
{X86::VFMADD132PSZrkz, X86::VFMADD132PSZmbkz, TB_BCAST_SS},
+ {X86::VFMADD213NEPBF16Z128rk, X86::VFMADD213NEPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VFMADD213NEPBF16Z128rkz, X86::VFMADD213NEPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VFMADD213NEPBF16Z256rk, X86::VFMADD213NEPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VFMADD213NEPBF16Z256rkz, X86::VFMADD213NEPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VFMADD213NEPBF16Zrk, X86::VFMADD213NEPBF16Zmbk, TB_BCAST_SH},
+ {X86::VFMADD213NEPBF16Zrkz, X86::VFMADD213NEPBF16Zmbkz, TB_BCAST_SH},
{X86::VFMADD213PDZ128rk, X86::VFMADD213PDZ128mbk, TB_BCAST_SD},
{X86::VFMADD213PDZ128rkz, X86::VFMADD213PDZ128mbkz, TB_BCAST_SD},
{X86::VFMADD213PDZ256rk, X86::VFMADD213PDZ256mbk, TB_BCAST_SD},
@@ -9310,6 +9729,12 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VFMADD213PSZ256rkz, X86::VFMADD213PSZ256mbkz, TB_BCAST_SS},
{X86::VFMADD213PSZrk, X86::VFMADD213PSZmbk, TB_BCAST_SS},
{X86::VFMADD213PSZrkz, X86::VFMADD213PSZmbkz, TB_BCAST_SS},
+ {X86::VFMADD231NEPBF16Z128rk, X86::VFMADD231NEPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VFMADD231NEPBF16Z128rkz, X86::VFMADD231NEPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VFMADD231NEPBF16Z256rk, X86::VFMADD231NEPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VFMADD231NEPBF16Z256rkz, X86::VFMADD231NEPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VFMADD231NEPBF16Zrk, X86::VFMADD231NEPBF16Zmbk, TB_BCAST_SH},
+ {X86::VFMADD231NEPBF16Zrkz, X86::VFMADD231NEPBF16Zmbkz, TB_BCAST_SH},
{X86::VFMADD231PDZ128rk, X86::VFMADD231PDZ128mbk, TB_BCAST_SD},
{X86::VFMADD231PDZ128rkz, X86::VFMADD231PDZ128mbkz, TB_BCAST_SD},
{X86::VFMADD231PDZ256rk, X86::VFMADD231PDZ256mbk, TB_BCAST_SD},
@@ -9388,6 +9813,12 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VFMADDSUB231PSZ256rkz, X86::VFMADDSUB231PSZ256mbkz, TB_BCAST_SS},
{X86::VFMADDSUB231PSZrk, X86::VFMADDSUB231PSZmbk, TB_BCAST_SS},
{X86::VFMADDSUB231PSZrkz, X86::VFMADDSUB231PSZmbkz, TB_BCAST_SS},
+ {X86::VFMSUB132NEPBF16Z128rk, X86::VFMSUB132NEPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VFMSUB132NEPBF16Z128rkz, X86::VFMSUB132NEPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VFMSUB132NEPBF16Z256rk, X86::VFMSUB132NEPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VFMSUB132NEPBF16Z256rkz, X86::VFMSUB132NEPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VFMSUB132NEPBF16Zrk, X86::VFMSUB132NEPBF16Zmbk, TB_BCAST_SH},
+ {X86::VFMSUB132NEPBF16Zrkz, X86::VFMSUB132NEPBF16Zmbkz, TB_BCAST_SH},
{X86::VFMSUB132PDZ128rk, X86::VFMSUB132PDZ128mbk, TB_BCAST_SD},
{X86::VFMSUB132PDZ128rkz, X86::VFMSUB132PDZ128mbkz, TB_BCAST_SD},
{X86::VFMSUB132PDZ256rk, X86::VFMSUB132PDZ256mbk, TB_BCAST_SD},
@@ -9406,6 +9837,12 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VFMSUB132PSZ256rkz, X86::VFMSUB132PSZ256mbkz, TB_BCAST_SS},
{X86::VFMSUB132PSZrk, X86::VFMSUB132PSZmbk, TB_BCAST_SS},
{X86::VFMSUB132PSZrkz, X86::VFMSUB132PSZmbkz, TB_BCAST_SS},
+ {X86::VFMSUB213NEPBF16Z128rk, X86::VFMSUB213NEPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VFMSUB213NEPBF16Z128rkz, X86::VFMSUB213NEPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VFMSUB213NEPBF16Z256rk, X86::VFMSUB213NEPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VFMSUB213NEPBF16Z256rkz, X86::VFMSUB213NEPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VFMSUB213NEPBF16Zrk, X86::VFMSUB213NEPBF16Zmbk, TB_BCAST_SH},
+ {X86::VFMSUB213NEPBF16Zrkz, X86::VFMSUB213NEPBF16Zmbkz, TB_BCAST_SH},
{X86::VFMSUB213PDZ128rk, X86::VFMSUB213PDZ128mbk, TB_BCAST_SD},
{X86::VFMSUB213PDZ128rkz, X86::VFMSUB213PDZ128mbkz, TB_BCAST_SD},
{X86::VFMSUB213PDZ256rk, X86::VFMSUB213PDZ256mbk, TB_BCAST_SD},
@@ -9424,6 +9861,12 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VFMSUB213PSZ256rkz, X86::VFMSUB213PSZ256mbkz, TB_BCAST_SS},
{X86::VFMSUB213PSZrk, X86::VFMSUB213PSZmbk, TB_BCAST_SS},
{X86::VFMSUB213PSZrkz, X86::VFMSUB213PSZmbkz, TB_BCAST_SS},
+ {X86::VFMSUB231NEPBF16Z128rk, X86::VFMSUB231NEPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VFMSUB231NEPBF16Z128rkz, X86::VFMSUB231NEPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VFMSUB231NEPBF16Z256rk, X86::VFMSUB231NEPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VFMSUB231NEPBF16Z256rkz, X86::VFMSUB231NEPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VFMSUB231NEPBF16Zrk, X86::VFMSUB231NEPBF16Zmbk, TB_BCAST_SH},
+ {X86::VFMSUB231NEPBF16Zrkz, X86::VFMSUB231NEPBF16Zmbkz, TB_BCAST_SH},
{X86::VFMSUB231PDZ128rk, X86::VFMSUB231PDZ128mbk, TB_BCAST_SD},
{X86::VFMSUB231PDZ128rkz, X86::VFMSUB231PDZ128mbkz, TB_BCAST_SD},
{X86::VFMSUB231PDZ256rk, X86::VFMSUB231PDZ256mbk, TB_BCAST_SD},
@@ -9499,6 +9942,12 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VFMULCPHZ128rrk, X86::VFMULCPHZ128rmbk, TB_BCAST_SS},
{X86::VFMULCPHZ256rrk, X86::VFMULCPHZ256rmbk, TB_BCAST_SS},
{X86::VFMULCPHZrrk, X86::VFMULCPHZrmbk, TB_BCAST_SS},
+ {X86::VFNMADD132NEPBF16Z128rk, X86::VFNMADD132NEPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VFNMADD132NEPBF16Z128rkz, X86::VFNMADD132NEPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VFNMADD132NEPBF16Z256rk, X86::VFNMADD132NEPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VFNMADD132NEPBF16Z256rkz, X86::VFNMADD132NEPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VFNMADD132NEPBF16Zrk, X86::VFNMADD132NEPBF16Zmbk, TB_BCAST_SH},
+ {X86::VFNMADD132NEPBF16Zrkz, X86::VFNMADD132NEPBF16Zmbkz, TB_BCAST_SH},
{X86::VFNMADD132PDZ128rk, X86::VFNMADD132PDZ128mbk, TB_BCAST_SD},
{X86::VFNMADD132PDZ128rkz, X86::VFNMADD132PDZ128mbkz, TB_BCAST_SD},
{X86::VFNMADD132PDZ256rk, X86::VFNMADD132PDZ256mbk, TB_BCAST_SD},
@@ -9517,6 +9966,12 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VFNMADD132PSZ256rkz, X86::VFNMADD132PSZ256mbkz, TB_BCAST_SS},
{X86::VFNMADD132PSZrk, X86::VFNMADD132PSZmbk, TB_BCAST_SS},
{X86::VFNMADD132PSZrkz, X86::VFNMADD132PSZmbkz, TB_BCAST_SS},
+ {X86::VFNMADD213NEPBF16Z128rk, X86::VFNMADD213NEPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VFNMADD213NEPBF16Z128rkz, X86::VFNMADD213NEPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VFNMADD213NEPBF16Z256rk, X86::VFNMADD213NEPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VFNMADD213NEPBF16Z256rkz, X86::VFNMADD213NEPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VFNMADD213NEPBF16Zrk, X86::VFNMADD213NEPBF16Zmbk, TB_BCAST_SH},
+ {X86::VFNMADD213NEPBF16Zrkz, X86::VFNMADD213NEPBF16Zmbkz, TB_BCAST_SH},
{X86::VFNMADD213PDZ128rk, X86::VFNMADD213PDZ128mbk, TB_BCAST_SD},
{X86::VFNMADD213PDZ128rkz, X86::VFNMADD213PDZ128mbkz, TB_BCAST_SD},
{X86::VFNMADD213PDZ256rk, X86::VFNMADD213PDZ256mbk, TB_BCAST_SD},
@@ -9535,6 +9990,12 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VFNMADD213PSZ256rkz, X86::VFNMADD213PSZ256mbkz, TB_BCAST_SS},
{X86::VFNMADD213PSZrk, X86::VFNMADD213PSZmbk, TB_BCAST_SS},
{X86::VFNMADD213PSZrkz, X86::VFNMADD213PSZmbkz, TB_BCAST_SS},
+ {X86::VFNMADD231NEPBF16Z128rk, X86::VFNMADD231NEPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VFNMADD231NEPBF16Z128rkz, X86::VFNMADD231NEPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VFNMADD231NEPBF16Z256rk, X86::VFNMADD231NEPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VFNMADD231NEPBF16Z256rkz, X86::VFNMADD231NEPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VFNMADD231NEPBF16Zrk, X86::VFNMADD231NEPBF16Zmbk, TB_BCAST_SH},
+ {X86::VFNMADD231NEPBF16Zrkz, X86::VFNMADD231NEPBF16Zmbkz, TB_BCAST_SH},
{X86::VFNMADD231PDZ128rk, X86::VFNMADD231PDZ128mbk, TB_BCAST_SD},
{X86::VFNMADD231PDZ128rkz, X86::VFNMADD231PDZ128mbkz, TB_BCAST_SD},
{X86::VFNMADD231PDZ256rk, X86::VFNMADD231PDZ256mbk, TB_BCAST_SD},
@@ -9553,6 +10014,12 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VFNMADD231PSZ256rkz, X86::VFNMADD231PSZ256mbkz, TB_BCAST_SS},
{X86::VFNMADD231PSZrk, X86::VFNMADD231PSZmbk, TB_BCAST_SS},
{X86::VFNMADD231PSZrkz, X86::VFNMADD231PSZmbkz, TB_BCAST_SS},
+ {X86::VFNMSUB132NEPBF16Z128rk, X86::VFNMSUB132NEPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VFNMSUB132NEPBF16Z128rkz, X86::VFNMSUB132NEPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VFNMSUB132NEPBF16Z256rk, X86::VFNMSUB132NEPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VFNMSUB132NEPBF16Z256rkz, X86::VFNMSUB132NEPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VFNMSUB132NEPBF16Zrk, X86::VFNMSUB132NEPBF16Zmbk, TB_BCAST_SH},
+ {X86::VFNMSUB132NEPBF16Zrkz, X86::VFNMSUB132NEPBF16Zmbkz, TB_BCAST_SH},
{X86::VFNMSUB132PDZ128rk, X86::VFNMSUB132PDZ128mbk, TB_BCAST_SD},
{X86::VFNMSUB132PDZ128rkz, X86::VFNMSUB132PDZ128mbkz, TB_BCAST_SD},
{X86::VFNMSUB132PDZ256rk, X86::VFNMSUB132PDZ256mbk, TB_BCAST_SD},
@@ -9571,6 +10038,12 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VFNMSUB132PSZ256rkz, X86::VFNMSUB132PSZ256mbkz, TB_BCAST_SS},
{X86::VFNMSUB132PSZrk, X86::VFNMSUB132PSZmbk, TB_BCAST_SS},
{X86::VFNMSUB132PSZrkz, X86::VFNMSUB132PSZmbkz, TB_BCAST_SS},
+ {X86::VFNMSUB213NEPBF16Z128rk, X86::VFNMSUB213NEPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VFNMSUB213NEPBF16Z128rkz, X86::VFNMSUB213NEPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VFNMSUB213NEPBF16Z256rk, X86::VFNMSUB213NEPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VFNMSUB213NEPBF16Z256rkz, X86::VFNMSUB213NEPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VFNMSUB213NEPBF16Zrk, X86::VFNMSUB213NEPBF16Zmbk, TB_BCAST_SH},
+ {X86::VFNMSUB213NEPBF16Zrkz, X86::VFNMSUB213NEPBF16Zmbkz, TB_BCAST_SH},
{X86::VFNMSUB213PDZ128rk, X86::VFNMSUB213PDZ128mbk, TB_BCAST_SD},
{X86::VFNMSUB213PDZ128rkz, X86::VFNMSUB213PDZ128mbkz, TB_BCAST_SD},
{X86::VFNMSUB213PDZ256rk, X86::VFNMSUB213PDZ256mbk, TB_BCAST_SD},
@@ -9589,6 +10062,12 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VFNMSUB213PSZ256rkz, X86::VFNMSUB213PSZ256mbkz, TB_BCAST_SS},
{X86::VFNMSUB213PSZrk, X86::VFNMSUB213PSZmbk, TB_BCAST_SS},
{X86::VFNMSUB213PSZrkz, X86::VFNMSUB213PSZmbkz, TB_BCAST_SS},
+ {X86::VFNMSUB231NEPBF16Z128rk, X86::VFNMSUB231NEPBF16Z128mbk, TB_BCAST_SH},
+ {X86::VFNMSUB231NEPBF16Z128rkz, X86::VFNMSUB231NEPBF16Z128mbkz, TB_BCAST_SH},
+ {X86::VFNMSUB231NEPBF16Z256rk, X86::VFNMSUB231NEPBF16Z256mbk, TB_BCAST_SH},
+ {X86::VFNMSUB231NEPBF16Z256rkz, X86::VFNMSUB231NEPBF16Z256mbkz, TB_BCAST_SH},
+ {X86::VFNMSUB231NEPBF16Zrk, X86::VFNMSUB231NEPBF16Zmbk, TB_BCAST_SH},
+ {X86::VFNMSUB231NEPBF16Zrkz, X86::VFNMSUB231NEPBF16Zmbkz, TB_BCAST_SH},
{X86::VFNMSUB231PDZ128rk, X86::VFNMSUB231PDZ128mbk, TB_BCAST_SD},
{X86::VFNMSUB231PDZ128rkz, X86::VFNMSUB231PDZ128mbkz, TB_BCAST_SD},
{X86::VFNMSUB231PDZ256rk, X86::VFNMSUB231PDZ256mbk, TB_BCAST_SD},
@@ -9622,6 +10101,9 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VMAXCPSZ128rrk, X86::VMAXCPSZ128rmbk, TB_BCAST_SS},
{X86::VMAXCPSZ256rrk, X86::VMAXCPSZ256rmbk, TB_BCAST_SS},
{X86::VMAXCPSZrrk, X86::VMAXCPSZrmbk, TB_BCAST_SS},
+ {X86::VMAXPBF16Z128rrk, X86::VMAXPBF16Z128rmbk, TB_BCAST_SH},
+ {X86::VMAXPBF16Z256rrk, X86::VMAXPBF16Z256rmbk, TB_BCAST_SH},
+ {X86::VMAXPBF16Zrrk, X86::VMAXPBF16Zrmbk, TB_BCAST_SH},
{X86::VMAXPDZ128rrk, X86::VMAXPDZ128rmbk, TB_BCAST_SD},
{X86::VMAXPDZ256rrk, X86::VMAXPDZ256rmbk, TB_BCAST_SD},
{X86::VMAXPDZrrk, X86::VMAXPDZrmbk, TB_BCAST_SD},
@@ -9652,6 +10134,9 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VMINMAXPSZ128rrik, X86::VMINMAXPSZ128rmbik, TB_BCAST_SS},
{X86::VMINMAXPSZ256rrik, X86::VMINMAXPSZ256rmbik, TB_BCAST_SS},
{X86::VMINMAXPSZrrik, X86::VMINMAXPSZrmbik, TB_BCAST_SS},
+ {X86::VMINPBF16Z128rrk, X86::VMINPBF16Z128rmbk, TB_BCAST_SH},
+ {X86::VMINPBF16Z256rrk, X86::VMINPBF16Z256rmbk, TB_BCAST_SH},
+ {X86::VMINPBF16Zrrk, X86::VMINPBF16Zrmbk, TB_BCAST_SH},
{X86::VMINPDZ128rrk, X86::VMINPDZ128rmbk, TB_BCAST_SD},
{X86::VMINPDZ256rrk, X86::VMINPDZ256rmbk, TB_BCAST_SD},
{X86::VMINPDZrrk, X86::VMINPDZrmbk, TB_BCAST_SD},
@@ -9661,6 +10146,9 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VMINPSZ128rrk, X86::VMINPSZ128rmbk, TB_BCAST_SS},
{X86::VMINPSZ256rrk, X86::VMINPSZ256rmbk, TB_BCAST_SS},
{X86::VMINPSZrrk, X86::VMINPSZrmbk, TB_BCAST_SS},
+ {X86::VMULNEPBF16Z128rrk, X86::VMULNEPBF16Z128rmbk, TB_BCAST_SH},
+ {X86::VMULNEPBF16Z256rrk, X86::VMULNEPBF16Z256rmbk, TB_BCAST_SH},
+ {X86::VMULNEPBF16Zrrk, X86::VMULNEPBF16Zrmbk, TB_BCAST_SH},
{X86::VMULPDZ128rrk, X86::VMULPDZ128rmbk, TB_BCAST_SD},
{X86::VMULPDZ256rrk, X86::VMULPDZ256rmbk, TB_BCAST_SD},
{X86::VMULPDZrrk, X86::VMULPDZrmbk, TB_BCAST_SD},
@@ -10023,6 +10511,9 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VRANGEPSZ128rrik, X86::VRANGEPSZ128rmbik, TB_BCAST_SS},
{X86::VRANGEPSZ256rrik, X86::VRANGEPSZ256rmbik, TB_BCAST_SS},
{X86::VRANGEPSZrrik, X86::VRANGEPSZrmbik, TB_BCAST_SS},
+ {X86::VSCALEFPBF16Z128rrk, X86::VSCALEFPBF16Z128rmbk, TB_BCAST_SH},
+ {X86::VSCALEFPBF16Z256rrk, X86::VSCALEFPBF16Z256rmbk, TB_BCAST_SH},
+ {X86::VSCALEFPBF16Zrrk, X86::VSCALEFPBF16Zrmbk, TB_BCAST_SH},
{X86::VSCALEFPDZ128rrk, X86::VSCALEFPDZ128rmbk, TB_BCAST_SD},
{X86::VSCALEFPDZ256rrk, X86::VSCALEFPDZ256rmbk, TB_BCAST_SD},
{X86::VSCALEFPDZrrk, X86::VSCALEFPDZrmbk, TB_BCAST_SD},
@@ -10046,6 +10537,9 @@ static const X86FoldTableEntry BroadcastTable4[] = {
{X86::VSHUFPSZ128rrik, X86::VSHUFPSZ128rmbik, TB_BCAST_SS},
{X86::VSHUFPSZ256rrik, X86::VSHUFPSZ256rmbik, TB_BCAST_SS},
{X86::VSHUFPSZrrik, X86::VSHUFPSZrmbik, TB_BCAST_SS},
+ {X86::VSUBNEPBF16Z128rrk, X86::VSUBNEPBF16Z128rmbk, TB_BCAST_SH},
+ {X86::VSUBNEPBF16Z256rrk, X86::VSUBNEPBF16Z256rmbk, TB_BCAST_SH},
+ {X86::VSUBNEPBF16Zrrk, X86::VSUBNEPBF16Zrmbk, TB_BCAST_SH},
{X86::VSUBPDZ128rrk, X86::VSUBPDZ128rmbk, TB_BCAST_SD},
{X86::VSUBPDZ256rrk, X86::VSUBPDZ256rmbk, TB_BCAST_SD},
{X86::VSUBPDZrrk, X86::VSUBPDZrmbk, TB_BCAST_SD},
>From 7957436ac07d1902f3f510de756577c00353dd02 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Thu, 29 Aug 2024 13:42:24 +0800
Subject: [PATCH 2/5] address comments
---
clang/lib/Headers/CMakeLists.txt | 2 +-
clang/lib/Headers/avx10_2_512bf16intrin.h | 72 +-
clang/lib/Headers/avx10_2bf16intrin.h | 153 ++--
clang/lib/Sema/SemaX86.cpp | 9 +
.../CodeGen/X86/avx10_2_512bf16-builtins.c | 31 +
clang/test/CodeGen/X86/avx10_2bf16-builtins.c | 64 ++
llvm/include/llvm/IR/IntrinsicsX86.td | 655 +++++++-----------
llvm/lib/Target/X86/X86InstrAVX10.td | 235 +++----
llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 5 -
.../CodeGen/X86/avx10_2_512bf16-intrinsics.ll | 66 --
.../CodeGen/X86/avx10_2bf16-intrinsics.ll | 66 ++
11 files changed, 653 insertions(+), 705 deletions(-)
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 90d431f8627965..e928b5b142827b 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -152,8 +152,8 @@ set(x86_files
avx10_2_512minmaxintrin.h
avx10_2_512niintrin.h
avx10_2_512satcvtintrin.h
- avx10_2convertintrin.h
avx10_2bf16intrin.h
+ avx10_2convertintrin.h
avx10_2minmaxintrin.h
avx10_2niintrin.h
avx10_2satcvtintrin.h
diff --git a/clang/lib/Headers/avx10_2_512bf16intrin.h b/clang/lib/Headers/avx10_2_512bf16intrin.h
index 158d5686c8f02f..392b7ae770c5b5 100644
--- a/clang/lib/Headers/avx10_2_512bf16intrin.h
+++ b/clang/lib/Headers/avx10_2_512bf16intrin.h
@@ -286,23 +286,23 @@ _mm512_maskz_min_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
(__v32bf)_mm512_setzero_pbh());
}
-#define _mm512_cmp_pbh_mask(A, B, P) \
- ((__mmask32)__builtin_ia32_vcmppbf16512_mask((__v32bf)(__m512bh)(A), \
- (__v32bf)(__m512bh)(B), \
- (int)(P), (__mmask32) - 1))
+#define _mm512_cmp_pbh_mask(__A, __B, __P) \
+ ((__mmask32)__builtin_ia32_vcmppbf16512_mask((__v32bf)(__m512bh)(__A), \
+ (__v32bf)(__m512bh)(__B), \
+ (int)(__P), (__mmask32) - 1))
-#define _mm512_mask_cmp_pbh_mask(U, A, B, P) \
- ((__mmask32)__builtin_ia32_vcmppbf16512_mask((__v32bf)(__m512bh)(A), \
- (__v32bf)(__m512bh)(B), \
- (int)(P), (__mmask32)(U)))
+#define _mm512_mask_cmp_pbh_mask(__U, __A, __B, __P) \
+ ((__mmask32)__builtin_ia32_vcmppbf16512_mask((__v32bf)(__m512bh)(__A), \
+ (__v32bf)(__m512bh)(__B), \
+ (int)(__P), (__mmask32)(__U)))
-#define _mm512_mask_fpclass_pbh_mask(U, A, imm) \
+#define _mm512_mask_fpclass_pbh_mask(__U, __A, imm) \
((__mmask32)__builtin_ia32_vfpclasspbf16512_mask( \
- (__v32bf)(__m512bh)(A), (int)(imm), (__mmask32)(U)))
+ (__v32bf)(__m512bh)(__A), (int)(imm), (__mmask32)(__U)))
-#define _mm512_fpclass_pbh_mask(A, imm) \
+#define _mm512_fpclass_pbh_mask(__A, imm) \
((__mmask32)__builtin_ia32_vfpclasspbf16512_mask( \
- (__v32bf)(__m512bh)(A), (int)(imm), (__mmask32) - 1))
+ (__v32bf)(__m512bh)(__A), (int)(imm), (__mmask32) - 1))
static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_scalef_pbh(__m512bh __A, __m512bh __B) {
@@ -377,50 +377,50 @@ _mm512_maskz_rsqrt_pbh(__mmask32 __U, __m512bh __A) {
(__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U);
}
-#define _mm512_reducene_pbh(A, imm) \
+#define _mm512_reducene_pbh(__A, imm) \
((__m512bh)__builtin_ia32_vreducenepbf16512_mask( \
- (__v32bf)(__m512bh)(A), (int)(imm), (__v32bf)_mm512_undefined_pbh(), \
+ (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_undefined_pbh(), \
(__mmask32) - 1))
-#define _mm512_mask_reducene_pbh(W, U, A, imm) \
+#define _mm512_mask_reducene_pbh(__W, __U, __A, imm) \
((__m512bh)__builtin_ia32_vreducenepbf16512_mask( \
- (__v32bf)(__m512bh)(A), (int)(imm), (__v32bf)(__m512bh)(W), \
- (__mmask32)(U)))
+ (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)(__m512bh)(__W), \
+ (__mmask32)(__U)))
-#define _mm512_maskz_reducene_pbh(U, A, imm) \
+#define _mm512_maskz_reducene_pbh(__U, __A, imm) \
((__m512bh)__builtin_ia32_vreducenepbf16512_mask( \
- (__v32bf)(__m512bh)(A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \
- (__mmask32)(U)))
+ (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \
+ (__mmask32)(__U)))
-#define _mm512_roundscalene_pbh(A, B) \
+#define _mm512_roundscalene_pbh(__A, imm) \
((__m512bh)__builtin_ia32_vrndscalenepbf16_mask( \
- (__v32bf)(__m512bh)(A), (int)(B), (__v32bf)(__m512bh)(A), \
+ (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \
(__mmask32) - 1))
-#define _mm512_mask_roundscalene_pbh(A, B, C, imm) \
+#define _mm512_mask_roundscalene_pbh(__W, __U, __A, imm) \
((__m512bh)__builtin_ia32_vrndscalenepbf16_mask( \
- (__v32bf)(__m512bh)(C), (int)(imm), (__v32bf)(__m512bh)(A), \
- (__mmask32)(B)))
+ (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)(__m512bh)(__W), \
+ (__mmask32)(__U)))
-#define _mm512_maskz_roundscalene_pbh(A, B, imm) \
+#define _mm512_maskz_roundscalene_pbh(__U, __A, imm) \
((__m512bh)__builtin_ia32_vrndscalenepbf16_mask( \
- (__v32bf)(__m512bh)(B), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \
- (__mmask32)(A)))
+ (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \
+ (__mmask32)(__U)))
-#define _mm512_getmant_pbh(A, B, C) \
+#define _mm512_getmant_pbh(__A, __B, __C) \
((__m512bh)__builtin_ia32_vgetmantpbf16512_mask( \
- (__v32bf)(__m512bh)(A), (int)(((C) << 2) | (B)), \
+ (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)), \
(__v32bf)_mm512_undefined_pbh(), (__mmask32) - 1))
-#define _mm512_mask_getmant_pbh(W, U, A, B, C) \
+#define _mm512_mask_getmant_pbh(__W, __U, __A, __B, __C) \
((__m512bh)__builtin_ia32_vgetmantpbf16512_mask( \
- (__v32bf)(__m512bh)(A), (int)(((C) << 2) | (B)), (__v32bf)(__m512bh)(W), \
- (__mmask32)(U)))
+ (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)), \
+ (__v32bf)(__m512bh)(__W), (__mmask32)(__U)))
-#define _mm512_maskz_getmant_pbh(U, A, B, C) \
+#define _mm512_maskz_getmant_pbh(__U, __A, __B, __C) \
((__m512bh)__builtin_ia32_vgetmantpbf16512_mask( \
- (__v32bf)(__m512bh)(A), (int)(((C) << 2) | (B)), \
- (__v32bf)_mm512_setzero_pbh(), (__mmask32)(U)))
+ (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)), \
+ (__v32bf)_mm512_setzero_pbh(), (__mmask32)(__U)))
static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_sqrt_pbh(__m512bh __A) {
return (__m512bh)__builtin_ia32_vsqrtnepbf16512((__v32bf)__A);
diff --git a/clang/lib/Headers/avx10_2bf16intrin.h b/clang/lib/Headers/avx10_2bf16intrin.h
index 8c03ddbf2f71d1..0a427b9b7418b9 100644
--- a/clang/lib/Headers/avx10_2bf16intrin.h
+++ b/clang/lib/Headers/avx10_2bf16intrin.h
@@ -553,38 +553,40 @@ static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comneqsbh(__m128bh A,
return __builtin_ia32_vcomsbf16neq((__v8bf)A, (__v8bf)B);
}
-#define _mm256_cmp_pbh_mask(A, B, P) \
- ((__mmask16)__builtin_ia32_vcmppbf16256_mask((__v16bf)(__m256bh)(A), \
- (__v16bf)(__m256bh)(B), \
- (int)(P), (__mmask16) - 1))
-
-#define _mm256_mask_cmp_pbh_mask(U, A, B, P) \
- ((__mmask16)__builtin_ia32_vcmppbf16256_mask((__v16bf)(__m256bh)(A), \
- (__v16bf)(__m256bh)(B), \
- (int)(P), (__mmask16)(U)))
-
-#define _mm_cmp_pbh_mask(A, B, P) \
- ((__mmask8)__builtin_ia32_vcmppbf16128_mask( \
- (__v8bf)(__m128bh)(A), (__v8bf)(__m128bh)(B), (int)(P), (__mmask8) - 1))
-
-#define _mm_mask_cmp_pbh_mask(U, A, B, P) \
- ((__mmask8)__builtin_ia32_vcmppbf16128_mask( \
- (__v8bf)(__m128bh)(A), (__v8bf)(__m128bh)(B), (int)(P), (__mmask8)(U)))
-
-#define _mm256_mask_fpclass_pbh_mask(U, A, imm) \
+#define _mm256_cmp_pbh_mask(__A, __B, __P) \
+ ((__mmask16)__builtin_ia32_vcmppbf16256_mask((__v16bf)(__m256bh)(__A), \
+ (__v16bf)(__m256bh)(__B), \
+ (int)(__P), (__mmask16) - 1))
+
+#define _mm256_mask_cmp_pbh_mask(__U, __A, __B, __P) \
+ ((__mmask16)__builtin_ia32_vcmppbf16256_mask((__v16bf)(__m256bh)(__A), \
+ (__v16bf)(__m256bh)(__B), \
+ (int)(__P), (__mmask16)(__U)))
+
+#define _mm_cmp_pbh_mask(__A, __B, __P) \
+ ((__mmask8)__builtin_ia32_vcmppbf16128_mask((__v8bf)(__m128bh)(__A), \
+ (__v8bf)(__m128bh)(__B), \
+ (int)(__P), (__mmask8) - 1))
+
+#define _mm_mask_cmp_pbh_mask(__U, __A, __B, __P) \
+ ((__mmask8)__builtin_ia32_vcmppbf16128_mask((__v8bf)(__m128bh)(__A), \
+ (__v8bf)(__m128bh)(__B), \
+ (int)(__P), (__mmask8)(__U)))
+
+#define _mm256_mask_fpclass_pbh_mask(__U, __A, imm) \
((__mmask16)__builtin_ia32_vfpclasspbf16256_mask( \
- (__v16bf)(__m256bh)(A), (int)(imm), (__mmask16)(U)))
+ (__v16bf)(__m256bh)(__A), (int)(imm), (__mmask16)(__U)))
-#define _mm256_fpclass_pbh_mask(A, imm) \
+#define _mm256_fpclass_pbh_mask(__A, imm) \
((__mmask16)__builtin_ia32_vfpclasspbf16256_mask( \
- (__v16bf)(__m256bh)(A), (int)(imm), (__mmask16) - 1))
+ (__v16bf)(__m256bh)(__A), (int)(imm), (__mmask16) - 1))
-#define _mm_mask_fpclass_pbh_mask(U, A, imm) \
- ((__mmask8)__builtin_ia32_vfpclasspbf16128_mask((__v8bf)(__m128bh)(A), \
- (int)(imm), (__mmask8)(U)))
+#define _mm_mask_fpclass_pbh_mask(__U, __A, imm) \
+ ((__mmask8)__builtin_ia32_vfpclasspbf16128_mask( \
+ (__v8bf)(__m128bh)(__A), (int)(imm), (__mmask8)(__U)))
-#define _mm_fpclass_pbh_mask(A, imm) \
- ((__mmask8)__builtin_ia32_vfpclasspbf16128_mask((__v8bf)(__m128bh)(A), \
+#define _mm_fpclass_pbh_mask(__A, imm) \
+ ((__mmask8)__builtin_ia32_vfpclasspbf16128_mask((__v8bf)(__m128bh)(__A), \
(int)(imm), (__mmask8) - 1))
static __inline__ __m256bh __DEFAULT_FN_ATTRS256
@@ -729,94 +731,95 @@ _mm_maskz_rsqrt_pbh(__mmask8 __U, __m128bh __A) {
(__v8bf)__A, (__v8bf)_mm_setzero_pbh(), (__mmask8)__U);
}
-#define _mm256_reducene_pbh(A, imm) \
+#define _mm256_reducene_pbh(__A, imm) \
((__m256bh)__builtin_ia32_vreducenepbf16256_mask( \
- (__v16bf)(__m256bh)(A), (int)(imm), (__v16bf)_mm256_undefined_pbh(), \
+ (__v16bf)(__m256bh)(__A), (int)(imm), (__v16bf)_mm256_undefined_pbh(), \
(__mmask16) - 1))
-#define _mm256_mask_reducene_pbh(W, U, A, imm) \
+#define _mm256_mask_reducene_pbh(__W, __U, __A, imm) \
((__m256bh)__builtin_ia32_vreducenepbf16256_mask( \
- (__v16bf)(__m256bh)(A), (int)(imm), (__v16bf)(__m256bh)(W), \
- (__mmask16)(U)))
+ (__v16bf)(__m256bh)(__A), (int)(imm), (__v16bf)(__m256bh)(__W), \
+ (__mmask16)(__U)))
-#define _mm256_maskz_reducene_pbh(U, A, imm) \
+#define _mm256_maskz_reducene_pbh(__U, __A, imm) \
((__m256bh)__builtin_ia32_vreducenepbf16256_mask( \
- (__v16bf)(__m256bh)(A), (int)(imm), (__v16bf)_mm256_setzero_pbh(), \
- (__mmask16)(U)))
+ (__v16bf)(__m256bh)(__A), (int)(imm), (__v16bf)_mm256_setzero_pbh(), \
+ (__mmask16)(__U)))
-#define _mm_reducene_pbh(A, imm) \
+#define _mm_reducene_pbh(__A, imm) \
((__m128bh)__builtin_ia32_vreducenepbf16128_mask( \
- (__v8bf)(__m128bh)(A), (int)(imm), (__v8bf)_mm_undefined_pbh(), \
+ (__v8bf)(__m128bh)(__A), (int)(imm), (__v8bf)_mm_undefined_pbh(), \
(__mmask8) - 1))
-#define _mm_mask_reducene_pbh(W, U, A, imm) \
+#define _mm_mask_reducene_pbh(__W, __U, __A, imm) \
((__m128bh)__builtin_ia32_vreducenepbf16128_mask( \
- (__v8bf)(__m128bh)(A), (int)(imm), (__v8bf)(__m128bh)(W), \
- (__mmask8)(U)))
+ (__v8bf)(__m128bh)(__A), (int)(imm), (__v8bf)(__m128bh)(__W), \
+ (__mmask8)(__U)))
-#define _mm_maskz_reducene_pbh(U, A, imm) \
+#define _mm_maskz_reducene_pbh(__U, __A, imm) \
((__m128bh)__builtin_ia32_vreducenepbf16128_mask( \
- (__v8bf)(__m128bh)(A), (int)(imm), (__v8bf)_mm_setzero_pbh(), \
- (__mmask8)(U)))
+ (__v8bf)(__m128bh)(__A), (int)(imm), (__v8bf)_mm_setzero_pbh(), \
+ (__mmask8)(__U)))
-#define _mm256_roundscalene_pbh(A, B) \
+#define _mm256_roundscalene_pbh(__A, imm) \
((__m256bh)__builtin_ia32_vrndscalenepbf16_256_mask( \
- (__v16bf)(__m256bh)(A), (int)(B), (__v16bf)(__m256bh)(A), \
+ (__v16bf)(__m256bh)(__A), (int)(imm), (__v16bf)_mm256_setzero_pbh(), \
(__mmask16) - 1))
-#define _mm256_mask_roundscalene_pbh(A, B, C, imm) \
+#define _mm256_mask_roundscalene_pbh(__W, __U, __A, imm) \
((__m256bh)__builtin_ia32_vrndscalenepbf16_256_mask( \
- (__v16bf)(__m256bh)(C), (int)(imm), (__v16bf)(__m256bh)(A), \
- (__mmask16)(B)))
+ (__v16bf)(__m256bh)(__A), (int)(imm), (__v16bf)(__m256bh)(__W), \
+ (__mmask16)(__U)))
-#define _mm256_maskz_roundscalene_pbh(A, B, imm) \
+#define _mm256_maskz_roundscalene_pbh(__U, __A, imm) \
((__m256bh)__builtin_ia32_vrndscalenepbf16_256_mask( \
- (__v16bf)(__m256bh)(B), (int)(imm), (__v16bf)_mm256_setzero_pbh(), \
- (__mmask16)(A)))
+ (__v16bf)(__m256bh)(__A), (int)(imm), (__v16bf)_mm256_setzero_pbh(), \
+ (__mmask16)(__U)))
-#define _mm_roundscalene_pbh(A, B) \
+#define _mm_roundscalene_pbh(__A, imm) \
((__m128bh)__builtin_ia32_vrndscalenepbf16_128_mask( \
- (__v8bf)(__m128bh)(A), (int)(B), (__v8bf)(__m128bh)(A), (__mmask8) - 1))
+ (__v8bf)(__m128bh)(__A), (int)(imm), (__v8bf)_mm_setzero_pbh(), \
+ (__mmask8) - 1))
-#define _mm_mask_roundscalene_pbh(A, B, C, imm) \
+#define _mm_mask_roundscalene_pbh(__W, __U, __A, imm) \
((__m128bh)__builtin_ia32_vrndscalenepbf16_128_mask( \
- (__v8bf)(__m128bh)(C), (int)(imm), (__v8bf)(__m128bh)(A), \
- (__mmask8)(B)))
+ (__v8bf)(__m128bh)(__A), (int)(imm), (__v8bf)(__m128bh)(__W), \
+ (__mmask8)(__U)))
-#define _mm_maskz_roundscalene_pbh(A, B, imm) \
+#define _mm_maskz_roundscalene_pbh(__U, __A, imm) \
((__m128bh)__builtin_ia32_vrndscalenepbf16_128_mask( \
- (__v8bf)(__m128bh)(B), (int)(imm), (__v8bf)_mm_setzero_pbh(), \
- (__mmask8)(A)))
+ (__v8bf)(__m128bh)(__A), (int)(imm), (__v8bf)_mm_setzero_pbh(), \
+ (__mmask8)(__U)))
-#define _mm256_getmant_pbh(A, B, C) \
+#define _mm256_getmant_pbh(__A, __B, __C) \
((__m256bh)__builtin_ia32_vgetmantpbf16256_mask( \
- (__v16bf)(__m256bh)(A), (int)(((C) << 2) | (B)), \
+ (__v16bf)(__m256bh)(__A), (int)(((__C) << 2) | (__B)), \
(__v16bf)_mm256_undefined_pbh(), (__mmask16) - 1))
-#define _mm256_mask_getmant_pbh(W, U, A, B, C) \
+#define _mm256_mask_getmant_pbh(__W, __U, __A, __B, __C) \
((__m256bh)__builtin_ia32_vgetmantpbf16256_mask( \
- (__v16bf)(__m256bh)(A), (int)(((C) << 2) | (B)), (__v16bf)(__m256bh)(W), \
- (__mmask16)(U)))
+ (__v16bf)(__m256bh)(__A), (int)(((__C) << 2) | (__B)), \
+ (__v16bf)(__m256bh)(__W), (__mmask16)(__U)))
-#define _mm256_maskz_getmant_pbh(U, A, B, C) \
+#define _mm256_maskz_getmant_pbh(__U, __A, __B, __C) \
((__m256bh)__builtin_ia32_vgetmantpbf16256_mask( \
- (__v16bf)(__m256bh)(A), (int)(((C) << 2) | (B)), \
- (__v16bf)_mm256_setzero_pbh(), (__mmask16)(U)))
+ (__v16bf)(__m256bh)(__A), (int)(((__C) << 2) | (__B)), \
+ (__v16bf)_mm256_setzero_pbh(), (__mmask16)(__U)))
-#define _mm_getmant_pbh(A, B, C) \
+#define _mm_getmant_pbh(__A, __B, __C) \
((__m128bh)__builtin_ia32_vgetmantpbf16128_mask( \
- (__v8bf)(__m128bh)(A), (int)(((C) << 2) | (B)), \
+ (__v8bf)(__m128bh)(__A), (int)(((__C) << 2) | (__B)), \
(__v8bf)_mm_undefined_pbh(), (__mmask8) - 1))
-#define _mm_mask_getmant_pbh(W, U, A, B, C) \
+#define _mm_mask_getmant_pbh(__W, __U, __A, __B, __C) \
((__m128bh)__builtin_ia32_vgetmantpbf16128_mask( \
- (__v8bf)(__m128bh)(A), (int)(((C) << 2) | (B)), (__v8bf)(__m128bh)(W), \
- (__mmask8)(U)))
+ (__v8bf)(__m128bh)(__A), (int)(((__C) << 2) | (__B)), \
+ (__v8bf)(__m128bh)(__W), (__mmask8)(__U)))
-#define _mm_maskz_getmant_pbh(U, A, B, C) \
+#define _mm_maskz_getmant_pbh(__U, __A, __B, __C) \
((__m128bh)__builtin_ia32_vgetmantpbf16128_mask( \
- (__v8bf)(__m128bh)(A), (int)(((C) << 2) | (B)), \
- (__v8bf)_mm_setzero_pbh(), (__mmask8)(U)))
+ (__v8bf)(__m128bh)(__A), (int)(((__C) << 2) | (__B)), \
+ (__v8bf)_mm_setzero_pbh(), (__mmask8)(__U)))
static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_sqrt_pbh(__m256bh __A) {
return (__m256bh)__builtin_ia32_vsqrtnepbf16256((__v16bf)__A);
diff --git a/clang/lib/Sema/SemaX86.cpp b/clang/lib/Sema/SemaX86.cpp
index 311e574537059d..233a068c8574ce 100644
--- a/clang/lib/Sema/SemaX86.cpp
+++ b/clang/lib/Sema/SemaX86.cpp
@@ -875,6 +875,9 @@ bool SemaX86::CheckBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
case X86::BI__builtin_ia32_rndscaleps_mask:
case X86::BI__builtin_ia32_rndscalepd_mask:
case X86::BI__builtin_ia32_rndscaleph_mask:
+ case X86::BI__builtin_ia32_vrndscalenepbf16_128_mask:
+ case X86::BI__builtin_ia32_vrndscalenepbf16_256_mask:
+ case X86::BI__builtin_ia32_vrndscalenepbf16_mask:
case X86::BI__builtin_ia32_reducepd128_mask:
case X86::BI__builtin_ia32_reducepd256_mask:
case X86::BI__builtin_ia32_reducepd512_mask:
@@ -884,6 +887,9 @@ bool SemaX86::CheckBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
case X86::BI__builtin_ia32_reduceph128_mask:
case X86::BI__builtin_ia32_reduceph256_mask:
case X86::BI__builtin_ia32_reduceph512_mask:
+ case X86::BI__builtin_ia32_vreducenepbf16128_mask:
+ case X86::BI__builtin_ia32_vreducenepbf16256_mask:
+ case X86::BI__builtin_ia32_vreducenepbf16512_mask:
case X86::BI__builtin_ia32_vreducepd256_round_mask:
case X86::BI__builtin_ia32_vreduceps256_round_mask:
case X86::BI__builtin_ia32_vreduceph256_round_mask:
@@ -911,6 +917,9 @@ bool SemaX86::CheckBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
case X86::BI__builtin_ia32_fpclassph128_mask:
case X86::BI__builtin_ia32_fpclassph256_mask:
case X86::BI__builtin_ia32_fpclassph512_mask:
+ case X86::BI__builtin_ia32_vfpclasspbf16128_mask:
+ case X86::BI__builtin_ia32_vfpclasspbf16256_mask:
+ case X86::BI__builtin_ia32_vfpclasspbf16512_mask:
case X86::BI__builtin_ia32_fpclasssd_mask:
case X86::BI__builtin_ia32_fpclassss_mask:
case X86::BI__builtin_ia32_fpclasssh_mask:
diff --git a/clang/test/CodeGen/X86/avx10_2_512bf16-builtins.c b/clang/test/CodeGen/X86/avx10_2_512bf16-builtins.c
index b14ff4d1f27e2a..b00859c174fbab 100644
--- a/clang/test/CodeGen/X86/avx10_2_512bf16-builtins.c
+++ b/clang/test/CodeGen/X86/avx10_2_512bf16-builtins.c
@@ -580,18 +580,21 @@ __mmask32 test_mm512_cmp_pbh_mask_true_us(__m512bh a, __m512bh b) {
__mmask32 test_mm512_mask_cmp_pbh_mask_eq_oq(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: @test_mm512_mask_cmp_pbh_mask_eq_oq
// CHECK: fcmp oeq <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_OQ);
}
__mmask32 test_mm512_mask_cmp_pbh_mask_lt_os(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_lt_os
// CHECK: fcmp olt <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_LT_OS);
}
__mmask32 test_mm512_mask_cmp_pbh_mask_le_os(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_le_os
// CHECK: fcmp ole <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_LE_OS);
}
@@ -604,168 +607,196 @@ __mmask32 test_mm512_mask_cmp_pbh_mask_unord_q(__mmask32 m, __m512bh a, __m512bh
__mmask32 test_mm512_mask_cmp_pbh_mask_neq_uq(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_neq_uq
// CHECK: fcmp une <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_UQ);
}
__mmask32 test_mm512_mask_cmp_pbh_mask_nlt_us(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_nlt_us
// CHECK: fcmp uge <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NLT_US);
}
__mmask32 test_mm512_mask_cmp_pbh_mask_nle_us(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_nle_us
// CHECK: fcmp ugt <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NLE_US);
}
__mmask32 test_mm512_mask_cmp_pbh_mask_ord_q(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_ord_q
// CHECK: fcmp ord <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_ORD_Q);
}
__mmask32 test_mm512_mask_cmp_pbh_mask_eq_uq(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_eq_uq
// CHECK: fcmp ueq <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_UQ);
}
__mmask32 test_mm512_mask_cmp_pbh_mask_nge_us(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_nge_us
// CHECK: fcmp ult <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NGE_US);
}
__mmask32 test_mm512_mask_cmp_pbh_mask_ngt_us(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_ngt_us
// CHECK: fcmp ule <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NGT_US);
}
__mmask32 test_mm512_mask_cmp_pbh_mask_false_oq(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_false_oq
// CHECK: fcmp false <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_FALSE_OQ);
}
__mmask32 test_mm512_mask_cmp_pbh_mask_neq_oq(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_neq_oq
// CHECK: fcmp one <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_OQ);
}
__mmask32 test_mm512_mask_cmp_pbh_mask_ge_os(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_ge_os
// CHECK: fcmp oge <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_GE_OS);
}
__mmask32 test_mm512_mask_cmp_pbh_mask_gt_os(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_gt_os
// CHECK: fcmp ogt <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_GT_OS);
}
__mmask32 test_mm512_mask_cmp_pbh_mask_true_uq(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_true_uq
// CHECK: fcmp true <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_TRUE_UQ);
}
__mmask32 test_mm512_mask_cmp_pbh_mask_eq_os(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_eq_os
// CHECK: fcmp oeq <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_OS);
}
__mmask32 test_mm512_mask_cmp_pbh_mask_lt_oq(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_lt_oq
// CHECK: fcmp olt <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_LT_OQ);
}
__mmask32 test_mm512_mask_cmp_pbh_mask_le_oq(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_le_oq
// CHECK: fcmp ole <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_LE_OQ);
}
__mmask32 test_mm512_mask_cmp_pbh_mask_unord_s(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_unord_s
// CHECK: fcmp uno <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_UNORD_S);
}
__mmask32 test_mm512_mask_cmp_pbh_mask_neq_us(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_neq_us
// CHECK: fcmp une <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_US);
}
__mmask32 test_mm512_mask_cmp_pbh_mask_nlt_uq(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_nlt_uq
// CHECK: fcmp uge <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NLT_UQ);
}
__mmask32 test_mm512_mask_cmp_pbh_mask_nle_uq(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_nle_uq
// CHECK: fcmp ugt <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NLE_UQ);
}
__mmask32 test_mm512_mask_cmp_pbh_mask_ord_s(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_ord_s
// CHECK: fcmp ord <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_ORD_S);
}
__mmask32 test_mm512_mask_cmp_pbh_mask_eq_us(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_eq_us
// CHECK: fcmp ueq <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_US);
}
__mmask32 test_mm512_mask_cmp_pbh_mask_nge_uq(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_nge_uq
// CHECK: fcmp ult <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NGE_UQ);
}
__mmask32 test_mm512_mask_cmp_pbh_mask_ngt_uq(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_ngt_uq
// CHECK: fcmp ule <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NGT_UQ);
}
__mmask32 test_mm512_mask_cmp_pbh_mask_false_os(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_false_os
// CHECK: fcmp false <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_FALSE_OS);
}
__mmask32 test_mm512_mask_cmp_pbh_mask_neq_os(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_neq_os
// CHECK: fcmp one <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_OS);
}
__mmask32 test_mm512_mask_cmp_pbh_mask_ge_oq(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_ge_oq
// CHECK: fcmp oge <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_GE_OQ);
}
__mmask32 test_mm512_mask_cmp_pbh_mask_gt_oq(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_gt_oq
// CHECK: fcmp ogt <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_GT_OQ);
}
__mmask32 test_mm512_mask_cmp_pbh_mask_true_us(__mmask32 m, __m512bh a, __m512bh b) {
// CHECK-LABEL: test_mm512_mask_cmp_pbh_mask_true_us
// CHECK: fcmp true <32 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <32 x i1> %{{.*}}, %{{.*}}
return _mm512_mask_cmp_pbh_mask(m, a, b, _CMP_TRUE_US);
}
diff --git a/clang/test/CodeGen/X86/avx10_2bf16-builtins.c b/clang/test/CodeGen/X86/avx10_2bf16-builtins.c
index 84bac3e8dc63b8..cd94edcf58ea2f 100644
--- a/clang/test/CodeGen/X86/avx10_2bf16-builtins.c
+++ b/clang/test/CodeGen/X86/avx10_2bf16-builtins.c
@@ -879,192 +879,224 @@ __mmask16 test_mm256_cmp_pbh_mask_true_us(__m256bh a, __m256bh b) {
__mmask16 test_mm256_mask_cmp_pbh_mask_eq_oq(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: @test_mm256_mask_cmp_pbh_mask_eq_oq
// CHECK: fcmp oeq <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_OQ);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_lt_os(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_lt_os
// CHECK: fcmp olt <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_LT_OS);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_le_os(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_le_os
// CHECK: fcmp ole <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_LE_OS);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_unord_q(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_unord_q
// CHECK: fcmp uno <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_UNORD_Q);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_neq_uq(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_neq_uq
// CHECK: fcmp une <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_UQ);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_nlt_us(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_nlt_us
// CHECK: fcmp uge <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NLT_US);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_nle_us(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_nle_us
// CHECK: fcmp ugt <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NLE_US);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_ord_q(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ord_q
// CHECK: fcmp ord <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_ORD_Q);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_eq_uq(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_eq_uq
// CHECK: fcmp ueq <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_UQ);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_nge_us(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_nge_us
// CHECK: fcmp ult <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NGE_US);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_ngt_us(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ngt_us
// CHECK: fcmp ule <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NGT_US);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_false_oq(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_false_oq
// CHECK: fcmp false <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_FALSE_OQ);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_neq_oq(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_neq_oq
// CHECK: fcmp one <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_OQ);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_ge_os(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ge_os
// CHECK: fcmp oge <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_GE_OS);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_gt_os(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_gt_os
// CHECK: fcmp ogt <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_GT_OS);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_true_uq(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_true_uq
// CHECK: fcmp true <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_TRUE_UQ);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_eq_os(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_eq_os
// CHECK: fcmp oeq <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_OS);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_lt_oq(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_lt_oq
// CHECK: fcmp olt <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_LT_OQ);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_le_oq(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_le_oq
// CHECK: fcmp ole <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_LE_OQ);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_unord_s(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_unord_s
// CHECK: fcmp uno <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_UNORD_S);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_neq_us(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_neq_us
// CHECK: fcmp une <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_US);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_nlt_uq(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_nlt_uq
// CHECK: fcmp uge <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NLT_UQ);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_nle_uq(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_nle_uq
// CHECK: fcmp ugt <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NLE_UQ);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_ord_s(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ord_s
// CHECK: fcmp ord <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_ORD_S);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_eq_us(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_eq_us
// CHECK: fcmp ueq <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_US);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_nge_uq(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_nge_uq
// CHECK: fcmp ult <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NGE_UQ);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_ngt_uq(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ngt_uq
// CHECK: fcmp ule <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NGT_UQ);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_false_os(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_false_os
// CHECK: fcmp false <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_FALSE_OS);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_neq_os(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_neq_os
// CHECK: fcmp one <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_OS);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_ge_oq(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_ge_oq
// CHECK: fcmp oge <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_GE_OQ);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_gt_oq(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_gt_oq
// CHECK: fcmp ogt <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_GT_OQ);
}
__mmask16 test_mm256_mask_cmp_pbh_mask_true_us(__mmask16 m, __m256bh a, __m256bh b) {
// CHECK-LABEL: test_mm256_mask_cmp_pbh_mask_true_us
// CHECK: fcmp true <16 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
return _mm256_mask_cmp_pbh_mask(m, a, b, _CMP_TRUE_US);
}
@@ -1263,192 +1295,224 @@ __mmask8 test_mm_cmp_pbh_mask_true_us(__m128bh a, __m128bh b) {
__mmask8 test_mm_mask_cmp_pbh_mask_eq_oq(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: @test_mm_mask_cmp_pbh_mask_eq_oq
// CHECK: fcmp oeq <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_OQ);
}
__mmask8 test_mm_mask_cmp_pbh_mask_lt_os(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_lt_os
// CHECK: fcmp olt <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_LT_OS);
}
__mmask8 test_mm_mask_cmp_pbh_mask_le_os(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_le_os
// CHECK: fcmp ole <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_LE_OS);
}
__mmask8 test_mm_mask_cmp_pbh_mask_unord_q(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_unord_q
// CHECK: fcmp uno <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_UNORD_Q);
}
__mmask8 test_mm_mask_cmp_pbh_mask_neq_uq(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_neq_uq
// CHECK: fcmp une <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_UQ);
}
__mmask8 test_mm_mask_cmp_pbh_mask_nlt_us(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_nlt_us
// CHECK: fcmp uge <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NLT_US);
}
__mmask8 test_mm_mask_cmp_pbh_mask_nle_us(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_nle_us
// CHECK: fcmp ugt <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NLE_US);
}
__mmask8 test_mm_mask_cmp_pbh_mask_ord_q(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_ord_q
// CHECK: fcmp ord <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_ORD_Q);
}
__mmask8 test_mm_mask_cmp_pbh_mask_eq_uq(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_eq_uq
// CHECK: fcmp ueq <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_UQ);
}
__mmask8 test_mm_mask_cmp_pbh_mask_nge_us(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_nge_us
// CHECK: fcmp ult <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NGE_US);
}
__mmask8 test_mm_mask_cmp_pbh_mask_ngt_us(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_ngt_us
// CHECK: fcmp ule <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NGT_US);
}
__mmask8 test_mm_mask_cmp_pbh_mask_false_oq(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_false_oq
// CHECK: fcmp false <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_FALSE_OQ);
}
__mmask8 test_mm_mask_cmp_pbh_mask_neq_oq(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_neq_oq
// CHECK: fcmp one <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_OQ);
}
__mmask8 test_mm_mask_cmp_pbh_mask_ge_os(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_ge_os
// CHECK: fcmp oge <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_GE_OS);
}
__mmask8 test_mm_mask_cmp_pbh_mask_gt_os(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_gt_os
// CHECK: fcmp ogt <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_GT_OS);
}
__mmask8 test_mm_mask_cmp_pbh_mask_true_uq(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_true_uq
// CHECK: fcmp true <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_TRUE_UQ);
}
__mmask8 test_mm_mask_cmp_pbh_mask_eq_os(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_eq_os
// CHECK: fcmp oeq <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_OS);
}
__mmask8 test_mm_mask_cmp_pbh_mask_lt_oq(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_lt_oq
// CHECK: fcmp olt <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_LT_OQ);
}
__mmask8 test_mm_mask_cmp_pbh_mask_le_oq(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_le_oq
// CHECK: fcmp ole <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_LE_OQ);
}
__mmask8 test_mm_mask_cmp_pbh_mask_unord_s(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_unord_s
// CHECK: fcmp uno <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_UNORD_S);
}
__mmask8 test_mm_mask_cmp_pbh_mask_neq_us(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_neq_us
// CHECK: fcmp une <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_US);
}
__mmask8 test_mm_mask_cmp_pbh_mask_nlt_uq(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_nlt_uq
// CHECK: fcmp uge <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NLT_UQ);
}
__mmask8 test_mm_mask_cmp_pbh_mask_nle_uq(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_nle_uq
// CHECK: fcmp ugt <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NLE_UQ);
}
__mmask8 test_mm_mask_cmp_pbh_mask_ord_s(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_ord_s
// CHECK: fcmp ord <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_ORD_S);
}
__mmask8 test_mm_mask_cmp_pbh_mask_eq_us(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_eq_us
// CHECK: fcmp ueq <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_EQ_US);
}
__mmask8 test_mm_mask_cmp_pbh_mask_nge_uq(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_nge_uq
// CHECK: fcmp ult <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NGE_UQ);
}
__mmask8 test_mm_mask_cmp_pbh_mask_ngt_uq(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_ngt_uq
// CHECK: fcmp ule <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NGT_UQ);
}
__mmask8 test_mm_mask_cmp_pbh_mask_false_os(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_false_os
// CHECK: fcmp false <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_FALSE_OS);
}
__mmask8 test_mm_mask_cmp_pbh_mask_neq_os(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_neq_os
// CHECK: fcmp one <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_NEQ_OS);
}
__mmask8 test_mm_mask_cmp_pbh_mask_ge_oq(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_ge_oq
// CHECK: fcmp oge <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_GE_OQ);
}
__mmask8 test_mm_mask_cmp_pbh_mask_gt_oq(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_gt_oq
// CHECK: fcmp ogt <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_GT_OQ);
}
__mmask8 test_mm_mask_cmp_pbh_mask_true_us(__mmask8 m, __m128bh a, __m128bh b) {
// CHECK-LABEL: test_mm_mask_cmp_pbh_mask_true_us
// CHECK: fcmp true <8 x bfloat> %{{.*}}, %{{.*}}
+ // CHECK: and <8 x i1> %{{.*}}, %{{.*}}
return _mm_mask_cmp_pbh_mask(m, a, b, _CMP_TRUE_US);
}
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 67114399d17f86..fafa5051bfb1b4 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -7222,410 +7222,253 @@ def int_x86_avx10_mask_vcvtneph2hf8s512 : ClangBuiltin<"__builtin_ia32_vcvtneph2
//===----------------------------------------------------------------------===//
let TargetPrefix = "x86" in {
- def int_x86_avx10_vaddnepbf16512
- : ClangBuiltin<"__builtin_ia32_vaddnepbf16512">,
- DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
- [ llvm_v32bf16_ty, llvm_v32bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vaddnepbf16256
- : ClangBuiltin<"__builtin_ia32_vaddnepbf16256">,
- DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
- [ llvm_v16bf16_ty, llvm_v16bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vaddnepbf16128
- : ClangBuiltin<"__builtin_ia32_vaddnepbf16128">,
- DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
- [ llvm_v8bf16_ty, llvm_v8bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vsubnepbf16512
- : ClangBuiltin<"__builtin_ia32_vsubnepbf16512">,
- DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
- [ llvm_v32bf16_ty, llvm_v32bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vsubnepbf16256
- : ClangBuiltin<"__builtin_ia32_vsubnepbf16256">,
- DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
- [ llvm_v16bf16_ty, llvm_v16bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vsubnepbf16128
- : ClangBuiltin<"__builtin_ia32_vsubnepbf16128">,
- DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
- [ llvm_v8bf16_ty, llvm_v8bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vmulnepbf16512
- : ClangBuiltin<"__builtin_ia32_vmulnepbf16512">,
- DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
- [ llvm_v32bf16_ty, llvm_v32bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vmulnepbf16256
- : ClangBuiltin<"__builtin_ia32_vmulnepbf16256">,
- Intrinsic<[ llvm_v16bf16_ty ],
- [ llvm_v16bf16_ty, llvm_v16bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vmulnepbf16128
- : ClangBuiltin<"__builtin_ia32_vmulnepbf16128">,
- DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
- [ llvm_v8bf16_ty, llvm_v8bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vdivnepbf16512
- : ClangBuiltin<"__builtin_ia32_vdivnepbf16512">,
- Intrinsic<[ llvm_v32bf16_ty ],
- [ llvm_v32bf16_ty, llvm_v32bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vdivnepbf16256
- : ClangBuiltin<"__builtin_ia32_vdivnepbf16256">,
- DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
- [ llvm_v16bf16_ty, llvm_v16bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vdivnepbf16128
- : ClangBuiltin<"__builtin_ia32_vdivnepbf16128">,
- DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
- [ llvm_v8bf16_ty, llvm_v8bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vmaxpbf16512
- : ClangBuiltin<"__builtin_ia32_vmaxpbf16512">,
- DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
- [ llvm_v32bf16_ty, llvm_v32bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vmaxpbf16256
- : ClangBuiltin<"__builtin_ia32_vmaxpbf16256">,
- DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
- [ llvm_v16bf16_ty, llvm_v16bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vmaxpbf16128
- : ClangBuiltin<"__builtin_ia32_vmaxpbf16128">,
- DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
- [ llvm_v8bf16_ty, llvm_v8bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vminpbf16512
- : ClangBuiltin<"__builtin_ia32_vminpbf16512">,
- DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
- [ llvm_v32bf16_ty, llvm_v32bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vminpbf16256
- : ClangBuiltin<"__builtin_ia32_vminpbf16256">,
- DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
- [ llvm_v16bf16_ty, llvm_v16bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vminpbf16128
- : ClangBuiltin<"__builtin_ia32_vminpbf16128">,
- DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
- [ llvm_v8bf16_ty, llvm_v8bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vcomsbf16eq
- : ClangBuiltin<"__builtin_ia32_vcomsbf16eq">,
- DefaultAttrsIntrinsic<[llvm_i32_ty],
- [llvm_v8bf16_ty, llvm_v8bf16_ty],
- [IntrNoMem]>;
- def int_x86_avx10_vcomsbf16lt
- : ClangBuiltin<"__builtin_ia32_vcomsbf16lt">,
- DefaultAttrsIntrinsic<[llvm_i32_ty],
- [llvm_v8bf16_ty,llvm_v8bf16_ty],
- [IntrNoMem]>;
- def int_x86_avx10_vcomsbf16le
- : ClangBuiltin<"__builtin_ia32_vcomsbf16le">,
- DefaultAttrsIntrinsic<[llvm_i32_ty],
- [llvm_v8bf16_ty, llvm_v8bf16_ty],
- [IntrNoMem]>;
- def int_x86_avx10_vcomsbf16gt
- : ClangBuiltin<"__builtin_ia32_vcomsbf16gt">,
- DefaultAttrsIntrinsic<[llvm_i32_ty],
- [llvm_v8bf16_ty, llvm_v8bf16_ty],
- [IntrNoMem]>;
- def int_x86_avx10_vcomsbf16ge
- : ClangBuiltin<"__builtin_ia32_vcomsbf16ge">,
- DefaultAttrsIntrinsic<[llvm_i32_ty],
- [llvm_v8bf16_ty, llvm_v8bf16_ty],
- [IntrNoMem]>;
- def int_x86_avx10_vcomsbf16neq
- : ClangBuiltin<"__builtin_ia32_vcomsbf16neq">,
- DefaultAttrsIntrinsic<[llvm_i32_ty],
- [llvm_v8bf16_ty, llvm_v8bf16_ty],
- [IntrNoMem]>;
- def int_x86_avx10_mask_rsqrt_nepbf16_128
- : ClangBuiltin<"__builtin_ia32_vrsqrtpbf16128_mask">,
- DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
- [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
- def int_x86_avx10_mask_rsqrt_nepbf16_256
- : ClangBuiltin<"__builtin_ia32_vrsqrtpbf16256_mask">,
- DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
- [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_i16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_mask_rsqrt_nepbf16_512
- : ClangBuiltin<"__builtin_ia32_vrsqrtpbf16512_mask">,
- DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
- [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_i32_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_mask_rcp_nepbf16_128
- : ClangBuiltin<"__builtin_ia32_vrcppbf16128_mask">,
- DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
- [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
- def int_x86_avx10_mask_rcp_nepbf16_256
- : ClangBuiltin<"__builtin_ia32_vrcppbf16256_mask">,
- DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
- [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_i16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_mask_rcp_nepbf16_512
- : ClangBuiltin<"__builtin_ia32_vrcppbf16512_mask">,
- DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
- [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_i32_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_mask_reduce_nepbf16_128
- : ClangBuiltin<"__builtin_ia32_vreducenepbf16128_mask">,
- DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
- [ llvm_v8bf16_ty, llvm_i32_ty, llvm_v8bf16_ty, llvm_i8_ty ],
- [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
- def int_x86_avx10_mask_reduce_nepbf16_256
- : ClangBuiltin<"__builtin_ia32_vreducenepbf16256_mask">,
- DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
- [ llvm_v16bf16_ty, llvm_i32_ty, llvm_v16bf16_ty, llvm_i16_ty ],
- [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
- def int_x86_avx10_mask_reduce_nepbf16_512
- : ClangBuiltin<"__builtin_ia32_vreducenepbf16512_mask">,
- DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
- [ llvm_v32bf16_ty, llvm_i32_ty, llvm_v32bf16_ty, llvm_i32_ty ],
- [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
- def int_x86_avx10_fpclass_nepbf16_128
- : DefaultAttrsIntrinsic<[ llvm_v8i1_ty ], [ llvm_v8bf16_ty, llvm_i32_ty ],
- [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
- def int_x86_avx10_fpclass_nepbf16_256
- : DefaultAttrsIntrinsic<[ llvm_v16i1_ty ], [ llvm_v16bf16_ty, llvm_i32_ty ],
- [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
- def int_x86_avx10_fpclass_nepbf16_512
- : DefaultAttrsIntrinsic<[ llvm_v32i1_ty ], [ llvm_v32bf16_ty, llvm_i32_ty ],
- [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
- def int_x86_avx10_mask_getexp_nepbf16_128
- : ClangBuiltin<"__builtin_ia32_vgetexppbf16128_mask">,
- DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
- [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
- def int_x86_avx10_mask_getexp_nepbf16_256
- : ClangBuiltin<"__builtin_ia32_vgetexppbf16256_mask">,
- DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
- [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_i16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_mask_getexp_nepbf16_512
- : ClangBuiltin<"__builtin_ia32_vgetexppbf16512_mask">,
- DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
- [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_i32_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_mask_getmant_nepbf16_128
- : ClangBuiltin<"__builtin_ia32_vgetmantpbf16128_mask">,
- DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
- [ llvm_v8bf16_ty, llvm_i32_ty, llvm_v8bf16_ty, llvm_i8_ty ],
- [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
- def int_x86_avx10_mask_getmant_nepbf16_256
- : ClangBuiltin<"__builtin_ia32_vgetmantpbf16256_mask">,
- DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
- [ llvm_v16bf16_ty, llvm_i32_ty, llvm_v16bf16_ty, llvm_i16_ty ],
- [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
- def int_x86_avx10_mask_getmant_nepbf16_512
- : ClangBuiltin<"__builtin_ia32_vgetmantpbf16512_mask">,
- DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
- [ llvm_v32bf16_ty, llvm_i32_ty, llvm_v32bf16_ty, llvm_i32_ty ],
- [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
- def int_x86_avx10_mask_rndscale_nepbf16_128
- : ClangBuiltin<"__builtin_ia32_vrndscalenepbf16_128_mask">,
- DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
- [ llvm_v8bf16_ty, llvm_i32_ty, llvm_v8bf16_ty, llvm_i8_ty ],
- [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
- def int_x86_avx10_mask_rndscale_nepbf16_256
- : ClangBuiltin<"__builtin_ia32_vrndscalenepbf16_256_mask">,
- DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
- [ llvm_v16bf16_ty, llvm_i32_ty, llvm_v16bf16_ty, llvm_i16_ty ],
- [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
- def int_x86_avx10_mask_rndscale_nepbf16_512
- : ClangBuiltin<"__builtin_ia32_vrndscalenepbf16_mask">,
- DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
- [ llvm_v32bf16_ty, llvm_i32_ty, llvm_v32bf16_ty, llvm_i32_ty ],
- [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
- def int_x86_avx10_mask_scalef_nepbf16_128
- : ClangBuiltin<"__builtin_ia32_vscalefpbf16128_mask">,
- DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
- [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_i8_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_mask_scalef_nepbf16_256
- : ClangBuiltin<"__builtin_ia32_vscalefpbf16256_mask">,
- DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
- [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_i16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_mask_scalef_nepbf16_512
- : ClangBuiltin<"__builtin_ia32_vscalefpbf16512_mask">,
- DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
- [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_i32_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfmadd213nepbf16256
- : ClangBuiltin<"__builtin_ia32_vfmadd213nepbf16256">,
- DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
- [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfmadd213nepbf16128
- : ClangBuiltin<"__builtin_ia32_vfmadd213nepbf16128">,
- DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
- [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfmadd132nepbf16512
- : ClangBuiltin<"__builtin_ia32_vfmadd132nepbf16512">,
- DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
- [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfmadd132nepbf16256
- : ClangBuiltin<"__builtin_ia32_vfmadd132nepbf16256">,
- DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
- [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfmadd132nepbf16128
- : ClangBuiltin<"__builtin_ia32_vfmadd132nepbf16128">,
- DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
- [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfmadd231nepbf16512
- : ClangBuiltin<"__builtin_ia32_vfmadd231nepbf16512">,
- DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
- [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfmadd231nepbf16256
- : ClangBuiltin<"__builtin_ia32_vfmadd231nepbf16256">,
- DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
- [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfmadd231nepbf16128
- : ClangBuiltin<"__builtin_ia32_vfmadd231nepbf16128">,
- DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
- [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfmsub213nepbf16512
- : ClangBuiltin<"__builtin_ia32_vfmsub213nepbf16512">,
- DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
- [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfmsub213nepbf16256
- : ClangBuiltin<"__builtin_ia32_vfmsub213nepbf16256">,
- DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
- [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfmsub213nepbf16128
- : ClangBuiltin<"__builtin_ia32_vfmsub213nepbf16128">,
- DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
- [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfmsub132nepbf16512
- : ClangBuiltin<"__builtin_ia32_vfmsub132nepbf16512">,
- DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
- [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfmsub132nepbf16256
- : ClangBuiltin<"__builtin_ia32_vfmsub132nepbf16256">,
- DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
- [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfmsub132nepbf16128
- : ClangBuiltin<"__builtin_ia32_vfmsub132nepbf16128">,
- DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
- [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfmsub231nepbf16512
- : ClangBuiltin<"__builtin_ia32_vfmsub231nepbf16512">,
- DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
- [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfmsub231nepbf16256
- : ClangBuiltin<"__builtin_ia32_vfmsub231nepbf16256">,
- DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
- [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfmsub231nepbf16128
- : ClangBuiltin<"__builtin_ia32_vfmsub231nepbf16128">,
- DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
- [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfnmadd213nepbf16512
- : ClangBuiltin<"__builtin_ia32_vfnmadd213nepbf16512">,
- DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
- [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfnmadd213nepbf16256
- : ClangBuiltin<"__builtin_ia32_vfnmadd213nepbf16256">,
- DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
- [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfnmadd213nepbf16128
- : ClangBuiltin<"__builtin_ia32_vfnmadd213nepbf16128">,
- DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
- [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfnmadd132nepbf16512
- : ClangBuiltin<"__builtin_ia32_vfnmadd132nepbf16512">,
- DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
- [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfnmadd132nepbf16256
- : ClangBuiltin<"__builtin_ia32_vfnmadd132nepbf16256">,
- DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
- [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfnmadd132nepbf16128
- : ClangBuiltin<"__builtin_ia32_vfnmadd132nepbf16128">,
- DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
- [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfnmadd231nepbf16512
- : ClangBuiltin<"__builtin_ia32_vfnmadd231nepbf16512">,
- DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
- [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfnmadd231nepbf16256
- : ClangBuiltin<"__builtin_ia32_vfnmadd231nepbf16256">,
- DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
- [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfnmadd231nepbf16128
- : ClangBuiltin<"__builtin_ia32_vfnmadd231nepbf16128">,
- DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
- [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfnmsub213nepbf16512
- : ClangBuiltin<"__builtin_ia32_vfnmsub213nepbf16512">,
- DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
- [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfnmsub213nepbf16256
- : ClangBuiltin<"__builtin_ia32_vfnmsub213nepbf16256">,
- DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
- [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfnmsub213nepbf16128
- : ClangBuiltin<"__builtin_ia32_vfnmsub213nepbf16128">,
- DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
- [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfnmsub132nepbf16512
- : ClangBuiltin<"__builtin_ia32_vfnmsub132nepbf16512">,
- DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
- [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfnmsub132nepbf16256
- : ClangBuiltin<"__builtin_ia32_vfnmsub132nepbf16256">,
- DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
- [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfnmsub132nepbf16128
- : ClangBuiltin<"__builtin_ia32_vfnmsub132nepbf16128">,
- DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
- [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfnmsub231nepbf16512
- : ClangBuiltin<"__builtin_ia32_vfnmsub231nepbf16512">,
- DefaultAttrsIntrinsic<[ llvm_v32bf16_ty ],
- [ llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfnmsub231nepbf16256
- : ClangBuiltin<"__builtin_ia32_vfnmsub231nepbf16256">,
- DefaultAttrsIntrinsic<[ llvm_v16bf16_ty ],
- [ llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty ],
- [ IntrNoMem ]>;
- def int_x86_avx10_vfnmsub231nepbf16128
- : ClangBuiltin<"__builtin_ia32_vfnmsub231nepbf16128">,
- DefaultAttrsIntrinsic<[ llvm_v8bf16_ty ],
- [ llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
- [ IntrNoMem ]>;
+def int_x86_avx10_vaddnepbf16512 : ClangBuiltin<"__builtin_ia32_vaddnepbf16512">,
+ DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vaddnepbf16256 : ClangBuiltin<"__builtin_ia32_vaddnepbf16256">,
+ DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vaddnepbf16128 : ClangBuiltin<"__builtin_ia32_vaddnepbf16128">,
+ DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vsubnepbf16512 : ClangBuiltin<"__builtin_ia32_vsubnepbf16512">,
+ DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vsubnepbf16256 : ClangBuiltin<"__builtin_ia32_vsubnepbf16256">,
+ DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vsubnepbf16128 : ClangBuiltin<"__builtin_ia32_vsubnepbf16128">,
+ DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vmulnepbf16512 : ClangBuiltin<"__builtin_ia32_vmulnepbf16512">,
+ DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vmulnepbf16256 : ClangBuiltin<"__builtin_ia32_vmulnepbf16256">,
+ DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vmulnepbf16128 : ClangBuiltin<"__builtin_ia32_vmulnepbf16128">,
+ DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vdivnepbf16512 : ClangBuiltin<"__builtin_ia32_vdivnepbf16512">,
+ DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vdivnepbf16256 : ClangBuiltin<"__builtin_ia32_vdivnepbf16256">,
+ DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vdivnepbf16128 : ClangBuiltin<"__builtin_ia32_vdivnepbf16128">,
+ DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vmaxpbf16512 : ClangBuiltin<"__builtin_ia32_vmaxpbf16512">,
+ DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vmaxpbf16256 : ClangBuiltin<"__builtin_ia32_vmaxpbf16256">,
+ DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vmaxpbf16128 : ClangBuiltin<"__builtin_ia32_vmaxpbf16128">,
+ DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vminpbf16512 : ClangBuiltin<"__builtin_ia32_vminpbf16512">,
+ DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vminpbf16256 : ClangBuiltin<"__builtin_ia32_vminpbf16256">,
+ DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vminpbf16128 : ClangBuiltin<"__builtin_ia32_vminpbf16128">,
+ DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vcomsbf16eq : ClangBuiltin<"__builtin_ia32_vcomsbf16eq">,
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vcomsbf16lt : ClangBuiltin<"__builtin_ia32_vcomsbf16lt">,
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vcomsbf16le : ClangBuiltin<"__builtin_ia32_vcomsbf16le">,
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vcomsbf16gt : ClangBuiltin<"__builtin_ia32_vcomsbf16gt">,
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vcomsbf16ge : ClangBuiltin<"__builtin_ia32_vcomsbf16ge">,
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vcomsbf16neq : ClangBuiltin<"__builtin_ia32_vcomsbf16neq">,
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_mask_rsqrt_nepbf16_128 : ClangBuiltin<"__builtin_ia32_vrsqrtpbf16128_mask">,
+ DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_mask_rsqrt_nepbf16_256 : ClangBuiltin<"__builtin_ia32_vrsqrtpbf16256_mask">,
+ DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_mask_rsqrt_nepbf16_512 : ClangBuiltin<"__builtin_ia32_vrsqrtpbf16512_mask">,
+ DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_mask_rcp_nepbf16_128 : ClangBuiltin<"__builtin_ia32_vrcppbf16128_mask">,
+ DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_mask_rcp_nepbf16_256 : ClangBuiltin<"__builtin_ia32_vrcppbf16256_mask">,
+ DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_mask_rcp_nepbf16_512 : ClangBuiltin<"__builtin_ia32_vrcppbf16512_mask">,
+ DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_mask_reduce_nepbf16_128 : ClangBuiltin<"__builtin_ia32_vreducenepbf16128_mask">,
+ DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_i32_ty, llvm_v8bf16_ty, llvm_i8_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+def int_x86_avx10_mask_reduce_nepbf16_256 : ClangBuiltin<"__builtin_ia32_vreducenepbf16256_mask">,
+ DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_i32_ty, llvm_v16bf16_ty, llvm_i16_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+def int_x86_avx10_mask_reduce_nepbf16_512 : ClangBuiltin<"__builtin_ia32_vreducenepbf16512_mask">,
+ DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_i32_ty, llvm_v32bf16_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+def int_x86_avx10_fpclass_nepbf16_128 :
+ DefaultAttrsIntrinsic<[llvm_v8i1_ty], [llvm_v8bf16_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+def int_x86_avx10_fpclass_nepbf16_256 :
+ DefaultAttrsIntrinsic<[llvm_v16i1_ty], [llvm_v16bf16_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+def int_x86_avx10_fpclass_nepbf16_512 :
+ DefaultAttrsIntrinsic<[llvm_v32i1_ty], [llvm_v32bf16_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+def int_x86_avx10_mask_getexp_nepbf16_128 : ClangBuiltin<"__builtin_ia32_vgetexppbf16128_mask">,
+ DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_mask_getexp_nepbf16_256 : ClangBuiltin<"__builtin_ia32_vgetexppbf16256_mask">,
+ DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_mask_getexp_nepbf16_512 : ClangBuiltin<"__builtin_ia32_vgetexppbf16512_mask">,
+ DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_mask_getmant_nepbf16_128 : ClangBuiltin<"__builtin_ia32_vgetmantpbf16128_mask">,
+ DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_i32_ty, llvm_v8bf16_ty, llvm_i8_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+def int_x86_avx10_mask_getmant_nepbf16_256 : ClangBuiltin<"__builtin_ia32_vgetmantpbf16256_mask">,
+ DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_i32_ty, llvm_v16bf16_ty, llvm_i16_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+def int_x86_avx10_mask_getmant_nepbf16_512 : ClangBuiltin<"__builtin_ia32_vgetmantpbf16512_mask">,
+ DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_i32_ty, llvm_v32bf16_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+def int_x86_avx10_mask_rndscale_nepbf16_128 : ClangBuiltin<"__builtin_ia32_vrndscalenepbf16_128_mask">,
+ DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_i32_ty, llvm_v8bf16_ty, llvm_i8_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+def int_x86_avx10_mask_rndscale_nepbf16_256 : ClangBuiltin<"__builtin_ia32_vrndscalenepbf16_256_mask">,
+ DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_i32_ty, llvm_v16bf16_ty, llvm_i16_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+def int_x86_avx10_mask_rndscale_nepbf16_512 : ClangBuiltin<"__builtin_ia32_vrndscalenepbf16_mask">,
+ DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_i32_ty, llvm_v32bf16_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+def int_x86_avx10_mask_scalef_nepbf16_128 : ClangBuiltin<"__builtin_ia32_vscalefpbf16128_mask">,
+ DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_mask_scalef_nepbf16_256 : ClangBuiltin<"__builtin_ia32_vscalefpbf16256_mask">,
+ DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_mask_scalef_nepbf16_512 : ClangBuiltin<"__builtin_ia32_vscalefpbf16512_mask">,
+ DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vfmadd213nepbf16256 : ClangBuiltin<"__builtin_ia32_vfmadd213nepbf16256">,
+ DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vfmadd213nepbf16128 : ClangBuiltin<"__builtin_ia32_vfmadd213nepbf16128">,
+ DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [IntrNoMem]>;
+def int_x86_avx10_vfmadd132nepbf16512 : ClangBuiltin<"__builtin_ia32_vfmadd132nepbf16512">,
+ DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vfmadd132nepbf16256 : ClangBuiltin<"__builtin_ia32_vfmadd132nepbf16256">,
+ DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vfmadd132nepbf16128 : ClangBuiltin<"__builtin_ia32_vfmadd132nepbf16128">,
+ DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [IntrNoMem]>;
+def int_x86_avx10_vfmadd231nepbf16512 : ClangBuiltin<"__builtin_ia32_vfmadd231nepbf16512">,
+ DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vfmadd231nepbf16256 : ClangBuiltin<"__builtin_ia32_vfmadd231nepbf16256">,
+ DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vfmadd231nepbf16128 : ClangBuiltin<"__builtin_ia32_vfmadd231nepbf16128">,
+ DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [IntrNoMem]>;
+def int_x86_avx10_vfmsub213nepbf16512 : ClangBuiltin<"__builtin_ia32_vfmsub213nepbf16512">,
+ DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vfmsub213nepbf16256 : ClangBuiltin<"__builtin_ia32_vfmsub213nepbf16256">,
+ DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vfmsub213nepbf16128 : ClangBuiltin<"__builtin_ia32_vfmsub213nepbf16128">,
+ DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [IntrNoMem]>;
+def int_x86_avx10_vfmsub132nepbf16512 : ClangBuiltin<"__builtin_ia32_vfmsub132nepbf16512">,
+ DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vfmsub132nepbf16256 : ClangBuiltin<"__builtin_ia32_vfmsub132nepbf16256">,
+ DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vfmsub132nepbf16128 : ClangBuiltin<"__builtin_ia32_vfmsub132nepbf16128">,
+ DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [IntrNoMem]>;
+def int_x86_avx10_vfmsub231nepbf16512 : ClangBuiltin<"__builtin_ia32_vfmsub231nepbf16512">,
+ DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vfmsub231nepbf16256 : ClangBuiltin<"__builtin_ia32_vfmsub231nepbf16256">,
+ DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vfmsub231nepbf16128 : ClangBuiltin<"__builtin_ia32_vfmsub231nepbf16128">,
+ DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [IntrNoMem]>;
+def int_x86_avx10_vfnmadd213nepbf16512 : ClangBuiltin<"__builtin_ia32_vfnmadd213nepbf16512">,
+ DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vfnmadd213nepbf16256 : ClangBuiltin<"__builtin_ia32_vfnmadd213nepbf16256">,
+ DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vfnmadd213nepbf16128 : ClangBuiltin<"__builtin_ia32_vfnmadd213nepbf16128">,
+ DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [IntrNoMem]>;
+def int_x86_avx10_vfnmadd132nepbf16512 : ClangBuiltin<"__builtin_ia32_vfnmadd132nepbf16512">,
+ DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vfnmadd132nepbf16256 : ClangBuiltin<"__builtin_ia32_vfnmadd132nepbf16256">,
+ DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vfnmadd132nepbf16128 : ClangBuiltin<"__builtin_ia32_vfnmadd132nepbf16128">,
+ DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [IntrNoMem]>;
+def int_x86_avx10_vfnmadd231nepbf16512 : ClangBuiltin<"__builtin_ia32_vfnmadd231nepbf16512">,
+ DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vfnmadd231nepbf16256 : ClangBuiltin<"__builtin_ia32_vfnmadd231nepbf16256">,
+ DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vfnmadd231nepbf16128 : ClangBuiltin<"__builtin_ia32_vfnmadd231nepbf16128">,
+ DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [IntrNoMem]>;
+def int_x86_avx10_vfnmsub213nepbf16512 : ClangBuiltin<"__builtin_ia32_vfnmsub213nepbf16512">,
+ DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vfnmsub213nepbf16256 : ClangBuiltin<"__builtin_ia32_vfnmsub213nepbf16256">,
+ DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vfnmsub213nepbf16128 : ClangBuiltin<"__builtin_ia32_vfnmsub213nepbf16128">,
+ DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [IntrNoMem]>;
+def int_x86_avx10_vfnmsub132nepbf16512 : ClangBuiltin<"__builtin_ia32_vfnmsub132nepbf16512">,
+ DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vfnmsub132nepbf16256 : ClangBuiltin<"__builtin_ia32_vfnmsub132nepbf16256">,
+ DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vfnmsub132nepbf16128 : ClangBuiltin<"__builtin_ia32_vfnmsub132nepbf16128">,
+ DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [IntrNoMem]>;
+def int_x86_avx10_vfnmsub231nepbf16512 : ClangBuiltin<"__builtin_ia32_vfnmsub231nepbf16512">,
+ DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v32bf16_ty, llvm_v32bf16_ty, llvm_v32bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vfnmsub231nepbf16256 : ClangBuiltin<"__builtin_ia32_vfnmsub231nepbf16256">,
+ DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16bf16_ty, llvm_v16bf16_ty, llvm_v16bf16_ty],
+ [IntrNoMem]>;
+def int_x86_avx10_vfnmsub231nepbf16128 : ClangBuiltin<"__builtin_ia32_vfnmsub231nepbf16128">,
+ DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
+ [IntrNoMem]>;
}
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index 8fc66d24b1658c..bf16849331ee48 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -917,73 +917,75 @@ defm VCVTHF82PH : avx10_convert_2op_nomb<"vcvthf82ph", avx512vl_f16_info,
// VADDNEPBF16, VSUBNEPBF16, VMULNEPBF16, VDIVNEPBF16, VMAXPBF16, VMINPBF16
multiclass avx10_fp_binopne_int_pbf16<bits<8> opc, string OpcodeStr,
- X86SchedWriteSizes sched,
- bit IsCommutable = 0> {
+ X86SchedWriteSizes sched,
+ bit IsCommutable = 0> {
let Predicates = [HasAVX10_2_512] in
- defm PBF16Z : avx512_fp_packed<opc, OpcodeStr,
- !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16512"),
- !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16512"),
- v32bf16_info, sched.PH.ZMM, IsCommutable>, EVEX_V512,
- T_MAP5, PD, EVEX_CD8<16, CD8VF>;
+ defm Z : avx512_fp_packed<opc, OpcodeStr,
+ !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16512"),
+ !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16512"),
+ v32bf16_info, sched.PH.ZMM, IsCommutable>, EVEX_V512,
+ T_MAP5, PD, EVEX_CD8<16, CD8VF>;
let Predicates = [HasAVX10_2] in {
- defm PBF16Z128 : avx512_fp_packed<opc, OpcodeStr,
- !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16128"),
- !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16128"),
- v8bf16x_info, sched.PH.XMM, IsCommutable>, EVEX_V128,
- T_MAP5, PD, EVEX_CD8<16, CD8VF>;
- defm PBF16Z256 : avx512_fp_packed<opc, OpcodeStr,
- !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16256"),
- !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16256"),
- v16bf16x_info, sched.PH.YMM, IsCommutable>, EVEX_V256,
- T_MAP5, PD, EVEX_CD8<16, CD8VF>;
+ defm Z128 : avx512_fp_packed<opc, OpcodeStr,
+ !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16128"),
+ !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16128"),
+ v8bf16x_info, sched.PH.XMM, IsCommutable>, EVEX_V128,
+ T_MAP5, PD, EVEX_CD8<16, CD8VF>;
+ defm Z256 : avx512_fp_packed<opc, OpcodeStr,
+ !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16256"),
+ !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"pbf16256"),
+ v16bf16x_info, sched.PH.YMM, IsCommutable>, EVEX_V256,
+ T_MAP5, PD, EVEX_CD8<16, CD8VF>;
}
}
multiclass avx10_fp_binop_pbf16<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
- X86SchedWriteSizes sched,
- bit IsCommutable = 0,
- SDPatternOperator MaskOpNode = OpNode> {
+ X86SchedWriteSizes sched,
+ bit IsCommutable = 0,
+ SDPatternOperator MaskOpNode = OpNode> {
let Predicates = [HasAVX10_2_512] in
- defm NEPBF16Z : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
- v32bf16_info, sched.PH.ZMM, IsCommutable>, EVEX_V512,
- T_MAP5, PD, EVEX_CD8<16, CD8VF>;
+ defm Z : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
+ v32bf16_info, sched.PH.ZMM, IsCommutable>, EVEX_V512,
+ T_MAP5, PD, EVEX_CD8<16, CD8VF>;
let Predicates = [HasAVX10_2] in {
- defm NEPBF16Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
- v8bf16x_info, sched.PH.XMM, IsCommutable>, EVEX_V128,
- T_MAP5, PD, EVEX_CD8<16, CD8VF>;
- defm NEPBF16Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
- v16bf16x_info, sched.PH.YMM, IsCommutable>, EVEX_V256,
- T_MAP5, PD, EVEX_CD8<16, CD8VF>;
+ defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
+ v8bf16x_info, sched.PH.XMM, IsCommutable>, EVEX_V128,
+ T_MAP5, PD, EVEX_CD8<16, CD8VF>;
+ defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
+ v16bf16x_info, sched.PH.YMM, IsCommutable>, EVEX_V256,
+ T_MAP5, PD, EVEX_CD8<16, CD8VF>;
}
}
let Uses = []<Register>, mayRaiseFPException = 0 in {
-defm VADD : avx10_fp_binop_pbf16<0x58, "vaddne", fadd, SchedWriteFAddSizes, 1>;
-defm VSUB : avx10_fp_binop_pbf16<0x5C, "vsubne", fsub, SchedWriteFAddSizes, 0>;
-defm VMUL : avx10_fp_binop_pbf16<0x59, "vmulne", fmul, SchedWriteFMulSizes, 0>;
-defm VDIV : avx10_fp_binop_pbf16<0x5E, "vdivne", fdiv, SchedWriteFDivSizes, 0>;
-defm VMIN : avx10_fp_binopne_int_pbf16<0x5D, "vmin", SchedWriteFCmpSizes, 0>;
-defm VMAX : avx10_fp_binopne_int_pbf16<0x5F, "vmax", SchedWriteFCmpSizes, 0>;
+defm VADDNEPBF16 : avx10_fp_binop_pbf16<0x58, "vaddne", fadd, SchedWriteFAddSizes, 1>;
+defm VSUBNEPBF16 : avx10_fp_binop_pbf16<0x5C, "vsubne", fsub, SchedWriteFAddSizes, 0>;
+defm VMULNEPBF16 : avx10_fp_binop_pbf16<0x59, "vmulne", fmul, SchedWriteFMulSizes, 1>;
+defm VDIVNEPBF16 : avx10_fp_binop_pbf16<0x5E, "vdivne", fdiv, SchedWriteFDivSizes, 0>;
+defm VMINPBF16 : avx10_fp_binopne_int_pbf16<0x5D, "vmin", SchedWriteFCmpSizes, 0>;
+defm VMAXPBF16 : avx10_fp_binopne_int_pbf16<0x5F, "vmax", SchedWriteFCmpSizes, 0>;
}
// VCOMSBF16
let Uses = []<Register>, mayRaiseFPException = 0,
- Defs = [EFLAGS], Predicates = [HasAVX10_2_512] in {
+ Defs = [EFLAGS], Predicates = [HasAVX10_2] in {
+ // TODO: Replace null_frag with X86fcmp to support lowering `fcmp oeq bfloat *`,
+ // which may require extending support to BFR16X, loadbf16, ...
defm VCOMSBF16Z : sse12_ord_cmp<0x2F, FR16X, null_frag, bf16, f16mem, loadf16,
- "comsbf16", SSEPackedSingle>, T_MAP5, PD, EVEX,
- VEX_LIG, EVEX_CD8<16, CD8VT1>;
+ "comsbf16", SSEPackedSingle>, T_MAP5, PD, EVEX,
+ VEX_LIG, EVEX_CD8<16, CD8VT1>;
let isCodeGenOnly = 1 in {
defm VCOMSBF16Z : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8bf16, f16mem,
- sse_load_bf16, "comsbf16", SSEPackedSingle>,
- T_MAP5, PD, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
+ sse_load_bf16, "comsbf16", SSEPackedSingle>,
+ T_MAP5, PD, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
}
}
// VCMPPBF16
multiclass avx10_vcmp_common_bf16<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let mayRaiseFPException = 0 in {
- defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
+ defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc",
@@ -991,7 +993,7 @@ multiclass avx10_vcmp_common_bf16<X86FoldableSchedWrite sched, X86VectorVTInfo _
(X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
1>, Sched<[sched]>;
- defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
+ defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc",
@@ -1001,7 +1003,7 @@ multiclass avx10_vcmp_common_bf16<X86FoldableSchedWrite sched, X86VectorVTInfo _
timm:$cc)>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
- defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
+ defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
@@ -1019,101 +1021,102 @@ multiclass avx10_vcmp_common_bf16<X86FoldableSchedWrite sched, X86VectorVTInfo _
multiclass avx10_vcmp_bf16<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX10_2_512] in
- defm Z : avx10_vcmp_common_bf16<sched.ZMM, _.info512>, EVEX_V512;
+ defm Z : avx10_vcmp_common_bf16<sched.ZMM, _.info512>, EVEX_V512;
let Predicates = [HasAVX10_2] in {
- defm Z128 : avx10_vcmp_common_bf16<sched.XMM, _.info128>, EVEX_V128;
- defm Z256 : avx10_vcmp_common_bf16<sched.YMM, _.info256>, EVEX_V256;
+ defm Z128 : avx10_vcmp_common_bf16<sched.XMM, _.info128>, EVEX_V128;
+ defm Z256 : avx10_vcmp_common_bf16<sched.YMM, _.info256>, EVEX_V256;
}
}
defm VCMPPBF16 : avx10_vcmp_bf16<SchedWriteFCmp, avx512vl_bf16_info>,
- AVX512XDIi8Base, EVEX, VVVV, EVEX_CD8<16, CD8VF>, TA;
+ AVX512XDIi8Base, EVEX, VVVV,
+ EVEX_CD8<16, CD8VF>, TA;
// VSQRTNEPBF16
multiclass avx10_sqrt_packed_bf16<bits<8> opc, string OpcodeStr,
X86SchedWriteSizes sched> {
let Predicates = [HasAVX10_2_512] in
- defm NEPBF16Z : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pbf16"),
- sched.PH.ZMM, v32bf16_info>,
- EVEX_V512, PD, T_MAP5, EVEX_CD8<16, CD8VF>;
+ defm Z : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pbf16"),
+ sched.PH.ZMM, v32bf16_info>,
+ EVEX_V512, PD, T_MAP5, EVEX_CD8<16, CD8VF>;
let Predicates = [HasAVX10_2] in {
- defm NEPBF16Z128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pbf16"),
- sched.PH.XMM, v8bf16x_info>,
- EVEX_V128, PD, T_MAP5, EVEX_CD8<16, CD8VF>;
- defm NEPBF16Z256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pbf16"),
- sched.PH.YMM, v16bf16x_info>,
- EVEX_V256, PD, T_MAP5, EVEX_CD8<16, CD8VF>;
+ defm Z128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pbf16"),
+ sched.PH.XMM, v8bf16x_info>,
+ EVEX_V128, PD, T_MAP5, EVEX_CD8<16, CD8VF>;
+ defm Z256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pbf16"),
+ sched.PH.YMM, v16bf16x_info>,
+ EVEX_V256, PD, T_MAP5, EVEX_CD8<16, CD8VF>;
}
}
let Uses = []<Register>, mayRaiseFPException = 0 in
-defm VSQRT : avx10_sqrt_packed_bf16<0x51, "vsqrtne", SchedWriteFSqrtSizes>;
+defm VSQRTNEPBF16 : avx10_sqrt_packed_bf16<0x51, "vsqrtne", SchedWriteFSqrtSizes>;
// VRSQRTPBF16, VRCPPBF16, VSRQTPBF16, VGETEXPPBF16
multiclass avx10_fp14_pbf16<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86SchedWriteWidths sched> {
+ X86SchedWriteWidths sched> {
let Predicates = [HasAVX10_2_512] in
defm PBF16Z : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pbf16"),
- OpNode, sched.ZMM, v32bf16_info>,
- EVEX_V512;
+ OpNode, sched.ZMM, v32bf16_info>,
+ EVEX_V512;
let Predicates = [HasAVX10_2] in {
defm PBF16Z128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pbf16"),
- OpNode, sched.XMM, v8bf16x_info>,
- EVEX_V128;
+ OpNode, sched.XMM, v8bf16x_info>,
+ EVEX_V128;
defm PBF16Z256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pbf16"),
- OpNode, sched.YMM, v16bf16x_info>,
- EVEX_V256;
+ OpNode, sched.YMM, v16bf16x_info>,
+ EVEX_V256;
}
}
defm VRSQRT : avx10_fp14_pbf16<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>,
- T_MAP6, PS, EVEX_CD8<16, CD8VF>;
+ T_MAP6, PS, EVEX_CD8<16, CD8VF>;
defm VRCP : avx10_fp14_pbf16<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>,
- T_MAP6, PS, EVEX_CD8<16, CD8VF>;
+ T_MAP6, PS, EVEX_CD8<16, CD8VF>;
defm VGETEXP : avx10_fp14_pbf16<0x42, "vgetexp", X86fgetexp, SchedWriteFRnd>,
- T_MAP5, EVEX_CD8<16, CD8VF>;
+ T_MAP5, EVEX_CD8<16, CD8VF>;
// VSCALEFPBF16
multiclass avx10_fp_scalef_bf16<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched> {
let Predicates = [HasAVX10_2_512] in
- defm PBF16Z : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32bf16_info>,
- EVEX_V512, T_MAP6,PS, EVEX_CD8<16, CD8VF>;
+ defm Z : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32bf16_info>,
+ EVEX_V512, T_MAP6, PS, EVEX_CD8<16, CD8VF>;
let Predicates = [HasAVX10_2] in {
- defm PBF16Z128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8bf16x_info>,
- EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6,PS;
- defm PBF16Z256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16bf16x_info>,
- EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6,PS;
+ defm Z128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8bf16x_info>,
+ EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6, PS;
+ defm Z256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16bf16x_info>,
+ EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6, PS;
}
}
let Uses = []<Register>, mayRaiseFPException = 0 in
-defm VSCALEF : avx10_fp_scalef_bf16<0x2C, "vscalef", SchedWriteFAdd>;
+defm VSCALEFPBF16 : avx10_fp_scalef_bf16<0x2C, "vscalef", SchedWriteFAdd>;
// VREDUCENEPBF16, VRNDSCALENEPBF16, VGETMANTPBF16
multiclass avx10_common_unary_fp_packed_imm_bf16<string OpcodeStr,
AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
SDPatternOperator MaskOpNode, X86SchedWriteWidths sched> {
let Predicates = [HasAVX10_2_512] in
- defm PBF16Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
+ defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
sched.ZMM, _.info512>, EVEX_V512;
let Predicates = [HasAVX10_2] in {
- defm PBF16Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
+ defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
sched.XMM, _.info128>, EVEX_V128;
- defm PBF16Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
+ defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
sched.YMM, _.info256>, EVEX_V256;
}
}
let Uses = []<Register>, mayRaiseFPException = 0 in {
-defm VREDUCENE : avx10_common_unary_fp_packed_imm_bf16<"vreducene", avx512vl_bf16_info, 0x56,
+defm VREDUCENEPBF16 : avx10_common_unary_fp_packed_imm_bf16<"vreducene", avx512vl_bf16_info, 0x56,
X86VReduce, X86VReduce, SchedWriteFRnd>,
AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
-defm VRNDSCALENE : avx10_common_unary_fp_packed_imm_bf16<"vrndscalene", avx512vl_bf16_info, 0x08,
+defm VRNDSCALENEPBF16 : avx10_common_unary_fp_packed_imm_bf16<"vrndscalene", avx512vl_bf16_info, 0x08,
X86any_VRndScale, X86VRndScale, SchedWriteFRnd>,
AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
-defm VGETMANT : avx10_common_unary_fp_packed_imm_bf16<"vgetmant", avx512vl_bf16_info, 0x26,
+defm VGETMANTPBF16 : avx10_common_unary_fp_packed_imm_bf16<"vgetmant", avx512vl_bf16_info, 0x26,
X86VGetMant, X86VGetMant, SchedWriteFRnd>,
AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
}
@@ -1122,46 +1125,46 @@ defm VGETMANT : avx10_common_unary_fp_packed_imm_bf16<"vgetmant", avx512vl_bf16_
multiclass avx10_fp_fpclass_bf16<string OpcodeStr, bits<8> opcVec,
X86SchedWriteWidths sched> {
let Predicates = [HasAVX10_2_512] in
- defm PBF16Z : avx512_vector_fpclass<opcVec, OpcodeStr, sched.ZMM,
+ defm Z : avx512_vector_fpclass<opcVec, OpcodeStr, sched.ZMM,
avx512vl_bf16_info.info512, "z">, EVEX_V512;
let Predicates = [HasAVX10_2] in {
- defm PBF16Z128 : avx512_vector_fpclass<opcVec, OpcodeStr, sched.XMM,
+ defm Z128 : avx512_vector_fpclass<opcVec, OpcodeStr, sched.XMM,
avx512vl_bf16_info.info128, "x">, EVEX_V128;
- defm PBF16Z256 : avx512_vector_fpclass<opcVec, OpcodeStr, sched.YMM,
+ defm Z256 : avx512_vector_fpclass<opcVec, OpcodeStr, sched.YMM,
avx512vl_bf16_info.info256, "y">, EVEX_V256;
}
}
// FIXME: need to set Uses = []<Register> but avx512_vector_fpclass has InstAlias.
-defm VFPCLASS : avx10_fp_fpclass_bf16<"vfpclass", 0x66, SchedWriteFCmp>,
- AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
+defm VFPCLASSPBF16 : avx10_fp_fpclass_bf16<"vfpclass", 0x66, SchedWriteFCmp>,
+ AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
// VF[,N]M[ADD,SUB][132,213,231]NEPBF16
multiclass avx10_fma3p_213_bf16<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode, SDNode MaskOpNode,
X86SchedWriteWidths sched> {
let Predicates = [HasAVX10_2_512] in
- defm PBF16Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
sched.ZMM, v32bf16_info>, EVEX_V512, T_MAP6, PS,
EVEX_CD8<16, CD8VF>;
let Predicates = [HasAVX10_2] in {
- defm PBF16Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
sched.XMM, v8bf16x_info>, EVEX_V128, T_MAP6, PS,
EVEX_CD8<16, CD8VF>;
- defm PBF16Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
sched.YMM, v16bf16x_info>, EVEX_V256, T_MAP6, PS,
EVEX_CD8<16, CD8VF>;
}
}
let Uses = []<Register>, mayRaiseFPException = 0 in {
-defm VFMADD213NE : avx10_fma3p_213_bf16<0xA8, "vfmadd213nepbf16", any_fma,
+defm VFMADD213NEPBF16 : avx10_fma3p_213_bf16<0xA8, "vfmadd213nepbf16", any_fma,
fma, SchedWriteFMA>;
-defm VFMSUB213NE : avx10_fma3p_213_bf16<0xAA, "vfmsub213nepbf16", X86any_Fmsub,
+defm VFMSUB213NEPBF16 : avx10_fma3p_213_bf16<0xAA, "vfmsub213nepbf16", X86any_Fmsub,
X86Fmsub, SchedWriteFMA>;
-defm VFNMADD213NE : avx10_fma3p_213_bf16<0xAC, "vfnmadd213nepbf16", X86any_Fnmadd,
+defm VFNMADD213NEPBF16 : avx10_fma3p_213_bf16<0xAC, "vfnmadd213nepbf16", X86any_Fnmadd,
X86Fnmadd, SchedWriteFMA>;
-defm VFNMSUB213NE : avx10_fma3p_213_bf16<0xAE, "vfnmsub213nepbf16", X86any_Fnmsub,
+defm VFNMSUB213NEPBF16 : avx10_fma3p_213_bf16<0xAE, "vfnmsub213nepbf16", X86any_Fnmsub,
X86Fnmsub, SchedWriteFMA>;
}
@@ -1169,27 +1172,27 @@ multiclass avx10_fma3p_231_bf16<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode, SDNode MaskOpNode,
X86SchedWriteWidths sched> {
let Predicates = [HasAVX10_2_512] in
- defm PBF16Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
sched.ZMM, v32bf16_info>, EVEX_V512, T_MAP6, PS,
EVEX_CD8<16, CD8VF>;
let Predicates = [HasAVX10_2] in {
- defm PBF16Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
sched.XMM, v8bf16x_info>, EVEX_V128, T_MAP6, PS,
EVEX_CD8<16, CD8VF>;
- defm PBF16Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
sched.YMM, v16bf16x_info>, EVEX_V256, T_MAP6, PS,
EVEX_CD8<16, CD8VF>;
}
}
let Uses = []<Register>, mayRaiseFPException = 0 in {
-defm VFMADD231NE : avx10_fma3p_231_bf16<0xB8, "vfmadd231nepbf16", any_fma,
+defm VFMADD231NEPBF16 : avx10_fma3p_231_bf16<0xB8, "vfmadd231nepbf16", any_fma,
fma, SchedWriteFMA>;
-defm VFMSUB231NE : avx10_fma3p_231_bf16<0xBA, "vfmsub231nepbf16", X86any_Fmsub,
+defm VFMSUB231NEPBF16 : avx10_fma3p_231_bf16<0xBA, "vfmsub231nepbf16", X86any_Fmsub,
X86Fmsub, SchedWriteFMA>;
-defm VFNMADD231NE : avx10_fma3p_231_bf16<0xBC, "vfnmadd231nepbf16", X86any_Fnmadd,
+defm VFNMADD231NEPBF16 : avx10_fma3p_231_bf16<0xBC, "vfnmadd231nepbf16", X86any_Fnmadd,
X86Fnmadd, SchedWriteFMA>;
-defm VFNMSUB231NE : avx10_fma3p_231_bf16<0xBE, "vfnmsub231nepbf16", X86any_Fnmsub,
+defm VFNMSUB231NEPBF16 : avx10_fma3p_231_bf16<0xBE, "vfnmsub231nepbf16", X86any_Fnmsub,
X86Fnmsub, SchedWriteFMA>;
}
@@ -1197,26 +1200,26 @@ multiclass avx10_fma3p_132_bf16<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode, SDNode MaskOpNode,
X86SchedWriteWidths sched> {
let Predicates = [HasAVX10_2_512] in
- defm PBF16Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
- sched.ZMM, v32bf16_info>, EVEX_V512, T_MAP6, PS,
- EVEX_CD8<16, CD8VF>;
+ defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.ZMM, v32bf16_info>, EVEX_V512, T_MAP6, PS,
+ EVEX_CD8<16, CD8VF>;
let Predicates = [HasAVX10_2] in {
- defm PBF16Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
- sched.XMM, v8bf16x_info>, EVEX_V128, T_MAP6, PS,
- EVEX_CD8<16, CD8VF>;
- defm PBF16Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
- sched.YMM, v16bf16x_info>, EVEX_V256, T_MAP6, PS,
- EVEX_CD8<16, CD8VF>;
+ defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.XMM, v8bf16x_info>, EVEX_V128, T_MAP6, PS,
+ EVEX_CD8<16, CD8VF>;
+ defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.YMM, v16bf16x_info>, EVEX_V256, T_MAP6, PS,
+ EVEX_CD8<16, CD8VF>;
}
}
let Uses = []<Register>, mayRaiseFPException = 0 in {
-defm VFMADD132NE : avx10_fma3p_132_bf16<0x98, "vfmadd132nepbf16", any_fma,
- fma, SchedWriteFMA>;
-defm VFMSUB132NE : avx10_fma3p_132_bf16<0x9A, "vfmsub132nepbf16", X86any_Fmsub,
- X86Fmsub, SchedWriteFMA>;
-defm VFNMADD132NE : avx10_fma3p_132_bf16<0x9C, "vfnmadd132nepbf16", X86any_Fnmadd,
- X86Fnmadd, SchedWriteFMA>;
-defm VFNMSUB132NE : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_Fnmsub,
- X86Fnmsub, SchedWriteFMA>;
+defm VFMADD132NEPBF16 : avx10_fma3p_132_bf16<0x98, "vfmadd132nepbf16", any_fma,
+ fma, SchedWriteFMA>;
+defm VFMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9A, "vfmsub132nepbf16", X86any_Fmsub,
+ X86Fmsub, SchedWriteFMA>;
+defm VFNMADD132NEPBF16 : avx10_fma3p_132_bf16<0x9C, "vfnmadd132nepbf16", X86any_Fnmadd,
+ X86Fnmadd, SchedWriteFMA>;
+defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_Fnmsub,
+ X86Fnmsub, SchedWriteFMA>;
}
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index cafb7b45a8dff5..59bfd2bcbabc26 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -212,11 +212,6 @@ def X86CmpMaskCC :
SDTCisVec<1>, SDTCisSameAs<2, 1>,
SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i8>]>;
-def X86CmpMaskCC_Int :
- SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>,
- SDTCisVec<1>, SDTCisSameAs<2, 1>,
- SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i32>]>;
-
def X86MaskCmpMaskCC :
SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>,
SDTCisVec<1>, SDTCisSameAs<2, 1>,
diff --git a/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll
index d574d54b9ad792..7b81d547db085c 100644
--- a/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll
@@ -60,72 +60,6 @@ define <32 x bfloat> @test_int_x86_avx10_maskz_max_nepbf16_512(<32 x bfloat> %x1
ret <32 x bfloat> %res1
}
-declare i32 @llvm.x86.avx10.vcomsbf16eq(<8 x bfloat>, <8 x bfloat>)
-declare i32 @llvm.x86.avx10.vcomsbf16lt(<8 x bfloat>, <8 x bfloat>)
-declare i32 @llvm.x86.avx10.vcomsbf16le(<8 x bfloat>, <8 x bfloat>)
-declare i32 @llvm.x86.avx10.vcomsbf16gt(<8 x bfloat>, <8 x bfloat>)
-declare i32 @llvm.x86.avx10.vcomsbf16ge(<8 x bfloat>, <8 x bfloat>)
-declare i32 @llvm.x86.avx10.vcomsbf16neq(<8 x bfloat>, <8 x bfloat>)
-
-define i32 @test_x86_avx10_com_nesbf16_eq(<8 x bfloat> %a0, <8 x bfloat> %a1) {
-; CHECK-LABEL: test_x86_avx10_com_nesbf16_eq:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vcomsbf16 %xmm1, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc1]
-; CHECK-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
-; CHECK-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
-; CHECK-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
-; CHECK-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
-; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call i32 @llvm.x86.avx10.vcomsbf16eq(<8 x bfloat> %a0, <8 x bfloat> %a1)
- ret i32 %res
-}
-
-define i32 @test_x86_avx10_com_nesbf16_lt(<8 x bfloat> %a0, <8 x bfloat> %a1) {
-; CHECK-LABEL: test_x86_avx10_com_nesbf16_lt:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT: vcomsbf16 %xmm0, %xmm1 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc8]
-; CHECK-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
-; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call i32 @llvm.x86.avx10.vcomsbf16lt(<8 x bfloat> %a0, <8 x bfloat> %a1)
- ret i32 %res
-}
-
-define i32 @test_x86_avx10_com_nesbf16_le(<8 x bfloat> %a0, <8 x bfloat> %a1) {
-; CHECK-LABEL: test_x86_avx10_com_nesbf16_le:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT: vcomsbf16 %xmm0, %xmm1 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc8]
-; CHECK-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
-; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call i32 @llvm.x86.avx10.vcomsbf16le(<8 x bfloat> %a0, <8 x bfloat> %a1)
- ret i32 %res
-}
-
-define i32 @test_x86_avx10_com_nesbf16_gt(<8 x bfloat> %a0, <8 x bfloat> %a1) {
-; CHECK-LABEL: test_x86_avx10_com_nesbf16_gt:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT: vcomsbf16 %xmm1, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc1]
-; CHECK-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
-; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call i32 @llvm.x86.avx10.vcomsbf16ge(<8 x bfloat> %a0, <8 x bfloat> %a1)
- ret i32 %res
-}
-
-define i32 @test_x86_avx10_com_nesbf16_neq(<8 x bfloat> %a0, <8 x bfloat> %a1) {
-; CHECK-LABEL: test_x86_avx10_com_nesbf16_neq:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vcomsbf16 %xmm1, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc1]
-; CHECK-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
-; CHECK-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
-; CHECK-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
-; CHECK-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
-; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call i32 @llvm.x86.avx10.vcomsbf16neq(<8 x bfloat> %a0, <8 x bfloat> %a1)
- ret i32 %res
-}
-
declare <32 x bfloat> @llvm.x86.avx10.mask.rsqrt.nepbf16.512(<32 x bfloat>, <32 x bfloat>, i32)
define <32 x bfloat> @test_rsqrt_nepbf16_512(<32 x bfloat> %a0) {
diff --git a/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll
index f0d3ed239662f7..559d866b55cc7b 100644
--- a/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll
@@ -118,6 +118,72 @@ define <8 x bfloat> @test_int_x86_avx10_maskz_max_nepbf16_128(<8 x bfloat> %x1,
ret <8 x bfloat> %res1
}
+declare i32 @llvm.x86.avx10.vcomsbf16eq(<8 x bfloat>, <8 x bfloat>)
+declare i32 @llvm.x86.avx10.vcomsbf16lt(<8 x bfloat>, <8 x bfloat>)
+declare i32 @llvm.x86.avx10.vcomsbf16le(<8 x bfloat>, <8 x bfloat>)
+declare i32 @llvm.x86.avx10.vcomsbf16gt(<8 x bfloat>, <8 x bfloat>)
+declare i32 @llvm.x86.avx10.vcomsbf16ge(<8 x bfloat>, <8 x bfloat>)
+declare i32 @llvm.x86.avx10.vcomsbf16neq(<8 x bfloat>, <8 x bfloat>)
+
+define i32 @test_x86_avx10_com_nesbf16_eq(<8 x bfloat> %a0, <8 x bfloat> %a1) {
+; CHECK-LABEL: test_x86_avx10_com_nesbf16_eq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcomsbf16 %xmm1, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc1]
+; CHECK-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
+; CHECK-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
+; CHECK-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
+; CHECK-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call i32 @llvm.x86.avx10.vcomsbf16eq(<8 x bfloat> %a0, <8 x bfloat> %a1)
+ ret i32 %res
+}
+
+define i32 @test_x86_avx10_com_nesbf16_lt(<8 x bfloat> %a0, <8 x bfloat> %a1) {
+; CHECK-LABEL: test_x86_avx10_com_nesbf16_lt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT: vcomsbf16 %xmm0, %xmm1 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc8]
+; CHECK-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call i32 @llvm.x86.avx10.vcomsbf16lt(<8 x bfloat> %a0, <8 x bfloat> %a1)
+ ret i32 %res
+}
+
+define i32 @test_x86_avx10_com_nesbf16_le(<8 x bfloat> %a0, <8 x bfloat> %a1) {
+; CHECK-LABEL: test_x86_avx10_com_nesbf16_le:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT: vcomsbf16 %xmm0, %xmm1 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc8]
+; CHECK-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call i32 @llvm.x86.avx10.vcomsbf16le(<8 x bfloat> %a0, <8 x bfloat> %a1)
+ ret i32 %res
+}
+
+define i32 @test_x86_avx10_com_nesbf16_ge(<8 x bfloat> %a0, <8 x bfloat> %a1) {
+; CHECK-LABEL: test_x86_avx10_com_nesbf16_ge:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK-NEXT: vcomsbf16 %xmm1, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc1]
+; CHECK-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call i32 @llvm.x86.avx10.vcomsbf16ge(<8 x bfloat> %a0, <8 x bfloat> %a1)
+ ret i32 %res
+}
+
+define i32 @test_x86_avx10_com_nesbf16_neq(<8 x bfloat> %a0, <8 x bfloat> %a1) {
+; CHECK-LABEL: test_x86_avx10_com_nesbf16_neq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcomsbf16 %xmm1, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x2f,0xc1]
+; CHECK-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
+; CHECK-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
+; CHECK-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
+; CHECK-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call i32 @llvm.x86.avx10.vcomsbf16neq(<8 x bfloat> %a0, <8 x bfloat> %a1)
+ ret i32 %res
+}
+
declare <8 x bfloat> @llvm.x86.avx10.mask.rsqrt.nepbf16.128(<8 x bfloat>, <8 x bfloat>, i8)
declare <16 x bfloat> @llvm.x86.avx10.mask.rsqrt.nepbf16.256(<16 x bfloat>, <16 x bfloat>, i16)
>From cc79c56242734ae497b03a4dd472b8ec4aab3028 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Thu, 29 Aug 2024 16:50:05 +0800
Subject: [PATCH 3/5] address comments
---
llvm/lib/Target/X86/X86InstrAVX10.td | 10 ++++++----
llvm/lib/Target/X86/X86InstrAVX512.td | 4 ++--
2 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index bf16849331ee48..b0eb210b687b19 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1126,16 +1126,18 @@ multiclass avx10_fp_fpclass_bf16<string OpcodeStr, bits<8> opcVec,
X86SchedWriteWidths sched> {
let Predicates = [HasAVX10_2_512] in
defm Z : avx512_vector_fpclass<opcVec, OpcodeStr, sched.ZMM,
- avx512vl_bf16_info.info512, "z">, EVEX_V512;
+ avx512vl_bf16_info.info512, "z",
+ []<Register>>, EVEX_V512;
let Predicates = [HasAVX10_2] in {
defm Z128 : avx512_vector_fpclass<opcVec, OpcodeStr, sched.XMM,
- avx512vl_bf16_info.info128, "x">, EVEX_V128;
+ avx512vl_bf16_info.info128, "x",
+ []<Register>>, EVEX_V128;
defm Z256 : avx512_vector_fpclass<opcVec, OpcodeStr, sched.YMM,
- avx512vl_bf16_info.info256, "y">, EVEX_V256;
+ avx512vl_bf16_info.info256, "y",
+ []<Register>>, EVEX_V256;
}
}
-// FIXME: need to set Uses = []<Register> but avx512_vector_fpclass has InstAlias.
defm VFPCLASSPBF16 : avx10_fp_fpclass_bf16<"vfpclass", 0x66, SchedWriteFCmp>,
AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 88d1eb59862433..c9885242131238 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2495,8 +2495,8 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
// fpclass(reg_vec, broadcast(eltVt), imm)
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _,
- string mem>{
- let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
+ string mem, list<Register> _Uses = [MXCSR]>{
+ let ExeDomain = _.ExeDomain, Uses = _Uses in {
def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
>From b9e35f050d6319ebd8da3c0f4cf5399dd03b3915 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Mon, 2 Sep 2024 15:32:47 +0800
Subject: [PATCH 4/5] address comments
---
llvm/lib/Target/X86/X86InstrFMA3Info.cpp | 35 +-
llvm/test/CodeGen/X86/avx10.2-fma-commute.ll | 1244 ++++++++++++++++++
2 files changed, 1267 insertions(+), 12 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/avx10.2-fma-commute.ll
diff --git a/llvm/lib/Target/X86/X86InstrFMA3Info.cpp b/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
index 5825fffc770b03..fe2bd4f239359e 100644
--- a/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
+++ b/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
@@ -43,11 +43,14 @@ using namespace llvm;
FMA3GROUP(Name, Suf##m, Attrs) \
FMA3GROUP(Name, Suf##r, Attrs)
-#define FMA3GROUP_PACKED(Name, Attrs) \
+#define FMA3GROUP_PACKED_DHS(Name, Attrs) \
FMA3GROUP_PACKED_WIDTHS_ALL(Name, PD, Attrs) \
FMA3GROUP_PACKED_WIDTHS_Z(Name, PH, Attrs) \
FMA3GROUP_PACKED_WIDTHS_ALL(Name, PS, Attrs)
+#define FMA3GROUP_PACKED_BF16(Name, Attrs) \
+ FMA3GROUP_PACKED_WIDTHS_Z(Name, NEPBF16, Attrs)
+
#define FMA3GROUP_SCALAR_WIDTHS_Z(Name, Suf, Attrs) \
FMA3GROUP(Name, Suf##Zm, Attrs) \
FMA3GROUP_MASKED(Name, Suf##Zm_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
@@ -67,14 +70,15 @@ using namespace llvm;
FMA3GROUP_SCALAR_WIDTHS_ALL(Name, SS, Attrs)
#define FMA3GROUP_FULL(Name, Attrs) \
- FMA3GROUP_PACKED(Name, Attrs) \
+ FMA3GROUP_PACKED_BF16(Name, Attrs) \
+ FMA3GROUP_PACKED_DHS(Name, Attrs) \
FMA3GROUP_SCALAR(Name, Attrs)
static const X86InstrFMA3Group Groups[] = {
FMA3GROUP_FULL(VFMADD, 0)
- FMA3GROUP_PACKED(VFMADDSUB, 0)
+ FMA3GROUP_PACKED_DHS(VFMADDSUB, 0)
FMA3GROUP_FULL(VFMSUB, 0)
- FMA3GROUP_PACKED(VFMSUBADD, 0)
+ FMA3GROUP_PACKED_DHS(VFMSUBADD, 0)
FMA3GROUP_FULL(VFNMADD, 0)
FMA3GROUP_FULL(VFNMSUB, 0)
};
@@ -84,7 +88,13 @@ static const X86InstrFMA3Group Groups[] = {
FMA3GROUP_MASKED(Name, Type##Z256##Suf, Attrs) \
FMA3GROUP_MASKED(Name, Type##Z##Suf, Attrs)
-#define FMA3GROUP_PACKED_AVX512(Name, Suf, Attrs) \
+#define FMA3GROUP_PACKED_AVX512_ALL(Name, Suf, Attrs) \
+ FMA3GROUP_PACKED_AVX512_WIDTHS(Name, NEPBF16, Suf, Attrs) \
+ FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PD, Suf, Attrs) \
+ FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PH, Suf, Attrs) \
+ FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PS, Suf, Attrs)
+
+#define FMA3GROUP_PACKED_AVX512_DHS(Name, Suf, Attrs) \
FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PD, Suf, Attrs) \
FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PH, Suf, Attrs) \
FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PS, Suf, Attrs)
@@ -106,12 +116,12 @@ static const X86InstrFMA3Group Groups[] = {
FMA3GROUP_MASKED(Name, SSZ##Suf##_Int, Attrs)
static const X86InstrFMA3Group BroadcastGroups[] = {
- FMA3GROUP_PACKED_AVX512(VFMADD, mb, 0)
- FMA3GROUP_PACKED_AVX512(VFMADDSUB, mb, 0)
- FMA3GROUP_PACKED_AVX512(VFMSUB, mb, 0)
- FMA3GROUP_PACKED_AVX512(VFMSUBADD, mb, 0)
- FMA3GROUP_PACKED_AVX512(VFNMADD, mb, 0)
- FMA3GROUP_PACKED_AVX512(VFNMSUB, mb, 0)
+ FMA3GROUP_PACKED_AVX512_ALL(VFMADD, mb, 0)
+ FMA3GROUP_PACKED_AVX512_DHS(VFMADDSUB, mb, 0)
+ FMA3GROUP_PACKED_AVX512_ALL(VFMSUB, mb, 0)
+ FMA3GROUP_PACKED_AVX512_DHS(VFMSUBADD, mb, 0)
+ FMA3GROUP_PACKED_AVX512_ALL(VFNMADD, mb, 0)
+ FMA3GROUP_PACKED_AVX512_ALL(VFNMSUB, mb, 0)
};
static const X86InstrFMA3Group RoundGroups[] = {
@@ -153,7 +163,8 @@ const X86InstrFMA3Group *llvm::getFMA3Group(unsigned Opcode, uint64_t TSFlags) {
((TSFlags & X86II::EncodingMask) == X86II::EVEX &&
((TSFlags & X86II::OpMapMask) == X86II::T8 ||
(TSFlags & X86II::OpMapMask) == X86II::T_MAP6));
- bool IsFMA3Prefix = (TSFlags & X86II::OpPrefixMask) == X86II::PD;
+ bool IsFMA3Prefix = ((TSFlags & X86II::OpPrefixMask) == X86II::PD) ||
+ ((TSFlags & X86II::OpPrefixMask) == 0); // X86II::PS
if (!IsFMA3Opcode || !IsFMA3Encoding || !IsFMA3Prefix)
return nullptr;
diff --git a/llvm/test/CodeGen/X86/avx10.2-fma-commute.ll b/llvm/test/CodeGen/X86/avx10.2-fma-commute.ll
new file mode 100644
index 00000000000000..c41e03ba637cbe
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx10.2-fma-commute.ll
@@ -0,0 +1,1244 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s --mtriple=x86_64-unknown-unknown -mattr=avx10.2-512 | FileCheck %s
+
+define <8 x bfloat> @fma_123_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) {
+; CHECK-LABEL: fma_123_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd213nepbf16 %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z)
+ ret <8 x bfloat> %a
+}
+
+define <8 x bfloat> @fma_213_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) {
+; CHECK-LABEL: fma_213_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd213nepbf16 %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %x, <8 x bfloat> %z)
+ ret <8 x bfloat> %a
+}
+
+define <8 x bfloat> @fma_231_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) {
+; CHECK-LABEL: fma_231_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd231nepbf16 %xmm1, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %z, <8 x bfloat> %x)
+ ret <8 x bfloat> %a
+}
+
+define <8 x bfloat> @fma_321_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) {
+; CHECK-LABEL: fma_321_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd231nepbf16 %xmm1, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %y, <8 x bfloat> %x)
+ ret <8 x bfloat> %a
+}
+
+define <8 x bfloat> @fma_132_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) {
+; CHECK-LABEL: fma_132_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd213nepbf16 %xmm1, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %z, <8 x bfloat> %y)
+ ret <8 x bfloat> %a
+}
+
+define <8 x bfloat> @fma_312_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z) {
+; CHECK-LABEL: fma_312_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd213nepbf16 %xmm1, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %x, <8 x bfloat> %y)
+ ret <8 x bfloat> %a
+}
+
+define <8 x bfloat> @fma_load_123_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp) {
+; CHECK-LABEL: fma_load_123_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %z = load <8 x bfloat>, ptr %zp
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z)
+ ret <8 x bfloat> %a
+}
+
+define <8 x bfloat> @fma_load_213_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp) {
+; CHECK-LABEL: fma_load_213_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %z = load <8 x bfloat>, ptr %zp
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %x, <8 x bfloat> %z)
+ ret <8 x bfloat> %a
+}
+
+define <8 x bfloat> @fma_load_231_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp) {
+; CHECK-LABEL: fma_load_231_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %z = load <8 x bfloat>, ptr %zp
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %z, <8 x bfloat> %x)
+ ret <8 x bfloat> %a
+}
+
+define <8 x bfloat> @fma_load_321_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp) {
+; CHECK-LABEL: fma_load_321_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %z = load <8 x bfloat>, ptr %zp
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %y, <8 x bfloat> %x)
+ ret <8 x bfloat> %a
+}
+
+define <8 x bfloat> @fma_load_132_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp) {
+; CHECK-LABEL: fma_load_132_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %z = load <8 x bfloat>, ptr %zp
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %z, <8 x bfloat> %y)
+ ret <8 x bfloat> %a
+}
+
+define <8 x bfloat> @fma_load_312_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp) {
+; CHECK-LABEL: fma_load_312_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %z = load <8 x bfloat>, ptr %zp
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %x, <8 x bfloat> %y)
+ ret <8 x bfloat> %a
+}
+
+define <8 x bfloat> @fma_mask_123_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) {
+; CHECK-LABEL: fma_mask_123_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd132nepbf16 %xmm1, %xmm2, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z)
+ %b = bitcast i8 %mask to <8 x i1>
+ %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x
+ ret <8 x bfloat> %c
+}
+
+define <8 x bfloat> @fma_mask_213_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) {
+; CHECK-LABEL: fma_mask_213_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 %xmm2, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %x, <8 x bfloat> %z)
+ %b = bitcast i8 %mask to <8 x i1>
+ %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x
+ ret <8 x bfloat> %c
+}
+
+define <8 x bfloat> @fma_mask_231_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) {
+; CHECK-LABEL: fma_mask_231_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd231nepbf16 %xmm2, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %z, <8 x bfloat> %x)
+ %b = bitcast i8 %mask to <8 x i1>
+ %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x
+ ret <8 x bfloat> %c
+}
+
+define <8 x bfloat> @fma_mask_321_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) {
+; CHECK-LABEL: fma_mask_321_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd231nepbf16 %xmm1, %xmm2, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %y, <8 x bfloat> %x)
+ %b = bitcast i8 %mask to <8 x i1>
+ %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x
+ ret <8 x bfloat> %c
+}
+
+define <8 x bfloat> @fma_mask_132_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) {
+; CHECK-LABEL: fma_mask_132_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd132nepbf16 %xmm2, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %z, <8 x bfloat> %y)
+ %b = bitcast i8 %mask to <8 x i1>
+ %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x
+ ret <8 x bfloat> %c
+}
+
+define <8 x bfloat> @fma_mask_312_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) {
+; CHECK-LABEL: fma_mask_312_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 %xmm1, %xmm2, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %x, <8 x bfloat> %y)
+ %b = bitcast i8 %mask to <8 x i1>
+ %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x
+ ret <8 x bfloat> %c
+}
+
+define <8 x bfloat> @fma_maskz_123_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) {
+; CHECK-LABEL: fma_maskz_123_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z)
+ %b = bitcast i8 %mask to <8 x i1>
+ %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer
+ ret <8 x bfloat> %c
+}
+
+define <8 x bfloat> @fma_maskz_213_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) {
+; CHECK-LABEL: fma_maskz_213_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 %xmm2, %xmm1, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %x, <8 x bfloat> %z)
+ %b = bitcast i8 %mask to <8 x i1>
+ %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer
+ ret <8 x bfloat> %c
+}
+
+define <8 x bfloat> @fma_maskz_231_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) {
+; CHECK-LABEL: fma_maskz_231_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd231nepbf16 %xmm1, %xmm2, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %z, <8 x bfloat> %x)
+ %b = bitcast i8 %mask to <8 x i1>
+ %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer
+ ret <8 x bfloat> %c
+}
+
+define <8 x bfloat> @fma_maskz_321_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) {
+; CHECK-LABEL: fma_maskz_321_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd231nepbf16 %xmm1, %xmm2, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %y, <8 x bfloat> %x)
+ %b = bitcast i8 %mask to <8 x i1>
+ %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer
+ ret <8 x bfloat> %c
+}
+
+define <8 x bfloat> @fma_maskz_132_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) {
+; CHECK-LABEL: fma_maskz_132_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 %xmm1, %xmm2, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %z, <8 x bfloat> %y)
+ %b = bitcast i8 %mask to <8 x i1>
+ %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer
+ ret <8 x bfloat> %c
+}
+
+define <8 x bfloat> @fma_maskz_312_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z, i8 %mask) {
+; CHECK-LABEL: fma_maskz_312_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 %xmm1, %xmm2, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %x, <8 x bfloat> %y)
+ %b = bitcast i8 %mask to <8 x i1>
+ %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer
+ ret <8 x bfloat> %c
+}
+
+define <8 x bfloat> @fma_mask_load_123_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) {
+; CHECK-LABEL: fma_mask_load_123_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %z = load <8 x bfloat>, ptr %zp
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z)
+ %b = bitcast i8 %mask to <8 x i1>
+ %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x
+ ret <8 x bfloat> %c
+}
+
+define <8 x bfloat> @fma_mask_load_213_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) {
+; CHECK-LABEL: fma_mask_load_213_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %z = load <8 x bfloat>, ptr %zp
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %x, <8 x bfloat> %z)
+ %b = bitcast i8 %mask to <8 x i1>
+ %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x
+ ret <8 x bfloat> %c
+}
+
+define <8 x bfloat> @fma_mask_load_231_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) {
+; CHECK-LABEL: fma_mask_load_231_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %z = load <8 x bfloat>, ptr %zp
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %z, <8 x bfloat> %x)
+ %b = bitcast i8 %mask to <8 x i1>
+ %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x
+ ret <8 x bfloat> %c
+}
+
+define <8 x bfloat> @fma_mask_load_321_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) {
+; CHECK-LABEL: fma_mask_load_321_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %z = load <8 x bfloat>, ptr %zp
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %y, <8 x bfloat> %x)
+ %b = bitcast i8 %mask to <8 x i1>
+ %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x
+ ret <8 x bfloat> %c
+}
+
+define <8 x bfloat> @fma_mask_load_132_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) {
+; CHECK-LABEL: fma_mask_load_132_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %z = load <8 x bfloat>, ptr %zp
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %z, <8 x bfloat> %y)
+ %b = bitcast i8 %mask to <8 x i1>
+ %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x
+ ret <8 x bfloat> %c
+}
+
+define <8 x bfloat> @fma_mask_load_312_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) {
+; CHECK-LABEL: fma_mask_load_312_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %z = load <8 x bfloat>, ptr %zp
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %x, <8 x bfloat> %y)
+ %b = bitcast i8 %mask to <8 x i1>
+ %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> %x
+ ret <8 x bfloat> %c
+}
+
+define <8 x bfloat> @fma_maskz_load_123_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) {
+; CHECK-LABEL: fma_maskz_load_123_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %xmm1, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %z = load <8 x bfloat>, ptr %zp
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, <8 x bfloat> %z)
+ %b = bitcast i8 %mask to <8 x i1>
+ %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer
+ ret <8 x bfloat> %c
+}
+
+define <8 x bfloat> @fma_maskz_load_213_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) {
+; CHECK-LABEL: fma_maskz_load_213_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %xmm1, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %z = load <8 x bfloat>, ptr %zp
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %x, <8 x bfloat> %z)
+ %b = bitcast i8 %mask to <8 x i1>
+ %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer
+ ret <8 x bfloat> %c
+}
+
+define <8 x bfloat> @fma_maskz_load_231_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) {
+; CHECK-LABEL: fma_maskz_load_231_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %xmm1, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %z = load <8 x bfloat>, ptr %zp
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %y, <8 x bfloat> %z, <8 x bfloat> %x)
+ %b = bitcast i8 %mask to <8 x i1>
+ %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer
+ ret <8 x bfloat> %c
+}
+
+define <8 x bfloat> @fma_maskz_load_321_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) {
+; CHECK-LABEL: fma_maskz_load_321_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %xmm1, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %z = load <8 x bfloat>, ptr %zp
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %y, <8 x bfloat> %x)
+ %b = bitcast i8 %mask to <8 x i1>
+ %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer
+ ret <8 x bfloat> %c
+}
+
+define <8 x bfloat> @fma_maskz_load_132_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) {
+; CHECK-LABEL: fma_maskz_load_132_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %xmm1, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %z = load <8 x bfloat>, ptr %zp
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %x, <8 x bfloat> %z, <8 x bfloat> %y)
+ %b = bitcast i8 %mask to <8 x i1>
+ %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer
+ ret <8 x bfloat> %c
+}
+
+define <8 x bfloat> @fma_maskz_load_312_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y, ptr %zp, i8 %mask) {
+; CHECK-LABEL: fma_maskz_load_312_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %xmm1, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %z = load <8 x bfloat>, ptr %zp
+ %a = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %z, <8 x bfloat> %x, <8 x bfloat> %y)
+ %b = bitcast i8 %mask to <8 x i1>
+ %c = select <8 x i1> %b, <8 x bfloat> %a, <8 x bfloat> zeroinitializer
+ ret <8 x bfloat> %c
+}
+
+define <16 x bfloat> @fma_123_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) {
+; CHECK-LABEL: fma_123_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd213nepbf16 %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z)
+ ret <16 x bfloat> %a
+}
+
+define <16 x bfloat> @fma_213_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) {
+; CHECK-LABEL: fma_213_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd213nepbf16 %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %x, <16 x bfloat> %z)
+ ret <16 x bfloat> %a
+}
+
+define <16 x bfloat> @fma_231_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) {
+; CHECK-LABEL: fma_231_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd231nepbf16 %ymm1, %ymm2, %ymm0
+; CHECK-NEXT: retq
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %z, <16 x bfloat> %x)
+ ret <16 x bfloat> %a
+}
+
+define <16 x bfloat> @fma_321_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) {
+; CHECK-LABEL: fma_321_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd231nepbf16 %ymm1, %ymm2, %ymm0
+; CHECK-NEXT: retq
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %y, <16 x bfloat> %x)
+ ret <16 x bfloat> %a
+}
+
+define <16 x bfloat> @fma_132_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) {
+; CHECK-LABEL: fma_132_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd213nepbf16 %ymm1, %ymm2, %ymm0
+; CHECK-NEXT: retq
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %z, <16 x bfloat> %y)
+ ret <16 x bfloat> %a
+}
+
+define <16 x bfloat> @fma_312_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z) {
+; CHECK-LABEL: fma_312_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd213nepbf16 %ymm1, %ymm2, %ymm0
+; CHECK-NEXT: retq
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %x, <16 x bfloat> %y)
+ ret <16 x bfloat> %a
+}
+
+define <16 x bfloat> @fma_load_123_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp) {
+; CHECK-LABEL: fma_load_123_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %z = load <16 x bfloat>, ptr %zp
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z)
+ ret <16 x bfloat> %a
+}
+
+define <16 x bfloat> @fma_load_213_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp) {
+; CHECK-LABEL: fma_load_213_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %z = load <16 x bfloat>, ptr %zp
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %x, <16 x bfloat> %z)
+ ret <16 x bfloat> %a
+}
+
+define <16 x bfloat> @fma_load_231_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp) {
+; CHECK-LABEL: fma_load_231_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %z = load <16 x bfloat>, ptr %zp
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %z, <16 x bfloat> %x)
+ ret <16 x bfloat> %a
+}
+
+define <16 x bfloat> @fma_load_321_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp) {
+; CHECK-LABEL: fma_load_321_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %z = load <16 x bfloat>, ptr %zp
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %y, <16 x bfloat> %x)
+ ret <16 x bfloat> %a
+}
+
+define <16 x bfloat> @fma_load_132_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp) {
+; CHECK-LABEL: fma_load_132_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %z = load <16 x bfloat>, ptr %zp
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %z, <16 x bfloat> %y)
+ ret <16 x bfloat> %a
+}
+
+define <16 x bfloat> @fma_load_312_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp) {
+; CHECK-LABEL: fma_load_312_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %z = load <16 x bfloat>, ptr %zp
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %x, <16 x bfloat> %y)
+ ret <16 x bfloat> %a
+}
+
+define <16 x bfloat> @fma_mask_123_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) {
+; CHECK-LABEL: fma_mask_123_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd132nepbf16 %ymm1, %ymm2, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z)
+ %b = bitcast i16 %mask to <16 x i1>
+ %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x
+ ret <16 x bfloat> %c
+}
+
+define <16 x bfloat> @fma_mask_213_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) {
+; CHECK-LABEL: fma_mask_213_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 %ymm2, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %x, <16 x bfloat> %z)
+ %b = bitcast i16 %mask to <16 x i1>
+ %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x
+ ret <16 x bfloat> %c
+}
+
+define <16 x bfloat> @fma_mask_231_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) {
+; CHECK-LABEL: fma_mask_231_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd231nepbf16 %ymm2, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %z, <16 x bfloat> %x)
+ %b = bitcast i16 %mask to <16 x i1>
+ %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x
+ ret <16 x bfloat> %c
+}
+
+define <16 x bfloat> @fma_mask_321_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) {
+; CHECK-LABEL: fma_mask_321_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd231nepbf16 %ymm1, %ymm2, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %y, <16 x bfloat> %x)
+ %b = bitcast i16 %mask to <16 x i1>
+ %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x
+ ret <16 x bfloat> %c
+}
+
+define <16 x bfloat> @fma_mask_132_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) {
+; CHECK-LABEL: fma_mask_132_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd132nepbf16 %ymm2, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %z, <16 x bfloat> %y)
+ %b = bitcast i16 %mask to <16 x i1>
+ %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x
+ ret <16 x bfloat> %c
+}
+
+define <16 x bfloat> @fma_mask_312_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) {
+; CHECK-LABEL: fma_mask_312_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 %ymm1, %ymm2, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %x, <16 x bfloat> %y)
+ %b = bitcast i16 %mask to <16 x i1>
+ %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x
+ ret <16 x bfloat> %c
+}
+
+define <16 x bfloat> @fma_maskz_123_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) {
+; CHECK-LABEL: fma_maskz_123_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z)
+ %b = bitcast i16 %mask to <16 x i1>
+ %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer
+ ret <16 x bfloat> %c
+}
+
+define <16 x bfloat> @fma_maskz_213_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) {
+; CHECK-LABEL: fma_maskz_213_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 %ymm2, %ymm1, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %x, <16 x bfloat> %z)
+ %b = bitcast i16 %mask to <16 x i1>
+ %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer
+ ret <16 x bfloat> %c
+}
+
+define <16 x bfloat> @fma_maskz_231_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) {
+; CHECK-LABEL: fma_maskz_231_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd231nepbf16 %ymm1, %ymm2, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %z, <16 x bfloat> %x)
+ %b = bitcast i16 %mask to <16 x i1>
+ %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer
+ ret <16 x bfloat> %c
+}
+
+define <16 x bfloat> @fma_maskz_321_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) {
+; CHECK-LABEL: fma_maskz_321_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd231nepbf16 %ymm1, %ymm2, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %y, <16 x bfloat> %x)
+ %b = bitcast i16 %mask to <16 x i1>
+ %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer
+ ret <16 x bfloat> %c
+}
+
+define <16 x bfloat> @fma_maskz_132_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) {
+; CHECK-LABEL: fma_maskz_132_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 %ymm1, %ymm2, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %z, <16 x bfloat> %y)
+ %b = bitcast i16 %mask to <16 x i1>
+ %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer
+ ret <16 x bfloat> %c
+}
+
+define <16 x bfloat> @fma_maskz_312_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z, i16 %mask) {
+; CHECK-LABEL: fma_maskz_312_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 %ymm1, %ymm2, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %x, <16 x bfloat> %y)
+ %b = bitcast i16 %mask to <16 x i1>
+ %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer
+ ret <16 x bfloat> %c
+}
+
+define <16 x bfloat> @fma_mask_load_123_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) {
+; CHECK-LABEL: fma_mask_load_123_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %z = load <16 x bfloat>, ptr %zp
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z)
+ %b = bitcast i16 %mask to <16 x i1>
+ %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x
+ ret <16 x bfloat> %c
+}
+
+define <16 x bfloat> @fma_mask_load_213_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) {
+; CHECK-LABEL: fma_mask_load_213_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %z = load <16 x bfloat>, ptr %zp
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %x, <16 x bfloat> %z)
+ %b = bitcast i16 %mask to <16 x i1>
+ %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x
+ ret <16 x bfloat> %c
+}
+
+define <16 x bfloat> @fma_mask_load_231_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) {
+; CHECK-LABEL: fma_mask_load_231_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %z = load <16 x bfloat>, ptr %zp
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %z, <16 x bfloat> %x)
+ %b = bitcast i16 %mask to <16 x i1>
+ %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x
+ ret <16 x bfloat> %c
+}
+
+define <16 x bfloat> @fma_mask_load_321_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) {
+; CHECK-LABEL: fma_mask_load_321_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %z = load <16 x bfloat>, ptr %zp
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %y, <16 x bfloat> %x)
+ %b = bitcast i16 %mask to <16 x i1>
+ %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x
+ ret <16 x bfloat> %c
+}
+
+define <16 x bfloat> @fma_mask_load_132_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) {
+; CHECK-LABEL: fma_mask_load_132_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %z = load <16 x bfloat>, ptr %zp
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %z, <16 x bfloat> %y)
+ %b = bitcast i16 %mask to <16 x i1>
+ %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x
+ ret <16 x bfloat> %c
+}
+
+define <16 x bfloat> @fma_mask_load_312_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) {
+; CHECK-LABEL: fma_mask_load_312_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %z = load <16 x bfloat>, ptr %zp
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %x, <16 x bfloat> %y)
+ %b = bitcast i16 %mask to <16 x i1>
+ %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> %x
+ ret <16 x bfloat> %c
+}
+
+define <16 x bfloat> @fma_maskz_load_123_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) {
+; CHECK-LABEL: fma_maskz_load_123_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %ymm1, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %z = load <16 x bfloat>, ptr %zp
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, <16 x bfloat> %z)
+ %b = bitcast i16 %mask to <16 x i1>
+ %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer
+ ret <16 x bfloat> %c
+}
+
+define <16 x bfloat> @fma_maskz_load_213_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) {
+; CHECK-LABEL: fma_maskz_load_213_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %ymm1, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %z = load <16 x bfloat>, ptr %zp
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %x, <16 x bfloat> %z)
+ %b = bitcast i16 %mask to <16 x i1>
+ %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer
+ ret <16 x bfloat> %c
+}
+
+define <16 x bfloat> @fma_maskz_load_231_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) {
+; CHECK-LABEL: fma_maskz_load_231_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %ymm1, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %z = load <16 x bfloat>, ptr %zp
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %y, <16 x bfloat> %z, <16 x bfloat> %x)
+ %b = bitcast i16 %mask to <16 x i1>
+ %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer
+ ret <16 x bfloat> %c
+}
+
+define <16 x bfloat> @fma_maskz_load_321_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) {
+; CHECK-LABEL: fma_maskz_load_321_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %ymm1, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %z = load <16 x bfloat>, ptr %zp
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %y, <16 x bfloat> %x)
+ %b = bitcast i16 %mask to <16 x i1>
+ %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer
+ ret <16 x bfloat> %c
+}
+
+define <16 x bfloat> @fma_maskz_load_132_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) {
+; CHECK-LABEL: fma_maskz_load_132_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %ymm1, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %z = load <16 x bfloat>, ptr %zp
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %x, <16 x bfloat> %z, <16 x bfloat> %y)
+ %b = bitcast i16 %mask to <16 x i1>
+ %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer
+ ret <16 x bfloat> %c
+}
+
+define <16 x bfloat> @fma_maskz_load_312_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y, ptr %zp, i16 %mask) {
+; CHECK-LABEL: fma_maskz_load_312_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %ymm1, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %z = load <16 x bfloat>, ptr %zp
+ %a = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %z, <16 x bfloat> %x, <16 x bfloat> %y)
+ %b = bitcast i16 %mask to <16 x i1>
+ %c = select <16 x i1> %b, <16 x bfloat> %a, <16 x bfloat> zeroinitializer
+ ret <16 x bfloat> %c
+}
+
+define <32 x bfloat> @fma_123_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z) {
+; CHECK-LABEL: fma_123_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd213nepbf16 %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z)
+ ret <32 x bfloat> %a
+}
+
+define <32 x bfloat> @fma_213_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z) {
+; CHECK-LABEL: fma_213_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd213nepbf16 %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %x, <32 x bfloat> %z)
+ ret <32 x bfloat> %a
+}
+
+define <32 x bfloat> @fma_231_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z) {
+; CHECK-LABEL: fma_231_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd231nepbf16 %zmm1, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %z, <32 x bfloat> %x)
+ ret <32 x bfloat> %a
+}
+
+define <32 x bfloat> @fma_321_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z) {
+; CHECK-LABEL: fma_321_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd231nepbf16 %zmm1, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %y, <32 x bfloat> %x)
+ ret <32 x bfloat> %a
+}
+
+define <32 x bfloat> @fma_132_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z) {
+; CHECK-LABEL: fma_132_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd213nepbf16 %zmm1, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %z, <32 x bfloat> %y)
+ ret <32 x bfloat> %a
+}
+
+define <32 x bfloat> @fma_312_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z) {
+; CHECK-LABEL: fma_312_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd213nepbf16 %zmm1, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %x, <32 x bfloat> %y)
+ ret <32 x bfloat> %a
+}
+
+define <32 x bfloat> @fma_load_123_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp) {
+; CHECK-LABEL: fma_load_123_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %z = load <32 x bfloat>, ptr %zp
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z)
+ ret <32 x bfloat> %a
+}
+
+define <32 x bfloat> @fma_load_213_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp) {
+; CHECK-LABEL: fma_load_213_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %z = load <32 x bfloat>, ptr %zp
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %x, <32 x bfloat> %z)
+ ret <32 x bfloat> %a
+}
+
+define <32 x bfloat> @fma_load_231_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp) {
+; CHECK-LABEL: fma_load_231_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %z = load <32 x bfloat>, ptr %zp
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %z, <32 x bfloat> %x)
+ ret <32 x bfloat> %a
+}
+
+define <32 x bfloat> @fma_load_321_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp) {
+; CHECK-LABEL: fma_load_321_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %z = load <32 x bfloat>, ptr %zp
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %y, <32 x bfloat> %x)
+ ret <32 x bfloat> %a
+}
+
+define <32 x bfloat> @fma_load_132_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp) {
+; CHECK-LABEL: fma_load_132_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %z = load <32 x bfloat>, ptr %zp
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %z, <32 x bfloat> %y)
+ ret <32 x bfloat> %a
+}
+
+define <32 x bfloat> @fma_load_312_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp) {
+; CHECK-LABEL: fma_load_312_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %z = load <32 x bfloat>, ptr %zp
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %x, <32 x bfloat> %y)
+ ret <32 x bfloat> %a
+}
+
+define <32 x bfloat> @fma_mask_123_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) {
+; CHECK-LABEL: fma_mask_123_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd132nepbf16 %zmm1, %zmm2, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z)
+ %b = bitcast i32 %mask to <32 x i1>
+ %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> %x
+ ret <32 x bfloat> %c
+}
+
+define <32 x bfloat> @fma_mask_213_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) {
+; CHECK-LABEL: fma_mask_213_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 %zmm2, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %x, <32 x bfloat> %z)
+ %b = bitcast i32 %mask to <32 x i1>
+ %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> %x
+ ret <32 x bfloat> %c
+}
+
+define <32 x bfloat> @fma_mask_231_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) {
+; CHECK-LABEL: fma_mask_231_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd231nepbf16 %zmm2, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %z, <32 x bfloat> %x)
+ %b = bitcast i32 %mask to <32 x i1>
+ %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> %x
+ ret <32 x bfloat> %c
+}
+
+define <32 x bfloat> @fma_mask_321_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) {
+; CHECK-LABEL: fma_mask_321_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd231nepbf16 %zmm1, %zmm2, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %y, <32 x bfloat> %x)
+ %b = bitcast i32 %mask to <32 x i1>
+ %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> %x
+ ret <32 x bfloat> %c
+}
+
+define <32 x bfloat> @fma_mask_132_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) {
+; CHECK-LABEL: fma_mask_132_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd132nepbf16 %zmm2, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %z, <32 x bfloat> %y)
+ %b = bitcast i32 %mask to <32 x i1>
+ %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> %x
+ ret <32 x bfloat> %c
+}
+
+define <32 x bfloat> @fma_mask_312_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) {
+; CHECK-LABEL: fma_mask_312_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 %zmm1, %zmm2, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %x, <32 x bfloat> %y)
+ %b = bitcast i32 %mask to <32 x i1>
+ %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> %x
+ ret <32 x bfloat> %c
+}
+
+define <32 x bfloat> @fma_maskz_123_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) {
+; CHECK-LABEL: fma_maskz_123_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z)
+ %b = bitcast i32 %mask to <32 x i1>
+ %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> zeroinitializer
+ ret <32 x bfloat> %c
+}
+
+define <32 x bfloat> @fma_maskz_213_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) {
+; CHECK-LABEL: fma_maskz_213_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 %zmm2, %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %x, <32 x bfloat> %z)
+ %b = bitcast i32 %mask to <32 x i1>
+ %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> zeroinitializer
+ ret <32 x bfloat> %c
+}
+
+define <32 x bfloat> @fma_maskz_231_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) {
+; CHECK-LABEL: fma_maskz_231_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd231nepbf16 %zmm1, %zmm2, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %z, <32 x bfloat> %x)
+ %b = bitcast i32 %mask to <32 x i1>
+ %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> zeroinitializer
+ ret <32 x bfloat> %c
+}
+
+define <32 x bfloat> @fma_maskz_321_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) {
+; CHECK-LABEL: fma_maskz_321_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd231nepbf16 %zmm1, %zmm2, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %y, <32 x bfloat> %x)
+ %b = bitcast i32 %mask to <32 x i1>
+ %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> zeroinitializer
+ ret <32 x bfloat> %c
+}
+
+define <32 x bfloat> @fma_maskz_132_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) {
+; CHECK-LABEL: fma_maskz_132_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 %zmm1, %zmm2, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %z, <32 x bfloat> %y)
+ %b = bitcast i32 %mask to <32 x i1>
+ %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> zeroinitializer
+ ret <32 x bfloat> %c
+}
+
+define <32 x bfloat> @fma_maskz_312_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z, i32 %mask) {
+; CHECK-LABEL: fma_maskz_312_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 %zmm1, %zmm2, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %x, <32 x bfloat> %y)
+ %b = bitcast i32 %mask to <32 x i1>
+ %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> zeroinitializer
+ ret <32 x bfloat> %c
+}
+
+define <32 x bfloat> @fma_mask_load_123_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) {
+; CHECK-LABEL: fma_mask_load_123_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %z = load <32 x bfloat>, ptr %zp
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z)
+ %b = bitcast i32 %mask to <32 x i1>
+ %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> %x
+ ret <32 x bfloat> %c
+}
+
+define <32 x bfloat> @fma_mask_load_213_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) {
+; CHECK-LABEL: fma_mask_load_213_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %z = load <32 x bfloat>, ptr %zp
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %x, <32 x bfloat> %z)
+ %b = bitcast i32 %mask to <32 x i1>
+ %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> %x
+ ret <32 x bfloat> %c
+}
+
+define <32 x bfloat> @fma_mask_load_231_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) {
+; CHECK-LABEL: fma_mask_load_231_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %z = load <32 x bfloat>, ptr %zp
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %z, <32 x bfloat> %x)
+ %b = bitcast i32 %mask to <32 x i1>
+ %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> %x
+ ret <32 x bfloat> %c
+}
+
+define <32 x bfloat> @fma_mask_load_321_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) {
+; CHECK-LABEL: fma_mask_load_321_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %z = load <32 x bfloat>, ptr %zp
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %y, <32 x bfloat> %x)
+ %b = bitcast i32 %mask to <32 x i1>
+ %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> %x
+ ret <32 x bfloat> %c
+}
+
+define <32 x bfloat> @fma_mask_load_132_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) {
+; CHECK-LABEL: fma_mask_load_132_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %z = load <32 x bfloat>, ptr %zp
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %z, <32 x bfloat> %y)
+ %b = bitcast i32 %mask to <32 x i1>
+ %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> %x
+ ret <32 x bfloat> %c
+}
+
+define <32 x bfloat> @fma_mask_load_312_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) {
+; CHECK-LABEL: fma_mask_load_312_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %z = load <32 x bfloat>, ptr %zp
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %x, <32 x bfloat> %y)
+ %b = bitcast i32 %mask to <32 x i1>
+ %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> %x
+ ret <32 x bfloat> %c
+}
+
+define <32 x bfloat> @fma_maskz_load_123_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) {
+; CHECK-LABEL: fma_maskz_load_123_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %z = load <32 x bfloat>, ptr %zp
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, <32 x bfloat> %z)
+ %b = bitcast i32 %mask to <32 x i1>
+ %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> zeroinitializer
+ ret <32 x bfloat> %c
+}
+
+define <32 x bfloat> @fma_maskz_load_213_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) {
+; CHECK-LABEL: fma_maskz_load_213_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd213nepbf16 (%rdi), %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %z = load <32 x bfloat>, ptr %zp
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %x, <32 x bfloat> %z)
+ %b = bitcast i32 %mask to <32 x i1>
+ %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> zeroinitializer
+ ret <32 x bfloat> %c
+}
+
+define <32 x bfloat> @fma_maskz_load_231_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) {
+; CHECK-LABEL: fma_maskz_load_231_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %z = load <32 x bfloat>, ptr %zp
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %y, <32 x bfloat> %z, <32 x bfloat> %x)
+ %b = bitcast i32 %mask to <32 x i1>
+ %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> zeroinitializer
+ ret <32 x bfloat> %c
+}
+
+define <32 x bfloat> @fma_maskz_load_321_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) {
+; CHECK-LABEL: fma_maskz_load_321_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd231nepbf16 (%rdi), %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %z = load <32 x bfloat>, ptr %zp
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %y, <32 x bfloat> %x)
+ %b = bitcast i32 %mask to <32 x i1>
+ %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> zeroinitializer
+ ret <32 x bfloat> %c
+}
+
+define <32 x bfloat> @fma_maskz_load_132_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) {
+; CHECK-LABEL: fma_maskz_load_132_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %z = load <32 x bfloat>, ptr %zp
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %x, <32 x bfloat> %z, <32 x bfloat> %y)
+ %b = bitcast i32 %mask to <32 x i1>
+ %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> zeroinitializer
+ ret <32 x bfloat> %c
+}
+
+define <32 x bfloat> @fma_maskz_load_312_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y, ptr %zp, i32 %mask) {
+; CHECK-LABEL: fma_maskz_load_312_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vfmadd132nepbf16 (%rdi), %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %z = load <32 x bfloat>, ptr %zp
+ %a = call <32 x bfloat> @llvm.fma.v32bf16(<32 x bfloat> %z, <32 x bfloat> %x, <32 x bfloat> %y)
+ %b = bitcast i32 %mask to <32 x i1>
+ %c = select <32 x i1> %b, <32 x bfloat> %a, <32 x bfloat> zeroinitializer
+ ret <32 x bfloat> %c
+}
>From a6c4fce4069fb81ab4a50d825fe6039d45827d63 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Tue, 3 Sep 2024 17:25:45 +0800
Subject: [PATCH 5/5] address comments
---
llvm/lib/Target/X86/X86InstrFMA3Info.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/X86/X86InstrFMA3Info.cpp b/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
index fe2bd4f239359e..94de164d5f0785 100644
--- a/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
+++ b/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
@@ -163,8 +163,8 @@ const X86InstrFMA3Group *llvm::getFMA3Group(unsigned Opcode, uint64_t TSFlags) {
((TSFlags & X86II::EncodingMask) == X86II::EVEX &&
((TSFlags & X86II::OpMapMask) == X86II::T8 ||
(TSFlags & X86II::OpMapMask) == X86II::T_MAP6));
- bool IsFMA3Prefix = ((TSFlags & X86II::OpPrefixMask) == X86II::PD) ||
- ((TSFlags & X86II::OpPrefixMask) == 0); // X86II::PS
+ bool IsFMA3Prefix = (TSFlags & X86II::OpPrefixMask) == X86II::PD ||
+ (TSFlags & X86II::OpPrefixMask) == 0; // X86II::PS
if (!IsFMA3Opcode || !IsFMA3Encoding || !IsFMA3Prefix)
return nullptr;
More information about the cfe-commits
mailing list