[llvm] 0dba538 - [X86][AVX10.2] Support YMM rounding new instructions (#101825)

via llvm-commits llvm-commits at lists.llvm.org
Sun Aug 4 06:05:50 PDT 2024


Author: Phoebe Wang
Date: 2024-08-04T21:05:45+08:00
New Revision: 0dba5381d8c8e4cadc32a067bf2fe5e3486ae53d

URL: https://github.com/llvm/llvm-project/commit/0dba5381d8c8e4cadc32a067bf2fe5e3486ae53d
DIFF: https://github.com/llvm/llvm-project/commit/0dba5381d8c8e4cadc32a067bf2fe5e3486ae53d.diff

LOG: [X86][AVX10.2] Support YMM rounding new instructions (#101825)

Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965
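For context, a minimal usage sketch of the kind of intrinsic this patch exposes (not part of the commit): it assumes a clang built with this change and an AVX10.2/256 target feature flag such as -mavx10.2-256 (the flag spelling here is an assumption), and uses _mm256_add_round_ps and _mm256_mask_cmp_round_pd_mask from avx10_2niintrin.h, pulled in through immintrin.h.

    #include <immintrin.h>

    /* YMM add with explicit rounding control and exceptions suppressed. */
    __m256 add_rn(__m256 a, __m256 b) {
      return _mm256_add_round_ps(a, b,
                                 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    }

    /* Masked YMM compare; the rounding argument only carries SAE here. */
    __mmask8 le_mask(__mmask8 m, __m256d a, __m256d b) {
      return _mm256_mask_cmp_round_pd_mask(m, a, b, _CMP_LE_OQ,
                                           _MM_FROUND_NO_EXC);
    }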

Added: 
    

Modified: 
    clang/include/clang/Basic/BuiltinsX86.def
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/lib/Headers/avx10_2niintrin.h
    clang/lib/Sema/SemaX86.cpp
    clang/test/CodeGen/X86/avx10_2ni-builtins.c
    llvm/include/llvm/IR/IntrinsicsX86.td
    llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
    llvm/lib/Target/X86/X86InstrAVX10.td
    llvm/lib/Target/X86/X86InstrAVX512.td
    llvm/lib/Target/X86/X86InstrFMA3Info.cpp
    llvm/lib/Target/X86/X86IntrinsicsInfo.h
    llvm/test/CodeGen/X86/avx10_2ni-intrinsics.ll
    llvm/test/MC/Disassembler/X86/avx10_2ni-32.txt
    llvm/test/MC/Disassembler/X86/avx10_2ni-64.txt
    llvm/test/MC/X86/avx10_2ni-32-intel.s
    llvm/test/MC/X86/avx10_2ni-64-att.s
    llvm/utils/TableGen/X86DisassemblerTables.cpp

Removed: 
    


################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index f028711a807c0..b117c6d6d9340 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -1966,6 +1966,126 @@ TARGET_BUILTIN(__builtin_ia32_mpsadbw512, "V32sV64cV64cIc", "ncV:512:", "avx10.2
 TARGET_BUILTIN(__builtin_ia32_vaddpd256_round, "V4dV4dV4dIi", "nV:256:", "avx10.2-256")
 TARGET_BUILTIN(__builtin_ia32_vaddph256_round, "V16xV16xV16xIi", "nV:256:", "avx10.2-256")
 TARGET_BUILTIN(__builtin_ia32_vaddps256_round, "V8fV8fV8fIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcmppd256_round_mask, "UcV4dV4dIiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcmpph256_round_mask, "UsV16xV16xIiUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcmpps256_round_mask, "UcV8fV8fIiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtdq2ph256_round_mask, "V8xV8iV8xUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtdq2ps256_round_mask, "V8fV8iV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtpd2dq256_round_mask, "V4iV4dV4iUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtpd2ph256_round_mask, "V8xV4dV8xUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtpd2ps256_round_mask, "V4fV4dV4fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtpd2qq256_round_mask, "V4LLiV4dV4LLiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtpd2udq256_round_mask, "V4UiV4dV4UiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtpd2uqq256_round_mask, "V4ULLiV4dV4ULLiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2dq256_round_mask, "V8iV8xV8iUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2pd256_round_mask, "V4dV8xV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2psx256_round_mask, "V8fV8xV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2qq256_round_mask, "V4LLiV8xV4LLiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2udq256_round_mask, "V8UiV8xV8UiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2uqq256_round_mask, "V4ULLiV8xV4ULLiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2uw256_round_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2w256_round_mask, "V16sV16xV16sUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2dq256_round_mask, "V8iV8fV8iUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2pd256_round_mask, "V4dV4fV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2phx256_round_mask, "V8xV8fV8xUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2qq256_round_mask, "V4LLiV4fV4LLiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2udq256_round_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2uqq256_round_mask, "V4ULLiV4fV4ULLiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtqq2pd256_round_mask, "V4dV4LLiV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtqq2ph256_round_mask, "V8xV4LLiV8xUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtqq2ps256_round_mask, "V4fV4LLiV4fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2dq256_round_mask, "V4iV4dV4iUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2qq256_round_mask, "V4LLiV4dV4LLiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2udq256_round_mask, "V4UiV4dV4UiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2uqq256_round_mask, "V4ULLiV4dV4ULLiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2dq256_round_mask, "V8iV8xV8iUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2qq256_round_mask, "V4LLiV8xV4LLiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2udq256_round_mask, "V8UiV8xV8UiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2uqq256_round_mask, "V4ULLiV8xV4ULLiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2uw256_round_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2w256_round_mask, "V16sV16xV16sUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2dq256_round_mask, "V8iV8fV8iUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2qq256_round_mask, "V4LLiV4fV4LLiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2udq256_round_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2uqq256_round_mask, "V4ULLiV4fV4ULLiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtudq2ph256_round_mask, "V8xV8UiV8xUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtudq2ps256_round_mask, "V8fV8UiV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtuqq2pd256_round_mask, "V4dV4ULLiV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtuqq2ph256_round_mask, "V8xV4ULLiV8xUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtuqq2ps256_round_mask, "V4fV4ULLiV4fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtuw2ph256_round_mask, "V16xV16UsV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtw2ph256_round_mask, "V16xV16sV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vdivpd256_round, "V4dV4dV4dIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vdivph256_round, "V16xV16xV16xIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vdivps256_round, "V8fV8fV8fIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfcmaddcph256_round_mask, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfcmaddcph256_round_maskz, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfcmaddcph256_round_mask3, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfcmulcph256_round_mask, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfixupimmpd256_round_mask, "V4dV4dV4dV4LLiIiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfixupimmpd256_round_maskz, "V4dV4dV4dV4LLiIiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfixupimmps256_round_mask, "V8fV8fV8fV8iIiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfixupimmps256_round_maskz, "V8fV8fV8fV8iIiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddpd256_round_mask, "V4dV4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddpd256_round_maskz, "V4dV4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddpd256_round_mask3, "V4dV4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddph256_round_mask, "V16xV16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddph256_round_maskz, "V16xV16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddph256_round_mask3, "V16xV16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddps256_round_mask, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddps256_round_maskz, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddps256_round_mask3, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddcph256_round_mask, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddcph256_round_maskz, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddcph256_round_mask3, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd256_round_mask, "V4dV4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd256_round_maskz, "V4dV4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd256_round_mask3, "V4dV4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubph256_round_mask, "V16xV16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubph256_round_maskz, "V16xV16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubph256_round_mask3, "V16xV16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubps256_round_mask, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubps256_round_maskz, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubps256_round_mask3, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmsubpd256_round_mask3, "V4dV4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmsubph256_round_mask3, "V16xV16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmsubps256_round_mask3, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmsubaddpd256_round_mask3, "V4dV4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmsubaddph256_round_mask3, "V16xV16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmsubaddps256_round_mask3, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmulcph256_round_mask, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vgetexppd256_round_mask, "V4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vgetexpph256_round_mask, "V16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vgetexpps256_round_mask, "V8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vgetmantpd256_round_mask, "V4dV4dIiV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vgetmantph256_round_mask, "V16xV16xIiV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vgetmantps256_round_mask, "V8fV8fIiV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmaxpd256_round, "V4dV4dV4dIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmaxph256_round, "V16xV16xV16xIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmaxps256_round, "V8fV8fV8fIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminpd256_round, "V4dV4dV4dIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminph256_round, "V16xV16xV16xIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminps256_round, "V8fV8fV8fIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmulpd256_round, "V4dV4dV4dIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmulph256_round, "V16xV16xV16xIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmulps256_round, "V8fV8fV8fIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vrangepd256_round_mask, "V4dV4dV4dIiV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vrangeps256_round_mask, "V8fV8fV8fIiV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vreducepd256_round_mask, "V4dV4dIiV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vreduceph256_round_mask, "V16xV16xIiV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vreduceps256_round_mask, "V8fV8fIiV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vrndscalepd256_round_mask, "V4dV4dIiV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vrndscaleph256_round_mask, "V16xV16xIiV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vrndscaleps256_round_mask, "V8fV8fIiV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vscalefpd256_round_mask, "V4dV4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vscalefph256_round_mask, "V16xV16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vscalefps256_round_mask, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vsqrtpd256_round, "V4dV4dIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vsqrtph256_round, "V16xV16xIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vsqrtps256_round, "V8fV8fIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vsubpd256_round, "V4dV4dV4dIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vsubph256_round, "V16xV16xV16xIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vsubps256_round, "V8fV8fV8fIi", "nV:256:", "avx10.2-256")
 
 // AVX-VNNI-INT16
 TARGET_BUILTIN(__builtin_ia32_vpdpwsud128, "V4iV4iV4iV4i", "nV:128:", "avxvnniint16")

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 13caab6c42111..51d1162c6e403 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -13938,6 +13938,54 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
   case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
     IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
     break;
+  case clang::X86::BI__builtin_ia32_vfmsubph256_round_mask3:
+    Subtract = true;
+    LLVM_FALLTHROUGH;
+  case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask:
+  case clang::X86::BI__builtin_ia32_vfmaddph256_round_maskz:
+  case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask3:
+    IID = llvm::Intrinsic::x86_avx10_vfmaddph256;
+    break;
+  case clang::X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
+    Subtract = true;
+    LLVM_FALLTHROUGH;
+  case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
+  case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
+  case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
+    IID = llvm::Intrinsic::x86_avx10_vfmaddsubph256;
+    break;
+  case clang::X86::BI__builtin_ia32_vfmsubps256_round_mask3:
+    Subtract = true;
+    LLVM_FALLTHROUGH;
+  case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask:
+  case clang::X86::BI__builtin_ia32_vfmaddps256_round_maskz:
+  case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask3:
+    IID = llvm::Intrinsic::x86_avx10_vfmaddps256;
+    break;
+  case clang::X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
+    Subtract = true;
+    LLVM_FALLTHROUGH;
+  case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask:
+  case clang::X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
+  case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
+    IID = llvm::Intrinsic::x86_avx10_vfmaddpd256;
+    break;
+  case clang::X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
+    Subtract = true;
+    LLVM_FALLTHROUGH;
+  case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
+  case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
+  case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
+    IID = llvm::Intrinsic::x86_avx10_vfmaddsubps256;
+    break;
+  case clang::X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
+    Subtract = true;
+    LLVM_FALLTHROUGH;
+  case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
+  case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
+  case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
+    IID = llvm::Intrinsic::x86_avx10_vfmaddsubpd256;
+    break;
   }
 
   Value *A = Ops[0];
@@ -13977,6 +14025,12 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
   case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
   case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
   case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
+  case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask:
+  case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask:
+  case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask:
+  case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
+  case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
+  case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
     MaskFalseVal = Ops[0];
     break;
   case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
@@ -13985,6 +14039,12 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
   case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
   case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
   case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
+  case clang::X86::BI__builtin_ia32_vfmaddph256_round_maskz:
+  case clang::X86::BI__builtin_ia32_vfmaddps256_round_maskz:
+  case clang::X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
+  case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
+  case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
+  case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
     MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
     break;
   case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
@@ -13999,6 +14059,18 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
   case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
   case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
   case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
+  case clang::X86::BI__builtin_ia32_vfmsubph256_round_mask3:
+  case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask3:
+  case clang::X86::BI__builtin_ia32_vfmsubps256_round_mask3:
+  case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask3:
+  case clang::X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
+  case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
+  case clang::X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
+  case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
+  case clang::X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
+  case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
+  case clang::X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
+  case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
     MaskFalseVal = Ops[2];
     break;
   }
@@ -14686,6 +14758,12 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_vcvtw2ph512_mask:
   case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
   case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
+  case X86::BI__builtin_ia32_vcvtdq2ph256_round_mask:
+  case X86::BI__builtin_ia32_vcvtdq2ps256_round_mask:
+  case X86::BI__builtin_ia32_vcvtqq2pd256_round_mask:
+  case X86::BI__builtin_ia32_vcvtqq2ph256_round_mask:
+  case X86::BI__builtin_ia32_vcvtqq2ps256_round_mask:
+  case X86::BI__builtin_ia32_vcvtw2ph256_round_mask:
     return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
   case X86::BI__builtin_ia32_cvtudq2ps512_mask:
   case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
@@ -14693,6 +14771,12 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
   case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
   case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
+  case X86::BI__builtin_ia32_vcvtudq2ph256_round_mask:
+  case X86::BI__builtin_ia32_vcvtudq2ps256_round_mask:
+  case X86::BI__builtin_ia32_vcvtuqq2pd256_round_mask:
+  case X86::BI__builtin_ia32_vcvtuqq2ph256_round_mask:
+  case X86::BI__builtin_ia32_vcvtuqq2ps256_round_mask:
+  case X86::BI__builtin_ia32_vcvtuw2ph256_round_mask:
     return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
 
   case X86::BI__builtin_ia32_vfmaddss3:
@@ -14736,6 +14820,18 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_vfmaddpd512_mask3:
   case X86::BI__builtin_ia32_vfmsubpd512_mask3:
   case X86::BI__builtin_ia32_vfmsubph512_mask3:
+  case X86::BI__builtin_ia32_vfmaddph256_round_mask:
+  case X86::BI__builtin_ia32_vfmaddph256_round_maskz:
+  case X86::BI__builtin_ia32_vfmaddph256_round_mask3:
+  case X86::BI__builtin_ia32_vfmaddps256_round_mask:
+  case X86::BI__builtin_ia32_vfmaddps256_round_maskz:
+  case X86::BI__builtin_ia32_vfmaddps256_round_mask3:
+  case X86::BI__builtin_ia32_vfmsubps256_round_mask3:
+  case X86::BI__builtin_ia32_vfmaddpd256_round_mask:
+  case X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
+  case X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
+  case X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
+  case X86::BI__builtin_ia32_vfmsubph256_round_mask3:
     return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
   case X86::BI__builtin_ia32_vfmaddsubph512_mask:
   case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
@@ -14749,6 +14845,18 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
   case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
   case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
+  case X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
+  case X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
+  case X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
+  case X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
+  case X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
+  case X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
+  case X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
+  case X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
+  case X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
+  case X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
+  case X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
+  case X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
     return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
 
   case X86::BI__builtin_ia32_movdqa32store128_mask:
@@ -16329,6 +16437,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_cmppd128_mask:
   case X86::BI__builtin_ia32_cmppd256_mask:
   case X86::BI__builtin_ia32_cmppd512_mask:
+  case X86::BI__builtin_ia32_vcmppd256_round_mask:
+  case X86::BI__builtin_ia32_vcmpps256_round_mask:
+  case X86::BI__builtin_ia32_vcmpph256_round_mask:
     IsMaskFCmp = true;
     [[fallthrough]];
   case X86::BI__builtin_ia32_cmpps:
@@ -16832,6 +16943,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
     return EmitX86Select(*this, Ops[3], Call, Ops[0]);
   }
+  case X86::BI__builtin_ia32_vfcmaddcph256_round_mask:
+    IsConjFMA = true;
+    LLVM_FALLTHROUGH;
+  case X86::BI__builtin_ia32_vfmaddcph256_round_mask: {
+    Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx10_mask_vfcmaddcph256
+                                  : Intrinsic::x86_avx10_mask_vfmaddcph256;
+    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
+    return EmitX86Select(*this, Ops[3], Call, Ops[0]);
+  }
   case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
     IsConjFMA = true;
     [[fallthrough]];

diff --git a/clang/lib/Headers/avx10_2niintrin.h b/clang/lib/Headers/avx10_2niintrin.h
index 3527e0eaf5c89..42b24d2b5b18f 100644
--- a/clang/lib/Headers/avx10_2niintrin.h
+++ b/clang/lib/Headers/avx10_2niintrin.h
@@ -79,5 +79,1628 @@
       (__mmask8)(U), (__v8sf)_mm256_add_round_ps((A), (B), (R)),               \
       (__v8sf)_mm256_setzero_ps()))
 
+#define _mm256_cmp_round_pd_mask(A, B, P, R)                                   \
+  ((__mmask8)__builtin_ia32_vcmppd256_round_mask(                              \
+      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(P), (__mmask8)-1,      \
+      (int)(R)))
+
+#define _mm256_mask_cmp_round_pd_mask(U, A, B, P, R)                           \
+  ((__mmask8)__builtin_ia32_vcmppd256_round_mask(                              \
+      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(P), (__mmask8)(U),     \
+      (int)(R)))
+
+#define _mm256_cmp_round_ph_mask(A, B, P, R)                                   \
+  ((__mmask16)__builtin_ia32_vcmpph256_round_mask(                             \
+      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(P), (__mmask16)-1,   \
+      (int)(R)))
+
+#define _mm256_mask_cmp_round_ph_mask(U, A, B, P, R)                           \
+  ((__mmask16)__builtin_ia32_vcmpph256_round_mask(                             \
+      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(P), (__mmask16)(U),  \
+      (int)(R)))
+
+#define _mm256_cmp_round_ps_mask(A, B, P, R)                                   \
+  ((__mmask8)__builtin_ia32_vcmpps256_round_mask(                              \
+      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(P), (__mmask8)-1,        \
+      (int)(R)))
+
+#define _mm256_mask_cmp_round_ps_mask(U, A, B, P, R)                           \
+  ((__mmask8)__builtin_ia32_vcmpps256_round_mask(                              \
+      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(P), (__mmask8)(U),       \
+      (int)(R)))
+
+#define _mm256_cvt_roundepi32_ph(A, R)                                         \
+  ((__m128h)__builtin_ia32_vcvtdq2ph256_round_mask(                            \
+      (__v8si)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R)))
+
+#define _mm256_mask_cvt_roundepi32_ph(W, U, A, R)                              \
+  ((__m128h)__builtin_ia32_vcvtdq2ph256_round_mask((__v8si)(A), (__v8hf)(W),   \
+                                                   (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundepi32_ph(U, A, R)                                \
+  ((__m128h)__builtin_ia32_vcvtdq2ph256_round_mask(                            \
+      (__v8si)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
+
+#define _mm256_cvt_roundepi32_ps(A, R)                                         \
+  ((__m256)__builtin_ia32_vcvtdq2ps256_round_mask((__v8si)(__m256i)(A),        \
+                                                  (__v8sf)_mm256_setzero_ps(), \
+                                                  (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_cvt_roundepi32_ps(W, U, A, R)                              \
+  ((__m256)__builtin_ia32_vcvtdq2ps256_round_mask(                             \
+      (__v8si)(__m256i)(A), (__v8sf)(__m256)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundepi32_ps(U, A, R)                                \
+  ((__m256)__builtin_ia32_vcvtdq2ps256_round_mask((__v8si)(__m256i)(A),        \
+                                                  (__v8sf)_mm256_setzero_ps(), \
+                                                  (__mmask8)(U), (int)(R)))
+
+#define _mm256_cvt_roundpd_epi32(A, R)                                         \
+  ((__m128i)__builtin_ia32_vcvtpd2dq256_round_mask(                            \
+      (__v4df)(__m256d)(A), (__v4si)_mm_setzero_si128(), (__mmask8)-1,         \
+      (int)(R)))
+
+#define _mm256_mask_cvt_roundpd_epi32(W, U, A, R)                              \
+  ((__m128i)__builtin_ia32_vcvtpd2dq256_round_mask(                            \
+      (__v4df)(__m256d)(A), (__v4si)(__m128i)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundpd_epi32(U, A, R)                                \
+  ((__m128i)__builtin_ia32_vcvtpd2dq256_round_mask(                            \
+      (__v4df)(__m256d)(A), (__v4si)_mm_setzero_si128(), (__mmask8)(U),        \
+      (int)(R)))
+
+#define _mm256_cvt_roundpd_ph(A, R)                                            \
+  ((__m128h)__builtin_ia32_vcvtpd2ph256_round_mask(                            \
+      (__v4df)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R)))
+
+#define _mm256_mask_cvt_roundpd_ph(W, U, A, R)                                 \
+  ((__m128h)__builtin_ia32_vcvtpd2ph256_round_mask((__v4df)(A), (__v8hf)(W),   \
+                                                   (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundpd_ph(U, A, R)                                   \
+  ((__m128h)__builtin_ia32_vcvtpd2ph256_round_mask(                            \
+      (__v4df)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
+
+#define _mm256_cvt_roundpd_ps(A, R)                                            \
+  ((__m128)__builtin_ia32_vcvtpd2ps256_round_mask(                             \
+      (__v4df)(__m256d)(A), (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_cvt_roundpd_ps(W, U, A, R)                                 \
+  ((__m128)__builtin_ia32_vcvtpd2ps256_round_mask(                             \
+      (__v4df)(__m256d)(A), (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundpd_ps(U, A, R)                                   \
+  ((__m128)__builtin_ia32_vcvtpd2ps256_round_mask((__v4df)(__m256d)(A),        \
+                                                  (__v4sf)_mm_setzero_ps(),    \
+                                                  (__mmask8)(U), (int)(R)))
+
+#define _mm256_cvt_roundpd_epi64(A, R)                                         \
+  ((__m256i)__builtin_ia32_vcvtpd2qq256_round_mask(                            \
+      (__v4df)(__m256d)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)-1,      \
+      (int)(R)))
+
+#define _mm256_mask_cvt_roundpd_epi64(W, U, A, R)                              \
+  ((__m256i)__builtin_ia32_vcvtpd2qq256_round_mask(                            \
+      (__v4df)(__m256d)(A), (__v4di)(__m256i)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundpd_epi64(U, A, R)                                \
+  ((__m256i)__builtin_ia32_vcvtpd2qq256_round_mask(                            \
+      (__v4df)(__m256d)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)(U),     \
+      (int)(R)))
+
+#define _mm256_cvt_roundpd_epu32(A, R)                                         \
+  ((__m128i)__builtin_ia32_vcvtpd2udq256_round_mask(                           \
+      (__v4df)(__m256d)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1,         \
+      (int)(R)))
+
+#define _mm256_mask_cvt_roundpd_epu32(W, U, A, R)                              \
+  ((__m128i)__builtin_ia32_vcvtpd2udq256_round_mask(                           \
+      (__v4df)(__m256d)(A), (__v4su)(__m128i)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundpd_epu32(U, A, R)                                \
+  ((__m128i)__builtin_ia32_vcvtpd2udq256_round_mask(                           \
+      (__v4df)(__m256d)(A), (__v4su)_mm_setzero_si128(), (__mmask8)(U),        \
+      (int)(R)))
+
+#define _mm256_cvt_roundpd_epu64(A, R)                                         \
+  ((__m256i)__builtin_ia32_vcvtpd2uqq256_round_mask(                           \
+      (__v4df)(__m256d)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)-1,      \
+      (int)(R)))
+
+#define _mm256_mask_cvt_roundpd_epu64(W, U, A, R)                              \
+  ((__m256i)__builtin_ia32_vcvtpd2uqq256_round_mask(                           \
+      (__v4df)(__m256d)(A), (__v4du)(__m256i)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundpd_epu64(U, A, R)                                \
+  ((__m256i)__builtin_ia32_vcvtpd2uqq256_round_mask(                           \
+      (__v4df)(__m256d)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)(U),     \
+      (int)(R)))
+
+#define _mm256_cvt_roundph_epi32(A, R)                                         \
+  ((__m256i)__builtin_ia32_vcvtph2dq256_round_mask(                            \
+      (__v8hf)(A), (__v8si)_mm256_undefined_si256(), (__mmask8)(-1),           \
+      (int)(R)))
+
+#define _mm256_mask_cvt_roundph_epi32(W, U, A, R)                              \
+  ((__m256i)__builtin_ia32_vcvtph2dq256_round_mask((__v8hf)(A), (__v8si)(W),   \
+                                                   (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundph_epi32(U, A, R)                                \
+  ((__m256i)__builtin_ia32_vcvtph2dq256_round_mask(                            \
+      (__v8hf)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), (int)(R)))
+
+#define _mm256_cvt_roundph_pd(A, R)                                            \
+  ((__m256d)__builtin_ia32_vcvtph2pd256_round_mask(                            \
+      (__v8hf)(A), (__v4df)_mm256_undefined_pd(), (__mmask8)(-1), (int)(R)))
+
+#define _mm256_mask_cvt_roundph_pd(W, U, A, R)                                 \
+  ((__m256d)__builtin_ia32_vcvtph2pd256_round_mask((__v8hf)(A), (__v4df)(W),   \
+                                                   (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundph_pd(U, A, R)                                   \
+  ((__m256d)__builtin_ia32_vcvtph2pd256_round_mask(                            \
+      (__v8hf)(A), (__v4df)_mm256_setzero_pd(), (__mmask8)(U), (int)(R)))
+
+#define _mm256_cvtx_roundph_ps(A, R)                                           \
+  ((__m256)__builtin_ia32_vcvtph2psx256_round_mask(                            \
+      (__v8hf)(A), (__v8sf)_mm256_undefined_ps(), (__mmask8)(-1), (int)(R)))
+
+#define _mm256_mask_cvtx_roundph_ps(W, U, A, R)                                \
+  ((__m256)__builtin_ia32_vcvtph2psx256_round_mask((__v8hf)(A), (__v8sf)(W),   \
+                                                   (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvtx_roundph_ps(U, A, R)                                  \
+  ((__m256)__builtin_ia32_vcvtph2psx256_round_mask(                            \
+      (__v8hf)(A), (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), (int)(R)))
+
+#define _mm256_cvt_roundph_epi64(A, R)                                         \
+  ((__m256i)__builtin_ia32_vcvtph2qq256_round_mask(                            \
+      (__v8hf)(A), (__v4di)_mm256_undefined_si256(), (__mmask8)(-1),           \
+      (int)(R)))
+
+#define _mm256_mask_cvt_roundph_epi64(W, U, A, R)                              \
+  ((__m256i)__builtin_ia32_vcvtph2qq256_round_mask((__v8hf)(A), (__v4di)(W),   \
+                                                   (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundph_epi64(U, A, R)                                \
+  ((__m256i)__builtin_ia32_vcvtph2qq256_round_mask(                            \
+      (__v8hf)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)(U), (int)(R)))
+
+#define _mm256_cvt_roundph_epu32(A, R)                                         \
+  ((__m256i)__builtin_ia32_vcvtph2udq256_round_mask(                           \
+      (__v8hf)(A), (__v8su)_mm256_undefined_si256(), (__mmask8)(-1),           \
+      (int)(R)))
+
+#define _mm256_mask_cvt_roundph_epu32(W, U, A, R)                              \
+  ((__m256i)__builtin_ia32_vcvtph2udq256_round_mask((__v8hf)(A), (__v8su)(W),  \
+                                                    (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundph_epu32(U, A, R)                                \
+  ((__m256i)__builtin_ia32_vcvtph2udq256_round_mask(                           \
+      (__v8hf)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)(U), (int)(R)))
+
+#define _mm256_cvt_roundph_epu64(A, R)                                         \
+  ((__m256i)__builtin_ia32_vcvtph2uqq256_round_mask(                           \
+      (__v8hf)(A), (__v4du)_mm256_undefined_si256(), (__mmask8)(-1),           \
+      (int)(R)))
+
+#define _mm256_mask_cvt_roundph_epu64(W, U, A, R)                              \
+  ((__m256i)__builtin_ia32_vcvtph2uqq256_round_mask((__v8hf)(A), (__v4du)(W),  \
+                                                    (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundph_epu64(U, A, R)                                \
+  ((__m256i)__builtin_ia32_vcvtph2uqq256_round_mask(                           \
+      (__v8hf)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)(U), (int)(R)))
+
+#define _mm256_cvt_roundph_epu16(A, R)                                         \
+  ((__m256i)__builtin_ia32_vcvtph2uw256_round_mask(                            \
+      (__v16hf)(A), (__v16hu)_mm256_undefined_si256(), (__mmask16)(-1),        \
+      (int)(R)))
+
+#define _mm256_mask_cvt_roundph_epu16(W, U, A, R)                              \
+  ((__m256i)__builtin_ia32_vcvtph2uw256_round_mask((__v16hf)(A), (__v16hu)(W), \
+                                                   (__mmask16)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundph_epu16(U, A, R)                                \
+  ((__m256i)__builtin_ia32_vcvtph2uw256_round_mask(                            \
+      (__v16hf)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U),           \
+      (int)(R)))
+
+#define _mm256_cvt_roundph_epi16(A, R)                                         \
+  ((__m256i)__builtin_ia32_vcvtph2w256_round_mask(                             \
+      (__v16hf)(A), (__v16hi)_mm256_undefined_si256(), (__mmask16)(-1),        \
+      (int)(R)))
+
+#define _mm256_mask_cvt_roundph_epi16(W, U, A, R)                              \
+  ((__m256i)__builtin_ia32_vcvtph2w256_round_mask((__v16hf)(A), (__v16hi)(W),  \
+                                                  (__mmask16)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundph_epi16(U, A, R)                                \
+  ((__m256i)__builtin_ia32_vcvtph2w256_round_mask(                             \
+      (__v16hf)(A), (__v16hi)_mm256_setzero_si256(), (__mmask16)(U),           \
+      (int)(R)))
+
+#define _mm256_cvt_roundps_epi32(A, R)                                         \
+  ((__m256i)__builtin_ia32_vcvtps2dq256_round_mask(                            \
+      (__v8sf)(__m256)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)-1,       \
+      (int)(R)))
+
+#define _mm256_mask_cvt_roundps_epi32(W, U, A, R)                              \
+  ((__m256i)__builtin_ia32_vcvtps2dq256_round_mask(                            \
+      (__v8sf)(__m256)(A), (__v8si)(__m256i)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundps_epi32(U, A, R)                                \
+  ((__m256i)__builtin_ia32_vcvtps2dq256_round_mask(                            \
+      (__v8sf)(__m256)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U),      \
+      (int)(R)))
+
+#define _mm256_cvt_roundps_pd(A, R)                                            \
+  ((__m256d)__builtin_ia32_vcvtps2pd256_round_mask(                            \
+      (__v4sf)(__m128)(A), (__v4df)_mm256_undefined_pd(), (__mmask8)-1,        \
+      (int)(R)))
+
+#define _mm256_mask_cvt_roundps_pd(W, U, A, R)                                 \
+  ((__m256d)__builtin_ia32_vcvtps2pd256_round_mask(                            \
+      (__v4sf)(__m128)(A), (__v4df)(__m256d)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundps_pd(U, A, R)                                   \
+  ((__m256d)__builtin_ia32_vcvtps2pd256_round_mask(                            \
+      (__v4sf)(__m128)(A), (__v4df)_mm256_setzero_pd(), (__mmask8)(U),         \
+      (int)(R)))
+
+#define _mm256_cvt_roundps_ph(A, I)                                            \
+  ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I),    \
+                                             (__v8hi)_mm_undefined_si128(),    \
+                                             (__mmask8)-1))
+
+/* FIXME: We may use this approach in the future.
+#define _mm256_cvt_roundps_ph(A, I)                                            \
+  ((__m128i)__builtin_ia32_vcvtps2ph256_round_mask(                            \
+      (__v8sf)(__m256)(A), (int)(I), (__v8hi)_mm_undefined_si128(),            \
+      (__mmask8)-1))
+#define _mm256_mask_cvt_roundps_ph(U, W, A, I)                                 \
+  ((__m128i)__builtin_ia32_vcvtps2ph256_round_mask(                            \
+      (__v8sf)(__m256)(A), (int)(I), (__v8hi)(__m128i)(U), (__mmask8)(W)))
+#define _mm256_maskz_cvt_roundps_ph(W, A, I)                                   \
+  ((__m128i)__builtin_ia32_vcvtps2ph256_round_mask(                            \
+      (__v8sf)(__m256)(A), (int)(I), (__v8hi)_mm_setzero_si128(),              \
+      (__mmask8)(W))) */
+
+#define _mm256_cvtx_roundps_ph(A, R)                                           \
+  ((__m128h)__builtin_ia32_vcvtps2phx256_round_mask(                           \
+      (__v8sf)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R)))
+
+#define _mm256_mask_cvtx_roundps_ph(W, U, A, R)                                \
+  ((__m128h)__builtin_ia32_vcvtps2phx256_round_mask((__v8sf)(A), (__v8hf)(W),  \
+                                                    (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvtx_roundps_ph(U, A, R)                                  \
+  ((__m128h)__builtin_ia32_vcvtps2phx256_round_mask(                           \
+      (__v8sf)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
+
+#define _mm256_cvt_roundps_epi64(A, R)                                         \
+  ((__m256i)__builtin_ia32_vcvtps2qq256_round_mask(                            \
+      (__v4sf)(__m128)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)-1,       \
+      (int)(R)))
+
+#define _mm256_mask_cvt_roundps_epi64(W, U, A, R)                              \
+  ((__m256i)__builtin_ia32_vcvtps2qq256_round_mask(                            \
+      (__v4sf)(__m128)(A), (__v4di)(__m256i)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundps_epi64(U, A, R)                                \
+  ((__m256i)__builtin_ia32_vcvtps2qq256_round_mask(                            \
+      (__v4sf)(__m128)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)(U),      \
+      (int)(R)))
+
+#define _mm256_cvt_roundps_epu32(A, R)                                         \
+  ((__m256i)__builtin_ia32_vcvtps2udq256_round_mask(                           \
+      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1,       \
+      (int)(R)))
+
+#define _mm256_mask_cvt_roundps_epu32(W, U, A, R)                              \
+  ((__m256i)__builtin_ia32_vcvtps2udq256_round_mask(                           \
+      (__v8sf)(__m256)(A), (__v8su)(__m256i)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundps_epu32(U, A, R)                                \
+  ((__m256i)__builtin_ia32_vcvtps2udq256_round_mask(                           \
+      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)(U),      \
+      (int)(R)))
+
+#define _mm256_cvt_roundps_epu64(A, R)                                         \
+  ((__m256i)__builtin_ia32_vcvtps2uqq256_round_mask(                           \
+      (__v4sf)(__m128)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)-1,       \
+      (int)(R)))
+
+#define _mm256_mask_cvt_roundps_epu64(W, U, A, R)                              \
+  ((__m256i)__builtin_ia32_vcvtps2uqq256_round_mask(                           \
+      (__v4sf)(__m128)(A), (__v4du)(__m256i)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundps_epu64(U, A, R)                                \
+  ((__m256i)__builtin_ia32_vcvtps2uqq256_round_mask(                           \
+      (__v4sf)(__m128)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)(U),      \
+      (int)(R)))
+
+#define _mm256_cvt_roundepi64_pd(A, R)                                         \
+  ((__m256d)__builtin_ia32_vcvtqq2pd256_round_mask(                            \
+      (__v4di)(__m256i)(A), (__v4df)_mm256_setzero_pd(), (__mmask8)-1,         \
+      (int)(R)))
+
+#define _mm256_mask_cvt_roundepi64_pd(W, U, A, R)                              \
+  ((__m256d)__builtin_ia32_vcvtqq2pd256_round_mask(                            \
+      (__v4di)(__m256i)(A), (__v4df)(__m256d)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundepi64_pd(U, A, R)                                \
+  ((__m256d)__builtin_ia32_vcvtqq2pd256_round_mask(                            \
+      (__v4di)(__m256i)(A), (__v4df)_mm256_setzero_pd(), (__mmask8)(U),        \
+      (int)(R)))
+
+#define _mm256_cvt_roundepi64_ph(A, R)                                         \
+  ((__m128h)__builtin_ia32_vcvtqq2ph256_round_mask(                            \
+      (__v4di)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R)))
+
+#define _mm256_mask_cvt_roundepi64_ph(W, U, A, R)                              \
+  ((__m128h)__builtin_ia32_vcvtqq2ph256_round_mask((__v4di)(A), (__v8hf)(W),   \
+                                                   (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundepi64_ph(U, A, R)                                \
+  ((__m128h)__builtin_ia32_vcvtqq2ph256_round_mask(                            \
+      (__v4di)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
+
+#define _mm256_cvt_roundepi64_ps(A, R)                                         \
+  ((__m128)__builtin_ia32_vcvtqq2ps256_round_mask(                             \
+      (__v4di)(__m256i)(A), (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_cvt_roundepi64_ps(W, U, A, R)                              \
+  ((__m128)__builtin_ia32_vcvtqq2ps256_round_mask(                             \
+      (__v4di)(__m256i)(A), (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundepi64_ps(U, A, R)                                \
+  ((__m128)__builtin_ia32_vcvtqq2ps256_round_mask((__v4di)(__m256i)(A),        \
+                                                  (__v4sf)_mm_setzero_ps(),    \
+                                                  (__mmask8)(U), (int)(R)))
+
+#define _mm256_cvtt_roundpd_epi32(A, R)                                        \
+  ((__m128i)__builtin_ia32_vcvttpd2dq256_round_mask(                           \
+      (__v4df)(__m256d)(A), (__v4si)_mm_setzero_si128(), (__mmask8)-1,         \
+      (int)(R)))
+
+#define _mm256_mask_cvtt_roundpd_epi32(W, U, A, R)                             \
+  ((__m128i)__builtin_ia32_vcvttpd2dq256_round_mask(                           \
+      (__v4df)(__m256d)(A), (__v4si)(__m128i)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvtt_roundpd_epi32(U, A, R)                               \
+  ((__m128i)__builtin_ia32_vcvttpd2dq256_round_mask(                           \
+      (__v4df)(__m256d)(A), (__v4si)_mm_setzero_si128(), (__mmask8)(U),        \
+      (int)(R)))
+
+#define _mm256_cvtt_roundpd_epi64(A, R)                                        \
+  ((__m256i)__builtin_ia32_vcvttpd2qq256_round_mask(                           \
+      (__v4df)(__m256d)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)-1,      \
+      (int)(R)))
+
+#define _mm256_mask_cvtt_roundpd_epi64(W, U, A, R)                             \
+  ((__m256i)__builtin_ia32_vcvttpd2qq256_round_mask(                           \
+      (__v4df)(__m256d)(A), (__v4di)(__m256i)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvtt_roundpd_epi64(U, A, R)                               \
+  ((__m256i)__builtin_ia32_vcvttpd2qq256_round_mask(                           \
+      (__v4df)(__m256d)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)(U),     \
+      (int)(R)))
+
+#define _mm256_cvtt_roundpd_epu32(A, R)                                        \
+  ((__m128i)__builtin_ia32_vcvttpd2udq256_round_mask(                          \
+      (__v4df)(__m256d)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1,         \
+      (int)(R)))
+
+#define _mm256_mask_cvtt_roundpd_epu32(W, U, A, R)                             \
+  ((__m128i)__builtin_ia32_vcvttpd2udq256_round_mask(                          \
+      (__v4df)(__m256d)(A), (__v4su)(__m128i)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvtt_roundpd_epu32(U, A, R)                               \
+  ((__m128i)__builtin_ia32_vcvttpd2udq256_round_mask(                          \
+      (__v4df)(__m256d)(A), (__v4su)_mm_setzero_si128(), (__mmask8)(U),        \
+      (int)(R)))
+
+#define _mm256_cvtt_roundpd_epu64(A, R)                                        \
+  ((__m256i)__builtin_ia32_vcvttpd2uqq256_round_mask(                          \
+      (__v4df)(__m256d)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)-1,      \
+      (int)(R)))
+
+#define _mm256_mask_cvtt_roundpd_epu64(W, U, A, R)                             \
+  ((__m256i)__builtin_ia32_vcvttpd2uqq256_round_mask(                          \
+      (__v4df)(__m256d)(A), (__v4du)(__m256i)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvtt_roundpd_epu64(U, A, R)                               \
+  ((__m256i)__builtin_ia32_vcvttpd2uqq256_round_mask(                          \
+      (__v4df)(__m256d)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)(U),     \
+      (int)(R)))
+
+#define _mm256_cvtt_roundph_epi32(A, R)                                        \
+  ((__m256i)__builtin_ia32_vcvttph2dq256_round_mask(                           \
+      (__v8hf)(A), (__v8si)_mm256_undefined_si256(), (__mmask8)(-1),           \
+      (int)(R)))
+
+#define _mm256_mask_cvtt_roundph_epi32(W, U, A, R)                             \
+  ((__m256i)__builtin_ia32_vcvttph2dq256_round_mask((__v8hf)(A), (__v8si)(W),  \
+                                                    (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvtt_roundph_epi32(U, A, R)                               \
+  ((__m256i)__builtin_ia32_vcvttph2dq256_round_mask(                           \
+      (__v8hf)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), (int)(R)))
+
+#define _mm256_cvtt_roundph_epi64(A, R)                                        \
+  ((__m256i)__builtin_ia32_vcvttph2qq256_round_mask(                           \
+      (__v8hf)(A), (__v4di)_mm256_undefined_si256(), (__mmask8)(-1),           \
+      (int)(R)))
+
+#define _mm256_mask_cvtt_roundph_epi64(W, U, A, R)                             \
+  ((__m256i)__builtin_ia32_vcvttph2qq256_round_mask((__v8hf)(A), (__v4di)(W),  \
+                                                    (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvtt_roundph_epi64(U, A, R)                               \
+  ((__m256i)__builtin_ia32_vcvttph2qq256_round_mask(                           \
+      (__v8hf)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)(U), (int)(R)))
+
+#define _mm256_cvtt_roundph_epu32(A, R)                                        \
+  ((__m256i)__builtin_ia32_vcvttph2udq256_round_mask(                          \
+      (__v8hf)(A), (__v8su)_mm256_undefined_si256(), (__mmask8)(-1),           \
+      (int)(R)))
+
+#define _mm256_mask_cvtt_roundph_epu32(W, U, A, R)                             \
+  ((__m256i)__builtin_ia32_vcvttph2udq256_round_mask((__v8hf)(A), (__v8su)(W), \
+                                                     (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvtt_roundph_epu32(U, A, R)                               \
+  ((__m256i)__builtin_ia32_vcvttph2udq256_round_mask(                          \
+      (__v8hf)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)(U), (int)(R)))
+
+#define _mm256_cvtt_roundph_epu64(A, R)                                        \
+  ((__m256i)__builtin_ia32_vcvttph2uqq256_round_mask(                          \
+      (__v8hf)(A), (__v4du)_mm256_undefined_si256(), (__mmask8)(-1),           \
+      (int)(R)))
+
+#define _mm256_mask_cvtt_roundph_epu64(W, U, A, R)                             \
+  ((__m256i)__builtin_ia32_vcvttph2uqq256_round_mask((__v8hf)(A), (__v4du)(W), \
+                                                     (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvtt_roundph_epu64(U, A, R)                               \
+  ((__m256i)__builtin_ia32_vcvttph2uqq256_round_mask(                          \
+      (__v8hf)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)(U), (int)(R)))
+
+#define _mm256_cvtt_roundph_epu16(A, R)                                        \
+  ((__m256i)__builtin_ia32_vcvttph2uw256_round_mask(                           \
+      (__v16hf)(A), (__v16hu)_mm256_undefined_si256(), (__mmask16)(-1),        \
+      (int)(R)))
+
+#define _mm256_mask_cvtt_roundph_epu16(W, U, A, R)                             \
+  ((__m256i)__builtin_ia32_vcvttph2uw256_round_mask(                           \
+      (__v16hf)(A), (__v16hu)(W), (__mmask16)(U), (int)(R)))
+
+#define _mm256_maskz_cvtt_roundph_epu16(U, A, R)                               \
+  ((__m256i)__builtin_ia32_vcvttph2uw256_round_mask(                           \
+      (__v16hf)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U),           \
+      (int)(R)))
+
+#define _mm256_cvtt_roundph_epi16(A, R)                                        \
+  ((__m256i)__builtin_ia32_vcvttph2w256_round_mask(                            \
+      (__v16hf)(A), (__v16hi)_mm256_undefined_si256(), (__mmask16)(-1),        \
+      (int)(R)))
+
+#define _mm256_mask_cvtt_roundph_epi16(W, U, A, R)                             \
+  ((__m256i)__builtin_ia32_vcvttph2w256_round_mask((__v16hf)(A), (__v16hi)(W), \
+                                                   (__mmask16)(U), (int)(R)))
+
+#define _mm256_maskz_cvtt_roundph_epi16(U, A, R)                               \
+  ((__m256i)__builtin_ia32_vcvttph2w256_round_mask(                            \
+      (__v16hf)(A), (__v16hi)_mm256_setzero_si256(), (__mmask16)(U),           \
+      (int)(R)))
+
+#define _mm256_cvtt_roundps_epi32(A, R)                                        \
+  ((__m256i)__builtin_ia32_vcvttps2dq256_round_mask(                           \
+      (__v8sf)(__m256)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)-1,       \
+      (int)(R)))
+
+#define _mm256_mask_cvtt_roundps_epi32(W, U, A, R)                             \
+  ((__m256i)__builtin_ia32_vcvttps2dq256_round_mask(                           \
+      (__v8sf)(__m256)(A), (__v8si)(__m256i)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvtt_roundps_epi32(U, A, R)                               \
+  ((__m256i)__builtin_ia32_vcvttps2dq256_round_mask(                           \
+      (__v8sf)(__m256)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U),      \
+      (int)(R)))
+
+#define _mm256_cvtt_roundps_epi64(A, R)                                        \
+  ((__m256i)__builtin_ia32_vcvttps2qq256_round_mask(                           \
+      (__v4sf)(__m128)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)-1,       \
+      (int)(R)))
+
+#define _mm256_mask_cvtt_roundps_epi64(W, U, A, R)                             \
+  ((__m256i)__builtin_ia32_vcvttps2qq256_round_mask(                           \
+      (__v4sf)(__m128)(A), (__v4di)(__m256i)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvtt_roundps_epi64(U, A, R)                               \
+  ((__m256i)__builtin_ia32_vcvttps2qq256_round_mask(                           \
+      (__v4sf)(__m128)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)(U),      \
+      (int)(R)))
+
+#define _mm256_cvtt_roundps_epu32(A, R)                                        \
+  ((__m256i)__builtin_ia32_vcvttps2udq256_round_mask(                          \
+      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1,       \
+      (int)(R)))
+
+#define _mm256_mask_cvtt_roundps_epu32(W, U, A, R)                             \
+  ((__m256i)__builtin_ia32_vcvttps2udq256_round_mask(                          \
+      (__v8sf)(__m256)(A), (__v8su)(__m256i)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvtt_roundps_epu32(U, A, R)                               \
+  ((__m256i)__builtin_ia32_vcvttps2udq256_round_mask(                          \
+      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)(U),      \
+      (int)(R)))
+
+#define _mm256_cvtt_roundps_epu64(A, R)                                        \
+  ((__m256i)__builtin_ia32_vcvttps2uqq256_round_mask(                          \
+      (__v4sf)(__m128)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)-1,       \
+      (int)(R)))
+
+#define _mm256_mask_cvtt_roundps_epu64(W, U, A, R)                             \
+  ((__m256i)__builtin_ia32_vcvttps2uqq256_round_mask(                          \
+      (__v4sf)(__m128)(A), (__v4du)(__m256i)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvtt_roundps_epu64(U, A, R)                               \
+  ((__m256i)__builtin_ia32_vcvttps2uqq256_round_mask(                          \
+      (__v4sf)(__m128)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)(U),      \
+      (int)(R)))
+
+#define _mm256_cvt_roundepu32_ph(A, R)                                         \
+  ((__m128h)__builtin_ia32_vcvtudq2ph256_round_mask(                           \
+      (__v8su)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R)))
+
+#define _mm256_mask_cvt_roundepu32_ph(W, U, A, R)                              \
+  ((__m128h)__builtin_ia32_vcvtudq2ph256_round_mask((__v8su)(A), (__v8hf)(W),  \
+                                                    (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundepu32_ph(U, A, R)                                \
+  ((__m128h)__builtin_ia32_vcvtudq2ph256_round_mask(                           \
+      (__v8su)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
+
+#define _mm256_cvt_roundepu32_ps(A, R)                                         \
+  ((__m256)__builtin_ia32_vcvtudq2ps256_round_mask(                            \
+      (__v8su)(__m256i)(A), (__v8sf)_mm256_setzero_ps(), (__mmask8)-1,         \
+      (int)(R)))
+
+#define _mm256_mask_cvt_roundepu32_ps(W, U, A, R)                              \
+  ((__m256)__builtin_ia32_vcvtudq2ps256_round_mask(                            \
+      (__v8su)(__m256i)(A), (__v8sf)(__m256)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundepu32_ps(U, A, R)                                \
+  ((__m256)__builtin_ia32_vcvtudq2ps256_round_mask(                            \
+      (__v8su)(__m256i)(A), (__v8sf)_mm256_setzero_ps(), (__mmask8)(U),        \
+      (int)(R)))
+
+#define _mm256_cvt_roundepu64_pd(A, R)                                         \
+  ((__m256d)__builtin_ia32_vcvtuqq2pd256_round_mask(                           \
+      (__v4du)(__m256i)(A), (__v4df)_mm256_setzero_pd(), (__mmask8)-1,         \
+      (int)(R)))
+
+#define _mm256_mask_cvt_roundepu64_pd(W, U, A, R)                              \
+  ((__m256d)__builtin_ia32_vcvtuqq2pd256_round_mask(                           \
+      (__v4du)(__m256i)(A), (__v4df)(__m256d)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundepu64_pd(U, A, R)                                \
+  ((__m256d)__builtin_ia32_vcvtuqq2pd256_round_mask(                           \
+      (__v4du)(__m256i)(A), (__v4df)_mm256_setzero_pd(), (__mmask8)(U),        \
+      (int)(R)))
+
+#define _mm256_cvt_roundepu64_ph(A, R)                                         \
+  ((__m128h)__builtin_ia32_vcvtuqq2ph256_round_mask(                           \
+      (__v4du)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R)))
+
+#define _mm256_mask_cvt_roundepu64_ph(W, U, A, R)                              \
+  ((__m128h)__builtin_ia32_vcvtuqq2ph256_round_mask((__v4du)(A), (__v8hf)(W),  \
+                                                    (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundepu64_ph(U, A, R)                                \
+  ((__m128h)__builtin_ia32_vcvtuqq2ph256_round_mask(                           \
+      (__v4du)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
+
+#define _mm256_cvt_roundepu64_ps(A, R)                                         \
+  ((__m128)__builtin_ia32_vcvtuqq2ps256_round_mask(                            \
+      (__v4du)(__m256i)(A), (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_cvt_roundepu64_ps(W, U, A, R)                              \
+  ((__m128)__builtin_ia32_vcvtuqq2ps256_round_mask(                            \
+      (__v4du)(__m256i)(A), (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundepu64_ps(U, A, R)                                \
+  ((__m128)__builtin_ia32_vcvtuqq2ps256_round_mask((__v4du)(__m256i)(A),       \
+                                                   (__v4sf)_mm_setzero_ps(),   \
+                                                   (__mmask8)(U), (int)(R)))
+
+#define _mm256_cvt_roundepu16_ph(A, R)                                         \
+  ((__m256h)__builtin_ia32_vcvtuw2ph256_round_mask(                            \
+      (__v16hu)(A), (__v16hf)_mm256_undefined_ph(), (__mmask16)(-1),           \
+      (int)(R)))
+
+#define _mm256_mask_cvt_roundepu16_ph(W, U, A, R)                              \
+  ((__m256h)__builtin_ia32_vcvtuw2ph256_round_mask((__v16hu)(A), (__v16hf)(W), \
+                                                   (__mmask16)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundepu16_ph(U, A, R)                                \
+  ((__m256h)__builtin_ia32_vcvtuw2ph256_round_mask(                            \
+      (__v16hu)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R)))
+
+#define _mm256_cvt_roundepi16_ph(A, R)                                         \
+  ((__m256h)__builtin_ia32_vcvtw2ph256_round_mask(                             \
+      (__v16hi)(A), (__v16hf)_mm256_undefined_ph(), (__mmask16)(-1),           \
+      (int)(R)))
+
+#define _mm256_mask_cvt_roundepi16_ph(W, U, A, R)                              \
+  ((__m256h)__builtin_ia32_vcvtw2ph256_round_mask((__v16hi)(A), (__v16hf)(W),  \
+                                                  (__mmask16)(U), (int)(R)))
+
+#define _mm256_maskz_cvt_roundepi16_ph(U, A, R)                                \
+  ((__m256h)__builtin_ia32_vcvtw2ph256_round_mask(                             \
+      (__v16hi)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R)))
+
+#define _mm256_div_round_pd(A, B, R)                                           \
+  ((__m256d)__builtin_ia32_vdivpd256_round((__v4df)(__m256d)(A),               \
+                                           (__v4df)(__m256d)(B), (int)(R)))
+
+#define _mm256_mask_div_round_pd(W, U, A, B, R)                                \
+  ((__m256d)__builtin_ia32_selectpd_256(                                       \
+      (__mmask8)(U), (__v4df)_mm256_div_round_pd((A), (B), (R)),               \
+      (__v4df)(__m256d)(W)))
+
+#define _mm256_maskz_div_round_pd(U, A, B, R)                                  \
+  ((__m256d)__builtin_ia32_selectpd_256(                                       \
+      (__mmask8)(U), (__v4df)_mm256_div_round_pd((A), (B), (R)),               \
+      (__v4df)_mm256_setzero_pd()))
+
+#define _mm256_div_round_ph(A, B, R)                                           \
+  ((__m256h)__builtin_ia32_vdivph256_round((__v16hf)(__m256h)(A),              \
+                                           (__v16hf)(__m256h)(B), (int)(R)))
+
+#define _mm256_mask_div_round_ph(W, U, A, B, R)                                \
+  ((__m256h)__builtin_ia32_selectph_256(                                       \
+      (__mmask16)(U), (__v16hf)_mm256_div_round_ph((A), (B), (R)),             \
+      (__v16hf)(__m256h)(W)))
+
+#define _mm256_maskz_div_round_ph(U, A, B, R)                                  \
+  ((__m256h)__builtin_ia32_selectph_256(                                       \
+      (__mmask16)(U), (__v16hf)_mm256_div_round_ph((A), (B), (R)),             \
+      (__v16hf)_mm256_setzero_ph()))
+
+#define _mm256_div_round_ps(A, B, R)                                           \
+  ((__m256)__builtin_ia32_vdivps256_round((__v8sf)(__m256)(A),                 \
+                                          (__v8sf)(__m256)(B), (int)(R)))
+
+#define _mm256_mask_div_round_ps(W, U, A, B, R)                                \
+  ((__m256)__builtin_ia32_selectps_256(                                        \
+      (__mmask8)(U), (__v8sf)_mm256_div_round_ps((A), (B), (R)),               \
+      (__v8sf)(__m256)(W)))
+
+#define _mm256_maskz_div_round_ps(U, A, B, R)                                  \
+  ((__m256)__builtin_ia32_selectps_256(                                        \
+      (__mmask8)(U), (__v8sf)_mm256_div_round_ps((A), (B), (R)),               \
+      (__v8sf)_mm256_setzero_ps()))
+
+#define _mm256_fcmadd_round_pch(A, B, C, R)                                    \
+  ((__m256h)__builtin_ia32_vfcmaddcph256_round_mask3(                          \
+      (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C),        \
+      (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_fcmadd_round_pch(A, U, B, C, R)                            \
+  ((__m256h)__builtin_ia32_vfcmaddcph256_round_mask(                           \
+      (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C),        \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_mask3_fcmadd_round_pch(A, B, C, U, R)                           \
+  ((__m256h)__builtin_ia32_vfcmaddcph256_round_mask3(                          \
+      (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C),        \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_fcmadd_round_pch(U, A, B, C, R)                           \
+  ((__m256h)__builtin_ia32_vfcmaddcph256_round_maskz(                          \
+      (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C),        \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_cmul_round_pch(A, B, R)                                         \
+  ((__m256h)__builtin_ia32_vfcmulcph256_round_mask(                            \
+      (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B),                              \
+      (__v8sf)(__m256h)_mm256_undefined_ph(), (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_cmul_round_pch(W, U, A, B, R)                              \
+  ((__m256h)__builtin_ia32_vfcmulcph256_round_mask(                            \
+      (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(W),        \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cmul_round_pch(U, A, B, R)                                \
+  ((__m256h)__builtin_ia32_vfcmulcph256_round_mask(                            \
+      (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B),                              \
+      (__v8sf)(__m256h)_mm256_setzero_ph(), (__mmask8)(U), (int)(R)))
+
+#define _mm256_fixupimm_round_pd(A, B, C, imm, R)                              \
+  ((__m256d)__builtin_ia32_vfixupimmpd256_round_mask(                          \
+      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4di)(__m256i)(C),        \
+      (int)(imm), (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_fixupimm_round_pd(A, U, B, C, imm, R)                      \
+  ((__m256d)__builtin_ia32_vfixupimmpd256_round_mask(                          \
+      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4di)(__m256i)(C),        \
+      (int)(imm), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_fixupimm_round_pd(U, A, B, C, imm, R)                     \
+  ((__m256d)__builtin_ia32_vfixupimmpd256_round_maskz(                         \
+      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4di)(__m256i)(C),        \
+      (int)(imm), (__mmask8)(U), (int)(R)))
+
+#define _mm256_fixupimm_round_ps(A, B, C, imm, R)                              \
+  ((__m256)__builtin_ia32_vfixupimmps256_round_mask(                           \
+      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8si)(__m256i)(C),          \
+      (int)(imm), (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_fixupimm_round_ps(A, U, B, C, imm, R)                      \
+  ((__m256)__builtin_ia32_vfixupimmps256_round_mask(                           \
+      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8si)(__m256i)(C),          \
+      (int)(imm), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_fixupimm_round_ps(U, A, B, C, imm, R)                     \
+  ((__m256)__builtin_ia32_vfixupimmps256_round_maskz(                          \
+      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8si)(__m256i)(C),          \
+      (int)(imm), (__mmask8)(U), (int)(R)))
+
+#define _mm256_fmadd_round_pd(A, B, C, R)                                      \
+  ((__m256d)__builtin_ia32_vfmaddpd256_round_mask(                             \
+      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C),        \
+      (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_fmadd_round_pd(A, U, B, C, R)                              \
+  ((__m256d)__builtin_ia32_vfmaddpd256_round_mask(                             \
+      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C),        \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_mask3_fmadd_round_pd(A, B, C, U, R)                             \
+  ((__m256d)__builtin_ia32_vfmaddpd256_round_mask3(                            \
+      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C),        \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_fmadd_round_pd(U, A, B, C, R)                             \
+  ((__m256d)__builtin_ia32_vfmaddpd256_round_maskz(                            \
+      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C),        \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_fmsub_round_pd(A, B, C, R)                                      \
+  ((__m256d)__builtin_ia32_vfmaddpd256_round_mask(                             \
+      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C),       \
+      (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_fmsub_round_pd(A, U, B, C, R)                              \
+  ((__m256d)__builtin_ia32_vfmaddpd256_round_mask(                             \
+      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C),       \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_fmsub_round_pd(U, A, B, C, R)                             \
+  ((__m256d)__builtin_ia32_vfmaddpd256_round_maskz(                            \
+      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C),       \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_fnmadd_round_pd(A, B, C, R)                                     \
+  ((__m256d)__builtin_ia32_vfmaddpd256_round_mask(                             \
+      -(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C),       \
+      (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask3_fnmadd_round_pd(A, B, C, U, R)                            \
+  ((__m256d)__builtin_ia32_vfmaddpd256_round_mask3(                            \
+      -(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C),       \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_fnmadd_round_pd(U, A, B, C, R)                            \
+  ((__m256d)__builtin_ia32_vfmaddpd256_round_maskz(                            \
+      -(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C),       \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_fnmsub_round_pd(A, B, C, R)                                     \
+  ((__m256d)__builtin_ia32_vfmaddpd256_round_mask(                             \
+      -(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C),      \
+      (__mmask8)-1, (int)(R)))
+
+#define _mm256_maskz_fnmsub_round_pd(U, A, B, C, R)                            \
+  ((__m256d)__builtin_ia32_vfmaddpd256_round_maskz(                            \
+      -(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C),      \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_fmadd_round_ph(A, B, C, R)                                      \
+  ((__m256h)__builtin_ia32_vfmaddph256_round_mask(                             \
+      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C),     \
+      (__mmask16)-1, (int)(R)))
+
+#define _mm256_mask_fmadd_round_ph(A, U, B, C, R)                              \
+  ((__m256h)__builtin_ia32_vfmaddph256_round_mask(                             \
+      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C),     \
+      (__mmask16)(U), (int)(R)))
+
+#define _mm256_mask3_fmadd_round_ph(A, B, C, U, R)                             \
+  ((__m256h)__builtin_ia32_vfmaddph256_round_mask3(                            \
+      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C),     \
+      (__mmask16)(U), (int)(R)))
+
+#define _mm256_maskz_fmadd_round_ph(U, A, B, C, R)                             \
+  ((__m256h)__builtin_ia32_vfmaddph256_round_maskz(                            \
+      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C),     \
+      (__mmask16)(U), (int)(R)))
+
+#define _mm256_fmsub_round_ph(A, B, C, R)                                      \
+  ((__m256h)__builtin_ia32_vfmaddph256_round_mask(                             \
+      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C),    \
+      (__mmask16)-1, (int)(R)))
+
+#define _mm256_mask_fmsub_round_ph(A, U, B, C, R)                              \
+  ((__m256h)__builtin_ia32_vfmaddph256_round_mask(                             \
+      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C),    \
+      (__mmask16)(U), (int)(R)))
+
+#define _mm256_maskz_fmsub_round_ph(U, A, B, C, R)                             \
+  ((__m256h)__builtin_ia32_vfmaddph256_round_maskz(                            \
+      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C),    \
+      (__mmask16)(U), (int)(R)))
+
+#define _mm256_fnmadd_round_ph(A, B, C, R)                                     \
+  ((__m256h)__builtin_ia32_vfmaddph256_round_mask(                             \
+      (__v16hf)(__m256h)(A), -(__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C),    \
+      (__mmask16)-1, (int)(R)))
+
+#define _mm256_mask3_fnmadd_round_ph(A, B, C, U, R)                            \
+  ((__m256h)__builtin_ia32_vfmaddph256_round_mask3(                            \
+      -(__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C),    \
+      (__mmask16)(U), (int)(R)))
+
+#define _mm256_maskz_fnmadd_round_ph(U, A, B, C, R)                            \
+  ((__m256h)__builtin_ia32_vfmaddph256_round_maskz(                            \
+      -(__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C),    \
+      (__mmask16)(U), (int)(R)))
+
+#define _mm256_fnmsub_round_ph(A, B, C, R)                                     \
+  ((__m256h)__builtin_ia32_vfmaddph256_round_mask(                             \
+      (__v16hf)(__m256h)(A), -(__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C),   \
+      (__mmask16)-1, (int)(R)))
+
+#define _mm256_maskz_fnmsub_round_ph(U, A, B, C, R)                            \
+  ((__m256h)__builtin_ia32_vfmaddph256_round_maskz(                            \
+      -(__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C),   \
+      (__mmask16)(U), (int)(R)))
+
+#define _mm256_fmadd_round_ps(A, B, C, R)                                      \
+  ((__m256)__builtin_ia32_vfmaddps256_round_mask(                              \
+      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C),           \
+      (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_fmadd_round_ps(A, U, B, C, R)                              \
+  ((__m256)__builtin_ia32_vfmaddps256_round_mask(                              \
+      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C),           \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_mask3_fmadd_round_ps(A, B, C, U, R)                             \
+  ((__m256)__builtin_ia32_vfmaddps256_round_mask3(                             \
+      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C),           \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_fmadd_round_ps(U, A, B, C, R)                             \
+  ((__m256)__builtin_ia32_vfmaddps256_round_maskz(                             \
+      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C),           \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_fmsub_round_ps(A, B, C, R)                                      \
+  ((__m256)__builtin_ia32_vfmaddps256_round_mask(                              \
+      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), -(__v8sf)(__m256)(C),          \
+      (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_fmsub_round_ps(A, U, B, C, R)                              \
+  ((__m256)__builtin_ia32_vfmaddps256_round_mask(                              \
+      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), -(__v8sf)(__m256)(C),          \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_fmsub_round_ps(U, A, B, C, R)                             \
+  ((__m256)__builtin_ia32_vfmaddps256_round_maskz(                             \
+      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), -(__v8sf)(__m256)(C),          \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_fnmadd_round_ps(A, B, C, R)                                     \
+  ((__m256)__builtin_ia32_vfmaddps256_round_mask(                              \
+      (__v8sf)(__m256)(A), -(__v8sf)(__m256)(B), (__v8sf)(__m256)(C),          \
+      (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask3_fnmadd_round_ps(A, B, C, U, R)                            \
+  ((__m256)__builtin_ia32_vfmaddps256_round_mask3(                             \
+      -(__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C),          \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_fnmadd_round_ps(U, A, B, C, R)                            \
+  ((__m256)__builtin_ia32_vfmaddps256_round_maskz(                             \
+      -(__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C),          \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_fnmsub_round_ps(A, B, C, R)                                     \
+  ((__m256)__builtin_ia32_vfmaddps256_round_mask(                              \
+      (__v8sf)(__m256)(A), -(__v8sf)(__m256)(B), -(__v8sf)(__m256)(C),         \
+      (__mmask8)-1, (int)(R)))
+
+#define _mm256_maskz_fnmsub_round_ps(U, A, B, C, R)                            \
+  ((__m256)__builtin_ia32_vfmaddps256_round_maskz(                             \
+      -(__v8sf)(__m256)(A), (__v8sf)(__m256)(B), -(__v8sf)(__m256)(C),         \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_fmadd_round_pch(A, B, C, R)                                     \
+  ((__m256h)__builtin_ia32_vfmaddcph256_round_mask3(                           \
+      (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C),        \
+      (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_fmadd_round_pch(A, U, B, C, R)                             \
+  ((__m256h)__builtin_ia32_vfmaddcph256_round_mask(                            \
+      (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C),        \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_mask3_fmadd_round_pch(A, B, C, U, R)                            \
+  ((__m256h)__builtin_ia32_vfmaddcph256_round_mask3(                           \
+      (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C),        \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_fmadd_round_pch(U, A, B, C, R)                            \
+  ((__m256h)__builtin_ia32_vfmaddcph256_round_maskz(                           \
+      (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C),        \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_fmaddsub_round_pd(A, B, C, R)                                   \
+  ((__m256d)__builtin_ia32_vfmaddsubpd256_round_mask(                          \
+      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C),        \
+      (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_fmaddsub_round_pd(A, U, B, C, R)                           \
+  ((__m256d)__builtin_ia32_vfmaddsubpd256_round_mask(                          \
+      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C),        \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_mask3_fmaddsub_round_pd(A, B, C, U, R)                          \
+  ((__m256d)__builtin_ia32_vfmaddsubpd256_round_mask3(                         \
+      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C),        \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_fmaddsub_round_pd(U, A, B, C, R)                          \
+  ((__m256d)__builtin_ia32_vfmaddsubpd256_round_maskz(                         \
+      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C),        \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_fmsubadd_round_pd(A, B, C, R)                                   \
+  ((__m256d)__builtin_ia32_vfmaddsubpd256_round_mask(                          \
+      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C),       \
+      (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_fmsubadd_round_pd(A, U, B, C, R)                           \
+  ((__m256d)__builtin_ia32_vfmaddsubpd256_round_mask(                          \
+      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C),       \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_fmsubadd_round_pd(U, A, B, C, R)                          \
+  ((__m256d)__builtin_ia32_vfmaddsubpd256_round_maskz(                         \
+      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C),       \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_fmaddsub_round_ph(A, B, C, R)                                   \
+  ((__m256h)__builtin_ia32_vfmaddsubph256_round_mask(                          \
+      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C),     \
+      (__mmask16)-1, (int)(R)))
+
+#define _mm256_mask_fmaddsub_round_ph(A, U, B, C, R)                           \
+  ((__m256h)__builtin_ia32_vfmaddsubph256_round_mask(                          \
+      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C),     \
+      (__mmask16)(U), (int)(R)))
+
+#define _mm256_mask3_fmaddsub_round_ph(A, B, C, U, R)                          \
+  ((__m256h)__builtin_ia32_vfmaddsubph256_round_mask3(                         \
+      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C),     \
+      (__mmask16)(U), (int)(R)))
+
+#define _mm256_maskz_fmaddsub_round_ph(U, A, B, C, R)                          \
+  ((__m256h)__builtin_ia32_vfmaddsubph256_round_maskz(                         \
+      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C),     \
+      (__mmask16)(U), (int)(R)))
+
+#define _mm256_fmsubadd_round_ph(A, B, C, R)                                   \
+  ((__m256h)__builtin_ia32_vfmaddsubph256_round_mask(                          \
+      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C),    \
+      (__mmask16)-1, (int)(R)))
+
+#define _mm256_mask_fmsubadd_round_ph(A, U, B, C, R)                           \
+  ((__m256h)__builtin_ia32_vfmaddsubph256_round_mask(                          \
+      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C),    \
+      (__mmask16)(U), (int)(R)))
+
+#define _mm256_maskz_fmsubadd_round_ph(U, A, B, C, R)                          \
+  ((__m256h)__builtin_ia32_vfmaddsubph256_round_maskz(                         \
+      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C),    \
+      (__mmask16)(U), (int)(R)))
+
+#define _mm256_fmaddsub_round_ps(A, B, C, R)                                   \
+  ((__m256)__builtin_ia32_vfmaddsubps256_round_mask(                           \
+      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C),           \
+      (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_fmaddsub_round_ps(A, U, B, C, R)                           \
+  ((__m256)__builtin_ia32_vfmaddsubps256_round_mask(                           \
+      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C),           \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_mask3_fmaddsub_round_ps(A, B, C, U, R)                          \
+  ((__m256)__builtin_ia32_vfmaddsubps256_round_mask3(                          \
+      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C),           \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_fmaddsub_round_ps(U, A, B, C, R)                          \
+  ((__m256)__builtin_ia32_vfmaddsubps256_round_maskz(                          \
+      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C),           \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_fmsubadd_round_ps(A, B, C, R)                                   \
+  ((__m256)__builtin_ia32_vfmaddsubps256_round_mask(                           \
+      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), -(__v8sf)(__m256)(C),          \
+      (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_fmsubadd_round_ps(A, U, B, C, R)                           \
+  ((__m256)__builtin_ia32_vfmaddsubps256_round_mask(                           \
+      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), -(__v8sf)(__m256)(C),          \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_fmsubadd_round_ps(U, A, B, C, R)                          \
+  ((__m256)__builtin_ia32_vfmaddsubps256_round_maskz(                          \
+      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), -(__v8sf)(__m256)(C),          \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_mask3_fmsub_round_pd(A, B, C, U, R)                             \
+  ((__m256d)__builtin_ia32_vfmsubpd256_round_mask3(                            \
+      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C),        \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_mask3_fmsubadd_round_pd(A, B, C, U, R)                          \
+  ((__m256d)__builtin_ia32_vfmsubaddpd256_round_mask3(                         \
+      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C),        \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_mask_fnmadd_round_pd(A, U, B, C, R)                             \
+  ((__m256d)__builtin_ia32_vfmaddpd256_round_mask(                             \
+      (__v4df)(__m256d)(A), -(__v4df)(__m256d)(B), (__v4df)(__m256d)(C),       \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_mask_fnmsub_round_pd(A, U, B, C, R)                             \
+  ((__m256d)__builtin_ia32_vfmaddpd256_round_mask(                             \
+      (__v4df)(__m256d)(A), -(__v4df)(__m256d)(B), -(__v4df)(__m256d)(C),      \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_mask3_fnmsub_round_pd(A, B, C, U, R)                            \
+  ((__m256d)__builtin_ia32_vfmsubpd256_round_mask3(                            \
+      -(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C),       \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_mask3_fmsub_round_ph(A, B, C, U, R)                             \
+  ((__m256h)__builtin_ia32_vfmsubph256_round_mask3(                            \
+      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C),     \
+      (__mmask16)(U), (int)(R)))
+
+#define _mm256_mask3_fmsubadd_round_ph(A, B, C, U, R)                          \
+  ((__m256h)__builtin_ia32_vfmsubaddph256_round_mask3(                         \
+      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C),     \
+      (__mmask16)(U), (int)(R)))
+
+#define _mm256_mask_fnmadd_round_ph(A, U, B, C, R)                             \
+  ((__m256h)__builtin_ia32_vfmaddph256_round_mask(                             \
+      (__v16hf)(__m256h)(A), -(__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C),    \
+      (__mmask16)(U), (int)(R)))
+
+#define _mm256_mask_fnmsub_round_ph(A, U, B, C, R)                             \
+  ((__m256h)__builtin_ia32_vfmaddph256_round_mask(                             \
+      (__v16hf)(__m256h)(A), -(__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C),   \
+      (__mmask16)(U), (int)(R)))
+
+#define _mm256_mask3_fnmsub_round_ph(A, B, C, U, R)                            \
+  ((__m256h)__builtin_ia32_vfmsubph256_round_mask3(                            \
+      -(__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C),    \
+      (__mmask16)(U), (int)(R)))
+
+#define _mm256_mask3_fmsub_round_ps(A, B, C, U, R)                             \
+  ((__m256)__builtin_ia32_vfmsubps256_round_mask3(                             \
+      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C),           \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_mask3_fmsubadd_round_ps(A, B, C, U, R)                          \
+  ((__m256)__builtin_ia32_vfmsubaddps256_round_mask3(                          \
+      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C),           \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_mask_fnmadd_round_ps(A, U, B, C, R)                             \
+  ((__m256)__builtin_ia32_vfmaddps256_round_mask(                              \
+      (__v8sf)(__m256)(A), -(__v8sf)(__m256)(B), (__v8sf)(__m256)(C),          \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_mask_fnmsub_round_ps(A, U, B, C, R)                             \
+  ((__m256)__builtin_ia32_vfmaddps256_round_mask(                              \
+      (__v8sf)(__m256)(A), -(__v8sf)(__m256)(B), -(__v8sf)(__m256)(C),         \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_mask3_fnmsub_round_ps(A, B, C, U, R)                            \
+  ((__m256)__builtin_ia32_vfmsubps256_round_mask3(                             \
+      -(__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C),          \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_mul_round_pch(A, B, R)                                          \
+  ((__m256h)__builtin_ia32_vfmulcph256_round_mask(                             \
+      (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B),                              \
+      (__v8sf)(__m256h)_mm256_undefined_ph(), (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_mul_round_pch(W, U, A, B, R)                               \
+  ((__m256h)__builtin_ia32_vfmulcph256_round_mask(                             \
+      (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(W),        \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_mul_round_pch(U, A, B, R)                                 \
+  ((__m256h)__builtin_ia32_vfmulcph256_round_mask(                             \
+      (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B),                              \
+      (__v8sf)(__m256h)_mm256_setzero_ph(), (__mmask8)(U), (int)(R)))
+
+#define _mm256_getexp_round_pd(A, R)                                           \
+  ((__m256d)__builtin_ia32_vgetexppd256_round_mask(                            \
+      (__v4df)(__m256d)(A), (__v4df)_mm256_undefined_pd(), (__mmask8)-1,       \
+      (int)(R)))
+
+#define _mm256_mask_getexp_round_pd(W, U, A, R)                                \
+  ((__m256d)__builtin_ia32_vgetexppd256_round_mask(                            \
+      (__v4df)(__m256d)(A), (__v4df)(__m256d)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_getexp_round_pd(U, A, R)                                  \
+  ((__m256d)__builtin_ia32_vgetexppd256_round_mask(                            \
+      (__v4df)(__m256d)(A), (__v4df)_mm256_setzero_pd(), (__mmask8)(U),        \
+      (int)(R)))
+
+#define _mm256_getexp_round_ph(A, R)                                           \
+  ((__m256h)__builtin_ia32_vgetexpph256_round_mask(                            \
+      (__v16hf)(__m256h)(A), (__v16hf)_mm256_undefined_ph(), (__mmask16)-1,    \
+      (int)(R)))
+
+#define _mm256_mask_getexp_round_ph(W, U, A, R)                                \
+  ((__m256h)__builtin_ia32_vgetexpph256_round_mask(                            \
+      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(W), (__mmask16)(U), (int)(R)))
+
+#define _mm256_maskz_getexp_round_ph(U, A, R)                                  \
+  ((__m256h)__builtin_ia32_vgetexpph256_round_mask(                            \
+      (__v16hf)(__m256h)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U),     \
+      (int)(R)))
+
+#define _mm256_getexp_round_ps(A, R)                                           \
+  ((__m256)__builtin_ia32_vgetexpps256_round_mask(                             \
+      (__v8sf)(__m256)(A), (__v8sf)_mm256_undefined_ps(), (__mmask8)-1,        \
+      (int)(R)))
+
+#define _mm256_mask_getexp_round_ps(W, U, A, R)                                \
+  ((__m256)__builtin_ia32_vgetexpps256_round_mask(                             \
+      (__v8sf)(__m256)(A), (__v8sf)(__m256)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_getexp_round_ps(U, A, R)                                  \
+  ((__m256)__builtin_ia32_vgetexpps256_round_mask((__v8sf)(__m256)(A),         \
+                                                  (__v8sf)_mm256_setzero_ps(), \
+                                                  (__mmask8)(U), (int)(R)))
+
+#define _mm256_getmant_round_pd(A, B, C, R)                                    \
+  ((__m256d)__builtin_ia32_vgetmantpd256_round_mask(                           \
+      (__v4df)(__m256d)(A), (int)(((C) << 2) | (B)),                           \
+      (__v4df)_mm256_undefined_pd(), (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_getmant_round_pd(W, U, A, B, C, R)                         \
+  ((__m256d)__builtin_ia32_vgetmantpd256_round_mask(                           \
+      (__v4df)(__m256d)(A), (int)(((C) << 2) | (B)), (__v4df)(__m256d)(W),     \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_getmant_round_pd(U, A, B, C, R)                           \
+  ((__m256d)__builtin_ia32_vgetmantpd256_round_mask(                           \
+      (__v4df)(__m256d)(A), (int)(((C) << 2) | (B)),                           \
+      (__v4df)_mm256_setzero_pd(), (__mmask8)(U), (int)(R)))
+
+#define _mm256_getmant_round_ph(A, B, C, R)                                    \
+  ((__m256h)__builtin_ia32_vgetmantph256_round_mask(                           \
+      (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)),                          \
+      (__v16hf)_mm256_undefined_ph(), (__mmask16)-1, (int)(R)))
+
+#define _mm256_mask_getmant_round_ph(W, U, A, B, C, R)                         \
+  ((__m256h)__builtin_ia32_vgetmantph256_round_mask(                           \
+      (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), (__v16hf)(__m256h)(W),   \
+      (__mmask16)(U), (int)(R)))
+
+#define _mm256_maskz_getmant_round_ph(U, A, B, C, R)                           \
+  ((__m256h)__builtin_ia32_vgetmantph256_round_mask(                           \
+      (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)),                          \
+      (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R)))
+
+#define _mm256_getmant_round_ps(A, B, C, R)                                    \
+  ((__m256)__builtin_ia32_vgetmantps256_round_mask(                            \
+      (__v8sf)(__m256)(A), (int)(((C) << 2) | (B)),                            \
+      (__v8sf)_mm256_undefined_ps(), (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_getmant_round_ps(W, U, A, B, C, R)                         \
+  ((__m256)__builtin_ia32_vgetmantps256_round_mask(                            \
+      (__v8sf)(__m256)(A), (int)(((C) << 2) | (B)), (__v8sf)(__m256)(W),       \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_getmant_round_ps(U, A, B, C, R)                           \
+  ((__m256)__builtin_ia32_vgetmantps256_round_mask(                            \
+      (__v8sf)(__m256)(A), (int)(((C) << 2) | (B)),                            \
+      (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), (int)(R)))
+
+#define _mm256_max_round_pd(A, B, R)                                           \
+  ((__m256d)__builtin_ia32_vmaxpd256_round((__v4df)(__m256d)(A),               \
+                                           (__v4df)(__m256d)(B), (int)(R)))
+
+#define _mm256_mask_max_round_pd(W, U, A, B, R)                                \
+  ((__m256d)__builtin_ia32_selectpd_256(                                       \
+      (__mmask8)(U), (__v4df)_mm256_max_round_pd((A), (B), (R)),               \
+      (__v4df)(__m256d)(W)))
+
+#define _mm256_maskz_max_round_pd(U, A, B, R)                                  \
+  ((__m256d)__builtin_ia32_selectpd_256(                                       \
+      (__mmask8)(U), (__v4df)_mm256_max_round_pd((A), (B), (R)),               \
+      (__v4df)_mm256_setzero_pd()))
+
+#define _mm256_max_round_ph(A, B, R)                                           \
+  ((__m256h)__builtin_ia32_vmaxph256_round((__v16hf)(__m256h)(A),              \
+                                           (__v16hf)(__m256h)(B), (int)(R)))
+
+#define _mm256_mask_max_round_ph(W, U, A, B, R)                                \
+  ((__m256h)__builtin_ia32_selectph_256(                                       \
+      (__mmask16)(U), (__v16hf)_mm256_max_round_ph((A), (B), (R)),             \
+      (__v16hf)(__m256h)(W)))
+
+#define _mm256_maskz_max_round_ph(U, A, B, R)                                  \
+  ((__m256h)__builtin_ia32_selectph_256(                                       \
+      (__mmask16)(U), (__v16hf)_mm256_max_round_ph((A), (B), (R)),             \
+      (__v16hf)_mm256_setzero_ph()))
+
+#define _mm256_max_round_ps(A, B, R)                                           \
+  ((__m256)__builtin_ia32_vmaxps256_round((__v8sf)(__m256)(A),                 \
+                                          (__v8sf)(__m256)(B), (int)(R)))
+
+#define _mm256_mask_max_round_ps(W, U, A, B, R)                                \
+  ((__m256)__builtin_ia32_selectps_256(                                        \
+      (__mmask8)(U), (__v8sf)_mm256_max_round_ps((A), (B), (R)),               \
+      (__v8sf)(__m256)(W)))
+
+#define _mm256_maskz_max_round_ps(U, A, B, R)                                  \
+  ((__m256)__builtin_ia32_selectps_256(                                        \
+      (__mmask8)(U), (__v8sf)_mm256_max_round_ps((A), (B), (R)),               \
+      (__v8sf)_mm256_setzero_ps()))
+
+#define _mm256_min_round_pd(A, B, R)                                           \
+  ((__m256d)__builtin_ia32_vminpd256_round((__v4df)(__m256d)(A),               \
+                                           (__v4df)(__m256d)(B), (int)(R)))
+
+#define _mm256_mask_min_round_pd(W, U, A, B, R)                                \
+  ((__m256d)__builtin_ia32_selectpd_256(                                       \
+      (__mmask8)(U), (__v4df)_mm256_min_round_pd((A), (B), (R)),               \
+      (__v4df)(__m256d)(W)))
+
+#define _mm256_maskz_min_round_pd(U, A, B, R)                                  \
+  ((__m256d)__builtin_ia32_selectpd_256(                                       \
+      (__mmask8)(U), (__v4df)_mm256_min_round_pd((A), (B), (R)),               \
+      (__v4df)_mm256_setzero_pd()))
+
+#define _mm256_min_round_ph(A, B, R)                                           \
+  ((__m256h)__builtin_ia32_vminph256_round((__v16hf)(__m256h)(A),              \
+                                           (__v16hf)(__m256h)(B), (int)(R)))
+
+#define _mm256_mask_min_round_ph(W, U, A, B, R)                                \
+  ((__m256h)__builtin_ia32_selectph_256(                                       \
+      (__mmask16)(U), (__v16hf)_mm256_min_round_ph((A), (B), (R)),             \
+      (__v16hf)(__m256h)(W)))
+
+#define _mm256_maskz_min_round_ph(U, A, B, R)                                  \
+  ((__m256h)__builtin_ia32_selectph_256(                                       \
+      (__mmask16)(U), (__v16hf)_mm256_min_round_ph((A), (B), (R)),             \
+      (__v16hf)_mm256_setzero_ph()))
+
+#define _mm256_min_round_ps(A, B, R)                                           \
+  ((__m256)__builtin_ia32_vminps256_round((__v8sf)(__m256)(A),                 \
+                                          (__v8sf)(__m256)(B), (int)(R)))
+
+#define _mm256_mask_min_round_ps(W, U, A, B, R)                                \
+  ((__m256)__builtin_ia32_selectps_256(                                        \
+      (__mmask8)(U), (__v8sf)_mm256_min_round_ps((A), (B), (R)),               \
+      (__v8sf)(__m256)(W)))
+
+#define _mm256_maskz_min_round_ps(U, A, B, R)                                  \
+  ((__m256)__builtin_ia32_selectps_256(                                        \
+      (__mmask8)(U), (__v8sf)_mm256_min_round_ps((A), (B), (R)),               \
+      (__v8sf)_mm256_setzero_ps()))
+
+#define _mm256_mul_round_pd(A, B, R)                                           \
+  ((__m256d)__builtin_ia32_vmulpd256_round((__v4df)(__m256d)(A),               \
+                                           (__v4df)(__m256d)(B), (int)(R)))
+
+#define _mm256_mask_mul_round_pd(W, U, A, B, R)                                \
+  ((__m256d)__builtin_ia32_selectpd_256(                                       \
+      (__mmask8)(U), (__v4df)_mm256_mul_round_pd((A), (B), (R)),               \
+      (__v4df)(__m256d)(W)))
+
+#define _mm256_maskz_mul_round_pd(U, A, B, R)                                  \
+  ((__m256d)__builtin_ia32_selectpd_256(                                       \
+      (__mmask8)(U), (__v4df)_mm256_mul_round_pd((A), (B), (R)),               \
+      (__v4df)_mm256_setzero_pd()))
+
+#define _mm256_mul_round_ph(A, B, R)                                           \
+  ((__m256h)__builtin_ia32_vmulph256_round((__v16hf)(__m256h)(A),              \
+                                           (__v16hf)(__m256h)(B), (int)(R)))
+
+#define _mm256_mask_mul_round_ph(W, U, A, B, R)                                \
+  ((__m256h)__builtin_ia32_selectph_256(                                       \
+      (__mmask16)(U), (__v16hf)_mm256_mul_round_ph((A), (B), (R)),             \
+      (__v16hf)(__m256h)(W)))
+
+#define _mm256_maskz_mul_round_ph(U, A, B, R)                                  \
+  ((__m256h)__builtin_ia32_selectph_256(                                       \
+      (__mmask16)(U), (__v16hf)_mm256_mul_round_ph((A), (B), (R)),             \
+      (__v16hf)_mm256_setzero_ph()))
+
+#define _mm256_mul_round_ps(A, B, R)                                           \
+  ((__m256)__builtin_ia32_vmulps256_round((__v8sf)(__m256)(A),                 \
+                                          (__v8sf)(__m256)(B), (int)(R)))
+
+#define _mm256_mask_mul_round_ps(W, U, A, B, R)                                \
+  ((__m256)__builtin_ia32_selectps_256(                                        \
+      (__mmask8)(U), (__v8sf)_mm256_mul_round_ps((A), (B), (R)),               \
+      (__v8sf)(__m256)(W)))
+
+#define _mm256_maskz_mul_round_ps(U, A, B, R)                                  \
+  ((__m256)__builtin_ia32_selectps_256(                                        \
+      (__mmask8)(U), (__v8sf)_mm256_mul_round_ps((A), (B), (R)),               \
+      (__v8sf)_mm256_setzero_ps()))
+
+#define _mm256_range_round_pd(A, B, C, R)                                      \
+  ((__m256d)__builtin_ia32_vrangepd256_round_mask(                             \
+      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C),                    \
+      (__v4df)_mm256_setzero_pd(), (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_range_round_pd(W, U, A, B, C, R)                           \
+  ((__m256d)__builtin_ia32_vrangepd256_round_mask(                             \
+      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C),                    \
+      (__v4df)(__m256d)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_range_round_pd(U, A, B, C, R)                             \
+  ((__m256d)__builtin_ia32_vrangepd256_round_mask(                             \
+      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C),                    \
+      (__v4df)_mm256_setzero_pd(), (__mmask8)(U), (int)(R)))
+
+#define _mm256_range_round_ps(A, B, C, R)                                      \
+  ((__m256)__builtin_ia32_vrangeps256_round_mask(                              \
+      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C),                      \
+      (__v8sf)_mm256_setzero_ps(), (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_range_round_ps(W, U, A, B, C, R)                           \
+  ((__m256)__builtin_ia32_vrangeps256_round_mask(                              \
+      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), (__v8sf)(__m256)(W), \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_range_round_ps(U, A, B, C, R)                             \
+  ((__m256)__builtin_ia32_vrangeps256_round_mask(                              \
+      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C),                      \
+      (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), (int)(R)))
+
+#define _mm256_reduce_round_pd(A, B, R)                                        \
+  ((__m256d)__builtin_ia32_vreducepd256_round_mask(                            \
+      (__v4df)(__m256d)(A), (int)(B), (__v4df)_mm256_setzero_pd(),             \
+      (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_reduce_round_pd(W, U, A, B, R)                             \
+  ((__m256d)__builtin_ia32_vreducepd256_round_mask(                            \
+      (__v4df)(__m256d)(A), (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U),     \
+      (int)(R)))
+
+#define _mm256_maskz_reduce_round_pd(U, A, B, R)                               \
+  ((__m256d)__builtin_ia32_vreducepd256_round_mask(                            \
+      (__v4df)(__m256d)(A), (int)(B), (__v4df)_mm256_setzero_pd(),             \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_mask_reduce_round_ph(W, U, A, imm, R)                           \
+  ((__m256h)__builtin_ia32_vreduceph256_round_mask(                            \
+      (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)(__m256h)(W),                \
+      (__mmask16)(U), (int)(R)))
+
+#define _mm256_maskz_reduce_round_ph(U, A, imm, R)                             \
+  ((__m256h)__builtin_ia32_vreduceph256_round_mask(                            \
+      (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_setzero_ph(),         \
+      (__mmask16)(U), (int)(R)))
+
+#define _mm256_reduce_round_ph(A, imm, R)                                      \
+  ((__m256h)__builtin_ia32_vreduceph256_round_mask(                            \
+      (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_undefined_ph(),       \
+      (__mmask16)-1, (int)(R)))
+
+#define _mm256_reduce_round_ps(A, B, R)                                        \
+  ((__m256)__builtin_ia32_vreduceps256_round_mask(                             \
+      (__v8sf)(__m256)(A), (int)(B), (__v8sf)_mm256_setzero_ps(),              \
+      (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_reduce_round_ps(W, U, A, B, R)                             \
+  ((__m256)__builtin_ia32_vreduceps256_round_mask(                             \
+      (__v8sf)(__m256)(A), (int)(B), (__v8sf)(__m256)(W), (__mmask8)(U),       \
+      (int)(R)))
+
+#define _mm256_maskz_reduce_round_ps(U, A, B, R)                               \
+  ((__m256)__builtin_ia32_vreduceps256_round_mask(                             \
+      (__v8sf)(__m256)(A), (int)(B), (__v8sf)_mm256_setzero_ps(),              \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_roundscale_round_pd(A, imm, R)                                  \
+  ((__m256d)__builtin_ia32_vrndscalepd256_round_mask(                          \
+      (__v4df)(__m256d)(A), (int)(imm), (__v4df)_mm256_undefined_pd(),         \
+      (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_roundscale_round_pd(A, B, C, imm, R)                       \
+  ((__m256d)__builtin_ia32_vrndscalepd256_round_mask(                          \
+      (__v4df)(__m256d)(C), (int)(imm), (__v4df)(__m256d)(A), (__mmask8)(B),   \
+      (int)(R)))
+
+#define _mm256_maskz_roundscale_round_pd(A, B, imm, R)                         \
+  ((__m256d)__builtin_ia32_vrndscalepd256_round_mask(                          \
+      (__v4df)(__m256d)(B), (int)(imm), (__v4df)_mm256_setzero_pd(),           \
+      (__mmask8)(A), (int)(R)))
+
+#define _mm256_roundscale_round_ph(A, imm, R)                                  \
+  ((__m256h)__builtin_ia32_vrndscaleph256_round_mask(                          \
+      (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_undefined_ph(),       \
+      (__mmask16)-1, (int)(R)))
+
+#define _mm256_mask_roundscale_round_ph(A, B, C, imm, R)                       \
+  ((__m256h)__builtin_ia32_vrndscaleph256_round_mask(                          \
+      (__v16hf)(__m256h)(C), (int)(imm), (__v16hf)(__m256h)(A),                \
+      (__mmask16)(B), (int)(R)))
+
+#define _mm256_maskz_roundscale_round_ph(A, B, imm, R)                         \
+  ((__m256h)__builtin_ia32_vrndscaleph256_round_mask(                          \
+      (__v16hf)(__m256h)(B), (int)(imm), (__v16hf)_mm256_setzero_ph(),         \
+      (__mmask16)(A), (int)(R)))
+
+#define _mm256_roundscale_round_ps(A, imm, R)                                  \
+  ((__m256)__builtin_ia32_vrndscaleps256_round_mask(                           \
+      (__v8sf)(__m256)(A), (int)(imm), (__v8sf)_mm256_undefined_ps(),          \
+      (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_roundscale_round_ps(A, B, C, imm, R)                       \
+  ((__m256)__builtin_ia32_vrndscaleps256_round_mask(                           \
+      (__v8sf)(__m256)(C), (int)(imm), (__v8sf)(__m256)(A), (__mmask8)(B),     \
+      (int)(R)))
+
+#define _mm256_maskz_roundscale_round_ps(A, B, imm, R)                         \
+  ((__m256)__builtin_ia32_vrndscaleps256_round_mask(                           \
+      (__v8sf)(__m256)(B), (int)(imm), (__v8sf)_mm256_setzero_ps(),            \
+      (__mmask8)(A), (int)(R)))
+
+#define _mm256_scalef_round_pd(A, B, R)                                        \
+  ((__m256d)__builtin_ia32_vscalefpd256_round_mask(                            \
+      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B),                              \
+      (__v4df)_mm256_undefined_pd(), (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_scalef_round_pd(W, U, A, B, R)                             \
+  ((__m256d)__builtin_ia32_vscalefpd256_round_mask(                            \
+      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(W),        \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_scalef_round_pd(U, A, B, R)                               \
+  ((__m256d)__builtin_ia32_vscalefpd256_round_mask(                            \
+      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)_mm256_setzero_pd(), \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_scalef_round_ph(A, B, R)                                        \
+  ((__m256h)__builtin_ia32_vscalefph256_round_mask(                            \
+      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B),                            \
+      (__v16hf)_mm256_undefined_ph(), (__mmask16)-1, (int)(R)))
+
+#define _mm256_mask_scalef_round_ph(W, U, A, B, R)                             \
+  ((__m256h)__builtin_ia32_vscalefph256_round_mask(                            \
+      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(W),     \
+      (__mmask16)(U), (int)(R)))
+
+#define _mm256_maskz_scalef_round_ph(U, A, B, R)                               \
+  ((__m256h)__builtin_ia32_vscalefph256_round_mask(                            \
+      (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B),                            \
+      (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R)))
+
+#define _mm256_scalef_round_ps(A, B, R)                                        \
+  ((__m256)__builtin_ia32_vscalefps256_round_mask(                             \
+      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)_mm256_undefined_ps(), \
+      (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_scalef_round_ps(W, U, A, B, R)                             \
+  ((__m256)__builtin_ia32_vscalefps256_round_mask(                             \
+      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(W),           \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_scalef_round_ps(U, A, B, R)                               \
+  ((__m256)__builtin_ia32_vscalefps256_round_mask(                             \
+      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)_mm256_setzero_ps(),   \
+      (__mmask8)(U), (int)(R)))
+
+#define _mm256_sqrt_round_pd(A, R)                                             \
+  ((__m256d)__builtin_ia32_vsqrtpd256_round((__v4df)(__m256d)(A), (int)(R)))
+
+#define _mm256_mask_sqrt_round_pd(W, U, A, R)                                  \
+  ((__m256d)__builtin_ia32_selectpd_256(                                       \
+      (__mmask8)(U), (__v4df)_mm256_sqrt_round_pd((A), (R)),                   \
+      (__v4df)(__m256d)(W)))
+
+#define _mm256_maskz_sqrt_round_pd(U, A, R)                                    \
+  ((__m256d)__builtin_ia32_selectpd_256(                                       \
+      (__mmask8)(U), (__v4df)_mm256_sqrt_round_pd((A), (R)),                   \
+      (__v4df)_mm256_setzero_pd()))
+
+#define _mm256_sqrt_round_ph(A, R)                                             \
+  ((__m256h)__builtin_ia32_vsqrtph256_round((__v16hf)(__m256h)(A), (int)(R)))
+
+#define _mm256_mask_sqrt_round_ph(W, U, A, R)                                  \
+  ((__m256h)__builtin_ia32_selectph_256(                                       \
+      (__mmask16)(U), (__v16hf)_mm256_sqrt_round_ph((A), (R)),                 \
+      (__v16hf)(__m256h)(W)))
+
+#define _mm256_maskz_sqrt_round_ph(U, A, R)                                    \
+  ((__m256h)__builtin_ia32_selectph_256(                                       \
+      (__mmask16)(U), (__v16hf)_mm256_sqrt_round_ph((A), (R)),                 \
+      (__v16hf)_mm256_setzero_ph()))
+
+#define _mm256_sqrt_round_ps(A, R)                                             \
+  ((__m256)__builtin_ia32_vsqrtps256_round((__v8sf)(__m256)(A), (int)(R)))
+
+#define _mm256_mask_sqrt_round_ps(W, U, A, R)                                  \
+  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U),                          \
+                                       (__v8sf)_mm256_sqrt_round_ps((A), (R)), \
+                                       (__v8sf)(__m256)(W)))
+
+#define _mm256_maskz_sqrt_round_ps(U, A, R)                                    \
+  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U),                          \
+                                       (__v8sf)_mm256_sqrt_round_ps((A), (R)), \
+                                       (__v8sf)_mm256_setzero_ps()))
+
+#define _mm256_sub_round_pd(A, B, R)                                           \
+  ((__m256d)__builtin_ia32_vsubpd256_round((__v4df)(__m256d)(A),               \
+                                           (__v4df)(__m256d)(B), (int)(R)))
+
+#define _mm256_mask_sub_round_pd(W, U, A, B, R)                                \
+  ((__m256d)__builtin_ia32_selectpd_256(                                       \
+      (__mmask8)(U), (__v4df)_mm256_sub_round_pd((A), (B), (R)),               \
+      (__v4df)(__m256d)(W)))
+
+#define _mm256_maskz_sub_round_pd(U, A, B, R)                                  \
+  ((__m256d)__builtin_ia32_selectpd_256(                                       \
+      (__mmask8)(U), (__v4df)_mm256_sub_round_pd((A), (B), (R)),               \
+      (__v4df)_mm256_setzero_pd()))
+
+#define _mm256_sub_round_ph(A, B, R)                                           \
+  ((__m256h)__builtin_ia32_vsubph256_round((__v16hf)(__m256h)(A),              \
+                                           (__v16hf)(__m256h)(B), (int)(R)))
+
+#define _mm256_mask_sub_round_ph(W, U, A, B, R)                                \
+  ((__m256h)__builtin_ia32_selectph_256(                                       \
+      (__mmask16)(U), (__v16hf)_mm256_sub_round_ph((A), (B), (R)),             \
+      (__v16hf)(__m256h)(W)))
+
+#define _mm256_maskz_sub_round_ph(U, A, B, R)                                  \
+  ((__m256h)__builtin_ia32_selectph_256(                                       \
+      (__mmask16)(U), (__v16hf)_mm256_sub_round_ph((A), (B), (R)),             \
+      (__v16hf)_mm256_setzero_ph()))
+
+#define _mm256_sub_round_ps(A, B, R)                                           \
+  ((__m256)__builtin_ia32_vsubps256_round((__v8sf)(__m256)(A),                 \
+                                          (__v8sf)(__m256)(B), (int)(R)))
+
+#define _mm256_mask_sub_round_ps(W, U, A, B, R)                                \
+  ((__m256)__builtin_ia32_selectps_256(                                        \
+      (__mmask8)(U), (__v8sf)_mm256_sub_round_ps((A), (B), (R)),               \
+      (__v8sf)(__m256)(W)))
+
+#define _mm256_maskz_sub_round_ps(U, A, B, R)                                  \
+  ((__m256)__builtin_ia32_selectps_256(                                        \
+      (__mmask8)(U), (__v8sf)_mm256_sub_round_ps((A), (B), (R)),               \
+      (__v8sf)_mm256_setzero_ps()))
+
 #endif /* __AVX10_2NIINTRIN_H */
 #endif /* __SSE2__ */

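A minimal usage sketch of the new 256-bit embedded-rounding intrinsics defined above (not part of the patch; it assumes a compiler with -mavx10.2-256 support and that <immintrin.h> pulls in avx10_2niintrin.h). It shows the three masking conventions the macros encode: unmasked, merge-masking via the W operand, and zero-masking via _mm256_setzero_*:

#include <immintrin.h>

__m256d scale_sub_sqrt(__m256d a, __m256d b, __m256d src, __mmask8 m) {
  /* Unmasked: every lane is computed; the rounding mode comes from the
     immediate rather than from MXCSR. */
  __m256d s = _mm256_scalef_round_pd(
      a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
  /* Merge-masking: lanes whose bit in m is 0 keep the value from src (W). */
  __m256d t = _mm256_mask_sub_round_pd(
      src, m, s, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
  /* Zero-masking: lanes whose bit in m is 0 are set to +0.0. */
  return _mm256_maskz_sqrt_round_pd(
      m, t, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
}

As with the 512-bit intrinsics, the rounding operand must be a compile-time constant, since the macros pass it straight through as the builtin's immediate argument.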
diff --git a/clang/lib/Sema/SemaX86.cpp b/clang/lib/Sema/SemaX86.cpp
index bf2d2d8ac8f42..5547e980d4bbb 100644
--- a/clang/lib/Sema/SemaX86.cpp
+++ b/clang/lib/Sema/SemaX86.cpp
@@ -53,6 +53,12 @@ bool SemaX86::CheckBuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_minps512:
   case X86::BI__builtin_ia32_maxph512:
   case X86::BI__builtin_ia32_minph512:
+  case X86::BI__builtin_ia32_vmaxpd256_round:
+  case X86::BI__builtin_ia32_vmaxps256_round:
+  case X86::BI__builtin_ia32_vminpd256_round:
+  case X86::BI__builtin_ia32_vminps256_round:
+  case X86::BI__builtin_ia32_vmaxph256_round:
+  case X86::BI__builtin_ia32_vminph256_round:
     ArgNum = 2;
     break;
   case X86::BI__builtin_ia32_vcvtph2pd512_mask:
@@ -79,10 +85,17 @@ bool SemaX86::CheckBuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_vcomiss:
   case X86::BI__builtin_ia32_vcomish:
   case X86::BI__builtin_ia32_vcvtph2ps512_mask:
+  case X86::BI__builtin_ia32_vgetexppd256_round_mask:
+  case X86::BI__builtin_ia32_vgetexpps256_round_mask:
+  case X86::BI__builtin_ia32_vgetexpph256_round_mask:
     ArgNum = 3;
     break;
   case X86::BI__builtin_ia32_cmppd512_mask:
   case X86::BI__builtin_ia32_cmpps512_mask:
+  case X86::BI__builtin_ia32_cmpph512_mask:
+  case X86::BI__builtin_ia32_vcmppd256_round_mask:
+  case X86::BI__builtin_ia32_vcmpps256_round_mask:
+  case X86::BI__builtin_ia32_vcmpph256_round_mask:
   case X86::BI__builtin_ia32_cmpsd_mask:
   case X86::BI__builtin_ia32_cmpss_mask:
   case X86::BI__builtin_ia32_cmpsh_mask:
@@ -95,6 +108,9 @@ bool SemaX86::CheckBuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_getmantpd512_mask:
   case X86::BI__builtin_ia32_getmantps512_mask:
   case X86::BI__builtin_ia32_getmantph512_mask:
+  case X86::BI__builtin_ia32_vgetmantpd256_round_mask:
+  case X86::BI__builtin_ia32_vgetmantps256_round_mask:
+  case X86::BI__builtin_ia32_vgetmantph256_round_mask:
   case X86::BI__builtin_ia32_maxsd_round_mask:
   case X86::BI__builtin_ia32_maxss_round_mask:
   case X86::BI__builtin_ia32_maxsh_round_mask:
@@ -107,12 +123,22 @@ bool SemaX86::CheckBuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_rndscalepd_mask:
   case X86::BI__builtin_ia32_rndscaleps_mask:
   case X86::BI__builtin_ia32_rndscaleph_mask:
+  case X86::BI__builtin_ia32_vreducepd256_round_mask:
+  case X86::BI__builtin_ia32_vreduceps256_round_mask:
+  case X86::BI__builtin_ia32_vreduceph256_round_mask:
+  case X86::BI__builtin_ia32_vrndscalepd256_round_mask:
+  case X86::BI__builtin_ia32_vrndscaleps256_round_mask:
+  case X86::BI__builtin_ia32_vrndscaleph256_round_mask:
     ArgNum = 4;
     break;
   case X86::BI__builtin_ia32_fixupimmpd512_mask:
   case X86::BI__builtin_ia32_fixupimmpd512_maskz:
   case X86::BI__builtin_ia32_fixupimmps512_mask:
   case X86::BI__builtin_ia32_fixupimmps512_maskz:
+  case X86::BI__builtin_ia32_vfixupimmpd256_round_mask:
+  case X86::BI__builtin_ia32_vfixupimmpd256_round_maskz:
+  case X86::BI__builtin_ia32_vfixupimmps256_round_mask:
+  case X86::BI__builtin_ia32_vfixupimmps256_round_maskz:
   case X86::BI__builtin_ia32_fixupimmsd_mask:
   case X86::BI__builtin_ia32_fixupimmsd_maskz:
   case X86::BI__builtin_ia32_fixupimmss_mask:
@@ -122,6 +148,8 @@ bool SemaX86::CheckBuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_getmantsh_round_mask:
   case X86::BI__builtin_ia32_rangepd512_mask:
   case X86::BI__builtin_ia32_rangeps512_mask:
+  case X86::BI__builtin_ia32_vrangepd256_round_mask:
+  case X86::BI__builtin_ia32_vrangeps256_round_mask:
   case X86::BI__builtin_ia32_rangesd128_round_mask:
   case X86::BI__builtin_ia32_rangess128_round_mask:
   case X86::BI__builtin_ia32_reducesd_mask:
@@ -147,6 +175,9 @@ bool SemaX86::CheckBuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_sqrtpd512:
   case X86::BI__builtin_ia32_sqrtps512:
   case X86::BI__builtin_ia32_sqrtph512:
+  case X86::BI__builtin_ia32_vsqrtpd256_round:
+  case X86::BI__builtin_ia32_vsqrtps256_round:
+  case X86::BI__builtin_ia32_vsqrtph256_round:
     ArgNum = 1;
     HasRC = true;
     break;
@@ -165,6 +196,15 @@ bool SemaX86::CheckBuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_vaddpd256_round:
   case X86::BI__builtin_ia32_vaddph256_round:
   case X86::BI__builtin_ia32_vaddps256_round:
+  case X86::BI__builtin_ia32_vdivpd256_round:
+  case X86::BI__builtin_ia32_vdivph256_round:
+  case X86::BI__builtin_ia32_vdivps256_round:
+  case X86::BI__builtin_ia32_vmulpd256_round:
+  case X86::BI__builtin_ia32_vmulph256_round:
+  case X86::BI__builtin_ia32_vmulps256_round:
+  case X86::BI__builtin_ia32_vsubpd256_round:
+  case X86::BI__builtin_ia32_vsubph256_round:
+  case X86::BI__builtin_ia32_vsubps256_round:
   case X86::BI__builtin_ia32_cvtsi2sd64:
   case X86::BI__builtin_ia32_cvtsi2ss32:
   case X86::BI__builtin_ia32_cvtsi2ss64:
@@ -207,6 +247,52 @@ bool SemaX86::CheckBuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_vcvtph2uqq512_mask:
   case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
   case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
+  case X86::BI__builtin_ia32_vcvtph2pd256_round_mask:
+  case X86::BI__builtin_ia32_vcvtph2psx256_round_mask:
+  case X86::BI__builtin_ia32_vcvtps2pd256_round_mask:
+  case X86::BI__builtin_ia32_vcvttpd2dq256_round_mask:
+  case X86::BI__builtin_ia32_vcvttpd2qq256_round_mask:
+  case X86::BI__builtin_ia32_vcvttpd2udq256_round_mask:
+  case X86::BI__builtin_ia32_vcvttpd2uqq256_round_mask:
+  case X86::BI__builtin_ia32_vcvttps2dq256_round_mask:
+  case X86::BI__builtin_ia32_vcvttps2qq256_round_mask:
+  case X86::BI__builtin_ia32_vcvttps2udq256_round_mask:
+  case X86::BI__builtin_ia32_vcvttps2uqq256_round_mask:
+  case X86::BI__builtin_ia32_vcvttph2w256_round_mask:
+  case X86::BI__builtin_ia32_vcvttph2uw256_round_mask:
+  case X86::BI__builtin_ia32_vcvttph2dq256_round_mask:
+  case X86::BI__builtin_ia32_vcvttph2udq256_round_mask:
+  case X86::BI__builtin_ia32_vcvttph2qq256_round_mask:
+  case X86::BI__builtin_ia32_vcvttph2uqq256_round_mask:
+  case X86::BI__builtin_ia32_vcvtdq2ps256_round_mask:
+  case X86::BI__builtin_ia32_vcvtudq2ps256_round_mask:
+  case X86::BI__builtin_ia32_vcvtpd2ph256_round_mask:
+  case X86::BI__builtin_ia32_vcvtps2phx256_round_mask:
+  case X86::BI__builtin_ia32_vcvtpd2ps256_round_mask:
+  case X86::BI__builtin_ia32_vcvtpd2dq256_round_mask:
+  case X86::BI__builtin_ia32_vcvtpd2qq256_round_mask:
+  case X86::BI__builtin_ia32_vcvtpd2udq256_round_mask:
+  case X86::BI__builtin_ia32_vcvtpd2uqq256_round_mask:
+  case X86::BI__builtin_ia32_vcvtps2dq256_round_mask:
+  case X86::BI__builtin_ia32_vcvtps2qq256_round_mask:
+  case X86::BI__builtin_ia32_vcvtps2udq256_round_mask:
+  case X86::BI__builtin_ia32_vcvtps2uqq256_round_mask:
+  case X86::BI__builtin_ia32_vcvtqq2pd256_round_mask:
+  case X86::BI__builtin_ia32_vcvtqq2ps256_round_mask:
+  case X86::BI__builtin_ia32_vcvtuqq2pd256_round_mask:
+  case X86::BI__builtin_ia32_vcvtuqq2ps256_round_mask:
+  case X86::BI__builtin_ia32_vcvtdq2ph256_round_mask:
+  case X86::BI__builtin_ia32_vcvtudq2ph256_round_mask:
+  case X86::BI__builtin_ia32_vcvtw2ph256_round_mask:
+  case X86::BI__builtin_ia32_vcvtuw2ph256_round_mask:
+  case X86::BI__builtin_ia32_vcvtph2w256_round_mask:
+  case X86::BI__builtin_ia32_vcvtph2uw256_round_mask:
+  case X86::BI__builtin_ia32_vcvtph2dq256_round_mask:
+  case X86::BI__builtin_ia32_vcvtph2udq256_round_mask:
+  case X86::BI__builtin_ia32_vcvtph2qq256_round_mask:
+  case X86::BI__builtin_ia32_vcvtph2uqq256_round_mask:
+  case X86::BI__builtin_ia32_vcvtqq2ph256_round_mask:
+  case X86::BI__builtin_ia32_vcvtuqq2ph256_round_mask:
     ArgNum = 3;
     HasRC = true;
     break;
@@ -225,6 +311,9 @@ bool SemaX86::CheckBuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_scalefph512_mask:
   case X86::BI__builtin_ia32_scalefpd512_mask:
   case X86::BI__builtin_ia32_scalefps512_mask:
+  case X86::BI__builtin_ia32_vscalefph256_round_mask:
+  case X86::BI__builtin_ia32_vscalefpd256_round_mask:
+  case X86::BI__builtin_ia32_vscalefps256_round_mask:
   case X86::BI__builtin_ia32_scalefsd_round_mask:
   case X86::BI__builtin_ia32_scalefss_round_mask:
   case X86::BI__builtin_ia32_scalefsh_round_mask:
@@ -267,6 +356,38 @@ bool SemaX86::CheckBuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
   case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
   case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
+  case X86::BI__builtin_ia32_vfmaddpd256_round_mask:
+  case X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
+  case X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
+  case X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
+  case X86::BI__builtin_ia32_vfmaddps256_round_mask:
+  case X86::BI__builtin_ia32_vfmaddps256_round_maskz:
+  case X86::BI__builtin_ia32_vfmaddps256_round_mask3:
+  case X86::BI__builtin_ia32_vfmsubps256_round_mask3:
+  case X86::BI__builtin_ia32_vfmaddph256_round_mask:
+  case X86::BI__builtin_ia32_vfmaddph256_round_maskz:
+  case X86::BI__builtin_ia32_vfmaddph256_round_mask3:
+  case X86::BI__builtin_ia32_vfmsubph256_round_mask3:
+  case X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
+  case X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
+  case X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
+  case X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
+  case X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
+  case X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
+  case X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
+  case X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
+  case X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
+  case X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
+  case X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
+  case X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
+  case X86::BI__builtin_ia32_vfmaddcph256_round_mask:
+  case X86::BI__builtin_ia32_vfmaddcph256_round_maskz:
+  case X86::BI__builtin_ia32_vfmaddcph256_round_mask3:
+  case X86::BI__builtin_ia32_vfcmaddcph256_round_mask:
+  case X86::BI__builtin_ia32_vfcmaddcph256_round_maskz:
+  case X86::BI__builtin_ia32_vfcmaddcph256_round_mask3:
+  case X86::BI__builtin_ia32_vfmulcph256_round_mask:
+  case X86::BI__builtin_ia32_vfcmulcph256_round_mask:
   case X86::BI__builtin_ia32_vfmaddcsh_mask:
   case X86::BI__builtin_ia32_vfmaddcsh_round_mask:
   case X86::BI__builtin_ia32_vfmaddcsh_round_mask3:
@@ -637,6 +758,9 @@ bool SemaX86::CheckBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
   case X86::BI__builtin_ia32_getmantph128_mask:
   case X86::BI__builtin_ia32_getmantph256_mask:
   case X86::BI__builtin_ia32_getmantph512_mask:
+  case X86::BI__builtin_ia32_vgetmantpd256_round_mask:
+  case X86::BI__builtin_ia32_vgetmantps256_round_mask:
+  case X86::BI__builtin_ia32_vgetmantph256_round_mask:
   case X86::BI__builtin_ia32_vec_ext_v16qi:
   case X86::BI__builtin_ia32_vec_ext_v16hi:
     i = 1;
@@ -655,6 +779,8 @@ bool SemaX86::CheckBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
   case X86::BI__builtin_ia32_rangeps128_mask:
   case X86::BI__builtin_ia32_rangeps256_mask:
   case X86::BI__builtin_ia32_rangeps512_mask:
+  case X86::BI__builtin_ia32_vrangepd256_round_mask:
+  case X86::BI__builtin_ia32_vrangeps256_round_mask:
   case X86::BI__builtin_ia32_getmantsd_round_mask:
   case X86::BI__builtin_ia32_getmantss_round_mask:
   case X86::BI__builtin_ia32_getmantsh_round_mask:
@@ -681,6 +807,10 @@ bool SemaX86::CheckBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
   case X86::BI__builtin_ia32_cmppd256_mask:
   case X86::BI__builtin_ia32_cmpps512_mask:
   case X86::BI__builtin_ia32_cmppd512_mask:
+  case X86::BI__builtin_ia32_cmpph512_mask:
+  case X86::BI__builtin_ia32_vcmppd256_round_mask:
+  case X86::BI__builtin_ia32_vcmpps256_round_mask:
+  case X86::BI__builtin_ia32_vcmpph256_round_mask:
   case X86::BI__builtin_ia32_cmpsd_mask:
   case X86::BI__builtin_ia32_cmpss_mask:
   case X86::BI__builtin_ia32_vec_set_v32qi:
@@ -726,6 +856,12 @@ bool SemaX86::CheckBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
   case X86::BI__builtin_ia32_reduceph128_mask:
   case X86::BI__builtin_ia32_reduceph256_mask:
   case X86::BI__builtin_ia32_reduceph512_mask:
+  case X86::BI__builtin_ia32_vreducepd256_round_mask:
+  case X86::BI__builtin_ia32_vreduceps256_round_mask:
+  case X86::BI__builtin_ia32_vreduceph256_round_mask:
+  case X86::BI__builtin_ia32_vrndscalepd256_round_mask:
+  case X86::BI__builtin_ia32_vrndscaleps256_round_mask:
+  case X86::BI__builtin_ia32_vrndscaleph256_round_mask:
   case X86::BI__builtin_ia32_prold512:
   case X86::BI__builtin_ia32_prolq512:
   case X86::BI__builtin_ia32_prold128:

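The effect of the Sema changes is that the new 256-bit round/SAE builtins get the same immediate validation as their 512-bit counterparts. A hedged illustration (assuming the usual rule for rounding-control builtins, i.e. _MM_FROUND_CUR_DIRECTION or a rounding direction combined with _MM_FROUND_NO_EXC):

#include <immintrin.h>

__m256 ok_embedded_rounding(__m256 a, __m256 b) {
  /* Accepted: an explicit direction OR'ed with _MM_FROUND_NO_EXC. */
  return _mm256_sub_round_ps(a, b, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
}

__m256 ok_current_direction(__m256 a, __m256 b) {
  /* Accepted: defer to the current MXCSR rounding mode. */
  return _mm256_sub_round_ps(a, b, _MM_FROUND_CUR_DIRECTION);
}

/* Not accepted: a non-constant rounding argument (or an unsupported constant
   such as _MM_FROUND_TO_ZERO without _MM_FROUND_NO_EXC) is rejected at
   compile time by the checks above.
__m256 bad(__m256 a, __m256 b, int r) {
  return _mm256_sub_round_ps(a, b, r);
}
*/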
diff --git a/clang/test/CodeGen/X86/avx10_2ni-builtins.c b/clang/test/CodeGen/X86/avx10_2ni-builtins.c
index baf3a35a9a191..ace3b7e30c7f6 100644
--- a/clang/test/CodeGen/X86/avx10_2ni-builtins.c
+++ b/clang/test/CodeGen/X86/avx10_2ni-builtins.c
@@ -104,3 +104,2347 @@ __m256 test_mm256_maskz_add_round_ps(__mmask8 __U, __m256 __A, __m256 __B) {
 // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm256_maskz_add_round_ps(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
 }
+
+__mmask8 test_mm256_cmp_round_pd_mask(__m256d a, __m256d b) {
+// CHECK-LABEL: @test_mm256_cmp_round_pd_mask
+// CHECK: fcmp oeq <4 x double> %{{.*}}, %{{.*}}
+  return _mm256_cmp_round_pd_mask(a, b, _CMP_EQ_OQ, _MM_FROUND_NO_EXC);
+}
+
+__mmask8 test_mm256_mask_cmp_round_pd_mask(__mmask8 m, __m256d a, __m256d b) {
+// CHECK-LABEL: @test_mm256_mask_cmp_round_pd_mask
+// CHECK: [[CMP:%.*]] = fcmp oeq <4 x double> %{{.*}}, %{{.*}}
+// CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_round_pd_mask(m, a, b, _CMP_EQ_OQ, _MM_FROUND_NO_EXC);
+}
+
+__mmask16 test_mm256_cmp_round_ph_mask(__m256h a, __m256h b) {
+// CHECK-LABEL: @test_mm256_cmp_round_ph_mask
+// CHECK: fcmp oeq <16 x half> %{{.*}}, %{{.*}}
+  return _mm256_cmp_round_ph_mask(a, b, _CMP_EQ_OQ, _MM_FROUND_NO_EXC);
+}
+
+__mmask16 test_mm256_mask_cmp_round_ph_mask(__mmask16 m, __m256h a, __m256h b) {
+// CHECK-LABEL: @test_mm256_mask_cmp_round_ph_mask
+// CHECK: [[CMP:%.*]] = fcmp oeq <16 x half> %{{.*}}, %{{.*}}
+// CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_round_ph_mask(m, a, b, _CMP_EQ_OQ, _MM_FROUND_NO_EXC);
+}
+
+__mmask8 test_mm256_cmp_round_ps_mask(__m256 a, __m256 b) {
+// CHECK-LABEL: @test_mm256_cmp_round_ps_mask
+// CHECK: fcmp oeq <8 x float> %{{.*}}, %{{.*}}
+  return _mm256_cmp_round_ps_mask(a, b, _CMP_EQ_OQ, _MM_FROUND_NO_EXC);
+}
+
+__mmask8 test_mm256_mask_cmp_round_ps_mask(__mmask8 m, __m256 a, __m256 b) {
+// CHECK-LABEL: @test_mm256_mask_cmp_round_ps_mask
+// CHECK: [[CMP:%.*]] = fcmp oeq <8 x float> %{{.*}}, %{{.*}}
+// CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_round_ps_mask(m, a, b, _CMP_EQ_OQ, _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm256_cvt_roundepi32_ph(__m256i A) {
+// CHECK-LABEL: test_mm256_cvt_roundepi32_ph
+// CHECK: @llvm.x86.avx512.sitofp.round.v8f16.v8i32(<8 x i32> %{{.*}}, i32 11)
+  return _mm256_cvt_roundepi32_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm256_mask_cvt_roundepi32_ph(__m128h A, __mmask8 B, __m256i C) {
+// CHECK-LABEL: test_mm256_mask_cvt_roundepi32_ph
+// CHECK: @llvm.x86.avx512.sitofp.round.v8f16.v8i32(<8 x i32> %{{.*}}, i32 10)
+// CHECK: select <8 x i1> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}}
+  return _mm256_mask_cvt_roundepi32_ph(A, B, C, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm256_maskz_cvt_roundepi32_ph(__mmask8 A, __m256i B) {
+// CHECK-LABEL: test_mm256_maskz_cvt_roundepi32_ph
+// CHECK: @llvm.x86.avx512.sitofp.round.v8f16.v8i32(<8 x i32> %{{.*}}, i32 9)
+// CHECK: select <8 x i1> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}}
+  return _mm256_maskz_cvt_roundepi32_ph(A, B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_cvt_roundepi32_ps(__m256i __A)
+{
+// CHECK-LABEL: @test_mm256_cvt_roundepi32_ps
+// CHECK: @llvm.x86.avx512.sitofp.round.v8f32.v8i32
+  return _mm256_cvt_roundepi32_ps(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_mask_cvt_roundepi32_ps(__m256 __W, __mmask8 __U, __m256i __A)
+{
+// CHECK-LABEL: @test_mm256_mask_cvt_roundepi32_ps
+// CHECK: @llvm.x86.avx512.sitofp.round.v8f32.v8i32
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_mask_cvt_roundepi32_ps(__W, __U, __A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_maskz_cvt_roundepi32_ps(__mmask8 __U, __m256i __A)
+{
+// CHECK-LABEL: @test_mm256_maskz_cvt_roundepi32_ps
+// CHECK: @llvm.x86.avx512.sitofp.round.v8f32.v8i32
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_maskz_cvt_roundepi32_ps(__U, __A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128i test_mm256_cvt_roundpd_epi32(__m256d A)
+{
+// CHECK-LABEL: @test_mm256_cvt_roundpd_epi32
+// CHECK: @llvm.x86.avx10.mask.vcvtpd2dq256
+  return _mm256_cvt_roundpd_epi32(A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128i test_mm256_mask_cvt_roundpd_epi32(__m128i W,__mmask8 U,__m256d A)
+{
+// CHECK-LABEL: @test_mm256_mask_cvt_roundpd_epi32
+// CHECK: @llvm.x86.avx10.mask.vcvtpd2dq256
+  return _mm256_mask_cvt_roundpd_epi32(W, U, A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128i test_mm256_maskz_cvt_roundpd_epi32(__mmask8 U, __m256d A)
+{
+// CHECK-LABEL: @test_mm256_maskz_cvt_roundpd_epi32
+// CHECK: @llvm.x86.avx10.mask.vcvtpd2dq256
+  return _mm256_maskz_cvt_roundpd_epi32(U, A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm256_cvt_roundpd_ph(__m256d A) {
+// CHECK-LABEL: test_mm256_cvt_roundpd_ph
+// CHECK: @llvm.x86.avx10.mask.vcvtpd2ph256
+  return _mm256_cvt_roundpd_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm256_mask_cvt_roundpd_ph(__m128h A, __mmask8 B, __m256d C) {
+// CHECK-LABEL: test_mm256_mask_cvt_roundpd_ph
+// CHECK: @llvm.x86.avx10.mask.vcvtpd2ph256
+  return _mm256_mask_cvt_roundpd_ph(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm256_maskz_cvt_roundpd_ph(__mmask8 A, __m256d B) {
+// CHECK-LABEL: test_mm256_maskz_cvt_roundpd_ph
+// CHECK: @llvm.x86.avx10.mask.vcvtpd2ph256
+  return _mm256_maskz_cvt_roundpd_ph(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128 test_mm256_cvt_roundpd_ps(__m256d A)
+{
+// CHECK-LABEL: @test_mm256_cvt_roundpd_ps
+// CHECK: @llvm.x86.avx10.mask.vcvtpd2ps256
+  return _mm256_cvt_roundpd_ps(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128 test_mm256_mask_cvt_roundpd_ps(__m128 W, __mmask8 U,__m256d A)
+{
+// CHECK-LABEL: @test_mm256_mask_cvt_roundpd_ps
+// CHECK: @llvm.x86.avx10.mask.vcvtpd2ps256
+  return _mm256_mask_cvt_roundpd_ps(W, U, A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128 test_mm256_maskz_cvt_roundpd_ps(__mmask8 U, __m256d A)
+{
+// CHECK-LABEL: @test_mm256_maskz_cvt_roundpd_ps
+// CHECK: @llvm.x86.avx10.mask.vcvtpd2ps256
+  return _mm256_maskz_cvt_roundpd_ps(U, A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_cvt_roundpd_epi64(__m256d __A) {
+// CHECK-LABEL: @test_mm256_cvt_roundpd_epi64
+// CHECK: @llvm.x86.avx10.mask.vcvtpd2qq256
+  return _mm256_cvt_roundpd_epi64(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_mask_cvt_roundpd_epi64(__m256i __W, __mmask8 __U, __m256d __A) {
+// CHECK-LABEL: @test_mm256_mask_cvt_roundpd_epi64
+// CHECK: @llvm.x86.avx10.mask.vcvtpd2qq256
+  return _mm256_mask_cvt_roundpd_epi64(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_maskz_cvt_roundpd_epi64(__mmask8 __U, __m256d __A) {
+// CHECK-LABEL: @test_mm256_maskz_cvt_roundpd_epi64
+// CHECK: @llvm.x86.avx10.mask.vcvtpd2qq256
+  return _mm256_maskz_cvt_roundpd_epi64(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m128i test_mm256_cvt_roundpd_epu32(__m256d A)
+{
+// CHECK-LABEL: @test_mm256_cvt_roundpd_epu32
+// CHECK: @llvm.x86.avx10.mask.vcvtpd2udq256
+  return _mm256_cvt_roundpd_epu32(A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128i test_mm256_mask_cvt_roundpd_epu32(__m128i W,__mmask8 U,__m256d A)
+{
+// CHECK-LABEL: @test_mm256_mask_cvt_roundpd_epu32
+// CHECK: @llvm.x86.avx10.mask.vcvtpd2udq256
+  return _mm256_mask_cvt_roundpd_epu32(W, U, A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128i test_mm256_maskz_cvt_roundpd_epu32(__mmask8 U, __m256d A)
+{
+// CHECK-LABEL: @test_mm256_maskz_cvt_roundpd_epu32
+// CHECK: @llvm.x86.avx10.mask.vcvtpd2udq256
+  return _mm256_maskz_cvt_roundpd_epu32(U, A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_cvt_roundpd_epu64(__m256d __A) {
+// CHECK-LABEL: @test_mm256_cvt_roundpd_epu64
+// CHECK: @llvm.x86.avx10.mask.vcvtpd2uqq256
+  return _mm256_cvt_roundpd_epu64(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_mask_cvt_roundpd_epu64(__m256i __W, __mmask8 __U, __m256d __A) {
+// CHECK-LABEL: @test_mm256_mask_cvt_roundpd_epu64
+// CHECK: @llvm.x86.avx10.mask.vcvtpd2uqq256
+  return _mm256_mask_cvt_roundpd_epu64(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_maskz_cvt_roundpd_epu64(__mmask8 __U, __m256d __A) {
+// CHECK-LABEL: @test_mm256_maskz_cvt_roundpd_epu64
+// CHECK: @llvm.x86.avx10.mask.vcvtpd2uqq256
+  return _mm256_maskz_cvt_roundpd_epu64(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_cvt_roundph_epi32(__m128h A) {
+// CHECK-LABEL: test_mm256_cvt_roundph_epi32
+// CHECK: @llvm.x86.avx10.mask.vcvtph2dq256
+  return _mm256_cvt_roundph_epi32(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_mask_cvt_roundph_epi32(__m256i A, __mmask16 B, __m128h C) {
+// CHECK-LABEL: test_mm256_mask_cvt_roundph_epi32
+// CHECK: @llvm.x86.avx10.mask.vcvtph2dq256
+  return _mm256_mask_cvt_roundph_epi32(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_maskz_cvt_roundph_epi32(__mmask16 A, __m128h B) {
+// CHECK-LABEL: test_mm256_maskz_cvt_roundph_epi32
+// CHECK: @llvm.x86.avx10.mask.vcvtph2dq256
+  return _mm256_maskz_cvt_roundph_epi32(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_cvt_roundph_pd(__m128h A) {
+// CHECK-LABEL: test_mm256_cvt_roundph_pd
+// CHECK: @llvm.x86.avx10.mask.vcvtph2pd256
+  return _mm256_cvt_roundph_pd(A, _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_mask_cvt_roundph_pd(__m256d A, __mmask8 B, __m128h C) {
+// CHECK-LABEL: test_mm256_mask_cvt_roundph_pd
+// CHECK: @llvm.x86.avx10.mask.vcvtph2pd256
+  return _mm256_mask_cvt_roundph_pd(A, B, C, _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_maskz_cvt_roundph_pd(__mmask8 A, __m128h B) {
+// CHECK-LABEL: test_mm256_maskz_cvt_roundph_pd
+// CHECK: @llvm.x86.avx10.mask.vcvtph2pd256
+  return _mm256_maskz_cvt_roundph_pd(A, B, _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_cvtx_roundph_ps(__m128h A) {
+// CHECK-LABEL: test_mm256_cvtx_roundph_ps
+// CHECK: @llvm.x86.avx10.mask.vcvtph2psx256
+  return _mm256_cvtx_roundph_ps(A, _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_mask_cvtx_roundph_ps(__m256 A, __mmask16 B, __m128h C) {
+// CHECK-LABEL: test_mm256_mask_cvtx_roundph_ps
+// CHECK: @llvm.x86.avx10.mask.vcvtph2psx256
+  return _mm256_mask_cvtx_roundph_ps(A, B, C, _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_maskz_cvtx_roundph_ps(__mmask16 A, __m128h B) {
+// CHECK-LABEL: test_mm256_maskz_cvtx_roundph_ps
+// CHECK: @llvm.x86.avx10.mask.vcvtph2psx256
+  return _mm256_maskz_cvtx_roundph_ps(A, B, _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_cvt_roundph_epi64(__m128h A) {
+// CHECK-LABEL: test_mm256_cvt_roundph_epi64
+// CHECK: @llvm.x86.avx10.mask.vcvtph2qq256
+  return _mm256_cvt_roundph_epi64(A, _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_mask_cvt_roundph_epi64(__m256i A, __mmask8 B, __m128h C) {
+// CHECK-LABEL: test_mm256_mask_cvt_roundph_epi64
+// CHECK: @llvm.x86.avx10.mask.vcvtph2qq256
+  return _mm256_mask_cvt_roundph_epi64(A, B, C, _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_maskz_cvt_roundph_epi64(__mmask8 A, __m128h B) {
+// CHECK-LABEL: test_mm256_maskz_cvt_roundph_epi64
+// CHECK: @llvm.x86.avx10.mask.vcvtph2qq256
+  return _mm256_maskz_cvt_roundph_epi64(A, B, _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_cvt_roundph_epu32(__m128h A) {
+// CHECK-LABEL: test_mm256_cvt_roundph_epu32
+// CHECK: @llvm.x86.avx10.mask.vcvtph2udq256
+  return _mm256_cvt_roundph_epu32(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_mask_cvt_roundph_epu32(__m256i A, __mmask16 B, __m128h C) {
+// CHECK-LABEL: test_mm256_mask_cvt_roundph_epu32
+// CHECK: @llvm.x86.avx10.mask.vcvtph2udq256
+  return _mm256_mask_cvt_roundph_epu32(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_maskz_cvt_roundph_epu32(__mmask16 A, __m128h B) {
+// CHECK-LABEL: test_mm256_maskz_cvt_roundph_epu32
+// CHECK: @llvm.x86.avx10.mask.vcvtph2udq256
+  return _mm256_maskz_cvt_roundph_epu32(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_cvt_roundph_epu64(__m128h A) {
+// CHECK-LABEL: test_mm256_cvt_roundph_epu64
+// CHECK: @llvm.x86.avx10.mask.vcvtph2uqq256
+  return _mm256_cvt_roundph_epu64(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_mask_cvt_roundph_epu64(__m256i A, __mmask8 B, __m128h C) {
+// CHECK-LABEL: test_mm256_mask_cvt_roundph_epu64
+// CHECK: @llvm.x86.avx10.mask.vcvtph2uqq256
+  return _mm256_mask_cvt_roundph_epu64(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_maskz_cvt_roundph_epu64(__mmask8 A, __m128h B) {
+// CHECK-LABEL: test_mm256_maskz_cvt_roundph_epu64
+// CHECK: @llvm.x86.avx10.mask.vcvtph2uqq256
+  return _mm256_maskz_cvt_roundph_epu64(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_cvt_roundph_epu16(__m256h A) {
+// CHECK-LABEL: test_mm256_cvt_roundph_epu16
+// CHECK: @llvm.x86.avx10.mask.vcvtph2uw256
+  return _mm256_cvt_roundph_epu16(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_mask_cvt_roundph_epu16(__m256i A, __mmask32 B, __m256h C) {
+// CHECK-LABEL: test_mm256_mask_cvt_roundph_epu16
+// CHECK: @llvm.x86.avx10.mask.vcvtph2uw256
+  return _mm256_mask_cvt_roundph_epu16(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_maskz_cvt_roundph_epu16(__mmask32 A, __m256h B) {
+// CHECK-LABEL: test_mm256_maskz_cvt_roundph_epu16
+// CHECK: @llvm.x86.avx10.mask.vcvtph2uw256
+  return _mm256_maskz_cvt_roundph_epu16(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_cvt_roundph_epi16(__m256h A) {
+// CHECK-LABEL: test_mm256_cvt_roundph_epi16
+// CHECK: @llvm.x86.avx10.mask.vcvtph2w256
+  return _mm256_cvt_roundph_epi16(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_mask_cvt_roundph_epi16(__m256i A, __mmask32 B, __m256h C) {
+// CHECK-LABEL: test_mm256_mask_cvt_roundph_epi16
+// CHECK: @llvm.x86.avx10.mask.vcvtph2w256
+  return _mm256_mask_cvt_roundph_epi16(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_maskz_cvt_roundph_epi16(__mmask32 A, __m256h B) {
+// CHECK-LABEL: test_mm256_maskz_cvt_roundph_epi16
+// CHECK: @llvm.x86.avx10.mask.vcvtph2w256
+  return _mm256_maskz_cvt_roundph_epi16(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_cvt_roundps_epi32(__m256 __A)
+{
+// CHECK-LABEL: @test_mm256_cvt_roundps_epi32
+// CHECK: @llvm.x86.avx10.mask.vcvtps2dq256
+  return _mm256_cvt_roundps_epi32(__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_mask_cvt_roundps_epi32(__m256i __W,__mmask16 __U,__m256 __A)
+{
+// CHECK-LABEL: @test_mm256_mask_cvt_roundps_epi32
+// CHECK: @llvm.x86.avx10.mask.vcvtps2dq256
+  return _mm256_mask_cvt_roundps_epi32(__W,__U,__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_maskz_cvt_roundps_epi32(__mmask16 __U, __m256 __A)
+{
+// CHECK-LABEL: @test_mm256_maskz_cvt_roundps_epi32
+// CHECK: @llvm.x86.avx10.mask.vcvtps2dq256
+  return _mm256_maskz_cvt_roundps_epi32(__U,__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_cvt_roundps_pd(__m128 __A) {
+// CHECK-LABEL: @test_mm256_cvt_roundps_pd
+// CHECK: @llvm.x86.avx10.mask.vcvtps2pd256
+  return _mm256_cvt_roundps_pd(__A, _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_mask_cvt_roundps_pd(__m256d __W, __mmask8 __U, __m128 __A) {
+// CHECK-LABEL: @test_mm256_mask_cvt_roundps_pd
+// CHECK: @llvm.x86.avx10.mask.vcvtps2pd256
+  return _mm256_mask_cvt_roundps_pd(__W, __U, __A, _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_maskz_cvt_roundps_pd(__mmask8 __U, __m128 __A) {
+// CHECK-LABEL: @test_mm256_maskz_cvt_roundps_pd
+// CHECK: @llvm.x86.avx10.mask.vcvtps2pd256
+  return _mm256_maskz_cvt_roundps_pd(__U, __A, _MM_FROUND_NO_EXC);
+}
+
+// FIXME: We may change to @llvm.x86.avx10.mask.vcvtps2ph256 in the future.
+__m128i test_mm256_cvt_roundps_ph(__m256  __A)
+{
+  // CHECK-LABEL: @test_mm256_cvt_roundps_ph
+  // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256
+    return _mm256_cvt_roundps_ph(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128i test_mm256_mask_cvt_roundps_ph(__m128i __W , __mmask16 __U, __m256  __A)
+{
+  // CHECK-LABEL: @test_mm256_mask_cvt_roundps_ph
+  // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256
+    return _mm256_mask_cvt_roundps_ph(__W, __U, __A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128i test_mm256_maskz_cvt_roundps_ph(__mmask16 __U, __m256  __A)
+{
+  // CHECK-LABEL: @test_mm256_maskz_cvt_roundps_ph
+  // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256
+    return _mm256_maskz_cvt_roundps_ph(__U, __A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm256_cvtx_roundps_ph(__m256 A) {
+// CHECK-LABEL: test_mm256_cvtx_roundps_ph
+// CHECK: @llvm.x86.avx10.mask.vcvtps2phx256
+  return _mm256_cvtx_roundps_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm256_mask_cvtx_roundps_ph(__m128h A, __mmask16 B, __m256 C) {
+// CHECK-LABEL: test_mm256_mask_cvtx_roundps_ph
+// CHECK: @llvm.x86.avx10.mask.vcvtps2phx256
+  return _mm256_mask_cvtx_roundps_ph(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm256_maskz_cvtx_roundps_ph(__mmask16 A, __m256 B) {
+// CHECK-LABEL: test_mm256_maskz_cvtx_roundps_ph
+// CHECK: @llvm.x86.avx10.mask.vcvtps2phx256
+  return _mm256_maskz_cvtx_roundps_ph(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_cvt_roundps_epi64(__m128 __A) {
+// CHECK-LABEL: @test_mm256_cvt_roundps_epi64
+// CHECK: @llvm.x86.avx10.mask.vcvtps2qq256
+  return _mm256_cvt_roundps_epi64(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_mask_cvt_roundps_epi64(__m256i __W, __mmask8 __U, __m128 __A) {
+// CHECK-LABEL: @test_mm256_mask_cvt_roundps_epi64
+// CHECK: @llvm.x86.avx10.mask.vcvtps2qq256
+  return _mm256_mask_cvt_roundps_epi64(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_maskz_cvt_roundps_epi64(__mmask8 __U, __m128 __A) {
+// CHECK-LABEL: @test_mm256_maskz_cvt_roundps_epi64
+// CHECK: @llvm.x86.avx10.mask.vcvtps2qq256
+  return _mm256_maskz_cvt_roundps_epi64(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_cvt_roundps_epu32(__m256 __A)
+{
+// CHECK-LABEL: @test_mm256_cvt_roundps_epu32
+// CHECK: @llvm.x86.avx10.mask.vcvtps2udq256
+  return _mm256_cvt_roundps_epu32(__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_mask_cvt_roundps_epu32(__m256i __W,__mmask16 __U,__m256 __A)
+{
+// CHECK-LABEL: @test_mm256_mask_cvt_roundps_epu32
+// CHECK: @llvm.x86.avx10.mask.vcvtps2udq256
+  return _mm256_mask_cvt_roundps_epu32(__W,__U,__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_maskz_cvt_roundps_epu32(__mmask16 __U,__m256 __A)
+{
+// CHECK-LABEL: @test_mm256_maskz_cvt_roundps_epu32
+// CHECK: @llvm.x86.avx10.mask.vcvtps2udq256
+  return _mm256_maskz_cvt_roundps_epu32(__U,__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_cvt_roundps_epu64(__m128 __A) {
+// CHECK-LABEL: @test_mm256_cvt_roundps_epu64
+// CHECK: @llvm.x86.avx10.mask.vcvtps2uqq256
+  return _mm256_cvt_roundps_epu64(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_mask_cvt_roundps_epu64(__m256i __W, __mmask8 __U, __m128 __A) {
+// CHECK-LABEL: @test_mm256_mask_cvt_roundps_epu64
+// CHECK: @llvm.x86.avx10.mask.vcvtps2uqq256
+  return _mm256_mask_cvt_roundps_epu64(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_maskz_cvt_roundps_epu64(__mmask8 __U, __m128 __A) {
+// CHECK-LABEL: @test_mm256_maskz_cvt_roundps_epu64
+// CHECK: @llvm.x86.avx10.mask.vcvtps2uqq256
+  return _mm256_maskz_cvt_roundps_epu64(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256d test__mm256_cvt_roundepi64_pd(__m256i __A) {
+// CHECK-LABEL: @test__mm256_cvt_roundepi64_pd
+// CHECK: @llvm.x86.avx512.sitofp.round.v4f64.v4i64
+  return _mm256_cvt_roundepi64_pd(__A, _MM_FROUND_NO_EXC);
+}
+
+__m256d test__mm256_mask_cvt_roundepi64_pd(__m256d __W, __mmask8 __U, __m256i __A) {
+// CHECK-LABEL: @test__mm256_mask_cvt_roundepi64_pd
+// CHECK: @llvm.x86.avx512.sitofp.round.v4f64.v4i64
+  return _mm256_mask_cvt_roundepi64_pd(__W, __U, __A, _MM_FROUND_NO_EXC);
+}
+
+__m256d test__mm256_maskz_cvt_roundepi64_pd(__mmask8 __U, __m256i __A) {
+// CHECK-LABEL: @test__mm256_maskz_cvt_roundepi64_pd
+// CHECK: @llvm.x86.avx512.sitofp.round.v4f64.v4i64
+  return _mm256_maskz_cvt_roundepi64_pd(__U, __A, _MM_FROUND_NO_EXC);
+}
+
+// FIXME: We may change to @llvm.x86.avx10.mask.vcvtqq2ph256 in the future.
+__m128h test_mm256_cvt_roundepi64_ph(__m256i A) {
+// CHECK-LABEL: test_mm256_cvt_roundepi64_ph
+// CHECK: @llvm.x86.avx512.sitofp.round.v8f16.v4i64
+  return _mm256_cvt_roundepi64_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm256_mask_cvt_roundepi64_ph(__m128h A, __mmask8 B, __m256i C) {
+// CHECK-LABEL: test_mm256_mask_cvt_roundepi64_ph
+// CHECK: @llvm.x86.avx512.sitofp.round.v8f16.v4i64
+  return _mm256_mask_cvt_roundepi64_ph(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm256_maskz_cvt_roundepi64_ph(__mmask8 A, __m256i B) {
+// CHECK-LABEL: test_mm256_maskz_cvt_roundepi64_ph
+// CHECK: @llvm.x86.avx512.sitofp.round.v8f16.v4i64
+  return _mm256_maskz_cvt_roundepi64_ph(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128 test_mm256_cvt_roundepi64_ps(__m256i __A) {
+// CHECK-LABEL: @test_mm256_cvt_roundepi64_ps
+// CHECK: @llvm.x86.avx512.sitofp.round.v4f32.v4i64
+  return _mm256_cvt_roundepi64_ps(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m128 test_mm256_mask_cvt_roundepi64_ps(__m128 __W, __mmask8 __U, __m256i __A) {
+// CHECK-LABEL: @test_mm256_mask_cvt_roundepi64_ps
+// CHECK: @llvm.x86.avx512.sitofp.round.v4f32.v4i64
+// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
+  return _mm256_mask_cvt_roundepi64_ps(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m128 test_mm256_maskz_cvt_roundepi64_ps(__mmask8 __U, __m256i __A) {
+// CHECK-LABEL: @test_mm256_maskz_cvt_roundepi64_ps
+// CHECK: @llvm.x86.avx512.sitofp.round.v4f32.v4i64
+// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
+  return _mm256_maskz_cvt_roundepi64_ps(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m128i test_mm256_cvtt_roundpd_epi32(__m256d A)
+{
+// CHECK-LABEL: @test_mm256_cvtt_roundpd_epi32
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2dq256
+  return _mm256_cvtt_roundpd_epi32(A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128i test_mm256_mask_cvtt_roundpd_epi32(__m128i W,__mmask8 U,__m256d A)
+{
+// CHECK-LABEL: @test_mm256_mask_cvtt_roundpd_epi32
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2dq256
+  return _mm256_mask_cvtt_roundpd_epi32(W, U, A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128i test_mm256_maskz_cvtt_roundpd_epi32(__mmask8 U, __m256d A)
+{
+// CHECK-LABEL: @test_mm256_maskz_cvtt_roundpd_epi32
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2dq256
+  return _mm256_maskz_cvtt_roundpd_epi32(U, A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_cvtt_roundpd_epi64(__m256d __A) {
+// CHECK-LABEL: @test_mm256_cvtt_roundpd_epi64
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2qq256
+  return _mm256_cvtt_roundpd_epi64(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_mask_cvtt_roundpd_epi64(__m256i __W, __mmask8 __U, __m256d __A) {
+// CHECK-LABEL: @test_mm256_mask_cvtt_roundpd_epi64
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2qq256
+  return _mm256_mask_cvtt_roundpd_epi64(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_maskz_cvtt_roundpd_epi64(__mmask8 __U, __m256d __A) {
+// CHECK-LABEL: @test_mm256_maskz_cvtt_roundpd_epi64
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2qq256
+  return _mm256_maskz_cvtt_roundpd_epi64(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m128i test_mm256_cvtt_roundpd_epu32(__m256d A)
+{
+// CHECK-LABEL: @test_mm256_cvtt_roundpd_epu32
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2udq256
+  return _mm256_cvtt_roundpd_epu32(A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128i test_mm256_mask_cvtt_roundpd_epu32(__m128i W,__mmask8 U,__m256d A)
+{
+// CHECK-LABEL: @test_mm256_mask_cvtt_roundpd_epu32
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2udq256
+  return _mm256_mask_cvtt_roundpd_epu32(W, U, A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128i test_mm256_maskz_cvtt_roundpd_epu32(__mmask8 U, __m256d A)
+{
+// CHECK-LABEL: @test_mm256_maskz_cvtt_roundpd_epu32
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2udq256
+  return _mm256_maskz_cvtt_roundpd_epu32(U, A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_cvtt_roundpd_epu64(__m256d __A) {
+// CHECK-LABEL: @test_mm256_cvtt_roundpd_epu64
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2uqq256
+  return _mm256_cvtt_roundpd_epu64(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_mask_cvtt_roundpd_epu64(__m256i __W, __mmask8 __U, __m256d __A) {
+// CHECK-LABEL: @test_mm256_mask_cvtt_roundpd_epu64
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2uqq256
+  return _mm256_mask_cvtt_roundpd_epu64(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_maskz_cvtt_roundpd_epu64(__mmask8 __U, __m256d __A) {
+// CHECK-LABEL: @test_mm256_maskz_cvtt_roundpd_epu64
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2uqq256
+  return _mm256_maskz_cvtt_roundpd_epu64(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_cvtt_roundph_epi32(__m128h A) {
+// CHECK-LABEL: test_mm256_cvtt_roundph_epi32
+// CHECK: @llvm.x86.avx10.mask.vcvttph2dq256
+  return _mm256_cvtt_roundph_epi32(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_mask_cvtt_roundph_epi32(__m256i A, __mmask16 B, __m128h C) {
+// CHECK-LABEL: test_mm256_mask_cvtt_roundph_epi32
+// CHECK: @llvm.x86.avx10.mask.vcvttph2dq256
+  return _mm256_mask_cvtt_roundph_epi32(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_maskz_cvtt_roundph_epi32(__mmask16 A, __m128h B) {
+// CHECK-LABEL: test_mm256_maskz_cvtt_roundph_epi32
+// CHECK: @llvm.x86.avx10.mask.vcvttph2dq256
+  return _mm256_maskz_cvtt_roundph_epi32(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_cvtt_roundph_epi64(__m128h A) {
+// CHECK-LABEL: test_mm256_cvtt_roundph_epi64
+// CHECK: @llvm.x86.avx10.mask.vcvttph2qq256
+  return _mm256_cvtt_roundph_epi64(A, _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_mask_cvtt_roundph_epi64(__m256i A, __mmask8 B, __m128h C) {
+// CHECK-LABEL: test_mm256_mask_cvtt_roundph_epi64
+// CHECK: @llvm.x86.avx10.mask.vcvttph2qq256
+  return _mm256_mask_cvtt_roundph_epi64(A, B, C, _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_maskz_cvtt_roundph_epi64(__mmask8 A, __m128h B) {
+// CHECK-LABEL: test_mm256_maskz_cvtt_roundph_epi64
+// CHECK: @llvm.x86.avx10.mask.vcvttph2qq256
+  return _mm256_maskz_cvtt_roundph_epi64(A, B, _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_cvtt_roundph_epu32(__m128h A) {
+// CHECK-LABEL: test_mm256_cvtt_roundph_epu32
+// CHECK: @llvm.x86.avx10.mask.vcvttph2udq256
+  return _mm256_cvtt_roundph_epu32(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_mask_cvtt_roundph_epu32(__m256i A, __mmask16 B, __m128h C) {
+// CHECK-LABEL: test_mm256_mask_cvtt_roundph_epu32
+// CHECK: @llvm.x86.avx10.mask.vcvttph2udq256
+  return _mm256_mask_cvtt_roundph_epu32(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_maskz_cvtt_roundph_epu32(__mmask16 A, __m128h B) {
+// CHECK-LABEL: test_mm256_maskz_cvtt_roundph_epu32
+// CHECK: @llvm.x86.avx10.mask.vcvttph2udq256
+  return _mm256_maskz_cvtt_roundph_epu32(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_cvtt_roundph_epu64(__m128h A) {
+// CHECK-LABEL: test_mm256_cvtt_roundph_epu64
+// CHECK: @llvm.x86.avx10.mask.vcvttph2uqq256
+  return _mm256_cvtt_roundph_epu64(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_mask_cvtt_roundph_epu64(__m256i A, __mmask8 B, __m128h C) {
+// CHECK-LABEL: test_mm256_mask_cvtt_roundph_epu64
+// CHECK: @llvm.x86.avx10.mask.vcvttph2uqq256
+  return _mm256_mask_cvtt_roundph_epu64(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_maskz_cvtt_roundph_epu64(__mmask8 A, __m128h B) {
+// CHECK-LABEL: test_mm256_maskz_cvtt_roundph_epu64
+// CHECK: @llvm.x86.avx10.mask.vcvttph2uqq256
+  return _mm256_maskz_cvtt_roundph_epu64(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_cvtt_roundph_epu16(__m256h A) {
+// CHECK-LABEL: test_mm256_cvtt_roundph_epu16
+// CHECK: @llvm.x86.avx10.mask.vcvttph2uw256
+  return _mm256_cvtt_roundph_epu16(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_mask_cvtt_roundph_epu16(__m256i A, __mmask32 B, __m256h C) {
+// CHECK-LABEL: test_mm256_mask_cvtt_roundph_epu16
+// CHECK: @llvm.x86.avx10.mask.vcvttph2uw256
+  return _mm256_mask_cvtt_roundph_epu16(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_maskz_cvtt_roundph_epu16(__mmask32 A, __m256h B) {
+// CHECK-LABEL: test_mm256_maskz_cvtt_roundph_epu16
+// CHECK: @llvm.x86.avx10.mask.vcvttph2uw256
+  return _mm256_maskz_cvtt_roundph_epu16(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_cvtt_roundph_epi16(__m256h A) {
+// CHECK-LABEL: test_mm256_cvtt_roundph_epi16
+// CHECK: @llvm.x86.avx10.mask.vcvttph2w256
+  return _mm256_cvtt_roundph_epi16(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_mask_cvtt_roundph_epi16(__m256i A, __mmask32 B, __m256h C) {
+// CHECK-LABEL: test_mm256_mask_cvtt_roundph_epi16
+// CHECK: @llvm.x86.avx10.mask.vcvttph2w256
+  return _mm256_mask_cvtt_roundph_epi16(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_maskz_cvtt_roundph_epi16(__mmask32 A, __m256h B) {
+// CHECK-LABEL: test_mm256_maskz_cvtt_roundph_epi16
+// CHECK: @llvm.x86.avx10.mask.vcvttph2w256
+  return _mm256_maskz_cvtt_roundph_epi16(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_cvtt_roundps_epi32(__m256 __A)
+{
+// CHECK-LABEL: @test_mm256_cvtt_roundps_epi32
+// CHECK: @llvm.x86.avx10.mask.vcvttps2dq256
+  return _mm256_cvtt_roundps_epi32(__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_mask_cvtt_roundps_epi32(__m256i __W,__mmask16 __U,__m256 __A)
+{
+// CHECK-LABEL: @test_mm256_mask_cvtt_roundps_epi32
+// CHECK: @llvm.x86.avx10.mask.vcvttps2dq256
+  return _mm256_mask_cvtt_roundps_epi32(__W,__U,__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_maskz_cvtt_roundps_epi32(__mmask16 __U, __m256 __A)
+{
+// CHECK-LABEL: @test_mm256_maskz_cvtt_roundps_epi32
+// CHECK: @llvm.x86.avx10.mask.vcvttps2dq256
+  return _mm256_maskz_cvtt_roundps_epi32(__U,__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_cvtt_roundps_epi64(__m128 __A) {
+// CHECK-LABEL: @test_mm256_cvtt_roundps_epi64
+// CHECK: @llvm.x86.avx10.mask.vcvttps2qq256
+  return _mm256_cvtt_roundps_epi64(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_mask_cvtt_roundps_epi64(__m256i __W, __mmask8 __U, __m128 __A) {
+// CHECK-LABEL: @test_mm256_mask_cvtt_roundps_epi64
+// CHECK: @llvm.x86.avx10.mask.vcvttps2qq256
+  return _mm256_mask_cvtt_roundps_epi64(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_maskz_cvtt_roundps_epi64(__mmask8 __U, __m128 __A) {
+// CHECK-LABEL: @test_mm256_maskz_cvtt_roundps_epi64
+// CHECK: @llvm.x86.avx10.mask.vcvttps2qq256
+  return _mm256_maskz_cvtt_roundps_epi64(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_cvtt_roundps_epu32(__m256 __A)
+{
+// CHECK-LABEL: @test_mm256_cvtt_roundps_epu32
+// CHECK: @llvm.x86.avx10.mask.vcvttps2udq256
+  return _mm256_cvtt_roundps_epu32(__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_mask_cvtt_roundps_epu32(__m256i __W,__mmask16 __U,__m256 __A)
+{
+// CHECK-LABEL: @test_mm256_mask_cvtt_roundps_epu32
+// CHECK: @llvm.x86.avx10.mask.vcvttps2udq256
+  return _mm256_mask_cvtt_roundps_epu32(__W,__U,__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_maskz_cvtt_roundps_epu32(__mmask16 __U,__m256 __A)
+{
+// CHECK-LABEL: @test_mm256_maskz_cvtt_roundps_epu32
+// CHECK: @llvm.x86.avx10.mask.vcvttps2udq256
+  return _mm256_maskz_cvtt_roundps_epu32(__U,__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_cvtt_roundps_epu64(__m128 __A) {
+// CHECK-LABEL: @test_mm256_cvtt_roundps_epu64
+// CHECK: @llvm.x86.avx10.mask.vcvttps2uqq256
+  return _mm256_cvtt_roundps_epu64(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_mask_cvtt_roundps_epu64(__m256i __W, __mmask8 __U, __m128 __A) {
+// CHECK-LABEL: @test_mm256_mask_cvtt_roundps_epu64
+// CHECK: @llvm.x86.avx10.mask.vcvttps2uqq256
+  return _mm256_mask_cvtt_roundps_epu64(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm256_maskz_cvtt_roundps_epu64(__mmask8 __U, __m128 __A) {
+// CHECK-LABEL: @test_mm256_maskz_cvtt_roundps_epu64
+// CHECK: @llvm.x86.avx10.mask.vcvttps2uqq256
+  return _mm256_maskz_cvtt_roundps_epu64(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm256_cvt_roundepu32_ph(__m256i A) {
+// CHECK-LABEL: test_mm256_cvt_roundepu32_ph
+// CHECK: @llvm.x86.avx512.uitofp.round.v8f16.v8i32(<8 x i32> %{{.*}}, i32 11)
+  return _mm256_cvt_roundepu32_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm256_mask_cvt_roundepu32_ph(__m128h A, __mmask8 B, __m256i C) {
+// CHECK-LABEL: test_mm256_mask_cvt_roundepu32_ph
+// CHECK: @llvm.x86.avx512.uitofp.round.v8f16.v8i32(<8 x i32> %{{.*}}, i32 10)
+// CHECK: select <8 x i1> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}}
+  return _mm256_mask_cvt_roundepu32_ph(A, B, C, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm256_maskz_cvt_roundepu32_ph(__mmask8 A, __m256i B) {
+// CHECK-LABEL: test_mm256_maskz_cvt_roundepu32_ph
+// CHECK: @llvm.x86.avx512.uitofp.round.v8f16.v8i32(<8 x i32> %{{.*}}, i32 9)
+// CHECK: select <8 x i1> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}}
+  return _mm256_maskz_cvt_roundepu32_ph(A, B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_cvt_roundepu32_ps(__m256i __A)
+{
+// CHECK-LABEL: @test_mm256_cvt_roundepu32_ps
+// CHECK: @llvm.x86.avx512.uitofp.round.v8f32.v8i32
+  return _mm256_cvt_roundepu32_ps(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_mask_cvt_roundepu32_ps(__m256 __W, __mmask8 __U, __m256i __A)
+{
+// CHECK-LABEL: @test_mm256_mask_cvt_roundepu32_ps
+// CHECK: @llvm.x86.avx512.uitofp.round.v8f32.v8i32
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_mask_cvt_roundepu32_ps(__W, __U, __A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_maskz_cvt_roundepu32_ps(__mmask8 __U, __m256i __A)
+{
+// CHECK-LABEL: @test_mm256_maskz_cvt_roundepu32_ps
+// CHECK: @llvm.x86.avx512.uitofp.round.v8f32.v8i32
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_maskz_cvt_roundepu32_ps(__U, __A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test__mm256_cvt_roundepu64_pd(__m256i __A) {
+// CHECK-LABEL: @test__mm256_cvt_roundepu64_pd
+// CHECK: @llvm.x86.avx512.uitofp.round.v4f64.v4i64
+  return _mm256_cvt_roundepu64_pd(__A, _MM_FROUND_NO_EXC);
+}
+
+__m256d test__mm256_mask_cvt_roundepu64_pd(__m256d __W, __mmask8 __U, __m256i __A) {
+// CHECK-LABEL: @test__mm256_mask_cvt_roundepu64_pd
+// CHECK: @llvm.x86.avx512.uitofp.round.v4f64.v4i64
+  return _mm256_mask_cvt_roundepu64_pd(__W, __U, __A, _MM_FROUND_NO_EXC);
+}
+
+__m256d test__mm256_maskz_cvt_roundepu64_pd(__mmask8 __U, __m256i __A) {
+// CHECK-LABEL: @test__mm256_maskz_cvt_roundepu64_pd
+// CHECK: @llvm.x86.avx512.uitofp.round.v4f64.v4i64
+  return _mm256_maskz_cvt_roundepu64_pd(__U, __A, _MM_FROUND_NO_EXC);
+}
+
+// FIXME: We may change to @llvm.x86.avx10.mask.vcvtuqq2ph256 in the future.
+__m128h test_mm256_cvt_roundepu64_ph(__m256i A) {
+// CHECK-LABEL: test_mm256_cvt_roundepu64_ph
+// CHECK: @llvm.x86.avx512.uitofp.round.v8f16.v4i64
+  return _mm256_cvt_roundepu64_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm256_mask_cvt_roundepu64_ph(__m128h A, __mmask8 B, __m256i C) {
+// CHECK-LABEL: test_mm256_mask_cvt_roundepu64_ph
+// CHECK: @llvm.x86.avx512.uitofp.round.v8f16.v4i64
+  return _mm256_mask_cvt_roundepu64_ph(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm256_maskz_cvt_roundepu64_ph(__mmask8 A, __m256i B) {
+// CHECK-LABEL: test_mm256_maskz_cvt_roundepu64_ph
+// CHECK: @llvm.x86.avx512.uitofp.round.v8f16.v4i64
+  return _mm256_maskz_cvt_roundepu64_ph(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128 test_mm256_cvt_roundepu64_ps(__m256i __A) {
+// CHECK-LABEL: @test_mm256_cvt_roundepu64_ps
+// CHECK: @llvm.x86.avx512.uitofp.round.v4f32.v4i64
+  return _mm256_cvt_roundepu64_ps(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m128 test_mm256_mask_cvt_roundepu64_ps(__m128 __W, __mmask8 __U, __m256i __A) {
+// CHECK-LABEL: @test_mm256_mask_cvt_roundepu64_ps
+// CHECK: @llvm.x86.avx512.uitofp.round.v4f32.v4i64
+// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
+  return _mm256_mask_cvt_roundepu64_ps(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m128 test_mm256_maskz_cvt_roundepu64_ps(__mmask8 __U, __m256i __A) {
+// CHECK-LABEL: @test_mm256_maskz_cvt_roundepu64_ps
+// CHECK: @llvm.x86.avx512.uitofp.round.v4f32.v4i64
+// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
+  return _mm256_maskz_cvt_roundepu64_ps(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_cvt_roundepi16_ph(__m256i A) {
+// CHECK-LABEL: test_mm256_cvt_roundepi16_ph
+// CHECK:   @llvm.x86.avx512.sitofp.round.v16f16.v16i16
+  return _mm256_cvt_roundepi16_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask_cvt_roundepi16_ph(__m256h A, __mmask16 B, __m256i C) {
+// CHECK-LABEL: test_mm256_mask_cvt_roundepi16_ph
+// CHECK: @llvm.x86.avx512.sitofp.round.v16f16.v16i16
+  return _mm256_mask_cvt_roundepi16_ph(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_maskz_cvt_roundepi16_ph(__mmask16 A, __m256i B) {
+// CHECK-LABEL: test_mm256_maskz_cvt_roundepi16_ph
+// CHECK: @llvm.x86.avx512.sitofp.round.v16f16.v16i16
+  return _mm256_maskz_cvt_roundepi16_ph(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_cvt_roundepu16_ph(__m256i A) {
+// CHECK-LABEL: test_mm256_cvt_roundepu16_ph
+// CHECK: @llvm.x86.avx512.uitofp.round.v16f16.v16i16
+  return _mm256_cvt_roundepu16_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask_cvt_roundepu16_ph(__m256h A, __mmask16 B, __m256i C) {
+// CHECK-LABEL: test_mm256_mask_cvt_roundepu16_ph
+// CHECK: @llvm.x86.avx512.uitofp.round.v16f16.v16i16
+  return _mm256_mask_cvt_roundepu16_ph(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_maskz_cvt_roundepu16_ph(__mmask16 A, __m256i B) {
+// CHECK-LABEL: test_mm256_maskz_cvt_roundepu16_ph
+// CHECK: @llvm.x86.avx512.uitofp.round.v16f16.v16i16
+  return _mm256_maskz_cvt_roundepu16_ph(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_div_round_pd(__m256d __A, __m256d __B) {
+// CHECK-LABEL: @test_mm256_div_round_pd
+// CHECK: @llvm.x86.avx10.vdivpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 11)
+  return _mm256_div_round_pd(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_mask_div_round_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+// CHECK-LABEL: @test_mm256_mask_div_round_pd
+// CHECK: @llvm.x86.avx10.vdivpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 10)
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+  return _mm256_mask_div_round_pd(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_maskz_div_round_pd(__mmask8 __U, __m256d __A, __m256d __B) {
+// CHECK-LABEL: @test_mm256_maskz_div_round_pd
+// CHECK: @llvm.x86.avx10.vdivpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 9)
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+  return _mm256_maskz_div_round_pd(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_div_round_ph(__m256h __A, __m256h __B) {
+// CHECK-LABEL: @test_mm256_div_round_ph
+// CHECK: @llvm.x86.avx10.vdivph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 11)
+  return _mm256_div_round_ph(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask_div_round_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
+// CHECK-LABEL: @test_mm256_mask_div_round_ph
+// CHECK: @llvm.x86.avx10.vdivph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 10)
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
+  return _mm256_mask_div_round_ph(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_maskz_div_round_ph(__mmask16 __U, __m256h __A, __m256h __B) {
+// CHECK-LABEL: @test_mm256_maskz_div_round_ph
+// CHECK: @llvm.x86.avx10.vdivph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 9)
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
+  return _mm256_maskz_div_round_ph(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_div_round_ps(__m256 __A, __m256 __B) {
+// CHECK-LABEL: @test_mm256_div_round_ps
+// CHECK: @llvm.x86.avx10.vdivps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 11)
+  return _mm256_div_round_ps(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_mask_div_round_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+// CHECK-LABEL: @test_mm256_mask_div_round_ps
+// CHECK: @llvm.x86.avx10.vdivps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 10)
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_mask_div_round_ps(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_maskz_div_round_ps(__mmask8 __U, __m256 __A, __m256 __B) {
+// CHECK-LABEL: @test_mm256_maskz_div_round_ps
+// CHECK: @llvm.x86.avx10.vdivps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 9)
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_maskz_div_round_ps(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+}
+
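+// The *_pch tests exercise complex FP16 arithmetic: a __m256h holds 8 complex
+// values, so the mask type is __mmask8 and, where a merge is checked, the IR
+// select operates on <8 x float>, one lane per real/imaginary pair.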
+__m256h test_mm256_fcmadd_round_pch(__m256h __A, __m256h __B, __m256h __C) {
+// CHECK-LABEL: @test_mm256_fcmadd_round_pch
+// CHECK: @llvm.x86.avx10.mask.vfcmaddcph256
+  return _mm256_fcmadd_round_pch(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask_fcmadd_round_pch(__m256h __A, __mmask8 __U, __m256h __B, __m256h __C) {
+// CHECK-LABEL: @test_mm256_mask_fcmadd_round_pch
+// CHECK: @llvm.x86.avx10.mask.vfcmaddcph256
+// CHECK:  %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_mask_fcmadd_round_pch(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask3_fcmadd_round_pch(__m256h __A, __m256h __B, __m256h __C, __mmask8 __U) {
+// CHECK-LABEL: @test_mm256_mask3_fcmadd_round_pch
+// CHECK: @llvm.x86.avx10.mask.vfcmaddcph256
+// CHECK-NOT:  %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_mask3_fcmadd_round_pch(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_maskz_fcmadd_round_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) {
+// CHECK-LABEL: @test_mm256_maskz_fcmadd_round_pch
+// CHECK: @llvm.x86.avx10.maskz.vfcmaddcph256
+  return _mm256_maskz_fcmadd_round_pch(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_cmul_round_pch(__m256h __A, __m256h __B) {
+// CHECK-LABEL: @test_mm256_cmul_round_pch
+// CHECK: @llvm.x86.avx10.mask.vfcmulcph256
+  return _mm256_cmul_round_pch(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask_cmul_round_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) {
+// CHECK-LABEL: @test_mm256_mask_cmul_round_pch
+// CHECK: @llvm.x86.avx10.mask.vfcmulcph256
+  return _mm256_mask_cmul_round_pch(__W, __U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_maskz_cmul_round_pch(__mmask8 __U, __m256h __A, __m256h __B) {
+// CHECK-LABEL: @test_mm256_maskz_cmul_round_pch
+// CHECK: @llvm.x86.avx10.mask.vfcmulcph256
+  return _mm256_maskz_cmul_round_pch(__U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_fixupimm_round_pd(__m256d __A, __m256d __B, __m256i __C) {
+// CHECK-LABEL: @test_mm256_fixupimm_round_pd
+// CHECK: @llvm.x86.avx10.mask.vfixupimmpd256
+  return _mm256_fixupimm_round_pd(__A, __B, __C, 5, 8);
+}
+
+__m256d test_mm256_mask_fixupimm_round_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256i __C) {
+// CHECK-LABEL: @test_mm256_mask_fixupimm_round_pd
+// CHECK: @llvm.x86.avx10.mask.vfixupimmpd256
+  return _mm256_mask_fixupimm_round_pd(__A, __U, __B, __C, 5, 8);
+}
+
+__m256d test_mm256_maskz_fixupimm_round_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256i __C) {
+// CHECK-LABEL: @test_mm256_maskz_fixupimm_round_pd
+// CHECK: @llvm.x86.avx10.maskz.vfixupimmpd256
+  return _mm256_maskz_fixupimm_round_pd(__U, __A, __B, __C, 5, 8);
+}
+
+__m256 test_mm256_fixupimm_round_ps(__m256 __A, __m256 __B, __m256i __C) {
+// CHECK-LABEL: @test_mm256_fixupimm_round_ps
+// CHECK: @llvm.x86.avx10.mask.vfixupimmps256
+  return _mm256_fixupimm_round_ps(__A, __B, __C, 5, 8);
+}
+
+__m256 test_mm256_mask_fixupimm_round_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256i __C) {
+// CHECK-LABEL: @test_mm256_mask_fixupimm_round_ps
+// CHECK: @llvm.x86.avx10.mask.vfixupimmps256
+  return _mm256_mask_fixupimm_round_ps(__A, __U, __B, __C, 5, 8);
+}
+
+__m256 test_mm256_maskz_fixupimm_round_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256i __C) {
+// CHECK-LABEL: @test_mm256_maskz_fixupimm_round_ps
+// CHECK: @llvm.x86.avx10.maskz.vfixupimmps256
+  return _mm256_maskz_fixupimm_round_ps(__U, __A, __B, __C, 5, 8);
+}
+
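+// In the FMA tests below, the fmsub/fnmadd/fnmsub variants are built from fneg
+// of the appropriate operand(s) feeding the same @llvm.x86.avx10.vfmadd*256
+// (or vfmaddsub*256) call; masked forms bitcast the integer mask to a vector
+// of i1 and select against the passthru, or against zeroinitializer for maskz.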
+__m256d test_mm256_fmadd_round_pd(__m256d __A, __m256d __B, __m256d __C) {
+// CHECK-LABEL: @test_mm256_fmadd_round_pd
+// CHECK: @llvm.x86.avx10.vfmaddpd256
+  return _mm256_fmadd_round_pd(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_mask_fmadd_round_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+// CHECK-LABEL: @test_mm256_mask_fmadd_round_pd
+// CHECK: @llvm.x86.avx10.vfmaddpd256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+  return _mm256_mask_fmadd_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_mask3_fmadd_round_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+// CHECK-LABEL: @test_mm256_mask3_fmadd_round_pd
+// CHECK: @llvm.x86.avx10.vfmaddpd256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+  return _mm256_mask3_fmadd_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_maskz_fmadd_round_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
+// CHECK-LABEL: @test_mm256_maskz_fmadd_round_pd
+// CHECK: @llvm.x86.avx10.vfmaddpd256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> zeroinitializer
+  return _mm256_maskz_fmadd_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_fmsub_round_pd(__m256d __A, __m256d __B, __m256d __C) {
+// CHECK-LABEL: @test_mm256_fmsub_round_pd
+// CHECK: fneg <4 x double>
+// CHECK: @llvm.x86.avx10.vfmaddpd256
+  return _mm256_fmsub_round_pd(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_mask_fmsub_round_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+// CHECK-LABEL: @test_mm256_mask_fmsub_round_pd
+// CHECK: fneg <4 x double>
+// CHECK: @llvm.x86.avx10.vfmaddpd256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+  return _mm256_mask_fmsub_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_maskz_fmsub_round_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
+// CHECK-LABEL: @test_mm256_maskz_fmsub_round_pd
+// CHECK: fneg <4 x double>
+// CHECK: @llvm.x86.avx10.vfmaddpd256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> zeroinitializer
+  return _mm256_maskz_fmsub_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_fnmadd_round_pd(__m256d __A, __m256d __B, __m256d __C) {
+// CHECK-LABEL: @test_mm256_fnmadd_round_pd
+// CHECK: fneg <4 x double>
+// CHECK: @llvm.x86.avx10.vfmaddpd256
+  return _mm256_fnmadd_round_pd(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_mask3_fnmadd_round_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+// CHECK-LABEL: @test_mm256_mask3_fnmadd_round_pd
+// CHECK: fneg <4 x double>
+// CHECK: @llvm.x86.avx10.vfmaddpd256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+  return _mm256_mask3_fnmadd_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_maskz_fnmadd_round_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
+// CHECK-LABEL: @test_mm256_maskz_fnmadd_round_pd
+// CHECK: fneg <4 x double>
+// CHECK: @llvm.x86.avx10.vfmaddpd256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> zeroinitializer
+  return _mm256_maskz_fnmadd_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_fnmsub_round_pd(__m256d __A, __m256d __B, __m256d __C) {
+// CHECK-LABEL: @test_mm256_fnmsub_round_pd
+// CHECK: fneg <4 x double>
+// CHECK: fneg <4 x double>
+// CHECK: @llvm.x86.avx10.vfmaddpd256
+  return _mm256_fnmsub_round_pd(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_maskz_fnmsub_round_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
+// CHECK-LABEL: @test_mm256_maskz_fnmsub_round_pd
+// CHECK: fneg <4 x double>
+// CHECK: fneg <4 x double>
+// CHECK: @llvm.x86.avx10.vfmaddpd256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> zeroinitializer
+  return _mm256_maskz_fnmsub_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_fmadd_round_ph(__m256h __A, __m256h __B, __m256h __C) {
+// CHECK-LABEL: @test_mm256_fmadd_round_ph
+// CHECK: @llvm.x86.avx10.vfmaddph256
+  return _mm256_fmadd_round_ph(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask_fmadd_round_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
+// CHECK-LABEL: @test_mm256_mask_fmadd_round_ph
+// CHECK: @llvm.x86.avx10.vfmaddph256
+// CHECK: bitcast i16 %{{.*}} to <16 x i1>
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
+  return _mm256_mask_fmadd_round_ph(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask3_fmadd_round_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
+// CHECK-LABEL: @test_mm256_mask3_fmadd_round_ph
+// CHECK: @llvm.x86.avx10.vfmaddph256
+// CHECK: bitcast i16 %{{.*}} to <16 x i1>
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
+  return _mm256_mask3_fmadd_round_ph(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_maskz_fmadd_round_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
+// CHECK-LABEL: @test_mm256_maskz_fmadd_round_ph
+// CHECK: @llvm.x86.avx10.vfmaddph256
+// CHECK: bitcast i16 %{{.*}} to <16 x i1>
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> zeroinitializer
+  return _mm256_maskz_fmadd_round_ph(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_fmsub_round_ph(__m256h __A, __m256h __B, __m256h __C) {
+// CHECK-LABEL: @test_mm256_fmsub_round_ph
+// CHECK: fneg
+// CHECK: @llvm.x86.avx10.vfmaddph256
+  return _mm256_fmsub_round_ph(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask_fmsub_round_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
+// CHECK-LABEL: @test_mm256_mask_fmsub_round_ph
+// CHECK: fneg
+// CHECK: @llvm.x86.avx10.vfmaddph256
+// CHECK: bitcast i16 %{{.*}} to <16 x i1>
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
+  return _mm256_mask_fmsub_round_ph(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_maskz_fmsub_round_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
+// CHECK-LABEL: @test_mm256_maskz_fmsub_round_ph
+// CHECK: fneg
+// CHECK: @llvm.x86.avx10.vfmaddph256
+// CHECK: bitcast i16 %{{.*}} to <16 x i1>
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> zeroinitializer
+  return _mm256_maskz_fmsub_round_ph(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_fnmadd_round_ph(__m256h __A, __m256h __B, __m256h __C) {
+// CHECK-LABEL: @test_mm256_fnmadd_round_ph
+// CHECK: fneg
+// CHECK: @llvm.x86.avx10.vfmaddph256
+  return _mm256_fnmadd_round_ph(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask3_fnmadd_round_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
+// CHECK-LABEL: @test_mm256_mask3_fnmadd_round_ph
+// CHECK: fneg
+// CHECK: @llvm.x86.avx10.vfmaddph256
+// CHECK: bitcast i16 %{{.*}} to <16 x i1>
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
+  return _mm256_mask3_fnmadd_round_ph(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_maskz_fnmadd_round_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
+// CHECK-LABEL: @test_mm256_maskz_fnmadd_round_ph
+// CHECK: fneg
+// CHECK: @llvm.x86.avx10.vfmaddph256
+// CHECK: bitcast i16 %{{.*}} to <16 x i1>
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> zeroinitializer
+  return _mm256_maskz_fnmadd_round_ph(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_fnmsub_round_ph(__m256h __A, __m256h __B, __m256h __C) {
+// CHECK-LABEL: @test_mm256_fnmsub_round_ph
+// CHECK: fneg
+// CHECK: fneg
+// CHECK: @llvm.x86.avx10.vfmaddph256
+  return _mm256_fnmsub_round_ph(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_maskz_fnmsub_round_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
+// CHECK-LABEL: @test_mm256_maskz_fnmsub_round_ph
+// CHECK: fneg
+// CHECK: fneg
+// CHECK: @llvm.x86.avx10.vfmaddph256
+// CHECK: bitcast i16 %{{.*}} to <16 x i1>
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> zeroinitializer
+  return _mm256_maskz_fnmsub_round_ph(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_fmadd_round_ps(__m256 __A, __m256 __B, __m256 __C) {
+// CHECK-LABEL: @test_mm256_fmadd_round_ps
+// CHECK: @llvm.x86.avx10.vfmaddps256
+  return _mm256_fmadd_round_ps(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_mask_fmadd_round_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+// CHECK-LABEL: @test_mm256_mask_fmadd_round_ps
+// CHECK: @llvm.x86.avx10.vfmaddps256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_mask_fmadd_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_mask3_fmadd_round_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+// CHECK-LABEL: @test_mm256_mask3_fmadd_round_ps
+// CHECK: @llvm.x86.avx10.vfmaddps256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_mask3_fmadd_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_maskz_fmadd_round_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
+// CHECK-LABEL: @test_mm256_maskz_fmadd_round_ps
+// CHECK: @llvm.x86.avx10.vfmaddps256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> zeroinitializer
+  return _mm256_maskz_fmadd_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_fmsub_round_ps(__m256 __A, __m256 __B, __m256 __C) {
+// CHECK-LABEL: @test_mm256_fmsub_round_ps
+// CHECK: fneg <8 x float> %{{.*}}
+// CHECK: @llvm.x86.avx10.vfmaddps256
+  return _mm256_fmsub_round_ps(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_mask_fmsub_round_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+// CHECK-LABEL: @test_mm256_mask_fmsub_round_ps
+// CHECK: fneg <8 x float> %{{.*}}
+// CHECK: @llvm.x86.avx10.vfmaddps256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_mask_fmsub_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_maskz_fmsub_round_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
+// CHECK-LABEL: @test_mm256_maskz_fmsub_round_ps
+// CHECK: fneg <8 x float> %{{.*}}
+// CHECK: @llvm.x86.avx10.vfmaddps256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> zeroinitializer
+  return _mm256_maskz_fmsub_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_fnmadd_round_ps(__m256 __A, __m256 __B, __m256 __C) {
+// CHECK-LABEL: @test_mm256_fnmadd_round_ps
+// CHECK: fneg <8 x float> %{{.*}}
+// CHECK: @llvm.x86.avx10.vfmaddps256
+  return _mm256_fnmadd_round_ps(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_mask3_fnmadd_round_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+// CHECK-LABEL: @test_mm256_mask3_fnmadd_round_ps
+// CHECK: fneg <8 x float> %{{.*}}
+// CHECK: @llvm.x86.avx10.vfmaddps256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_mask3_fnmadd_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_maskz_fnmadd_round_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
+// CHECK-LABEL: @test_mm256_maskz_fnmadd_round_ps
+// CHECK: fneg <8 x float> %{{.*}}
+// CHECK: @llvm.x86.avx10.vfmaddps256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> zeroinitializer
+  return _mm256_maskz_fnmadd_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_fnmsub_round_ps(__m256 __A, __m256 __B, __m256 __C) {
+// CHECK-LABEL: @test_mm256_fnmsub_round_ps
+// CHECK: fneg <8 x float> %{{.*}}
+// CHECK: fneg <8 x float> %{{.*}}
+// CHECK: @llvm.x86.avx10.vfmaddps256
+  return _mm256_fnmsub_round_ps(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_maskz_fnmsub_round_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
+// CHECK-LABEL: @test_mm256_maskz_fnmsub_round_ps
+// CHECK: fneg <8 x float> %{{.*}}
+// CHECK: fneg <8 x float> %{{.*}}
+// CHECK: @llvm.x86.avx10.vfmaddps256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> zeroinitializer
+  return _mm256_maskz_fnmsub_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_fmadd_round_pch(__m256h __A, __m256h __B, __m256h __C) {
+// CHECK-LABEL: @test_mm256_fmadd_round_pch
+// CHECK: @llvm.x86.avx10.mask.vfmaddcph256
+  return _mm256_fmadd_round_pch(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask_fmadd_round_pch(__m256h __A, __mmask8 __U, __m256h __B, __m256h __C) {
+// CHECK-LABEL: @test_mm256_mask_fmadd_round_pch
+// CHECK: @llvm.x86.avx10.mask.vfmaddcph256
+// CHECK:  %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_mask_fmadd_round_pch(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask3_fmadd_round_pch(__m256h __A, __m256h __B, __m256h __C, __mmask8 __U) {
+// CHECK-LABEL: @test_mm256_mask3_fmadd_round_pch
+// CHECK: @llvm.x86.avx10.mask.vfmaddcph256
+// CHECK-NOT:  %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_mask3_fmadd_round_pch(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_maskz_fmadd_round_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) {
+// CHECK-LABEL: @test_mm256_maskz_fmadd_round_pch
+// CHECK: @llvm.x86.avx10.maskz.vfmaddcph256
+  return _mm256_maskz_fmadd_round_pch(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_fmaddsub_round_pd(__m256d __A, __m256d __B, __m256d __C) {
+// CHECK-LABEL: @test_mm256_fmaddsub_round_pd
+// CHECK: @llvm.x86.avx10.vfmaddsubpd256
+  return _mm256_fmaddsub_round_pd(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_mask_fmaddsub_round_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+// CHECK-LABEL: @test_mm256_mask_fmaddsub_round_pd
+// CHECK: @llvm.x86.avx10.vfmaddsubpd256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+  return _mm256_mask_fmaddsub_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_mask3_fmaddsub_round_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+// CHECK-LABEL: @test_mm256_mask3_fmaddsub_round_pd
+// CHECK: @llvm.x86.avx10.vfmaddsubpd256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+  return _mm256_mask3_fmaddsub_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_maskz_fmaddsub_round_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
+// CHECK-LABEL: @test_mm256_maskz_fmaddsub_round_pd
+// CHECK: @llvm.x86.avx10.vfmaddsubpd256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> zeroinitializer
+  return _mm256_maskz_fmaddsub_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_fmsubadd_round_pd(__m256d __A, __m256d __B, __m256d __C) {
+// CHECK-LABEL: @test_mm256_fmsubadd_round_pd
+// CHECK: fneg <4 x double> %{{.*}}
+// CHECK: @llvm.x86.avx10.vfmaddsubpd256
+  return _mm256_fmsubadd_round_pd(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_mask_fmsubadd_round_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+// CHECK-LABEL: @test_mm256_mask_fmsubadd_round_pd
+// CHECK: fneg <4 x double> %{{.*}}
+// CHECK: @llvm.x86.avx10.vfmaddsubpd256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+  return _mm256_mask_fmsubadd_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_maskz_fmsubadd_round_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
+// CHECK-LABEL: @test_mm256_maskz_fmsubadd_round_pd
+// CHECK: fneg <4 x double> %{{.*}}
+// CHECK: @llvm.x86.avx10.vfmaddsubpd256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> zeroinitializer
+  return _mm256_maskz_fmsubadd_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_fmaddsub_round_ph(__m256h __A, __m256h __B, __m256h __C) {
+// CHECK-LABEL: @test_mm256_fmaddsub_round_ph
+// CHECK: @llvm.x86.avx10.vfmaddsubph256
+  return _mm256_fmaddsub_round_ph(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask_fmaddsub_round_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
+// CHECK-LABEL: @test_mm256_mask_fmaddsub_round_ph
+// CHECK: @llvm.x86.avx10.vfmaddsubph256
+// CHECK: bitcast i16 %{{.*}} to <16 x i1>
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
+  return _mm256_mask_fmaddsub_round_ph(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask3_fmaddsub_round_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
+// CHECK-LABEL: @test_mm256_mask3_fmaddsub_round_ph
+// CHECK: @llvm.x86.avx10.vfmaddsubph256
+// CHECK: bitcast i16 %{{.*}} to <16 x i1>
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
+  return _mm256_mask3_fmaddsub_round_ph(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_maskz_fmaddsub_round_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
+// CHECK-LABEL: @test_mm256_maskz_fmaddsub_round_ph
+// CHECK: @llvm.x86.avx10.vfmaddsubph256
+// CHECK: bitcast i16 %{{.*}} to <16 x i1>
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> zeroinitializer
+  return _mm256_maskz_fmaddsub_round_ph(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_fmsubadd_round_ph(__m256h __A, __m256h __B, __m256h __C) {
+// CHECK-LABEL: @test_mm256_fmsubadd_round_ph
+// CHECK: fneg
+// CHECK: @llvm.x86.avx10.vfmaddsubph256
+  return _mm256_fmsubadd_round_ph(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask_fmsubadd_round_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
+// CHECK-LABEL: @test_mm256_mask_fmsubadd_round_ph
+// CHECK: fneg
+// CHECK: @llvm.x86.avx10.vfmaddsubph256
+// CHECK: bitcast i16 %{{.*}} to <16 x i1>
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
+  return _mm256_mask_fmsubadd_round_ph(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_maskz_fmsubadd_round_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
+// CHECK-LABEL: @test_mm256_maskz_fmsubadd_round_ph
+// CHECK: fneg
+// CHECK: @llvm.x86.avx10.vfmaddsubph256
+// CHECK: bitcast i16 %{{.*}} to <16 x i1>
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> zeroinitializer
+  return _mm256_maskz_fmsubadd_round_ph(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_fmaddsub_round_ps(__m256 __A, __m256 __B, __m256 __C) {
+// CHECK-LABEL: @test_mm256_fmaddsub_round_ps
+// CHECK: @llvm.x86.avx10.vfmaddsubps256
+  return _mm256_fmaddsub_round_ps(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_mask_fmaddsub_round_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+// CHECK-LABEL: @test_mm256_mask_fmaddsub_round_ps
+// CHECK: @llvm.x86.avx10.vfmaddsubps256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_mask_fmaddsub_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_mask3_fmaddsub_round_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+// CHECK-LABEL: @test_mm256_mask3_fmaddsub_round_ps
+// CHECK: @llvm.x86.avx10.vfmaddsubps256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_mask3_fmaddsub_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_maskz_fmaddsub_round_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
+// CHECK-LABEL: @test_mm256_maskz_fmaddsub_round_ps
+// CHECK: @llvm.x86.avx10.vfmaddsubps256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> zeroinitializer
+  return _mm256_maskz_fmaddsub_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_fmsubadd_round_ps(__m256 __A, __m256 __B, __m256 __C) {
+// CHECK-LABEL: @test_mm256_fmsubadd_round_ps
+// CHECK: fneg <8 x float> %{{.*}}
+// CHECK: @llvm.x86.avx10.vfmaddsubps256
+  return _mm256_fmsubadd_round_ps(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_mask_fmsubadd_round_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+// CHECK-LABEL: @test_mm256_mask_fmsubadd_round_ps
+// CHECK: fneg <8 x float> %{{.*}}
+// CHECK: @llvm.x86.avx10.vfmaddsubps256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_mask_fmsubadd_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_maskz_fmsubadd_round_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
+// CHECK-LABEL: @test_mm256_maskz_fmsubadd_round_ps
+// CHECK: fneg <8 x float> %{{.*}}
+// CHECK: @llvm.x86.avx10.vfmaddsubps256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> zeroinitializer
+  return _mm256_maskz_fmsubadd_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_mask3_fmsub_round_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+// CHECK-LABEL: @test_mm256_mask3_fmsub_round_pd
+// CHECK: fneg <4 x double> %{{.*}}
+// CHECK: @llvm.x86.avx10.vfmaddpd256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+  return _mm256_mask3_fmsub_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_mask3_fmsubadd_round_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+// CHECK-LABEL: @test_mm256_mask3_fmsubadd_round_pd
+// CHECK: fneg <4 x double> %{{.*}}
+// CHECK: @llvm.x86.avx10.vfmaddsubpd256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+  return _mm256_mask3_fmsubadd_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_mask_fnmadd_round_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+// CHECK-LABEL: @test_mm256_mask_fnmadd_round_pd
+// CHECK: fneg <4 x double>
+// CHECK: @llvm.x86.avx10.vfmaddpd256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+  return _mm256_mask_fnmadd_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_mask_fnmsub_round_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+// CHECK-LABEL: @test_mm256_mask_fnmsub_round_pd
+// CHECK: fneg <4 x double>
+// CHECK: fneg <4 x double>
+// CHECK: @llvm.x86.avx10.vfmaddpd256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+  return _mm256_mask_fnmsub_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_mask3_fnmsub_round_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+// CHECK-LABEL: @test_mm256_mask3_fnmsub_round_pd
+// CHECK: fneg <4 x double>
+// CHECK: fneg <4 x double>
+// CHECK: @llvm.x86.avx10.vfmaddpd256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+  return _mm256_mask3_fnmsub_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask3_fmsub_round_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
+// CHECK-LABEL: @test_mm256_mask3_fmsub_round_ph
+// CHECK: fneg
+// CHECK: @llvm.x86.avx10.vfmaddph256
+// CHECK: bitcast i16 %{{.*}} to <16 x i1>
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
+  return _mm256_mask3_fmsub_round_ph(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask3_fmsubadd_round_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
+// CHECK-LABEL: @test_mm256_mask3_fmsubadd_round_ph
+// CHECK: fneg
+// CHECK: @llvm.x86.avx10.vfmaddsubph256
+// CHECK: bitcast i16 %{{.*}} to <16 x i1>
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
+  return _mm256_mask3_fmsubadd_round_ph(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask_fnmadd_round_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
+// CHECK-LABEL: @test_mm256_mask_fnmadd_round_ph
+// CHECK: fneg
+// CHECK: @llvm.x86.avx10.vfmaddph256
+// CHECK: bitcast i16 %{{.*}} to <16 x i1>
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
+  return _mm256_mask_fnmadd_round_ph(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask_fnmsub_round_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
+// CHECK-LABEL: @test_mm256_mask_fnmsub_round_ph
+// CHECK: fneg
+// CHECK: fneg
+// CHECK: @llvm.x86.avx10.vfmaddph256
+// CHECK: bitcast i16 %{{.*}} to <16 x i1>
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
+  return _mm256_mask_fnmsub_round_ph(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask3_fnmsub_round_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
+// CHECK-LABEL: @test_mm256_mask3_fnmsub_round_ph
+// CHECK: fneg
+// CHECK: fneg
+// CHECK: @llvm.x86.avx10.vfmaddph256
+// CHECK: bitcast i16 %{{.*}} to <16 x i1>
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
+  return _mm256_mask3_fnmsub_round_ph(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_mask3_fmsub_round_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+// CHECK-LABEL: @test_mm256_mask3_fmsub_round_ps
+// CHECK: fneg <8 x float> %{{.*}}
+// CHECK: @llvm.x86.avx10.vfmaddps256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_mask3_fmsub_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_mask3_fmsubadd_round_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+// CHECK-LABEL: @test_mm256_mask3_fmsubadd_round_ps
+// CHECK: fneg <8 x float> %{{.*}}
+// CHECK: @llvm.x86.avx10.vfmaddsubps256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_mask3_fmsubadd_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_mask_fnmadd_round_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+// CHECK-LABEL: @test_mm256_mask_fnmadd_round_ps
+// CHECK: fneg <8 x float> %{{.*}}
+// CHECK: @llvm.x86.avx10.vfmaddps256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_mask_fnmadd_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_mask_fnmsub_round_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+// CHECK-LABEL: @test_mm256_mask_fnmsub_round_ps
+// CHECK: fneg <8 x float> %{{.*}}
+// CHECK: fneg <8 x float> %{{.*}}
+// CHECK: @llvm.x86.avx10.vfmaddps256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_mask_fnmsub_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_mask3_fnmsub_round_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+// CHECK-LABEL: @test_mm256_mask3_fnmsub_round_ps
+// CHECK: fneg <8 x float> %{{.*}}
+// CHECK: fneg <8 x float> %{{.*}}
+// CHECK: @llvm.x86.avx10.vfmaddps256
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_mask3_fnmsub_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mul_round_pch(__m256h __A, __m256h __B) {
+// CHECK-LABEL: @test_mm256_mul_round_pch
+// CHECK: @llvm.x86.avx10.mask.vfmulcph256
+  return _mm256_mul_round_pch(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask_mul_round_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) {
+// CHECK-LABEL: @test_mm256_mask_mul_round_pch
+// CHECK: @llvm.x86.avx10.mask.vfmulcph256
+  return _mm256_mask_mul_round_pch(__W, __U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_maskz_mul_round_pch(__mmask8 __U, __m256h __A, __m256h __B) {
+// CHECK-LABEL: @test_mm256_maskz_mul_round_pch
+// CHECK: @llvm.x86.avx10.mask.vfmulcph256
+  return _mm256_maskz_mul_round_pch(__U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
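+// getexp, getmant, range, reduce, roundscale and scalef lower to the
+// @llvm.x86.avx10.mask.* intrinsic forms, which fold the mask into the call,
+// so these tests only check the intrinsic name rather than a separate select.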
+__m256d test_mm256_getexp_round_pd(__m256d __A) {
+// CHECK-LABEL: @test_mm256_getexp_round_pd
+// CHECK: @llvm.x86.avx10.mask.vgetexppd256
+  return _mm256_getexp_round_pd(__A, _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_mask_getexp_round_pd(__m256d __W, __mmask8 __U, __m256d __A) {
+// CHECK-LABEL: @test_mm256_mask_getexp_round_pd
+// CHECK: @llvm.x86.avx10.mask.vgetexppd256
+  return _mm256_mask_getexp_round_pd(__W, __U, __A, _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_maskz_getexp_round_pd(__mmask8 __U, __m256d __A) {
+// CHECK-LABEL: @test_mm256_maskz_getexp_round_pd
+// CHECK: @llvm.x86.avx10.mask.vgetexppd256
+  return _mm256_maskz_getexp_round_pd(__U, __A, _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_getexp_round_ph(__m256h __A) {
+// CHECK-LABEL: @test_mm256_getexp_round_ph
+// CHECK: @llvm.x86.avx10.mask.vgetexpph256
+  return _mm256_getexp_round_ph(__A, _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask_getexp_round_ph(__m256h __W, __mmask16 __U, __m256h __A) {
+// CHECK-LABEL: @test_mm256_mask_getexp_round_ph
+// CHECK: @llvm.x86.avx10.mask.vgetexpph256
+  return _mm256_mask_getexp_round_ph(__W, __U, __A, _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_maskz_getexp_round_ph(__mmask16 __U, __m256h __A) {
+// CHECK-LABEL: @test_mm256_maskz_getexp_round_ph
+// CHECK: @llvm.x86.avx10.mask.vgetexpph256
+  return _mm256_maskz_getexp_round_ph(__U, __A, _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_getexp_round_ps(__m256 __A) {
+// CHECK-LABEL: @test_mm256_getexp_round_ps
+// CHECK: @llvm.x86.avx10.mask.vgetexpps256
+  return _mm256_getexp_round_ps(__A, _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_mask_getexp_round_ps(__m256 __W, __mmask8 __U, __m256 __A) {
+// CHECK-LABEL: @test_mm256_mask_getexp_round_ps
+// CHECK: @llvm.x86.avx10.mask.vgetexpps256
+  return _mm256_mask_getexp_round_ps(__W, __U, __A, _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_maskz_getexp_round_ps(__mmask8 __U, __m256 __A) {
+// CHECK-LABEL: @test_mm256_maskz_getexp_round_ps
+// CHECK: @llvm.x86.avx10.mask.vgetexpps256
+  return _mm256_maskz_getexp_round_ps(__U, __A, _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_getmant_round_pd(__m256d __A) {
+// CHECK-LABEL: @test_mm256_getmant_round_pd
+// CHECK: @llvm.x86.avx10.mask.vgetmantpd256
+  return _mm256_getmant_round_pd(__A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_mask_getmant_round_pd(__m256d __W, __mmask8 __U, __m256d __A) {
+// CHECK-LABEL: @test_mm256_mask_getmant_round_pd
+// CHECK: @llvm.x86.avx10.mask.vgetmantpd256
+  return _mm256_mask_getmant_round_pd(__W, __U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_maskz_getmant_round_pd(__mmask8 __U, __m256d __A) {
+// CHECK-LABEL: @test_mm256_maskz_getmant_round_pd
+// CHECK: @llvm.x86.avx10.mask.vgetmantpd256
+  return _mm256_maskz_getmant_round_pd(__U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_getmant_round_ph(__m256h __A) {
+// CHECK-LABEL: @test_mm256_getmant_round_ph
+// CHECK: @llvm.x86.avx10.mask.vgetmantph256
+  return _mm256_getmant_round_ph(__A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask_getmant_round_ph(__m256h __W, __mmask16 __U, __m256h __A) {
+// CHECK-LABEL: @test_mm256_mask_getmant_round_ph
+// CHECK: @llvm.x86.avx10.mask.vgetmantph256
+  return _mm256_mask_getmant_round_ph(__W, __U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_maskz_getmant_round_ph(__mmask16 __U, __m256h __A) {
+// CHECK-LABEL: @test_mm256_maskz_getmant_round_ph
+// CHECK: @llvm.x86.avx10.mask.vgetmantph256
+  return _mm256_maskz_getmant_round_ph(__U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_getmant_round_ps(__m256 __A) {
+// CHECK-LABEL: @test_mm256_getmant_round_ps
+// CHECK: @llvm.x86.avx10.mask.vgetmantps256
+  return _mm256_getmant_round_ps(__A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_mask_getmant_round_ps(__m256 __W, __mmask8 __U, __m256 __A) {
+// CHECK-LABEL: @test_mm256_mask_getmant_round_ps
+// CHECK: @llvm.x86.avx10.mask.vgetmantps256
+  return _mm256_mask_getmant_round_ps(__W, __U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_maskz_getmant_round_ps(__mmask8 __U, __m256 __A) {
+// CHECK-LABEL: @test_mm256_maskz_getmant_round_ps
+// CHECK: @llvm.x86.avx10.mask.vgetmantps256
+  return _mm256_maskz_getmant_round_ps(__U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_max_round_pd(__m256d __A, __m256d __B) {
+// CHECK-LABEL: @test_mm256_max_round_pd
+// CHECK: @llvm.x86.avx10.vmaxpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 8)
+  return _mm256_max_round_pd(__A, __B, _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_mask_max_round_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+// CHECK-LABEL: @test_mm256_mask_max_round_pd
+// CHECK: @llvm.x86.avx10.vmaxpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 8)
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+  return _mm256_mask_max_round_pd(__W, __U, __A, __B, _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_maskz_max_round_pd(__mmask8 __U, __m256d __A, __m256d __B) {
+// CHECK-LABEL: @test_mm256_maskz_max_round_pd
+// CHECK: @llvm.x86.avx10.vmaxpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 8)
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+  return _mm256_maskz_max_round_pd(__U, __A, __B, _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_max_round_ph(__m256h __A, __m256h __B) {
+// CHECK-LABEL: @test_mm256_max_round_ph
+// CHECK: @llvm.x86.avx10.vmaxph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 8)
+  return _mm256_max_round_ph(__A, __B, _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask_max_round_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
+// CHECK-LABEL: @test_mm256_mask_max_round_ph
+// CHECK: @llvm.x86.avx10.vmaxph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 8)
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
+  return _mm256_mask_max_round_ph(__W, __U, __A, __B, _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_maskz_max_round_ph(__mmask16 __U, __m256h __A, __m256h __B) {
+// CHECK-LABEL: @test_mm256_maskz_max_round_ph
+// CHECK: @llvm.x86.avx10.vmaxph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 8)
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
+  return _mm256_maskz_max_round_ph(__U, __A, __B, _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_max_round_ps(__m256 __A, __m256 __B) {
+// CHECK-LABEL: @test_mm256_max_round_ps
+// CHECK: @llvm.x86.avx10.vmaxps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 8)
+  return _mm256_max_round_ps(__A, __B, _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_mask_max_round_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+// CHECK-LABEL: @test_mm256_mask_max_round_ps
+// CHECK: @llvm.x86.avx10.vmaxps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 8)
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_mask_max_round_ps(__W, __U, __A, __B, _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_maskz_max_round_ps(__mmask8 __U, __m256 __A, __m256 __B) {
+// CHECK-LABEL: @test_mm256_maskz_max_round_ps
+// CHECK: @llvm.x86.avx10.vmaxps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 8)
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_maskz_max_round_ps(__U, __A, __B, _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_min_round_pd(__m256d __A, __m256d __B) {
+// CHECK-LABEL: @test_mm256_min_round_pd
+// CHECK: @llvm.x86.avx10.vminpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 8)
+  return _mm256_min_round_pd(__A, __B, _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_mask_min_round_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+// CHECK-LABEL: @test_mm256_mask_min_round_pd
+// CHECK: @llvm.x86.avx10.vminpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 8)
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+  return _mm256_mask_min_round_pd(__W, __U, __A, __B, _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_maskz_min_round_pd(__mmask8 __U, __m256d __A, __m256d __B) {
+// CHECK-LABEL: @test_mm256_maskz_min_round_pd
+// CHECK: @llvm.x86.avx10.vminpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 8)
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+  return _mm256_maskz_min_round_pd(__U, __A, __B, _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_min_round_ph(__m256h __A, __m256h __B) {
+// CHECK-LABEL: @test_mm256_min_round_ph
+// CHECK: @llvm.x86.avx10.vminph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 8)
+  return _mm256_min_round_ph(__A, __B, _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask_min_round_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
+// CHECK-LABEL: @test_mm256_mask_min_round_ph
+// CHECK: @llvm.x86.avx10.vminph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 8)
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
+  return _mm256_mask_min_round_ph(__W, __U, __A, __B, _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_maskz_min_round_ph(__mmask16 __U, __m256h __A, __m256h __B) {
+// CHECK-LABEL: @test_mm256_maskz_min_round_ph
+// CHECK: @llvm.x86.avx10.vminph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 8)
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
+  return _mm256_maskz_min_round_ph(__U, __A, __B, _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_min_round_ps(__m256 __A, __m256 __B) {
+// CHECK-LABEL: @test_mm256_min_round_ps
+// CHECK: @llvm.x86.avx10.vminps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 8)
+  return _mm256_min_round_ps(__A, __B, _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_mask_min_round_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+// CHECK-LABEL: @test_mm256_mask_min_round_ps
+// CHECK: @llvm.x86.avx10.vminps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 8)
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_mask_min_round_ps(__W, __U, __A, __B, _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_maskz_min_round_ps(__mmask8 __U, __m256 __A, __m256 __B) {
+// CHECK-LABEL: @test_mm256_maskz_min_round_ps
+// CHECK: @llvm.x86.avx10.vminps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 8)
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_maskz_min_round_ps(__U, __A, __B, _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_mul_round_pd(__m256d __A, __m256d __B) {
+// CHECK-LABEL: @test_mm256_mul_round_pd
+// CHECK: @llvm.x86.avx10.vmulpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 11)
+  return _mm256_mul_round_pd(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_mask_mul_round_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+// CHECK-LABEL: @test_mm256_mask_mul_round_pd
+// CHECK: @llvm.x86.avx10.vmulpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 10)
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+  return _mm256_mask_mul_round_pd(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_maskz_mul_round_pd(__mmask8 __U, __m256d __A, __m256d __B) {
+// CHECK-LABEL: @test_mm256_maskz_mul_round_pd
+// CHECK: @llvm.x86.avx10.vmulpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 9)
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+  return _mm256_maskz_mul_round_pd(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mul_round_ph(__m256h __A, __m256h __B) {
+// CHECK-LABEL: @test_mm256_mul_round_ph
+// CHECK: @llvm.x86.avx10.vmulph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 11)
+  return _mm256_mul_round_ph(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask_mul_round_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
+// CHECK-LABEL: @test_mm256_mask_mul_round_ph
+// CHECK: @llvm.x86.avx10.vmulph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 10)
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
+  return _mm256_mask_mul_round_ph(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_maskz_mul_round_ph(__mmask16 __U, __m256h __A, __m256h __B) {
+// CHECK-LABEL: @test_mm256_maskz_mul_round_ph
+// CHECK: @llvm.x86.avx10.vmulph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 9)
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
+  return _mm256_maskz_mul_round_ph(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_mul_round_ps(__m256 __A, __m256 __B) {
+// CHECK-LABEL: @test_mm256_mul_round_ps
+// CHECK: @llvm.x86.avx10.vmulps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 11)
+  return _mm256_mul_round_ps(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_mask_mul_round_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+// CHECK-LABEL: @test_mm256_mask_mul_round_ps
+// CHECK: @llvm.x86.avx10.vmulps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 10)
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_mask_mul_round_ps(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_maskz_mul_round_ps(__mmask8 __U, __m256 __A, __m256 __B) {
+// CHECK-LABEL: @test_mm256_maskz_mul_round_ps
+// CHECK: @llvm.x86.avx10.vmulps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 9)
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_maskz_mul_round_ps(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_range_round_pd(__m256d __A, __m256d __B) {
+// CHECK-LABEL: @test_mm256_range_round_pd
+// CHECK: @llvm.x86.avx10.mask.vrangepd256
+  return _mm256_range_round_pd(__A, __B, 4, 8);
+}
+
+__m256d test_mm256_mask_range_round_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+// CHECK-LABEL: @test_mm256_mask_range_round_pd
+// CHECK: @llvm.x86.avx10.mask.vrangepd256
+  return _mm256_mask_range_round_pd(__W, __U, __A, __B, 4, 8);
+}
+
+__m256d test_mm256_maskz_range_round_pd(__mmask8 __U, __m256d __A, __m256d __B) {
+// CHECK-LABEL: @test_mm256_maskz_range_round_pd
+// CHECK: @llvm.x86.avx10.mask.vrangepd256
+  return _mm256_maskz_range_round_pd(__U, __A, __B, 4, 8);
+}
+
+__m256 test_mm256_range_round_ps(__m256 __A, __m256 __B) {
+// CHECK-LABEL: @test_mm256_range_round_ps
+// CHECK: @llvm.x86.avx10.mask.vrangeps256
+  return _mm256_range_round_ps(__A, __B, 4, 8);
+}
+
+__m256 test_mm256_mask_range_round_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+// CHECK-LABEL: @test_mm256_mask_range_round_ps
+// CHECK: @llvm.x86.avx10.mask.vrangeps256
+  return _mm256_mask_range_round_ps(__W, __U, __A, __B, 4, 8);
+}
+
+__m256 test_mm256_maskz_range_round_ps(__mmask8 __U, __m256 __A, __m256 __B) {
+// CHECK-LABEL: @test_mm256_maskz_range_round_ps
+// CHECK: @llvm.x86.avx10.mask.vrangeps256
+  return _mm256_maskz_range_round_ps(__U, __A, __B, 4, 8);
+}
+
+__m256d test_mm256_reduce_round_pd(__m256d __A) {
+// CHECK-LABEL: @test_mm256_reduce_round_pd
+// CHECK: @llvm.x86.avx10.mask.vreducepd256
+  return _mm256_reduce_round_pd(__A, 4, 8);
+}
+
+__m256d test_mm256_mask_reduce_round_pd(__m256d __W, __mmask8 __U, __m256d __A) {
+// CHECK-LABEL: @test_mm256_mask_reduce_round_pd
+// CHECK: @llvm.x86.avx10.mask.vreducepd256
+  return _mm256_mask_reduce_round_pd(__W, __U, __A, 4, 8);
+}
+
+__m256d test_mm256_maskz_reduce_round_pd(__mmask8 __U, __m256d __A) {
+// CHECK-LABEL: @test_mm256_maskz_reduce_round_pd
+// CHECK: @llvm.x86.avx10.mask.vreducepd256
+  return _mm256_maskz_reduce_round_pd(__U, __A, 4, 8);
+}
+
+__m256h test_mm256_mask_reduce_round_ph(__m256h __A, __mmask16 __U, __m256h __C) {
+// CHECK-LABEL: @test_mm256_mask_reduce_round_ph
+// CHECK: @llvm.x86.avx10.mask.vreduceph256
+  return _mm256_mask_reduce_round_ph(__A, __U, __C, 3, _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_maskz_reduce_round_ph(__m256h __A, __mmask16 __U) {
+// CHECK-LABEL: @test_mm256_maskz_reduce_round_ph
+// CHECK: @llvm.x86.avx10.mask.vreduceph256
+  return _mm256_maskz_reduce_round_ph(__U, __A, 3, _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_reduce_round_ph(__m256h __A) {
+// CHECK-LABEL: @test_mm256_reduce_round_ph
+// CHECK: @llvm.x86.avx10.mask.vreduceph256
+  return _mm256_reduce_round_ph(__A, 3, _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_reduce_round_ps(__m256 __A) {
+// CHECK-LABEL: @test_mm256_reduce_round_ps
+// CHECK: @llvm.x86.avx10.mask.vreduceps256
+  return _mm256_reduce_round_ps(__A, 4, 8);
+}
+
+__m256 test_mm256_mask_reduce_round_ps(__m256 __W, __mmask8 __U, __m256 __A) {
+// CHECK-LABEL: @test_mm256_mask_reduce_round_ps
+// CHECK: @llvm.x86.avx10.mask.vreduceps256
+  return _mm256_mask_reduce_round_ps(__W, __U, __A, 4, 8);
+}
+
+__m256 test_mm256_maskz_reduce_round_ps(__mmask8 __U, __m256 __A) {
+// CHECK-LABEL: @test_mm256_maskz_reduce_round_ps
+// CHECK: @llvm.x86.avx10.mask.vreduceps256
+  return _mm256_maskz_reduce_round_ps(__U, __A, 4, 8);
+}
+
+__m256d test_mm256_roundscale_round_pd(__m256d __A)
+{
+// CHECK-LABEL: @test_mm256_roundscale_round_pd
+// CHECK: @llvm.x86.avx10.mask.vrndscalepd256
+  return _mm256_roundscale_round_pd(__A,_MM_FROUND_TO_ZERO,_MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_mask_roundscale_round_pd(__m256d __A,__mmask8 __U,__m256d __C)
+{
+// CHECK-LABEL: @test_mm256_mask_roundscale_round_pd
+// CHECK: @llvm.x86.avx10.mask.vrndscalepd256
+  return _mm256_mask_roundscale_round_pd(__A,__U,__C,_MM_FROUND_TO_ZERO,_MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_maskz_roundscale_round_pd(__m256d __A,__mmask8 __U)
+{
+// CHECK-LABEL: @test_mm256_maskz_roundscale_round_pd
+// CHECK: @llvm.x86.avx10.mask.vrndscalepd256
+  return _mm256_maskz_roundscale_round_pd(__U,__A,_MM_FROUND_TO_ZERO,_MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask_roundscale_round_ph(__m256h __A, __mmask16 __U, __m256h __C) {
+// CHECK-LABEL: @test_mm256_mask_roundscale_round_ph
+// CHECK: @llvm.x86.avx10.mask.vrndscaleph256
+  return _mm256_mask_roundscale_round_ph(__A, __U, __C, 3, _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_maskz_roundscale_round_ph(__m256h __A, __mmask16 __U) {
+// CHECK-LABEL: @test_mm256_maskz_roundscale_round_ph
+// CHECK: @llvm.x86.avx10.mask.vrndscaleph256
+  return _mm256_maskz_roundscale_round_ph(__U, __A, 3, _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_roundscale_round_ph(__m256h __A) {
+// CHECK-LABEL: @test_mm256_roundscale_round_ph
+// CHECK: @llvm.x86.avx10.mask.vrndscaleph256
+  return _mm256_roundscale_round_ph(__A, 3, _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_roundscale_round_ps(__m256 __A)
+{
+// CHECK-LABEL: @test_mm256_roundscale_round_ps
+// CHECK: @llvm.x86.avx10.mask.vrndscaleps256
+  return _mm256_roundscale_round_ps(__A,_MM_FROUND_TO_ZERO,_MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_mask_roundscale_round_ps(__m256 __A,__mmask8 __U,__m256 __C)
+{
+// CHECK-LABEL: @test_mm256_mask_roundscale_round_ps
+// CHECK: @llvm.x86.avx10.mask.vrndscaleps256
+  return _mm256_mask_roundscale_round_ps(__A,__U,__C,_MM_FROUND_TO_ZERO,_MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_maskz_roundscale_round_ps(__m256 __A,__mmask8 __U)
+{
+// CHECK-LABEL: @test_mm256_maskz_roundscale_round_ps
+// CHECK: @llvm.x86.avx10.mask.vrndscaleps256
+  return _mm256_maskz_roundscale_round_ps(__U,__A,_MM_FROUND_TO_ZERO,_MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_scalef_round_pd(__m256d __A, __m256d __B) {
+// CHECK-LABEL: @test_mm256_scalef_round_pd
+// CHECK: @llvm.x86.avx10.mask.vscalefpd256
+  return _mm256_scalef_round_pd(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_mask_scalef_round_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+// CHECK-LABEL: @test_mm256_mask_scalef_round_pd
+// CHECK: @llvm.x86.avx10.mask.vscalefpd256
+  return _mm256_mask_scalef_round_pd(__W, __U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_maskz_scalef_round_pd(__mmask8 __U, __m256d __A, __m256d __B) {
+// CHECK-LABEL: @test_mm256_maskz_scalef_round_pd
+// CHECK: @llvm.x86.avx10.mask.vscalefpd256
+  return _mm256_maskz_scalef_round_pd(__U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_scalef_round_ph(__m256h __A, __m256h __B) {
+// CHECK-LABEL: @test_mm256_scalef_round_ph
+// CHECK: @llvm.x86.avx10.mask.vscalefph256
+  return _mm256_scalef_round_ph(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask_scalef_round_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
+// CHECK-LABEL: @test_mm256_mask_scalef_round_ph
+// CHECK: @llvm.x86.avx10.mask.vscalefph256
+  return _mm256_mask_scalef_round_ph(__W, __U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_maskz_scalef_round_ph(__mmask16 __U, __m256h __A, __m256h __B) {
+// CHECK-LABEL: @test_mm256_maskz_scalef_round_ph
+// CHECK: @llvm.x86.avx10.mask.vscalefph256
+  return _mm256_maskz_scalef_round_ph(__U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_scalef_round_ps(__m256 __A, __m256 __B) {
+// CHECK-LABEL: @test_mm256_scalef_round_ps
+// CHECK: @llvm.x86.avx10.mask.vscalefps256
+  return _mm256_scalef_round_ps(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_mask_scalef_round_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+// CHECK-LABEL: @test_mm256_mask_scalef_round_ps
+// CHECK: @llvm.x86.avx10.mask.vscalefps256
+  return _mm256_mask_scalef_round_ps(__W, __U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_maskz_scalef_round_ps(__mmask8 __U, __m256 __A, __m256 __B) {
+// CHECK-LABEL: @test_mm256_maskz_scalef_round_ps
+// CHECK: @llvm.x86.avx10.mask.vscalefps256
+  return _mm256_maskz_scalef_round_ps(__U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_sqrt_round_pd(__m256d __A)
+{
+// CHECK-LABEL: @test_mm256_sqrt_round_pd
+// CHECK: call <4 x double> @llvm.x86.avx10.vsqrtpd256(<4 x double> %{{.*}}, i32 11)
+  return _mm256_sqrt_round_pd(__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_mask_sqrt_round_pd(__m256d __W,__mmask8 __U,__m256d __A)
+{
+// CHECK-LABEL: @test_mm256_mask_sqrt_round_pd
+// CHECK: call <4 x double> @llvm.x86.avx10.vsqrtpd256(<4 x double> %{{.*}}, i32 11)
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+  return _mm256_mask_sqrt_round_pd(__W,__U,__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_maskz_sqrt_round_pd(__mmask8 __U,__m256d __A)
+{
+// CHECK-LABEL: @test_mm256_maskz_sqrt_round_pd
+// CHECK: call <4 x double> @llvm.x86.avx10.vsqrtpd256(<4 x double> %{{.*}}, i32 11)
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> {{.*}}
+  return _mm256_maskz_sqrt_round_pd(__U,__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_sqrt_round_ph(__m256h __A) {
+// CHECK-LABEL: @test_mm256_sqrt_round_ph
+// CHECK: call <16 x half> @llvm.x86.avx10.vsqrtph256(<16 x half> %{{.*}}, i32 11)
+  return _mm256_sqrt_round_ph(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask_sqrt_round_ph(__m256h __W, __mmask16 __U, __m256h __A) {
+// CHECK-LABEL: @test_mm256_mask_sqrt_round_ph
+// CHECK: call <16 x half> @llvm.x86.avx10.vsqrtph256(<16 x half> %{{.*}}, i32 11)
+// CHECK: bitcast i16 %{{.*}} to <16 x i1>
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
+  return _mm256_mask_sqrt_round_ph(__W, __U, __A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_maskz_sqrt_round_ph(__mmask16 __U, __m256h __A) {
+// CHECK-LABEL: @test_mm256_maskz_sqrt_round_ph
+// CHECK: call <16 x half> @llvm.x86.avx10.vsqrtph256(<16 x half> %{{.*}}, i32 11)
+// CHECK: bitcast i16 %{{.*}} to <16 x i1>
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> {{.*}}
+  return _mm256_maskz_sqrt_round_ph(__U, __A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_sqrt_round_ps(__m256 __A)
+{
+// CHECK-LABEL: @test_mm256_sqrt_round_ps
+// CHECK: call <8 x float> @llvm.x86.avx10.vsqrtps256(<8 x float> %{{.*}}, i32 11)
+  return _mm256_sqrt_round_ps(__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_mask_sqrt_round_ps(__m256 __W,__mmask8 __U,__m256 __A)
+{
+// CHECK-LABEL: @test_mm256_mask_sqrt_round_ps
+// CHECK: call <8 x float> @llvm.x86.avx10.vsqrtps256(<8 x float> %{{.*}}, i32 11)
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_mask_sqrt_round_ps(__W,__U,__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_maskz_sqrt_round_ps(__mmask8 __U,__m256 __A)
+{
+// CHECK-LABEL: @test_mm256_maskz_sqrt_round_ps
+// CHECK: call <8 x float> @llvm.x86.avx10.vsqrtps256(<8 x float> %{{.*}}, i32 11)
+// CHECK: bitcast i8 %{{.*}} to <8 x i1>
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> {{.*}}
+  return _mm256_maskz_sqrt_round_ps(__U,__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_sub_round_pd(__m256d __A, __m256d __B) {
+// CHECK-LABEL: @test_mm256_sub_round_pd
+// CHECK: @llvm.x86.avx10.vsubpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 11)
+  return _mm256_sub_round_pd(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_mask_sub_round_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+// CHECK-LABEL: @test_mm256_mask_sub_round_pd
+// CHECK: @llvm.x86.avx10.vsubpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 10)
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+  return _mm256_mask_sub_round_pd(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+}
+
+__m256d test_mm256_maskz_sub_round_pd(__mmask8 __U, __m256d __A, __m256d __B) {
+// CHECK-LABEL: @test_mm256_maskz_sub_round_pd
+// CHECK: @llvm.x86.avx10.vsubpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 9)
+// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+  return _mm256_maskz_sub_round_pd(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_sub_round_ph(__m256h __A, __m256h __B) {
+// CHECK-LABEL: @test_mm256_sub_round_ph
+// CHECK: @llvm.x86.avx10.vsubph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 11)
+  return _mm256_sub_round_ph(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_mask_sub_round_ph(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) {
+// CHECK-LABEL: @test_mm256_mask_sub_round_ph
+// CHECK: @llvm.x86.avx10.vsubph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 10)
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
+  return _mm256_mask_sub_round_ph(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm256_maskz_sub_round_ph(__mmask8 __U, __m256h __A, __m256h __B) {
+// CHECK-LABEL: @test_mm256_maskz_sub_round_ph
+// CHECK: @llvm.x86.avx10.vsubph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 9)
+// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}}
+  return _mm256_maskz_sub_round_ph(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_sub_round_ps(__m256 __A, __m256 __B) {
+// CHECK-LABEL: @test_mm256_sub_round_ps
+// CHECK: @llvm.x86.avx10.vsubps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 11)
+  return _mm256_sub_round_ps(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_mask_sub_round_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+// CHECK-LABEL: @test_mm256_mask_sub_round_ps
+// CHECK: @llvm.x86.avx10.vsubps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 10)
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_mask_sub_round_ps(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm256_maskz_sub_round_ps(__mmask8 __U, __m256 __A, __m256 __B) {
+// CHECK-LABEL: @test_mm256_maskz_sub_round_ps
+// CHECK: @llvm.x86.avx10.vsubps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 9)
+// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  return _mm256_maskz_sub_round_ps(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+}
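
For reference, the rounding argument in these tests is the bitwise OR of a
rounding-control macro and _MM_FROUND_NO_EXC, which is how the immediates 8, 9,
10 and 11 in the CHECK lines arise. A minimal usage sketch (assuming the new
YMM rounding intrinsics are reachable through <immintrin.h> when targeting
avx10.2-256; the helper names here are made up for illustration):

  #include <immintrin.h>

  __m256 sub_rtz(__m256 a, __m256 b) {
    /* _MM_FROUND_TO_ZERO (0x3) | _MM_FROUND_NO_EXC (0x8) == 11, the "i32 11"
       immediate checked above. */
    return _mm256_sub_round_ps(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
  }

  __m256d sub_rd_masked(__m256d w, __mmask8 m, __m256d a, __m256d b) {
    /* Round toward negative infinity, exceptions suppressed (0x1 | 0x8 == 9). */
    return _mm256_mask_sub_round_pd(w, m, a, b,
                                    _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
  }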

diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 515b0d0fcc22c..4b80ef2189989 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -5003,6 +5003,442 @@ let TargetPrefix = "x86" in {
       DefaultAttrsIntrinsic<[llvm_v8f32_ty],
                             [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty],
                             [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+  def int_x86_avx10_mask_vcmppd256 :
+      DefaultAttrsIntrinsic<[llvm_v4i1_ty],
+                            [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v4i1_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<4>>]>;
+  def int_x86_avx10_mask_vcmpph256 :
+      DefaultAttrsIntrinsic<[llvm_v16i1_ty],
+                            [llvm_v16f16_ty, llvm_v16f16_ty, llvm_i32_ty, llvm_v16i1_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<4>>]>;
+  def int_x86_avx10_mask_vcmpps256 :
+      DefaultAttrsIntrinsic<[llvm_v8i1_ty],
+                            [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty, llvm_v8i1_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<4>>]>;
+  def int_x86_avx10_mask_vcvtpd2dq256 :
+      ClangBuiltin<"__builtin_ia32_vcvtpd2dq256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v4i32_ty],
+                            [llvm_v4f64_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvtpd2ph256 :
+      ClangBuiltin<"__builtin_ia32_vcvtpd2ph256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v8f16_ty],
+                            [llvm_v4f64_ty, llvm_v8f16_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvtpd2ps256 :
+      ClangBuiltin<"__builtin_ia32_vcvtpd2ps256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v4f32_ty],
+                            [llvm_v4f64_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvtpd2qq256 :
+      ClangBuiltin<"__builtin_ia32_vcvtpd2qq256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v4i64_ty],
+                            [llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvtpd2udq256 :
+      ClangBuiltin<"__builtin_ia32_vcvtpd2udq256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v4i32_ty],
+                            [llvm_v4f64_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvtpd2uqq256 :
+      ClangBuiltin<"__builtin_ia32_vcvtpd2uqq256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v4i64_ty],
+                            [llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvtph2dq256 :
+      ClangBuiltin<"__builtin_ia32_vcvtph2dq256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v8i32_ty],
+                            [llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvtph2pd256 :
+      ClangBuiltin<"__builtin_ia32_vcvtph2pd256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v4f64_ty],
+                            [llvm_v8f16_ty, llvm_v4f64_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvtph2psx256 :
+      ClangBuiltin<"__builtin_ia32_vcvtph2psx256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v8f32_ty],
+                            [llvm_v8f16_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvtph2qq256 :
+      ClangBuiltin<"__builtin_ia32_vcvtph2qq256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v4i64_ty],
+                            [llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvtph2udq256 :
+      ClangBuiltin<"__builtin_ia32_vcvtph2udq256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v8i32_ty],
+                            [llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvtph2uqq256 :
+      ClangBuiltin<"__builtin_ia32_vcvtph2uqq256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v4i64_ty],
+                            [llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvtph2uw256 :
+      ClangBuiltin<"__builtin_ia32_vcvtph2uw256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v16i16_ty],
+                            [llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvtph2w256 :
+      ClangBuiltin<"__builtin_ia32_vcvtph2w256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v16i16_ty],
+                            [llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvtps2dq256 :
+      ClangBuiltin<"__builtin_ia32_vcvtps2dq256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v8i32_ty],
+                            [llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvtps2pd256 :
+      ClangBuiltin<"__builtin_ia32_vcvtps2pd256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v4f64_ty],
+                            [llvm_v4f32_ty, llvm_v4f64_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvtps2ph256 :
+      ClangBuiltin<"__builtin_ia32_vcvtps2ph256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v8i16_ty],
+                            [llvm_v8f32_ty, llvm_i32_ty, llvm_v8i16_ty, llvm_i8_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+  def int_x86_avx10_mask_vcvtps2phx256 :
+      ClangBuiltin<"__builtin_ia32_vcvtps2phx256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v8f16_ty],
+                            [llvm_v8f32_ty, llvm_v8f16_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvtps2qq256 :
+      ClangBuiltin<"__builtin_ia32_vcvtps2qq256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v4i64_ty],
+                            [llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvtps2udq256 :
+      ClangBuiltin<"__builtin_ia32_vcvtps2udq256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v8i32_ty],
+                            [llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvtps2uqq256 :
+      ClangBuiltin<"__builtin_ia32_vcvtps2uqq256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v4i64_ty],
+                            [llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvttpd2dq256 :
+      ClangBuiltin<"__builtin_ia32_vcvttpd2dq256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v4i32_ty],
+                            [llvm_v4f64_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvttpd2qq256 :
+      ClangBuiltin<"__builtin_ia32_vcvttpd2qq256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v4i64_ty],
+                            [llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvttpd2udq256 :
+      ClangBuiltin<"__builtin_ia32_vcvttpd2udq256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v4i32_ty],
+                            [llvm_v4f64_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvttpd2uqq256 :
+      ClangBuiltin<"__builtin_ia32_vcvttpd2uqq256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v4i64_ty],
+                            [llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvttph2dq256 :
+      ClangBuiltin<"__builtin_ia32_vcvttph2dq256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v8i32_ty],
+                            [llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvttph2qq256 :
+      ClangBuiltin<"__builtin_ia32_vcvttph2qq256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v4i64_ty],
+                            [llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvttph2udq256 :
+      ClangBuiltin<"__builtin_ia32_vcvttph2udq256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v8i32_ty],
+                            [llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvttph2uqq256 :
+      ClangBuiltin<"__builtin_ia32_vcvttph2uqq256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v4i64_ty],
+                            [llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvttph2uw256 :
+      ClangBuiltin<"__builtin_ia32_vcvttph2uw256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v16i16_ty],
+                            [llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvttph2w256 :
+      ClangBuiltin<"__builtin_ia32_vcvttph2w256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v16i16_ty],
+                            [llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvttps2dq256 :
+      ClangBuiltin<"__builtin_ia32_vcvttps2dq256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v8i32_ty],
+                            [llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvttps2qq256 :
+      ClangBuiltin<"__builtin_ia32_vcvttps2qq256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v4i64_ty],
+                            [llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvttps2udq256 :
+      ClangBuiltin<"__builtin_ia32_vcvttps2udq256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v8i32_ty],
+                            [llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vcvttps2uqq256 :
+      ClangBuiltin<"__builtin_ia32_vcvttps2uqq256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v4i64_ty],
+                            [llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_vdivpd256 :
+      ClangBuiltin<"__builtin_ia32_vdivpd256_round">,
+      DefaultAttrsIntrinsic<[llvm_v4f64_ty],
+                            [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+  def int_x86_avx10_vdivph256 :
+      ClangBuiltin<"__builtin_ia32_vdivph256_round">,
+      DefaultAttrsIntrinsic<[llvm_v16f16_ty],
+                            [llvm_v16f16_ty, llvm_v16f16_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+  def int_x86_avx10_vdivps256 :
+      ClangBuiltin<"__builtin_ia32_vdivps256_round">,
+      DefaultAttrsIntrinsic<[llvm_v8f32_ty],
+                            [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+  def int_x86_avx10_mask_vfcmaddcph256 :
+      ClangBuiltin<"__builtin_ia32_vfcmaddcph256_round_mask3">,
+      DefaultAttrsIntrinsic<[llvm_v8f32_ty],
+                            [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<4>>]>;
+  def int_x86_avx10_maskz_vfcmaddcph256 :
+      ClangBuiltin<"__builtin_ia32_vfcmaddcph256_round_maskz">,
+      DefaultAttrsIntrinsic<[llvm_v8f32_ty],
+                            [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<4>>]>;
+  def int_x86_avx10_mask_vfcmulcph256 :
+      ClangBuiltin<"__builtin_ia32_vfcmulcph256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v8f32_ty],
+                            [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<4>>]>;
+  def int_x86_avx10_mask_vfixupimmpd256 :
+      ClangBuiltin<"__builtin_ia32_vfixupimmpd256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v4f64_ty],
+                            [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<5>>]>;
+  def int_x86_avx10_maskz_vfixupimmpd256 :
+      ClangBuiltin<"__builtin_ia32_vfixupimmpd256_round_maskz">,
+      DefaultAttrsIntrinsic<[llvm_v4f64_ty],
+                            [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<5>>]>;
+  def int_x86_avx10_mask_vfixupimmps256 :
+      ClangBuiltin<"__builtin_ia32_vfixupimmps256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v8f32_ty],
+                            [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<5>>]>;
+  def int_x86_avx10_maskz_vfixupimmps256 :
+      ClangBuiltin<"__builtin_ia32_vfixupimmps256_round_maskz">,
+      DefaultAttrsIntrinsic<[llvm_v8f32_ty],
+                            [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<5>>]>;
+  def int_x86_avx10_vfmaddpd256 :
+      DefaultAttrsIntrinsic<[llvm_v4f64_ty],
+                            [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_vfmaddph256 :
+      DefaultAttrsIntrinsic<[llvm_v16f16_ty],
+                            [llvm_v16f16_ty, llvm_v16f16_ty, llvm_v16f16_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_vfmaddps256 :
+      DefaultAttrsIntrinsic<[llvm_v8f32_ty],
+                            [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vfmaddcph256 :
+      ClangBuiltin<"__builtin_ia32_vfmaddcph256_round_mask3">,
+      DefaultAttrsIntrinsic<[llvm_v8f32_ty],
+                            [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<4>>]>;
+  def int_x86_avx10_maskz_vfmaddcph256 :
+      ClangBuiltin<"__builtin_ia32_vfmaddcph256_round_maskz">,
+      DefaultAttrsIntrinsic<[llvm_v8f32_ty],
+                            [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<4>>]>;
+  def int_x86_avx10_vfmaddsubpd256 :
+      ClangBuiltin<"__builtin_ia32_vfmaddsubpd256_round">,
+      DefaultAttrsIntrinsic<[llvm_v4f64_ty],
+                            [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_vfmaddsubph256 :
+      ClangBuiltin<"__builtin_ia32_vfmaddsubph256_round">,
+      DefaultAttrsIntrinsic<[llvm_v16f16_ty],
+                            [llvm_v16f16_ty, llvm_v16f16_ty, llvm_v16f16_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_vfmaddsubps256 :
+      ClangBuiltin<"__builtin_ia32_vfmaddsubps256_round">,
+      DefaultAttrsIntrinsic<[llvm_v8f32_ty],
+                            [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vfmulcph256 :
+      ClangBuiltin<"__builtin_ia32_vfmulcph256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v8f32_ty],
+                            [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<4>>]>;
+  def int_x86_avx10_mask_vgetexppd256 :
+      ClangBuiltin<"__builtin_ia32_vgetexppd256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v4f64_ty],
+                            [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vgetexpph256 :
+      ClangBuiltin<"__builtin_ia32_vgetexpph256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v16f16_ty],
+                            [llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vgetexpps256 :
+      ClangBuiltin<"__builtin_ia32_vgetexpps256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v8f32_ty],
+                            [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  def int_x86_avx10_mask_vgetmantpd256 :
+      ClangBuiltin<"__builtin_ia32_vgetmantpd256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v4f64_ty],
+                            [llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>;
+  def int_x86_avx10_mask_vgetmantph256 :
+      ClangBuiltin<"__builtin_ia32_vgetmantph256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v16f16_ty],
+                            [llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, llvm_i16_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>;
+  def int_x86_avx10_mask_vgetmantps256 :
+      ClangBuiltin<"__builtin_ia32_vgetmantps256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v8f32_ty],
+                            [llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>;
+  def int_x86_avx10_vmaxpd256 :
+      ClangBuiltin<"__builtin_ia32_vmaxpd256_round">,
+      DefaultAttrsIntrinsic<[llvm_v4f64_ty],
+                            [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+  def int_x86_avx10_vmaxph256 :
+      ClangBuiltin<"__builtin_ia32_vmaxph256_round">,
+      DefaultAttrsIntrinsic<[llvm_v16f16_ty],
+                            [llvm_v16f16_ty, llvm_v16f16_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+  def int_x86_avx10_vmaxps256 :
+      ClangBuiltin<"__builtin_ia32_vmaxps256_round">,
+      DefaultAttrsIntrinsic<[llvm_v8f32_ty],
+                            [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+  def int_x86_avx10_vminpd256 :
+      ClangBuiltin<"__builtin_ia32_vminpd256_round">,
+      DefaultAttrsIntrinsic<[llvm_v4f64_ty],
+                            [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+  def int_x86_avx10_vminph256 :
+      ClangBuiltin<"__builtin_ia32_vminph256_round">,
+      DefaultAttrsIntrinsic<[llvm_v16f16_ty],
+                            [llvm_v16f16_ty, llvm_v16f16_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+  def int_x86_avx10_vminps256 :
+      ClangBuiltin<"__builtin_ia32_vminps256_round">,
+      DefaultAttrsIntrinsic<[llvm_v8f32_ty],
+                            [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+  def int_x86_avx10_vmulpd256 :
+      ClangBuiltin<"__builtin_ia32_vmulpd256_round">,
+      DefaultAttrsIntrinsic<[llvm_v4f64_ty],
+                            [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+  def int_x86_avx10_vmulph256 :
+      ClangBuiltin<"__builtin_ia32_vmulph256_round">,
+      DefaultAttrsIntrinsic<[llvm_v16f16_ty],
+                            [llvm_v16f16_ty, llvm_v16f16_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+  def int_x86_avx10_vmulps256 :
+      ClangBuiltin<"__builtin_ia32_vmulps256_round">,
+      DefaultAttrsIntrinsic<[llvm_v8f32_ty],
+                            [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+  def int_x86_avx10_mask_vrangepd256 :
+      ClangBuiltin<"__builtin_ia32_vrangepd256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v4f64_ty],
+                            [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>]>;
+  def int_x86_avx10_mask_vrangeps256 :
+      ClangBuiltin<"__builtin_ia32_vrangeps256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v8f32_ty],
+                            [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>]>;
+  def int_x86_avx10_mask_vreducepd256 :
+      ClangBuiltin<"__builtin_ia32_vreducepd256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v4f64_ty],
+                            [llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>;
+  def int_x86_avx10_mask_vreduceph256 :
+      ClangBuiltin<"__builtin_ia32_vreduceph256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v16f16_ty],
+                            [llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, llvm_i16_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>;
+  def int_x86_avx10_mask_vreduceps256 :
+      ClangBuiltin<"__builtin_ia32_vreduceps256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v8f32_ty],
+                            [llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>;
+  def int_x86_avx10_mask_vrndscalepd256 :
+      ClangBuiltin<"__builtin_ia32_vrndscalepd256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v4f64_ty],
+                            [llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>;
+  def int_x86_avx10_mask_vrndscaleph256 :
+      ClangBuiltin<"__builtin_ia32_vrndscaleph256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v16f16_ty],
+                            [llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, llvm_i16_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>;
+  def int_x86_avx10_mask_vrndscaleps256 :
+      ClangBuiltin<"__builtin_ia32_vrndscaleps256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v8f32_ty],
+                            [llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>;
+  def int_x86_avx10_mask_vscalefpd256 :
+      ClangBuiltin<"__builtin_ia32_vscalefpd256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v4f64_ty],
+                            [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<4>>]>;
+  def int_x86_avx10_mask_vscalefph256 :
+      ClangBuiltin<"__builtin_ia32_vscalefph256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v16f16_ty],
+                            [llvm_v16f16_ty, llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<4>>]>;
+  def int_x86_avx10_mask_vscalefps256 :
+      ClangBuiltin<"__builtin_ia32_vscalefps256_round_mask">,
+      DefaultAttrsIntrinsic<[llvm_v8f32_ty],
+                            [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<4>>]>;
+  def int_x86_avx10_vsqrtpd256 : ClangBuiltin<"__builtin_ia32_vsqrtpd256_round">,
+      DefaultAttrsIntrinsic<[llvm_v4f64_ty],
+                            [llvm_v4f64_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+  def int_x86_avx10_vsqrtph256 : ClangBuiltin<"__builtin_ia32_vsqrtph256_round">,
+      DefaultAttrsIntrinsic<[llvm_v16f16_ty],
+                            [llvm_v16f16_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+  def int_x86_avx10_vsqrtps256 : ClangBuiltin<"__builtin_ia32_vsqrtps256_round">,
+      DefaultAttrsIntrinsic<[llvm_v8f32_ty],
+                            [llvm_v8f32_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+  def int_x86_avx10_vsubpd256 :
+      ClangBuiltin<"__builtin_ia32_vsubpd256_round">,
+      DefaultAttrsIntrinsic<[llvm_v4f64_ty],
+                            [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+  def int_x86_avx10_vsubph256 :
+      ClangBuiltin<"__builtin_ia32_vsubph256_round">,
+      DefaultAttrsIntrinsic<[llvm_v16f16_ty],
+                            [llvm_v16f16_ty, llvm_v16f16_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+  def int_x86_avx10_vsubps256 :
+      ClangBuiltin<"__builtin_ia32_vsubps256_round">,
+      DefaultAttrsIntrinsic<[llvm_v8f32_ty],
+                            [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<2>>]>;
 }
 
 //===----------------------------------------------------------------------===//
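
The ClangBuiltin<> bindings above map each listed __builtin_ia32_* builtin
straight onto its LLVM intrinsic, and the ImmArg attribute on the rounding/SAE
operand means that operand must be a compile-time constant. A rough sketch of
calling one of the bound builtins directly from C (the intrinsics-header
wrappers are the intended interface; this only illustrates the
constant-immediate requirement):

  #include <immintrin.h>

  __m256d sqrt_rtz(__m256d a) {
    /* 11 == _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC; the second argument has to
       be a constant expression because the intrinsic marks it ImmArg. */
    return __builtin_ia32_vsqrtpd256_round(a, 11);
  }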

diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 7324fdead1777..ee1c8144f681e 100644
--- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -100,7 +100,7 @@ using namespace llvm::X86Disassembler;
 // this information is known, we have narrowed down to a single instruction.
 struct ModRMDecision {
   uint8_t modrm_type;
-  uint16_t instructionIDs;
+  uint32_t instructionIDs;
 };
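
Widening instructionIDs from uint16_t to uint32_t is presumably needed because
the number of instruction IDs the decision tables can point at has grown past
what 16 bits can hold; with a 16-bit field, an ID above 65535 would silently
wrap, as in this small illustration:

  #include <assert.h>
  #include <stdint.h>

  int main(void) {
    uint32_t id = 70000;            /* hypothetical instruction ID > 65535 */
    uint16_t narrow = (uint16_t)id; /* truncating store into a 16-bit field */
    assert(narrow == 4464);         /* 70000 % 65536: the ID has wrapped */
    return 0;
  }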
 
 // Specifies which set of ModR/M->instruction tables to look at

diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index 666667895bc39..24d86ec2e41f6 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -29,5 +29,279 @@ multiclass avx256_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeR
                                        v4f64x_info>, TB, PD, EVEX_CD8<64, CD8VF>, REX_W;
 }
 
-let Predicates = [HasAVX10_2], hasEVEX_U = 1, OpEnc = EncEVEX in
+multiclass avx256_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
+                                 X86SchedWriteSizes sched> {
+  defm PHZ256 : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.YMM,
+                                     v16f16x_info>, T_MAP5,PS, EVEX_CD8<16, CD8VF>;
+  defm PSZ256 : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.YMM,
+                                     v8f32x_info>, TB, PS, EVEX_CD8<32, CD8VF>;
+  defm PDZ256 : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.YMM,
+                                     v4f64x_info>, TB, PD, EVEX_CD8<64, CD8VF>, REX_W;
+}
+
+multiclass avx256_vcmp_p_sae<X86SchedWriteWidths sched> {
+  defm PHZ256 : avx512_vcmp_sae<sched.YMM, v16f16x_info>, AVX512PSIi8Base, EVEX_CD8<16, CD8VF>, TA;
+  defm PSZ256 : avx512_vcmp_sae<sched.YMM, v8f32x_info>, AVX512PSIi8Base, EVEX_CD8<32, CD8VF>;
+  defm PDZ256 : avx512_vcmp_sae<sched.YMM, v4f64x_info>, AVX512PDIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
+}
+
+multiclass avx256_fixupimm_packed_all<bits<8> opc, string OpcodeStr,
+                                      X86SchedWriteWidths sched> {
+  defm PSZ256 : avx512_fixupimm_packed_sae<opc, OpcodeStr, sched.YMM, v8f32x_info,
+                                           v8i32x_info>, EVEX_CD8<32, CD8VF>;
+  defm PDZ256 : avx512_fixupimm_packed_sae<opc, OpcodeStr, sched.YMM, v4f64x_info,
+                                           v4i64x_info>, EVEX_CD8<64, CD8VF>, REX_W;
+}
+
+multiclass avx256_vgetexp<bits<8> opc, string OpcodeStr, SDNode OpNodeSAE,
+                          X86SchedWriteWidths sched> {
+  defm PHZ256 : avx512_fp28_p_sae<opc, OpcodeStr#"ph", v16f16x_info, OpNodeSAE,
+                                  sched.YMM>, T_MAP6,PD, EVEX_CD8<16, CD8VF>;
+  defm PSZ256 : avx512_fp28_p_sae<opc, OpcodeStr#"ps", v8f32x_info, OpNodeSAE,
+                                  sched.YMM>, T8,PD, EVEX_CD8<32, CD8VF>;
+  defm PDZ256 : avx512_fp28_p_sae<opc, OpcodeStr#"pd", v4f64x_info, OpNodeSAE,
+                                  sched.YMM>, T8,PD, EVEX_CD8<64, CD8VF>, REX_W;
+}
+
+multiclass avx256_unary_fp_sae<string OpcodeStr, bits<8> opcPs, bits<8> opcPd,
+                               SDNode OpNodeSAE, X86SchedWriteWidths sched> {
+  defm PHZ256 : avx512_unary_fp_sae_packed_imm<opcPs, OpcodeStr, OpNodeSAE, sched.YMM,
+                                               v16f16x_info>, AVX512PSIi8Base, TA, EVEX_CD8<16, CD8VF>;
+  defm PSZ256 : avx512_unary_fp_sae_packed_imm<opcPs, OpcodeStr, OpNodeSAE, sched.YMM,
+                                               v8f32x_info>, AVX512AIi8Base, EVEX_CD8<32, CD8VF>;
+  defm PDZ256 : avx512_unary_fp_sae_packed_imm<opcPd, OpcodeStr, OpNodeSAE, sched.YMM,
+                                               v4f64x_info>, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
+}
+
+multiclass avx256_common_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNodeSAE,
+                                           X86SchedWriteWidths sched> {
+  defm PSZ256 : avx512_fp_sae_packed_imm<opc, OpcodeStr#"ps", OpNodeSAE, sched.YMM,
+                                         v8f32x_info>, EVEX_CD8<32, CD8VF>;
+  defm PDZ256 : avx512_fp_sae_packed_imm<opc, OpcodeStr#"pd", OpNodeSAE, sched.YMM,
+                                         v4f64x_info>, EVEX_CD8<64, CD8VF>, REX_W;
+}
+
+multiclass avx256_fp_scalef_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
+                                  X86SchedWriteWidths sched> {
+  defm PHZ256 : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.YMM,
+                                       v16f16x_info>, T_MAP6,PD, EVEX_CD8<16, CD8VF>;
+  defm PSZ256 : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.YMM,
+                                       v8f32x_info>, T8,PD, EVEX_CD8<32, CD8VF>;
+  defm PDZ256 : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.YMM,
+                                       v4f64x_info>, T8,PD, EVEX_CD8<64, CD8VF>, REX_W;
+}
+
+multiclass avx256_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
+                                        X86SchedWriteSizes sched> {
+  defm PHZ256 : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"),
+                                         sched.PH.YMM, v16f16x_info>, T_MAP5,PS, EVEX_CD8<16, CD8VF>;
+  defm PSZ256 : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
+                                         sched.PS.YMM, v8f32x_info>, TB, PS, EVEX_CD8<32, CD8VF>;
+  defm PDZ256 : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
+                                         sched.PD.YMM, v4f64x_info>, TB, PD, EVEX_CD8<64, CD8VF>, REX_W;
+}
+
+multiclass avx256_vcvtw_rc<string OpcodeStr, SDNode OpNodeRnd> {
+  defm PHZ256 : avx512_vcvt_fp_rc<0x7D, OpcodeStr, v16f16x_info, v16i16x_info, OpNodeRnd,
+                                  SchedWriteCvtPD2DQ.YMM>, EVEX_CD8<16, CD8VF>;
+}
+
+multiclass avx256_cvtdq2fp_rc<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
+                              X86SchedWriteWidths sched> {
+  defm PHZ256 : avx512_vcvt_fp_rc<opc, !strconcat(OpcodeStr, "ph"), v8f16x_info,
+                                  v8i32x_info, OpNodeRnd, sched.YMM>, T_MAP5,PS, EVEX_CD8<32, CD8VF>;
+  defm PSZ256 : avx512_vcvt_fp_rc<opc, !strconcat(OpcodeStr, "ps"), v8f32x_info,
+                                  v8i32x_info, OpNodeRnd, sched.YMM>, TB, PS, EVEX_CD8<32, CD8VF>;
+}
+
+multiclass avx256_cvtudq2fp_rc<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
+                               X86SchedWriteWidths sched> {
+  defm PHZ256 : avx512_vcvt_fp_rc<opc, !strconcat(OpcodeStr, "ph"), v8f16x_info,
+                                  v8i32x_info, OpNodeRnd, sched.YMM>, T_MAP5,XD, EVEX_CD8<32, CD8VF>;
+  defm PSZ256 : avx512_vcvt_fp_rc<opc, !strconcat(OpcodeStr, "ps"), v8f32x_info,
+                                  v8i32x_info, OpNodeRnd, sched.YMM>, TB, XD, EVEX_CD8<32, CD8VF>;
+}
+
+multiclass avx256_cvtqq2fp_rc<string OpcodeStr, X86VectorVTInfo _Src> {
+  defm PHZ256 : avx512_vcvt_fp_rc<0x5B, !strconcat(OpcodeStr, "ph"), v8f16x_info,
+                                  _Src, X86VSintToFpRnd, SchedWriteCvtDQ2PS.YMM>, T_MAP5,PS;
+  defm PSZ256 : avx512_vcvt_fp_rc<0x5B, !strconcat(OpcodeStr, "ps"), v4f32x_info,
+                                  _Src, X86VSintToFpRnd, SchedWriteCvtDQ2PS.YMM>, TB, PS;
+  defm PDZ256 : avx512_vcvt_fp_rc<0xE6, !strconcat(OpcodeStr, "pd"), v4f64x_info,
+                                  _Src, X86VSintToFpRnd, SchedWriteCvtDQ2PD.YMM>, TB, XS;
+}
+
+multiclass avx256_cvtuqq2fp_rc<string OpcodeStr, X86VectorVTInfo _Src> {
+  defm PHZ256 : avx512_vcvt_fp_rc<0x7A, !strconcat(OpcodeStr, "ph"), v8f16x_info,
+                                  _Src, X86VUintToFpRnd, SchedWriteCvtDQ2PS.YMM>, T_MAP5,XD;
+  defm PSZ256 : avx512_vcvt_fp_rc<0x7A, !strconcat(OpcodeStr, "ps"), v4f32x_info,
+                                  _Src, X86VUintToFpRnd, SchedWriteCvtDQ2PS.YMM>, TB, XD;
+  defm PDZ256 : avx512_vcvt_fp_rc<0x7A, !strconcat(OpcodeStr, "pd"), v4f64x_info,
+                                  _Src, X86VUintToFpRnd, SchedWriteCvtDQ2PD.YMM>, TB, XS;
+}
+
+multiclass avx256_vcvt_pd2<string OpcodeStr, X86VectorVTInfo _Src> {
+  defm PHZ256 : avx512_vcvt_fp_rc<0x5A, !strconcat(OpcodeStr, "ph"), v8f16x_info,
+                                  _Src, X86vfproundRnd, SchedWriteCvtPD2PS.YMM>, T_MAP5,PD;
+  defm PSZ256 : avx512_vcvt_fp_rc<0x5A, !strconcat(OpcodeStr, "ps"), v4f32x_info,
+                                  _Src, X86vfproundRnd, SchedWriteCvtPD2PS.YMM>, TB, PD;
+  defm DQZ256 : avx512_vcvt_fp_rc<0xE6, !strconcat(OpcodeStr, "dq"), v4i32x_info,
+                                  _Src, X86cvtp2IntRnd, SchedWriteCvtPD2DQ.YMM>, TB, XD;
+  defm QQZ256 : avx512_vcvt_fp_rc<0x7B, !strconcat(OpcodeStr, "qq"), v4i64x_info,
+                                  _Src, X86cvtp2IntRnd, SchedWriteCvtPD2DQ.YMM>, TB, PD;
+  defm UDQZ256 : avx512_vcvt_fp_rc<0x79, !strconcat(OpcodeStr, "udq"), v4i32x_info,
+                                   _Src, X86cvtp2UIntRnd, SchedWriteCvtPD2DQ.YMM>, TB, PS;
+  defm UQQZ256 : avx512_vcvt_fp_rc<0x79, !strconcat(OpcodeStr, "uqq"), v4i64x_info,
+                                   _Src, X86cvtp2UIntRnd, SchedWriteCvtPD2DQ.YMM>, TB, PD;
+}
+
+multiclass avx256_vcvt_ps2<string OpcodeStr> {
+  defm PHZ256 : avx512_cvtps2ph_sae<v8i16x_info, v8f32x_info, WriteCvtPS2PHZ>, EVEX_CD8<32, CD8VH>;
+  defm PHXZ256 : avx512_vcvt_fp_rc<0x1D, !strconcat(OpcodeStr, "phx"), v8f16x_info, v8f32x_info,
+                                   X86vfproundRnd, SchedWriteCvtPD2PS.YMM>, T_MAP5,PD, EVEX_CD8<32, CD8VF>;
+  defm PDZ256 : avx512_vcvt_fp_sae<0x5A, !strconcat(OpcodeStr, "pd"), v4f64x_info, v4f32x_info,
+                                   X86vfpextSAE, SchedWriteCvtPS2PD.YMM>, TB, PS, EVEX_CD8<32, CD8VF>;
+  defm DQZ256 : avx512_vcvt_fp_rc<0x5B, !strconcat(OpcodeStr, "dq"), v8i32x_info, v8f32x_info,
+                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ.YMM>, TB, PD, EVEX_CD8<32, CD8VF>;
+  defm QQZ256 : avx512_vcvt_fp_rc<0x7B, !strconcat(OpcodeStr, "qq"), v4i64x_info, v4f32x_info,
+                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ.YMM>, TB, PD, EVEX_CD8<32, CD8VF>;
+  defm UDQZ256 : avx512_vcvt_fp_rc<0x79, !strconcat(OpcodeStr, "udq"), v8i32x_info, v8f32x_info,
+                                   X86cvtp2UIntRnd, SchedWriteCvtPS2DQ.YMM>, TB, PS, EVEX_CD8<32, CD8VF>;
+  defm UQQZ256 : avx512_vcvt_fp_rc<0x79, !strconcat(OpcodeStr, "uqq"), v4i64x_info, v4f32x_info,
+                                   X86cvtp2UIntRnd, SchedWriteCvtPS2DQ.YMM>, TB, PD, EVEX_CD8<32, CD8VF>;
+}
+
+multiclass avx256_vcvt_ph2<string OpcodeStr> {
+  defm PSZ256 : avx512_cvtph2ps_sae<v8f32x_info, v8i16x_info, WriteCvtPH2PSZ>, EVEX_CD8<32, CD8VH>;
+  defm PSXZ256 : avx512_vcvt_fp_sae<0x13, !strconcat(OpcodeStr, "psx"), v8f32x_info, v8f16x_info,
+                                    X86vfpextSAE, SchedWriteCvtPS2PD.YMM>, T_MAP6,PD, EVEX_CD8<16, CD8VH>;
+  defm PDZ256 : avx512_vcvt_fp_sae<0x5A, !strconcat(OpcodeStr, "pd"), v4f64x_info, v8f16x_info,
+                                   X86vfpextSAE, SchedWriteCvtPS2PD.YMM>, T_MAP5,PS, EVEX_CD8<16, CD8VQ>;
+  defm WZ256 : avx512_vcvt_fp_rc<0x7D, !strconcat(OpcodeStr, "w"), v16i16x_info, v16f16x_info,
+                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ.YMM>, T_MAP5,PD, EVEX_CD8<16, CD8VF>;
+  defm DQZ256 : avx512_vcvt_fp_rc<0x5B, !strconcat(OpcodeStr, "dq"), v8i32x_info, v8f16x_info,
+                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ.YMM>, T_MAP5,PD, EVEX_CD8<16, CD8VH>;
+  defm QQZ256 : avx512_vcvt_fp_rc<0x7B, !strconcat(OpcodeStr, "qq"), v4i64x_info, v8f16x_info,
+                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ.YMM>, T_MAP5,PD, EVEX_CD8<16, CD8VQ>;
+  defm UWZ256 : avx512_vcvt_fp_rc<0x7D, !strconcat(OpcodeStr, "uw"), v16i16x_info, v16f16x_info,
+                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ.YMM>, T_MAP5,PS, EVEX_CD8<16, CD8VF>;
+  defm UDQZ256 : avx512_vcvt_fp_rc<0x79, !strconcat(OpcodeStr, "udq"), v8i32x_info, v8f16x_info,
+                                   X86cvtp2UIntRnd, SchedWriteCvtPS2DQ.YMM>, T_MAP5,PS, EVEX_CD8<16, CD8VH>;
+  defm UQQZ256 : avx512_vcvt_fp_rc<0x79, !strconcat(OpcodeStr, "uqq"), v4i64x_info, v8f16x_info,
+                                   X86cvtp2UIntRnd, SchedWriteCvtPS2DQ.YMM>, T_MAP5,PD, EVEX_CD8<16, CD8VQ>;
+}
+
+multiclass avx256_vcvtt_pd2<string OpcodeStr, X86VectorVTInfo _Src> {
+  defm DQZ256 : avx512_vcvt_fp_sae<0xE6, !strconcat(OpcodeStr, "dq"), v4i32x_info,
+                                   _Src, X86cvttp2siSAE, SchedWriteCvtPD2DQ.YMM>, PD;
+  defm QQZ256 : avx512_vcvt_fp_sae<0x7A, !strconcat(OpcodeStr, "qq"), v4i64x_info,
+                                   _Src, X86cvttp2siSAE, SchedWriteCvtPD2DQ.YMM>, PD;
+  defm UDQZ256 : avx512_vcvt_fp_sae<0x78, !strconcat(OpcodeStr, "udq"), v4i32x_info,
+                                    _Src, X86cvttp2uiSAE, SchedWriteCvtPD2DQ.YMM>, PS;
+  defm UQQZ256 : avx512_vcvt_fp_sae<0x78, !strconcat(OpcodeStr, "uqq"), v4i64x_info,
+                                    _Src, X86cvttp2uiSAE, SchedWriteCvtPD2DQ.YMM>, PD;
+}
+
+multiclass avx256_vcvtt_ps2<string OpcodeStr> {
+  defm DQZ256 : avx512_vcvt_fp_sae<0x5B, !strconcat(OpcodeStr, "dq"), v8i32x_info, v8f32x_info,
+                                   X86cvttp2siSAE, SchedWriteCvtPS2DQ.YMM>, XS, EVEX_CD8<32, CD8VF>;
+  defm QQZ256 : avx512_vcvt_fp_sae<0x7A, !strconcat(OpcodeStr, "qq"), v4i64x_info, v4f32x_info,
+                                   X86cvttp2siSAE, SchedWriteCvtPS2DQ.YMM>, PD, EVEX_CD8<32, CD8VH>;
+  defm UDQZ256 : avx512_vcvt_fp_sae<0x78, !strconcat(OpcodeStr, "udq"), v8i32x_info, v8f32x_info,
+                                    X86cvttp2uiSAE, SchedWriteCvtPS2DQ.YMM>, PS, EVEX_CD8<32, CD8VF>;
+  defm UQQZ256 : avx512_vcvt_fp_sae<0x78, !strconcat(OpcodeStr, "uqq"), v4i64x_info, v4f32x_info,
+                                    X86cvttp2uiSAE, SchedWriteCvtPS2DQ.YMM>, PD, EVEX_CD8<32, CD8VH>;
+}
+
+multiclass avx256_vcvtt_ph2<string OpcodeStr> {
+  defm WZ256 : avx512_vcvt_fp_sae<0x7C, !strconcat(OpcodeStr, "w"), v16i16x_info, v16f16x_info,
+                                  X86cvttp2siSAE, SchedWriteCvtPD2DQ.YMM>, T_MAP5,PD, EVEX_CD8<16, CD8VF>;
+  defm DQZ256 : avx512_vcvt_fp_sae<0x5B, !strconcat(OpcodeStr, "dq"), v8i32x_info, v8f16x_info,
+                                   X86cvttp2siSAE, SchedWriteCvtPS2DQ.YMM>, T_MAP5,XS, EVEX_CD8<16, CD8VH>;
+  defm QQZ256 : avx512_vcvt_fp_sae<0x7A, !strconcat(OpcodeStr, "qq"), v4i64x_info, v8f16x_info,
+                                   X86cvttp2siSAE, SchedWriteCvtPS2DQ.YMM>, T_MAP5,PD, EVEX_CD8<16, CD8VQ>;
+  defm UWZ256 : avx512_vcvt_fp_sae<0x7C, !strconcat(OpcodeStr, "uw"), v16i16x_info, v16f16x_info,
+                                   X86cvttp2uiSAE, SchedWriteCvtPD2DQ.YMM>, T_MAP5,PS, EVEX_CD8<16, CD8VF>;
+  defm UDQZ256 : avx512_vcvt_fp_sae<0x78, !strconcat(OpcodeStr, "udq"), v8i32x_info, v8f16x_info,
+                                    X86cvttp2uiSAE, SchedWriteCvtPS2DQ.YMM>, T_MAP5,PS, EVEX_CD8<16, CD8VH>;
+  defm UQQZ256 : avx512_vcvt_fp_sae<0x78, !strconcat(OpcodeStr, "uqq"), v4i64x_info, v8f16x_info,
+                                    X86cvttp2uiSAE, SchedWriteCvtPS2DQ.YMM>, T_MAP5,PD, EVEX_CD8<16, CD8VQ>;
+}
+
+multiclass avx256_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> {
+  defm PHZ256 : avx512_fma3_132_round<opc, !strconcat(OpcodeStr, "ph"), OpNodeRnd,
+                                      SchedWriteFMA.YMM, v16f16x_info>, T_MAP6,PD, EVEX_CD8<16, CD8VF>;
+  defm PSZ256 : avx512_fma3_132_round<opc, !strconcat(OpcodeStr, "ps"), OpNodeRnd,
+                                      SchedWriteFMA.YMM, v8f32x_info>, T8,PD, EVEX_CD8<32, CD8VF>;
+  defm PDZ256 : avx512_fma3_132_round<opc, !strconcat(OpcodeStr, "pd"), OpNodeRnd,
+                                      SchedWriteFMA.YMM, v4f64x_info>, T8,PD, EVEX_CD8<64, CD8VF>, REX_W;
+}
+
+multiclass avx256_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> {
+  defm PHZ256 : avx512_fma3_213_round<opc, !strconcat(OpcodeStr, "ph"), OpNodeRnd,
+                                      SchedWriteFMA.YMM, v16f16x_info>, T_MAP6,PD, EVEX_CD8<16, CD8VF>;
+  defm PSZ256 : avx512_fma3_213_round<opc, !strconcat(OpcodeStr, "ps"), OpNodeRnd,
+                                      SchedWriteFMA.YMM, v8f32x_info>, T8,PD, EVEX_CD8<32, CD8VF>;
+  defm PDZ256 : avx512_fma3_213_round<opc, !strconcat(OpcodeStr, "pd"), OpNodeRnd,
+                                      SchedWriteFMA.YMM, v4f64x_info>, T8,PD, EVEX_CD8<64, CD8VF>, REX_W;
+}
+
+multiclass avx256_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> {
+  defm PHZ256 : avx512_fma3_231_round<opc, !strconcat(OpcodeStr, "ph"), OpNodeRnd,
+                                      SchedWriteFMA.YMM, v16f16x_info>, T_MAP6,PD, EVEX_CD8<16, CD8VF>;
+  defm PSZ256 : avx512_fma3_231_round<opc, !strconcat(OpcodeStr, "ps"), OpNodeRnd,
+                                      SchedWriteFMA.YMM, v8f32x_info>, T8,PD, EVEX_CD8<32, CD8VF>;
+  defm PDZ256 : avx512_fma3_231_round<opc, !strconcat(OpcodeStr, "pd"), OpNodeRnd,
+                                      SchedWriteFMA.YMM, v4f64x_info>, T8,PD, EVEX_CD8<64, CD8VF>, REX_W;
+}
+
+multiclass avx256_fma3_round3<bits<8> opc132, bits<8> opc213, bits<8> opc231,
+                              string OpcodeStr, SDNode OpNodeRnd> {
+  defm NAME#132 : avx256_fma3_132_round<opc132, !strconcat(OpcodeStr, "132"), OpNodeRnd>;
+  defm NAME#213 : avx256_fma3_213_round<opc213, !strconcat(OpcodeStr, "213"), OpNodeRnd>;
+  defm NAME#231 : avx256_fma3_231_round<opc231, !strconcat(OpcodeStr, "231"), OpNodeRnd>;
+}
+
+let Predicates = [HasAVX10_2], hasEVEX_U = 1, OpEnc = EncEVEX in {
   defm VADD : avx256_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
+  defm VMUL : avx256_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
+  defm VSUB : avx256_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
+  defm VDIV : avx256_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
+  defm VMIN : avx256_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
+  defm VMAX : avx256_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
+  defm VCMP : avx256_vcmp_p_sae<SchedWriteFCmp>, EVEX, VVVV;
+  defm VFIXUPIMM : avx256_fixupimm_packed_all<0x54, "vfixupimm", SchedWriteFAdd>, AVX512AIi8Base, EVEX, VVVV;
+  defm VGETEXP : avx256_vgetexp<0x42, "vgetexp", X86fgetexpSAE, SchedWriteFRnd>;
+  defm VREDUCE : avx256_unary_fp_sae<"vreduce", 0x56, 0x56, X86VReduceSAE, SchedWriteFRnd>;
+  defm VRNDSCALE : avx256_unary_fp_sae<"vrndscale", 0x08, 0x09, X86VRndScaleSAE, SchedWriteFRnd>;
+  defm VGETMANT : avx256_unary_fp_sae<"vgetmant", 0x26, 0x26, X86VGetMantSAE, SchedWriteFRnd>;
+  defm VRANGE : avx256_common_fp_sae_packed_imm<0x50, "vrange", X86VRangeSAE, SchedWriteFAdd>, AVX512AIi8Base, EVEX, VVVV;
+  defm VSCALEF : avx256_fp_scalef_round<0x2C, "vscalef", X86scalefRnd, SchedWriteFAdd>;
+  defm VSQRT : avx256_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
+  defm VCVTW2 : avx256_vcvtw_rc<"vcvtw2ph", X86VSintToFpRnd>, T_MAP5, XS;
+  defm VCVTDQ2 : avx256_cvtdq2fp_rc<0x5B, "vcvtdq2", X86VSintToFpRnd, SchedWriteCvtDQ2PS>;
+  defm VCVTQQ2 : avx256_cvtqq2fp_rc<"vcvtqq2", v4i64x_info>, EVEX_CD8<64, CD8VF>, REX_W;
+  defm VCVTUW2 : avx256_vcvtw_rc<"vcvtuw2ph", X86VUintToFpRnd>, T_MAP5,XD;
+  defm VCVTUDQ2 : avx256_cvtudq2fp_rc<0x7A, "vcvtudq2", X86VUintToFpRnd, SchedWriteCvtDQ2PS>;
+  defm VCVTUQQ2 : avx256_cvtuqq2fp_rc<"vcvtuqq2", v4i64x_info>, EVEX_CD8<64, CD8VF>, REX_W;
+  defm VCVTPD2 : avx256_vcvt_pd2<"vcvtpd2", v4f64x_info>, EVEX_CD8<64, CD8VF>, REX_W;
+  defm VCVTPS2 : avx256_vcvt_ps2<"vcvtps2">;
+  defm VCVTPH2 : avx256_vcvt_ph2<"vcvtph2">;
+  defm VCVTTPD2 : avx256_vcvtt_pd2<"vcvttpd2", v4f64x_info>, EVEX_CD8<64, CD8VF>, TB, REX_W;
+  defm VCVTTPS2 : avx256_vcvtt_ps2<"vcvttps2">, TB;
+  defm VCVTTPH2 : avx256_vcvtt_ph2<"vcvttph2">;
+  defm VFMADD : avx256_fma3_round3<0x98, 0xA8, 0xB8, "vfmadd", X86FmaddRnd>;
+  defm VFMSUB : avx256_fma3_round3<0x9A, 0xAA, 0xBA, "vfmsub", X86FmsubRnd>;
+  defm VFMADDSUB : avx256_fma3_round3<0x96, 0xA6, 0xB6, "vfmaddsub", X86FmaddsubRnd>;
+  defm VFMSUBADD : avx256_fma3_round3<0x97, 0xA7, 0xB7, "vfmsubadd", X86FmsubaddRnd>;
+  defm VFNMADD : avx256_fma3_round3<0x9C, 0xAC, 0xBC, "vfnmadd", X86FnmaddRnd>;
+  defm VFNMSUB : avx256_fma3_round3<0x9E, 0xAE, 0xBE, "vfnmsub", X86FnmsubRnd>;
+  defm VFMULCPHZ256 : avx512_fp_round_packed<0xD6, "vfmulcph", x86vfmulcRnd, SchedWriteFMA.YMM,
+                                             v8f32x_info, "", "@earlyclobber $dst">, T_MAP6,XS, EVEX_CD8<32, CD8VF>;
+  defm VFCMULCPHZ256 : avx512_fp_round_packed<0xD6, "vfcmulcph", x86vfcmulcRnd, SchedWriteFMA.YMM,
+                                              v8f32x_info, "", "@earlyclobber $dst">, T_MAP6,XD, EVEX_CD8<32, CD8VF>;
+  defm VFMADDCPHZ256 : avx512_cfmaop_round<0x56, "vfmaddcph", x86vfmaddcRnd,
+                                           v8f32x_info>, T_MAP6,XS, EVEX_CD8<32, CD8VF>, Sched<[WriteFMAY]>;
+  defm VFCMADDCPHZ256 : avx512_cfmaop_round<0x56, "vfcmaddcph", x86vfcmaddcRnd,
+                                            v8f32x_info>, T_MAP6,XD, EVEX_CD8<32, CD8VF>, Sched<[WriteFMAY]>;
+}

diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index da690aea43f5c..f9b8cb689694e 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7830,7 +7830,7 @@ let Uses = [MXCSR], mayRaiseFPException = 1 in {
 }
 // Conversion with SAE - suppress all exceptions
 multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
-                              X86VectorVTInfo _Src, SDNode OpNodeSAE,
+                              X86VectorVTInfo _Src, SDPatternOperator OpNodeSAE,
                               X86FoldableSchedWrite sched> {
   let Uses = [MXCSR] in
   defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -12016,8 +12016,7 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
 
 multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
                                       X86FoldableSchedWrite sched,
-                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>
-  : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
+                                      X86VectorVTInfo _, X86VectorVTInfo TblVT> {
 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
   defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
@@ -12069,7 +12068,8 @@ multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
                                       AVX512VLVectorVTInfo _Vec,
                                       AVX512VLVectorVTInfo _Tbl> {
   let Predicates = [HasAVX512] in
-    defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
+    defm Z    : avx512_fixupimm_packed<0x54, "vfixupimm", sched.ZMM, _Vec.info512, _Tbl.info512>,
+                avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
                                 _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
                                 EVEX, VVVV, EVEX_V512;
   let Predicates = [HasAVX512, HasVLX] in {

diff --git a/llvm/lib/Target/X86/X86InstrFMA3Info.cpp b/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
index 8060c4fdbf190..5825fffc770b0 100644
--- a/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
+++ b/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
@@ -89,9 +89,12 @@ static const X86InstrFMA3Group Groups[] = {
   FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PH, Suf, Attrs) \
   FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PS, Suf, Attrs)
 
-#define FMA3GROUP_PACKED_AVX512_ROUND(Name, Suf, Attrs) \
-  FMA3GROUP_MASKED(Name, PDZ##Suf, Attrs) \
-  FMA3GROUP_MASKED(Name, PHZ##Suf, Attrs) \
+#define FMA3GROUP_PACKED_AVX512_ROUND(Name, Suf, Attrs)                        \
+  FMA3GROUP_MASKED(Name, PDZ256##Suf, Attrs)                                   \
+  FMA3GROUP_MASKED(Name, PDZ##Suf, Attrs)                                      \
+  FMA3GROUP_MASKED(Name, PHZ256##Suf, Attrs)                                   \
+  FMA3GROUP_MASKED(Name, PHZ##Suf, Attrs)                                      \
+  FMA3GROUP_MASKED(Name, PSZ256##Suf, Attrs)                                   \
   FMA3GROUP_MASKED(Name, PSZ##Suf, Attrs)
 
 #define FMA3GROUP_SCALAR_AVX512_ROUND(Name, Suf, Attrs) \

diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 000138e1837af..70b9adeb7418f 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -388,14 +388,190 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
     X86_INTRINSIC_DATA(avx_vpermilvar_ps, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
     X86_INTRINSIC_DATA(avx_vpermilvar_ps_256, INTR_TYPE_2OP, X86ISD::VPERMILPV,
                        0),
+    X86_INTRINSIC_DATA(avx10_mask_vcmppd256, CMP_MASK_CC, X86ISD::CMPMM,
+                       X86ISD::CMPMM_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vcmpph256, CMP_MASK_CC, X86ISD::CMPMM,
+                       X86ISD::CMPMM_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vcmpps256, CMP_MASK_CC, X86ISD::CMPMM,
+                       X86ISD::CMPMM_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vcvtpd2dq256, INTR_TYPE_1OP_MASK,
+                       X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
+    X86_INTRINSIC_DATA(avx10_mask_vcvtpd2ph256, INTR_TYPE_1OP_MASK,
+                       X86ISD::VFPROUND, X86ISD::VFPROUND_RND),
+    X86_INTRINSIC_DATA(avx10_mask_vcvtpd2ps256, INTR_TYPE_1OP_MASK,
+                       X86ISD::VFPROUND, X86ISD::VFPROUND_RND),
+    X86_INTRINSIC_DATA(avx10_mask_vcvtpd2qq256, INTR_TYPE_1OP_MASK,
+                       X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
+    X86_INTRINSIC_DATA(avx10_mask_vcvtpd2udq256, INTR_TYPE_1OP_MASK,
+                       X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
+    X86_INTRINSIC_DATA(avx10_mask_vcvtpd2uqq256, INTR_TYPE_1OP_MASK,
+                       X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
+    X86_INTRINSIC_DATA(avx10_mask_vcvtph2dq256, INTR_TYPE_1OP_MASK,
+                       X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
+    X86_INTRINSIC_DATA(avx10_mask_vcvtph2pd256, INTR_TYPE_1OP_MASK_SAE,
+                       ISD::FP_EXTEND, X86ISD::VFPEXT_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vcvtph2psx256, INTR_TYPE_1OP_MASK_SAE,
+                       ISD::FP_EXTEND, X86ISD::VFPEXT_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vcvtph2qq256, INTR_TYPE_1OP_MASK,
+                       X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
+    X86_INTRINSIC_DATA(avx10_mask_vcvtph2udq256, INTR_TYPE_1OP_MASK,
+                       X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
+    X86_INTRINSIC_DATA(avx10_mask_vcvtph2uqq256, INTR_TYPE_1OP_MASK,
+                       X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
+    X86_INTRINSIC_DATA(avx10_mask_vcvtph2uw256, INTR_TYPE_1OP_MASK,
+                       X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
+    X86_INTRINSIC_DATA(avx10_mask_vcvtph2w256, INTR_TYPE_1OP_MASK,
+                       X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
+    X86_INTRINSIC_DATA(avx10_mask_vcvtps2dq256, INTR_TYPE_1OP_MASK,
+                       X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
+    X86_INTRINSIC_DATA(avx10_mask_vcvtps2pd256, INTR_TYPE_1OP_MASK_SAE,
+                       ISD::FP_EXTEND, X86ISD::VFPEXT_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vcvtps2phx256, INTR_TYPE_1OP_MASK,
+                       X86ISD::VFPROUND, X86ISD::VFPROUND_RND),
+    X86_INTRINSIC_DATA(avx10_mask_vcvtps2qq256, INTR_TYPE_1OP_MASK,
+                       X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
+    X86_INTRINSIC_DATA(avx10_mask_vcvtps2udq256, INTR_TYPE_1OP_MASK,
+                       X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
+    X86_INTRINSIC_DATA(avx10_mask_vcvtps2uqq256, INTR_TYPE_1OP_MASK,
+                       X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
+    X86_INTRINSIC_DATA(avx10_mask_vcvttpd2dq256, INTR_TYPE_1OP_MASK,
+                       X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vcvttpd2qq256, INTR_TYPE_1OP_MASK,
+                       X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vcvttpd2udq256, INTR_TYPE_1OP_MASK,
+                       X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vcvttpd2uqq256, INTR_TYPE_1OP_MASK,
+                       X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vcvttph2dq256, INTR_TYPE_1OP_MASK,
+                       X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vcvttph2qq256, INTR_TYPE_1OP_MASK,
+                       X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vcvttph2udq256, INTR_TYPE_1OP_MASK,
+                       X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vcvttph2uqq256, INTR_TYPE_1OP_MASK,
+                       X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vcvttph2uw256, INTR_TYPE_1OP_MASK,
+                       X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vcvttph2w256, INTR_TYPE_1OP_MASK,
+                       X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vcvttps2dq256, INTR_TYPE_1OP_MASK,
+                       X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vcvttps2qq256, INTR_TYPE_1OP_MASK,
+                       X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vcvttps2udq256, INTR_TYPE_1OP_MASK,
+                       X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vcvttps2uqq256, INTR_TYPE_1OP_MASK,
+                       X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vfcmaddcph256, CFMA_OP_MASK, X86ISD::VFCMADDC,
+                       X86ISD::VFCMADDC_RND),
+    X86_INTRINSIC_DATA(avx10_mask_vfcmulcph256, INTR_TYPE_2OP_MASK,
+                       X86ISD::VFCMULC, X86ISD::VFCMULC_RND),
+    X86_INTRINSIC_DATA(avx10_mask_vfixupimmpd256, FIXUPIMM, X86ISD::VFIXUPIMM,
+                       X86ISD::VFIXUPIMM_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vfixupimmps256, FIXUPIMM, X86ISD::VFIXUPIMM,
+                       X86ISD::VFIXUPIMM_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vfmaddcph256, CFMA_OP_MASK, X86ISD::VFMADDC,
+                       X86ISD::VFMADDC_RND),
+    X86_INTRINSIC_DATA(avx10_mask_vfmulcph256, INTR_TYPE_2OP_MASK,
+                       X86ISD::VFMULC, X86ISD::VFMULC_RND),
+    X86_INTRINSIC_DATA(avx10_mask_vgetexppd256, INTR_TYPE_1OP_MASK_SAE,
+                       X86ISD::FGETEXP, X86ISD::FGETEXP_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vgetexpph256, INTR_TYPE_1OP_MASK_SAE,
+                       X86ISD::FGETEXP, X86ISD::FGETEXP_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vgetexpps256, INTR_TYPE_1OP_MASK_SAE,
+                       X86ISD::FGETEXP, X86ISD::FGETEXP_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vgetmantpd256, INTR_TYPE_2OP_MASK_SAE,
+                       X86ISD::VGETMANT, X86ISD::VGETMANT_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vgetmantph256, INTR_TYPE_2OP_MASK_SAE,
+                       X86ISD::VGETMANT, X86ISD::VGETMANT_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vgetmantps256, INTR_TYPE_2OP_MASK_SAE,
+                       X86ISD::VGETMANT, X86ISD::VGETMANT_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vrangepd256, INTR_TYPE_3OP_MASK_SAE,
+                       X86ISD::VRANGE, X86ISD::VRANGE_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vrangeps256, INTR_TYPE_3OP_MASK_SAE,
+                       X86ISD::VRANGE, X86ISD::VRANGE_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vreducepd256, INTR_TYPE_2OP_MASK_SAE,
+                       X86ISD::VREDUCE, X86ISD::VREDUCE_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vreduceph256, INTR_TYPE_2OP_MASK_SAE,
+                       X86ISD::VREDUCE, X86ISD::VREDUCE_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vreduceps256, INTR_TYPE_2OP_MASK_SAE,
+                       X86ISD::VREDUCE, X86ISD::VREDUCE_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vrndscalepd256, INTR_TYPE_2OP_MASK_SAE,
+                       X86ISD::VRNDSCALE, X86ISD::VRNDSCALE_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vrndscaleph256, INTR_TYPE_2OP_MASK_SAE,
+                       X86ISD::VRNDSCALE, X86ISD::VRNDSCALE_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vrndscaleps256, INTR_TYPE_2OP_MASK_SAE,
+                       X86ISD::VRNDSCALE, X86ISD::VRNDSCALE_SAE),
+    X86_INTRINSIC_DATA(avx10_mask_vscalefpd256, INTR_TYPE_2OP_MASK,
+                       X86ISD::SCALEF, X86ISD::SCALEF_RND),
+    X86_INTRINSIC_DATA(avx10_mask_vscalefph256, INTR_TYPE_2OP_MASK,
+                       X86ISD::SCALEF, X86ISD::SCALEF_RND),
+    X86_INTRINSIC_DATA(avx10_mask_vscalefps256, INTR_TYPE_2OP_MASK,
+                       X86ISD::SCALEF, X86ISD::SCALEF_RND),
+    X86_INTRINSIC_DATA(avx10_maskz_vfcmaddcph256, CFMA_OP_MASKZ,
+                       X86ISD::VFCMADDC, X86ISD::VFCMADDC_RND),
+    X86_INTRINSIC_DATA(avx10_maskz_vfixupimmpd256, FIXUPIMM_MASKZ,
+                       X86ISD::VFIXUPIMM, X86ISD::VFIXUPIMM_SAE),
+    X86_INTRINSIC_DATA(avx10_maskz_vfixupimmps256, FIXUPIMM_MASKZ,
+                       X86ISD::VFIXUPIMM, X86ISD::VFIXUPIMM_SAE),
+    X86_INTRINSIC_DATA(avx10_maskz_vfmaddcph256, CFMA_OP_MASKZ, X86ISD::VFMADDC,
+                       X86ISD::VFMADDC_RND),
     X86_INTRINSIC_DATA(avx10_vaddpd256, INTR_TYPE_2OP, ISD::FADD,
                        X86ISD::FADD_RND),
     X86_INTRINSIC_DATA(avx10_vaddph256, INTR_TYPE_2OP, ISD::FADD,
                        X86ISD::FADD_RND),
     X86_INTRINSIC_DATA(avx10_vaddps256, INTR_TYPE_2OP, ISD::FADD,
                        X86ISD::FADD_RND),
+    X86_INTRINSIC_DATA(avx10_vdivpd256, INTR_TYPE_2OP, ISD::FDIV,
+                       X86ISD::FDIV_RND),
+    X86_INTRINSIC_DATA(avx10_vdivph256, INTR_TYPE_2OP, ISD::FDIV,
+                       X86ISD::FDIV_RND),
+    X86_INTRINSIC_DATA(avx10_vdivps256, INTR_TYPE_2OP, ISD::FDIV,
+                       X86ISD::FDIV_RND),
+    X86_INTRINSIC_DATA(avx10_vfmaddpd256, INTR_TYPE_3OP, ISD::FMA,
+                       X86ISD::FMADD_RND),
+    X86_INTRINSIC_DATA(avx10_vfmaddph256, INTR_TYPE_3OP, ISD::FMA,
+                       X86ISD::FMADD_RND),
+    X86_INTRINSIC_DATA(avx10_vfmaddps256, INTR_TYPE_3OP, ISD::FMA,
+                       X86ISD::FMADD_RND),
+    X86_INTRINSIC_DATA(avx10_vfmaddsubpd256, INTR_TYPE_3OP, X86ISD::FMADDSUB,
+                       X86ISD::FMADDSUB_RND),
+    X86_INTRINSIC_DATA(avx10_vfmaddsubph256, INTR_TYPE_3OP, X86ISD::FMADDSUB,
+                       X86ISD::FMADDSUB_RND),
+    X86_INTRINSIC_DATA(avx10_vfmaddsubps256, INTR_TYPE_3OP, X86ISD::FMADDSUB,
+                       X86ISD::FMADDSUB_RND),
+    X86_INTRINSIC_DATA(avx10_vmaxpd256, INTR_TYPE_2OP_SAE, X86ISD::FMAX,
+                       X86ISD::FMAX_SAE),
+    X86_INTRINSIC_DATA(avx10_vmaxph256, INTR_TYPE_2OP_SAE, X86ISD::FMAX,
+                       X86ISD::FMAX_SAE),
+    X86_INTRINSIC_DATA(avx10_vmaxps256, INTR_TYPE_2OP_SAE, X86ISD::FMAX,
+                       X86ISD::FMAX_SAE),
+    X86_INTRINSIC_DATA(avx10_vminpd256, INTR_TYPE_2OP_SAE, X86ISD::FMIN,
+                       X86ISD::FMIN_SAE),
+    X86_INTRINSIC_DATA(avx10_vminph256, INTR_TYPE_2OP_SAE, X86ISD::FMIN,
+                       X86ISD::FMIN_SAE),
+    X86_INTRINSIC_DATA(avx10_vminps256, INTR_TYPE_2OP_SAE, X86ISD::FMIN,
+                       X86ISD::FMIN_SAE),
     X86_INTRINSIC_DATA(avx10_vmpsadbw_512, INTR_TYPE_3OP_IMM8, X86ISD::MPSADBW,
                        0),
+    X86_INTRINSIC_DATA(avx10_vmulpd256, INTR_TYPE_2OP, ISD::FMUL,
+                       X86ISD::FMUL_RND),
+    X86_INTRINSIC_DATA(avx10_vmulph256, INTR_TYPE_2OP, ISD::FMUL,
+                       X86ISD::FMUL_RND),
+    X86_INTRINSIC_DATA(avx10_vmulps256, INTR_TYPE_2OP, ISD::FMUL,
+                       X86ISD::FMUL_RND),
+    X86_INTRINSIC_DATA(avx10_vsqrtpd256, INTR_TYPE_1OP, ISD::FSQRT,
+                       X86ISD::FSQRT_RND),
+    X86_INTRINSIC_DATA(avx10_vsqrtph256, INTR_TYPE_1OP, ISD::FSQRT,
+                       X86ISD::FSQRT_RND),
+    X86_INTRINSIC_DATA(avx10_vsqrtps256, INTR_TYPE_1OP, ISD::FSQRT,
+                       X86ISD::FSQRT_RND),
+    X86_INTRINSIC_DATA(avx10_vsubpd256, INTR_TYPE_2OP, ISD::FSUB,
+                       X86ISD::FSUB_RND),
+    X86_INTRINSIC_DATA(avx10_vsubph256, INTR_TYPE_2OP, ISD::FSUB,
+                       X86ISD::FSUB_RND),
+    X86_INTRINSIC_DATA(avx10_vsubps256, INTR_TYPE_2OP, ISD::FSUB,
+                       X86ISD::FSUB_RND),
     X86_INTRINSIC_DATA(avx2_mpsadbw, INTR_TYPE_3OP_IMM8, X86ISD::MPSADBW, 0),
     X86_INTRINSIC_DATA(avx2_packssdw, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
     X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),

diff --git a/llvm/test/CodeGen/X86/avx10_2ni-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2ni-intrinsics.ll
index 4080546c0c543..34d740302d744 100644
--- a/llvm/test/CodeGen/X86/avx10_2ni-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2ni-intrinsics.ll
@@ -214,3 +214,4256 @@ define <8 x float> @test_int_x86_maskz_vaddps256(i8 %A, <8 x float> %B, <8 x flo
   %ret = select <8 x i1> %msk, <8 x float> %ret0, <8 x float> zeroinitializer
   ret <8 x float> %ret
 }
+
+declare <4 x i1> @llvm.x86.avx10.mask.vcmppd256(<4 x double>, <4 x double>, i32, <4 x i1>, i32)
+define i4 @test_int_x86_vcmppd256(<4 x double> %A, <4 x double> %B, i4 %C) nounwind {
+; CHECK-LABEL: test_int_x86_vcmppd256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcmppd $0, {sae}, %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0xf9,0x18,0xc2,0xc1,0x00]
+; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
+; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret0 = call <4 x i1> @llvm.x86.avx10.mask.vcmppd256(<4 x double> %A, <4 x double> %B, i32 0, <4 x i1> <i1 1, i1 1, i1 1, i1 1>, i32 8)
+  %ret = bitcast <4 x i1> %ret0 to i4
+  ret i4 %ret
+}
+
+define i4 @test_int_x86_mask_vcmppd256(<4 x double> %A, <4 x double> %B, i4 %C) nounwind {
+; X86-LABEL: test_int_x86_mask_vcmppd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcmpeqpd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xc2,0xc1,0x00]
+; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcmppd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcmpeqpd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xc2,0xc1,0x00]
+; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %msk = bitcast i4 %C to <4 x i1>
+  %ret0 = call <4 x i1> @llvm.x86.avx10.mask.vcmppd256(<4 x double> %A, <4 x double> %B, i32 0, <4 x i1> %msk, i32 4)
+  %ret = bitcast <4 x i1> %ret0 to i4
+  ret i4 %ret
+}
+
+declare <16 x i1> @llvm.x86.avx10.mask.vcmpph256(<16 x half>, <16 x half>, i32, <16 x i1>, i32)
+define i16 @test_int_x86_vcmpph256(<16 x half> %A, <16 x half> %B, i16 %C) nounwind {
+; CHECK-LABEL: test_int_x86_vcmpph256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcmpph $0, {sae}, %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x78,0x18,0xc2,0xc1,0x00]
+; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret0 = call <16 x i1> @llvm.x86.avx10.mask.vcmpph256(<16 x half> %A, <16 x half> %B, i32 0, <16 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, i32 8)
+  %ret = bitcast <16 x i1> %ret0 to i16
+  ret i16 %ret
+}
+
+define i16 @test_int_x86_mask_vcmpph256(<16 x half> %A, <16 x half> %B, i16 %C) nounwind {
+; X86-LABEL: test_int_x86_mask_vcmpph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcmpeqph %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7c,0x29,0xc2,0xc1,0x00]
+; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcmpph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcmpeqph %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7c,0x29,0xc2,0xc1,0x00]
+; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %msk = bitcast i16 %C to <16 x i1>
+  %ret0 = call <16 x i1> @llvm.x86.avx10.mask.vcmpph256(<16 x half> %A, <16 x half> %B, i32 0, <16 x i1> %msk, i32 4)
+  %ret = bitcast <16 x i1> %ret0 to i16
+  ret i16 %ret
+}
+
+declare <8 x i1> @llvm.x86.avx10.mask.vcmpps256(<8 x float>, <8 x float>, i32, <8 x i1>, i32)
+define i8 @test_int_x86_vcmpps256(<8 x float> %A, <8 x float> %B, i8 %C) nounwind {
+; CHECK-LABEL: test_int_x86_vcmpps256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcmpps $0, {sae}, %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x78,0x18,0xc2,0xc1,0x00]
+; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
+; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret0 = call <8 x i1> @llvm.x86.avx10.mask.vcmpps256(<8 x float> %A, <8 x float> %B, i32 0, <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, i32 8)
+  %ret = bitcast <8 x i1> %ret0 to i8
+  ret i8 %ret
+}
+
+define i8 @test_int_x86_mask_vcmpps256(<8 x float> %A, <8 x float> %B, i8 %C) nounwind {
+; X86-LABEL: test_int_x86_mask_vcmpps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcmpeqps %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0xc2,0xc1,0x00]
+; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcmpps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcmpeqps %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0xc2,0xc1,0x00]
+; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %msk = bitcast i8 %C to <8 x i1>
+  %ret0 = call <8 x i1> @llvm.x86.avx10.mask.vcmpps256(<8 x float> %A, <8 x float> %B, i32 0, <8 x i1> %msk, i32 4)
+  %ret = bitcast <8 x i1> %ret0 to i8
+  ret i8 %ret
+}
+
+declare <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i32(<8 x i32>, i32)
+define <8 x half> @test_int_x86_vcvtdq2ph256(<8 x i32> %A) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtdq2ph256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtdq2ph {rz-sae}, %ymm0, %xmm0 # encoding: [0x62,0xf5,0x78,0x78,0x5b,0xc0]
+; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i32(<8 x i32> %A, i32 11)
+  ret <8 x half> %ret
+}
+
+define <8 x half> @test_int_x86_mask_vcvtdq2ph256(<8 x half> %A, i8 %B, <8 x i32> %C) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtdq2ph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtdq2ph {ru-sae}, %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x78,0x59,0x5b,0xc1]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtdq2ph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtdq2ph {ru-sae}, %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x78,0x59,0x5b,0xc1]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i32(<8 x i32> %C, i32 10)
+  %msk = bitcast i8 %B to <8 x i1>
+  %ret = select <8 x i1> %msk, <8 x half> %ret0, <8 x half> %A
+  ret <8 x half> %ret
+}
+
+define <8 x half> @test_int_x86_maskz_vcvtdq2ph256(i8 %A, <8 x i32> %B) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtdq2ph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtdq2ph {rd-sae}, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0xb9,0x5b,0xc0]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtdq2ph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtdq2ph {rd-sae}, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0xb9,0x5b,0xc0]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i32(<8 x i32> %B, i32 9)
+  %msk = bitcast i8 %A to <8 x i1>
+  %ret = select <8 x i1> %msk, <8 x half> %ret0, <8 x half> zeroinitializer
+  ret <8 x half> %ret
+}
+
+declare <8 x float> @llvm.x86.avx512.sitofp.round.v8f32.v8i32(<8 x i32>, i32)
+define <8 x float> @test_int_x86_vcvtdq2ps256(<8 x i32> %A) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtdq2ps256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtdq2ps {rz-sae}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x78,0x78,0x5b,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx512.sitofp.round.v8f32.v8i32(<8 x i32> %A, i32 11)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_mask_vcvtdq2ps256(<8 x float> %A, i8 %B, <8 x i32> %C) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtdq2ps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtdq2ps {ru-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x78,0x59,0x5b,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtdq2ps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtdq2ps {ru-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x78,0x59,0x5b,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <8 x float> @llvm.x86.avx512.sitofp.round.v8f32.v8i32(<8 x i32> %C, i32 10)
+  %msk = bitcast i8 %B to <8 x i1>
+  %ret = select <8 x i1> %msk, <8 x float> %ret0, <8 x float> %A
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_maskz_vcvtdq2ps256(i8 %A, <8 x i32> %B) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtdq2ps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtdq2ps {rd-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x78,0xb9,0x5b,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtdq2ps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtdq2ps {rd-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x78,0xb9,0x5b,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <8 x float> @llvm.x86.avx512.sitofp.round.v8f32.v8i32(<8 x i32> %B, i32 9)
+  %msk = bitcast i8 %A to <8 x i1>
+  %ret = select <8 x i1> %msk, <8 x float> %ret0, <8 x float> zeroinitializer
+  ret <8 x float> %ret
+}
+
+declare <4 x i32> @llvm.x86.avx10.mask.vcvtpd2dq256(<4 x double>, <4 x i32>, i8, i32)
+define <4 x i32> @test_int_x86_vcvtpd2dq256(<4 x double> %A) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtpd2dq256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtpd2dq {rz-sae}, %ymm0, %xmm0 # encoding: [0x62,0xf1,0xfb,0x78,0xe6,0xc0]
+; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x i32> @llvm.x86.avx10.mask.vcvtpd2dq256(<4 x double> %A, <4 x i32> undef, i8 -1, i32 11)
+  ret <4 x i32> %ret
+}
+
+define <4 x i32> @test_int_x86_mask_vcvtpd2dq256(<4 x i32> %A, i8 %B, <4 x double> %C) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtpd2dq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtpd2dq {ru-sae}, %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfb,0x59,0xe6,0xc1]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtpd2dq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtpd2dq {ru-sae}, %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfb,0x59,0xe6,0xc1]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i32> @llvm.x86.avx10.mask.vcvtpd2dq256(<4 x double> %C, <4 x i32> %A, i8 %B, i32 10)
+  ret <4 x i32> %ret
+}
+
+define <4 x i32> @test_int_x86_maskz_vcvtpd2dq256(i8 %A, <4 x double> %B) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtpd2dq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtpd2dq {rd-sae}, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfb,0xb9,0xe6,0xc0]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtpd2dq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtpd2dq {rd-sae}, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfb,0xb9,0xe6,0xc0]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i32> @llvm.x86.avx10.mask.vcvtpd2dq256(<4 x double> %B, <4 x i32> zeroinitializer, i8 %A, i32 9)
+  ret <4 x i32> %ret
+}
+
+declare <8 x half> @llvm.x86.avx10.mask.vcvtpd2ph256(<4 x double>, <8 x half>, i8, i32)
+define <8 x half> @test_int_x86_vcvtpd2ph256(<4 x double> %A) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtpd2ph256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtpd2ph {rz-sae}, %ymm0, %xmm0 # encoding: [0x62,0xf5,0xf9,0x78,0x5a,0xc0]
+; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x half> @llvm.x86.avx10.mask.vcvtpd2ph256(<4 x double> %A, <8 x half> undef, i8 -1, i32 11)
+  ret <8 x half> %ret
+}
+
+define <8 x half> @test_int_x86_mask_vcvtpd2ph256(<8 x half> %A, i8 %B, <4 x double> %C) {
+; X86-LABEL: test_int_x86_mask_vcvtpd2ph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtpd2ph {ru-sae}, %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0xf9,0x59,0x5a,0xc1]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtpd2ph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtpd2ph {ru-sae}, %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0xf9,0x59,0x5a,0xc1]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x half> @llvm.x86.avx10.mask.vcvtpd2ph256(<4 x double> %C, <8 x half> %A, i8 %B, i32 10)
+  ret <8 x half> %ret
+}
+
+define <8 x half> @test_int_x86_maskz_vcvtpd2ph256(i8 %A, <4 x double> %B) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtpd2ph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtpd2ph {rd-sae}, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xf9,0xb9,0x5a,0xc0]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtpd2ph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtpd2ph {rd-sae}, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xf9,0xb9,0x5a,0xc0]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x half> @llvm.x86.avx10.mask.vcvtpd2ph256(<4 x double> %B, <8 x half> zeroinitializer, i8 %A, i32 9)
+  ret <8 x half> %ret
+}
+
+declare <4 x float> @llvm.x86.avx10.mask.vcvtpd2ps256(<4 x double>, <4 x float>, i8, i32)
+define <4 x float> @test_int_x86_vcvtpd2ps256(<4 x double> %A) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtpd2ps256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtpd2ps {rz-sae}, %ymm0, %xmm0 # encoding: [0x62,0xf1,0xf9,0x78,0x5a,0xc0]
+; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x float> @llvm.x86.avx10.mask.vcvtpd2ps256(<4 x double> %A, <4 x float> undef, i8 -1, i32 11)
+  ret <4 x float> %ret
+}
+
+define <4 x float> @test_int_x86_mask_vcvtpd2ps256(<4 x float> %A, i8 %B, <4 x double> %C) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtpd2ps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtpd2ps {ru-sae}, %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xf9,0x59,0x5a,0xc1]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtpd2ps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtpd2ps {ru-sae}, %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xf9,0x59,0x5a,0xc1]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x float> @llvm.x86.avx10.mask.vcvtpd2ps256(<4 x double> %C, <4 x float> %A, i8 %B, i32 10)
+  ret <4 x float> %ret
+}
+
+define <4 x float> @test_int_x86_maskz_vcvtpd2ps256(i8 %A, <4 x double> %B) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtpd2ps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtpd2ps {rd-sae}, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xf9,0xb9,0x5a,0xc0]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtpd2ps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtpd2ps {rd-sae}, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xf9,0xb9,0x5a,0xc0]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x float> @llvm.x86.avx10.mask.vcvtpd2ps256(<4 x double> %B, <4 x float> zeroinitializer, i8 %A, i32 9)
+  ret <4 x float> %ret
+}
+
+declare <4 x i64> @llvm.x86.avx10.mask.vcvtpd2qq256(<4 x double>, <4 x i64>, i8, i32)
+define <4 x i64> @test_int_x86_vcvtpd2qq256(<4 x double> %A) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtpd2qq256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtpd2qq {rz-sae}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xf9,0x78,0x7b,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvtpd2qq256(<4 x double> %A, <4 x i64> undef, i8 -1, i32 11)
+  ret <4 x i64> %ret
+}
+
+define <4 x i64> @test_int_x86_mask_vcvtpd2qq256(<4 x i64> %A, i8 %B, <4 x double> %C) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtpd2qq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtpd2qq {ru-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xf9,0x59,0x7b,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtpd2qq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtpd2qq {ru-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xf9,0x59,0x7b,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvtpd2qq256(<4 x double> %C, <4 x i64> %A, i8 %B, i32 10)
+  ret <4 x i64> %ret
+}
+
+define <4 x i64> @test_int_x86_maskz_vcvtpd2qq256(i8 %A, <4 x double> %B) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtpd2qq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtpd2qq {rd-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xf9,0xb9,0x7b,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtpd2qq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtpd2qq {rd-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xf9,0xb9,0x7b,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvtpd2qq256(<4 x double> %B, <4 x i64> zeroinitializer, i8 %A, i32 9)
+  ret <4 x i64> %ret
+}
+
+declare <4 x i32> @llvm.x86.avx10.mask.vcvtpd2udq256(<4 x double>, <4 x i32>, i8, i32)
+define <4 x i32> @test_int_x86_vcvtpd2udq256(<4 x double> %A) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtpd2udq256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtpd2udq {rz-sae}, %ymm0, %xmm0 # encoding: [0x62,0xf1,0xf8,0x78,0x79,0xc0]
+; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x i32> @llvm.x86.avx10.mask.vcvtpd2udq256(<4 x double> %A, <4 x i32> undef, i8 -1, i32 11)
+  ret <4 x i32> %ret
+}
+
+define <4 x i32> @test_int_x86_mask_vcvtpd2udq256(<4 x i32> %A, i8 %B, <4 x double> %C) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtpd2udq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtpd2udq {ru-sae}, %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xf8,0x59,0x79,0xc1]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtpd2udq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtpd2udq {ru-sae}, %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xf8,0x59,0x79,0xc1]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i32> @llvm.x86.avx10.mask.vcvtpd2udq256(<4 x double> %C, <4 x i32> %A, i8 %B, i32 10)
+  ret <4 x i32> %ret
+}
+
+define <4 x i32> @test_int_x86_maskz_vcvtpd2udq256(i8 %A, <4 x double> %B) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtpd2udq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtpd2udq {rd-sae}, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xf8,0xb9,0x79,0xc0]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtpd2udq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtpd2udq {rd-sae}, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xf8,0xb9,0x79,0xc0]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i32> @llvm.x86.avx10.mask.vcvtpd2udq256(<4 x double> %B, <4 x i32> zeroinitializer, i8 %A, i32 9)
+  ret <4 x i32> %ret
+}
+
+declare <4 x i64> @llvm.x86.avx10.mask.vcvtpd2uqq256(<4 x double>, <4 x i64>, i8, i32)
+define <4 x i64> @test_int_x86_vcvtpd2uqq256(<4 x double> %A) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtpd2uqq256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtpd2uqq {rz-sae}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xf9,0x78,0x79,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvtpd2uqq256(<4 x double> %A, <4 x i64> undef, i8 -1, i32 11)
+  ret <4 x i64> %ret
+}
+
+define <4 x i64> @test_int_x86_mask_vcvtpd2uqq256(<4 x i64> %A, i8 %B, <4 x double> %C) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtpd2uqq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtpd2uqq {ru-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xf9,0x59,0x79,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtpd2uqq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtpd2uqq {ru-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xf9,0x59,0x79,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvtpd2uqq256(<4 x double> %C, <4 x i64> %A, i8 %B, i32 10)
+  ret <4 x i64> %ret
+}
+
+define <4 x i64> @test_int_x86_maskz_vcvtpd2uqq256(i8 %A, <4 x double> %B) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtpd2uqq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtpd2uqq {rd-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xf9,0xb9,0x79,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtpd2uqq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtpd2uqq {rd-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xf9,0xb9,0x79,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvtpd2uqq256(<4 x double> %B, <4 x i64> zeroinitializer, i8 %A, i32 9)
+  ret <4 x i64> %ret
+}
+
+declare <8 x i32> @llvm.x86.avx10.mask.vcvtph2dq256(<8 x half>, <8 x i32>, i8, i32)
+define <8 x i32> @test_int_x86_vcvtph2dq256(<8 x half> %A) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtph2dq256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2dq {rz-sae}, %xmm0, %ymm0 # encoding: [0x62,0xf5,0x79,0x78,0x5b,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.avx10.mask.vcvtph2dq256(<8 x half> %A, <8 x i32> undef, i8 -1, i32 11)
+  ret <8 x i32> %ret
+}
+
+define <8 x i32> @test_int_x86_mask_vcvtph2dq256(<8 x i32> %A, i8 %B, <8 x half> %C) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtph2dq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtph2dq {ru-sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x79,0x59,0x5b,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtph2dq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtph2dq {ru-sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x79,0x59,0x5b,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.avx10.mask.vcvtph2dq256(<8 x half> %C, <8 x i32> %A, i8 %B, i32 10)
+  ret <8 x i32> %ret
+}
+
+define <8 x i32> @test_int_x86_maskz_vcvtph2dq256(i8 %A, <8 x half> %B) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtph2dq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtph2dq {rd-sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x79,0xb9,0x5b,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtph2dq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtph2dq {rd-sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x79,0xb9,0x5b,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.avx10.mask.vcvtph2dq256(<8 x half> %B, <8 x i32> zeroinitializer, i8 %A, i32 9)
+  ret <8 x i32> %ret
+}
+
+declare <4 x double> @llvm.x86.avx10.mask.vcvtph2pd256(<8 x half>, <4 x double>, i8, i32)
+define <4 x double> @test_int_x86_vcvtph2pd256(<8 x half> %A) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtph2pd256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2pd {sae}, %xmm0, %ymm0 # encoding: [0x62,0xf5,0x78,0x18,0x5a,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.mask.vcvtph2pd256(<8 x half> %A, <4 x double> undef, i8 -1, i32 8)
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_mask_vcvtph2pd256(<4 x double> %A, i8 %B, <8 x half> %C) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtph2pd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtph2pd {sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x78,0x19,0x5a,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtph2pd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtph2pd {sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x78,0x19,0x5a,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.mask.vcvtph2pd256(<8 x half> %C, <4 x double> %A, i8 %B, i32 8)
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_maskz_vcvtph2pd256(i8 %A, <8 x half> %B) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtph2pd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtph2pd {sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0x99,0x5a,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtph2pd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtph2pd {sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0x99,0x5a,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.mask.vcvtph2pd256(<8 x half> %B, <4 x double> zeroinitializer, i8 %A, i32 8)
+  ret <4 x double> %ret
+}
+
+declare <8 x float> @llvm.x86.avx10.mask.vcvtph2psx256(<8 x half>, <8 x float>, i8, i32)
+define <8 x float> @test_int_x86_vcvtph2psx256(<8 x half> %A) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtph2psx256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2psx {sae}, %xmm0, %ymm0 # encoding: [0x62,0xf6,0x79,0x18,0x13,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vcvtph2psx256(<8 x half> %A, <8 x float> undef, i8 -1, i32 8)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_mask_vcvtph2psx256(<8 x float> %A, i8 %B, <8 x half> %C) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtph2psx256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtph2psx {sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf6,0x79,0x19,0x13,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtph2psx256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtph2psx {sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf6,0x79,0x19,0x13,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vcvtph2psx256(<8 x half> %C, <8 x float> %A, i8 %B, i32 8)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_maskz_vcvtph2psx256(i8 %A, <8 x half> %B) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtph2psx256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtph2psx {sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x79,0x99,0x13,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtph2psx256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtph2psx {sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x79,0x99,0x13,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vcvtph2psx256(<8 x half> %B, <8 x float> zeroinitializer, i8 %A, i32 8)
+  ret <8 x float> %ret
+}
+
+declare <4 x i64> @llvm.x86.avx10.mask.vcvtph2qq256(<8 x half>, <4 x i64>, i8, i32)
+define <4 x i64> @test_int_x86_vcvtph2qq256(<8 x half> %A) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtph2qq256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2qq {rz-sae}, %xmm0, %ymm0 # encoding: [0x62,0xf5,0x79,0x78,0x7b,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvtph2qq256(<8 x half> %A, <4 x i64> undef, i8 -1, i32 11)
+  ret <4 x i64> %ret
+}
+
+define <4 x i64> @test_int_x86_mask_vcvtph2qq256(<4 x i64> %A, i8 %B, <8 x half> %C) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtph2qq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtph2qq {ru-sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x79,0x59,0x7b,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtph2qq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtph2qq {ru-sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x79,0x59,0x7b,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvtph2qq256(<8 x half> %C, <4 x i64> %A, i8 %B, i32 10)
+  ret <4 x i64> %ret
+}
+
+define <4 x i64> @test_int_x86_maskz_vcvtph2qq256(i8 %A, <8 x half> %B) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtph2qq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtph2qq {rd-sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x79,0xb9,0x7b,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtph2qq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtph2qq {rd-sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x79,0xb9,0x7b,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvtph2qq256(<8 x half> %B, <4 x i64> zeroinitializer, i8 %A, i32 9)
+  ret <4 x i64> %ret
+}
+
+declare <8 x i32> @llvm.x86.avx10.mask.vcvtph2udq256(<8 x half>, <8 x i32>, i8, i32)
+define <8 x i32> @test_int_x86_vcvtph2udq256(<8 x half> %A) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtph2udq256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2udq {rz-sae}, %xmm0, %ymm0 # encoding: [0x62,0xf5,0x78,0x78,0x79,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.avx10.mask.vcvtph2udq256(<8 x half> %A, <8 x i32> undef, i8 -1, i32 11)
+  ret <8 x i32> %ret
+}
+
+define <8 x i32> @test_int_x86_mask_vcvtph2udq256(<8 x i32> %A, i8 %B, <8 x half> %C) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtph2udq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtph2udq {ru-sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x78,0x59,0x79,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtph2udq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtph2udq {ru-sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x78,0x59,0x79,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.avx10.mask.vcvtph2udq256(<8 x half> %C, <8 x i32> %A, i8 %B, i32 10)
+  ret <8 x i32> %ret
+}
+
+define <8 x i32> @test_int_x86_maskz_vcvtph2udq256(i8 %A, <8 x half> %B) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtph2udq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtph2udq {rd-sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0xb9,0x79,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtph2udq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtph2udq {rd-sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0xb9,0x79,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.avx10.mask.vcvtph2udq256(<8 x half> %B, <8 x i32> zeroinitializer, i8 %A, i32 9)
+  ret <8 x i32> %ret
+}
+
+declare <4 x i64> @llvm.x86.avx10.mask.vcvtph2uqq256(<8 x half>, <4 x i64>, i8, i32)
+define <4 x i64> @test_int_x86_vcvtph2uqq256(<8 x half> %A) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtph2uqq256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2uqq {rz-sae}, %xmm0, %ymm0 # encoding: [0x62,0xf5,0x79,0x78,0x79,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvtph2uqq256(<8 x half> %A, <4 x i64> undef, i8 -1, i32 11)
+  ret <4 x i64> %ret
+}
+
+define <4 x i64> @test_int_x86_mask_vcvtph2uqq256(<4 x i64> %A, i8 %B, <8 x half> %C) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtph2uqq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtph2uqq {ru-sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x79,0x59,0x79,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtph2uqq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtph2uqq {ru-sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x79,0x59,0x79,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvtph2uqq256(<8 x half> %C, <4 x i64> %A, i8 %B, i32 10)
+  ret <4 x i64> %ret
+}
+
+define <4 x i64> @test_int_x86_maskz_vcvtph2uqq256(i8 %A, <8 x half> %B) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtph2uqq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtph2uqq {rd-sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x79,0xb9,0x79,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtph2uqq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtph2uqq {rd-sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x79,0xb9,0x79,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvtph2uqq256(<8 x half> %B, <4 x i64> zeroinitializer, i8 %A, i32 9)
+  ret <4 x i64> %ret
+}
+
+declare <16 x i16> @llvm.x86.avx10.mask.vcvtph2uw256(<16 x half>, <16 x i16>, i16, i32)
+define <16 x i16> @test_int_x86_vcvtph2uw256(<16 x half> %A) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtph2uw256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2uw {rz-sae}, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x78,0x78,0x7d,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <16 x i16> @llvm.x86.avx10.mask.vcvtph2uw256(<16 x half> %A, <16 x i16> undef, i16 -1, i32 11)
+  ret <16 x i16> %ret
+}
+
+define <16 x i16> @test_int_x86_mask_vcvtph2uw256(<16 x i16> %A, i16 %B, <16 x half> %C) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtph2uw256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtph2uw {ru-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x78,0x59,0x7d,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtph2uw256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtph2uw {ru-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x78,0x59,0x7d,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <16 x i16> @llvm.x86.avx10.mask.vcvtph2uw256(<16 x half> %C, <16 x i16> %A, i16 %B, i32 10)
+  ret <16 x i16> %ret
+}
+
+define <16 x i16> @test_int_x86_maskz_vcvtph2uw256(i16 %A, <16 x half> %B) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtph2uw256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtph2uw {rd-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0xb9,0x7d,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtph2uw256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtph2uw {rd-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0xb9,0x7d,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <16 x i16> @llvm.x86.avx10.mask.vcvtph2uw256(<16 x half> %B, <16 x i16> zeroinitializer, i16 %A, i32 9)
+  ret <16 x i16> %ret
+}
+
+declare <16 x i16> @llvm.x86.avx10.mask.vcvtph2w256(<16 x half>, <16 x i16>, i16, i32)
+define <16 x i16> @test_int_x86_vcvtph2w256(<16 x half> %A) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtph2w256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2w {rz-sae}, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x79,0x78,0x7d,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <16 x i16> @llvm.x86.avx10.mask.vcvtph2w256(<16 x half> %A, <16 x i16> undef, i16 -1, i32 11)
+  ret <16 x i16> %ret
+}
+
+define <16 x i16> @test_int_x86_mask_vcvtph2w256(<16 x i16> %A, i16 %B, <16 x half> %C) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtph2w256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtph2w {ru-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x79,0x59,0x7d,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtph2w256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtph2w {ru-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x79,0x59,0x7d,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <16 x i16> @llvm.x86.avx10.mask.vcvtph2w256(<16 x half> %C, <16 x i16> %A, i16 %B, i32 10)
+  ret <16 x i16> %ret
+}
+
+define <16 x i16> @test_int_x86_maskz_vcvtph2w256(i16 %A, <16 x half> %B) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtph2w256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtph2w {rd-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x79,0xb9,0x7d,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtph2w256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtph2w {rd-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x79,0xb9,0x7d,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <16 x i16> @llvm.x86.avx10.mask.vcvtph2w256(<16 x half> %B, <16 x i16> zeroinitializer, i16 %A, i32 9)
+  ret <16 x i16> %ret
+}
+
+declare <8 x i32> @llvm.x86.avx10.mask.vcvtps2dq256(<8 x float>, <8 x i32>, i8, i32)
+define <8 x i32> @test_int_x86_vcvtps2dq256(<8 x float> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtps2dq256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtps2dq {rz-sae}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x79,0x78,0x5b,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.avx10.mask.vcvtps2dq256(<8 x float> %a, <8 x i32> undef, i8 -1, i32 11)
+  ret <8 x i32> %ret
+}
+
+define <8 x i32> @test_int_x86_mask_vcvtps2dq256(<8 x i32> %a, i8 %b, <8 x float> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtps2dq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtps2dq {ru-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x79,0x59,0x5b,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtps2dq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtps2dq {ru-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x79,0x59,0x5b,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.avx10.mask.vcvtps2dq256(<8 x float> %c, <8 x i32> %a, i8 %b, i32 10)
+  ret <8 x i32> %ret
+}
+
+define <8 x i32> @test_int_x86_maskz_vcvtps2dq256(i8 %a, <8 x float> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtps2dq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtps2dq {rd-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x79,0xb9,0x5b,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtps2dq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtps2dq {rd-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x79,0xb9,0x5b,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.avx10.mask.vcvtps2dq256(<8 x float> %b, <8 x i32> zeroinitializer, i8 %a, i32 9)
+  ret <8 x i32> %ret
+}
+
+declare <4 x double> @llvm.x86.avx10.mask.vcvtps2pd256(<4 x float>, <4 x double>, i8, i32)
+define <4 x double> @test_int_x86_vcvtps2pd256(<4 x float> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtps2pd256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtps2pd {sae}, %xmm0, %ymm0 # encoding: [0x62,0xf1,0x78,0x18,0x5a,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.mask.vcvtps2pd256(<4 x float> %a, <4 x double> undef, i8 -1, i32 8)
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_mask_vcvtps2pd256(<4 x double> %a, i8 %b, <4 x float> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtps2pd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtps2pd {sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x78,0x19,0x5a,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtps2pd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtps2pd {sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x78,0x19,0x5a,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.mask.vcvtps2pd256(<4 x float> %c, <4 x double> %a, i8 %b, i32 8)
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_maskz_vcvtps2pd256(i8 %a, <4 x float> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtps2pd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtps2pd {sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x78,0x99,0x5a,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtps2pd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtps2pd {sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x78,0x99,0x5a,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.mask.vcvtps2pd256(<4 x float> %b, <4 x double> zeroinitializer, i8 %a, i32 8)
+  ret <4 x double> %ret
+}
+
+declare <8 x half> @llvm.x86.avx10.mask.vcvtps2phx256(<8 x float>, <8 x half>, i8, i32)
+define <8 x half> @test_int_x86_vcvtps2phx256(<8 x float> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtps2phx256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtps2phx {rz-sae}, %ymm0, %xmm0 # encoding: [0x62,0xf5,0x79,0x78,0x1d,0xc0]
+; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x half> @llvm.x86.avx10.mask.vcvtps2phx256(<8 x float> %a, <8 x half> undef, i8 -1, i32 11)
+  ret <8 x half> %ret
+}
+
+define <8 x half> @test_int_x86_mask_vcvtps2phx256(<8 x half> %a, i8 %b, <8 x float> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtps2phx256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtps2phx {ru-sae}, %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x79,0x59,0x1d,0xc1]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtps2phx256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtps2phx {ru-sae}, %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x79,0x59,0x1d,0xc1]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x half> @llvm.x86.avx10.mask.vcvtps2phx256(<8 x float> %c, <8 x half> %a, i8 %b, i32 10)
+  ret <8 x half> %ret
+}
+
+define <8 x half> @test_int_x86_maskz_vcvtps2phx256(i8 %a, <8 x float> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtps2phx256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtps2phx {rd-sae}, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x79,0xb9,0x1d,0xc0]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtps2phx256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtps2phx {rd-sae}, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x79,0xb9,0x1d,0xc0]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x half> @llvm.x86.avx10.mask.vcvtps2phx256(<8 x float> %b, <8 x half> zeroinitializer, i8 %a, i32 9)
+  ret <8 x half> %ret
+}
+
+declare <4 x i64> @llvm.x86.avx10.mask.vcvtps2qq256(<4 x float>, <4 x i64>, i8, i32)
+define <4 x i64> @test_int_x86_vcvtps2qq256(<4 x float> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtps2qq256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtps2qq {rz-sae}, %xmm0, %ymm0 # encoding: [0x62,0xf1,0x79,0x78,0x7b,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvtps2qq256(<4 x float> %a, <4 x i64> undef, i8 -1, i32 11)
+  ret <4 x i64> %ret
+}
+
+define <4 x i64> @test_int_x86_mask_vcvtps2qq256(<4 x i64> %a, i8 %b, <4 x float> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtps2qq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtps2qq {ru-sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x79,0x59,0x7b,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtps2qq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtps2qq {ru-sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x79,0x59,0x7b,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvtps2qq256(<4 x float> %c, <4 x i64> %a, i8 %b, i32 10)
+  ret <4 x i64> %ret
+}
+
+define <4 x i64> @test_int_x86_maskz_vcvtps2qq256(i8 %a, <4 x float> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtps2qq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtps2qq {rd-sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x79,0xb9,0x7b,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtps2qq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtps2qq {rd-sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x79,0xb9,0x7b,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvtps2qq256(<4 x float> %b, <4 x i64> zeroinitializer, i8 %a, i32 9)
+  ret <4 x i64> %ret
+}
+
+declare <8 x i32> @llvm.x86.avx10.mask.vcvtps2udq256(<8 x float>, <8 x i32>, i8, i32)
+define <8 x i32> @test_int_x86_vcvtps2udq256(<8 x float> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtps2udq256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtps2udq {rz-sae}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x78,0x78,0x79,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.avx10.mask.vcvtps2udq256(<8 x float> %a, <8 x i32> undef, i8 -1, i32 11)
+  ret <8 x i32> %ret
+}
+
+define <8 x i32> @test_int_x86_mask_vcvtps2udq256(<8 x i32> %a, i8 %b, <8 x float> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtps2udq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtps2udq {ru-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x78,0x59,0x79,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtps2udq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtps2udq {ru-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x78,0x59,0x79,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.avx10.mask.vcvtps2udq256(<8 x float> %c, <8 x i32> %a, i8 %b, i32 10)
+  ret <8 x i32> %ret
+}
+
+define <8 x i32> @test_int_x86_maskz_vcvtps2udq256(i8 %a, <8 x float> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtps2udq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtps2udq {rd-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x78,0xb9,0x79,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtps2udq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtps2udq {rd-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x78,0xb9,0x79,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.avx10.mask.vcvtps2udq256(<8 x float> %b, <8 x i32> zeroinitializer, i8 %a, i32 9)
+  ret <8 x i32> %ret
+}
+
+declare <4 x i64> @llvm.x86.avx10.mask.vcvtps2uqq256(<4 x float>, <4 x i64>, i8, i32)
+define <4 x i64> @test_int_x86_vcvtps2uqq256(<4 x float> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtps2uqq256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtps2uqq {rz-sae}, %xmm0, %ymm0 # encoding: [0x62,0xf1,0x79,0x78,0x79,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvtps2uqq256(<4 x float> %a, <4 x i64> undef, i8 -1, i32 11)
+  ret <4 x i64> %ret
+}
+
+define <4 x i64> @test_int_x86_mask_vcvtps2uqq256(<4 x i64> %a, i8 %b, <4 x float> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtps2uqq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtps2uqq {ru-sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x79,0x59,0x79,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtps2uqq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtps2uqq {ru-sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x79,0x59,0x79,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvtps2uqq256(<4 x float> %c, <4 x i64> %a, i8 %b, i32 10)
+  ret <4 x i64> %ret
+}
+
+define <4 x i64> @test_int_x86_maskz_vcvtps2uqq256(i8 %a, <4 x float> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtps2uqq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtps2uqq {rd-sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x79,0xb9,0x79,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtps2uqq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtps2uqq {rd-sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x79,0xb9,0x79,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvtps2uqq256(<4 x float> %b, <4 x i64> zeroinitializer, i8 %a, i32 9)
+  ret <4 x i64> %ret
+}
+
+declare <4 x double> @llvm.x86.avx512.sitofp.round.v4f64.v4i64(<4 x i64>, i32)
+define <4 x double> @test_int_x86_vcvtqq2pd256(<4 x i64> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtqq2pd256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtqq2pd {rn-sae}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfa,0x18,0xe6,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx512.sitofp.round.v4f64.v4i64(<4 x i64> %a, i32 8)
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_mask_vcvtqq2pd256(<4 x double> %a, i4 %b, <4 x i64> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtqq2pd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtqq2pd {rn-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xfa,0x19,0xe6,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtqq2pd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtqq2pd {rn-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xfa,0x19,0xe6,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <4 x double> @llvm.x86.avx512.sitofp.round.v4f64.v4i64(<4 x i64> %c, i32 8)
+  %msk = bitcast i4 %b to <4 x i1>
+  %ret = select <4 x i1> %msk, <4 x double> %ret0, <4 x double> %a
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_maskz_vcvtqq2pd256(i4 %a, <4 x i64> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtqq2pd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtqq2pd {rn-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfa,0x99,0xe6,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtqq2pd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtqq2pd {rn-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfa,0x99,0xe6,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <4 x double> @llvm.x86.avx512.sitofp.round.v4f64.v4i64(<4 x i64> %b, i32 8)
+  %msk = bitcast i4 %a to <4 x i1>
+  %ret = select <4 x i1> %msk, <4 x double> %ret0, <4 x double> zeroinitializer
+  ret <4 x double> %ret
+}
+
+declare <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v4i64(<4 x i64>, i32)
+define <8 x half> @test_int_x86_vcvtqq2ph256(<4 x i64> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtqq2ph256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtqq2ph {rn-sae}, %ymm0, %xmm0 # encoding: [0x62,0xf5,0xf8,0x18,0x5b,0xc0]
+; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v4i64(<4 x i64> %a, i32 8)
+  ret <8 x half> %ret
+}
+
+define <8 x half> @test_int_x86_mask_vcvtqq2ph256(<8 x half> %a, i8 %b, <4 x i64> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtqq2ph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtqq2ph {rn-sae}, %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0xf8,0x19,0x5b,0xc1]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtqq2ph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtqq2ph {rn-sae}, %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0xf8,0x19,0x5b,0xc1]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v4i64(<4 x i64> %c, i32 8)
+  %msk = bitcast i8 %b to <8 x i1>
+  %ret = select <8 x i1> %msk, <8 x half> %ret0, <8 x half> %a
+  ret <8 x half> %ret
+}
+
+define <8 x half> @test_int_x86_maskz_vcvtqq2ph256(i8 %a, <4 x i64> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtqq2ph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtqq2ph {rn-sae}, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xf8,0x99,0x5b,0xc0]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtqq2ph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtqq2ph {rn-sae}, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xf8,0x99,0x5b,0xc0]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v4i64(<4 x i64> %b, i32 8)
+  %msk = bitcast i8 %a to <8 x i1>
+  %ret = select <8 x i1> %msk, <8 x half> %ret0, <8 x half> zeroinitializer
+  ret <8 x half> %ret
+}
+
+declare <4 x float> @llvm.x86.avx512.sitofp.round.v4f32.v4i64(<4 x i64>, i32)
+define <4 x float> @test_int_x86_vcvtqq2ps256(<4 x i64> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtqq2ps256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtqq2ps {rn-sae}, %ymm0, %xmm0 # encoding: [0x62,0xf1,0xf8,0x18,0x5b,0xc0]
+; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x float> @llvm.x86.avx512.sitofp.round.v4f32.v4i64(<4 x i64> %a, i32 8)
+  ret <4 x float> %ret
+}
+
+define <4 x float> @test_int_x86_mask_vcvtqq2ps256(<4 x float> %a, i4 %b, <4 x i64> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtqq2ps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtqq2ps {rn-sae}, %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xf8,0x19,0x5b,0xc1]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtqq2ps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtqq2ps {rn-sae}, %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xf8,0x19,0x5b,0xc1]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <4 x float> @llvm.x86.avx512.sitofp.round.v4f32.v4i64(<4 x i64> %c, i32 8)
+  %msk = bitcast i4 %b to <4 x i1>
+  %ret = select <4 x i1> %msk, <4 x float> %ret0, <4 x float> %a
+  ret <4 x float> %ret
+}
+
+define <4 x float> @test_int_x86_maskz_vcvtqq2ps256(i4 %a, <4 x i64> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtqq2ps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtqq2ps {rn-sae}, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xf8,0x99,0x5b,0xc0]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtqq2ps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtqq2ps {rn-sae}, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xf8,0x99,0x5b,0xc0]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <4 x float> @llvm.x86.avx512.sitofp.round.v4f32.v4i64(<4 x i64> %b, i32 8)
+  %msk = bitcast i4 %a to <4 x i1>
+  %ret = select <4 x i1> %msk, <4 x float> %ret0, <4 x float> zeroinitializer
+  ret <4 x float> %ret
+}
+
+declare <4 x i32> @llvm.x86.avx10.mask.vcvttpd2dq256(<4 x double>, <4 x i32>, i8, i32)
+define <4 x i32> @test_int_x86_vcvttpd2dq256(<4 x double> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vcvttpd2dq256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttpd2dq {sae}, %ymm0, %xmm0 # encoding: [0x62,0xf1,0xf9,0x18,0xe6,0xc0]
+; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x i32> @llvm.x86.avx10.mask.vcvttpd2dq256(<4 x double> %a, <4 x i32> undef, i8 -1, i32 8)
+  ret <4 x i32> %ret
+}
+
+define <4 x i32> @test_int_x86_mask_vcvttpd2dq256(<4 x i32> %a, i8 %b, <4 x double> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvttpd2dq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvttpd2dq {sae}, %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xf9,0x19,0xe6,0xc1]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvttpd2dq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvttpd2dq {sae}, %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xf9,0x19,0xe6,0xc1]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i32> @llvm.x86.avx10.mask.vcvttpd2dq256(<4 x double> %c, <4 x i32> %a, i8 %b, i32 8)
+  ret <4 x i32> %ret
+}
+
+define <4 x i32> @test_int_x86_maskz_vcvttpd2dq256(i8 %a, <4 x double> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvttpd2dq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvttpd2dq {sae}, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xf9,0x99,0xe6,0xc0]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvttpd2dq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvttpd2dq {sae}, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xf9,0x99,0xe6,0xc0]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i32> @llvm.x86.avx10.mask.vcvttpd2dq256(<4 x double> %b, <4 x i32> zeroinitializer, i8 %a, i32 8)
+  ret <4 x i32> %ret
+}
+
+declare <4 x i64> @llvm.x86.avx10.mask.vcvttpd2qq256(<4 x double>, <4 x i64>, i8, i32)
+define <4 x i64> @test_int_x86_vcvttpd2qq256(<4 x double> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vcvttpd2qq256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttpd2qq {sae}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xf9,0x18,0x7a,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvttpd2qq256(<4 x double> %a, <4 x i64> undef, i8 -1, i32 8)
+  ret <4 x i64> %ret
+}
+
+define <4 x i64> @test_int_x86_mask_vcvttpd2qq256(<4 x i64> %a, i8 %b, <4 x double> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvttpd2qq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvttpd2qq {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xf9,0x19,0x7a,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvttpd2qq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvttpd2qq {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xf9,0x19,0x7a,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvttpd2qq256(<4 x double> %c, <4 x i64> %a, i8 %b, i32 8)
+  ret <4 x i64> %ret
+}
+
+define <4 x i64> @test_int_x86_maskz_vcvttpd2qq256(i8 %a, <4 x double> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvttpd2qq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvttpd2qq {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xf9,0x99,0x7a,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvttpd2qq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvttpd2qq {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xf9,0x99,0x7a,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvttpd2qq256(<4 x double> %b, <4 x i64> zeroinitializer, i8 %a, i32 8)
+  ret <4 x i64> %ret
+}
+
+declare <4 x i32> @llvm.x86.avx10.mask.vcvttpd2udq256(<4 x double>, <4 x i32>, i8, i32)
+define <4 x i32> @test_int_x86_vcvttpd2udq256(<4 x double> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vcvttpd2udq256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttpd2udq {sae}, %ymm0, %xmm0 # encoding: [0x62,0xf1,0xf8,0x18,0x78,0xc0]
+; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x i32> @llvm.x86.avx10.mask.vcvttpd2udq256(<4 x double> %a, <4 x i32> undef, i8 -1, i32 8)
+  ret <4 x i32> %ret
+}
+
+define <4 x i32> @test_int_x86_mask_vcvttpd2udq256(<4 x i32> %a, i8 %b, <4 x double> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvttpd2udq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvttpd2udq {sae}, %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xf8,0x19,0x78,0xc1]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvttpd2udq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvttpd2udq {sae}, %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xf8,0x19,0x78,0xc1]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i32> @llvm.x86.avx10.mask.vcvttpd2udq256(<4 x double> %c, <4 x i32> %a, i8 %b, i32 8)
+  ret <4 x i32> %ret
+}
+
+define <4 x i32> @test_int_x86_maskz_vcvttpd2udq256(i8 %a, <4 x double> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvttpd2udq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvttpd2udq {sae}, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xf8,0x99,0x78,0xc0]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvttpd2udq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvttpd2udq {sae}, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xf8,0x99,0x78,0xc0]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i32> @llvm.x86.avx10.mask.vcvttpd2udq256(<4 x double> %b, <4 x i32> zeroinitializer, i8 %a, i32 8)
+  ret <4 x i32> %ret
+}
+
+declare <4 x i64> @llvm.x86.avx10.mask.vcvttpd2uqq256(<4 x double>, <4 x i64>, i8, i32)
+define <4 x i64> @test_int_x86_vcvttpd2uqq256(<4 x double> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vcvttpd2uqq256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttpd2uqq {sae}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xf9,0x18,0x78,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvttpd2uqq256(<4 x double> %a, <4 x i64> undef, i8 -1, i32 8)
+  ret <4 x i64> %ret
+}
+
+define <4 x i64> @test_int_x86_mask_vcvttpd2uqq256(<4 x i64> %a, i8 %b, <4 x double> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvttpd2uqq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvttpd2uqq {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xf9,0x19,0x78,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvttpd2uqq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvttpd2uqq {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xf9,0x19,0x78,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvttpd2uqq256(<4 x double> %c, <4 x i64> %a, i8 %b, i32 8)
+  ret <4 x i64> %ret
+}
+
+define <4 x i64> @test_int_x86_maskz_vcvttpd2uqq256(i8 %a, <4 x double> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvttpd2uqq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvttpd2uqq {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xf9,0x99,0x78,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvttpd2uqq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvttpd2uqq {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xf9,0x99,0x78,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvttpd2uqq256(<4 x double> %b, <4 x i64> zeroinitializer, i8 %a, i32 8)
+  ret <4 x i64> %ret
+}
+
+declare <8 x i32> @llvm.x86.avx10.mask.vcvttph2dq256(<8 x half>, <8 x i32>, i8, i32)
+define <8 x i32> @test_int_x86_vcvttph2dq256(<8 x half> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vcvttph2dq256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2dq {sae}, %xmm0, %ymm0 # encoding: [0x62,0xf5,0x7a,0x18,0x5b,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.avx10.mask.vcvttph2dq256(<8 x half> %a, <8 x i32> undef, i8 -1, i32 8)
+  ret <8 x i32> %ret
+}
+
+define <8 x i32> @test_int_x86_mask_vcvttph2dq256(<8 x i32> %a, i8 %b, <8 x half> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvttph2dq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvttph2dq {sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7a,0x19,0x5b,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvttph2dq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvttph2dq {sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7a,0x19,0x5b,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.avx10.mask.vcvttph2dq256(<8 x half> %c, <8 x i32> %a, i8 %b, i32 8)
+  ret <8 x i32> %ret
+}
+
+define <8 x i32> @test_int_x86_maskz_vcvttph2dq256(i8 %a, <8 x half> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvttph2dq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvttph2dq {sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7a,0x99,0x5b,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvttph2dq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvttph2dq {sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7a,0x99,0x5b,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.avx10.mask.vcvttph2dq256(<8 x half> %b, <8 x i32> zeroinitializer, i8 %a, i32 8)
+  ret <8 x i32> %ret
+}
+
+declare <4 x i64> @llvm.x86.avx10.mask.vcvttph2qq256(<8 x half>, <4 x i64>, i8, i32)
+define <4 x i64> @test_int_x86_vcvttph2qq256(<8 x half> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vcvttph2qq256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2qq {sae}, %xmm0, %ymm0 # encoding: [0x62,0xf5,0x79,0x18,0x7a,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvttph2qq256(<8 x half> %a, <4 x i64> undef, i8 -1, i32 8)
+  ret <4 x i64> %ret
+}
+
+define <4 x i64> @test_int_x86_mask_vcvttph2qq256(<4 x i64> %a, i8 %b, <8 x half> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvttph2qq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvttph2qq {sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x79,0x19,0x7a,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvttph2qq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvttph2qq {sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x79,0x19,0x7a,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvttph2qq256(<8 x half> %c, <4 x i64> %a, i8 %b, i32 8)
+  ret <4 x i64> %ret
+}
+
+define <4 x i64> @test_int_x86_maskz_vcvttph2qq256(i8 %a, <8 x half> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvttph2qq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvttph2qq {sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x79,0x99,0x7a,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvttph2qq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvttph2qq {sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x79,0x99,0x7a,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvttph2qq256(<8 x half> %b, <4 x i64> zeroinitializer, i8 %a, i32 8)
+  ret <4 x i64> %ret
+}
+
+declare <8 x i32> @llvm.x86.avx10.mask.vcvttph2udq256(<8 x half>, <8 x i32>, i8, i32)
+define <8 x i32> @test_int_x86_vcvttph2udq256(<8 x half> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vcvttph2udq256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2udq {sae}, %xmm0, %ymm0 # encoding: [0x62,0xf5,0x78,0x18,0x78,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.avx10.mask.vcvttph2udq256(<8 x half> %a, <8 x i32> undef, i8 -1, i32 8)
+  ret <8 x i32> %ret
+}
+
+define <8 x i32> @test_int_x86_mask_vcvttph2udq256(<8 x i32> %a, i8 %b, <8 x half> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvttph2udq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvttph2udq {sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x78,0x19,0x78,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvttph2udq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvttph2udq {sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x78,0x19,0x78,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.avx10.mask.vcvttph2udq256(<8 x half> %c, <8 x i32> %a, i8 %b, i32 8)
+  ret <8 x i32> %ret
+}
+
+define <8 x i32> @test_int_x86_maskz_vcvttph2udq256(i8 %a, <8 x half> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvttph2udq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvttph2udq {sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0x99,0x78,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvttph2udq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvttph2udq {sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0x99,0x78,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.avx10.mask.vcvttph2udq256(<8 x half> %b, <8 x i32> zeroinitializer, i8 %a, i32 8)
+  ret <8 x i32> %ret
+}
+
+declare <4 x i64> @llvm.x86.avx10.mask.vcvttph2uqq256(<8 x half>, <4 x i64>, i8, i32)
+define <4 x i64> @test_int_x86_vcvttph2uqq256(<8 x half> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vcvttph2uqq256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2uqq {sae}, %xmm0, %ymm0 # encoding: [0x62,0xf5,0x79,0x18,0x78,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvttph2uqq256(<8 x half> %a, <4 x i64> undef, i8 -1, i32 8)
+  ret <4 x i64> %ret
+}
+
+define <4 x i64> @test_int_x86_mask_vcvttph2uqq256(<4 x i64> %a, i8 %b, <8 x half> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvttph2uqq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvttph2uqq {sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x79,0x19,0x78,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvttph2uqq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvttph2uqq {sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x79,0x19,0x78,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvttph2uqq256(<8 x half> %c, <4 x i64> %a, i8 %b, i32 8)
+  ret <4 x i64> %ret
+}
+
+define <4 x i64> @test_int_x86_maskz_vcvttph2uqq256(i8 %a, <8 x half> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvttph2uqq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvttph2uqq {sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x79,0x99,0x78,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvttph2uqq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvttph2uqq {sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x79,0x99,0x78,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvttph2uqq256(<8 x half> %b, <4 x i64> zeroinitializer, i8 %a, i32 8)
+  ret <4 x i64> %ret
+}
+
+declare <16 x i16> @llvm.x86.avx10.mask.vcvttph2uw256(<16 x half>, <16 x i16>, i16, i32)
+define <16 x i16> @test_int_x86_vcvttph2uw256(<16 x half> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vcvttph2uw256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2uw {sae}, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x78,0x18,0x7c,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <16 x i16> @llvm.x86.avx10.mask.vcvttph2uw256(<16 x half> %a, <16 x i16> undef, i16 -1, i32 8)
+  ret <16 x i16> %ret
+}
+
+define <16 x i16> @test_int_x86_mask_vcvttph2uw256(<16 x i16> %a, i16 %b, <16 x half> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvttph2uw256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvttph2uw {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x78,0x19,0x7c,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvttph2uw256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvttph2uw {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x78,0x19,0x7c,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <16 x i16> @llvm.x86.avx10.mask.vcvttph2uw256(<16 x half> %c, <16 x i16> %a, i16 %b, i32 8)
+  ret <16 x i16> %ret
+}
+
+define <16 x i16> @test_int_x86_maskz_vcvttph2uw256(i16 %a, <16 x half> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvttph2uw256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvttph2uw {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0x99,0x7c,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvttph2uw256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvttph2uw {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0x99,0x7c,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <16 x i16> @llvm.x86.avx10.mask.vcvttph2uw256(<16 x half> %b, <16 x i16> zeroinitializer, i16 %a, i32 8)
+  ret <16 x i16> %ret
+}
+
+declare <16 x i16> @llvm.x86.avx10.mask.vcvttph2w256(<16 x half>, <16 x i16>, i16, i32)
+define <16 x i16> @test_int_x86_vcvttph2w256(<16 x half> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vcvttph2w256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2w {sae}, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x79,0x18,0x7c,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <16 x i16> @llvm.x86.avx10.mask.vcvttph2w256(<16 x half> %a, <16 x i16> undef, i16 -1, i32 8)
+  ret <16 x i16> %ret
+}
+
+define <16 x i16> @test_int_x86_mask_vcvttph2w256(<16 x i16> %a, i16 %b, <16 x half> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvttph2w256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvttph2w {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x79,0x19,0x7c,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvttph2w256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvttph2w {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x79,0x19,0x7c,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <16 x i16> @llvm.x86.avx10.mask.vcvttph2w256(<16 x half> %c, <16 x i16> %a, i16 %b, i32 8)
+  ret <16 x i16> %ret
+}
+
+define <16 x i16> @test_int_x86_maskz_vcvttph2w256(i16 %a, <16 x half> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvttph2w256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvttph2w {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x79,0x99,0x7c,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvttph2w256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvttph2w {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x79,0x99,0x7c,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <16 x i16> @llvm.x86.avx10.mask.vcvttph2w256(<16 x half> %b, <16 x i16> zeroinitializer, i16 %a, i32 8)
+  ret <16 x i16> %ret
+}
+
+declare <8 x i32> @llvm.x86.avx10.mask.vcvttps2dq256(<8 x float>, <8 x i32>, i8, i32)
+define <8 x i32> @test_int_x86_vcvttps2dq256(<8 x float> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vcvttps2dq256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttps2dq {sae}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7a,0x18,0x5b,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.avx10.mask.vcvttps2dq256(<8 x float> %a, <8 x i32> undef, i8 -1, i32 8)
+  ret <8 x i32> %ret
+}
+
+define <8 x i32> @test_int_x86_mask_vcvttps2dq256(<8 x i32> %a, i8 %b, <8 x float> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvttps2dq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvttps2dq {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x7a,0x19,0x5b,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvttps2dq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvttps2dq {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x7a,0x19,0x5b,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.avx10.mask.vcvttps2dq256(<8 x float> %c, <8 x i32> %a, i8 %b, i32 8)
+  ret <8 x i32> %ret
+}
+
+define <8 x i32> @test_int_x86_maskz_vcvttps2dq256(i8 %a, <8 x float> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvttps2dq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvttps2dq {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7a,0x99,0x5b,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvttps2dq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvttps2dq {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7a,0x99,0x5b,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.avx10.mask.vcvttps2dq256(<8 x float> %b, <8 x i32> zeroinitializer, i8 %a, i32 8)
+  ret <8 x i32> %ret
+}
+
+declare <4 x i64> @llvm.x86.avx10.mask.vcvttps2qq256(<4 x float>, <4 x i64>, i8, i32)
+define <4 x i64> @test_int_x86_vcvttps2qq256(<4 x float> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vcvttps2qq256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttps2qq {sae}, %xmm0, %ymm0 # encoding: [0x62,0xf1,0x79,0x18,0x7a,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvttps2qq256(<4 x float> %a, <4 x i64> undef, i8 -1, i32 8)
+  ret <4 x i64> %ret
+}
+
+define <4 x i64> @test_int_x86_mask_vcvttps2qq256(<4 x i64> %a, i8 %b, <4 x float> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvttps2qq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvttps2qq {sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x79,0x19,0x7a,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvttps2qq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvttps2qq {sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x79,0x19,0x7a,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvttps2qq256(<4 x float> %c, <4 x i64> %a, i8 %b, i32 8)
+  ret <4 x i64> %ret
+}
+
+define <4 x i64> @test_int_x86_maskz_vcvttps2qq256(i8 %a, <4 x float> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvttps2qq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvttps2qq {sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x79,0x99,0x7a,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvttps2qq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvttps2qq {sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x79,0x99,0x7a,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvttps2qq256(<4 x float> %b, <4 x i64> zeroinitializer, i8 %a, i32 8)
+  ret <4 x i64> %ret
+}
+
+declare <8 x i32> @llvm.x86.avx10.mask.vcvttps2udq256(<8 x float>, <8 x i32>, i8, i32)
+define <8 x i32> @test_int_x86_vcvttps2udq256(<8 x float> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vcvttps2udq256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttps2udq {sae}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x78,0x18,0x78,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.avx10.mask.vcvttps2udq256(<8 x float> %a, <8 x i32> undef, i8 -1, i32 8)
+  ret <8 x i32> %ret
+}
+
+define <8 x i32> @test_int_x86_mask_vcvttps2udq256(<8 x i32> %a, i8 %b, <8 x float> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvttps2udq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvttps2udq {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x78,0x19,0x78,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvttps2udq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvttps2udq {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x78,0x19,0x78,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.avx10.mask.vcvttps2udq256(<8 x float> %c, <8 x i32> %a, i8 %b, i32 8)
+  ret <8 x i32> %ret
+}
+
+define <8 x i32> @test_int_x86_maskz_vcvttps2udq256(i8 %a, <8 x float> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvttps2udq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvttps2udq {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x78,0x99,0x78,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvttps2udq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvttps2udq {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x78,0x99,0x78,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.avx10.mask.vcvttps2udq256(<8 x float> %b, <8 x i32> zeroinitializer, i8 %a, i32 8)
+  ret <8 x i32> %ret
+}
+
+declare <4 x i64> @llvm.x86.avx10.mask.vcvttps2uqq256(<4 x float>, <4 x i64>, i8, i32)
+define <4 x i64> @test_int_x86_vcvttps2uqq256(<4 x float> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vcvttps2uqq256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttps2uqq {sae}, %xmm0, %ymm0 # encoding: [0x62,0xf1,0x79,0x18,0x78,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvttps2uqq256(<4 x float> %a, <4 x i64> undef, i8 -1, i32 8)
+  ret <4 x i64> %ret
+}
+
+define <4 x i64> @test_int_x86_mask_vcvttps2uqq256(<4 x i64> %a, i8 %b, <4 x float> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvttps2uqq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvttps2uqq {sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x79,0x19,0x78,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvttps2uqq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvttps2uqq {sae}, %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x79,0x19,0x78,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvttps2uqq256(<4 x float> %c, <4 x i64> %a, i8 %b, i32 8)
+  ret <4 x i64> %ret
+}
+
+define <4 x i64> @test_int_x86_maskz_vcvttps2uqq256(i8 %a, <4 x float> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvttps2uqq256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvttps2uqq {sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x79,0x99,0x78,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvttps2uqq256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvttps2uqq {sae}, %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x79,0x99,0x78,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.avx10.mask.vcvttps2uqq256(<4 x float> %b, <4 x i64> zeroinitializer, i8 %a, i32 8)
+  ret <4 x i64> %ret
+}
+
+declare <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i32(<8 x i32>, i32)
+define <8 x half> @test_int_x86_vcvtudq2ph256(<8 x i32> %A) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtudq2ph256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtudq2ph {rz-sae}, %ymm0, %xmm0 # encoding: [0x62,0xf5,0x7b,0x78,0x7a,0xc0]
+; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i32(<8 x i32> %A, i32 11)
+  ret <8 x half> %ret
+}
+
+define <8 x half> @test_int_x86_mask_vcvtudq2ph256(<8 x half> %A, i8 %B, <8 x i32> %C) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtudq2ph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtudq2ph {ru-sae}, %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7b,0x59,0x7a,0xc1]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtudq2ph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtudq2ph {ru-sae}, %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7b,0x59,0x7a,0xc1]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i32(<8 x i32> %C, i32 10)
+  %msk = bitcast i8 %B to <8 x i1>
+  %ret = select <8 x i1> %msk, <8 x half> %ret0, <8 x half> %A
+  ret <8 x half> %ret
+}
+
+define <8 x half> @test_int_x86_maskz_vcvtudq2ph256(i8 %A, <8 x i32> %B) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtudq2ph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtudq2ph {rd-sae}, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7b,0xb9,0x7a,0xc0]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtudq2ph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtudq2ph {rd-sae}, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7b,0xb9,0x7a,0xc0]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i32(<8 x i32> %B, i32 9)
+  %msk = bitcast i8 %A to <8 x i1>
+  %ret = select <8 x i1> %msk, <8 x half> %ret0, <8 x half> zeroinitializer
+  ret <8 x half> %ret
+}
+
+declare <8 x float> @llvm.x86.avx512.uitofp.round.v8f32.v8i32(<8 x i32>, i32)
+define <8 x float> @test_int_x86_vcvtudq2ps256(<8 x i32> %A) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtudq2ps256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtudq2ps {rz-sae}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7b,0x78,0x7a,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx512.uitofp.round.v8f32.v8i32(<8 x i32> %A, i32 11)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_mask_vcvtudq2ps256(<8 x float> %A, i8 %B, <8 x i32> %C) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtudq2ps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtudq2ps {ru-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x7b,0x59,0x7a,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtudq2ps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtudq2ps {ru-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x7b,0x59,0x7a,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <8 x float> @llvm.x86.avx512.uitofp.round.v8f32.v8i32(<8 x i32> %C, i32 10)
+  %msk = bitcast i8 %B to <8 x i1>
+  %ret = select <8 x i1> %msk, <8 x float> %ret0, <8 x float> %A
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_maskz_vcvtudq2ps256(i8 %A, <8 x i32> %B) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtudq2ps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtudq2ps {rd-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7b,0xb9,0x7a,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtudq2ps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtudq2ps {rd-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7b,0xb9,0x7a,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <8 x float> @llvm.x86.avx512.uitofp.round.v8f32.v8i32(<8 x i32> %B, i32 9)
+  %msk = bitcast i8 %A to <8 x i1>
+  %ret = select <8 x i1> %msk, <8 x float> %ret0, <8 x float> zeroinitializer
+  ret <8 x float> %ret
+}
+
+declare <4 x double> @llvm.x86.avx512.uitofp.round.v4f64.v4i64(<4 x i64>, i32)
+define <4 x double> @test_int_x86_vcvtuqq2pd256(<4 x i64> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtuqq2pd256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtuqq2pd {rn-sae}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfa,0x18,0x7a,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx512.uitofp.round.v4f64.v4i64(<4 x i64> %a, i32 8)
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_mask_vcvtuqq2pd256(<4 x double> %a, i4 %b, <4 x i64> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtuqq2pd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtuqq2pd {rn-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xfa,0x19,0x7a,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtuqq2pd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtuqq2pd {rn-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xfa,0x19,0x7a,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <4 x double> @llvm.x86.avx512.uitofp.round.v4f64.v4i64(<4 x i64> %c, i32 8)
+  %msk = bitcast i4 %b to <4 x i1>
+  %ret = select <4 x i1> %msk, <4 x double> %ret0, <4 x double> %a
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_maskz_vcvtuqq2pd256(i4 %a, <4 x i64> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtuqq2pd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtuqq2pd {rn-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfa,0x99,0x7a,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtuqq2pd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtuqq2pd {rn-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfa,0x99,0x7a,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <4 x double> @llvm.x86.avx512.uitofp.round.v4f64.v4i64(<4 x i64> %b, i32 8)
+  %msk = bitcast i4 %a to <4 x i1>
+  %ret = select <4 x i1> %msk, <4 x double> %ret0, <4 x double> zeroinitializer
+  ret <4 x double> %ret
+}
+
+declare <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v4i64(<4 x i64>, i32)
+define <8 x half> @test_int_x86_vcvtuqq2ph256(<4 x i64> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtuqq2ph256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtuqq2ph {rn-sae}, %ymm0, %xmm0 # encoding: [0x62,0xf5,0xfb,0x18,0x7a,0xc0]
+; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v4i64(<4 x i64> %a, i32 8)
+  ret <8 x half> %ret
+}
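+; The qword-to-half conversions narrow a YMM source to an XMM result, so a
+; vzeroupper is emitted before returning: the upper YMM state is dirty while
+; the return value only occupies XMM0.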
+
+define <8 x half> @test_int_x86_mask_vcvtuqq2ph256(<8 x half> %a, i8 %b, <4 x i64> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtuqq2ph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtuqq2ph {rn-sae}, %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0xfb,0x19,0x7a,0xc1]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtuqq2ph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtuqq2ph {rn-sae}, %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0xfb,0x19,0x7a,0xc1]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v4i64(<4 x i64> %c, i32 8)
+  %msk = bitcast i8 %b to <8 x i1>
+  %ret = select <8 x i1> %msk, <8 x half> %ret0, <8 x half> %a
+  ret <8 x half> %ret
+}
+
+define <8 x half> @test_int_x86_maskz_vcvtuqq2ph256(i8 %a, <4 x i64> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtuqq2ph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtuqq2ph {rn-sae}, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfb,0x99,0x7a,0xc0]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtuqq2ph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtuqq2ph {rn-sae}, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfb,0x99,0x7a,0xc0]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v4i64(<4 x i64> %b, i32 8)
+  %msk = bitcast i8 %a to <8 x i1>
+  %ret = select <8 x i1> %msk, <8 x half> %ret0, <8 x half> zeroinitializer
+  ret <8 x half> %ret
+}
+
+declare <4 x float> @llvm.x86.avx512.uitofp.round.v4f32.v4i64(<4 x i64>, i32)
+define <4 x float> @test_int_x86_vcvtuqq2ps256(<4 x i64> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtuqq2ps256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtuqq2ps {rn-sae}, %ymm0, %xmm0 # encoding: [0x62,0xf1,0xfb,0x18,0x7a,0xc0]
+; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x float> @llvm.x86.avx512.uitofp.round.v4f32.v4i64(<4 x i64> %a, i32 8)
+  ret <4 x float> %ret
+}
+
+define <4 x float> @test_int_x86_mask_vcvtuqq2ps256(<4 x float> %a, i4 %b, <4 x i64> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtuqq2ps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtuqq2ps {rn-sae}, %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfb,0x19,0x7a,0xc1]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtuqq2ps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtuqq2ps {rn-sae}, %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfb,0x19,0x7a,0xc1]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <4 x float> @llvm.x86.avx512.uitofp.round.v4f32.v4i64(<4 x i64> %c, i32 8)
+  %msk = bitcast i4 %b to <4 x i1>
+  %ret = select <4 x i1> %msk, <4 x float> %ret0, <4 x float> %a
+  ret <4 x float> %ret
+}
+
+define <4 x float> @test_int_x86_maskz_vcvtuqq2ps256(i4 %a, <4 x i64> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtuqq2ps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtuqq2ps {rn-sae}, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfb,0x99,0x7a,0xc0]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtuqq2ps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtuqq2ps {rn-sae}, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfb,0x99,0x7a,0xc0]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <4 x float> @llvm.x86.avx512.uitofp.round.v4f32.v4i64(<4 x i64> %b, i32 8)
+  %msk = bitcast i4 %a to <4 x i1>
+  %ret = select <4 x i1> %msk, <4 x float> %ret0, <4 x float> zeroinitializer
+  ret <4 x float> %ret
+}
+
+declare <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i16(<16 x i16>, i32)
+define <16 x half> @test_int_x86_vcvtuw2ph256(<16 x i16> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtuw2ph256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtuw2ph {rn-sae}, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7b,0x18,0x7d,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i16(<16 x i16> %a, i32 8)
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_mask_vcvtuw2ph256(<16 x half> %a, i16 %b, <16 x i16> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtuw2ph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtuw2ph {rn-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7b,0x19,0x7d,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtuw2ph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtuw2ph {rn-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7b,0x19,0x7d,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i16(<16 x i16> %c, i32 8)
+  %msk = bitcast i16 %b to <16 x i1>
+  %ret = select <16 x i1> %msk, <16 x half> %ret0, <16 x half> %a
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_maskz_vcvtuw2ph256(i16 %a, <16 x i16> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtuw2ph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtuw2ph {rn-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7b,0x99,0x7d,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtuw2ph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtuw2ph {rn-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7b,0x99,0x7d,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i16(<16 x i16> %b, i32 8)
+  %msk = bitcast i16 %a to <16 x i1>
+  %ret = select <16 x i1> %msk, <16 x half> %ret0, <16 x half> zeroinitializer
+  ret <16 x half> %ret
+}
+
+declare <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i16(<16 x i16>, i32)
+define <16 x half> @test_int_x86_vcvtw2ph256(<16 x i16> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vcvtw2ph256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtw2ph {rn-sae}, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7a,0x18,0x7d,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i16(<16 x i16> %a, i32 8)
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_mask_vcvtw2ph256(<16 x half> %a, i16 %b, <16 x i16> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vcvtw2ph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtw2ph {rn-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7a,0x19,0x7d,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vcvtw2ph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtw2ph {rn-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7a,0x19,0x7d,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i16(<16 x i16> %c, i32 8)
+  %msk = bitcast i16 %b to <16 x i1>
+  %ret = select <16 x i1> %msk, <16 x half> %ret0, <16 x half> %a
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_maskz_vcvtw2ph256(i16 %a, <16 x i16> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vcvtw2ph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vcvtw2ph {rn-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7a,0x99,0x7d,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vcvtw2ph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vcvtw2ph {rn-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7a,0x99,0x7d,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i16(<16 x i16> %b, i32 8)
+  %msk = bitcast i16 %a to <16 x i1>
+  %ret = select <16 x i1> %msk, <16 x half> %ret0, <16 x half> zeroinitializer
+  ret <16 x half> %ret
+}
+
+declare <4 x double> @llvm.x86.avx10.vdivpd256(<4 x double>, <4 x double>, i32)
+define <4 x double> @test_int_x86_vdivpd256(<4 x double> %A, <4 x double> %B) nounwind {
+; CHECK-LABEL: test_int_x86_vdivpd256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vdivpd {rz-sae}, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xf9,0x78,0x5e,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.vdivpd256(<4 x double> %A, <4 x double> %B, i32 11)
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_mask_vdivpd256(<4 x double> %A, i4 %B, <4 x double> %C, <4 x double> %D) nounwind {
+; X86-LABEL: test_int_x86_mask_vdivpd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vdivpd {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xf1,0x59,0x5e,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vdivpd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vdivpd {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xf1,0x59,0x5e,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <4 x double> @llvm.x86.avx10.vdivpd256(<4 x double> %C, <4 x double> %D, i32 10)
+  %msk = bitcast i4 %B to <4 x i1>
+  %ret = select <4 x i1> %msk, <4 x double> %ret0, <4 x double> %A
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_maskz_vdivpd256(i4 %A, <4 x double> %B, <4 x double> %C) nounwind {
+; X86-LABEL: test_int_x86_maskz_vdivpd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vdivpd {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xf9,0xb9,0x5e,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vdivpd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vdivpd {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xf9,0xb9,0x5e,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <4 x double> @llvm.x86.avx10.vdivpd256(<4 x double> %B, <4 x double> %C, i32 9)
+  %msk = bitcast i4 %A to <4 x i1>
+  %ret = select <4 x i1> %msk, <4 x double> %ret0, <4 x double> zeroinitializer
+  ret <4 x double> %ret
+}
+
+declare <16 x half> @llvm.x86.avx10.vdivph256(<16 x half>, <16 x half>, i32)
+define <16 x half> @test_int_x86_vdivph256(<16 x half> %A, <16 x half> %B) nounwind {
+; CHECK-LABEL: test_int_x86_vdivph256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vdivph {rz-sae}, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x78,0x78,0x5e,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <16 x half> @llvm.x86.avx10.vdivph256(<16 x half> %A, <16 x half> %B, i32 11)
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_mask_vdivph256(<16 x half> %A, i16 %B, <16 x half> %C, <16 x half> %D) nounwind {
+; X86-LABEL: test_int_x86_mask_vdivph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vdivph {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x70,0x59,0x5e,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vdivph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vdivph {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x70,0x59,0x5e,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <16 x half> @llvm.x86.avx10.vdivph256(<16 x half> %C, <16 x half> %D, i32 10)
+  %msk = bitcast i16 %B to <16 x i1>
+  %ret = select <16 x i1> %msk, <16 x half> %ret0, <16 x half> %A
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_maskz_vdivph256(i16 %A, <16 x half> %B, <16 x half> %C) nounwind {
+; X86-LABEL: test_int_x86_maskz_vdivph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vdivph {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0xb9,0x5e,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vdivph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vdivph {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0xb9,0x5e,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <16 x half> @llvm.x86.avx10.vdivph256(<16 x half> %B, <16 x half> %C, i32 9)
+  %msk = bitcast i16 %A to <16 x i1>
+  %ret = select <16 x i1> %msk, <16 x half> %ret0, <16 x half> zeroinitializer
+  ret <16 x half> %ret
+}
+
+declare <8 x float> @llvm.x86.avx10.vdivps256(<8 x float>, <8 x float>, i32)
+define <8 x float> @test_int_x86_vdivps256(<8 x float> %A, <8 x float> %B) nounwind {
+; CHECK-LABEL: test_int_x86_vdivps256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vdivps {rz-sae}, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x78,0x78,0x5e,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.vdivps256(<8 x float> %A, <8 x float> %B, i32 11)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_mask_vdivps256(<8 x float> %A, i8 %B, <8 x float> %C, <8 x float> %D) nounwind {
+; X86-LABEL: test_int_x86_mask_vdivps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vdivps {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x70,0x59,0x5e,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vdivps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vdivps {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x70,0x59,0x5e,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <8 x float> @llvm.x86.avx10.vdivps256(<8 x float> %C, <8 x float> %D, i32 10)
+  %msk = bitcast i8 %B to <8 x i1>
+  %ret = select <8 x i1> %msk, <8 x float> %ret0, <8 x float> %A
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_maskz_vdivps256(i8 %A, <8 x float> %B, <8 x float> %C) nounwind {
+; X86-LABEL: test_int_x86_maskz_vdivps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vdivps {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x78,0xb9,0x5e,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vdivps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vdivps {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x78,0xb9,0x5e,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <8 x float> @llvm.x86.avx10.vdivps256(<8 x float> %B, <8 x float> %C, i32 9)
+  %msk = bitcast i8 %A to <8 x i1>
+  %ret = select <8 x i1> %msk, <8 x float> %ret0, <8 x float> zeroinitializer
+  ret <8 x float> %ret
+}
+
+declare <8 x float> @llvm.x86.avx10.mask.vfcmaddcph256(<8 x float>, <8 x float>, <8 x float>, i8, i32)
+define <8 x float> @test_int_x86_vfcmaddcph256(<8 x float> %a, <8 x float> %b, <8 x float> %c) nounwind {
+; CHECK-LABEL: test_int_x86_vfcmaddcph256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfcmaddcph {rz-sae}, %ymm1, %ymm0, %ymm2 # encoding: [0x62,0xf6,0x7b,0x78,0x56,0xd1]
+; CHECK-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vfcmaddcph256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 -1, i32 11)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_mask_vfcmaddcph256(<8 x float> %a, i8 %b, <8 x float> %c, <8 x float> %d) nounwind {
+; X86-LABEL: test_int_x86_mask_vfcmaddcph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vfcmaddcph {ru-sae}, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7b,0x59,0x56,0xd1]
+; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vfcmaddcph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vfcmaddcph {ru-sae}, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7b,0x59,0x56,0xd1]
+; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vfcmaddcph256(<8 x float> %a, <8 x float> %c, <8 x float> %d, i8 %b, i32 10)
+  ret <8 x float> %ret
+}
+
+declare <8 x float> @llvm.x86.avx10.maskz.vfcmaddcph256(<8 x float>, <8 x float>, <8 x float>, i8, i32)
+define <8 x float> @test_int_x86_maskz_vfcmaddcph256(<8 x float> %a, i8 %b, <8 x float> %c, <8 x float> %d) nounwind {
+; X86-LABEL: test_int_x86_maskz_vfcmaddcph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vfcmaddcph {rd-sae}, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf6,0x7b,0xb9,0x56,0xd1]
+; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vfcmaddcph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vfcmaddcph {rd-sae}, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf6,0x7b,0xb9,0x56,0xd1]
+; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.maskz.vfcmaddcph256(<8 x float> %a, <8 x float> %c, <8 x float> %d, i8 %b, i32 9)
+  ret <8 x float> %ret
+}
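+; Unlike the conversion tests, the complex-arithmetic intrinsics
+; (vfcmaddcph/vfcmulcph here and vfmaddcph/vfmulcph below) carry the mask as
+; an explicit i8 operand and come in separate mask/maskz flavours; the
+; unmasked tests simply pass i8 -1.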
+
+declare <8 x float> @llvm.x86.avx10.mask.vfcmulcph256(<8 x float>, <8 x float>, <8 x float>, i8, i32)
+define <8 x float> @test_int_x86_vfcmulcph256(<8 x float> %a, <8 x float> %b, <8 x float> %c) nounwind {
+; CHECK-LABEL: test_int_x86_vfcmulcph256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfcmulcph {rz-sae}, %ymm1, %ymm0, %ymm2 # encoding: [0x62,0xf6,0x7b,0x78,0xd6,0xd1]
+; CHECK-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vfcmulcph256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 -1, i32 11)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_mask_vfcmulcph256(<8 x float> %a, i8 %b, <8 x float> %c, <8 x float> %d) nounwind {
+; X86-LABEL: test_int_x86_mask_vfcmulcph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vfcmulcph {ru-sae}, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7b,0x59,0xd6,0xd1]
+; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vfcmulcph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vfcmulcph {ru-sae}, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7b,0x59,0xd6,0xd1]
+; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vfcmulcph256(<8 x float> %a, <8 x float> %c, <8 x float> %d, i8 %b, i32 10)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_maskz_vfcmulcph256(<8 x float> %a, i8 %b, <8 x float> %c, <8 x float> %d) nounwind {
+; X86-LABEL: test_int_x86_maskz_vfcmulcph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vfcmulcph {rd-sae}, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf6,0x7b,0xb9,0xd6,0xd1]
+; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vfcmulcph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vfcmulcph {rd-sae}, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf6,0x7b,0xb9,0xd6,0xd1]
+; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vfcmulcph256(<8 x float> %a, <8 x float> %c, <8 x float> zeroinitializer, i8 %b, i32 9)
+  ret <8 x float> %ret
+}
+
+declare <4 x double> @llvm.x86.avx10.mask.vfixupimmpd256(<4 x double>, <4 x double>, <4 x i64>, i32, i8, i32)
+define <4 x double> @test_int_x86_vfixupimmpd256(<4 x double> %a, <4 x double> %b, <4 x i64> %c) nounwind {
+; CHECK-LABEL: test_int_x86_vfixupimmpd256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfixupimmpd $127, {sae}, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf1,0x18,0x54,0xc2,0x7f]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.mask.vfixupimmpd256(<4 x double> %a, <4 x double> %b, <4 x i64> %c, i32 127, i8 -1, i32 8)
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_mask_vfixupimmpd256(<4 x double> %a, i8 %b, <4 x double> %c, <4 x i64> %d) nounwind {
+; X86-LABEL: test_int_x86_mask_vfixupimmpd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vfixupimmpd $127, {sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0xf1,0x19,0x54,0xc2,0x7f]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vfixupimmpd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vfixupimmpd $127, {sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0xf1,0x19,0x54,0xc2,0x7f]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.mask.vfixupimmpd256(<4 x double> %a, <4 x double> %c, <4 x i64> %d, i32 127, i8 %b, i32 8)
+  ret <4 x double> %ret
+}
+
+declare <4 x double> @llvm.x86.avx10.maskz.vfixupimmpd256(<4 x double>, <4 x double>, <4 x i64>, i32, i8, i32)
+define <4 x double> @test_int_x86_maskz_vfixupimmpd256(<4 x double> %a, i8 %b, <4 x double> %c, <4 x i64> %d) nounwind {
+; X86-LABEL: test_int_x86_maskz_vfixupimmpd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vfixupimmpd $127, {sae}, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xf1,0x99,0x54,0xc2,0x7f]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vfixupimmpd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vfixupimmpd $127, {sae}, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xf1,0x99,0x54,0xc2,0x7f]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.maskz.vfixupimmpd256(<4 x double> %a, <4 x double> %c, <4 x i64> %d, i32 127, i8 %b, i32 8)
+  ret <4 x double> %ret
+}
+
+declare <8 x float> @llvm.x86.avx10.mask.vfixupimmps256(<8 x float>, <8 x float>, <8 x i32>, i32, i8, i32)
+define <8 x float> @test_int_x86_vfixupimmps256(<8 x float> %a, <8 x float> %b, <8 x i32> %c) nounwind {
+; CHECK-LABEL: test_int_x86_vfixupimmps256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfixupimmps $127, {sae}, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0x71,0x18,0x54,0xc2,0x7f]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vfixupimmps256(<8 x float> %a, <8 x float> %b, <8 x i32> %c, i32 127, i8 -1, i32 8)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_mask_vfixupimmps256(<8 x float> %a, i8 %b, <8 x float> %c, <8 x i32> %d) nounwind {
+; X86-LABEL: test_int_x86_mask_vfixupimmps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vfixupimmps $127, {sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x71,0x19,0x54,0xc2,0x7f]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vfixupimmps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vfixupimmps $127, {sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x71,0x19,0x54,0xc2,0x7f]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vfixupimmps256(<8 x float> %a, <8 x float> %c, <8 x i32> %d, i32 127, i8 %b, i32 8)
+  ret <8 x float> %ret
+}
+
+declare <8 x float> @llvm.x86.avx10.maskz.vfixupimmps256(<8 x float>, <8 x float>, <8 x i32>, i32, i8, i32)
+define <8 x float> @test_int_x86_maskz_vfixupimmps256(<8 x float> %a, i8 %b, <8 x float> %c, <8 x i32> %d) nounwind {
+; X86-LABEL: test_int_x86_maskz_vfixupimmps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vfixupimmps $127, {sae}, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x71,0x99,0x54,0xc2,0x7f]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vfixupimmps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vfixupimmps $127, {sae}, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x71,0x99,0x54,0xc2,0x7f]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.maskz.vfixupimmps256(<8 x float> %a, <8 x float> %c, <8 x i32> %d, i32 127, i8 %b, i32 8)
+  ret <8 x float> %ret
+}
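+; vfixupimm, vgetexp and vgetmant take no rounding control, only SAE, so the
+; constant 8 maps to the bare {sae} operand in the CHECK lines for these
+; instructions.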
+
+declare <4 x double> @llvm.x86.avx10.vfmaddpd256(<4 x double>, <4 x double>, <4 x double>, i32)
+define <4 x double> @test_int_x86_vfmaddpd256(<4 x double> %a, <4 x double> %b, <4 x double> %c) nounwind {
+; CHECK-LABEL: test_int_x86_vfmaddpd256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfmadd213pd {rz-sae}, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf1,0x78,0xa8,0xc2]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.vfmaddpd256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i32 11)
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_mask_vfmaddpd256(<4 x double> %a, i4 %b, <4 x double> %c, <4 x double> %d) nounwind {
+; X86-LABEL: test_int_x86_mask_vfmaddpd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vfmadd132pd {ru-sae}, %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xe9,0x59,0x98,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vfmaddpd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vfmadd132pd {ru-sae}, %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xe9,0x59,0x98,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <4 x double> @llvm.x86.avx10.vfmaddpd256(<4 x double> %a, <4 x double> %c, <4 x double> %d, i32 10)
+  %msk = bitcast i4 %b to <4 x i1>
+  %ret = select <4 x i1> %msk, <4 x double> %ret0, <4 x double> %a
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_maskz_vfmaddpd256(<4 x double> %a, i4 %b, <4 x double> %c, <4 x double> %d) nounwind {
+; X86-LABEL: test_int_x86_maskz_vfmaddpd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vfmadd132pd {rd-sae}, %ymm1, %ymm2, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xe9,0xb9,0x98,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vfmaddpd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vfmadd132pd {rd-sae}, %ymm1, %ymm2, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xe9,0xb9,0x98,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <4 x double> @llvm.x86.avx10.vfmaddpd256(<4 x double> %a, <4 x double> %c, <4 x double> %d, i32 9)
+  %msk = bitcast i4 %b to <4 x i1>
+  %ret = select <4 x i1> %msk, <4 x double> %ret0, <4 x double> zeroinitializer
+  ret <4 x double> %ret
+}
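+; The unmasked FMA tests use the 213 form; under masking the CHECK lines show
+; the 132 form instead, which keeps the passthrough operand (%a) in the
+; destination register so the merge-masked write lands on the passthrough
+; value.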
+
+declare <16 x half> @llvm.x86.avx10.vfmaddph256(<16 x half>, <16 x half>, <16 x half>, i32)
+define <16 x half> @test_int_x86_vfmaddph256(<16 x half> %a, <16 x half> %b, <16 x half> %c) nounwind {
+; CHECK-LABEL: test_int_x86_vfmaddph256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfmadd213ph {rz-sae}, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf6,0x71,0x78,0xa8,0xc2]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <16 x half> @llvm.x86.avx10.vfmaddph256(<16 x half> %a, <16 x half> %b, <16 x half> %c, i32 11)
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_mask_vfmaddph256(<16 x half> %a, i16 %b, <16 x half> %c, <16 x half> %d) nounwind {
+; X86-LABEL: test_int_x86_mask_vfmaddph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vfmadd132ph {ru-sae}, %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x69,0x59,0x98,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vfmaddph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vfmadd132ph {ru-sae}, %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x69,0x59,0x98,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <16 x half> @llvm.x86.avx10.vfmaddph256(<16 x half> %a, <16 x half> %c, <16 x half> %d, i32 10)
+  %msk = bitcast i16 %b to <16 x i1>
+  %ret = select <16 x i1> %msk, <16 x half> %ret0, <16 x half> %a
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_maskz_vfmaddph256(<16 x half> %a, i16 %b, <16 x half> %c, <16 x half> %d) nounwind {
+; X86-LABEL: test_int_x86_maskz_vfmaddph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vfmadd132ph {rd-sae}, %ymm1, %ymm2, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x69,0xb9,0x98,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vfmaddph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vfmadd132ph {rd-sae}, %ymm1, %ymm2, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x69,0xb9,0x98,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <16 x half> @llvm.x86.avx10.vfmaddph256(<16 x half> %a, <16 x half> %c, <16 x half> %d, i32 9)
+  %msk = bitcast i16 %b to <16 x i1>
+  %ret = select <16 x i1> %msk, <16 x half> %ret0, <16 x half> zeroinitializer
+  ret <16 x half> %ret
+}
+
+declare <8 x float> @llvm.x86.avx10.vfmaddps256(<8 x float>, <8 x float>, <8 x float>, i32)
+define <8 x float> @test_int_x86_vfmaddps256(<8 x float> %a, <8 x float> %b, <8 x float> %c) nounwind {
+; CHECK-LABEL: test_int_x86_vfmaddps256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfmadd213ps {rz-sae}, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x71,0x78,0xa8,0xc2]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.vfmaddps256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i32 11)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_mask_vfmaddps256(<8 x float> %a, i8 %b, <8 x float> %c, <8 x float> %d) nounwind {
+; X86-LABEL: test_int_x86_mask_vfmaddps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vfmadd132ps {ru-sae}, %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x69,0x59,0x98,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vfmaddps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vfmadd132ps {ru-sae}, %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x69,0x59,0x98,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <8 x float> @llvm.x86.avx10.vfmaddps256(<8 x float> %a, <8 x float> %c, <8 x float> %d, i32 10)
+  %msk = bitcast i8 %b to <8 x i1>
+  %ret = select <8 x i1> %msk, <8 x float> %ret0, <8 x float> %a
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_maskz_vfmaddps256(<8 x float> %a, i8 %b, <8 x float> %c, <8 x float> %d) nounwind {
+; X86-LABEL: test_int_x86_maskz_vfmaddps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vfmadd132ps {rd-sae}, %ymm1, %ymm2, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x69,0xb9,0x98,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vfmaddps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vfmadd132ps {rd-sae}, %ymm1, %ymm2, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x69,0xb9,0x98,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <8 x float> @llvm.x86.avx10.vfmaddps256(<8 x float> %a, <8 x float> %c, <8 x float> %d, i32 9)
+  %msk = bitcast i8 %b to <8 x i1>
+  %ret = select <8 x i1> %msk, <8 x float> %ret0, <8 x float> zeroinitializer
+  ret <8 x float> %ret
+}
+
+declare <8 x float> @llvm.x86.avx10.mask.vfmaddcph256(<8 x float>, <8 x float>, <8 x float>, i8, i32)
+define <8 x float> @test_int_x86_vfmaddcph256(<8 x float> %a, <8 x float> %b, <8 x float> %c) nounwind {
+; CHECK-LABEL: test_int_x86_vfmaddcph256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfmaddcph {rz-sae}, %ymm1, %ymm0, %ymm2 # encoding: [0x62,0xf6,0x7a,0x78,0x56,0xd1]
+; CHECK-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vfmaddcph256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 -1, i32 11)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_mask_vfmaddcph256(<8 x float> %a, i8 %b, <8 x float> %c, <8 x float> %d) nounwind {
+; X86-LABEL: test_int_x86_mask_vfmaddcph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vfmaddcph {ru-sae}, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7a,0x59,0x56,0xd1]
+; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vfmaddcph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vfmaddcph {ru-sae}, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7a,0x59,0x56,0xd1]
+; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vfmaddcph256(<8 x float> %a, <8 x float> %c, <8 x float> %d, i8 %b, i32 10)
+  ret <8 x float> %ret
+}
+
+declare <8 x float> @llvm.x86.avx10.maskz.vfmaddcph256(<8 x float>, <8 x float>, <8 x float>, i8, i32)
+define <8 x float> @test_int_x86_maskz_vfmaddcph256(<8 x float> %a, i8 %b, <8 x float> %c, <8 x float> %d) nounwind {
+; X86-LABEL: test_int_x86_maskz_vfmaddcph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vfmaddcph {rd-sae}, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf6,0x7a,0xb9,0x56,0xd1]
+; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vfmaddcph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vfmaddcph {rd-sae}, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf6,0x7a,0xb9,0x56,0xd1]
+; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.maskz.vfmaddcph256(<8 x float> %a, <8 x float> %c, <8 x float> %d, i8 %b, i32 9)
+  ret <8 x float> %ret
+}
+
+declare <4 x double> @llvm.x86.avx10.vfmaddsubpd256(<4 x double>, <4 x double>, <4 x double>, i32)
+define <4 x double> @test_int_x86_vfmaddsubpd256(<4 x double> %a, <4 x double> %b, <4 x double> %c) nounwind {
+; CHECK-LABEL: test_int_x86_vfmaddsubpd256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfmaddsub213pd {rz-sae}, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf1,0x78,0xa6,0xc2]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.vfmaddsubpd256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i32 11)
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_mask_vfmaddsubpd256(<4 x double> %a, i4 %b, <4 x double> %c, <4 x double> %d) nounwind {
+; X86-LABEL: test_int_x86_mask_vfmaddsubpd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vfmaddsub132pd {ru-sae}, %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xe9,0x59,0x96,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vfmaddsubpd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vfmaddsub132pd {ru-sae}, %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xe9,0x59,0x96,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <4 x double> @llvm.x86.avx10.vfmaddsubpd256(<4 x double> %a, <4 x double> %c, <4 x double> %d, i32 10)
+  %msk = bitcast i4 %b to <4 x i1>
+  %ret = select <4 x i1> %msk, <4 x double> %ret0, <4 x double> %a
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_maskz_vfmaddsubpd256(<4 x double> %a, i4 %b, <4 x double> %c, <4 x double> %d) nounwind {
+; X86-LABEL: test_int_x86_maskz_vfmaddsubpd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vfmaddsub132pd {rd-sae}, %ymm1, %ymm2, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xe9,0xb9,0x96,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vfmaddsubpd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vfmaddsub132pd {rd-sae}, %ymm1, %ymm2, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xe9,0xb9,0x96,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <4 x double> @llvm.x86.avx10.vfmaddsubpd256(<4 x double> %a, <4 x double> %c, <4 x double> %d, i32 9)
+  %msk = bitcast i4 %b to <4 x i1>
+  %ret = select <4 x i1> %msk, <4 x double> %ret0, <4 x double> zeroinitializer
+  ret <4 x double> %ret
+}
+
+declare <16 x half> @llvm.x86.avx10.vfmaddsubph256(<16 x half>, <16 x half>, <16 x half>, i32)
+define <16 x half> @test_int_x86_vfmaddsubph256(<16 x half> %a, <16 x half> %b, <16 x half> %c) nounwind {
+; CHECK-LABEL: test_int_x86_vfmaddsubph256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfmaddsub213ph {rz-sae}, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf6,0x71,0x78,0xa6,0xc2]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <16 x half> @llvm.x86.avx10.vfmaddsubph256(<16 x half> %a, <16 x half> %b, <16 x half> %c, i32 11)
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_mask_vfmaddsubph256(<16 x half> %a, i16 %b, <16 x half> %c, <16 x half> %d) nounwind {
+; X86-LABEL: test_int_x86_mask_vfmaddsubph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vfmaddsub132ph {ru-sae}, %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x69,0x59,0x96,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vfmaddsubph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vfmaddsub132ph {ru-sae}, %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf6,0x69,0x59,0x96,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <16 x half> @llvm.x86.avx10.vfmaddsubph256(<16 x half> %a, <16 x half> %c, <16 x half> %d, i32 10)
+  %msk = bitcast i16 %b to <16 x i1>
+  %ret = select <16 x i1> %msk, <16 x half> %ret0, <16 x half> %a
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_maskz_vfmaddsubph256(<16 x half> %a, i16 %b, <16 x half> %c, <16 x half> %d) nounwind {
+; X86-LABEL: test_int_x86_maskz_vfmaddsubph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vfmaddsub132ph {rd-sae}, %ymm1, %ymm2, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x69,0xb9,0x96,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vfmaddsubph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vfmaddsub132ph {rd-sae}, %ymm1, %ymm2, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x69,0xb9,0x96,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <16 x half> @llvm.x86.avx10.vfmaddsubph256(<16 x half> %a, <16 x half> %c, <16 x half> %d, i32 9)
+  %msk = bitcast i16 %b to <16 x i1>
+  %ret = select <16 x i1> %msk, <16 x half> %ret0, <16 x half> zeroinitializer
+  ret <16 x half> %ret
+}
+
+declare <8 x float> @llvm.x86.avx10.vfmaddsubps256(<8 x float>, <8 x float>, <8 x float>, i32)
+define <8 x float> @test_int_x86_vfmaddsubps256(<8 x float> %a, <8 x float> %b, <8 x float> %c) nounwind {
+; CHECK-LABEL: test_int_x86_vfmaddsubps256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfmaddsub213ps {rz-sae}, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x71,0x78,0xa6,0xc2]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.vfmaddsubps256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i32 11)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_mask_vfmaddsubps256(<8 x float> %a, i8 %b, <8 x float> %c, <8 x float> %d) nounwind {
+; X86-LABEL: test_int_x86_mask_vfmaddsubps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vfmaddsub132ps {ru-sae}, %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x69,0x59,0x96,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vfmaddsubps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vfmaddsub132ps {ru-sae}, %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x69,0x59,0x96,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <8 x float> @llvm.x86.avx10.vfmaddsubps256(<8 x float> %a, <8 x float> %c, <8 x float> %d, i32 10)
+  %msk = bitcast i8 %b to <8 x i1>
+  %ret = select <8 x i1> %msk, <8 x float> %ret0, <8 x float> %a
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_maskz_vfmaddsubps256(<8 x float> %a, i8 %b, <8 x float> %c, <8 x float> %d) nounwind {
+; X86-LABEL: test_int_x86_maskz_vfmaddsubps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vfmaddsub132ps {rd-sae}, %ymm1, %ymm2, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x69,0xb9,0x96,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vfmaddsubps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vfmaddsub132ps {rd-sae}, %ymm1, %ymm2, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x69,0xb9,0x96,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <8 x float> @llvm.x86.avx10.vfmaddsubps256(<8 x float> %a, <8 x float> %c, <8 x float> %d, i32 9)
+  %msk = bitcast i8 %b to <8 x i1>
+  %ret = select <8 x i1> %msk, <8 x float> %ret0, <8 x float> zeroinitializer
+  ret <8 x float> %ret
+}
+
+declare <8 x float> @llvm.x86.avx10.mask.vfmulcph256(<8 x float>, <8 x float>, <8 x float>, i8, i32)
+define <8 x float> @test_int_x86_vfmulcph256(<8 x float> %a, <8 x float> %b, <8 x float> %c) nounwind {
+; CHECK-LABEL: test_int_x86_vfmulcph256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfmulcph {rz-sae}, %ymm1, %ymm0, %ymm2 # encoding: [0x62,0xf6,0x7a,0x78,0xd6,0xd1]
+; CHECK-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vfmulcph256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 -1, i32 11)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_mask_vfmulcph256(<8 x float> %a, i8 %b, <8 x float> %c, <8 x float> %d) nounwind {
+; X86-LABEL: test_int_x86_mask_vfmulcph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vfmulcph {ru-sae}, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7a,0x59,0xd6,0xd1]
+; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vfmulcph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vfmulcph {ru-sae}, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf6,0x7a,0x59,0xd6,0xd1]
+; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vfmulcph256(<8 x float> %a, <8 x float> %c, <8 x float> %d, i8 %b, i32 10)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_maskz_vfmulcph256(<8 x float> %a, i8 %b, <8 x float> %c, <8 x float> %d) nounwind {
+; X86-LABEL: test_int_x86_maskz_vfmulcph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vfmulcph {rd-sae}, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf6,0x7a,0xb9,0xd6,0xd1]
+; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vfmulcph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vfmulcph {rd-sae}, %ymm1, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf6,0x7a,0xb9,0xd6,0xd1]
+; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vfmulcph256(<8 x float> %a, <8 x float> %c, <8 x float> zeroinitializer, i8 %b, i32 9)
+  ret <8 x float> %ret
+}
+
+declare <4 x double> @llvm.x86.avx10.mask.vgetexppd256(<4 x double>, <4 x double>, i8, i32)
+define <4 x double> @test_int_x86_vgetexppd256(<4 x double> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vgetexppd256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vgetexppd {sae}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xf9,0x18,0x42,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.mask.vgetexppd256(<4 x double> %a, <4 x double> undef, i8 -1, i32 8)
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_mask_vgetexppd256(<4 x double> %a, i8 %b, <4 x double> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vgetexppd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vgetexppd {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf9,0x19,0x42,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vgetexppd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vgetexppd {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf9,0x19,0x42,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.mask.vgetexppd256(<4 x double> %c, <4 x double> %a, i8 %b, i32 8)
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_maskz_vgetexppd256(i8 %a, <4 x double> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vgetexppd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vgetexppd {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf9,0x99,0x42,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vgetexppd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vgetexppd {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf9,0x99,0x42,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.mask.vgetexppd256(<4 x double> %b, <4 x double> zeroinitializer, i8 %a, i32 8)
+  ret <4 x double> %ret
+}
+
+declare <16 x half> @llvm.x86.avx10.mask.vgetexpph256(<16 x half>, <16 x half>, i16, i32)
+define <16 x half> @test_int_x86_vgetexpph256(<16 x half> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vgetexpph256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vgetexpph {sae}, %ymm0, %ymm0 # encoding: [0x62,0xf6,0x79,0x18,0x42,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <16 x half> @llvm.x86.avx10.mask.vgetexpph256(<16 x half> %a, <16 x half> undef, i16 -1, i32 8)
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_mask_vgetexpph256(<16 x half> %a, i16 %b, <16 x half> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vgetexpph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vgetexpph {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf6,0x79,0x19,0x42,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vgetexpph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vgetexpph {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf6,0x79,0x19,0x42,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <16 x half> @llvm.x86.avx10.mask.vgetexpph256(<16 x half> %c, <16 x half> %a, i16 %b, i32 8)
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_maskz_vgetexpph256(i16 %a, <16 x half> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vgetexpph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vgetexpph {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x79,0x99,0x42,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vgetexpph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vgetexpph {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x79,0x99,0x42,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <16 x half> @llvm.x86.avx10.mask.vgetexpph256(<16 x half> %b, <16 x half> zeroinitializer, i16 %a, i32 8)
+  ret <16 x half> %ret
+}
+
+declare <8 x float> @llvm.x86.avx10.mask.vgetexpps256(<8 x float>, <8 x float>, i8, i32)
+define <8 x float> @test_int_x86_vgetexpps256(<8 x float> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vgetexpps256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vgetexpps {sae}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x79,0x18,0x42,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vgetexpps256(<8 x float> %a, <8 x float> undef, i8 -1, i32 8)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_mask_vgetexpps256(<8 x float> %a, i8 %b, <8 x float> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vgetexpps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vgetexpps {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x79,0x19,0x42,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vgetexpps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vgetexpps {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x79,0x19,0x42,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vgetexpps256(<8 x float> %c, <8 x float> %a, i8 %b, i32 8)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_maskz_vgetexpps256(i8 %a, <8 x float> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vgetexpps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vgetexpps {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x79,0x99,0x42,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vgetexpps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vgetexpps {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x79,0x99,0x42,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vgetexpps256(<8 x float> %b, <8 x float> zeroinitializer, i8 %a, i32 8)
+  ret <8 x float> %ret
+}
+
+declare <4 x double> @llvm.x86.avx10.mask.vgetmantpd256(<4 x double>, i32, <4 x double>, i8, i32)
+define <4 x double> @test_int_x86_vgetmantpd256(<4 x double> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vgetmantpd256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vgetmantpd $127, {sae}, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xf9,0x18,0x26,0xc0,0x7f]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.mask.vgetmantpd256(<4 x double> %a, i32 127, <4 x double> undef, i8 -1, i32 8)
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_mask_vgetmantpd256(<4 x double> %a, i8 %b, <4 x double> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vgetmantpd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vgetmantpd $127, {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0xf9,0x19,0x26,0xc1,0x7f]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vgetmantpd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vgetmantpd $127, {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0xf9,0x19,0x26,0xc1,0x7f]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.mask.vgetmantpd256(<4 x double> %c, i32 127, <4 x double> %a, i8 %b, i32 8)
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_maskz_vgetmantpd256(i8 %a, <4 x double> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vgetmantpd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vgetmantpd $127, {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xf9,0x99,0x26,0xc0,0x7f]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vgetmantpd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vgetmantpd $127, {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xf9,0x99,0x26,0xc0,0x7f]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.mask.vgetmantpd256(<4 x double> %b, i32 127, <4 x double> zeroinitializer, i8 %a, i32 8)
+  ret <4 x double> %ret
+}
+
+declare <16 x half> @llvm.x86.avx10.mask.vgetmantph256(<16 x half>, i32, <16 x half>, i16, i32)
+define <16 x half> @test_int_x86_vgetmantph256(<16 x half> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vgetmantph256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vgetmantph $127, {sae}, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x78,0x18,0x26,0xc0,0x7f]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <16 x half> @llvm.x86.avx10.mask.vgetmantph256(<16 x half> %a, i32 127, <16 x half> undef, i16 -1, i32 8)
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_mask_vgetmantph256(<16 x half> %a, i16 %b, <16 x half> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vgetmantph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vgetmantph $127, {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x78,0x19,0x26,0xc1,0x7f]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vgetmantph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vgetmantph $127, {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x78,0x19,0x26,0xc1,0x7f]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <16 x half> @llvm.x86.avx10.mask.vgetmantph256(<16 x half> %c, i32 127, <16 x half> %a, i16 %b, i32 8)
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_maskz_vgetmantph256(i16 %a, <16 x half> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vgetmantph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vgetmantph $127, {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x78,0x99,0x26,0xc0,0x7f]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vgetmantph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vgetmantph $127, {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x78,0x99,0x26,0xc0,0x7f]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <16 x half> @llvm.x86.avx10.mask.vgetmantph256(<16 x half> %b, i32 127, <16 x half> zeroinitializer, i16 %a, i32 8)
+  ret <16 x half> %ret
+}
+
+declare <8 x float> @llvm.x86.avx10.mask.vgetmantps256(<8 x float>, i32, <8 x float>, i8, i32)
+define <8 x float> @test_int_x86_vgetmantps256(<8 x float> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vgetmantps256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vgetmantps $127, {sae}, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x79,0x18,0x26,0xc0,0x7f]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vgetmantps256(<8 x float> %a, i32 127, <8 x float> undef, i8 -1, i32 8)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_mask_vgetmantps256(<8 x float> %a, i8 %b, <8 x float> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vgetmantps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vgetmantps $127, {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x79,0x19,0x26,0xc1,0x7f]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vgetmantps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vgetmantps $127, {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x79,0x19,0x26,0xc1,0x7f]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vgetmantps256(<8 x float> %c, i32 127, <8 x float> %a, i8 %b, i32 8)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_maskz_vgetmantps256(i8 %a, <8 x float> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vgetmantps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vgetmantps $127, {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x79,0x99,0x26,0xc0,0x7f]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vgetmantps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vgetmantps $127, {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x79,0x99,0x26,0xc0,0x7f]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vgetmantps256(<8 x float> %b, i32 127, <8 x float> zeroinitializer, i8 %a, i32 8)
+  ret <8 x float> %ret
+}
+
+declare <4 x double> @llvm.x86.avx10.vmaxpd256(<4 x double>, <4 x double>, i32)
+define <4 x double> @test_int_x86_vmaxpd256(<4 x double> %A, <4 x double> %B) nounwind {
+; CHECK-LABEL: test_int_x86_vmaxpd256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmaxpd {sae}, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xf9,0x18,0x5f,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.vmaxpd256(<4 x double> %A, <4 x double> %B, i32 8)
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_mask_vmaxpd256(<4 x double> %A, i4 %B, <4 x double> %C, <4 x double> %D) nounwind {
+; X86-LABEL: test_int_x86_mask_vmaxpd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vmaxpd {sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xf1,0x19,0x5f,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vmaxpd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vmaxpd {sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xf1,0x19,0x5f,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <4 x double> @llvm.x86.avx10.vmaxpd256(<4 x double> %C, <4 x double> %D, i32 8)
+  %msk = bitcast i4 %B to <4 x i1>
+  %ret = select <4 x i1> %msk, <4 x double> %ret0, <4 x double> %A
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_maskz_vmaxpd256(i4 %A, <4 x double> %B, <4 x double> %C) nounwind {
+; X86-LABEL: test_int_x86_maskz_vmaxpd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vmaxpd {sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xf9,0x99,0x5f,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vmaxpd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vmaxpd {sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xf9,0x99,0x5f,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <4 x double> @llvm.x86.avx10.vmaxpd256(<4 x double> %B, <4 x double> %C, i32 8)
+  %msk = bitcast i4 %A to <4 x i1>
+  %ret = select <4 x i1> %msk, <4 x double> %ret0, <4 x double> zeroinitializer
+  ret <4 x double> %ret
+}
+
+declare <16 x half> @llvm.x86.avx10.vmaxph256(<16 x half>, <16 x half>, i32)
+define <16 x half> @test_int_x86_vmaxph256(<16 x half> %A, <16 x half> %B) nounwind {
+; CHECK-LABEL: test_int_x86_vmaxph256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmaxph {sae}, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x78,0x18,0x5f,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <16 x half> @llvm.x86.avx10.vmaxph256(<16 x half> %A, <16 x half> %B, i32 8)
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_mask_vmaxph256(<16 x half> %A, i16 %B, <16 x half> %C, <16 x half> %D) nounwind {
+; X86-LABEL: test_int_x86_mask_vmaxph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vmaxph {sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x70,0x19,0x5f,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vmaxph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vmaxph {sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x70,0x19,0x5f,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <16 x half> @llvm.x86.avx10.vmaxph256(<16 x half> %C, <16 x half> %D, i32 8)
+  %msk = bitcast i16 %B to <16 x i1>
+  %ret = select <16 x i1> %msk, <16 x half> %ret0, <16 x half> %A
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_maskz_vmaxph256(i16 %A, <16 x half> %B, <16 x half> %C) nounwind {
+; X86-LABEL: test_int_x86_maskz_vmaxph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vmaxph {sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0x99,0x5f,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vmaxph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vmaxph {sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0x99,0x5f,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <16 x half> @llvm.x86.avx10.vmaxph256(<16 x half> %B, <16 x half> %C, i32 8)
+  %msk = bitcast i16 %A to <16 x i1>
+  %ret = select <16 x i1> %msk, <16 x half> %ret0, <16 x half> zeroinitializer
+  ret <16 x half> %ret
+}
+
+declare <8 x float> @llvm.x86.avx10.vmaxps256(<8 x float>, <8 x float>, i32)
+define <8 x float> @test_int_x86_vmaxps256(<8 x float> %A, <8 x float> %B) nounwind {
+; CHECK-LABEL: test_int_x86_vmaxps256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmaxps {sae}, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x78,0x18,0x5f,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.vmaxps256(<8 x float> %A, <8 x float> %B, i32 8)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_mask_vmaxps256(<8 x float> %A, i8 %B, <8 x float> %C, <8 x float> %D) nounwind {
+; X86-LABEL: test_int_x86_mask_vmaxps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vmaxps {sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x70,0x19,0x5f,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vmaxps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vmaxps {sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x70,0x19,0x5f,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <8 x float> @llvm.x86.avx10.vmaxps256(<8 x float> %C, <8 x float> %D, i32 8)
+  %msk = bitcast i8 %B to <8 x i1>
+  %ret = select <8 x i1> %msk, <8 x float> %ret0, <8 x float> %A
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_maskz_vmaxps256(i8 %A, <8 x float> %B, <8 x float> %C) nounwind {
+; X86-LABEL: test_int_x86_maskz_vmaxps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vmaxps {sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x78,0x99,0x5f,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vmaxps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vmaxps {sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x78,0x99,0x5f,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <8 x float> @llvm.x86.avx10.vmaxps256(<8 x float> %B, <8 x float> %C, i32 8)
+  %msk = bitcast i8 %A to <8 x i1>
+  %ret = select <8 x i1> %msk, <8 x float> %ret0, <8 x float> zeroinitializer
+  ret <8 x float> %ret
+}
+
+declare <4 x double> @llvm.x86.avx10.vminpd256(<4 x double>, <4 x double>, i32)
+define <4 x double> @test_int_x86_vminpd256(<4 x double> %A, <4 x double> %B) nounwind {
+; CHECK-LABEL: test_int_x86_vminpd256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vminpd {sae}, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xf9,0x18,0x5d,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.vminpd256(<4 x double> %A, <4 x double> %B, i32 8)
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_mask_vminpd256(<4 x double> %A, i4 %B, <4 x double> %C, <4 x double> %D) nounwind {
+; X86-LABEL: test_int_x86_mask_vminpd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vminpd {sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xf1,0x19,0x5d,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vminpd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vminpd {sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xf1,0x19,0x5d,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <4 x double> @llvm.x86.avx10.vminpd256(<4 x double> %C, <4 x double> %D, i32 8)
+  %msk = bitcast i4 %B to <4 x i1>
+  %ret = select <4 x i1> %msk, <4 x double> %ret0, <4 x double> %A
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_maskz_vminpd256(i4 %A, <4 x double> %B, <4 x double> %C) nounwind {
+; X86-LABEL: test_int_x86_maskz_vminpd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vminpd {sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xf9,0x99,0x5d,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vminpd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vminpd {sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xf9,0x99,0x5d,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <4 x double> @llvm.x86.avx10.vminpd256(<4 x double> %B, <4 x double> %C, i32 8)
+  %msk = bitcast i4 %A to <4 x i1>
+  %ret = select <4 x i1> %msk, <4 x double> %ret0, <4 x double> zeroinitializer
+  ret <4 x double> %ret
+}
+
+declare <16 x half> @llvm.x86.avx10.vminph256(<16 x half>, <16 x half>, i32)
+define <16 x half> @test_int_x86_vminph256(<16 x half> %A, <16 x half> %B) nounwind {
+; CHECK-LABEL: test_int_x86_vminph256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vminph {sae}, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x78,0x18,0x5d,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <16 x half> @llvm.x86.avx10.vminph256(<16 x half> %A, <16 x half> %B, i32 8)
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_mask_vminph256(<16 x half> %A, i16 %B, <16 x half> %C, <16 x half> %D) nounwind {
+; X86-LABEL: test_int_x86_mask_vminph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vminph {sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x70,0x19,0x5d,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vminph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vminph {sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x70,0x19,0x5d,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <16 x half> @llvm.x86.avx10.vminph256(<16 x half> %C, <16 x half> %D, i32 8)
+  %msk = bitcast i16 %B to <16 x i1>
+  %ret = select <16 x i1> %msk, <16 x half> %ret0, <16 x half> %A
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_maskz_vminph256(i16 %A, <16 x half> %B, <16 x half> %C) nounwind {
+; X86-LABEL: test_int_x86_maskz_vminph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vminph {sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0x99,0x5d,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vminph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vminph {sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0x99,0x5d,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <16 x half> @llvm.x86.avx10.vminph256(<16 x half> %B, <16 x half> %C, i32 8)
+  %msk = bitcast i16 %A to <16 x i1>
+  %ret = select <16 x i1> %msk, <16 x half> %ret0, <16 x half> zeroinitializer
+  ret <16 x half> %ret
+}
+
+declare <8 x float> @llvm.x86.avx10.vminps256(<8 x float>, <8 x float>, i32)
+define <8 x float> @test_int_x86_vminps256(<8 x float> %A, <8 x float> %B) nounwind {
+; CHECK-LABEL: test_int_x86_vminps256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vminps {sae}, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x78,0x18,0x5d,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.vminps256(<8 x float> %A, <8 x float> %B, i32 8)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_mask_vminps256(<8 x float> %A, i8 %B, <8 x float> %C, <8 x float> %D) nounwind {
+; X86-LABEL: test_int_x86_mask_vminps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vminps {sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x70,0x19,0x5d,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vminps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vminps {sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x70,0x19,0x5d,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <8 x float> @llvm.x86.avx10.vminps256(<8 x float> %C, <8 x float> %D, i32 8)
+  %msk = bitcast i8 %B to <8 x i1>
+  %ret = select <8 x i1> %msk, <8 x float> %ret0, <8 x float> %A
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_maskz_vminps256(i8 %A, <8 x float> %B, <8 x float> %C) nounwind {
+; X86-LABEL: test_int_x86_maskz_vminps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vminps {sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x78,0x99,0x5d,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vminps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vminps {sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x78,0x99,0x5d,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <8 x float> @llvm.x86.avx10.vminps256(<8 x float> %B, <8 x float> %C, i32 8)
+  %msk = bitcast i8 %A to <8 x i1>
+  %ret = select <8 x i1> %msk, <8 x float> %ret0, <8 x float> zeroinitializer
+  ret <8 x float> %ret
+}
+
+declare <4 x double> @llvm.x86.avx10.vmulpd256(<4 x double>, <4 x double>, i32)
+define <4 x double> @test_int_x86_vmulpd256(<4 x double> %A, <4 x double> %B) nounwind {
+; CHECK-LABEL: test_int_x86_vmulpd256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmulpd {rz-sae}, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xf9,0x78,0x59,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.vmulpd256(<4 x double> %A, <4 x double> %B, i32 11)
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_mask_vmulpd256(<4 x double> %A, i4 %B, <4 x double> %C, <4 x double> %D) nounwind {
+; X86-LABEL: test_int_x86_mask_vmulpd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vmulpd {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xf1,0x59,0x59,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vmulpd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vmulpd {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xf1,0x59,0x59,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <4 x double> @llvm.x86.avx10.vmulpd256(<4 x double> %C, <4 x double> %D, i32 10)
+  %msk = bitcast i4 %B to <4 x i1>
+  %ret = select <4 x i1> %msk, <4 x double> %ret0, <4 x double> %A
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_maskz_vmulpd256(i4 %A, <4 x double> %B, <4 x double> %C) nounwind {
+; X86-LABEL: test_int_x86_maskz_vmulpd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vmulpd {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xf9,0xb9,0x59,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vmulpd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vmulpd {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xf9,0xb9,0x59,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <4 x double> @llvm.x86.avx10.vmulpd256(<4 x double> %B, <4 x double> %C, i32 9)
+  %msk = bitcast i4 %A to <4 x i1>
+  %ret = select <4 x i1> %msk, <4 x double> %ret0, <4 x double> zeroinitializer
+  ret <4 x double> %ret
+}
+
+declare <16 x half> @llvm.x86.avx10.vmulph256(<16 x half>, <16 x half>, i32)
+define <16 x half> @test_int_x86_vmulph256(<16 x half> %A, <16 x half> %B) nounwind {
+; CHECK-LABEL: test_int_x86_vmulph256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmulph {rz-sae}, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x78,0x78,0x59,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <16 x half> @llvm.x86.avx10.vmulph256(<16 x half> %A, <16 x half> %B, i32 11)
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_mask_vmulph256(<16 x half> %A, i16 %B, <16 x half> %C, <16 x half> %D) nounwind {
+; X86-LABEL: test_int_x86_mask_vmulph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vmulph {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x70,0x59,0x59,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vmulph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vmulph {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x70,0x59,0x59,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <16 x half> @llvm.x86.avx10.vmulph256(<16 x half> %C, <16 x half> %D, i32 10)
+  %msk = bitcast i16 %B to <16 x i1>
+  %ret = select <16 x i1> %msk, <16 x half> %ret0, <16 x half> %A
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_maskz_vmulph256(i16 %A, <16 x half> %B, <16 x half> %C) nounwind {
+; X86-LABEL: test_int_x86_maskz_vmulph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vmulph {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0xb9,0x59,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vmulph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vmulph {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0xb9,0x59,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <16 x half> @llvm.x86.avx10.vmulph256(<16 x half> %B, <16 x half> %C, i32 9)
+  %msk = bitcast i16 %A to <16 x i1>
+  %ret = select <16 x i1> %msk, <16 x half> %ret0, <16 x half> zeroinitializer
+  ret <16 x half> %ret
+}
+
+declare <8 x float> @llvm.x86.avx10.vmulps256(<8 x float>, <8 x float>, i32)
+define <8 x float> @test_int_x86_vmulps256(<8 x float> %A, <8 x float> %B) nounwind {
+; CHECK-LABEL: test_int_x86_vmulps256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmulps {rz-sae}, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x78,0x78,0x59,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.vmulps256(<8 x float> %A, <8 x float> %B, i32 11)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_mask_vmulps256(<8 x float> %A, i8 %B, <8 x float> %C, <8 x float> %D) nounwind {
+; X86-LABEL: test_int_x86_mask_vmulps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vmulps {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x70,0x59,0x59,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vmulps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vmulps {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x70,0x59,0x59,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <8 x float> @llvm.x86.avx10.vmulps256(<8 x float> %C, <8 x float> %D, i32 10)
+  %msk = bitcast i8 %B to <8 x i1>
+  %ret = select <8 x i1> %msk, <8 x float> %ret0, <8 x float> %A
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_maskz_vmulps256(i8 %A, <8 x float> %B, <8 x float> %C) nounwind {
+; X86-LABEL: test_int_x86_maskz_vmulps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vmulps {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x78,0xb9,0x59,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vmulps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vmulps {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x78,0xb9,0x59,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <8 x float> @llvm.x86.avx10.vmulps256(<8 x float> %B, <8 x float> %C, i32 9)
+  %msk = bitcast i8 %A to <8 x i1>
+  %ret = select <8 x i1> %msk, <8 x float> %ret0, <8 x float> zeroinitializer
+  ret <8 x float> %ret
+}
+
+declare <4 x double> @llvm.x86.avx10.mask.vrangepd256(<4 x double>, <4 x double>, i32, <4 x double>, i8, i32)
+define <4 x double> @test_int_x86_vrangepd256(<4 x double> %a, <4 x double> %b) nounwind {
+; CHECK-LABEL: test_int_x86_vrangepd256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vrangepd $127, {sae}, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xf9,0x18,0x50,0xc1,0x7f]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.mask.vrangepd256(<4 x double> %a, <4 x double> %b, i32 127, <4 x double> undef, i8 -1, i32 8)
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_mask_vrangepd256(<4 x double> %a, i8 %b, <4 x double> %c, <4 x double> %d) nounwind {
+; X86-LABEL: test_int_x86_mask_vrangepd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vrangepd $127, {sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0xf1,0x19,0x50,0xc2,0x7f]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vrangepd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vrangepd $127, {sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0xf1,0x19,0x50,0xc2,0x7f]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.mask.vrangepd256(<4 x double> %c, <4 x double> %d, i32 127, <4 x double> %a, i8 %b, i32 8)
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_maskz_vrangepd256(i8 %a, <4 x double> %b, <4 x double> %c) nounwind {
+; X86-LABEL: test_int_x86_maskz_vrangepd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vrangepd $127, {sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xf9,0x99,0x50,0xc1,0x7f]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vrangepd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vrangepd $127, {sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xf9,0x99,0x50,0xc1,0x7f]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.mask.vrangepd256(<4 x double> %b, <4 x double> %c, i32 127, <4 x double> zeroinitializer, i8 %a, i32 8)
+  ret <4 x double> %ret
+}
+
+declare <8 x float> @llvm.x86.avx10.mask.vrangeps256(<8 x float>, <8 x float>, i32, <8 x float>, i8, i32)
+define <8 x float> @test_int_x86_vrangeps256(<8 x float> %a, <8 x float> %b) nounwind {
+; CHECK-LABEL: test_int_x86_vrangeps256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vrangeps $127, {sae}, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x79,0x18,0x50,0xc1,0x7f]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vrangeps256(<8 x float> %a, <8 x float> %b, i32 127, <8 x float> undef, i8 -1, i32 8)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_mask_vrangeps256(<8 x float> %a, i8 %b, <8 x float> %c, <8 x float> %d) nounwind {
+; X86-LABEL: test_int_x86_mask_vrangeps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vrangeps $127, {sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x71,0x19,0x50,0xc2,0x7f]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vrangeps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vrangeps $127, {sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x71,0x19,0x50,0xc2,0x7f]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vrangeps256(<8 x float> %c, <8 x float> %d, i32 127, <8 x float> %a, i8 %b, i32 8)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_maskz_vrangeps256(i8 %a, <8 x float> %b, <8 x float> %c) nounwind {
+; X86-LABEL: test_int_x86_maskz_vrangeps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vrangeps $127, {sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x79,0x99,0x50,0xc1,0x7f]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vrangeps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vrangeps $127, {sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x79,0x99,0x50,0xc1,0x7f]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vrangeps256(<8 x float> %b, <8 x float> %c, i32 127, <8 x float> zeroinitializer, i8 %a, i32 8)
+  ret <8 x float> %ret
+}
+
+declare <4 x double> @llvm.x86.avx10.mask.vreducepd256(<4 x double>, i32, <4 x double>, i8, i32)
+define <4 x double> @test_int_x86_vreducepd256(<4 x double> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vreducepd256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vreducepd $127, {sae}, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xf9,0x18,0x56,0xc0,0x7f]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.mask.vreducepd256(<4 x double> %a, i32 127, <4 x double> undef, i8 -1, i32 8)
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_mask_vreducepd256(<4 x double> %a, i8 %b, <4 x double> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vreducepd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vreducepd $127, {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0xf9,0x19,0x56,0xc1,0x7f]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vreducepd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vreducepd $127, {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0xf9,0x19,0x56,0xc1,0x7f]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.mask.vreducepd256(<4 x double> %c, i32 127, <4 x double> %a, i8 %b, i32 8)
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_maskz_vreducepd256(i8 %a, <4 x double> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vreducepd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vreducepd $127, {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xf9,0x99,0x56,0xc0,0x7f]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vreducepd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vreducepd $127, {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xf9,0x99,0x56,0xc0,0x7f]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.mask.vreducepd256(<4 x double> %b, i32 127, <4 x double> zeroinitializer, i8 %a, i32 8)
+  ret <4 x double> %ret
+}
+
+declare <16 x half> @llvm.x86.avx10.mask.vreduceph256(<16 x half>, i32, <16 x half>, i16, i32)
+define <16 x half> @test_int_x86_vreduceph256(<16 x half> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vreduceph256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vreduceph $127, {sae}, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x78,0x18,0x56,0xc0,0x7f]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <16 x half> @llvm.x86.avx10.mask.vreduceph256(<16 x half> %a, i32 127, <16 x half> undef, i16 -1, i32 8)
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_mask_vreduceph256(<16 x half> %a, i16 %b, <16 x half> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vreduceph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vreduceph $127, {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x78,0x19,0x56,0xc1,0x7f]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vreduceph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vreduceph $127, {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x78,0x19,0x56,0xc1,0x7f]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <16 x half> @llvm.x86.avx10.mask.vreduceph256(<16 x half> %c, i32 127, <16 x half> %a, i16 %b, i32 8)
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_maskz_vreduceph256(i16 %a, <16 x half> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vreduceph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vreduceph $127, {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x78,0x99,0x56,0xc0,0x7f]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vreduceph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vreduceph $127, {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x78,0x99,0x56,0xc0,0x7f]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <16 x half> @llvm.x86.avx10.mask.vreduceph256(<16 x half> %b, i32 127, <16 x half> zeroinitializer, i16 %a, i32 8)
+  ret <16 x half> %ret
+}
+
+declare <8 x float> @llvm.x86.avx10.mask.vreduceps256(<8 x float>, i32, <8 x float>, i8, i32)
+define <8 x float> @test_int_x86_vreduceps256(<8 x float> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vreduceps256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vreduceps $127, {sae}, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x79,0x18,0x56,0xc0,0x7f]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vreduceps256(<8 x float> %a, i32 127, <8 x float> undef, i8 -1, i32 8)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_mask_vreduceps256(<8 x float> %a, i8 %b, <8 x float> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vreduceps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vreduceps $127, {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x79,0x19,0x56,0xc1,0x7f]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vreduceps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vreduceps $127, {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x79,0x19,0x56,0xc1,0x7f]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vreduceps256(<8 x float> %c, i32 127, <8 x float> %a, i8 %b, i32 8)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_maskz_vreduceps256(i8 %a, <8 x float> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vreduceps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vreduceps $127, {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x79,0x99,0x56,0xc0,0x7f]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vreduceps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vreduceps $127, {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x79,0x99,0x56,0xc0,0x7f]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vreduceps256(<8 x float> %b, i32 127, <8 x float> zeroinitializer, i8 %a, i32 8)
+  ret <8 x float> %ret
+}
+
+declare <4 x double> @llvm.x86.avx10.mask.vrndscalepd256(<4 x double>, i32, <4 x double>, i8, i32)
+define <4 x double> @test_int_x86_vrndscalepd256(<4 x double> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vrndscalepd256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vrndscalepd $127, {sae}, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xf9,0x18,0x09,0xc0,0x7f]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.mask.vrndscalepd256(<4 x double> %a, i32 127, <4 x double> undef, i8 -1, i32 8)
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_mask_vrndscalepd256(<4 x double> %a, i8 %b, <4 x double> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vrndscalepd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vrndscalepd $127, {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0xf9,0x19,0x09,0xc1,0x7f]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vrndscalepd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vrndscalepd $127, {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0xf9,0x19,0x09,0xc1,0x7f]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.mask.vrndscalepd256(<4 x double> %c, i32 127, <4 x double> %a, i8 %b, i32 8)
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_maskz_vrndscalepd256(i8 %a, <4 x double> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vrndscalepd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vrndscalepd $127, {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xf9,0x99,0x09,0xc0,0x7f]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vrndscalepd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vrndscalepd $127, {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xf9,0x99,0x09,0xc0,0x7f]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.mask.vrndscalepd256(<4 x double> %b, i32 127, <4 x double> zeroinitializer, i8 %a, i32 8)
+  ret <4 x double> %ret
+}
+
+declare <16 x half> @llvm.x86.avx10.mask.vrndscaleph256(<16 x half>, i32, <16 x half>, i16, i32)
+define <16 x half> @test_int_x86_vrndscaleph256(<16 x half> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vrndscaleph256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vrndscaleph $127, {sae}, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x78,0x18,0x08,0xc0,0x7f]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <16 x half> @llvm.x86.avx10.mask.vrndscaleph256(<16 x half> %a, i32 127, <16 x half> undef, i16 -1, i32 8)
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_mask_vrndscaleph256(<16 x half> %a, i16 %b, <16 x half> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vrndscaleph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vrndscaleph $127, {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x78,0x19,0x08,0xc1,0x7f]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vrndscaleph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vrndscaleph $127, {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x78,0x19,0x08,0xc1,0x7f]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <16 x half> @llvm.x86.avx10.mask.vrndscaleph256(<16 x half> %c, i32 127, <16 x half> %a, i16 %b, i32 8)
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_maskz_vrndscaleph256(i16 %a, <16 x half> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vrndscaleph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vrndscaleph $127, {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x78,0x99,0x08,0xc0,0x7f]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vrndscaleph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vrndscaleph $127, {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x78,0x99,0x08,0xc0,0x7f]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <16 x half> @llvm.x86.avx10.mask.vrndscaleph256(<16 x half> %b, i32 127, <16 x half> zeroinitializer, i16 %a, i32 8)
+  ret <16 x half> %ret
+}
+
+declare <8 x float> @llvm.x86.avx10.mask.vrndscaleps256(<8 x float>, i32, <8 x float>, i8, i32)
+define <8 x float> @test_int_x86_vrndscaleps256(<8 x float> %a) nounwind {
+; CHECK-LABEL: test_int_x86_vrndscaleps256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vrndscaleps $127, {sae}, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x79,0x18,0x08,0xc0,0x7f]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vrndscaleps256(<8 x float> %a, i32 127, <8 x float> undef, i8 -1, i32 8)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_mask_vrndscaleps256(<8 x float> %a, i8 %b, <8 x float> %c) nounwind {
+; X86-LABEL: test_int_x86_mask_vrndscaleps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vrndscaleps $127, {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x79,0x19,0x08,0xc1,0x7f]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vrndscaleps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vrndscaleps $127, {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x79,0x19,0x08,0xc1,0x7f]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vrndscaleps256(<8 x float> %c, i32 127, <8 x float> %a, i8 %b, i32 8)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_maskz_vrndscaleps256(i8 %a, <8 x float> %b) nounwind {
+; X86-LABEL: test_int_x86_maskz_vrndscaleps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vrndscaleps $127, {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x79,0x99,0x08,0xc0,0x7f]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vrndscaleps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vrndscaleps $127, {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x79,0x99,0x08,0xc0,0x7f]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vrndscaleps256(<8 x float> %b, i32 127, <8 x float> zeroinitializer, i8 %a, i32 8)
+  ret <8 x float> %ret
+}
+
+declare <4 x double> @llvm.x86.avx10.mask.vscalefpd256(<4 x double>, <4 x double>, <4 x double>, i8, i32)
+define <4 x double> @test_int_x86_vscalefpd256(<4 x double> %a, <4 x double> %b) nounwind {
+; CHECK-LABEL: test_int_x86_vscalefpd256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vscalefpd {rz-sae}, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xf9,0x78,0x2c,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.mask.vscalefpd256(<4 x double> %a, <4 x double> %b, <4 x double> undef, i8 -1, i32 11)
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_mask_vscalefpd256(<4 x double> %a, i8 %b, <4 x double> %c, <4 x double> %d) nounwind {
+; X86-LABEL: test_int_x86_mask_vscalefpd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vscalefpd {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf1,0x59,0x2c,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vscalefpd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vscalefpd {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf1,0x59,0x2c,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.mask.vscalefpd256(<4 x double> %c, <4 x double> %d, <4 x double> %a, i8 %b, i32 10)
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_maskz_vscalefpd256(i8 %a, <4 x double> %b, <4 x double> %c) nounwind {
+; X86-LABEL: test_int_x86_maskz_vscalefpd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vscalefpd {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf9,0xb9,0x2c,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vscalefpd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vscalefpd {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf9,0xb9,0x2c,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.mask.vscalefpd256(<4 x double> %b, <4 x double> %c, <4 x double> zeroinitializer, i8 %a, i32 9)
+  ret <4 x double> %ret
+}
+
+declare <16 x half> @llvm.x86.avx10.mask.vscalefph256(<16 x half>, <16 x half>, <16 x half>, i16, i32)
+define <16 x half> @test_int_x86_vscalefph256(<16 x half> %a, <16 x half> %b) nounwind {
+; CHECK-LABEL: test_int_x86_vscalefph256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vscalefph {rz-sae}, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf6,0x79,0x78,0x2c,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <16 x half> @llvm.x86.avx10.mask.vscalefph256(<16 x half> %a, <16 x half> %b, <16 x half> undef, i16 -1, i32 11)
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_mask_vscalefph256(<16 x half> %a, i16 %b, <16 x half> %c, <16 x half> %d) nounwind {
+; X86-LABEL: test_int_x86_mask_vscalefph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vscalefph {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf6,0x71,0x59,0x2c,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vscalefph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vscalefph {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf6,0x71,0x59,0x2c,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <16 x half> @llvm.x86.avx10.mask.vscalefph256(<16 x half> %c, <16 x half> %d, <16 x half> %a, i16 %b, i32 10)
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_maskz_vscalefph256(i16 %a, <16 x half> %b, <16 x half> %c) nounwind {
+; X86-LABEL: test_int_x86_maskz_vscalefph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vscalefph {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x79,0xb9,0x2c,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vscalefph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vscalefph {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf6,0x79,0xb9,0x2c,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <16 x half> @llvm.x86.avx10.mask.vscalefph256(<16 x half> %b, <16 x half> %c, <16 x half> zeroinitializer, i16 %a, i32 9)
+  ret <16 x half> %ret
+}
+
+declare <8 x float> @llvm.x86.avx10.mask.vscalefps256(<8 x float>, <8 x float>, <8 x float>, i8, i32)
+define <8 x float> @test_int_x86_vscalefps256(<8 x float> %a, <8 x float> %b) nounwind {
+; CHECK-LABEL: test_int_x86_vscalefps256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vscalefps {rz-sae}, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x79,0x78,0x2c,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vscalefps256(<8 x float> %a, <8 x float> %b, <8 x float> undef, i8 -1, i32 11)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_mask_vscalefps256(<8 x float> %a, i8 %b, <8 x float> %c, <8 x float> %d) nounwind {
+; X86-LABEL: test_int_x86_mask_vscalefps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vscalefps {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x71,0x59,0x2c,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vscalefps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vscalefps {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x71,0x59,0x2c,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vscalefps256(<8 x float> %c, <8 x float> %d, <8 x float> %a, i8 %b, i32 10)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_maskz_vscalefps256(i8 %a, <8 x float> %b, <8 x float> %c) nounwind {
+; X86-LABEL: test_int_x86_maskz_vscalefps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vscalefps {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x79,0xb9,0x2c,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vscalefps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vscalefps {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x79,0xb9,0x2c,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.mask.vscalefps256(<8 x float> %b, <8 x float> %c, <8 x float> zeroinitializer, i8 %a, i32 9)
+  ret <8 x float> %ret
+}
+
+declare <4 x double> @llvm.x86.avx10.vsqrtpd256(<4 x double>, i32)
+define <4 x double> @test_int_x86_vsqrtpd256(<4 x double> %A) nounwind {
+; CHECK-LABEL: test_int_x86_vsqrtpd256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsqrtpd {rz-sae}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xf9,0x78,0x51,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.vsqrtpd256(<4 x double> %A, i32 11)
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_mask_vsqrtpd256(<4 x double> %A, i4 %B, <4 x double> %C) nounwind {
+; X86-LABEL: test_int_x86_mask_vsqrtpd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vsqrtpd {ru-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xf9,0x59,0x51,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vsqrtpd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vsqrtpd {ru-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xf9,0x59,0x51,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <4 x double> @llvm.x86.avx10.vsqrtpd256(<4 x double> %C, i32 10)
+  %msk = bitcast i4 %B to <4 x i1>
+  %ret = select <4 x i1> %msk, <4 x double> %ret0, <4 x double> %A
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_maskz_vsqrtpd256(i4 %A, <4 x double> %B) nounwind {
+; X86-LABEL: test_int_x86_maskz_vsqrtpd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vsqrtpd {rd-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xf9,0xb9,0x51,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vsqrtpd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vsqrtpd {rd-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xf9,0xb9,0x51,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <4 x double> @llvm.x86.avx10.vsqrtpd256(<4 x double> %B, i32 9)
+  %msk = bitcast i4 %A to <4 x i1>
+  %ret = select <4 x i1> %msk, <4 x double> %ret0, <4 x double> zeroinitializer
+  ret <4 x double> %ret
+}
+
+declare <16 x half> @llvm.x86.avx10.vsqrtph256(<16 x half>, i32)
+define <16 x half> @test_int_x86_vsqrtph256(<16 x half> %A) nounwind {
+; CHECK-LABEL: test_int_x86_vsqrtph256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsqrtph {rz-sae}, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x78,0x78,0x51,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <16 x half> @llvm.x86.avx10.vsqrtph256(<16 x half> %A, i32 11)
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_mask_vsqrtph256(<16 x half> %A, i16 %B, <16 x half> %C) nounwind {
+; X86-LABEL: test_int_x86_mask_vsqrtph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vsqrtph {ru-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x78,0x59,0x51,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vsqrtph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vsqrtph {ru-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x78,0x59,0x51,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <16 x half> @llvm.x86.avx10.vsqrtph256(<16 x half> %C, i32 10)
+  %msk = bitcast i16 %B to <16 x i1>
+  %ret = select <16 x i1> %msk, <16 x half> %ret0, <16 x half> %A
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_maskz_vsqrtph256(i16 %A, <16 x half> %B) nounwind {
+; X86-LABEL: test_int_x86_maskz_vsqrtph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vsqrtph {rd-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0xb9,0x51,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vsqrtph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vsqrtph {rd-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0xb9,0x51,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <16 x half> @llvm.x86.avx10.vsqrtph256(<16 x half> %B, i32 9)
+  %msk = bitcast i16 %A to <16 x i1>
+  %ret = select <16 x i1> %msk, <16 x half> %ret0, <16 x half> zeroinitializer
+  ret <16 x half> %ret
+}
+
+declare <8 x float> @llvm.x86.avx10.vsqrtps256(<8 x float>, i32)
+define <8 x float> @test_int_x86_vsqrtps256(<8 x float> %A) nounwind {
+; CHECK-LABEL: test_int_x86_vsqrtps256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsqrtps {rz-sae}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x78,0x78,0x51,0xc0]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.vsqrtps256(<8 x float> %A, i32 11)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_mask_vsqrtps256(<8 x float> %A, i8 %B, <8 x float> %C) nounwind {
+; X86-LABEL: test_int_x86_mask_vsqrtps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vsqrtps {ru-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x78,0x59,0x51,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vsqrtps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vsqrtps {ru-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x78,0x59,0x51,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <8 x float> @llvm.x86.avx10.vsqrtps256(<8 x float> %C, i32 10)
+  %msk = bitcast i8 %B to <8 x i1>
+  %ret = select <8 x i1> %msk, <8 x float> %ret0, <8 x float> %A
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_maskz_vsqrtps256(i8 %A, <8 x float> %B) nounwind {
+; X86-LABEL: test_int_x86_maskz_vsqrtps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vsqrtps {rd-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x78,0xb9,0x51,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vsqrtps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vsqrtps {rd-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x78,0xb9,0x51,0xc0]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <8 x float> @llvm.x86.avx10.vsqrtps256(<8 x float> %B, i32 9)
+  %msk = bitcast i8 %A to <8 x i1>
+  %ret = select <8 x i1> %msk, <8 x float> %ret0, <8 x float> zeroinitializer
+  ret <8 x float> %ret
+}
+
+declare <4 x double> @llvm.x86.avx10.vsubpd256(<4 x double>, <4 x double>, i32)
+define <4 x double> @test_int_x86_vsubpd256(<4 x double> %A, <4 x double> %B) nounwind {
+; CHECK-LABEL: test_int_x86_vsubpd256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsubpd {rz-sae}, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xf9,0x78,0x5c,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x double> @llvm.x86.avx10.vsubpd256(<4 x double> %A, <4 x double> %B, i32 11)
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_mask_vsubpd256(<4 x double> %A, i4 %B, <4 x double> %C, <4 x double> %D) nounwind {
+; X86-LABEL: test_int_x86_mask_vsubpd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vsubpd {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xf1,0x59,0x5c,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vsubpd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vsubpd {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xf1,0x59,0x5c,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <4 x double> @llvm.x86.avx10.vsubpd256(<4 x double> %C, <4 x double> %D, i32 10)
+  %msk = bitcast i4 %B to <4 x i1>
+  %ret = select <4 x i1> %msk, <4 x double> %ret0, <4 x double> %A
+  ret <4 x double> %ret
+}
+
+define <4 x double> @test_int_x86_maskz_vsubpd256(i4 %A, <4 x double> %B, <4 x double> %C) nounwind {
+; X86-LABEL: test_int_x86_maskz_vsubpd256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vsubpd {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xf9,0xb9,0x5c,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vsubpd256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vsubpd {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xf9,0xb9,0x5c,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <4 x double> @llvm.x86.avx10.vsubpd256(<4 x double> %B, <4 x double> %C, i32 9)
+  %msk = bitcast i4 %A to <4 x i1>
+  %ret = select <4 x i1> %msk, <4 x double> %ret0, <4 x double> zeroinitializer
+  ret <4 x double> %ret
+}
+
+declare <16 x half> @llvm.x86.avx10.vsubph256(<16 x half>, <16 x half>, i32)
+define <16 x half> @test_int_x86_vsubph256(<16 x half> %A, <16 x half> %B) nounwind {
+; CHECK-LABEL: test_int_x86_vsubph256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsubph {rz-sae}, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x78,0x78,0x5c,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <16 x half> @llvm.x86.avx10.vsubph256(<16 x half> %A, <16 x half> %B, i32 11)
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_mask_vsubph256(<16 x half> %A, i16 %B, <16 x half> %C, <16 x half> %D) nounwind {
+; X86-LABEL: test_int_x86_mask_vsubph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vsubph {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x70,0x59,0x5c,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vsubph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vsubph {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x70,0x59,0x5c,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <16 x half> @llvm.x86.avx10.vsubph256(<16 x half> %C, <16 x half> %D, i32 10)
+  %msk = bitcast i16 %B to <16 x i1>
+  %ret = select <16 x i1> %msk, <16 x half> %ret0, <16 x half> %A
+  ret <16 x half> %ret
+}
+
+define <16 x half> @test_int_x86_maskz_vsubph256(i16 %A, <16 x half> %B, <16 x half> %C) nounwind {
+; X86-LABEL: test_int_x86_maskz_vsubph256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vsubph {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0xb9,0x5c,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vsubph256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vsubph {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0xb9,0x5c,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <16 x half> @llvm.x86.avx10.vsubph256(<16 x half> %B, <16 x half> %C, i32 9)
+  %msk = bitcast i16 %A to <16 x i1>
+  %ret = select <16 x i1> %msk, <16 x half> %ret0, <16 x half> zeroinitializer
+  ret <16 x half> %ret
+}
+
+declare <8 x float> @llvm.x86.avx10.vsubps256(<8 x float>, <8 x float>, i32)
+define <8 x float> @test_int_x86_vsubps256(<8 x float> %A, <8 x float> %B) nounwind {
+; CHECK-LABEL: test_int_x86_vsubps256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsubps {rz-sae}, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x78,0x78,0x5c,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.avx10.vsubps256(<8 x float> %A, <8 x float> %B, i32 11)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_mask_vsubps256(<8 x float> %A, i8 %B, <8 x float> %C, <8 x float> %D) nounwind {
+; X86-LABEL: test_int_x86_mask_vsubps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vsubps {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x70,0x59,0x5c,0xc2]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_mask_vsubps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vsubps {ru-sae}, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x70,0x59,0x5c,0xc2]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <8 x float> @llvm.x86.avx10.vsubps256(<8 x float> %C, <8 x float> %D, i32 10)
+  %msk = bitcast i8 %B to <8 x i1>
+  %ret = select <8 x i1> %msk, <8 x float> %ret0, <8 x float> %A
+  ret <8 x float> %ret
+}
+
+define <8 x float> @test_int_x86_maskz_vsubps256(i8 %A, <8 x float> %B, <8 x float> %C) nounwind {
+; X86-LABEL: test_int_x86_maskz_vsubps256:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vsubps {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x78,0xb9,0x5c,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_maskz_vsubps256:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vsubps {rd-sae}, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x78,0xb9,0x5c,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %ret0 = call <8 x float> @llvm.x86.avx10.vsubps256(<8 x float> %B, <8 x float> %C, i32 9)
+  %msk = bitcast i8 %A to <8 x i1>
+  %ret = select <8 x i1> %msk, <8 x float> %ret0, <8 x float> zeroinitializer
+  ret <8 x float> %ret
+}

diff  --git a/llvm/test/MC/Disassembler/X86/avx10_2ni-32.txt b/llvm/test/MC/Disassembler/X86/avx10_2ni-32.txt
index 59457e6eec293..8254e37e9aa9e 100644
--- a/llvm/test/MC/Disassembler/X86/avx10_2ni-32.txt
+++ b/llvm/test/MC/Disassembler/X86/avx10_2ni-32.txt
@@ -148,3 +148,1743 @@
 # ATT:   vaddps {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
 # INTEL: vaddps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
 0x62,0xf1,0x60,0xff,0x58,0xd4
+
+# ATT:   vcmppd $123, {sae}, %ymm4, %ymm3, %k5
+# INTEL: vcmppd k5, ymm3, ymm4, {sae}, 123
+0x62,0xf1,0xe1,0x18,0xc2,0xec,0x7b
+
+# ATT:   vcmppd $123, {sae}, %ymm4, %ymm3, %k5 {%k7}
+# INTEL: vcmppd k5 {k7}, ymm3, ymm4, {sae}, 123
+0x62,0xf1,0xe1,0x1f,0xc2,0xec,0x7b
+
+# ATT:   vcmpph $123, {sae}, %ymm4, %ymm3, %k5
+# INTEL: vcmpph k5, ymm3, ymm4, {sae}, 123
+0x62,0xf3,0x60,0x18,0xc2,0xec,0x7b
+
+# ATT:   vcmpph $123, {sae}, %ymm4, %ymm3, %k5 {%k7}
+# INTEL: vcmpph k5 {k7}, ymm3, ymm4, {sae}, 123
+0x62,0xf3,0x60,0x1f,0xc2,0xec,0x7b
+
+# ATT:   vcmpps $123, {sae}, %ymm4, %ymm3, %k5
+# INTEL: vcmpps k5, ymm3, ymm4, {sae}, 123
+0x62,0xf1,0x60,0x18,0xc2,0xec,0x7b
+
+# ATT:   vcmpps $123, {sae}, %ymm4, %ymm3, %k5 {%k7}
+# INTEL: vcmpps k5 {k7}, ymm3, ymm4, {sae}, 123
+0x62,0xf1,0x60,0x1f,0xc2,0xec,0x7b
+
+# ATT:   vcvtdq2ph {rn-sae}, %ymm3, %xmm2
+# INTEL: vcvtdq2ph xmm2, ymm3, {rn-sae}
+0x62,0xf5,0x78,0x18,0x5b,0xd3
+
+# ATT:   vcvtdq2ph {rd-sae}, %ymm3, %xmm2 {%k7}
+# INTEL: vcvtdq2ph xmm2 {k7}, ymm3, {rd-sae}
+0x62,0xf5,0x78,0x3f,0x5b,0xd3
+
+# ATT:   vcvtdq2ph {rz-sae}, %ymm3, %xmm2 {%k7} {z}
+# INTEL: vcvtdq2ph xmm2 {k7} {z}, ymm3, {rz-sae}
+0x62,0xf5,0x78,0xff,0x5b,0xd3
+
+# ATT:   vcvtdq2ps {rn-sae}, %ymm3, %ymm2
+# INTEL: vcvtdq2ps ymm2, ymm3, {rn-sae}
+0x62,0xf1,0x78,0x18,0x5b,0xd3
+
+# ATT:   vcvtdq2ps {rd-sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vcvtdq2ps ymm2 {k7}, ymm3, {rd-sae}
+0x62,0xf1,0x78,0x3f,0x5b,0xd3
+
+# ATT:   vcvtdq2ps {rz-sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vcvtdq2ps ymm2 {k7} {z}, ymm3, {rz-sae}
+0x62,0xf1,0x78,0xff,0x5b,0xd3
+
+# ATT:   vcvtpd2dq {rn-sae}, %ymm3, %xmm2
+# INTEL: vcvtpd2dq xmm2, ymm3, {rn-sae}
+0x62,0xf1,0xfb,0x18,0xe6,0xd3
+
+# ATT:   vcvtpd2dq {rd-sae}, %ymm3, %xmm2 {%k7}
+# INTEL: vcvtpd2dq xmm2 {k7}, ymm3, {rd-sae}
+0x62,0xf1,0xfb,0x3f,0xe6,0xd3
+
+# ATT:   vcvtpd2dq {rz-sae}, %ymm3, %xmm2 {%k7} {z}
+# INTEL: vcvtpd2dq xmm2 {k7} {z}, ymm3, {rz-sae}
+0x62,0xf1,0xfb,0xff,0xe6,0xd3
+
+# ATT:   vcvtpd2ph {rn-sae}, %ymm3, %xmm2
+# INTEL: vcvtpd2ph xmm2, ymm3, {rn-sae}
+0x62,0xf5,0xf9,0x18,0x5a,0xd3
+
+# ATT:   vcvtpd2ph {rd-sae}, %ymm3, %xmm2 {%k7}
+# INTEL: vcvtpd2ph xmm2 {k7}, ymm3, {rd-sae}
+0x62,0xf5,0xf9,0x3f,0x5a,0xd3
+
+# ATT:   vcvtpd2ph {rz-sae}, %ymm3, %xmm2 {%k7} {z}
+# INTEL: vcvtpd2ph xmm2 {k7} {z}, ymm3, {rz-sae}
+0x62,0xf5,0xf9,0xff,0x5a,0xd3
+
+# ATT:   vcvtpd2ps {rn-sae}, %ymm3, %xmm2
+# INTEL: vcvtpd2ps xmm2, ymm3, {rn-sae}
+0x62,0xf1,0xf9,0x18,0x5a,0xd3
+
+# ATT:   vcvtpd2ps {rd-sae}, %ymm3, %xmm2 {%k7}
+# INTEL: vcvtpd2ps xmm2 {k7}, ymm3, {rd-sae}
+0x62,0xf1,0xf9,0x3f,0x5a,0xd3
+
+# ATT:   vcvtpd2ps {rz-sae}, %ymm3, %xmm2 {%k7} {z}
+# INTEL: vcvtpd2ps xmm2 {k7} {z}, ymm3, {rz-sae}
+0x62,0xf1,0xf9,0xff,0x5a,0xd3
+
+# ATT:   vcvtpd2qq {rn-sae}, %ymm3, %ymm2
+# INTEL: vcvtpd2qq ymm2, ymm3, {rn-sae}
+0x62,0xf1,0xf9,0x18,0x7b,0xd3
+
+# ATT:   vcvtpd2qq {rd-sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vcvtpd2qq ymm2 {k7}, ymm3, {rd-sae}
+0x62,0xf1,0xf9,0x3f,0x7b,0xd3
+
+# ATT:   vcvtpd2qq {rz-sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vcvtpd2qq ymm2 {k7} {z}, ymm3, {rz-sae}
+0x62,0xf1,0xf9,0xff,0x7b,0xd3
+
+# ATT:   vcvtpd2udq {rn-sae}, %ymm3, %xmm2
+# INTEL: vcvtpd2udq xmm2, ymm3, {rn-sae}
+0x62,0xf1,0xf8,0x18,0x79,0xd3
+
+# ATT:   vcvtpd2udq {rd-sae}, %ymm3, %xmm2 {%k7}
+# INTEL: vcvtpd2udq xmm2 {k7}, ymm3, {rd-sae}
+0x62,0xf1,0xf8,0x3f,0x79,0xd3
+
+# ATT:   vcvtpd2udq {rz-sae}, %ymm3, %xmm2 {%k7} {z}
+# INTEL: vcvtpd2udq xmm2 {k7} {z}, ymm3, {rz-sae}
+0x62,0xf1,0xf8,0xff,0x79,0xd3
+
+# ATT:   vcvtpd2uqq {rn-sae}, %ymm3, %ymm2
+# INTEL: vcvtpd2uqq ymm2, ymm3, {rn-sae}
+0x62,0xf1,0xf9,0x18,0x79,0xd3
+
+# ATT:   vcvtpd2uqq {rd-sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vcvtpd2uqq ymm2 {k7}, ymm3, {rd-sae}
+0x62,0xf1,0xf9,0x3f,0x79,0xd3
+
+# ATT:   vcvtpd2uqq {rz-sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vcvtpd2uqq ymm2 {k7} {z}, ymm3, {rz-sae}
+0x62,0xf1,0xf9,0xff,0x79,0xd3
+
+# ATT:   vcvtph2dq {rn-sae}, %xmm3, %ymm2
+# INTEL: vcvtph2dq ymm2, xmm3, {rn-sae}
+0x62,0xf5,0x79,0x18,0x5b,0xd3
+
+# ATT:   vcvtph2dq {rd-sae}, %xmm3, %ymm2 {%k7}
+# INTEL: vcvtph2dq ymm2 {k7}, xmm3, {rd-sae}
+0x62,0xf5,0x79,0x3f,0x5b,0xd3
+
+# ATT:   vcvtph2dq {rz-sae}, %xmm3, %ymm2 {%k7} {z}
+# INTEL: vcvtph2dq ymm2 {k7} {z}, xmm3, {rz-sae}
+0x62,0xf5,0x79,0xff,0x5b,0xd3
+
+# ATT:   vcvtph2pd {sae}, %xmm3, %ymm2
+# INTEL: vcvtph2pd ymm2, xmm3, {sae}
+0x62,0xf5,0x78,0x18,0x5a,0xd3
+
+# ATT:   vcvtph2pd {sae}, %xmm3, %ymm2 {%k7}
+# INTEL: vcvtph2pd ymm2 {k7}, xmm3, {sae}
+0x62,0xf5,0x78,0x1f,0x5a,0xd3
+
+# ATT:   vcvtph2pd {sae}, %xmm3, %ymm2 {%k7} {z}
+# INTEL: vcvtph2pd ymm2 {k7} {z}, xmm3, {sae}
+0x62,0xf5,0x78,0x9f,0x5a,0xd3
+
+# ATT:   vcvtph2ps {sae}, %xmm3, %ymm2
+# INTEL: vcvtph2ps ymm2, xmm3, {sae}
+0x62,0xf2,0x79,0x18,0x13,0xd3
+
+# ATT:   vcvtph2ps {sae}, %xmm3, %ymm2 {%k7}
+# INTEL: vcvtph2ps ymm2 {k7}, xmm3, {sae}
+0x62,0xf2,0x79,0x1f,0x13,0xd3
+
+# ATT:   vcvtph2ps {sae}, %xmm3, %ymm2 {%k7} {z}
+# INTEL: vcvtph2ps ymm2 {k7} {z}, xmm3, {sae}
+0x62,0xf2,0x79,0x9f,0x13,0xd3
+
+# ATT:   vcvtph2psx {sae}, %xmm3, %ymm2
+# INTEL: vcvtph2psx ymm2, xmm3, {sae}
+0x62,0xf6,0x79,0x18,0x13,0xd3
+
+# ATT:   vcvtph2psx {sae}, %xmm3, %ymm2 {%k7}
+# INTEL: vcvtph2psx ymm2 {k7}, xmm3, {sae}
+0x62,0xf6,0x79,0x1f,0x13,0xd3
+
+# ATT:   vcvtph2psx {sae}, %xmm3, %ymm2 {%k7} {z}
+# INTEL: vcvtph2psx ymm2 {k7} {z}, xmm3, {sae}
+0x62,0xf6,0x79,0x9f,0x13,0xd3
+
+# ATT:   vcvtph2qq {rn-sae}, %xmm3, %ymm2
+# INTEL: vcvtph2qq ymm2, xmm3, {rn-sae}
+0x62,0xf5,0x79,0x18,0x7b,0xd3
+
+# ATT:   vcvtph2qq {rd-sae}, %xmm3, %ymm2 {%k7}
+# INTEL: vcvtph2qq ymm2 {k7}, xmm3, {rd-sae}
+0x62,0xf5,0x79,0x3f,0x7b,0xd3
+
+# ATT:   vcvtph2qq {rz-sae}, %xmm3, %ymm2 {%k7} {z}
+# INTEL: vcvtph2qq ymm2 {k7} {z}, xmm3, {rz-sae}
+0x62,0xf5,0x79,0xff,0x7b,0xd3
+
+# ATT:   vcvtph2udq {rn-sae}, %xmm3, %ymm2
+# INTEL: vcvtph2udq ymm2, xmm3, {rn-sae}
+0x62,0xf5,0x78,0x18,0x79,0xd3
+
+# ATT:   vcvtph2udq {rd-sae}, %xmm3, %ymm2 {%k7}
+# INTEL: vcvtph2udq ymm2 {k7}, xmm3, {rd-sae}
+0x62,0xf5,0x78,0x3f,0x79,0xd3
+
+# ATT:   vcvtph2udq {rz-sae}, %xmm3, %ymm2 {%k7} {z}
+# INTEL: vcvtph2udq ymm2 {k7} {z}, xmm3, {rz-sae}
+0x62,0xf5,0x78,0xff,0x79,0xd3
+
+# ATT:   vcvtph2uqq {rn-sae}, %xmm3, %ymm2
+# INTEL: vcvtph2uqq ymm2, xmm3, {rn-sae}
+0x62,0xf5,0x79,0x18,0x79,0xd3
+
+# ATT:   vcvtph2uqq {rd-sae}, %xmm3, %ymm2 {%k7}
+# INTEL: vcvtph2uqq ymm2 {k7}, xmm3, {rd-sae}
+0x62,0xf5,0x79,0x3f,0x79,0xd3
+
+# ATT:   vcvtph2uqq {rz-sae}, %xmm3, %ymm2 {%k7} {z}
+# INTEL: vcvtph2uqq ymm2 {k7} {z}, xmm3, {rz-sae}
+0x62,0xf5,0x79,0xff,0x79,0xd3
+
+# ATT:   vcvtph2uw {rn-sae}, %ymm3, %ymm2
+# INTEL: vcvtph2uw ymm2, ymm3, {rn-sae}
+0x62,0xf5,0x78,0x18,0x7d,0xd3
+
+# ATT:   vcvtph2uw {rd-sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vcvtph2uw ymm2 {k7}, ymm3, {rd-sae}
+0x62,0xf5,0x78,0x3f,0x7d,0xd3
+
+# ATT:   vcvtph2uw {rz-sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vcvtph2uw ymm2 {k7} {z}, ymm3, {rz-sae}
+0x62,0xf5,0x78,0xff,0x7d,0xd3
+
+# ATT:   vcvtph2w {rn-sae}, %ymm3, %ymm2
+# INTEL: vcvtph2w ymm2, ymm3, {rn-sae}
+0x62,0xf5,0x79,0x18,0x7d,0xd3
+
+# ATT:   vcvtph2w {rd-sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vcvtph2w ymm2 {k7}, ymm3, {rd-sae}
+0x62,0xf5,0x79,0x3f,0x7d,0xd3
+
+# ATT:   vcvtph2w {rz-sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vcvtph2w ymm2 {k7} {z}, ymm3, {rz-sae}
+0x62,0xf5,0x79,0xff,0x7d,0xd3
+
+# ATT:   vcvtps2dq {rn-sae}, %ymm3, %ymm2
+# INTEL: vcvtps2dq ymm2, ymm3, {rn-sae}
+0x62,0xf1,0x79,0x18,0x5b,0xd3
+
+# ATT:   vcvtps2dq {rd-sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vcvtps2dq ymm2 {k7}, ymm3, {rd-sae}
+0x62,0xf1,0x79,0x3f,0x5b,0xd3
+
+# ATT:   vcvtps2dq {rz-sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vcvtps2dq ymm2 {k7} {z}, ymm3, {rz-sae}
+0x62,0xf1,0x79,0xff,0x5b,0xd3
+
+# ATT:   vcvtps2pd {sae}, %xmm3, %ymm2
+# INTEL: vcvtps2pd ymm2, xmm3, {sae}
+0x62,0xf1,0x78,0x18,0x5a,0xd3
+
+# ATT:   vcvtps2pd {sae}, %xmm3, %ymm2 {%k7}
+# INTEL: vcvtps2pd ymm2 {k7}, xmm3, {sae}
+0x62,0xf1,0x78,0x1f,0x5a,0xd3
+
+# ATT:   vcvtps2pd {sae}, %xmm3, %ymm2 {%k7} {z}
+# INTEL: vcvtps2pd ymm2 {k7} {z}, xmm3, {sae}
+0x62,0xf1,0x78,0x9f,0x5a,0xd3
+
+# ATT:   vcvtps2ph $123, {sae}, %ymm3, %xmm2
+# INTEL: vcvtps2ph xmm2, ymm3, {sae}, 123
+0x62,0xf3,0x79,0x18,0x1d,0xda,0x7b
+
+# ATT:   vcvtps2ph $123, {sae}, %ymm3, %xmm2 {%k7}
+# INTEL: vcvtps2ph xmm2 {k7}, ymm3, {sae}, 123
+0x62,0xf3,0x79,0x1f,0x1d,0xda,0x7b
+
+# ATT:   vcvtps2ph $123, {sae}, %ymm3, %xmm2 {%k7} {z}
+# INTEL: vcvtps2ph xmm2 {k7} {z}, ymm3, {sae}, 123
+0x62,0xf3,0x79,0x9f,0x1d,0xda,0x7b
+
+# ATT:   vcvtps2phx {rn-sae}, %ymm3, %xmm2
+# INTEL: vcvtps2phx xmm2, ymm3, {rn-sae}
+0x62,0xf5,0x79,0x18,0x1d,0xd3
+
+# ATT:   vcvtps2phx {rd-sae}, %ymm3, %xmm2 {%k7}
+# INTEL: vcvtps2phx xmm2 {k7}, ymm3, {rd-sae}
+0x62,0xf5,0x79,0x3f,0x1d,0xd3
+
+# ATT:   vcvtps2phx {rz-sae}, %ymm3, %xmm2 {%k7} {z}
+# INTEL: vcvtps2phx xmm2 {k7} {z}, ymm3, {rz-sae}
+0x62,0xf5,0x79,0xff,0x1d,0xd3
+
+# ATT:   vcvtps2qq {rn-sae}, %xmm3, %ymm2
+# INTEL: vcvtps2qq ymm2, xmm3, {rn-sae}
+0x62,0xf1,0x79,0x18,0x7b,0xd3
+
+# ATT:   vcvtps2qq {rd-sae}, %xmm3, %ymm2 {%k7}
+# INTEL: vcvtps2qq ymm2 {k7}, xmm3, {rd-sae}
+0x62,0xf1,0x79,0x3f,0x7b,0xd3
+
+# ATT:   vcvtps2qq {rz-sae}, %xmm3, %ymm2 {%k7} {z}
+# INTEL: vcvtps2qq ymm2 {k7} {z}, xmm3, {rz-sae}
+0x62,0xf1,0x79,0xff,0x7b,0xd3
+
+# ATT:   vcvtps2udq {rn-sae}, %ymm3, %ymm2
+# INTEL: vcvtps2udq ymm2, ymm3, {rn-sae}
+0x62,0xf1,0x78,0x18,0x79,0xd3
+
+# ATT:   vcvtps2udq {rd-sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vcvtps2udq ymm2 {k7}, ymm3, {rd-sae}
+0x62,0xf1,0x78,0x3f,0x79,0xd3
+
+# ATT:   vcvtps2udq {rz-sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vcvtps2udq ymm2 {k7} {z}, ymm3, {rz-sae}
+0x62,0xf1,0x78,0xff,0x79,0xd3
+
+# ATT:   vcvtps2uqq {rn-sae}, %xmm3, %ymm2
+# INTEL: vcvtps2uqq ymm2, xmm3, {rn-sae}
+0x62,0xf1,0x79,0x18,0x79,0xd3
+
+# ATT:   vcvtps2uqq {rd-sae}, %xmm3, %ymm2 {%k7}
+# INTEL: vcvtps2uqq ymm2 {k7}, xmm3, {rd-sae}
+0x62,0xf1,0x79,0x3f,0x79,0xd3
+
+# ATT:   vcvtps2uqq {rz-sae}, %xmm3, %ymm2 {%k7} {z}
+# INTEL: vcvtps2uqq ymm2 {k7} {z}, xmm3, {rz-sae}
+0x62,0xf1,0x79,0xff,0x79,0xd3
+
+# ATT:   vcvtqq2pd {rn-sae}, %ymm3, %ymm2
+# INTEL: vcvtqq2pd ymm2, ymm3, {rn-sae}
+0x62,0xf1,0xfa,0x18,0xe6,0xd3
+
+# ATT:   vcvtqq2pd {rd-sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vcvtqq2pd ymm2 {k7}, ymm3, {rd-sae}
+0x62,0xf1,0xfa,0x3f,0xe6,0xd3
+
+# ATT:   vcvtqq2pd {rz-sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vcvtqq2pd ymm2 {k7} {z}, ymm3, {rz-sae}
+0x62,0xf1,0xfa,0xff,0xe6,0xd3
+
+# ATT:   vcvtqq2ph {rn-sae}, %ymm3, %xmm2
+# INTEL: vcvtqq2ph xmm2, ymm3, {rn-sae}
+0x62,0xf5,0xf8,0x18,0x5b,0xd3
+
+# ATT:   vcvtqq2ph {rd-sae}, %ymm3, %xmm2 {%k7}
+# INTEL: vcvtqq2ph xmm2 {k7}, ymm3, {rd-sae}
+0x62,0xf5,0xf8,0x3f,0x5b,0xd3
+
+# ATT:   vcvtqq2ph {rz-sae}, %ymm3, %xmm2 {%k7} {z}
+# INTEL: vcvtqq2ph xmm2 {k7} {z}, ymm3, {rz-sae}
+0x62,0xf5,0xf8,0xff,0x5b,0xd3
+
+# ATT:   vcvtqq2ps {rn-sae}, %ymm3, %xmm2
+# INTEL: vcvtqq2ps xmm2, ymm3, {rn-sae}
+0x62,0xf1,0xf8,0x18,0x5b,0xd3
+
+# ATT:   vcvtqq2ps {rd-sae}, %ymm3, %xmm2 {%k7}
+# INTEL: vcvtqq2ps xmm2 {k7}, ymm3, {rd-sae}
+0x62,0xf1,0xf8,0x3f,0x5b,0xd3
+
+# ATT:   vcvtqq2ps {rz-sae}, %ymm3, %xmm2 {%k7} {z}
+# INTEL: vcvtqq2ps xmm2 {k7} {z}, ymm3, {rz-sae}
+0x62,0xf1,0xf8,0xff,0x5b,0xd3
+
+# ATT:   vcvttpd2dq {sae}, %ymm3, %xmm2
+# INTEL: vcvttpd2dq xmm2, ymm3, {sae}
+0x62,0xf1,0xf9,0x18,0xe6,0xd3
+
+# ATT:   vcvttpd2dq {sae}, %ymm3, %xmm2 {%k7}
+# INTEL: vcvttpd2dq xmm2 {k7}, ymm3, {sae}
+0x62,0xf1,0xf9,0x1f,0xe6,0xd3
+
+# ATT:   vcvttpd2dq {sae}, %ymm3, %xmm2 {%k7} {z}
+# INTEL: vcvttpd2dq xmm2 {k7} {z}, ymm3, {sae}
+0x62,0xf1,0xf9,0x9f,0xe6,0xd3
+
+# ATT:   vcvttpd2qq {sae}, %ymm3, %ymm2
+# INTEL: vcvttpd2qq ymm2, ymm3, {sae}
+0x62,0xf1,0xf9,0x18,0x7a,0xd3
+
+# ATT:   vcvttpd2qq {sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vcvttpd2qq ymm2 {k7}, ymm3, {sae}
+0x62,0xf1,0xf9,0x1f,0x7a,0xd3
+
+# ATT:   vcvttpd2qq {sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vcvttpd2qq ymm2 {k7} {z}, ymm3, {sae}
+0x62,0xf1,0xf9,0x9f,0x7a,0xd3
+
+# ATT:   vcvttpd2udq {sae}, %ymm3, %xmm2
+# INTEL: vcvttpd2udq xmm2, ymm3, {sae}
+0x62,0xf1,0xf8,0x18,0x78,0xd3
+
+# ATT:   vcvttpd2udq {sae}, %ymm3, %xmm2 {%k7}
+# INTEL: vcvttpd2udq xmm2 {k7}, ymm3, {sae}
+0x62,0xf1,0xf8,0x1f,0x78,0xd3
+
+# ATT:   vcvttpd2udq {sae}, %ymm3, %xmm2 {%k7} {z}
+# INTEL: vcvttpd2udq xmm2 {k7} {z}, ymm3, {sae}
+0x62,0xf1,0xf8,0x9f,0x78,0xd3
+
+# ATT:   vcvttpd2uqq {sae}, %ymm3, %ymm2
+# INTEL: vcvttpd2uqq ymm2, ymm3, {sae}
+0x62,0xf1,0xf9,0x18,0x78,0xd3
+
+# ATT:   vcvttpd2uqq {sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vcvttpd2uqq ymm2 {k7}, ymm3, {sae}
+0x62,0xf1,0xf9,0x1f,0x78,0xd3
+
+# ATT:   vcvttpd2uqq {sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vcvttpd2uqq ymm2 {k7} {z}, ymm3, {sae}
+0x62,0xf1,0xf9,0x9f,0x78,0xd3
+
+# ATT:   vcvttph2dq {sae}, %xmm3, %ymm2
+# INTEL: vcvttph2dq ymm2, xmm3, {sae}
+0x62,0xf5,0x7a,0x18,0x5b,0xd3
+
+# ATT:   vcvttph2dq {sae}, %xmm3, %ymm2 {%k7}
+# INTEL: vcvttph2dq ymm2 {k7}, xmm3, {sae}
+0x62,0xf5,0x7a,0x1f,0x5b,0xd3
+
+# ATT:   vcvttph2dq {sae}, %xmm3, %ymm2 {%k7} {z}
+# INTEL: vcvttph2dq ymm2 {k7} {z}, xmm3, {sae}
+0x62,0xf5,0x7a,0x9f,0x5b,0xd3
+
+# ATT:   vcvttph2qq {sae}, %xmm3, %ymm2
+# INTEL: vcvttph2qq ymm2, xmm3, {sae}
+0x62,0xf5,0x79,0x18,0x7a,0xd3
+
+# ATT:   vcvttph2qq {sae}, %xmm3, %ymm2 {%k7}
+# INTEL: vcvttph2qq ymm2 {k7}, xmm3, {sae}
+0x62,0xf5,0x79,0x1f,0x7a,0xd3
+
+# ATT:   vcvttph2qq {sae}, %xmm3, %ymm2 {%k7} {z}
+# INTEL: vcvttph2qq ymm2 {k7} {z}, xmm3, {sae}
+0x62,0xf5,0x79,0x9f,0x7a,0xd3
+
+# ATT:   vcvttph2udq {sae}, %xmm3, %ymm2
+# INTEL: vcvttph2udq ymm2, xmm3, {sae}
+0x62,0xf5,0x78,0x18,0x78,0xd3
+
+# ATT:   vcvttph2udq {sae}, %xmm3, %ymm2 {%k7}
+# INTEL: vcvttph2udq ymm2 {k7}, xmm3, {sae}
+0x62,0xf5,0x78,0x1f,0x78,0xd3
+
+# ATT:   vcvttph2udq {sae}, %xmm3, %ymm2 {%k7} {z}
+# INTEL: vcvttph2udq ymm2 {k7} {z}, xmm3, {sae}
+0x62,0xf5,0x78,0x9f,0x78,0xd3
+
+# ATT:   vcvttph2uqq {sae}, %xmm3, %ymm2
+# INTEL: vcvttph2uqq ymm2, xmm3, {sae}
+0x62,0xf5,0x79,0x18,0x78,0xd3
+
+# ATT:   vcvttph2uqq {sae}, %xmm3, %ymm2 {%k7}
+# INTEL: vcvttph2uqq ymm2 {k7}, xmm3, {sae}
+0x62,0xf5,0x79,0x1f,0x78,0xd3
+
+# ATT:   vcvttph2uqq {sae}, %xmm3, %ymm2 {%k7} {z}
+# INTEL: vcvttph2uqq ymm2 {k7} {z}, xmm3, {sae}
+0x62,0xf5,0x79,0x9f,0x78,0xd3
+
+# ATT:   vcvttph2uw {sae}, %ymm3, %ymm2
+# INTEL: vcvttph2uw ymm2, ymm3, {sae}
+0x62,0xf5,0x78,0x18,0x7c,0xd3
+
+# ATT:   vcvttph2uw {sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vcvttph2uw ymm2 {k7}, ymm3, {sae}
+0x62,0xf5,0x78,0x1f,0x7c,0xd3
+
+# ATT:   vcvttph2uw {sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vcvttph2uw ymm2 {k7} {z}, ymm3, {sae}
+0x62,0xf5,0x78,0x9f,0x7c,0xd3
+
+# ATT:   vcvttph2w {sae}, %ymm3, %ymm2
+# INTEL: vcvttph2w ymm2, ymm3, {sae}
+0x62,0xf5,0x79,0x18,0x7c,0xd3
+
+# ATT:   vcvttph2w {sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vcvttph2w ymm2 {k7}, ymm3, {sae}
+0x62,0xf5,0x79,0x1f,0x7c,0xd3
+
+# ATT:   vcvttph2w {sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vcvttph2w ymm2 {k7} {z}, ymm3, {sae}
+0x62,0xf5,0x79,0x9f,0x7c,0xd3
+
+# ATT:   vcvttps2dq {sae}, %ymm3, %ymm2
+# INTEL: vcvttps2dq ymm2, ymm3, {sae}
+0x62,0xf1,0x7a,0x18,0x5b,0xd3
+
+# ATT:   vcvttps2dq {sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vcvttps2dq ymm2 {k7}, ymm3, {sae}
+0x62,0xf1,0x7a,0x1f,0x5b,0xd3
+
+# ATT:   vcvttps2dq {sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vcvttps2dq ymm2 {k7} {z}, ymm3, {sae}
+0x62,0xf1,0x7a,0x9f,0x5b,0xd3
+
+# ATT:   vcvttps2qq {sae}, %xmm3, %ymm2
+# INTEL: vcvttps2qq ymm2, xmm3, {sae}
+0x62,0xf1,0x79,0x18,0x7a,0xd3
+
+# ATT:   vcvttps2qq {sae}, %xmm3, %ymm2 {%k7}
+# INTEL: vcvttps2qq ymm2 {k7}, xmm3, {sae}
+0x62,0xf1,0x79,0x1f,0x7a,0xd3
+
+# ATT:   vcvttps2qq {sae}, %xmm3, %ymm2 {%k7} {z}
+# INTEL: vcvttps2qq ymm2 {k7} {z}, xmm3, {sae}
+0x62,0xf1,0x79,0x9f,0x7a,0xd3
+
+# ATT:   vcvttps2udq {sae}, %ymm3, %ymm2
+# INTEL: vcvttps2udq ymm2, ymm3, {sae}
+0x62,0xf1,0x78,0x18,0x78,0xd3
+
+# ATT:   vcvttps2udq {sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vcvttps2udq ymm2 {k7}, ymm3, {sae}
+0x62,0xf1,0x78,0x1f,0x78,0xd3
+
+# ATT:   vcvttps2udq {sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vcvttps2udq ymm2 {k7} {z}, ymm3, {sae}
+0x62,0xf1,0x78,0x9f,0x78,0xd3
+
+# ATT:   vcvttps2uqq {sae}, %xmm3, %ymm2
+# INTEL: vcvttps2uqq ymm2, xmm3, {sae}
+0x62,0xf1,0x79,0x18,0x78,0xd3
+
+# ATT:   vcvttps2uqq {sae}, %xmm3, %ymm2 {%k7}
+# INTEL: vcvttps2uqq ymm2 {k7}, xmm3, {sae}
+0x62,0xf1,0x79,0x1f,0x78,0xd3
+
+# ATT:   vcvttps2uqq {sae}, %xmm3, %ymm2 {%k7} {z}
+# INTEL: vcvttps2uqq ymm2 {k7} {z}, xmm3, {sae}
+0x62,0xf1,0x79,0x9f,0x78,0xd3
+
+# ATT:   vcvtudq2ph {rn-sae}, %ymm3, %xmm2
+# INTEL: vcvtudq2ph xmm2, ymm3, {rn-sae}
+0x62,0xf5,0x7b,0x18,0x7a,0xd3
+
+# ATT:   vcvtudq2ph {rd-sae}, %ymm3, %xmm2 {%k7}
+# INTEL: vcvtudq2ph xmm2 {k7}, ymm3, {rd-sae}
+0x62,0xf5,0x7b,0x3f,0x7a,0xd3
+
+# ATT:   vcvtudq2ph {rz-sae}, %ymm3, %xmm2 {%k7} {z}
+# INTEL: vcvtudq2ph xmm2 {k7} {z}, ymm3, {rz-sae}
+0x62,0xf5,0x7b,0xff,0x7a,0xd3
+
+# ATT:   vcvtudq2ps {rn-sae}, %ymm3, %ymm2
+# INTEL: vcvtudq2ps ymm2, ymm3, {rn-sae}
+0x62,0xf1,0x7b,0x18,0x7a,0xd3
+
+# ATT:   vcvtudq2ps {rd-sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vcvtudq2ps ymm2 {k7}, ymm3, {rd-sae}
+0x62,0xf1,0x7b,0x3f,0x7a,0xd3
+
+# ATT:   vcvtudq2ps {rz-sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vcvtudq2ps ymm2 {k7} {z}, ymm3, {rz-sae}
+0x62,0xf1,0x7b,0xff,0x7a,0xd3
+
+# ATT:   vcvtuqq2pd {rn-sae}, %ymm3, %ymm2
+# INTEL: vcvtuqq2pd ymm2, ymm3, {rn-sae}
+0x62,0xf1,0xfa,0x18,0x7a,0xd3
+
+# ATT:   vcvtuqq2pd {rd-sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vcvtuqq2pd ymm2 {k7}, ymm3, {rd-sae}
+0x62,0xf1,0xfa,0x3f,0x7a,0xd3
+
+# ATT:   vcvtuqq2pd {rz-sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vcvtuqq2pd ymm2 {k7} {z}, ymm3, {rz-sae}
+0x62,0xf1,0xfa,0xff,0x7a,0xd3
+
+# ATT:   vcvtuqq2ph {rn-sae}, %ymm3, %xmm2
+# INTEL: vcvtuqq2ph xmm2, ymm3, {rn-sae}
+0x62,0xf5,0xfb,0x18,0x7a,0xd3
+
+# ATT:   vcvtuqq2ph {rd-sae}, %ymm3, %xmm2 {%k7}
+# INTEL: vcvtuqq2ph xmm2 {k7}, ymm3, {rd-sae}
+0x62,0xf5,0xfb,0x3f,0x7a,0xd3
+
+# ATT:   vcvtuqq2ph {rz-sae}, %ymm3, %xmm2 {%k7} {z}
+# INTEL: vcvtuqq2ph xmm2 {k7} {z}, ymm3, {rz-sae}
+0x62,0xf5,0xfb,0xff,0x7a,0xd3
+
+# ATT:   vcvtuqq2ps {rn-sae}, %ymm3, %xmm2
+# INTEL: vcvtuqq2ps xmm2, ymm3, {rn-sae}
+0x62,0xf1,0xfb,0x18,0x7a,0xd3
+
+# ATT:   vcvtuqq2ps {rd-sae}, %ymm3, %xmm2 {%k7}
+# INTEL: vcvtuqq2ps xmm2 {k7}, ymm3, {rd-sae}
+0x62,0xf1,0xfb,0x3f,0x7a,0xd3
+
+# ATT:   vcvtuqq2ps {rz-sae}, %ymm3, %xmm2 {%k7} {z}
+# INTEL: vcvtuqq2ps xmm2 {k7} {z}, ymm3, {rz-sae}
+0x62,0xf1,0xfb,0xff,0x7a,0xd3
+
+# ATT:   vcvtuw2ph {rn-sae}, %ymm3, %ymm2
+# INTEL: vcvtuw2ph ymm2, ymm3, {rn-sae}
+0x62,0xf5,0x7b,0x18,0x7d,0xd3
+
+# ATT:   vcvtuw2ph {rd-sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vcvtuw2ph ymm2 {k7}, ymm3, {rd-sae}
+0x62,0xf5,0x7b,0x3f,0x7d,0xd3
+
+# ATT:   vcvtuw2ph {rz-sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vcvtuw2ph ymm2 {k7} {z}, ymm3, {rz-sae}
+0x62,0xf5,0x7b,0xff,0x7d,0xd3
+
+# ATT:   vcvtw2ph {rn-sae}, %ymm3, %ymm2
+# INTEL: vcvtw2ph ymm2, ymm3, {rn-sae}
+0x62,0xf5,0x7a,0x18,0x7d,0xd3
+
+# ATT:   vcvtw2ph {rd-sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vcvtw2ph ymm2 {k7}, ymm3, {rd-sae}
+0x62,0xf5,0x7a,0x3f,0x7d,0xd3
+
+# ATT:   vcvtw2ph {rz-sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vcvtw2ph ymm2 {k7} {z}, ymm3, {rz-sae}
+0x62,0xf5,0x7a,0xff,0x7d,0xd3
+
+# ATT:   vdivpd {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vdivpd ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf1,0xe1,0x18,0x5e,0xd4
+
+# ATT:   vdivpd {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vdivpd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf1,0xe1,0x3f,0x5e,0xd4
+
+# ATT:   vdivpd {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vdivpd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf1,0xe1,0xff,0x5e,0xd4
+
+# ATT:   vdivph {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vdivph ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf5,0x60,0x18,0x5e,0xd4
+
+# ATT:   vdivph {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vdivph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf5,0x60,0x3f,0x5e,0xd4
+
+# ATT:   vdivph {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vdivph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf5,0x60,0xff,0x5e,0xd4
+
+# ATT:   vdivps {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vdivps ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf1,0x60,0x18,0x5e,0xd4
+
+# ATT:   vdivps {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vdivps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf1,0x60,0x3f,0x5e,0xd4
+
+# ATT:   vdivps {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vdivps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf1,0x60,0xff,0x5e,0xd4
+
+# ATT:   vfcmaddcph {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfcmaddcph ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf6,0x63,0x18,0x56,0xd4
+
+# ATT:   vfcmaddcph {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfcmaddcph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf6,0x63,0x3f,0x56,0xd4
+
+# ATT:   vfcmaddcph {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfcmaddcph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf6,0x63,0xff,0x56,0xd4
+
+# ATT:   vfcmulcph {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfcmulcph ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf6,0x63,0x18,0xd6,0xd4
+
+# ATT:   vfcmulcph {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfcmulcph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf6,0x63,0x3f,0xd6,0xd4
+
+# ATT:   vfcmulcph {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfcmulcph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf6,0x63,0xff,0xd6,0xd4
+
+# ATT:   vfixupimmpd $123, {sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfixupimmpd ymm2, ymm3, ymm4, {sae}, 123
+0x62,0xf3,0xe1,0x18,0x54,0xd4,0x7b
+
+# ATT:   vfixupimmpd $123, {sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfixupimmpd ymm2 {k7}, ymm3, ymm4, {sae}, 123
+0x62,0xf3,0xe1,0x1f,0x54,0xd4,0x7b
+
+# ATT:   vfixupimmpd $123, {sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfixupimmpd ymm2 {k7} {z}, ymm3, ymm4, {sae}, 123
+0x62,0xf3,0xe1,0x9f,0x54,0xd4,0x7b
+
+# ATT:   vfixupimmps $123, {sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfixupimmps ymm2, ymm3, ymm4, {sae}, 123
+0x62,0xf3,0x61,0x18,0x54,0xd4,0x7b
+
+# ATT:   vfixupimmps $123, {sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfixupimmps ymm2 {k7}, ymm3, ymm4, {sae}, 123
+0x62,0xf3,0x61,0x1f,0x54,0xd4,0x7b
+
+# ATT:   vfixupimmps $123, {sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfixupimmps ymm2 {k7} {z}, ymm3, ymm4, {sae}, 123
+0x62,0xf3,0x61,0x9f,0x54,0xd4,0x7b
+
+# ATT:   vfmadd132pd {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmadd132pd ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0xe1,0x18,0x98,0xd4
+
+# ATT:   vfmadd132pd {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmadd132pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0xe1,0x3f,0x98,0xd4
+
+# ATT:   vfmadd132pd {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmadd132pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0xe1,0xff,0x98,0xd4
+
+# ATT:   vfmadd132ph {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmadd132ph ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf6,0x61,0x18,0x98,0xd4
+
+# ATT:   vfmadd132ph {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmadd132ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf6,0x61,0x3f,0x98,0xd4
+
+# ATT:   vfmadd132ph {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmadd132ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf6,0x61,0xff,0x98,0xd4
+
+# ATT:   vfmadd132ps {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmadd132ps ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0x61,0x18,0x98,0xd4
+
+# ATT:   vfmadd132ps {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmadd132ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0x61,0x3f,0x98,0xd4
+
+# ATT:   vfmadd132ps {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmadd132ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0x61,0xff,0x98,0xd4
+
+# ATT:   vfmadd213pd {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmadd213pd ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0xe1,0x18,0xa8,0xd4
+
+# ATT:   vfmadd213pd {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmadd213pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0xe1,0x3f,0xa8,0xd4
+
+# ATT:   vfmadd213pd {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmadd213pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0xe1,0xff,0xa8,0xd4
+
+# ATT:   vfmadd213ph {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmadd213ph ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf6,0x61,0x18,0xa8,0xd4
+
+# ATT:   vfmadd213ph {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmadd213ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf6,0x61,0x3f,0xa8,0xd4
+
+# ATT:   vfmadd213ph {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmadd213ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf6,0x61,0xff,0xa8,0xd4
+
+# ATT:   vfmadd213ps {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmadd213ps ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0x61,0x18,0xa8,0xd4
+
+# ATT:   vfmadd213ps {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmadd213ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0x61,0x3f,0xa8,0xd4
+
+# ATT:   vfmadd213ps {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmadd213ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0x61,0xff,0xa8,0xd4
+
+# ATT:   vfmadd231pd {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmadd231pd ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0xe1,0x18,0xb8,0xd4
+
+# ATT:   vfmadd231pd {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmadd231pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0xe1,0x3f,0xb8,0xd4
+
+# ATT:   vfmadd231pd {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmadd231pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0xe1,0xff,0xb8,0xd4
+
+# ATT:   vfmadd231ph {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmadd231ph ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf6,0x61,0x18,0xb8,0xd4
+
+# ATT:   vfmadd231ph {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmadd231ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf6,0x61,0x3f,0xb8,0xd4
+
+# ATT:   vfmadd231ph {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmadd231ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf6,0x61,0xff,0xb8,0xd4
+
+# ATT:   vfmadd231ps {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmadd231ps ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0x61,0x18,0xb8,0xd4
+
+# ATT:   vfmadd231ps {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmadd231ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0x61,0x3f,0xb8,0xd4
+
+# ATT:   vfmadd231ps {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmadd231ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0x61,0xff,0xb8,0xd4
+
+# ATT:   vfmaddcph {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmaddcph ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf6,0x62,0x18,0x56,0xd4
+
+# ATT:   vfmaddcph {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmaddcph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf6,0x62,0x3f,0x56,0xd4
+
+# ATT:   vfmaddcph {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmaddcph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf6,0x62,0xff,0x56,0xd4
+
+# ATT:   vfmaddsub132pd {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmaddsub132pd ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0xe1,0x18,0x96,0xd4
+
+# ATT:   vfmaddsub132pd {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmaddsub132pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0xe1,0x3f,0x96,0xd4
+
+# ATT:   vfmaddsub132pd {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmaddsub132pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0xe1,0xff,0x96,0xd4
+
+# ATT:   vfmaddsub132ph {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmaddsub132ph ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf6,0x61,0x18,0x96,0xd4
+
+# ATT:   vfmaddsub132ph {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmaddsub132ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf6,0x61,0x3f,0x96,0xd4
+
+# ATT:   vfmaddsub132ph {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmaddsub132ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf6,0x61,0xff,0x96,0xd4
+
+# ATT:   vfmaddsub132ps {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmaddsub132ps ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0x61,0x18,0x96,0xd4
+
+# ATT:   vfmaddsub132ps {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmaddsub132ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0x61,0x3f,0x96,0xd4
+
+# ATT:   vfmaddsub132ps {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmaddsub132ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0x61,0xff,0x96,0xd4
+
+# ATT:   vfmaddsub213pd {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmaddsub213pd ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0xe1,0x18,0xa6,0xd4
+
+# ATT:   vfmaddsub213pd {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmaddsub213pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0xe1,0x3f,0xa6,0xd4
+
+# ATT:   vfmaddsub213pd {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmaddsub213pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0xe1,0xff,0xa6,0xd4
+
+# ATT:   vfmaddsub213ph {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmaddsub213ph ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf6,0x61,0x18,0xa6,0xd4
+
+# ATT:   vfmaddsub213ph {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmaddsub213ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf6,0x61,0x3f,0xa6,0xd4
+
+# ATT:   vfmaddsub213ph {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmaddsub213ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf6,0x61,0xff,0xa6,0xd4
+
+# ATT:   vfmaddsub213ps {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmaddsub213ps ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0x61,0x18,0xa6,0xd4
+
+# ATT:   vfmaddsub213ps {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmaddsub213ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0x61,0x3f,0xa6,0xd4
+
+# ATT:   vfmaddsub213ps {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmaddsub213ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0x61,0xff,0xa6,0xd4
+
+# ATT:   vfmaddsub231pd {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmaddsub231pd ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0xe1,0x18,0xb6,0xd4
+
+# ATT:   vfmaddsub231pd {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmaddsub231pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0xe1,0x3f,0xb6,0xd4
+
+# ATT:   vfmaddsub231pd {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmaddsub231pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0xe1,0xff,0xb6,0xd4
+
+# ATT:   vfmaddsub231ph {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmaddsub231ph ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf6,0x61,0x18,0xb6,0xd4
+
+# ATT:   vfmaddsub231ph {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmaddsub231ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf6,0x61,0x3f,0xb6,0xd4
+
+# ATT:   vfmaddsub231ph {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmaddsub231ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf6,0x61,0xff,0xb6,0xd4
+
+# ATT:   vfmaddsub231ps {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmaddsub231ps ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0x61,0x18,0xb6,0xd4
+
+# ATT:   vfmaddsub231ps {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmaddsub231ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0x61,0x3f,0xb6,0xd4
+
+# ATT:   vfmaddsub231ps {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmaddsub231ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0x61,0xff,0xb6,0xd4
+
+# ATT:   vfmsub132pd {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmsub132pd ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0xe1,0x18,0x9a,0xd4
+
+# ATT:   vfmsub132pd {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmsub132pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0xe1,0x3f,0x9a,0xd4
+
+# ATT:   vfmsub132pd {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsub132pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0xe1,0xff,0x9a,0xd4
+
+# ATT:   vfmsub132ph {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmsub132ph ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf6,0x61,0x18,0x9a,0xd4
+
+# ATT:   vfmsub132ph {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmsub132ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf6,0x61,0x3f,0x9a,0xd4
+
+# ATT:   vfmsub132ph {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsub132ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf6,0x61,0xff,0x9a,0xd4
+
+# ATT:   vfmsub132ps {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmsub132ps ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0x61,0x18,0x9a,0xd4
+
+# ATT:   vfmsub132ps {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmsub132ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0x61,0x3f,0x9a,0xd4
+
+# ATT:   vfmsub132ps {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsub132ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0x61,0xff,0x9a,0xd4
+
+# ATT:   vfmsub213pd {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmsub213pd ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0xe1,0x18,0xaa,0xd4
+
+# ATT:   vfmsub213pd {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmsub213pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0xe1,0x3f,0xaa,0xd4
+
+# ATT:   vfmsub213pd {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsub213pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0xe1,0xff,0xaa,0xd4
+
+# ATT:   vfmsub213ph {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmsub213ph ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf6,0x61,0x18,0xaa,0xd4
+
+# ATT:   vfmsub213ph {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmsub213ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf6,0x61,0x3f,0xaa,0xd4
+
+# ATT:   vfmsub213ph {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsub213ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf6,0x61,0xff,0xaa,0xd4
+
+# ATT:   vfmsub213ps {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmsub213ps ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0x61,0x18,0xaa,0xd4
+
+# ATT:   vfmsub213ps {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmsub213ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0x61,0x3f,0xaa,0xd4
+
+# ATT:   vfmsub213ps {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsub213ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0x61,0xff,0xaa,0xd4
+
+# ATT:   vfmsub231pd {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmsub231pd ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0xe1,0x18,0xba,0xd4
+
+# ATT:   vfmsub231pd {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmsub231pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0xe1,0x3f,0xba,0xd4
+
+# ATT:   vfmsub231pd {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsub231pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0xe1,0xff,0xba,0xd4
+
+# ATT:   vfmsub231ph {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmsub231ph ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf6,0x61,0x18,0xba,0xd4
+
+# ATT:   vfmsub231ph {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmsub231ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf6,0x61,0x3f,0xba,0xd4
+
+# ATT:   vfmsub231ph {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsub231ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf6,0x61,0xff,0xba,0xd4
+
+# ATT:   vfmsub231ps {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmsub231ps ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0x61,0x18,0xba,0xd4
+
+# ATT:   vfmsub231ps {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmsub231ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0x61,0x3f,0xba,0xd4
+
+# ATT:   vfmsub231ps {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsub231ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0x61,0xff,0xba,0xd4
+
+# ATT:   vfmsubadd132pd {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmsubadd132pd ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0xe1,0x18,0x97,0xd4
+
+# ATT:   vfmsubadd132pd {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmsubadd132pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0xe1,0x3f,0x97,0xd4
+
+# ATT:   vfmsubadd132pd {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsubadd132pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0xe1,0xff,0x97,0xd4
+
+# ATT:   vfmsubadd132ph {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmsubadd132ph ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf6,0x61,0x18,0x97,0xd4
+
+# ATT:   vfmsubadd132ph {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmsubadd132ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf6,0x61,0x3f,0x97,0xd4
+
+# ATT:   vfmsubadd132ph {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsubadd132ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf6,0x61,0xff,0x97,0xd4
+
+# ATT:   vfmsubadd132ps {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmsubadd132ps ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0x61,0x18,0x97,0xd4
+
+# ATT:   vfmsubadd132ps {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmsubadd132ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0x61,0x3f,0x97,0xd4
+
+# ATT:   vfmsubadd132ps {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsubadd132ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0x61,0xff,0x97,0xd4
+
+# ATT:   vfmsubadd213pd {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmsubadd213pd ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0xe1,0x18,0xa7,0xd4
+
+# ATT:   vfmsubadd213pd {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmsubadd213pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0xe1,0x3f,0xa7,0xd4
+
+# ATT:   vfmsubadd213pd {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsubadd213pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0xe1,0xff,0xa7,0xd4
+
+# ATT:   vfmsubadd213ph {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmsubadd213ph ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf6,0x61,0x18,0xa7,0xd4
+
+# ATT:   vfmsubadd213ph {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmsubadd213ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf6,0x61,0x3f,0xa7,0xd4
+
+# ATT:   vfmsubadd213ph {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsubadd213ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf6,0x61,0xff,0xa7,0xd4
+
+# ATT:   vfmsubadd213ps {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmsubadd213ps ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0x61,0x18,0xa7,0xd4
+
+# ATT:   vfmsubadd213ps {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmsubadd213ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0x61,0x3f,0xa7,0xd4
+
+# ATT:   vfmsubadd213ps {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsubadd213ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0x61,0xff,0xa7,0xd4
+
+# ATT:   vfmsubadd231pd {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmsubadd231pd ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0xe1,0x18,0xb7,0xd4
+
+# ATT:   vfmsubadd231pd {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmsubadd231pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0xe1,0x3f,0xb7,0xd4
+
+# ATT:   vfmsubadd231pd {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsubadd231pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0xe1,0xff,0xb7,0xd4
+
+# ATT:   vfmsubadd231ph {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmsubadd231ph ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf6,0x61,0x18,0xb7,0xd4
+
+# ATT:   vfmsubadd231ph {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmsubadd231ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf6,0x61,0x3f,0xb7,0xd4
+
+# ATT:   vfmsubadd231ph {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsubadd231ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf6,0x61,0xff,0xb7,0xd4
+
+# ATT:   vfmsubadd231ps {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmsubadd231ps ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0x61,0x18,0xb7,0xd4
+
+# ATT:   vfmsubadd231ps {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmsubadd231ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0x61,0x3f,0xb7,0xd4
+
+# ATT:   vfmsubadd231ps {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmsubadd231ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0x61,0xff,0xb7,0xd4
+
+# ATT:   vfmulcph {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfmulcph ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf6,0x62,0x18,0xd6,0xd4
+
+# ATT:   vfmulcph {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfmulcph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf6,0x62,0x3f,0xd6,0xd4
+
+# ATT:   vfmulcph {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfmulcph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf6,0x62,0xff,0xd6,0xd4
+
+# ATT:   vfnmadd132pd {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfnmadd132pd ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0xe1,0x18,0x9c,0xd4
+
+# ATT:   vfnmadd132pd {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmadd132pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0xe1,0x3f,0x9c,0xd4
+
+# ATT:   vfnmadd132pd {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmadd132pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0xe1,0xff,0x9c,0xd4
+
+# ATT:   vfnmadd132ph {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfnmadd132ph ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf6,0x61,0x18,0x9c,0xd4
+
+# ATT:   vfnmadd132ph {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmadd132ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf6,0x61,0x3f,0x9c,0xd4
+
+# ATT:   vfnmadd132ph {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmadd132ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf6,0x61,0xff,0x9c,0xd4
+
+# ATT:   vfnmadd132ps {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfnmadd132ps ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0x61,0x18,0x9c,0xd4
+
+# ATT:   vfnmadd132ps {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmadd132ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0x61,0x3f,0x9c,0xd4
+
+# ATT:   vfnmadd132ps {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmadd132ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0x61,0xff,0x9c,0xd4
+
+# ATT:   vfnmadd213pd {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfnmadd213pd ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0xe1,0x18,0xac,0xd4
+
+# ATT:   vfnmadd213pd {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmadd213pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0xe1,0x3f,0xac,0xd4
+
+# ATT:   vfnmadd213pd {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmadd213pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0xe1,0xff,0xac,0xd4
+
+# ATT:   vfnmadd213ph {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfnmadd213ph ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf6,0x61,0x18,0xac,0xd4
+
+# ATT:   vfnmadd213ph {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmadd213ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf6,0x61,0x3f,0xac,0xd4
+
+# ATT:   vfnmadd213ph {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmadd213ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf6,0x61,0xff,0xac,0xd4
+
+# ATT:   vfnmadd213ps {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfnmadd213ps ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0x61,0x18,0xac,0xd4
+
+# ATT:   vfnmadd213ps {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmadd213ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0x61,0x3f,0xac,0xd4
+
+# ATT:   vfnmadd213ps {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmadd213ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0x61,0xff,0xac,0xd4
+
+# ATT:   vfnmadd231pd {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfnmadd231pd ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0xe1,0x18,0xbc,0xd4
+
+# ATT:   vfnmadd231pd {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmadd231pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0xe1,0x3f,0xbc,0xd4
+
+# ATT:   vfnmadd231pd {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmadd231pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0xe1,0xff,0xbc,0xd4
+
+# ATT:   vfnmadd231ph {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfnmadd231ph ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf6,0x61,0x18,0xbc,0xd4
+
+# ATT:   vfnmadd231ph {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmadd231ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf6,0x61,0x3f,0xbc,0xd4
+
+# ATT:   vfnmadd231ph {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmadd231ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf6,0x61,0xff,0xbc,0xd4
+
+# ATT:   vfnmadd231ps {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfnmadd231ps ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0x61,0x18,0xbc,0xd4
+
+# ATT:   vfnmadd231ps {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmadd231ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0x61,0x3f,0xbc,0xd4
+
+# ATT:   vfnmadd231ps {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmadd231ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0x61,0xff,0xbc,0xd4
+
+# ATT:   vfnmsub132pd {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfnmsub132pd ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0xe1,0x18,0x9e,0xd4
+
+# ATT:   vfnmsub132pd {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmsub132pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0xe1,0x3f,0x9e,0xd4
+
+# ATT:   vfnmsub132pd {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmsub132pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0xe1,0xff,0x9e,0xd4
+
+# ATT:   vfnmsub132ph {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfnmsub132ph ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf6,0x61,0x18,0x9e,0xd4
+
+# ATT:   vfnmsub132ph {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmsub132ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf6,0x61,0x3f,0x9e,0xd4
+
+# ATT:   vfnmsub132ph {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmsub132ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf6,0x61,0xff,0x9e,0xd4
+
+# ATT:   vfnmsub132ps {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfnmsub132ps ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0x61,0x18,0x9e,0xd4
+
+# ATT:   vfnmsub132ps {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmsub132ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0x61,0x3f,0x9e,0xd4
+
+# ATT:   vfnmsub132ps {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmsub132ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0x61,0xff,0x9e,0xd4
+
+# ATT:   vfnmsub213pd {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfnmsub213pd ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0xe1,0x18,0xae,0xd4
+
+# ATT:   vfnmsub213pd {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmsub213pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0xe1,0x3f,0xae,0xd4
+
+# ATT:   vfnmsub213pd {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmsub213pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0xe1,0xff,0xae,0xd4
+
+# ATT:   vfnmsub213ph {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfnmsub213ph ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf6,0x61,0x18,0xae,0xd4
+
+# ATT:   vfnmsub213ph {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmsub213ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf6,0x61,0x3f,0xae,0xd4
+
+# ATT:   vfnmsub213ph {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmsub213ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf6,0x61,0xff,0xae,0xd4
+
+# ATT:   vfnmsub213ps {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfnmsub213ps ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0x61,0x18,0xae,0xd4
+
+# ATT:   vfnmsub213ps {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmsub213ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0x61,0x3f,0xae,0xd4
+
+# ATT:   vfnmsub213ps {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmsub213ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0x61,0xff,0xae,0xd4
+
+# ATT:   vfnmsub231pd {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfnmsub231pd ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0xe1,0x18,0xbe,0xd4
+
+# ATT:   vfnmsub231pd {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmsub231pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0xe1,0x3f,0xbe,0xd4
+
+# ATT:   vfnmsub231pd {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmsub231pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0xe1,0xff,0xbe,0xd4
+
+# ATT:   vfnmsub231ph {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfnmsub231ph ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf6,0x61,0x18,0xbe,0xd4
+
+# ATT:   vfnmsub231ph {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmsub231ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf6,0x61,0x3f,0xbe,0xd4
+
+# ATT:   vfnmsub231ph {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmsub231ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf6,0x61,0xff,0xbe,0xd4
+
+# ATT:   vfnmsub231ps {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vfnmsub231ps ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0x61,0x18,0xbe,0xd4
+
+# ATT:   vfnmsub231ps {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vfnmsub231ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0x61,0x3f,0xbe,0xd4
+
+# ATT:   vfnmsub231ps {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vfnmsub231ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0x61,0xff,0xbe,0xd4
+
+# ATT:   vgetexppd {sae}, %ymm3, %ymm2
+# INTEL: vgetexppd ymm2, ymm3, {sae}
+0x62,0xf2,0xf9,0x18,0x42,0xd3
+
+# ATT:   vgetexppd {sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vgetexppd ymm2 {k7}, ymm3, {sae}
+0x62,0xf2,0xf9,0x1f,0x42,0xd3
+
+# ATT:   vgetexppd {sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vgetexppd ymm2 {k7} {z}, ymm3, {sae}
+0x62,0xf2,0xf9,0x9f,0x42,0xd3
+
+# ATT:   vgetexpph {sae}, %ymm3, %ymm2
+# INTEL: vgetexpph ymm2, ymm3, {sae}
+0x62,0xf6,0x79,0x18,0x42,0xd3
+
+# ATT:   vgetexpph {sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vgetexpph ymm2 {k7}, ymm3, {sae}
+0x62,0xf6,0x79,0x1f,0x42,0xd3
+
+# ATT:   vgetexpph {sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vgetexpph ymm2 {k7} {z}, ymm3, {sae}
+0x62,0xf6,0x79,0x9f,0x42,0xd3
+
+# ATT:   vgetexpps {sae}, %ymm3, %ymm2
+# INTEL: vgetexpps ymm2, ymm3, {sae}
+0x62,0xf2,0x79,0x18,0x42,0xd3
+
+# ATT:   vgetexpps {sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vgetexpps ymm2 {k7}, ymm3, {sae}
+0x62,0xf2,0x79,0x1f,0x42,0xd3
+
+# ATT:   vgetexpps {sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vgetexpps ymm2 {k7} {z}, ymm3, {sae}
+0x62,0xf2,0x79,0x9f,0x42,0xd3
+
+# ATT:   vgetmantpd $123, {sae}, %ymm3, %ymm2
+# INTEL: vgetmantpd ymm2, ymm3, {sae}, 123
+0x62,0xf3,0xf9,0x18,0x26,0xd3,0x7b
+
+# ATT:   vgetmantpd $123, {sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vgetmantpd ymm2 {k7}, ymm3, {sae}, 123
+0x62,0xf3,0xf9,0x1f,0x26,0xd3,0x7b
+
+# ATT:   vgetmantpd $123, {sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vgetmantpd ymm2 {k7} {z}, ymm3, {sae}, 123
+0x62,0xf3,0xf9,0x9f,0x26,0xd3,0x7b
+
+# ATT:   vgetmantph $123, {sae}, %ymm3, %ymm2
+# INTEL: vgetmantph ymm2, ymm3, {sae}, 123
+0x62,0xf3,0x78,0x18,0x26,0xd3,0x7b
+
+# ATT:   vgetmantph $123, {sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vgetmantph ymm2 {k7}, ymm3, {sae}, 123
+0x62,0xf3,0x78,0x1f,0x26,0xd3,0x7b
+
+# ATT:   vgetmantph $123, {sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vgetmantph ymm2 {k7} {z}, ymm3, {sae}, 123
+0x62,0xf3,0x78,0x9f,0x26,0xd3,0x7b
+
+# ATT:   vgetmantps $123, {sae}, %ymm3, %ymm2
+# INTEL: vgetmantps ymm2, ymm3, {sae}, 123
+0x62,0xf3,0x79,0x18,0x26,0xd3,0x7b
+
+# ATT:   vgetmantps $123, {sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vgetmantps ymm2 {k7}, ymm3, {sae}, 123
+0x62,0xf3,0x79,0x1f,0x26,0xd3,0x7b
+
+# ATT:   vgetmantps $123, {sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vgetmantps ymm2 {k7} {z}, ymm3, {sae}, 123
+0x62,0xf3,0x79,0x9f,0x26,0xd3,0x7b
+
+# ATT:   vmaxpd {sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vmaxpd ymm2, ymm3, ymm4, {sae}
+0x62,0xf1,0xe1,0x18,0x5f,0xd4
+
+# ATT:   vmaxpd {sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vmaxpd ymm2 {k7}, ymm3, ymm4, {sae}
+0x62,0xf1,0xe1,0x1f,0x5f,0xd4
+
+# ATT:   vmaxpd {sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vmaxpd ymm2 {k7} {z}, ymm3, ymm4, {sae}
+0x62,0xf1,0xe1,0x9f,0x5f,0xd4
+
+# ATT:   vmaxph {sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vmaxph ymm2, ymm3, ymm4, {sae}
+0x62,0xf5,0x60,0x18,0x5f,0xd4
+
+# ATT:   vmaxph {sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vmaxph ymm2 {k7}, ymm3, ymm4, {sae}
+0x62,0xf5,0x60,0x1f,0x5f,0xd4
+
+# ATT:   vmaxph {sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vmaxph ymm2 {k7} {z}, ymm3, ymm4, {sae}
+0x62,0xf5,0x60,0x9f,0x5f,0xd4
+
+# ATT:   vmaxps {sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vmaxps ymm2, ymm3, ymm4, {sae}
+0x62,0xf1,0x60,0x18,0x5f,0xd4
+
+# ATT:   vmaxps {sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vmaxps ymm2 {k7}, ymm3, ymm4, {sae}
+0x62,0xf1,0x60,0x1f,0x5f,0xd4
+
+# ATT:   vmaxps {sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vmaxps ymm2 {k7} {z}, ymm3, ymm4, {sae}
+0x62,0xf1,0x60,0x9f,0x5f,0xd4
+
+# ATT:   vminpd {sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vminpd ymm2, ymm3, ymm4, {sae}
+0x62,0xf1,0xe1,0x18,0x5d,0xd4
+
+# ATT:   vminpd {sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vminpd ymm2 {k7}, ymm3, ymm4, {sae}
+0x62,0xf1,0xe1,0x1f,0x5d,0xd4
+
+# ATT:   vminpd {sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vminpd ymm2 {k7} {z}, ymm3, ymm4, {sae}
+0x62,0xf1,0xe1,0x9f,0x5d,0xd4
+
+# ATT:   vminph {sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vminph ymm2, ymm3, ymm4, {sae}
+0x62,0xf5,0x60,0x18,0x5d,0xd4
+
+# ATT:   vminph {sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vminph ymm2 {k7}, ymm3, ymm4, {sae}
+0x62,0xf5,0x60,0x1f,0x5d,0xd4
+
+# ATT:   vminph {sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vminph ymm2 {k7} {z}, ymm3, ymm4, {sae}
+0x62,0xf5,0x60,0x9f,0x5d,0xd4
+
+# ATT:   vminps {sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vminps ymm2, ymm3, ymm4, {sae}
+0x62,0xf1,0x60,0x18,0x5d,0xd4
+
+# ATT:   vminps {sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vminps ymm2 {k7}, ymm3, ymm4, {sae}
+0x62,0xf1,0x60,0x1f,0x5d,0xd4
+
+# ATT:   vminps {sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vminps ymm2 {k7} {z}, ymm3, ymm4, {sae}
+0x62,0xf1,0x60,0x9f,0x5d,0xd4
+
+# ATT:   vmulpd {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vmulpd ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf1,0xe1,0x18,0x59,0xd4
+
+# ATT:   vmulpd {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vmulpd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf1,0xe1,0x3f,0x59,0xd4
+
+# ATT:   vmulpd {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vmulpd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf1,0xe1,0xff,0x59,0xd4
+
+# ATT:   vmulph {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vmulph ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf5,0x60,0x18,0x59,0xd4
+
+# ATT:   vmulph {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vmulph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf5,0x60,0x3f,0x59,0xd4
+
+# ATT:   vmulph {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vmulph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf5,0x60,0xff,0x59,0xd4
+
+# ATT:   vmulps {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vmulps ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf1,0x60,0x18,0x59,0xd4
+
+# ATT:   vmulps {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vmulps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf1,0x60,0x3f,0x59,0xd4
+
+# ATT:   vmulps {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vmulps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf1,0x60,0xff,0x59,0xd4
+
+# ATT:   vrangepd $123, {sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vrangepd ymm2, ymm3, ymm4, {sae}, 123
+0x62,0xf3,0xe1,0x18,0x50,0xd4,0x7b
+
+# ATT:   vrangepd $123, {sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vrangepd ymm2 {k7}, ymm3, ymm4, {sae}, 123
+0x62,0xf3,0xe1,0x1f,0x50,0xd4,0x7b
+
+# ATT:   vrangepd $123, {sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vrangepd ymm2 {k7} {z}, ymm3, ymm4, {sae}, 123
+0x62,0xf3,0xe1,0x9f,0x50,0xd4,0x7b
+
+# ATT:   vrangeps $123, {sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vrangeps ymm2, ymm3, ymm4, {sae}, 123
+0x62,0xf3,0x61,0x18,0x50,0xd4,0x7b
+
+# ATT:   vrangeps $123, {sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vrangeps ymm2 {k7}, ymm3, ymm4, {sae}, 123
+0x62,0xf3,0x61,0x1f,0x50,0xd4,0x7b
+
+# ATT:   vrangeps $123, {sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vrangeps ymm2 {k7} {z}, ymm3, ymm4, {sae}, 123
+0x62,0xf3,0x61,0x9f,0x50,0xd4,0x7b
+
+# ATT:   vreducepd $123, {sae}, %ymm3, %ymm2
+# INTEL: vreducepd ymm2, ymm3, {sae}, 123
+0x62,0xf3,0xf9,0x18,0x56,0xd3,0x7b
+
+# ATT:   vreducepd $123, {sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vreducepd ymm2 {k7}, ymm3, {sae}, 123
+0x62,0xf3,0xf9,0x1f,0x56,0xd3,0x7b
+
+# ATT:   vreducepd $123, {sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vreducepd ymm2 {k7} {z}, ymm3, {sae}, 123
+0x62,0xf3,0xf9,0x9f,0x56,0xd3,0x7b
+
+# ATT:   vreduceph $123, {sae}, %ymm3, %ymm2
+# INTEL: vreduceph ymm2, ymm3, {sae}, 123
+0x62,0xf3,0x78,0x18,0x56,0xd3,0x7b
+
+# ATT:   vreduceph $123, {sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vreduceph ymm2 {k7}, ymm3, {sae}, 123
+0x62,0xf3,0x78,0x1f,0x56,0xd3,0x7b
+
+# ATT:   vreduceph $123, {sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vreduceph ymm2 {k7} {z}, ymm3, {sae}, 123
+0x62,0xf3,0x78,0x9f,0x56,0xd3,0x7b
+
+# ATT:   vreduceps $123, {sae}, %ymm3, %ymm2
+# INTEL: vreduceps ymm2, ymm3, {sae}, 123
+0x62,0xf3,0x79,0x18,0x56,0xd3,0x7b
+
+# ATT:   vreduceps $123, {sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vreduceps ymm2 {k7}, ymm3, {sae}, 123
+0x62,0xf3,0x79,0x1f,0x56,0xd3,0x7b
+
+# ATT:   vreduceps $123, {sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vreduceps ymm2 {k7} {z}, ymm3, {sae}, 123
+0x62,0xf3,0x79,0x9f,0x56,0xd3,0x7b
+
+# ATT:   vrndscalepd $123, {sae}, %ymm3, %ymm2
+# INTEL: vrndscalepd ymm2, ymm3, {sae}, 123
+0x62,0xf3,0xf9,0x18,0x09,0xd3,0x7b
+
+# ATT:   vrndscalepd $123, {sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vrndscalepd ymm2 {k7}, ymm3, {sae}, 123
+0x62,0xf3,0xf9,0x1f,0x09,0xd3,0x7b
+
+# ATT:   vrndscalepd $123, {sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vrndscalepd ymm2 {k7} {z}, ymm3, {sae}, 123
+0x62,0xf3,0xf9,0x9f,0x09,0xd3,0x7b
+
+# ATT:   vrndscaleph $123, {sae}, %ymm3, %ymm2
+# INTEL: vrndscaleph ymm2, ymm3, {sae}, 123
+0x62,0xf3,0x78,0x18,0x08,0xd3,0x7b
+
+# ATT:   vrndscaleph $123, {sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vrndscaleph ymm2 {k7}, ymm3, {sae}, 123
+0x62,0xf3,0x78,0x1f,0x08,0xd3,0x7b
+
+# ATT:   vrndscaleph $123, {sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vrndscaleph ymm2 {k7} {z}, ymm3, {sae}, 123
+0x62,0xf3,0x78,0x9f,0x08,0xd3,0x7b
+
+# ATT:   vrndscaleps $123, {sae}, %ymm3, %ymm2
+# INTEL: vrndscaleps ymm2, ymm3, {sae}, 123
+0x62,0xf3,0x79,0x18,0x08,0xd3,0x7b
+
+# ATT:   vrndscaleps $123, {sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vrndscaleps ymm2 {k7}, ymm3, {sae}, 123
+0x62,0xf3,0x79,0x1f,0x08,0xd3,0x7b
+
+# ATT:   vrndscaleps $123, {sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vrndscaleps ymm2 {k7} {z}, ymm3, {sae}, 123
+0x62,0xf3,0x79,0x9f,0x08,0xd3,0x7b
+
+# ATT:   vscalefpd {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vscalefpd ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0xe1,0x18,0x2c,0xd4
+
+# ATT:   vscalefpd {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vscalefpd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0xe1,0x3f,0x2c,0xd4
+
+# ATT:   vscalefpd {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vscalefpd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0xe1,0xff,0x2c,0xd4
+
+# ATT:   vscalefph {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vscalefph ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf6,0x61,0x18,0x2c,0xd4
+
+# ATT:   vscalefph {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vscalefph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf6,0x61,0x3f,0x2c,0xd4
+
+# ATT:   vscalefph {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vscalefph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf6,0x61,0xff,0x2c,0xd4
+
+# ATT:   vscalefps {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vscalefps ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf2,0x61,0x18,0x2c,0xd4
+
+# ATT:   vscalefps {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vscalefps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf2,0x61,0x3f,0x2c,0xd4
+
+# ATT:   vscalefps {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vscalefps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf2,0x61,0xff,0x2c,0xd4
+
+# ATT:   vsqrtpd {rn-sae}, %ymm3, %ymm2
+# INTEL: vsqrtpd ymm2, ymm3, {rn-sae}
+0x62,0xf1,0xf9,0x18,0x51,0xd3
+
+# ATT:   vsqrtpd {rd-sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vsqrtpd ymm2 {k7}, ymm3, {rd-sae}
+0x62,0xf1,0xf9,0x3f,0x51,0xd3
+
+# ATT:   vsqrtpd {rz-sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vsqrtpd ymm2 {k7} {z}, ymm3, {rz-sae}
+0x62,0xf1,0xf9,0xff,0x51,0xd3
+
+# ATT:   vsqrtph {rn-sae}, %ymm3, %ymm2
+# INTEL: vsqrtph ymm2, ymm3, {rn-sae}
+0x62,0xf5,0x78,0x18,0x51,0xd3
+
+# ATT:   vsqrtph {rd-sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vsqrtph ymm2 {k7}, ymm3, {rd-sae}
+0x62,0xf5,0x78,0x3f,0x51,0xd3
+
+# ATT:   vsqrtph {rz-sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vsqrtph ymm2 {k7} {z}, ymm3, {rz-sae}
+0x62,0xf5,0x78,0xff,0x51,0xd3
+
+# ATT:   vsqrtps {rn-sae}, %ymm3, %ymm2
+# INTEL: vsqrtps ymm2, ymm3, {rn-sae}
+0x62,0xf1,0x78,0x18,0x51,0xd3
+
+# ATT:   vsqrtps {rd-sae}, %ymm3, %ymm2 {%k7}
+# INTEL: vsqrtps ymm2 {k7}, ymm3, {rd-sae}
+0x62,0xf1,0x78,0x3f,0x51,0xd3
+
+# ATT:   vsqrtps {rz-sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vsqrtps ymm2 {k7} {z}, ymm3, {rz-sae}
+0x62,0xf1,0x78,0xff,0x51,0xd3
+
+# ATT:   vsubpd {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vsubpd ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf1,0xe1,0x18,0x5c,0xd4
+
+# ATT:   vsubpd {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vsubpd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf1,0xe1,0x3f,0x5c,0xd4
+
+# ATT:   vsubpd {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vsubpd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf1,0xe1,0xff,0x5c,0xd4
+
+# ATT:   vsubph {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vsubph ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf5,0x60,0x18,0x5c,0xd4
+
+# ATT:   vsubph {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vsubph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf5,0x60,0x3f,0x5c,0xd4
+
+# ATT:   vsubph {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vsubph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf5,0x60,0xff,0x5c,0xd4
+
+# ATT:   vsubps {rn-sae}, %ymm4, %ymm3, %ymm2
+# INTEL: vsubps ymm2, ymm3, ymm4, {rn-sae}
+0x62,0xf1,0x60,0x18,0x5c,0xd4
+
+# ATT:   vsubps {rd-sae}, %ymm4, %ymm3, %ymm2 {%k7}
+# INTEL: vsubps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+0x62,0xf1,0x60,0x3f,0x5c,0xd4
+
+# ATT:   vsubps {rz-sae}, %ymm4, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vsubps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+0x62,0xf1,0x60,0xff,0x5c,0xd4

diff --git a/llvm/test/MC/Disassembler/X86/avx10_2ni-64.txt b/llvm/test/MC/Disassembler/X86/avx10_2ni-64.txt
index 34f8851d04d6b..7f68e9d0da131 100644
--- a/llvm/test/MC/Disassembler/X86/avx10_2ni-64.txt
+++ b/llvm/test/MC/Disassembler/X86/avx10_2ni-64.txt
@@ -148,3 +148,1743 @@
 # ATT:   vaddps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
 # INTEL: vaddps ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
 0x62,0x81,0x40,0xf7,0x58,0xf0
+
+# ATT:   vcmppd $123, {sae}, %ymm24, %ymm23, %k5
+# INTEL: vcmppd k5, ymm23, ymm24, {sae}, 123
+0x62,0x91,0xc1,0x10,0xc2,0xe8,0x7b
+
+# ATT:   vcmppd $123, {sae}, %ymm24, %ymm23, %k5 {%k7}
+# INTEL: vcmppd k5 {k7}, ymm23, ymm24, {sae}, 123
+0x62,0x91,0xc1,0x17,0xc2,0xe8,0x7b
+
+# ATT:   vcmpph $123, {sae}, %ymm24, %ymm23, %k5
+# INTEL: vcmpph k5, ymm23, ymm24, {sae}, 123
+0x62,0x93,0x40,0x10,0xc2,0xe8,0x7b
+
+# ATT:   vcmpph $123, {sae}, %ymm24, %ymm23, %k5 {%k7}
+# INTEL: vcmpph k5 {k7}, ymm23, ymm24, {sae}, 123
+0x62,0x93,0x40,0x17,0xc2,0xe8,0x7b
+
+# ATT:   vcmpps $123, {sae}, %ymm24, %ymm23, %k5
+# INTEL: vcmpps k5, ymm23, ymm24, {sae}, 123
+0x62,0x91,0x40,0x10,0xc2,0xe8,0x7b
+
+# ATT:   vcmpps $123, {sae}, %ymm24, %ymm23, %k5 {%k7}
+# INTEL: vcmpps k5 {k7}, ymm23, ymm24, {sae}, 123
+0x62,0x91,0x40,0x17,0xc2,0xe8,0x7b
+
+# ATT:   vcvtdq2ph {rn-sae}, %ymm23, %xmm22
+# INTEL: vcvtdq2ph xmm22, ymm23, {rn-sae}
+0x62,0xa5,0x78,0x18,0x5b,0xf7
+
+# ATT:   vcvtdq2ph {rd-sae}, %ymm23, %xmm22 {%k7}
+# INTEL: vcvtdq2ph xmm22 {k7}, ymm23, {rd-sae}
+0x62,0xa5,0x78,0x3f,0x5b,0xf7
+
+# ATT:   vcvtdq2ph {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+# INTEL: vcvtdq2ph xmm22 {k7} {z}, ymm23, {rz-sae}
+0x62,0xa5,0x78,0xff,0x5b,0xf7
+
+# ATT:   vcvtdq2ps {rn-sae}, %ymm23, %ymm22
+# INTEL: vcvtdq2ps ymm22, ymm23, {rn-sae}
+0x62,0xa1,0x78,0x18,0x5b,0xf7
+
+# ATT:   vcvtdq2ps {rd-sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vcvtdq2ps ymm22 {k7}, ymm23, {rd-sae}
+0x62,0xa1,0x78,0x3f,0x5b,0xf7
+
+# ATT:   vcvtdq2ps {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vcvtdq2ps ymm22 {k7} {z}, ymm23, {rz-sae}
+0x62,0xa1,0x78,0xff,0x5b,0xf7
+
+# ATT:   vcvtpd2dq {rn-sae}, %ymm23, %xmm22
+# INTEL: vcvtpd2dq xmm22, ymm23, {rn-sae}
+0x62,0xa1,0xfb,0x18,0xe6,0xf7
+
+# ATT:   vcvtpd2dq {rd-sae}, %ymm23, %xmm22 {%k7}
+# INTEL: vcvtpd2dq xmm22 {k7}, ymm23, {rd-sae}
+0x62,0xa1,0xfb,0x3f,0xe6,0xf7
+
+# ATT:   vcvtpd2dq {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+# INTEL: vcvtpd2dq xmm22 {k7} {z}, ymm23, {rz-sae}
+0x62,0xa1,0xfb,0xff,0xe6,0xf7
+
+# ATT:   vcvtpd2ph {rn-sae}, %ymm23, %xmm22
+# INTEL: vcvtpd2ph xmm22, ymm23, {rn-sae}
+0x62,0xa5,0xf9,0x18,0x5a,0xf7
+
+# ATT:   vcvtpd2ph {rd-sae}, %ymm23, %xmm22 {%k7}
+# INTEL: vcvtpd2ph xmm22 {k7}, ymm23, {rd-sae}
+0x62,0xa5,0xf9,0x3f,0x5a,0xf7
+
+# ATT:   vcvtpd2ph {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+# INTEL: vcvtpd2ph xmm22 {k7} {z}, ymm23, {rz-sae}
+0x62,0xa5,0xf9,0xff,0x5a,0xf7
+
+# ATT:   vcvtpd2ps {rn-sae}, %ymm23, %xmm22
+# INTEL: vcvtpd2ps xmm22, ymm23, {rn-sae}
+0x62,0xa1,0xf9,0x18,0x5a,0xf7
+
+# ATT:   vcvtpd2ps {rd-sae}, %ymm23, %xmm22 {%k7}
+# INTEL: vcvtpd2ps xmm22 {k7}, ymm23, {rd-sae}
+0x62,0xa1,0xf9,0x3f,0x5a,0xf7
+
+# ATT:   vcvtpd2ps {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+# INTEL: vcvtpd2ps xmm22 {k7} {z}, ymm23, {rz-sae}
+0x62,0xa1,0xf9,0xff,0x5a,0xf7
+
+# ATT:   vcvtpd2qq {rn-sae}, %ymm23, %ymm22
+# INTEL: vcvtpd2qq ymm22, ymm23, {rn-sae}
+0x62,0xa1,0xf9,0x18,0x7b,0xf7
+
+# ATT:   vcvtpd2qq {rd-sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vcvtpd2qq ymm22 {k7}, ymm23, {rd-sae}
+0x62,0xa1,0xf9,0x3f,0x7b,0xf7
+
+# ATT:   vcvtpd2qq {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vcvtpd2qq ymm22 {k7} {z}, ymm23, {rz-sae}
+0x62,0xa1,0xf9,0xff,0x7b,0xf7
+
+# ATT:   vcvtpd2udq {rn-sae}, %ymm23, %xmm22
+# INTEL: vcvtpd2udq xmm22, ymm23, {rn-sae}
+0x62,0xa1,0xf8,0x18,0x79,0xf7
+
+# ATT:   vcvtpd2udq {rd-sae}, %ymm23, %xmm22 {%k7}
+# INTEL: vcvtpd2udq xmm22 {k7}, ymm23, {rd-sae}
+0x62,0xa1,0xf8,0x3f,0x79,0xf7
+
+# ATT:   vcvtpd2udq {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+# INTEL: vcvtpd2udq xmm22 {k7} {z}, ymm23, {rz-sae}
+0x62,0xa1,0xf8,0xff,0x79,0xf7
+
+# ATT:   vcvtpd2uqq {rn-sae}, %ymm23, %ymm22
+# INTEL: vcvtpd2uqq ymm22, ymm23, {rn-sae}
+0x62,0xa1,0xf9,0x18,0x79,0xf7
+
+# ATT:   vcvtpd2uqq {rd-sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vcvtpd2uqq ymm22 {k7}, ymm23, {rd-sae}
+0x62,0xa1,0xf9,0x3f,0x79,0xf7
+
+# ATT:   vcvtpd2uqq {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vcvtpd2uqq ymm22 {k7} {z}, ymm23, {rz-sae}
+0x62,0xa1,0xf9,0xff,0x79,0xf7
+
+# ATT:   vcvtph2dq {rn-sae}, %xmm23, %ymm22
+# INTEL: vcvtph2dq ymm22, xmm23, {rn-sae}
+0x62,0xa5,0x79,0x18,0x5b,0xf7
+
+# ATT:   vcvtph2dq {rd-sae}, %xmm23, %ymm22 {%k7}
+# INTEL: vcvtph2dq ymm22 {k7}, xmm23, {rd-sae}
+0x62,0xa5,0x79,0x3f,0x5b,0xf7
+
+# ATT:   vcvtph2dq {rz-sae}, %xmm23, %ymm22 {%k7} {z}
+# INTEL: vcvtph2dq ymm22 {k7} {z}, xmm23, {rz-sae}
+0x62,0xa5,0x79,0xff,0x5b,0xf7
+
+# ATT:   vcvtph2pd {sae}, %xmm23, %ymm22
+# INTEL: vcvtph2pd ymm22, xmm23, {sae}
+0x62,0xa5,0x78,0x18,0x5a,0xf7
+
+# ATT:   vcvtph2pd {sae}, %xmm23, %ymm22 {%k7}
+# INTEL: vcvtph2pd ymm22 {k7}, xmm23, {sae}
+0x62,0xa5,0x78,0x1f,0x5a,0xf7
+
+# ATT:   vcvtph2pd {sae}, %xmm23, %ymm22 {%k7} {z}
+# INTEL: vcvtph2pd ymm22 {k7} {z}, xmm23, {sae}
+0x62,0xa5,0x78,0x9f,0x5a,0xf7
+
+# ATT:   vcvtph2ps {sae}, %xmm23, %ymm22
+# INTEL: vcvtph2ps ymm22, xmm23, {sae}
+0x62,0xa2,0x79,0x18,0x13,0xf7
+
+# ATT:   vcvtph2ps {sae}, %xmm23, %ymm22 {%k7}
+# INTEL: vcvtph2ps ymm22 {k7}, xmm23, {sae}
+0x62,0xa2,0x79,0x1f,0x13,0xf7
+
+# ATT:   vcvtph2ps {sae}, %xmm23, %ymm22 {%k7} {z}
+# INTEL: vcvtph2ps ymm22 {k7} {z}, xmm23, {sae}
+0x62,0xa2,0x79,0x9f,0x13,0xf7
+
+# ATT:   vcvtph2psx {sae}, %xmm23, %ymm22
+# INTEL: vcvtph2psx ymm22, xmm23, {sae}
+0x62,0xa6,0x79,0x18,0x13,0xf7
+
+# ATT:   vcvtph2psx {sae}, %xmm23, %ymm22 {%k7}
+# INTEL: vcvtph2psx ymm22 {k7}, xmm23, {sae}
+0x62,0xa6,0x79,0x1f,0x13,0xf7
+
+# ATT:   vcvtph2psx {sae}, %xmm23, %ymm22 {%k7} {z}
+# INTEL: vcvtph2psx ymm22 {k7} {z}, xmm23, {sae}
+0x62,0xa6,0x79,0x9f,0x13,0xf7
+
+# ATT:   vcvtph2qq {rn-sae}, %xmm23, %ymm22
+# INTEL: vcvtph2qq ymm22, xmm23, {rn-sae}
+0x62,0xa5,0x79,0x18,0x7b,0xf7
+
+# ATT:   vcvtph2qq {rd-sae}, %xmm23, %ymm22 {%k7}
+# INTEL: vcvtph2qq ymm22 {k7}, xmm23, {rd-sae}
+0x62,0xa5,0x79,0x3f,0x7b,0xf7
+
+# ATT:   vcvtph2qq {rz-sae}, %xmm23, %ymm22 {%k7} {z}
+# INTEL: vcvtph2qq ymm22 {k7} {z}, xmm23, {rz-sae}
+0x62,0xa5,0x79,0xff,0x7b,0xf7
+
+# ATT:   vcvtph2udq {rn-sae}, %xmm23, %ymm22
+# INTEL: vcvtph2udq ymm22, xmm23, {rn-sae}
+0x62,0xa5,0x78,0x18,0x79,0xf7
+
+# ATT:   vcvtph2udq {rd-sae}, %xmm23, %ymm22 {%k7}
+# INTEL: vcvtph2udq ymm22 {k7}, xmm23, {rd-sae}
+0x62,0xa5,0x78,0x3f,0x79,0xf7
+
+# ATT:   vcvtph2udq {rz-sae}, %xmm23, %ymm22 {%k7} {z}
+# INTEL: vcvtph2udq ymm22 {k7} {z}, xmm23, {rz-sae}
+0x62,0xa5,0x78,0xff,0x79,0xf7
+
+# ATT:   vcvtph2uqq {rn-sae}, %xmm23, %ymm22
+# INTEL: vcvtph2uqq ymm22, xmm23, {rn-sae}
+0x62,0xa5,0x79,0x18,0x79,0xf7
+
+# ATT:   vcvtph2uqq {rd-sae}, %xmm23, %ymm22 {%k7}
+# INTEL: vcvtph2uqq ymm22 {k7}, xmm23, {rd-sae}
+0x62,0xa5,0x79,0x3f,0x79,0xf7
+
+# ATT:   vcvtph2uqq {rz-sae}, %xmm23, %ymm22 {%k7} {z}
+# INTEL: vcvtph2uqq ymm22 {k7} {z}, xmm23, {rz-sae}
+0x62,0xa5,0x79,0xff,0x79,0xf7
+
+# ATT:   vcvtph2uw {rn-sae}, %ymm23, %ymm22
+# INTEL: vcvtph2uw ymm22, ymm23, {rn-sae}
+0x62,0xa5,0x78,0x18,0x7d,0xf7
+
+# ATT:   vcvtph2uw {rd-sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vcvtph2uw ymm22 {k7}, ymm23, {rd-sae}
+0x62,0xa5,0x78,0x3f,0x7d,0xf7
+
+# ATT:   vcvtph2uw {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vcvtph2uw ymm22 {k7} {z}, ymm23, {rz-sae}
+0x62,0xa5,0x78,0xff,0x7d,0xf7
+
+# ATT:   vcvtph2w {rn-sae}, %ymm23, %ymm22
+# INTEL: vcvtph2w ymm22, ymm23, {rn-sae}
+0x62,0xa5,0x79,0x18,0x7d,0xf7
+
+# ATT:   vcvtph2w {rd-sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vcvtph2w ymm22 {k7}, ymm23, {rd-sae}
+0x62,0xa5,0x79,0x3f,0x7d,0xf7
+
+# ATT:   vcvtph2w {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vcvtph2w ymm22 {k7} {z}, ymm23, {rz-sae}
+0x62,0xa5,0x79,0xff,0x7d,0xf7
+
+# ATT:   vcvtps2dq {rn-sae}, %ymm23, %ymm22
+# INTEL: vcvtps2dq ymm22, ymm23, {rn-sae}
+0x62,0xa1,0x79,0x18,0x5b,0xf7
+
+# ATT:   vcvtps2dq {rd-sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vcvtps2dq ymm22 {k7}, ymm23, {rd-sae}
+0x62,0xa1,0x79,0x3f,0x5b,0xf7
+
+# ATT:   vcvtps2dq {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vcvtps2dq ymm22 {k7} {z}, ymm23, {rz-sae}
+0x62,0xa1,0x79,0xff,0x5b,0xf7
+
+# ATT:   vcvtps2pd {sae}, %xmm23, %ymm22
+# INTEL: vcvtps2pd ymm22, xmm23, {sae}
+0x62,0xa1,0x78,0x18,0x5a,0xf7
+
+# ATT:   vcvtps2pd {sae}, %xmm23, %ymm22 {%k7}
+# INTEL: vcvtps2pd ymm22 {k7}, xmm23, {sae}
+0x62,0xa1,0x78,0x1f,0x5a,0xf7
+
+# ATT:   vcvtps2pd {sae}, %xmm23, %ymm22 {%k7} {z}
+# INTEL: vcvtps2pd ymm22 {k7} {z}, xmm23, {sae}
+0x62,0xa1,0x78,0x9f,0x5a,0xf7
+
+# ATT:   vcvtps2ph $123, {sae}, %ymm23, %xmm22
+# INTEL: vcvtps2ph xmm22, ymm23, {sae}, 123
+0x62,0xa3,0x79,0x18,0x1d,0xfe,0x7b
+
+# ATT:   vcvtps2ph $123, {sae}, %ymm23, %xmm22 {%k7}
+# INTEL: vcvtps2ph xmm22 {k7}, ymm23, {sae}, 123
+0x62,0xa3,0x79,0x1f,0x1d,0xfe,0x7b
+
+# ATT:   vcvtps2ph $123, {sae}, %ymm23, %xmm22 {%k7} {z}
+# INTEL: vcvtps2ph xmm22 {k7} {z}, ymm23, {sae}, 123
+0x62,0xa3,0x79,0x9f,0x1d,0xfe,0x7b
+
+# ATT:   vcvtps2phx {rn-sae}, %ymm23, %xmm22
+# INTEL: vcvtps2phx xmm22, ymm23, {rn-sae}
+0x62,0xa5,0x79,0x18,0x1d,0xf7
+
+# ATT:   vcvtps2phx {rd-sae}, %ymm23, %xmm22 {%k7}
+# INTEL: vcvtps2phx xmm22 {k7}, ymm23, {rd-sae}
+0x62,0xa5,0x79,0x3f,0x1d,0xf7
+
+# ATT:   vcvtps2phx {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+# INTEL: vcvtps2phx xmm22 {k7} {z}, ymm23, {rz-sae}
+0x62,0xa5,0x79,0xff,0x1d,0xf7
+
+# ATT:   vcvtps2qq {rn-sae}, %xmm23, %ymm22
+# INTEL: vcvtps2qq ymm22, xmm23, {rn-sae}
+0x62,0xa1,0x79,0x18,0x7b,0xf7
+
+# ATT:   vcvtps2qq {rd-sae}, %xmm23, %ymm22 {%k7}
+# INTEL: vcvtps2qq ymm22 {k7}, xmm23, {rd-sae}
+0x62,0xa1,0x79,0x3f,0x7b,0xf7
+
+# ATT:   vcvtps2qq {rz-sae}, %xmm23, %ymm22 {%k7} {z}
+# INTEL: vcvtps2qq ymm22 {k7} {z}, xmm23, {rz-sae}
+0x62,0xa1,0x79,0xff,0x7b,0xf7
+
+# ATT:   vcvtps2udq {rn-sae}, %ymm23, %ymm22
+# INTEL: vcvtps2udq ymm22, ymm23, {rn-sae}
+0x62,0xa1,0x78,0x18,0x79,0xf7
+
+# ATT:   vcvtps2udq {rd-sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vcvtps2udq ymm22 {k7}, ymm23, {rd-sae}
+0x62,0xa1,0x78,0x3f,0x79,0xf7
+
+# ATT:   vcvtps2udq {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vcvtps2udq ymm22 {k7} {z}, ymm23, {rz-sae}
+0x62,0xa1,0x78,0xff,0x79,0xf7
+
+# ATT:   vcvtps2uqq {rn-sae}, %xmm23, %ymm22
+# INTEL: vcvtps2uqq ymm22, xmm23, {rn-sae}
+0x62,0xa1,0x79,0x18,0x79,0xf7
+
+# ATT:   vcvtps2uqq {rd-sae}, %xmm23, %ymm22 {%k7}
+# INTEL: vcvtps2uqq ymm22 {k7}, xmm23, {rd-sae}
+0x62,0xa1,0x79,0x3f,0x79,0xf7
+
+# ATT:   vcvtps2uqq {rz-sae}, %xmm23, %ymm22 {%k7} {z}
+# INTEL: vcvtps2uqq ymm22 {k7} {z}, xmm23, {rz-sae}
+0x62,0xa1,0x79,0xff,0x79,0xf7
+
+# ATT:   vcvtqq2pd {rn-sae}, %ymm23, %ymm22
+# INTEL: vcvtqq2pd ymm22, ymm23, {rn-sae}
+0x62,0xa1,0xfa,0x18,0xe6,0xf7
+
+# ATT:   vcvtqq2pd {rd-sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vcvtqq2pd ymm22 {k7}, ymm23, {rd-sae}
+0x62,0xa1,0xfa,0x3f,0xe6,0xf7
+
+# ATT:   vcvtqq2pd {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vcvtqq2pd ymm22 {k7} {z}, ymm23, {rz-sae}
+0x62,0xa1,0xfa,0xff,0xe6,0xf7
+
+# ATT:   vcvtqq2ph {rn-sae}, %ymm23, %xmm22
+# INTEL: vcvtqq2ph xmm22, ymm23, {rn-sae}
+0x62,0xa5,0xf8,0x18,0x5b,0xf7
+
+# ATT:   vcvtqq2ph {rd-sae}, %ymm23, %xmm22 {%k7}
+# INTEL: vcvtqq2ph xmm22 {k7}, ymm23, {rd-sae}
+0x62,0xa5,0xf8,0x3f,0x5b,0xf7
+
+# ATT:   vcvtqq2ph {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+# INTEL: vcvtqq2ph xmm22 {k7} {z}, ymm23, {rz-sae}
+0x62,0xa5,0xf8,0xff,0x5b,0xf7
+
+# ATT:   vcvtqq2ps {rn-sae}, %ymm23, %xmm22
+# INTEL: vcvtqq2ps xmm22, ymm23, {rn-sae}
+0x62,0xa1,0xf8,0x18,0x5b,0xf7
+
+# ATT:   vcvtqq2ps {rd-sae}, %ymm23, %xmm22 {%k7}
+# INTEL: vcvtqq2ps xmm22 {k7}, ymm23, {rd-sae}
+0x62,0xa1,0xf8,0x3f,0x5b,0xf7
+
+# ATT:   vcvtqq2ps {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+# INTEL: vcvtqq2ps xmm22 {k7} {z}, ymm23, {rz-sae}
+0x62,0xa1,0xf8,0xff,0x5b,0xf7
+
+# ATT:   vcvttpd2dq {sae}, %ymm23, %xmm22
+# INTEL: vcvttpd2dq xmm22, ymm23, {sae}
+0x62,0xa1,0xf9,0x18,0xe6,0xf7
+
+# ATT:   vcvttpd2dq {sae}, %ymm23, %xmm22 {%k7}
+# INTEL: vcvttpd2dq xmm22 {k7}, ymm23, {sae}
+0x62,0xa1,0xf9,0x1f,0xe6,0xf7
+
+# ATT:   vcvttpd2dq {sae}, %ymm23, %xmm22 {%k7} {z}
+# INTEL: vcvttpd2dq xmm22 {k7} {z}, ymm23, {sae}
+0x62,0xa1,0xf9,0x9f,0xe6,0xf7
+
+# ATT:   vcvttpd2qq {sae}, %ymm23, %ymm22
+# INTEL: vcvttpd2qq ymm22, ymm23, {sae}
+0x62,0xa1,0xf9,0x18,0x7a,0xf7
+
+# ATT:   vcvttpd2qq {sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vcvttpd2qq ymm22 {k7}, ymm23, {sae}
+0x62,0xa1,0xf9,0x1f,0x7a,0xf7
+
+# ATT:   vcvttpd2qq {sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vcvttpd2qq ymm22 {k7} {z}, ymm23, {sae}
+0x62,0xa1,0xf9,0x9f,0x7a,0xf7
+
+# ATT:   vcvttpd2udq {sae}, %ymm23, %xmm22
+# INTEL: vcvttpd2udq xmm22, ymm23, {sae}
+0x62,0xa1,0xf8,0x18,0x78,0xf7
+
+# ATT:   vcvttpd2udq {sae}, %ymm23, %xmm22 {%k7}
+# INTEL: vcvttpd2udq xmm22 {k7}, ymm23, {sae}
+0x62,0xa1,0xf8,0x1f,0x78,0xf7
+
+# ATT:   vcvttpd2udq {sae}, %ymm23, %xmm22 {%k7} {z}
+# INTEL: vcvttpd2udq xmm22 {k7} {z}, ymm23, {sae}
+0x62,0xa1,0xf8,0x9f,0x78,0xf7
+
+# ATT:   vcvttpd2uqq {sae}, %ymm23, %ymm22
+# INTEL: vcvttpd2uqq ymm22, ymm23, {sae}
+0x62,0xa1,0xf9,0x18,0x78,0xf7
+
+# ATT:   vcvttpd2uqq {sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vcvttpd2uqq ymm22 {k7}, ymm23, {sae}
+0x62,0xa1,0xf9,0x1f,0x78,0xf7
+
+# ATT:   vcvttpd2uqq {sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vcvttpd2uqq ymm22 {k7} {z}, ymm23, {sae}
+0x62,0xa1,0xf9,0x9f,0x78,0xf7
+
+# ATT:   vcvttph2dq {sae}, %xmm23, %ymm22
+# INTEL: vcvttph2dq ymm22, xmm23, {sae}
+0x62,0xa5,0x7a,0x18,0x5b,0xf7
+
+# ATT:   vcvttph2dq {sae}, %xmm23, %ymm22 {%k7}
+# INTEL: vcvttph2dq ymm22 {k7}, xmm23, {sae}
+0x62,0xa5,0x7a,0x1f,0x5b,0xf7
+
+# ATT:   vcvttph2dq {sae}, %xmm23, %ymm22 {%k7} {z}
+# INTEL: vcvttph2dq ymm22 {k7} {z}, xmm23, {sae}
+0x62,0xa5,0x7a,0x9f,0x5b,0xf7
+
+# ATT:   vcvttph2qq {sae}, %xmm23, %ymm22
+# INTEL: vcvttph2qq ymm22, xmm23, {sae}
+0x62,0xa5,0x79,0x18,0x7a,0xf7
+
+# ATT:   vcvttph2qq {sae}, %xmm23, %ymm22 {%k7}
+# INTEL: vcvttph2qq ymm22 {k7}, xmm23, {sae}
+0x62,0xa5,0x79,0x1f,0x7a,0xf7
+
+# ATT:   vcvttph2qq {sae}, %xmm23, %ymm22 {%k7} {z}
+# INTEL: vcvttph2qq ymm22 {k7} {z}, xmm23, {sae}
+0x62,0xa5,0x79,0x9f,0x7a,0xf7
+
+# ATT:   vcvttph2udq {sae}, %xmm23, %ymm22
+# INTEL: vcvttph2udq ymm22, xmm23, {sae}
+0x62,0xa5,0x78,0x18,0x78,0xf7
+
+# ATT:   vcvttph2udq {sae}, %xmm23, %ymm22 {%k7}
+# INTEL: vcvttph2udq ymm22 {k7}, xmm23, {sae}
+0x62,0xa5,0x78,0x1f,0x78,0xf7
+
+# ATT:   vcvttph2udq {sae}, %xmm23, %ymm22 {%k7} {z}
+# INTEL: vcvttph2udq ymm22 {k7} {z}, xmm23, {sae}
+0x62,0xa5,0x78,0x9f,0x78,0xf7
+
+# ATT:   vcvttph2uqq {sae}, %xmm23, %ymm22
+# INTEL: vcvttph2uqq ymm22, xmm23, {sae}
+0x62,0xa5,0x79,0x18,0x78,0xf7
+
+# ATT:   vcvttph2uqq {sae}, %xmm23, %ymm22 {%k7}
+# INTEL: vcvttph2uqq ymm22 {k7}, xmm23, {sae}
+0x62,0xa5,0x79,0x1f,0x78,0xf7
+
+# ATT:   vcvttph2uqq {sae}, %xmm23, %ymm22 {%k7} {z}
+# INTEL: vcvttph2uqq ymm22 {k7} {z}, xmm23, {sae}
+0x62,0xa5,0x79,0x9f,0x78,0xf7
+
+# ATT:   vcvttph2uw {sae}, %ymm23, %ymm22
+# INTEL: vcvttph2uw ymm22, ymm23, {sae}
+0x62,0xa5,0x78,0x18,0x7c,0xf7
+
+# ATT:   vcvttph2uw {sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vcvttph2uw ymm22 {k7}, ymm23, {sae}
+0x62,0xa5,0x78,0x1f,0x7c,0xf7
+
+# ATT:   vcvttph2uw {sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vcvttph2uw ymm22 {k7} {z}, ymm23, {sae}
+0x62,0xa5,0x78,0x9f,0x7c,0xf7
+
+# ATT:   vcvttph2w {sae}, %ymm23, %ymm22
+# INTEL: vcvttph2w ymm22, ymm23, {sae}
+0x62,0xa5,0x79,0x18,0x7c,0xf7
+
+# ATT:   vcvttph2w {sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vcvttph2w ymm22 {k7}, ymm23, {sae}
+0x62,0xa5,0x79,0x1f,0x7c,0xf7
+
+# ATT:   vcvttph2w {sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vcvttph2w ymm22 {k7} {z}, ymm23, {sae}
+0x62,0xa5,0x79,0x9f,0x7c,0xf7
+
+# ATT:   vcvttps2dq {sae}, %ymm23, %ymm22
+# INTEL: vcvttps2dq ymm22, ymm23, {sae}
+0x62,0xa1,0x7a,0x18,0x5b,0xf7
+
+# ATT:   vcvttps2dq {sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vcvttps2dq ymm22 {k7}, ymm23, {sae}
+0x62,0xa1,0x7a,0x1f,0x5b,0xf7
+
+# ATT:   vcvttps2dq {sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vcvttps2dq ymm22 {k7} {z}, ymm23, {sae}
+0x62,0xa1,0x7a,0x9f,0x5b,0xf7
+
+# ATT:   vcvttps2qq {sae}, %xmm23, %ymm22
+# INTEL: vcvttps2qq ymm22, xmm23, {sae}
+0x62,0xa1,0x79,0x18,0x7a,0xf7
+
+# ATT:   vcvttps2qq {sae}, %xmm23, %ymm22 {%k7}
+# INTEL: vcvttps2qq ymm22 {k7}, xmm23, {sae}
+0x62,0xa1,0x79,0x1f,0x7a,0xf7
+
+# ATT:   vcvttps2qq {sae}, %xmm23, %ymm22 {%k7} {z}
+# INTEL: vcvttps2qq ymm22 {k7} {z}, xmm23, {sae}
+0x62,0xa1,0x79,0x9f,0x7a,0xf7
+
+# ATT:   vcvttps2udq {sae}, %ymm23, %ymm22
+# INTEL: vcvttps2udq ymm22, ymm23, {sae}
+0x62,0xa1,0x78,0x18,0x78,0xf7
+
+# ATT:   vcvttps2udq {sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vcvttps2udq ymm22 {k7}, ymm23, {sae}
+0x62,0xa1,0x78,0x1f,0x78,0xf7
+
+# ATT:   vcvttps2udq {sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vcvttps2udq ymm22 {k7} {z}, ymm23, {sae}
+0x62,0xa1,0x78,0x9f,0x78,0xf7
+
+# ATT:   vcvttps2uqq {sae}, %xmm23, %ymm22
+# INTEL: vcvttps2uqq ymm22, xmm23, {sae}
+0x62,0xa1,0x79,0x18,0x78,0xf7
+
+# ATT:   vcvttps2uqq {sae}, %xmm23, %ymm22 {%k7}
+# INTEL: vcvttps2uqq ymm22 {k7}, xmm23, {sae}
+0x62,0xa1,0x79,0x1f,0x78,0xf7
+
+# ATT:   vcvttps2uqq {sae}, %xmm23, %ymm22 {%k7} {z}
+# INTEL: vcvttps2uqq ymm22 {k7} {z}, xmm23, {sae}
+0x62,0xa1,0x79,0x9f,0x78,0xf7
+
+# ATT:   vcvtudq2ph {rn-sae}, %ymm23, %xmm22
+# INTEL: vcvtudq2ph xmm22, ymm23, {rn-sae}
+0x62,0xa5,0x7b,0x18,0x7a,0xf7
+
+# ATT:   vcvtudq2ph {rd-sae}, %ymm23, %xmm22 {%k7}
+# INTEL: vcvtudq2ph xmm22 {k7}, ymm23, {rd-sae}
+0x62,0xa5,0x7b,0x3f,0x7a,0xf7
+
+# ATT:   vcvtudq2ph {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+# INTEL: vcvtudq2ph xmm22 {k7} {z}, ymm23, {rz-sae}
+0x62,0xa5,0x7b,0xff,0x7a,0xf7
+
+# ATT:   vcvtudq2ps {rn-sae}, %ymm23, %ymm22
+# INTEL: vcvtudq2ps ymm22, ymm23, {rn-sae}
+0x62,0xa1,0x7b,0x18,0x7a,0xf7
+
+# ATT:   vcvtudq2ps {rd-sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vcvtudq2ps ymm22 {k7}, ymm23, {rd-sae}
+0x62,0xa1,0x7b,0x3f,0x7a,0xf7
+
+# ATT:   vcvtudq2ps {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vcvtudq2ps ymm22 {k7} {z}, ymm23, {rz-sae}
+0x62,0xa1,0x7b,0xff,0x7a,0xf7
+
+# ATT:   vcvtuqq2pd {rn-sae}, %ymm23, %ymm22
+# INTEL: vcvtuqq2pd ymm22, ymm23, {rn-sae}
+0x62,0xa1,0xfa,0x18,0x7a,0xf7
+
+# ATT:   vcvtuqq2pd {rd-sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vcvtuqq2pd ymm22 {k7}, ymm23, {rd-sae}
+0x62,0xa1,0xfa,0x3f,0x7a,0xf7
+
+# ATT:   vcvtuqq2pd {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vcvtuqq2pd ymm22 {k7} {z}, ymm23, {rz-sae}
+0x62,0xa1,0xfa,0xff,0x7a,0xf7
+
+# ATT:   vcvtuqq2ph {rn-sae}, %ymm23, %xmm22
+# INTEL: vcvtuqq2ph xmm22, ymm23, {rn-sae}
+0x62,0xa5,0xfb,0x18,0x7a,0xf7
+
+# ATT:   vcvtuqq2ph {rd-sae}, %ymm23, %xmm22 {%k7}
+# INTEL: vcvtuqq2ph xmm22 {k7}, ymm23, {rd-sae}
+0x62,0xa5,0xfb,0x3f,0x7a,0xf7
+
+# ATT:   vcvtuqq2ph {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+# INTEL: vcvtuqq2ph xmm22 {k7} {z}, ymm23, {rz-sae}
+0x62,0xa5,0xfb,0xff,0x7a,0xf7
+
+# ATT:   vcvtuqq2ps {rn-sae}, %ymm23, %xmm22
+# INTEL: vcvtuqq2ps xmm22, ymm23, {rn-sae}
+0x62,0xa1,0xfb,0x18,0x7a,0xf7
+
+# ATT:   vcvtuqq2ps {rd-sae}, %ymm23, %xmm22 {%k7}
+# INTEL: vcvtuqq2ps xmm22 {k7}, ymm23, {rd-sae}
+0x62,0xa1,0xfb,0x3f,0x7a,0xf7
+
+# ATT:   vcvtuqq2ps {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+# INTEL: vcvtuqq2ps xmm22 {k7} {z}, ymm23, {rz-sae}
+0x62,0xa1,0xfb,0xff,0x7a,0xf7
+
+# ATT:   vcvtuw2ph {rn-sae}, %ymm23, %ymm22
+# INTEL: vcvtuw2ph ymm22, ymm23, {rn-sae}
+0x62,0xa5,0x7b,0x18,0x7d,0xf7
+
+# ATT:   vcvtuw2ph {rd-sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vcvtuw2ph ymm22 {k7}, ymm23, {rd-sae}
+0x62,0xa5,0x7b,0x3f,0x7d,0xf7
+
+# ATT:   vcvtuw2ph {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vcvtuw2ph ymm22 {k7} {z}, ymm23, {rz-sae}
+0x62,0xa5,0x7b,0xff,0x7d,0xf7
+
+# ATT:   vcvtw2ph {rn-sae}, %ymm23, %ymm22
+# INTEL: vcvtw2ph ymm22, ymm23, {rn-sae}
+0x62,0xa5,0x7a,0x18,0x7d,0xf7
+
+# ATT:   vcvtw2ph {rd-sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vcvtw2ph ymm22 {k7}, ymm23, {rd-sae}
+0x62,0xa5,0x7a,0x3f,0x7d,0xf7
+
+# ATT:   vcvtw2ph {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vcvtw2ph ymm22 {k7} {z}, ymm23, {rz-sae}
+0x62,0xa5,0x7a,0xff,0x7d,0xf7
+
+# ATT:   vdivpd {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vdivpd ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x81,0xc1,0x10,0x5e,0xf0
+
+# ATT:   vdivpd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vdivpd ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x81,0xc1,0x37,0x5e,0xf0
+
+# ATT:   vdivpd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vdivpd ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x81,0xc1,0xf7,0x5e,0xf0
+
+# ATT:   vdivph {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vdivph ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x85,0x40,0x10,0x5e,0xf0
+
+# ATT:   vdivph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vdivph ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x85,0x40,0x37,0x5e,0xf0
+
+# ATT:   vdivph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vdivph ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x85,0x40,0xf7,0x5e,0xf0
+
+# ATT:   vdivps {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vdivps ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x81,0x40,0x10,0x5e,0xf0
+
+# ATT:   vdivps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vdivps ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x81,0x40,0x37,0x5e,0xf0
+
+# ATT:   vdivps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vdivps ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x81,0x40,0xf7,0x5e,0xf0
+
+# ATT:   vfcmaddcph {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfcmaddcph ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x86,0x43,0x10,0x56,0xf0
+
+# ATT:   vfcmaddcph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfcmaddcph ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x86,0x43,0x37,0x56,0xf0
+
+# ATT:   vfcmaddcph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfcmaddcph ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x86,0x43,0xf7,0x56,0xf0
+
+# ATT:   vfcmulcph {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfcmulcph ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x86,0x43,0x10,0xd6,0xf0
+
+# ATT:   vfcmulcph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfcmulcph ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x86,0x43,0x37,0xd6,0xf0
+
+# ATT:   vfcmulcph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfcmulcph ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x86,0x43,0xf7,0xd6,0xf0
+
+# ATT:   vfixupimmpd $123, {sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfixupimmpd ymm22, ymm23, ymm24, {sae}, 123
+0x62,0x83,0xc1,0x10,0x54,0xf0,0x7b
+
+# ATT:   vfixupimmpd $123, {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfixupimmpd ymm22 {k7}, ymm23, ymm24, {sae}, 123
+0x62,0x83,0xc1,0x17,0x54,0xf0,0x7b
+
+# ATT:   vfixupimmpd $123, {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfixupimmpd ymm22 {k7} {z}, ymm23, ymm24, {sae}, 123
+0x62,0x83,0xc1,0x97,0x54,0xf0,0x7b
+
+# ATT:   vfixupimmps $123, {sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfixupimmps ymm22, ymm23, ymm24, {sae}, 123
+0x62,0x83,0x41,0x10,0x54,0xf0,0x7b
+
+# ATT:   vfixupimmps $123, {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfixupimmps ymm22 {k7}, ymm23, ymm24, {sae}, 123
+0x62,0x83,0x41,0x17,0x54,0xf0,0x7b
+
+# ATT:   vfixupimmps $123, {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfixupimmps ymm22 {k7} {z}, ymm23, ymm24, {sae}, 123
+0x62,0x83,0x41,0x97,0x54,0xf0,0x7b
+
+# ATT:   vfmadd132pd {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmadd132pd ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0xc1,0x10,0x98,0xf0
+
+# ATT:   vfmadd132pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmadd132pd ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0xc1,0x37,0x98,0xf0
+
+# ATT:   vfmadd132pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmadd132pd ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0xc1,0xf7,0x98,0xf0
+
+# ATT:   vfmadd132ph {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmadd132ph ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x86,0x41,0x10,0x98,0xf0
+
+# ATT:   vfmadd132ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmadd132ph ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x86,0x41,0x37,0x98,0xf0
+
+# ATT:   vfmadd132ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmadd132ph ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x86,0x41,0xf7,0x98,0xf0
+
+# ATT:   vfmadd132ps {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmadd132ps ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0x41,0x10,0x98,0xf0
+
+# ATT:   vfmadd132ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmadd132ps ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0x41,0x37,0x98,0xf0
+
+# ATT:   vfmadd132ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmadd132ps ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0x41,0xf7,0x98,0xf0
+
+# ATT:   vfmadd213pd {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmadd213pd ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0xc1,0x10,0xa8,0xf0
+
+# ATT:   vfmadd213pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmadd213pd ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0xc1,0x37,0xa8,0xf0
+
+# ATT:   vfmadd213pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmadd213pd ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0xc1,0xf7,0xa8,0xf0
+
+# ATT:   vfmadd213ph {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmadd213ph ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x86,0x41,0x10,0xa8,0xf0
+
+# ATT:   vfmadd213ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmadd213ph ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x86,0x41,0x37,0xa8,0xf0
+
+# ATT:   vfmadd213ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmadd213ph ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x86,0x41,0xf7,0xa8,0xf0
+
+# ATT:   vfmadd213ps {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmadd213ps ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0x41,0x10,0xa8,0xf0
+
+# ATT:   vfmadd213ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmadd213ps ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0x41,0x37,0xa8,0xf0
+
+# ATT:   vfmadd213ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmadd213ps ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0x41,0xf7,0xa8,0xf0
+
+# ATT:   vfmadd231pd {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmadd231pd ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0xc1,0x10,0xb8,0xf0
+
+# ATT:   vfmadd231pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmadd231pd ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0xc1,0x37,0xb8,0xf0
+
+# ATT:   vfmadd231pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmadd231pd ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0xc1,0xf7,0xb8,0xf0
+
+# ATT:   vfmadd231ph {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmadd231ph ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x86,0x41,0x10,0xb8,0xf0
+
+# ATT:   vfmadd231ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmadd231ph ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x86,0x41,0x37,0xb8,0xf0
+
+# ATT:   vfmadd231ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmadd231ph ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x86,0x41,0xf7,0xb8,0xf0
+
+# ATT:   vfmadd231ps {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmadd231ps ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0x41,0x10,0xb8,0xf0
+
+# ATT:   vfmadd231ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmadd231ps ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0x41,0x37,0xb8,0xf0
+
+# ATT:   vfmadd231ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmadd231ps ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0x41,0xf7,0xb8,0xf0
+
+# ATT:   vfmaddcph {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmaddcph ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x86,0x42,0x10,0x56,0xf0
+
+# ATT:   vfmaddcph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmaddcph ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x86,0x42,0x37,0x56,0xf0
+
+# ATT:   vfmaddcph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmaddcph ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x86,0x42,0xf7,0x56,0xf0
+
+# ATT:   vfmaddsub132pd {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmaddsub132pd ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0xc1,0x10,0x96,0xf0
+
+# ATT:   vfmaddsub132pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmaddsub132pd ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0xc1,0x37,0x96,0xf0
+
+# ATT:   vfmaddsub132pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmaddsub132pd ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0xc1,0xf7,0x96,0xf0
+
+# ATT:   vfmaddsub132ph {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmaddsub132ph ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x86,0x41,0x10,0x96,0xf0
+
+# ATT:   vfmaddsub132ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmaddsub132ph ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x86,0x41,0x37,0x96,0xf0
+
+# ATT:   vfmaddsub132ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmaddsub132ph ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x86,0x41,0xf7,0x96,0xf0
+
+# ATT:   vfmaddsub132ps {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmaddsub132ps ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0x41,0x10,0x96,0xf0
+
+# ATT:   vfmaddsub132ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmaddsub132ps ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0x41,0x37,0x96,0xf0
+
+# ATT:   vfmaddsub132ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmaddsub132ps ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0x41,0xf7,0x96,0xf0
+
+# ATT:   vfmaddsub213pd {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmaddsub213pd ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0xc1,0x10,0xa6,0xf0
+
+# ATT:   vfmaddsub213pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmaddsub213pd ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0xc1,0x37,0xa6,0xf0
+
+# ATT:   vfmaddsub213pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmaddsub213pd ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0xc1,0xf7,0xa6,0xf0
+
+# ATT:   vfmaddsub213ph {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmaddsub213ph ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x86,0x41,0x10,0xa6,0xf0
+
+# ATT:   vfmaddsub213ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmaddsub213ph ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x86,0x41,0x37,0xa6,0xf0
+
+# ATT:   vfmaddsub213ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmaddsub213ph ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x86,0x41,0xf7,0xa6,0xf0
+
+# ATT:   vfmaddsub213ps {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmaddsub213ps ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0x41,0x10,0xa6,0xf0
+
+# ATT:   vfmaddsub213ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmaddsub213ps ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0x41,0x37,0xa6,0xf0
+
+# ATT:   vfmaddsub213ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmaddsub213ps ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0x41,0xf7,0xa6,0xf0
+
+# ATT:   vfmaddsub231pd {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmaddsub231pd ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0xc1,0x10,0xb6,0xf0
+
+# ATT:   vfmaddsub231pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmaddsub231pd ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0xc1,0x37,0xb6,0xf0
+
+# ATT:   vfmaddsub231pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmaddsub231pd ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0xc1,0xf7,0xb6,0xf0
+
+# ATT:   vfmaddsub231ph {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmaddsub231ph ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x86,0x41,0x10,0xb6,0xf0
+
+# ATT:   vfmaddsub231ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmaddsub231ph ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x86,0x41,0x37,0xb6,0xf0
+
+# ATT:   vfmaddsub231ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmaddsub231ph ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x86,0x41,0xf7,0xb6,0xf0
+
+# ATT:   vfmaddsub231ps {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmaddsub231ps ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0x41,0x10,0xb6,0xf0
+
+# ATT:   vfmaddsub231ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmaddsub231ps ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0x41,0x37,0xb6,0xf0
+
+# ATT:   vfmaddsub231ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmaddsub231ps ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0x41,0xf7,0xb6,0xf0
+
+# ATT:   vfmsub132pd {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmsub132pd ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0xc1,0x10,0x9a,0xf0
+
+# ATT:   vfmsub132pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmsub132pd ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0xc1,0x37,0x9a,0xf0
+
+# ATT:   vfmsub132pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsub132pd ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0xc1,0xf7,0x9a,0xf0
+
+# ATT:   vfmsub132ph {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmsub132ph ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x86,0x41,0x10,0x9a,0xf0
+
+# ATT:   vfmsub132ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmsub132ph ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x86,0x41,0x37,0x9a,0xf0
+
+# ATT:   vfmsub132ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsub132ph ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x86,0x41,0xf7,0x9a,0xf0
+
+# ATT:   vfmsub132ps {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmsub132ps ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0x41,0x10,0x9a,0xf0
+
+# ATT:   vfmsub132ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmsub132ps ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0x41,0x37,0x9a,0xf0
+
+# ATT:   vfmsub132ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsub132ps ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0x41,0xf7,0x9a,0xf0
+
+# ATT:   vfmsub213pd {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmsub213pd ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0xc1,0x10,0xaa,0xf0
+
+# ATT:   vfmsub213pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmsub213pd ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0xc1,0x37,0xaa,0xf0
+
+# ATT:   vfmsub213pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsub213pd ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0xc1,0xf7,0xaa,0xf0
+
+# ATT:   vfmsub213ph {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmsub213ph ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x86,0x41,0x10,0xaa,0xf0
+
+# ATT:   vfmsub213ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmsub213ph ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x86,0x41,0x37,0xaa,0xf0
+
+# ATT:   vfmsub213ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsub213ph ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x86,0x41,0xf7,0xaa,0xf0
+
+# ATT:   vfmsub213ps {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmsub213ps ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0x41,0x10,0xaa,0xf0
+
+# ATT:   vfmsub213ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmsub213ps ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0x41,0x37,0xaa,0xf0
+
+# ATT:   vfmsub213ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsub213ps ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0x41,0xf7,0xaa,0xf0
+
+# ATT:   vfmsub231pd {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmsub231pd ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0xc1,0x10,0xba,0xf0
+
+# ATT:   vfmsub231pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmsub231pd ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0xc1,0x37,0xba,0xf0
+
+# ATT:   vfmsub231pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsub231pd ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0xc1,0xf7,0xba,0xf0
+
+# ATT:   vfmsub231ph {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmsub231ph ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x86,0x41,0x10,0xba,0xf0
+
+# ATT:   vfmsub231ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmsub231ph ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x86,0x41,0x37,0xba,0xf0
+
+# ATT:   vfmsub231ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsub231ph ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x86,0x41,0xf7,0xba,0xf0
+
+# ATT:   vfmsub231ps {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmsub231ps ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0x41,0x10,0xba,0xf0
+
+# ATT:   vfmsub231ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmsub231ps ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0x41,0x37,0xba,0xf0
+
+# ATT:   vfmsub231ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsub231ps ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0x41,0xf7,0xba,0xf0
+
+# ATT:   vfmsubadd132pd {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmsubadd132pd ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0xc1,0x10,0x97,0xf0
+
+# ATT:   vfmsubadd132pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmsubadd132pd ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0xc1,0x37,0x97,0xf0
+
+# ATT:   vfmsubadd132pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsubadd132pd ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0xc1,0xf7,0x97,0xf0
+
+# ATT:   vfmsubadd132ph {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmsubadd132ph ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x86,0x41,0x10,0x97,0xf0
+
+# ATT:   vfmsubadd132ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmsubadd132ph ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x86,0x41,0x37,0x97,0xf0
+
+# ATT:   vfmsubadd132ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsubadd132ph ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x86,0x41,0xf7,0x97,0xf0
+
+# ATT:   vfmsubadd132ps {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmsubadd132ps ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0x41,0x10,0x97,0xf0
+
+# ATT:   vfmsubadd132ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmsubadd132ps ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0x41,0x37,0x97,0xf0
+
+# ATT:   vfmsubadd132ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsubadd132ps ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0x41,0xf7,0x97,0xf0
+
+# ATT:   vfmsubadd213pd {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmsubadd213pd ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0xc1,0x10,0xa7,0xf0
+
+# ATT:   vfmsubadd213pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmsubadd213pd ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0xc1,0x37,0xa7,0xf0
+
+# ATT:   vfmsubadd213pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsubadd213pd ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0xc1,0xf7,0xa7,0xf0
+
+# ATT:   vfmsubadd213ph {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmsubadd213ph ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x86,0x41,0x10,0xa7,0xf0
+
+# ATT:   vfmsubadd213ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmsubadd213ph ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x86,0x41,0x37,0xa7,0xf0
+
+# ATT:   vfmsubadd213ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsubadd213ph ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x86,0x41,0xf7,0xa7,0xf0
+
+# ATT:   vfmsubadd213ps {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmsubadd213ps ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0x41,0x10,0xa7,0xf0
+
+# ATT:   vfmsubadd213ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmsubadd213ps ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0x41,0x37,0xa7,0xf0
+
+# ATT:   vfmsubadd213ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsubadd213ps ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0x41,0xf7,0xa7,0xf0
+
+# ATT:   vfmsubadd231pd {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmsubadd231pd ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0xc1,0x10,0xb7,0xf0
+
+# ATT:   vfmsubadd231pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmsubadd231pd ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0xc1,0x37,0xb7,0xf0
+
+# ATT:   vfmsubadd231pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsubadd231pd ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0xc1,0xf7,0xb7,0xf0
+
+# ATT:   vfmsubadd231ph {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmsubadd231ph ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x86,0x41,0x10,0xb7,0xf0
+
+# ATT:   vfmsubadd231ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmsubadd231ph ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x86,0x41,0x37,0xb7,0xf0
+
+# ATT:   vfmsubadd231ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsubadd231ph ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x86,0x41,0xf7,0xb7,0xf0
+
+# ATT:   vfmsubadd231ps {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmsubadd231ps ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0x41,0x10,0xb7,0xf0
+
+# ATT:   vfmsubadd231ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmsubadd231ps ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0x41,0x37,0xb7,0xf0
+
+# ATT:   vfmsubadd231ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmsubadd231ps ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0x41,0xf7,0xb7,0xf0
+
+# ATT:   vfmulcph {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfmulcph ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x86,0x42,0x10,0xd6,0xf0
+
+# ATT:   vfmulcph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfmulcph ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x86,0x42,0x37,0xd6,0xf0
+
+# ATT:   vfmulcph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfmulcph ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x86,0x42,0xf7,0xd6,0xf0
+
+# ATT:   vfnmadd132pd {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfnmadd132pd ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0xc1,0x10,0x9c,0xf0
+
+# ATT:   vfnmadd132pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmadd132pd ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0xc1,0x37,0x9c,0xf0
+
+# ATT:   vfnmadd132pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmadd132pd ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0xc1,0xf7,0x9c,0xf0
+
+# ATT:   vfnmadd132ph {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfnmadd132ph ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x86,0x41,0x10,0x9c,0xf0
+
+# ATT:   vfnmadd132ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmadd132ph ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x86,0x41,0x37,0x9c,0xf0
+
+# ATT:   vfnmadd132ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmadd132ph ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x86,0x41,0xf7,0x9c,0xf0
+
+# ATT:   vfnmadd132ps {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfnmadd132ps ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0x41,0x10,0x9c,0xf0
+
+# ATT:   vfnmadd132ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmadd132ps ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0x41,0x37,0x9c,0xf0
+
+# ATT:   vfnmadd132ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmadd132ps ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0x41,0xf7,0x9c,0xf0
+
+# ATT:   vfnmadd213pd {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfnmadd213pd ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0xc1,0x10,0xac,0xf0
+
+# ATT:   vfnmadd213pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmadd213pd ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0xc1,0x37,0xac,0xf0
+
+# ATT:   vfnmadd213pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmadd213pd ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0xc1,0xf7,0xac,0xf0
+
+# ATT:   vfnmadd213ph {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfnmadd213ph ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x86,0x41,0x10,0xac,0xf0
+
+# ATT:   vfnmadd213ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmadd213ph ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x86,0x41,0x37,0xac,0xf0
+
+# ATT:   vfnmadd213ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmadd213ph ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x86,0x41,0xf7,0xac,0xf0
+
+# ATT:   vfnmadd213ps {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfnmadd213ps ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0x41,0x10,0xac,0xf0
+
+# ATT:   vfnmadd213ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmadd213ps ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0x41,0x37,0xac,0xf0
+
+# ATT:   vfnmadd213ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmadd213ps ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0x41,0xf7,0xac,0xf0
+
+# ATT:   vfnmadd231pd {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfnmadd231pd ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0xc1,0x10,0xbc,0xf0
+
+# ATT:   vfnmadd231pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmadd231pd ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0xc1,0x37,0xbc,0xf0
+
+# ATT:   vfnmadd231pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmadd231pd ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0xc1,0xf7,0xbc,0xf0
+
+# ATT:   vfnmadd231ph {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfnmadd231ph ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x86,0x41,0x10,0xbc,0xf0
+
+# ATT:   vfnmadd231ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmadd231ph ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x86,0x41,0x37,0xbc,0xf0
+
+# ATT:   vfnmadd231ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmadd231ph ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x86,0x41,0xf7,0xbc,0xf0
+
+# ATT:   vfnmadd231ps {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfnmadd231ps ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0x41,0x10,0xbc,0xf0
+
+# ATT:   vfnmadd231ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmadd231ps ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0x41,0x37,0xbc,0xf0
+
+# ATT:   vfnmadd231ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmadd231ps ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0x41,0xf7,0xbc,0xf0
+
+# ATT:   vfnmsub132pd {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfnmsub132pd ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0xc1,0x10,0x9e,0xf0
+
+# ATT:   vfnmsub132pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmsub132pd ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0xc1,0x37,0x9e,0xf0
+
+# ATT:   vfnmsub132pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmsub132pd ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0xc1,0xf7,0x9e,0xf0
+
+# ATT:   vfnmsub132ph {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfnmsub132ph ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x86,0x41,0x10,0x9e,0xf0
+
+# ATT:   vfnmsub132ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmsub132ph ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x86,0x41,0x37,0x9e,0xf0
+
+# ATT:   vfnmsub132ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmsub132ph ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x86,0x41,0xf7,0x9e,0xf0
+
+# ATT:   vfnmsub132ps {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfnmsub132ps ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0x41,0x10,0x9e,0xf0
+
+# ATT:   vfnmsub132ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmsub132ps ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0x41,0x37,0x9e,0xf0
+
+# ATT:   vfnmsub132ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmsub132ps ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0x41,0xf7,0x9e,0xf0
+
+# ATT:   vfnmsub213pd {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfnmsub213pd ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0xc1,0x10,0xae,0xf0
+
+# ATT:   vfnmsub213pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmsub213pd ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0xc1,0x37,0xae,0xf0
+
+# ATT:   vfnmsub213pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmsub213pd ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0xc1,0xf7,0xae,0xf0
+
+# ATT:   vfnmsub213ph {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfnmsub213ph ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x86,0x41,0x10,0xae,0xf0
+
+# ATT:   vfnmsub213ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmsub213ph ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x86,0x41,0x37,0xae,0xf0
+
+# ATT:   vfnmsub213ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmsub213ph ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x86,0x41,0xf7,0xae,0xf0
+
+# ATT:   vfnmsub213ps {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfnmsub213ps ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0x41,0x10,0xae,0xf0
+
+# ATT:   vfnmsub213ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmsub213ps ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0x41,0x37,0xae,0xf0
+
+# ATT:   vfnmsub213ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmsub213ps ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0x41,0xf7,0xae,0xf0
+
+# ATT:   vfnmsub231pd {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfnmsub231pd ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0xc1,0x10,0xbe,0xf0
+
+# ATT:   vfnmsub231pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmsub231pd ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0xc1,0x37,0xbe,0xf0
+
+# ATT:   vfnmsub231pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmsub231pd ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0xc1,0xf7,0xbe,0xf0
+
+# ATT:   vfnmsub231ph {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfnmsub231ph ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x86,0x41,0x10,0xbe,0xf0
+
+# ATT:   vfnmsub231ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmsub231ph ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x86,0x41,0x37,0xbe,0xf0
+
+# ATT:   vfnmsub231ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmsub231ph ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x86,0x41,0xf7,0xbe,0xf0
+
+# ATT:   vfnmsub231ps {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vfnmsub231ps ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0x41,0x10,0xbe,0xf0
+
+# ATT:   vfnmsub231ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vfnmsub231ps ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0x41,0x37,0xbe,0xf0
+
+# ATT:   vfnmsub231ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vfnmsub231ps ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0x41,0xf7,0xbe,0xf0
+
+# ATT:   vgetexppd {sae}, %ymm23, %ymm22
+# INTEL: vgetexppd ymm22, ymm23, {sae}
+0x62,0xa2,0xf9,0x18,0x42,0xf7
+
+# ATT:   vgetexppd {sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vgetexppd ymm22 {k7}, ymm23, {sae}
+0x62,0xa2,0xf9,0x1f,0x42,0xf7
+
+# ATT:   vgetexppd {sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vgetexppd ymm22 {k7} {z}, ymm23, {sae}
+0x62,0xa2,0xf9,0x9f,0x42,0xf7
+
+# ATT:   vgetexpph {sae}, %ymm23, %ymm22
+# INTEL: vgetexpph ymm22, ymm23, {sae}
+0x62,0xa6,0x79,0x18,0x42,0xf7
+
+# ATT:   vgetexpph {sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vgetexpph ymm22 {k7}, ymm23, {sae}
+0x62,0xa6,0x79,0x1f,0x42,0xf7
+
+# ATT:   vgetexpph {sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vgetexpph ymm22 {k7} {z}, ymm23, {sae}
+0x62,0xa6,0x79,0x9f,0x42,0xf7
+
+# ATT:   vgetexpps {sae}, %ymm23, %ymm22
+# INTEL: vgetexpps ymm22, ymm23, {sae}
+0x62,0xa2,0x79,0x18,0x42,0xf7
+
+# ATT:   vgetexpps {sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vgetexpps ymm22 {k7}, ymm23, {sae}
+0x62,0xa2,0x79,0x1f,0x42,0xf7
+
+# ATT:   vgetexpps {sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vgetexpps ymm22 {k7} {z}, ymm23, {sae}
+0x62,0xa2,0x79,0x9f,0x42,0xf7
+
+# ATT:   vgetmantpd $123, {sae}, %ymm23, %ymm22
+# INTEL: vgetmantpd ymm22, ymm23, {sae}, 123
+0x62,0xa3,0xf9,0x18,0x26,0xf7,0x7b
+
+# ATT:   vgetmantpd $123, {sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vgetmantpd ymm22 {k7}, ymm23, {sae}, 123
+0x62,0xa3,0xf9,0x1f,0x26,0xf7,0x7b
+
+# ATT:   vgetmantpd $123, {sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vgetmantpd ymm22 {k7} {z}, ymm23, {sae}, 123
+0x62,0xa3,0xf9,0x9f,0x26,0xf7,0x7b
+
+# ATT:   vgetmantph $123, {sae}, %ymm23, %ymm22
+# INTEL: vgetmantph ymm22, ymm23, {sae}, 123
+0x62,0xa3,0x78,0x18,0x26,0xf7,0x7b
+
+# ATT:   vgetmantph $123, {sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vgetmantph ymm22 {k7}, ymm23, {sae}, 123
+0x62,0xa3,0x78,0x1f,0x26,0xf7,0x7b
+
+# ATT:   vgetmantph $123, {sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vgetmantph ymm22 {k7} {z}, ymm23, {sae}, 123
+0x62,0xa3,0x78,0x9f,0x26,0xf7,0x7b
+
+# ATT:   vgetmantps $123, {sae}, %ymm23, %ymm22
+# INTEL: vgetmantps ymm22, ymm23, {sae}, 123
+0x62,0xa3,0x79,0x18,0x26,0xf7,0x7b
+
+# ATT:   vgetmantps $123, {sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vgetmantps ymm22 {k7}, ymm23, {sae}, 123
+0x62,0xa3,0x79,0x1f,0x26,0xf7,0x7b
+
+# ATT:   vgetmantps $123, {sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vgetmantps ymm22 {k7} {z}, ymm23, {sae}, 123
+0x62,0xa3,0x79,0x9f,0x26,0xf7,0x7b
+
+# ATT:   vmaxpd {sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vmaxpd ymm22, ymm23, ymm24, {sae}
+0x62,0x81,0xc1,0x10,0x5f,0xf0
+
+# ATT:   vmaxpd {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vmaxpd ymm22 {k7}, ymm23, ymm24, {sae}
+0x62,0x81,0xc1,0x17,0x5f,0xf0
+
+# ATT:   vmaxpd {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vmaxpd ymm22 {k7} {z}, ymm23, ymm24, {sae}
+0x62,0x81,0xc1,0x97,0x5f,0xf0
+
+# ATT:   vmaxph {sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vmaxph ymm22, ymm23, ymm24, {sae}
+0x62,0x85,0x40,0x10,0x5f,0xf0
+
+# ATT:   vmaxph {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vmaxph ymm22 {k7}, ymm23, ymm24, {sae}
+0x62,0x85,0x40,0x17,0x5f,0xf0
+
+# ATT:   vmaxph {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vmaxph ymm22 {k7} {z}, ymm23, ymm24, {sae}
+0x62,0x85,0x40,0x97,0x5f,0xf0
+
+# ATT:   vmaxps {sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vmaxps ymm22, ymm23, ymm24, {sae}
+0x62,0x81,0x40,0x10,0x5f,0xf0
+
+# ATT:   vmaxps {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vmaxps ymm22 {k7}, ymm23, ymm24, {sae}
+0x62,0x81,0x40,0x17,0x5f,0xf0
+
+# ATT:   vmaxps {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vmaxps ymm22 {k7} {z}, ymm23, ymm24, {sae}
+0x62,0x81,0x40,0x97,0x5f,0xf0
+
+# ATT:   vminpd {sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vminpd ymm22, ymm23, ymm24, {sae}
+0x62,0x81,0xc1,0x10,0x5d,0xf0
+
+# ATT:   vminpd {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vminpd ymm22 {k7}, ymm23, ymm24, {sae}
+0x62,0x81,0xc1,0x17,0x5d,0xf0
+
+# ATT:   vminpd {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vminpd ymm22 {k7} {z}, ymm23, ymm24, {sae}
+0x62,0x81,0xc1,0x97,0x5d,0xf0
+
+# ATT:   vminph {sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vminph ymm22, ymm23, ymm24, {sae}
+0x62,0x85,0x40,0x10,0x5d,0xf0
+
+# ATT:   vminph {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vminph ymm22 {k7}, ymm23, ymm24, {sae}
+0x62,0x85,0x40,0x17,0x5d,0xf0
+
+# ATT:   vminph {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vminph ymm22 {k7} {z}, ymm23, ymm24, {sae}
+0x62,0x85,0x40,0x97,0x5d,0xf0
+
+# ATT:   vminps {sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vminps ymm22, ymm23, ymm24, {sae}
+0x62,0x81,0x40,0x10,0x5d,0xf0
+
+# ATT:   vminps {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vminps ymm22 {k7}, ymm23, ymm24, {sae}
+0x62,0x81,0x40,0x17,0x5d,0xf0
+
+# ATT:   vminps {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vminps ymm22 {k7} {z}, ymm23, ymm24, {sae}
+0x62,0x81,0x40,0x97,0x5d,0xf0
+
+# ATT:   vmulpd {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vmulpd ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x81,0xc1,0x10,0x59,0xf0
+
+# ATT:   vmulpd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vmulpd ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x81,0xc1,0x37,0x59,0xf0
+
+# ATT:   vmulpd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vmulpd ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x81,0xc1,0xf7,0x59,0xf0
+
+# ATT:   vmulph {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vmulph ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x85,0x40,0x10,0x59,0xf0
+
+# ATT:   vmulph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vmulph ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x85,0x40,0x37,0x59,0xf0
+
+# ATT:   vmulph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vmulph ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x85,0x40,0xf7,0x59,0xf0
+
+# ATT:   vmulps {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vmulps ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x81,0x40,0x10,0x59,0xf0
+
+# ATT:   vmulps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vmulps ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x81,0x40,0x37,0x59,0xf0
+
+# ATT:   vmulps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vmulps ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x81,0x40,0xf7,0x59,0xf0
+
+# ATT:   vrangepd $123, {sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vrangepd ymm22, ymm23, ymm24, {sae}, 123
+0x62,0x83,0xc1,0x10,0x50,0xf0,0x7b
+
+# ATT:   vrangepd $123, {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vrangepd ymm22 {k7}, ymm23, ymm24, {sae}, 123
+0x62,0x83,0xc1,0x17,0x50,0xf0,0x7b
+
+# ATT:   vrangepd $123, {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vrangepd ymm22 {k7} {z}, ymm23, ymm24, {sae}, 123
+0x62,0x83,0xc1,0x97,0x50,0xf0,0x7b
+
+# ATT:   vrangeps $123, {sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vrangeps ymm22, ymm23, ymm24, {sae}, 123
+0x62,0x83,0x41,0x10,0x50,0xf0,0x7b
+
+# ATT:   vrangeps $123, {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vrangeps ymm22 {k7}, ymm23, ymm24, {sae}, 123
+0x62,0x83,0x41,0x17,0x50,0xf0,0x7b
+
+# ATT:   vrangeps $123, {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vrangeps ymm22 {k7} {z}, ymm23, ymm24, {sae}, 123
+0x62,0x83,0x41,0x97,0x50,0xf0,0x7b
+
+# ATT:   vreducepd $123, {sae}, %ymm23, %ymm22
+# INTEL: vreducepd ymm22, ymm23, {sae}, 123
+0x62,0xa3,0xf9,0x18,0x56,0xf7,0x7b
+
+# ATT:   vreducepd $123, {sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vreducepd ymm22 {k7}, ymm23, {sae}, 123
+0x62,0xa3,0xf9,0x1f,0x56,0xf7,0x7b
+
+# ATT:   vreducepd $123, {sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vreducepd ymm22 {k7} {z}, ymm23, {sae}, 123
+0x62,0xa3,0xf9,0x9f,0x56,0xf7,0x7b
+
+# ATT:   vreduceph $123, {sae}, %ymm23, %ymm22
+# INTEL: vreduceph ymm22, ymm23, {sae}, 123
+0x62,0xa3,0x78,0x18,0x56,0xf7,0x7b
+
+# ATT:   vreduceph $123, {sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vreduceph ymm22 {k7}, ymm23, {sae}, 123
+0x62,0xa3,0x78,0x1f,0x56,0xf7,0x7b
+
+# ATT:   vreduceph $123, {sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vreduceph ymm22 {k7} {z}, ymm23, {sae}, 123
+0x62,0xa3,0x78,0x9f,0x56,0xf7,0x7b
+
+# ATT:   vreduceps $123, {sae}, %ymm23, %ymm22
+# INTEL: vreduceps ymm22, ymm23, {sae}, 123
+0x62,0xa3,0x79,0x18,0x56,0xf7,0x7b
+
+# ATT:   vreduceps $123, {sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vreduceps ymm22 {k7}, ymm23, {sae}, 123
+0x62,0xa3,0x79,0x1f,0x56,0xf7,0x7b
+
+# ATT:   vreduceps $123, {sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vreduceps ymm22 {k7} {z}, ymm23, {sae}, 123
+0x62,0xa3,0x79,0x9f,0x56,0xf7,0x7b
+
+# ATT:   vrndscalepd $123, {sae}, %ymm23, %ymm22
+# INTEL: vrndscalepd ymm22, ymm23, {sae}, 123
+0x62,0xa3,0xf9,0x18,0x09,0xf7,0x7b
+
+# ATT:   vrndscalepd $123, {sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vrndscalepd ymm22 {k7}, ymm23, {sae}, 123
+0x62,0xa3,0xf9,0x1f,0x09,0xf7,0x7b
+
+# ATT:   vrndscalepd $123, {sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vrndscalepd ymm22 {k7} {z}, ymm23, {sae}, 123
+0x62,0xa3,0xf9,0x9f,0x09,0xf7,0x7b
+
+# ATT:   vrndscaleph $123, {sae}, %ymm23, %ymm22
+# INTEL: vrndscaleph ymm22, ymm23, {sae}, 123
+0x62,0xa3,0x78,0x18,0x08,0xf7,0x7b
+
+# ATT:   vrndscaleph $123, {sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vrndscaleph ymm22 {k7}, ymm23, {sae}, 123
+0x62,0xa3,0x78,0x1f,0x08,0xf7,0x7b
+
+# ATT:   vrndscaleph $123, {sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vrndscaleph ymm22 {k7} {z}, ymm23, {sae}, 123
+0x62,0xa3,0x78,0x9f,0x08,0xf7,0x7b
+
+# ATT:   vrndscaleps $123, {sae}, %ymm23, %ymm22
+# INTEL: vrndscaleps ymm22, ymm23, {sae}, 123
+0x62,0xa3,0x79,0x18,0x08,0xf7,0x7b
+
+# ATT:   vrndscaleps $123, {sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vrndscaleps ymm22 {k7}, ymm23, {sae}, 123
+0x62,0xa3,0x79,0x1f,0x08,0xf7,0x7b
+
+# ATT:   vrndscaleps $123, {sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vrndscaleps ymm22 {k7} {z}, ymm23, {sae}, 123
+0x62,0xa3,0x79,0x9f,0x08,0xf7,0x7b
+
+# ATT:   vscalefpd {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vscalefpd ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0xc1,0x10,0x2c,0xf0
+
+# ATT:   vscalefpd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vscalefpd ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0xc1,0x37,0x2c,0xf0
+
+# ATT:   vscalefpd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vscalefpd ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0xc1,0xf7,0x2c,0xf0
+
+# ATT:   vscalefph {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vscalefph ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x86,0x41,0x10,0x2c,0xf0
+
+# ATT:   vscalefph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vscalefph ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x86,0x41,0x37,0x2c,0xf0
+
+# ATT:   vscalefph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vscalefph ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x86,0x41,0xf7,0x2c,0xf0
+
+# ATT:   vscalefps {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vscalefps ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x82,0x41,0x10,0x2c,0xf0
+
+# ATT:   vscalefps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vscalefps ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x82,0x41,0x37,0x2c,0xf0
+
+# ATT:   vscalefps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vscalefps ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x82,0x41,0xf7,0x2c,0xf0
+
+# ATT:   vsqrtpd {rn-sae}, %ymm23, %ymm22
+# INTEL: vsqrtpd ymm22, ymm23, {rn-sae}
+0x62,0xa1,0xf9,0x18,0x51,0xf7
+
+# ATT:   vsqrtpd {rd-sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vsqrtpd ymm22 {k7}, ymm23, {rd-sae}
+0x62,0xa1,0xf9,0x3f,0x51,0xf7
+
+# ATT:   vsqrtpd {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vsqrtpd ymm22 {k7} {z}, ymm23, {rz-sae}
+0x62,0xa1,0xf9,0xff,0x51,0xf7
+
+# ATT:   vsqrtph {rn-sae}, %ymm23, %ymm22
+# INTEL: vsqrtph ymm22, ymm23, {rn-sae}
+0x62,0xa5,0x78,0x18,0x51,0xf7
+
+# ATT:   vsqrtph {rd-sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vsqrtph ymm22 {k7}, ymm23, {rd-sae}
+0x62,0xa5,0x78,0x3f,0x51,0xf7
+
+# ATT:   vsqrtph {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vsqrtph ymm22 {k7} {z}, ymm23, {rz-sae}
+0x62,0xa5,0x78,0xff,0x51,0xf7
+
+# ATT:   vsqrtps {rn-sae}, %ymm23, %ymm22
+# INTEL: vsqrtps ymm22, ymm23, {rn-sae}
+0x62,0xa1,0x78,0x18,0x51,0xf7
+
+# ATT:   vsqrtps {rd-sae}, %ymm23, %ymm22 {%k7}
+# INTEL: vsqrtps ymm22 {k7}, ymm23, {rd-sae}
+0x62,0xa1,0x78,0x3f,0x51,0xf7
+
+# ATT:   vsqrtps {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vsqrtps ymm22 {k7} {z}, ymm23, {rz-sae}
+0x62,0xa1,0x78,0xff,0x51,0xf7
+
+# ATT:   vsubpd {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vsubpd ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x81,0xc1,0x10,0x5c,0xf0
+
+# ATT:   vsubpd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vsubpd ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x81,0xc1,0x37,0x5c,0xf0
+
+# ATT:   vsubpd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vsubpd ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x81,0xc1,0xf7,0x5c,0xf0
+
+# ATT:   vsubph {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vsubph ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x85,0x40,0x10,0x5c,0xf0
+
+# ATT:   vsubph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vsubph ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x85,0x40,0x37,0x5c,0xf0
+
+# ATT:   vsubph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vsubph ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x85,0x40,0xf7,0x5c,0xf0
+
+# ATT:   vsubps {rn-sae}, %ymm24, %ymm23, %ymm22
+# INTEL: vsubps ymm22, ymm23, ymm24, {rn-sae}
+0x62,0x81,0x40,0x10,0x5c,0xf0
+
+# ATT:   vsubps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+# INTEL: vsubps ymm22 {k7}, ymm23, ymm24, {rd-sae}
+0x62,0x81,0x40,0x37,0x5c,0xf0
+
+# ATT:   vsubps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vsubps ymm22 {k7} {z}, ymm23, ymm24, {rz-sae}
+0x62,0x81,0x40,0xf7,0x5c,0xf0

diff  --git a/llvm/test/MC/X86/avx10_2ni-32-intel.s b/llvm/test/MC/X86/avx10_2ni-32-intel.s
index ea9a89f316cc3..5dbc1c226e67a 100644
--- a/llvm/test/MC/X86/avx10_2ni-32-intel.s
+++ b/llvm/test/MC/X86/avx10_2ni-32-intel.s
@@ -147,3 +147,1743 @@
 // CHECK: vaddps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
 // CHECK: encoding: [0x62,0xf1,0x60,0xff,0x58,0xd4]
           vaddps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vcmppd k5, ymm3, ymm4, {sae}, 123
+// CHECK: encoding: [0x62,0xf1,0xe1,0x18,0xc2,0xec,0x7b]
+          vcmppd k5, ymm3, ymm4, {sae}, 123
+
+// CHECK: vcmppd k5 {k7}, ymm3, ymm4, {sae}, 123
+// CHECK: encoding: [0x62,0xf1,0xe1,0x1f,0xc2,0xec,0x7b]
+          vcmppd k5 {k7}, ymm3, ymm4, {sae}, 123
+
+// CHECK: vcmpph k5, ymm3, ymm4, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0x60,0x18,0xc2,0xec,0x7b]
+          vcmpph k5, ymm3, ymm4, {sae}, 123
+
+// CHECK: vcmpph k5 {k7}, ymm3, ymm4, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0x60,0x1f,0xc2,0xec,0x7b]
+          vcmpph k5 {k7}, ymm3, ymm4, {sae}, 123
+
+// CHECK: vcmpps k5, ymm3, ymm4, {sae}, 123
+// CHECK: encoding: [0x62,0xf1,0x60,0x18,0xc2,0xec,0x7b]
+          vcmpps k5, ymm3, ymm4, {sae}, 123
+
+// CHECK: vcmpps k5 {k7}, ymm3, ymm4, {sae}, 123
+// CHECK: encoding: [0x62,0xf1,0x60,0x1f,0xc2,0xec,0x7b]
+          vcmpps k5 {k7}, ymm3, ymm4, {sae}, 123
+
+// CHECK: vcvtdq2ph xmm2, ymm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x78,0x18,0x5b,0xd3]
+          vcvtdq2ph xmm2, ymm3, {rn-sae}
+
+// CHECK: vcvtdq2ph xmm2 {k7}, ymm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf5,0x78,0x3f,0x5b,0xd3]
+          vcvtdq2ph xmm2 {k7}, ymm3, {rd-sae}
+
+// CHECK: vcvtdq2ph xmm2 {k7} {z}, ymm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf5,0x78,0xff,0x5b,0xd3]
+          vcvtdq2ph xmm2 {k7} {z}, ymm3, {rz-sae}
+
+// CHECK: vcvtdq2ps ymm2, ymm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf1,0x78,0x18,0x5b,0xd3]
+          vcvtdq2ps ymm2, ymm3, {rn-sae}
+
+// CHECK: vcvtdq2ps ymm2 {k7}, ymm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf1,0x78,0x3f,0x5b,0xd3]
+          vcvtdq2ps ymm2 {k7}, ymm3, {rd-sae}
+
+// CHECK: vcvtdq2ps ymm2 {k7} {z}, ymm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf1,0x78,0xff,0x5b,0xd3]
+          vcvtdq2ps ymm2 {k7} {z}, ymm3, {rz-sae}
+
+// CHECK: vcvtpd2dq xmm2, ymm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf1,0xfb,0x18,0xe6,0xd3]
+          vcvtpd2dq xmm2, ymm3, {rn-sae}
+
+// CHECK: vcvtpd2dq xmm2 {k7}, ymm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf1,0xfb,0x3f,0xe6,0xd3]
+          vcvtpd2dq xmm2 {k7}, ymm3, {rd-sae}
+
+// CHECK: vcvtpd2dq xmm2 {k7} {z}, ymm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf1,0xfb,0xff,0xe6,0xd3]
+          vcvtpd2dq xmm2 {k7} {z}, ymm3, {rz-sae}
+
+// CHECK: vcvtpd2ph xmm2, ymm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0xf9,0x18,0x5a,0xd3]
+          vcvtpd2ph xmm2, ymm3, {rn-sae}
+
+// CHECK: vcvtpd2ph xmm2 {k7}, ymm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf5,0xf9,0x3f,0x5a,0xd3]
+          vcvtpd2ph xmm2 {k7}, ymm3, {rd-sae}
+
+// CHECK: vcvtpd2ph xmm2 {k7} {z}, ymm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf5,0xf9,0xff,0x5a,0xd3]
+          vcvtpd2ph xmm2 {k7} {z}, ymm3, {rz-sae}
+
+// CHECK: vcvtpd2ps xmm2, ymm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf1,0xf9,0x18,0x5a,0xd3]
+          vcvtpd2ps xmm2, ymm3, {rn-sae}
+
+// CHECK: vcvtpd2ps xmm2 {k7}, ymm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf1,0xf9,0x3f,0x5a,0xd3]
+          vcvtpd2ps xmm2 {k7}, ymm3, {rd-sae}
+
+// CHECK: vcvtpd2ps xmm2 {k7} {z}, ymm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf1,0xf9,0xff,0x5a,0xd3]
+          vcvtpd2ps xmm2 {k7} {z}, ymm3, {rz-sae}
+
+// CHECK: vcvtpd2qq ymm2, ymm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf1,0xf9,0x18,0x7b,0xd3]
+          vcvtpd2qq ymm2, ymm3, {rn-sae}
+
+// CHECK: vcvtpd2qq ymm2 {k7}, ymm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf1,0xf9,0x3f,0x7b,0xd3]
+          vcvtpd2qq ymm2 {k7}, ymm3, {rd-sae}
+
+// CHECK: vcvtpd2qq ymm2 {k7} {z}, ymm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf1,0xf9,0xff,0x7b,0xd3]
+          vcvtpd2qq ymm2 {k7} {z}, ymm3, {rz-sae}
+
+// CHECK: vcvtpd2udq xmm2, ymm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf1,0xf8,0x18,0x79,0xd3]
+          vcvtpd2udq xmm2, ymm3, {rn-sae}
+
+// CHECK: vcvtpd2udq xmm2 {k7}, ymm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf1,0xf8,0x3f,0x79,0xd3]
+          vcvtpd2udq xmm2 {k7}, ymm3, {rd-sae}
+
+// CHECK: vcvtpd2udq xmm2 {k7} {z}, ymm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf1,0xf8,0xff,0x79,0xd3]
+          vcvtpd2udq xmm2 {k7} {z}, ymm3, {rz-sae}
+
+// CHECK: vcvtpd2uqq ymm2, ymm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf1,0xf9,0x18,0x79,0xd3]
+          vcvtpd2uqq ymm2, ymm3, {rn-sae}
+
+// CHECK: vcvtpd2uqq ymm2 {k7}, ymm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf1,0xf9,0x3f,0x79,0xd3]
+          vcvtpd2uqq ymm2 {k7}, ymm3, {rd-sae}
+
+// CHECK: vcvtpd2uqq ymm2 {k7} {z}, ymm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf1,0xf9,0xff,0x79,0xd3]
+          vcvtpd2uqq ymm2 {k7} {z}, ymm3, {rz-sae}
+
+// CHECK: vcvtph2dq ymm2, xmm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x79,0x18,0x5b,0xd3]
+          vcvtph2dq ymm2, xmm3, {rn-sae}
+
+// CHECK: vcvtph2dq ymm2 {k7}, xmm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf5,0x79,0x3f,0x5b,0xd3]
+          vcvtph2dq ymm2 {k7}, xmm3, {rd-sae}
+
+// CHECK: vcvtph2dq ymm2 {k7} {z}, xmm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf5,0x79,0xff,0x5b,0xd3]
+          vcvtph2dq ymm2 {k7} {z}, xmm3, {rz-sae}
+
+// CHECK: vcvtph2pd ymm2, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x78,0x18,0x5a,0xd3]
+          vcvtph2pd ymm2, xmm3, {sae}
+
+// CHECK: vcvtph2pd ymm2 {k7}, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x78,0x1f,0x5a,0xd3]
+          vcvtph2pd ymm2 {k7}, xmm3, {sae}
+
+// CHECK: vcvtph2pd ymm2 {k7} {z}, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x78,0x9f,0x5a,0xd3]
+          vcvtph2pd ymm2 {k7} {z}, xmm3, {sae}
+
+// CHECK: vcvtph2ps ymm2, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf2,0x79,0x18,0x13,0xd3]
+          vcvtph2ps ymm2, xmm3, {sae}
+
+// CHECK: vcvtph2ps ymm2 {k7}, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf2,0x79,0x1f,0x13,0xd3]
+          vcvtph2ps ymm2 {k7}, xmm3, {sae}
+
+// CHECK: vcvtph2ps ymm2 {k7} {z}, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf2,0x79,0x9f,0x13,0xd3]
+          vcvtph2ps ymm2 {k7} {z}, xmm3, {sae}
+
+// CHECK: vcvtph2psx ymm2, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf6,0x79,0x18,0x13,0xd3]
+          vcvtph2psx ymm2, xmm3, {sae}
+
+// CHECK: vcvtph2psx ymm2 {k7}, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf6,0x79,0x1f,0x13,0xd3]
+          vcvtph2psx ymm2 {k7}, xmm3, {sae}
+
+// CHECK: vcvtph2psx ymm2 {k7} {z}, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf6,0x79,0x9f,0x13,0xd3]
+          vcvtph2psx ymm2 {k7} {z}, xmm3, {sae}
+
+// CHECK: vcvtph2qq ymm2, xmm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x79,0x18,0x7b,0xd3]
+          vcvtph2qq ymm2, xmm3, {rn-sae}
+
+// CHECK: vcvtph2qq ymm2 {k7}, xmm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf5,0x79,0x3f,0x7b,0xd3]
+          vcvtph2qq ymm2 {k7}, xmm3, {rd-sae}
+
+// CHECK: vcvtph2qq ymm2 {k7} {z}, xmm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf5,0x79,0xff,0x7b,0xd3]
+          vcvtph2qq ymm2 {k7} {z}, xmm3, {rz-sae}
+
+// CHECK: vcvtph2udq ymm2, xmm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x78,0x18,0x79,0xd3]
+          vcvtph2udq ymm2, xmm3, {rn-sae}
+
+// CHECK: vcvtph2udq ymm2 {k7}, xmm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf5,0x78,0x3f,0x79,0xd3]
+          vcvtph2udq ymm2 {k7}, xmm3, {rd-sae}
+
+// CHECK: vcvtph2udq ymm2 {k7} {z}, xmm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf5,0x78,0xff,0x79,0xd3]
+          vcvtph2udq ymm2 {k7} {z}, xmm3, {rz-sae}
+
+// CHECK: vcvtph2uqq ymm2, xmm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x79,0x18,0x79,0xd3]
+          vcvtph2uqq ymm2, xmm3, {rn-sae}
+
+// CHECK: vcvtph2uqq ymm2 {k7}, xmm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf5,0x79,0x3f,0x79,0xd3]
+          vcvtph2uqq ymm2 {k7}, xmm3, {rd-sae}
+
+// CHECK: vcvtph2uqq ymm2 {k7} {z}, xmm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf5,0x79,0xff,0x79,0xd3]
+          vcvtph2uqq ymm2 {k7} {z}, xmm3, {rz-sae}
+
+// CHECK: vcvtph2uw ymm2, ymm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x78,0x18,0x7d,0xd3]
+          vcvtph2uw ymm2, ymm3, {rn-sae}
+
+// CHECK: vcvtph2uw ymm2 {k7}, ymm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf5,0x78,0x3f,0x7d,0xd3]
+          vcvtph2uw ymm2 {k7}, ymm3, {rd-sae}
+
+// CHECK: vcvtph2uw ymm2 {k7} {z}, ymm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf5,0x78,0xff,0x7d,0xd3]
+          vcvtph2uw ymm2 {k7} {z}, ymm3, {rz-sae}
+
+// CHECK: vcvtph2w ymm2, ymm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x79,0x18,0x7d,0xd3]
+          vcvtph2w ymm2, ymm3, {rn-sae}
+
+// CHECK: vcvtph2w ymm2 {k7}, ymm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf5,0x79,0x3f,0x7d,0xd3]
+          vcvtph2w ymm2 {k7}, ymm3, {rd-sae}
+
+// CHECK: vcvtph2w ymm2 {k7} {z}, ymm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf5,0x79,0xff,0x7d,0xd3]
+          vcvtph2w ymm2 {k7} {z}, ymm3, {rz-sae}
+
+// CHECK: vcvtps2dq ymm2, ymm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf1,0x79,0x18,0x5b,0xd3]
+          vcvtps2dq ymm2, ymm3, {rn-sae}
+
+// CHECK: vcvtps2dq ymm2 {k7}, ymm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf1,0x79,0x3f,0x5b,0xd3]
+          vcvtps2dq ymm2 {k7}, ymm3, {rd-sae}
+
+// CHECK: vcvtps2dq ymm2 {k7} {z}, ymm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf1,0x79,0xff,0x5b,0xd3]
+          vcvtps2dq ymm2 {k7} {z}, ymm3, {rz-sae}
+
+// CHECK: vcvtps2pd ymm2, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0x78,0x18,0x5a,0xd3]
+          vcvtps2pd ymm2, xmm3, {sae}
+
+// CHECK: vcvtps2pd ymm2 {k7}, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0x78,0x1f,0x5a,0xd3]
+          vcvtps2pd ymm2 {k7}, xmm3, {sae}
+
+// CHECK: vcvtps2pd ymm2 {k7} {z}, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0x78,0x9f,0x5a,0xd3]
+          vcvtps2pd ymm2 {k7} {z}, xmm3, {sae}
+
+// CHECK: vcvtps2ph xmm2, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0x79,0x18,0x1d,0xda,0x7b]
+          vcvtps2ph xmm2, ymm3, {sae}, 123
+
+// CHECK: vcvtps2ph xmm2 {k7}, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0x79,0x1f,0x1d,0xda,0x7b]
+          vcvtps2ph xmm2 {k7}, ymm3, {sae}, 123
+
+// CHECK: vcvtps2ph xmm2 {k7} {z}, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0x79,0x9f,0x1d,0xda,0x7b]
+          vcvtps2ph xmm2 {k7} {z}, ymm3, {sae}, 123
+
+// CHECK: vcvtps2phx xmm2, ymm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x79,0x18,0x1d,0xd3]
+          vcvtps2phx xmm2, ymm3, {rn-sae}
+
+// CHECK: vcvtps2phx xmm2 {k7}, ymm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf5,0x79,0x3f,0x1d,0xd3]
+          vcvtps2phx xmm2 {k7}, ymm3, {rd-sae}
+
+// CHECK: vcvtps2phx xmm2 {k7} {z}, ymm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf5,0x79,0xff,0x1d,0xd3]
+          vcvtps2phx xmm2 {k7} {z}, ymm3, {rz-sae}
+
+// CHECK: vcvtps2qq ymm2, xmm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf1,0x79,0x18,0x7b,0xd3]
+          vcvtps2qq ymm2, xmm3, {rn-sae}
+
+// CHECK: vcvtps2qq ymm2 {k7}, xmm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf1,0x79,0x3f,0x7b,0xd3]
+          vcvtps2qq ymm2 {k7}, xmm3, {rd-sae}
+
+// CHECK: vcvtps2qq ymm2 {k7} {z}, xmm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf1,0x79,0xff,0x7b,0xd3]
+          vcvtps2qq ymm2 {k7} {z}, xmm3, {rz-sae}
+
+// CHECK: vcvtps2udq ymm2, ymm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf1,0x78,0x18,0x79,0xd3]
+          vcvtps2udq ymm2, ymm3, {rn-sae}
+
+// CHECK: vcvtps2udq ymm2 {k7}, ymm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf1,0x78,0x3f,0x79,0xd3]
+          vcvtps2udq ymm2 {k7}, ymm3, {rd-sae}
+
+// CHECK: vcvtps2udq ymm2 {k7} {z}, ymm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf1,0x78,0xff,0x79,0xd3]
+          vcvtps2udq ymm2 {k7} {z}, ymm3, {rz-sae}
+
+// CHECK: vcvtps2uqq ymm2, xmm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf1,0x79,0x18,0x79,0xd3]
+          vcvtps2uqq ymm2, xmm3, {rn-sae}
+
+// CHECK: vcvtps2uqq ymm2 {k7}, xmm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf1,0x79,0x3f,0x79,0xd3]
+          vcvtps2uqq ymm2 {k7}, xmm3, {rd-sae}
+
+// CHECK: vcvtps2uqq ymm2 {k7} {z}, xmm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf1,0x79,0xff,0x79,0xd3]
+          vcvtps2uqq ymm2 {k7} {z}, xmm3, {rz-sae}
+
+// CHECK: vcvtqq2pd ymm2, ymm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf1,0xfa,0x18,0xe6,0xd3]
+          vcvtqq2pd ymm2, ymm3, {rn-sae}
+
+// CHECK: vcvtqq2pd ymm2 {k7}, ymm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf1,0xfa,0x3f,0xe6,0xd3]
+          vcvtqq2pd ymm2 {k7}, ymm3, {rd-sae}
+
+// CHECK: vcvtqq2pd ymm2 {k7} {z}, ymm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf1,0xfa,0xff,0xe6,0xd3]
+          vcvtqq2pd ymm2 {k7} {z}, ymm3, {rz-sae}
+
+// CHECK: vcvtqq2ph xmm2, ymm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0xf8,0x18,0x5b,0xd3]
+          vcvtqq2ph xmm2, ymm3, {rn-sae}
+
+// CHECK: vcvtqq2ph xmm2 {k7}, ymm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf5,0xf8,0x3f,0x5b,0xd3]
+          vcvtqq2ph xmm2 {k7}, ymm3, {rd-sae}
+
+// CHECK: vcvtqq2ph xmm2 {k7} {z}, ymm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf5,0xf8,0xff,0x5b,0xd3]
+          vcvtqq2ph xmm2 {k7} {z}, ymm3, {rz-sae}
+
+// CHECK: vcvtqq2ps xmm2, ymm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf1,0xf8,0x18,0x5b,0xd3]
+          vcvtqq2ps xmm2, ymm3, {rn-sae}
+
+// CHECK: vcvtqq2ps xmm2 {k7}, ymm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf1,0xf8,0x3f,0x5b,0xd3]
+          vcvtqq2ps xmm2 {k7}, ymm3, {rd-sae}
+
+// CHECK: vcvtqq2ps xmm2 {k7} {z}, ymm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf1,0xf8,0xff,0x5b,0xd3]
+          vcvtqq2ps xmm2 {k7} {z}, ymm3, {rz-sae}
+
+// CHECK: vcvttpd2dq xmm2, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0xf9,0x18,0xe6,0xd3]
+          vcvttpd2dq xmm2, ymm3, {sae}
+
+// CHECK: vcvttpd2dq xmm2 {k7}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0xf9,0x1f,0xe6,0xd3]
+          vcvttpd2dq xmm2 {k7}, ymm3, {sae}
+
+// CHECK: vcvttpd2dq xmm2 {k7} {z}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0xf9,0x9f,0xe6,0xd3]
+          vcvttpd2dq xmm2 {k7} {z}, ymm3, {sae}
+
+// CHECK: vcvttpd2qq ymm2, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0xf9,0x18,0x7a,0xd3]
+          vcvttpd2qq ymm2, ymm3, {sae}
+
+// CHECK: vcvttpd2qq ymm2 {k7}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0xf9,0x1f,0x7a,0xd3]
+          vcvttpd2qq ymm2 {k7}, ymm3, {sae}
+
+// CHECK: vcvttpd2qq ymm2 {k7} {z}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0xf9,0x9f,0x7a,0xd3]
+          vcvttpd2qq ymm2 {k7} {z}, ymm3, {sae}
+
+// CHECK: vcvttpd2udq xmm2, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0xf8,0x18,0x78,0xd3]
+          vcvttpd2udq xmm2, ymm3, {sae}
+
+// CHECK: vcvttpd2udq xmm2 {k7}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0xf8,0x1f,0x78,0xd3]
+          vcvttpd2udq xmm2 {k7}, ymm3, {sae}
+
+// CHECK: vcvttpd2udq xmm2 {k7} {z}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0xf8,0x9f,0x78,0xd3]
+          vcvttpd2udq xmm2 {k7} {z}, ymm3, {sae}
+
+// CHECK: vcvttpd2uqq ymm2, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0xf9,0x18,0x78,0xd3]
+          vcvttpd2uqq ymm2, ymm3, {sae}
+
+// CHECK: vcvttpd2uqq ymm2 {k7}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0xf9,0x1f,0x78,0xd3]
+          vcvttpd2uqq ymm2 {k7}, ymm3, {sae}
+
+// CHECK: vcvttpd2uqq ymm2 {k7} {z}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0xf9,0x9f,0x78,0xd3]
+          vcvttpd2uqq ymm2 {k7} {z}, ymm3, {sae}
+
+// CHECK: vcvttph2dq ymm2, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7a,0x18,0x5b,0xd3]
+          vcvttph2dq ymm2, xmm3, {sae}
+
+// CHECK: vcvttph2dq ymm2 {k7}, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7a,0x1f,0x5b,0xd3]
+          vcvttph2dq ymm2 {k7}, xmm3, {sae}
+
+// CHECK: vcvttph2dq ymm2 {k7} {z}, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7a,0x9f,0x5b,0xd3]
+          vcvttph2dq ymm2 {k7} {z}, xmm3, {sae}
+
+// CHECK: vcvttph2qq ymm2, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x79,0x18,0x7a,0xd3]
+          vcvttph2qq ymm2, xmm3, {sae}
+
+// CHECK: vcvttph2qq ymm2 {k7}, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x79,0x1f,0x7a,0xd3]
+          vcvttph2qq ymm2 {k7}, xmm3, {sae}
+
+// CHECK: vcvttph2qq ymm2 {k7} {z}, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x79,0x9f,0x7a,0xd3]
+          vcvttph2qq ymm2 {k7} {z}, xmm3, {sae}
+
+// CHECK: vcvttph2udq ymm2, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x78,0x18,0x78,0xd3]
+          vcvttph2udq ymm2, xmm3, {sae}
+
+// CHECK: vcvttph2udq ymm2 {k7}, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x78,0x1f,0x78,0xd3]
+          vcvttph2udq ymm2 {k7}, xmm3, {sae}
+
+// CHECK: vcvttph2udq ymm2 {k7} {z}, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x78,0x9f,0x78,0xd3]
+          vcvttph2udq ymm2 {k7} {z}, xmm3, {sae}
+
+// CHECK: vcvttph2uqq ymm2, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x79,0x18,0x78,0xd3]
+          vcvttph2uqq ymm2, xmm3, {sae}
+
+// CHECK: vcvttph2uqq ymm2 {k7}, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x79,0x1f,0x78,0xd3]
+          vcvttph2uqq ymm2 {k7}, xmm3, {sae}
+
+// CHECK: vcvttph2uqq ymm2 {k7} {z}, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x79,0x9f,0x78,0xd3]
+          vcvttph2uqq ymm2 {k7} {z}, xmm3, {sae}
+
+// CHECK: vcvttph2uw ymm2, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x78,0x18,0x7c,0xd3]
+          vcvttph2uw ymm2, ymm3, {sae}
+
+// CHECK: vcvttph2uw ymm2 {k7}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x78,0x1f,0x7c,0xd3]
+          vcvttph2uw ymm2 {k7}, ymm3, {sae}
+
+// CHECK: vcvttph2uw ymm2 {k7} {z}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x78,0x9f,0x7c,0xd3]
+          vcvttph2uw ymm2 {k7} {z}, ymm3, {sae}
+
+// CHECK: vcvttph2w ymm2, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x79,0x18,0x7c,0xd3]
+          vcvttph2w ymm2, ymm3, {sae}
+
+// CHECK: vcvttph2w ymm2 {k7}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x79,0x1f,0x7c,0xd3]
+          vcvttph2w ymm2 {k7}, ymm3, {sae}
+
+// CHECK: vcvttph2w ymm2 {k7} {z}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x79,0x9f,0x7c,0xd3]
+          vcvttph2w ymm2 {k7} {z}, ymm3, {sae}
+
+// CHECK: vcvttps2dq ymm2, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0x7a,0x18,0x5b,0xd3]
+          vcvttps2dq ymm2, ymm3, {sae}
+
+// CHECK: vcvttps2dq ymm2 {k7}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0x7a,0x1f,0x5b,0xd3]
+          vcvttps2dq ymm2 {k7}, ymm3, {sae}
+
+// CHECK: vcvttps2dq ymm2 {k7} {z}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0x7a,0x9f,0x5b,0xd3]
+          vcvttps2dq ymm2 {k7} {z}, ymm3, {sae}
+
+// CHECK: vcvttps2qq ymm2, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0x79,0x18,0x7a,0xd3]
+          vcvttps2qq ymm2, xmm3, {sae}
+
+// CHECK: vcvttps2qq ymm2 {k7}, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0x79,0x1f,0x7a,0xd3]
+          vcvttps2qq ymm2 {k7}, xmm3, {sae}
+
+// CHECK: vcvttps2qq ymm2 {k7} {z}, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0x79,0x9f,0x7a,0xd3]
+          vcvttps2qq ymm2 {k7} {z}, xmm3, {sae}
+
+// CHECK: vcvttps2udq ymm2, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0x78,0x18,0x78,0xd3]
+          vcvttps2udq ymm2, ymm3, {sae}
+
+// CHECK: vcvttps2udq ymm2 {k7}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0x78,0x1f,0x78,0xd3]
+          vcvttps2udq ymm2 {k7}, ymm3, {sae}
+
+// CHECK: vcvttps2udq ymm2 {k7} {z}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0x78,0x9f,0x78,0xd3]
+          vcvttps2udq ymm2 {k7} {z}, ymm3, {sae}
+
+// CHECK: vcvttps2uqq ymm2, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0x79,0x18,0x78,0xd3]
+          vcvttps2uqq ymm2, xmm3, {sae}
+
+// CHECK: vcvttps2uqq ymm2 {k7}, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0x79,0x1f,0x78,0xd3]
+          vcvttps2uqq ymm2 {k7}, xmm3, {sae}
+
+// CHECK: vcvttps2uqq ymm2 {k7} {z}, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0x79,0x9f,0x78,0xd3]
+          vcvttps2uqq ymm2 {k7} {z}, xmm3, {sae}
+
+// CHECK: vcvtudq2ph xmm2, ymm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x7b,0x18,0x7a,0xd3]
+          vcvtudq2ph xmm2, ymm3, {rn-sae}
+
+// CHECK: vcvtudq2ph xmm2 {k7}, ymm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf5,0x7b,0x3f,0x7a,0xd3]
+          vcvtudq2ph xmm2 {k7}, ymm3, {rd-sae}
+
+// CHECK: vcvtudq2ph xmm2 {k7} {z}, ymm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf5,0x7b,0xff,0x7a,0xd3]
+          vcvtudq2ph xmm2 {k7} {z}, ymm3, {rz-sae}
+
+// CHECK: vcvtudq2ps ymm2, ymm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf1,0x7b,0x18,0x7a,0xd3]
+          vcvtudq2ps ymm2, ymm3, {rn-sae}
+
+// CHECK: vcvtudq2ps ymm2 {k7}, ymm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf1,0x7b,0x3f,0x7a,0xd3]
+          vcvtudq2ps ymm2 {k7}, ymm3, {rd-sae}
+
+// CHECK: vcvtudq2ps ymm2 {k7} {z}, ymm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf1,0x7b,0xff,0x7a,0xd3]
+          vcvtudq2ps ymm2 {k7} {z}, ymm3, {rz-sae}
+
+// CHECK: vcvtuqq2pd ymm2, ymm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf1,0xfa,0x18,0x7a,0xd3]
+          vcvtuqq2pd ymm2, ymm3, {rn-sae}
+
+// CHECK: vcvtuqq2pd ymm2 {k7}, ymm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf1,0xfa,0x3f,0x7a,0xd3]
+          vcvtuqq2pd ymm2 {k7}, ymm3, {rd-sae}
+
+// CHECK: vcvtuqq2pd ymm2 {k7} {z}, ymm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf1,0xfa,0xff,0x7a,0xd3]
+          vcvtuqq2pd ymm2 {k7} {z}, ymm3, {rz-sae}
+
+// CHECK: vcvtuqq2ph xmm2, ymm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0xfb,0x18,0x7a,0xd3]
+          vcvtuqq2ph xmm2, ymm3, {rn-sae}
+
+// CHECK: vcvtuqq2ph xmm2 {k7}, ymm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf5,0xfb,0x3f,0x7a,0xd3]
+          vcvtuqq2ph xmm2 {k7}, ymm3, {rd-sae}
+
+// CHECK: vcvtuqq2ph xmm2 {k7} {z}, ymm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf5,0xfb,0xff,0x7a,0xd3]
+          vcvtuqq2ph xmm2 {k7} {z}, ymm3, {rz-sae}
+
+// CHECK: vcvtuqq2ps xmm2, ymm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf1,0xfb,0x18,0x7a,0xd3]
+          vcvtuqq2ps xmm2, ymm3, {rn-sae}
+
+// CHECK: vcvtuqq2ps xmm2 {k7}, ymm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf1,0xfb,0x3f,0x7a,0xd3]
+          vcvtuqq2ps xmm2 {k7}, ymm3, {rd-sae}
+
+// CHECK: vcvtuqq2ps xmm2 {k7} {z}, ymm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf1,0xfb,0xff,0x7a,0xd3]
+          vcvtuqq2ps xmm2 {k7} {z}, ymm3, {rz-sae}
+
+// CHECK: vcvtuw2ph ymm2, ymm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x7b,0x18,0x7d,0xd3]
+          vcvtuw2ph ymm2, ymm3, {rn-sae}
+
+// CHECK: vcvtuw2ph ymm2 {k7}, ymm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf5,0x7b,0x3f,0x7d,0xd3]
+          vcvtuw2ph ymm2 {k7}, ymm3, {rd-sae}
+
+// CHECK: vcvtuw2ph ymm2 {k7} {z}, ymm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf5,0x7b,0xff,0x7d,0xd3]
+          vcvtuw2ph ymm2 {k7} {z}, ymm3, {rz-sae}
+
+// CHECK: vcvtw2ph ymm2, ymm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x7a,0x18,0x7d,0xd3]
+          vcvtw2ph ymm2, ymm3, {rn-sae}
+
+// CHECK: vcvtw2ph ymm2 {k7}, ymm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf5,0x7a,0x3f,0x7d,0xd3]
+          vcvtw2ph ymm2 {k7}, ymm3, {rd-sae}
+
+// CHECK: vcvtw2ph ymm2 {k7} {z}, ymm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf5,0x7a,0xff,0x7d,0xd3]
+          vcvtw2ph ymm2 {k7} {z}, ymm3, {rz-sae}
+
+// CHECK: vdivpd ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf1,0xe1,0x18,0x5e,0xd4]
+          vdivpd ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vdivpd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf1,0xe1,0x3f,0x5e,0xd4]
+          vdivpd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vdivpd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf1,0xe1,0xff,0x5e,0xd4]
+          vdivpd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vdivph ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x60,0x18,0x5e,0xd4]
+          vdivph ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vdivph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf5,0x60,0x3f,0x5e,0xd4]
+          vdivph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vdivph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf5,0x60,0xff,0x5e,0xd4]
+          vdivph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vdivps ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf1,0x60,0x18,0x5e,0xd4]
+          vdivps ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vdivps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf1,0x60,0x3f,0x5e,0xd4]
+          vdivps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vdivps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf1,0x60,0xff,0x5e,0xd4]
+          vdivps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfcmaddcph ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf6,0x63,0x18,0x56,0xd4]
+          vfcmaddcph ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfcmaddcph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf6,0x63,0x3f,0x56,0xd4]
+          vfcmaddcph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfcmaddcph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf6,0x63,0xff,0x56,0xd4]
+          vfcmaddcph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfcmulcph ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf6,0x63,0x18,0xd6,0xd4]
+          vfcmulcph ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfcmulcph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf6,0x63,0x3f,0xd6,0xd4]
+          vfcmulcph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfcmulcph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf6,0x63,0xff,0xd6,0xd4]
+          vfcmulcph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfixupimmpd ymm2, ymm3, ymm4, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0xe1,0x18,0x54,0xd4,0x7b]
+          vfixupimmpd ymm2, ymm3, ymm4, {sae}, 123
+
+// CHECK: vfixupimmpd ymm2 {k7}, ymm3, ymm4, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0xe1,0x1f,0x54,0xd4,0x7b]
+          vfixupimmpd ymm2 {k7}, ymm3, ymm4, {sae}, 123
+
+// CHECK: vfixupimmpd ymm2 {k7} {z}, ymm3, ymm4, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0xe1,0x9f,0x54,0xd4,0x7b]
+          vfixupimmpd ymm2 {k7} {z}, ymm3, ymm4, {sae}, 123
+
+// CHECK: vfixupimmps ymm2, ymm3, ymm4, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0x61,0x18,0x54,0xd4,0x7b]
+          vfixupimmps ymm2, ymm3, ymm4, {sae}, 123
+
+// CHECK: vfixupimmps ymm2 {k7}, ymm3, ymm4, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0x61,0x1f,0x54,0xd4,0x7b]
+          vfixupimmps ymm2 {k7}, ymm3, ymm4, {sae}, 123
+
+// CHECK: vfixupimmps ymm2 {k7} {z}, ymm3, ymm4, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0x61,0x9f,0x54,0xd4,0x7b]
+          vfixupimmps ymm2 {k7} {z}, ymm3, ymm4, {sae}, 123
+
+// CHECK: vfmadd132pd ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x18,0x98,0xd4]
+          vfmadd132pd ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmadd132pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x3f,0x98,0xd4]
+          vfmadd132pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmadd132pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0xff,0x98,0xd4]
+          vfmadd132pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmadd132ph ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x18,0x98,0xd4]
+          vfmadd132ph ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmadd132ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x3f,0x98,0xd4]
+          vfmadd132ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmadd132ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0xff,0x98,0xd4]
+          vfmadd132ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmadd132ps ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x18,0x98,0xd4]
+          vfmadd132ps ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmadd132ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x3f,0x98,0xd4]
+          vfmadd132ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmadd132ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0xff,0x98,0xd4]
+          vfmadd132ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmadd213pd ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x18,0xa8,0xd4]
+          vfmadd213pd ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmadd213pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x3f,0xa8,0xd4]
+          vfmadd213pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmadd213pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0xff,0xa8,0xd4]
+          vfmadd213pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmadd213ph ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x18,0xa8,0xd4]
+          vfmadd213ph ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmadd213ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x3f,0xa8,0xd4]
+          vfmadd213ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmadd213ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0xff,0xa8,0xd4]
+          vfmadd213ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmadd213ps ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x18,0xa8,0xd4]
+          vfmadd213ps ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmadd213ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x3f,0xa8,0xd4]
+          vfmadd213ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmadd213ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0xff,0xa8,0xd4]
+          vfmadd213ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmadd231pd ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x18,0xb8,0xd4]
+          vfmadd231pd ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmadd231pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x3f,0xb8,0xd4]
+          vfmadd231pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmadd231pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0xff,0xb8,0xd4]
+          vfmadd231pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmadd231ph ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x18,0xb8,0xd4]
+          vfmadd231ph ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmadd231ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x3f,0xb8,0xd4]
+          vfmadd231ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmadd231ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0xff,0xb8,0xd4]
+          vfmadd231ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmadd231ps ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x18,0xb8,0xd4]
+          vfmadd231ps ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmadd231ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x3f,0xb8,0xd4]
+          vfmadd231ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmadd231ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0xff,0xb8,0xd4]
+          vfmadd231ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmaddcph ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf6,0x62,0x18,0x56,0xd4]
+          vfmaddcph ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmaddcph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf6,0x62,0x3f,0x56,0xd4]
+          vfmaddcph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmaddcph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf6,0x62,0xff,0x56,0xd4]
+          vfmaddcph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmaddsub132pd ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x18,0x96,0xd4]
+          vfmaddsub132pd ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmaddsub132pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x3f,0x96,0xd4]
+          vfmaddsub132pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmaddsub132pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0xff,0x96,0xd4]
+          vfmaddsub132pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmaddsub132ph ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x18,0x96,0xd4]
+          vfmaddsub132ph ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmaddsub132ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x3f,0x96,0xd4]
+          vfmaddsub132ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmaddsub132ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0xff,0x96,0xd4]
+          vfmaddsub132ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmaddsub132ps ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x18,0x96,0xd4]
+          vfmaddsub132ps ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmaddsub132ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x3f,0x96,0xd4]
+          vfmaddsub132ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmaddsub132ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0xff,0x96,0xd4]
+          vfmaddsub132ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmaddsub213pd ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x18,0xa6,0xd4]
+          vfmaddsub213pd ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmaddsub213pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x3f,0xa6,0xd4]
+          vfmaddsub213pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmaddsub213pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0xff,0xa6,0xd4]
+          vfmaddsub213pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmaddsub213ph ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x18,0xa6,0xd4]
+          vfmaddsub213ph ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmaddsub213ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x3f,0xa6,0xd4]
+          vfmaddsub213ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmaddsub213ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0xff,0xa6,0xd4]
+          vfmaddsub213ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmaddsub213ps ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x18,0xa6,0xd4]
+          vfmaddsub213ps ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmaddsub213ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x3f,0xa6,0xd4]
+          vfmaddsub213ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmaddsub213ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0xff,0xa6,0xd4]
+          vfmaddsub213ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmaddsub231pd ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x18,0xb6,0xd4]
+          vfmaddsub231pd ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmaddsub231pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x3f,0xb6,0xd4]
+          vfmaddsub231pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmaddsub231pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0xff,0xb6,0xd4]
+          vfmaddsub231pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmaddsub231ph ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x18,0xb6,0xd4]
+          vfmaddsub231ph ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmaddsub231ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x3f,0xb6,0xd4]
+          vfmaddsub231ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmaddsub231ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0xff,0xb6,0xd4]
+          vfmaddsub231ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmaddsub231ps ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x18,0xb6,0xd4]
+          vfmaddsub231ps ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmaddsub231ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x3f,0xb6,0xd4]
+          vfmaddsub231ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmaddsub231ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0xff,0xb6,0xd4]
+          vfmaddsub231ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmsub132pd ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x18,0x9a,0xd4]
+          vfmsub132pd ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmsub132pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x3f,0x9a,0xd4]
+          vfmsub132pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmsub132pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0xff,0x9a,0xd4]
+          vfmsub132pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmsub132ph ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x18,0x9a,0xd4]
+          vfmsub132ph ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmsub132ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x3f,0x9a,0xd4]
+          vfmsub132ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmsub132ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0xff,0x9a,0xd4]
+          vfmsub132ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmsub132ps ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x18,0x9a,0xd4]
+          vfmsub132ps ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmsub132ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x3f,0x9a,0xd4]
+          vfmsub132ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmsub132ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0xff,0x9a,0xd4]
+          vfmsub132ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmsub213pd ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x18,0xaa,0xd4]
+          vfmsub213pd ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmsub213pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x3f,0xaa,0xd4]
+          vfmsub213pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmsub213pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0xff,0xaa,0xd4]
+          vfmsub213pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmsub213ph ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x18,0xaa,0xd4]
+          vfmsub213ph ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmsub213ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x3f,0xaa,0xd4]
+          vfmsub213ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmsub213ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0xff,0xaa,0xd4]
+          vfmsub213ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmsub213ps ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x18,0xaa,0xd4]
+          vfmsub213ps ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmsub213ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x3f,0xaa,0xd4]
+          vfmsub213ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmsub213ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0xff,0xaa,0xd4]
+          vfmsub213ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmsub231pd ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x18,0xba,0xd4]
+          vfmsub231pd ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmsub231pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x3f,0xba,0xd4]
+          vfmsub231pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmsub231pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0xff,0xba,0xd4]
+          vfmsub231pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmsub231ph ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x18,0xba,0xd4]
+          vfmsub231ph ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmsub231ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x3f,0xba,0xd4]
+          vfmsub231ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmsub231ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0xff,0xba,0xd4]
+          vfmsub231ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmsub231ps ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x18,0xba,0xd4]
+          vfmsub231ps ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmsub231ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x3f,0xba,0xd4]
+          vfmsub231ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmsub231ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0xff,0xba,0xd4]
+          vfmsub231ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmsubadd132pd ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x18,0x97,0xd4]
+          vfmsubadd132pd ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmsubadd132pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x3f,0x97,0xd4]
+          vfmsubadd132pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmsubadd132pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0xff,0x97,0xd4]
+          vfmsubadd132pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmsubadd132ph ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x18,0x97,0xd4]
+          vfmsubadd132ph ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmsubadd132ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x3f,0x97,0xd4]
+          vfmsubadd132ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmsubadd132ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0xff,0x97,0xd4]
+          vfmsubadd132ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmsubadd132ps ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x18,0x97,0xd4]
+          vfmsubadd132ps ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmsubadd132ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x3f,0x97,0xd4]
+          vfmsubadd132ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmsubadd132ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0xff,0x97,0xd4]
+          vfmsubadd132ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmsubadd213pd ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x18,0xa7,0xd4]
+          vfmsubadd213pd ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmsubadd213pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x3f,0xa7,0xd4]
+          vfmsubadd213pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmsubadd213pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0xff,0xa7,0xd4]
+          vfmsubadd213pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmsubadd213ph ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x18,0xa7,0xd4]
+          vfmsubadd213ph ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmsubadd213ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x3f,0xa7,0xd4]
+          vfmsubadd213ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmsubadd213ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0xff,0xa7,0xd4]
+          vfmsubadd213ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmsubadd213ps ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x18,0xa7,0xd4]
+          vfmsubadd213ps ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmsubadd213ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x3f,0xa7,0xd4]
+          vfmsubadd213ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmsubadd213ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0xff,0xa7,0xd4]
+          vfmsubadd213ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmsubadd231pd ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x18,0xb7,0xd4]
+          vfmsubadd231pd ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmsubadd231pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x3f,0xb7,0xd4]
+          vfmsubadd231pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmsubadd231pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0xff,0xb7,0xd4]
+          vfmsubadd231pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmsubadd231ph ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x18,0xb7,0xd4]
+          vfmsubadd231ph ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmsubadd231ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x3f,0xb7,0xd4]
+          vfmsubadd231ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmsubadd231ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0xff,0xb7,0xd4]
+          vfmsubadd231ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmsubadd231ps ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x18,0xb7,0xd4]
+          vfmsubadd231ps ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmsubadd231ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x3f,0xb7,0xd4]
+          vfmsubadd231ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmsubadd231ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0xff,0xb7,0xd4]
+          vfmsubadd231ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfmulcph ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf6,0x62,0x18,0xd6,0xd4]
+          vfmulcph ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfmulcph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf6,0x62,0x3f,0xd6,0xd4]
+          vfmulcph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfmulcph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf6,0x62,0xff,0xd6,0xd4]
+          vfmulcph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfnmadd132pd ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x18,0x9c,0xd4]
+          vfnmadd132pd ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfnmadd132pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x3f,0x9c,0xd4]
+          vfnmadd132pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfnmadd132pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0xff,0x9c,0xd4]
+          vfnmadd132pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfnmadd132ph ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x18,0x9c,0xd4]
+          vfnmadd132ph ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfnmadd132ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x3f,0x9c,0xd4]
+          vfnmadd132ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfnmadd132ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0xff,0x9c,0xd4]
+          vfnmadd132ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfnmadd132ps ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x18,0x9c,0xd4]
+          vfnmadd132ps ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfnmadd132ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x3f,0x9c,0xd4]
+          vfnmadd132ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfnmadd132ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0xff,0x9c,0xd4]
+          vfnmadd132ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfnmadd213pd ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x18,0xac,0xd4]
+          vfnmadd213pd ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfnmadd213pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x3f,0xac,0xd4]
+          vfnmadd213pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfnmadd213pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0xff,0xac,0xd4]
+          vfnmadd213pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfnmadd213ph ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x18,0xac,0xd4]
+          vfnmadd213ph ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfnmadd213ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x3f,0xac,0xd4]
+          vfnmadd213ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfnmadd213ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0xff,0xac,0xd4]
+          vfnmadd213ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfnmadd213ps ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x18,0xac,0xd4]
+          vfnmadd213ps ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfnmadd213ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x3f,0xac,0xd4]
+          vfnmadd213ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfnmadd213ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0xff,0xac,0xd4]
+          vfnmadd213ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfnmadd231pd ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x18,0xbc,0xd4]
+          vfnmadd231pd ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfnmadd231pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x3f,0xbc,0xd4]
+          vfnmadd231pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfnmadd231pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0xff,0xbc,0xd4]
+          vfnmadd231pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfnmadd231ph ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x18,0xbc,0xd4]
+          vfnmadd231ph ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfnmadd231ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x3f,0xbc,0xd4]
+          vfnmadd231ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfnmadd231ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0xff,0xbc,0xd4]
+          vfnmadd231ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfnmadd231ps ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x18,0xbc,0xd4]
+          vfnmadd231ps ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfnmadd231ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x3f,0xbc,0xd4]
+          vfnmadd231ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfnmadd231ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0xff,0xbc,0xd4]
+          vfnmadd231ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfnmsub132pd ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x18,0x9e,0xd4]
+          vfnmsub132pd ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfnmsub132pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x3f,0x9e,0xd4]
+          vfnmsub132pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfnmsub132pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0xff,0x9e,0xd4]
+          vfnmsub132pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfnmsub132ph ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x18,0x9e,0xd4]
+          vfnmsub132ph ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfnmsub132ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x3f,0x9e,0xd4]
+          vfnmsub132ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfnmsub132ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0xff,0x9e,0xd4]
+          vfnmsub132ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfnmsub132ps ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x18,0x9e,0xd4]
+          vfnmsub132ps ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfnmsub132ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x3f,0x9e,0xd4]
+          vfnmsub132ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfnmsub132ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0xff,0x9e,0xd4]
+          vfnmsub132ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfnmsub213pd ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x18,0xae,0xd4]
+          vfnmsub213pd ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfnmsub213pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x3f,0xae,0xd4]
+          vfnmsub213pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfnmsub213pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0xff,0xae,0xd4]
+          vfnmsub213pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfnmsub213ph ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x18,0xae,0xd4]
+          vfnmsub213ph ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfnmsub213ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x3f,0xae,0xd4]
+          vfnmsub213ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfnmsub213ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0xff,0xae,0xd4]
+          vfnmsub213ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfnmsub213ps ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x18,0xae,0xd4]
+          vfnmsub213ps ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfnmsub213ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x3f,0xae,0xd4]
+          vfnmsub213ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfnmsub213ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0xff,0xae,0xd4]
+          vfnmsub213ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfnmsub231pd ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x18,0xbe,0xd4]
+          vfnmsub231pd ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfnmsub231pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x3f,0xbe,0xd4]
+          vfnmsub231pd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfnmsub231pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0xff,0xbe,0xd4]
+          vfnmsub231pd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfnmsub231ph ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x18,0xbe,0xd4]
+          vfnmsub231ph ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfnmsub231ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x3f,0xbe,0xd4]
+          vfnmsub231ph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfnmsub231ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0xff,0xbe,0xd4]
+          vfnmsub231ph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vfnmsub231ps ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x18,0xbe,0xd4]
+          vfnmsub231ps ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vfnmsub231ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x3f,0xbe,0xd4]
+          vfnmsub231ps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vfnmsub231ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0xff,0xbe,0xd4]
+          vfnmsub231ps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vgetexppd ymm2, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf2,0xf9,0x18,0x42,0xd3]
+          vgetexppd ymm2, ymm3, {sae}
+
+// CHECK: vgetexppd ymm2 {k7}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf2,0xf9,0x1f,0x42,0xd3]
+          vgetexppd ymm2 {k7}, ymm3, {sae}
+
+// CHECK: vgetexppd ymm2 {k7} {z}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf2,0xf9,0x9f,0x42,0xd3]
+          vgetexppd ymm2 {k7} {z}, ymm3, {sae}
+
+// CHECK: vgetexpph ymm2, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf6,0x79,0x18,0x42,0xd3]
+          vgetexpph ymm2, ymm3, {sae}
+
+// CHECK: vgetexpph ymm2 {k7}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf6,0x79,0x1f,0x42,0xd3]
+          vgetexpph ymm2 {k7}, ymm3, {sae}
+
+// CHECK: vgetexpph ymm2 {k7} {z}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf6,0x79,0x9f,0x42,0xd3]
+          vgetexpph ymm2 {k7} {z}, ymm3, {sae}
+
+// CHECK: vgetexpps ymm2, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf2,0x79,0x18,0x42,0xd3]
+          vgetexpps ymm2, ymm3, {sae}
+
+// CHECK: vgetexpps ymm2 {k7}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf2,0x79,0x1f,0x42,0xd3]
+          vgetexpps ymm2 {k7}, ymm3, {sae}
+
+// CHECK: vgetexpps ymm2 {k7} {z}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf2,0x79,0x9f,0x42,0xd3]
+          vgetexpps ymm2 {k7} {z}, ymm3, {sae}
+
+// CHECK: vgetmantpd ymm2, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0xf9,0x18,0x26,0xd3,0x7b]
+          vgetmantpd ymm2, ymm3, {sae}, 123
+
+// CHECK: vgetmantpd ymm2 {k7}, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0xf9,0x1f,0x26,0xd3,0x7b]
+          vgetmantpd ymm2 {k7}, ymm3, {sae}, 123
+
+// CHECK: vgetmantpd ymm2 {k7} {z}, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0xf9,0x9f,0x26,0xd3,0x7b]
+          vgetmantpd ymm2 {k7} {z}, ymm3, {sae}, 123
+
+// CHECK: vgetmantph ymm2, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0x78,0x18,0x26,0xd3,0x7b]
+          vgetmantph ymm2, ymm3, {sae}, 123
+
+// CHECK: vgetmantph ymm2 {k7}, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0x78,0x1f,0x26,0xd3,0x7b]
+          vgetmantph ymm2 {k7}, ymm3, {sae}, 123
+
+// CHECK: vgetmantph ymm2 {k7} {z}, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0x78,0x9f,0x26,0xd3,0x7b]
+          vgetmantph ymm2 {k7} {z}, ymm3, {sae}, 123
+
+// CHECK: vgetmantps ymm2, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0x79,0x18,0x26,0xd3,0x7b]
+          vgetmantps ymm2, ymm3, {sae}, 123
+
+// CHECK: vgetmantps ymm2 {k7}, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0x79,0x1f,0x26,0xd3,0x7b]
+          vgetmantps ymm2 {k7}, ymm3, {sae}, 123
+
+// CHECK: vgetmantps ymm2 {k7} {z}, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0x79,0x9f,0x26,0xd3,0x7b]
+          vgetmantps ymm2 {k7} {z}, ymm3, {sae}, 123
+
+// CHECK: vmaxpd ymm2, ymm3, ymm4, {sae}
+// CHECK: encoding: [0x62,0xf1,0xe1,0x18,0x5f,0xd4]
+          vmaxpd ymm2, ymm3, ymm4, {sae}
+
+// CHECK: vmaxpd ymm2 {k7}, ymm3, ymm4, {sae}
+// CHECK: encoding: [0x62,0xf1,0xe1,0x1f,0x5f,0xd4]
+          vmaxpd ymm2 {k7}, ymm3, ymm4, {sae}
+
+// CHECK: vmaxpd ymm2 {k7} {z}, ymm3, ymm4, {sae}
+// CHECK: encoding: [0x62,0xf1,0xe1,0x9f,0x5f,0xd4]
+          vmaxpd ymm2 {k7} {z}, ymm3, ymm4, {sae}
+
+// CHECK: vmaxph ymm2, ymm3, ymm4, {sae}
+// CHECK: encoding: [0x62,0xf5,0x60,0x18,0x5f,0xd4]
+          vmaxph ymm2, ymm3, ymm4, {sae}
+
+// CHECK: vmaxph ymm2 {k7}, ymm3, ymm4, {sae}
+// CHECK: encoding: [0x62,0xf5,0x60,0x1f,0x5f,0xd4]
+          vmaxph ymm2 {k7}, ymm3, ymm4, {sae}
+
+// CHECK: vmaxph ymm2 {k7} {z}, ymm3, ymm4, {sae}
+// CHECK: encoding: [0x62,0xf5,0x60,0x9f,0x5f,0xd4]
+          vmaxph ymm2 {k7} {z}, ymm3, ymm4, {sae}
+
+// CHECK: vmaxps ymm2, ymm3, ymm4, {sae}
+// CHECK: encoding: [0x62,0xf1,0x60,0x18,0x5f,0xd4]
+          vmaxps ymm2, ymm3, ymm4, {sae}
+
+// CHECK: vmaxps ymm2 {k7}, ymm3, ymm4, {sae}
+// CHECK: encoding: [0x62,0xf1,0x60,0x1f,0x5f,0xd4]
+          vmaxps ymm2 {k7}, ymm3, ymm4, {sae}
+
+// CHECK: vmaxps ymm2 {k7} {z}, ymm3, ymm4, {sae}
+// CHECK: encoding: [0x62,0xf1,0x60,0x9f,0x5f,0xd4]
+          vmaxps ymm2 {k7} {z}, ymm3, ymm4, {sae}
+
+// CHECK: vminpd ymm2, ymm3, ymm4, {sae}
+// CHECK: encoding: [0x62,0xf1,0xe1,0x18,0x5d,0xd4]
+          vminpd ymm2, ymm3, ymm4, {sae}
+
+// CHECK: vminpd ymm2 {k7}, ymm3, ymm4, {sae}
+// CHECK: encoding: [0x62,0xf1,0xe1,0x1f,0x5d,0xd4]
+          vminpd ymm2 {k7}, ymm3, ymm4, {sae}
+
+// CHECK: vminpd ymm2 {k7} {z}, ymm3, ymm4, {sae}
+// CHECK: encoding: [0x62,0xf1,0xe1,0x9f,0x5d,0xd4]
+          vminpd ymm2 {k7} {z}, ymm3, ymm4, {sae}
+
+// CHECK: vminph ymm2, ymm3, ymm4, {sae}
+// CHECK: encoding: [0x62,0xf5,0x60,0x18,0x5d,0xd4]
+          vminph ymm2, ymm3, ymm4, {sae}
+
+// CHECK: vminph ymm2 {k7}, ymm3, ymm4, {sae}
+// CHECK: encoding: [0x62,0xf5,0x60,0x1f,0x5d,0xd4]
+          vminph ymm2 {k7}, ymm3, ymm4, {sae}
+
+// CHECK: vminph ymm2 {k7} {z}, ymm3, ymm4, {sae}
+// CHECK: encoding: [0x62,0xf5,0x60,0x9f,0x5d,0xd4]
+          vminph ymm2 {k7} {z}, ymm3, ymm4, {sae}
+
+// CHECK: vminps ymm2, ymm3, ymm4, {sae}
+// CHECK: encoding: [0x62,0xf1,0x60,0x18,0x5d,0xd4]
+          vminps ymm2, ymm3, ymm4, {sae}
+
+// CHECK: vminps ymm2 {k7}, ymm3, ymm4, {sae}
+// CHECK: encoding: [0x62,0xf1,0x60,0x1f,0x5d,0xd4]
+          vminps ymm2 {k7}, ymm3, ymm4, {sae}
+
+// CHECK: vminps ymm2 {k7} {z}, ymm3, ymm4, {sae}
+// CHECK: encoding: [0x62,0xf1,0x60,0x9f,0x5d,0xd4]
+          vminps ymm2 {k7} {z}, ymm3, ymm4, {sae}
+
+// CHECK: vmulpd ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf1,0xe1,0x18,0x59,0xd4]
+          vmulpd ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vmulpd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf1,0xe1,0x3f,0x59,0xd4]
+          vmulpd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vmulpd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf1,0xe1,0xff,0x59,0xd4]
+          vmulpd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vmulph ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x60,0x18,0x59,0xd4]
+          vmulph ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vmulph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf5,0x60,0x3f,0x59,0xd4]
+          vmulph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vmulph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf5,0x60,0xff,0x59,0xd4]
+          vmulph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vmulps ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf1,0x60,0x18,0x59,0xd4]
+          vmulps ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vmulps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf1,0x60,0x3f,0x59,0xd4]
+          vmulps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vmulps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf1,0x60,0xff,0x59,0xd4]
+          vmulps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vrangepd ymm2, ymm3, ymm4, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0xe1,0x18,0x50,0xd4,0x7b]
+          vrangepd ymm2, ymm3, ymm4, {sae}, 123
+
+// CHECK: vrangepd ymm2 {k7}, ymm3, ymm4, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0xe1,0x1f,0x50,0xd4,0x7b]
+          vrangepd ymm2 {k7}, ymm3, ymm4, {sae}, 123
+
+// CHECK: vrangepd ymm2 {k7} {z}, ymm3, ymm4, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0xe1,0x9f,0x50,0xd4,0x7b]
+          vrangepd ymm2 {k7} {z}, ymm3, ymm4, {sae}, 123
+
+// CHECK: vrangeps ymm2, ymm3, ymm4, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0x61,0x18,0x50,0xd4,0x7b]
+          vrangeps ymm2, ymm3, ymm4, {sae}, 123
+
+// CHECK: vrangeps ymm2 {k7}, ymm3, ymm4, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0x61,0x1f,0x50,0xd4,0x7b]
+          vrangeps ymm2 {k7}, ymm3, ymm4, {sae}, 123
+
+// CHECK: vrangeps ymm2 {k7} {z}, ymm3, ymm4, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0x61,0x9f,0x50,0xd4,0x7b]
+          vrangeps ymm2 {k7} {z}, ymm3, ymm4, {sae}, 123
+
+// CHECK: vreducepd ymm2, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0xf9,0x18,0x56,0xd3,0x7b]
+          vreducepd ymm2, ymm3, {sae}, 123
+
+// CHECK: vreducepd ymm2 {k7}, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0xf9,0x1f,0x56,0xd3,0x7b]
+          vreducepd ymm2 {k7}, ymm3, {sae}, 123
+
+// CHECK: vreducepd ymm2 {k7} {z}, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0xf9,0x9f,0x56,0xd3,0x7b]
+          vreducepd ymm2 {k7} {z}, ymm3, {sae}, 123
+
+// CHECK: vreduceph ymm2, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0x78,0x18,0x56,0xd3,0x7b]
+          vreduceph ymm2, ymm3, {sae}, 123
+
+// CHECK: vreduceph ymm2 {k7}, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0x78,0x1f,0x56,0xd3,0x7b]
+          vreduceph ymm2 {k7}, ymm3, {sae}, 123
+
+// CHECK: vreduceph ymm2 {k7} {z}, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0x78,0x9f,0x56,0xd3,0x7b]
+          vreduceph ymm2 {k7} {z}, ymm3, {sae}, 123
+
+// CHECK: vreduceps ymm2, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0x79,0x18,0x56,0xd3,0x7b]
+          vreduceps ymm2, ymm3, {sae}, 123
+
+// CHECK: vreduceps ymm2 {k7}, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0x79,0x1f,0x56,0xd3,0x7b]
+          vreduceps ymm2 {k7}, ymm3, {sae}, 123
+
+// CHECK: vreduceps ymm2 {k7} {z}, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0x79,0x9f,0x56,0xd3,0x7b]
+          vreduceps ymm2 {k7} {z}, ymm3, {sae}, 123
+
+// CHECK: vrndscalepd ymm2, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0xf9,0x18,0x09,0xd3,0x7b]
+          vrndscalepd ymm2, ymm3, {sae}, 123
+
+// CHECK: vrndscalepd ymm2 {k7}, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0xf9,0x1f,0x09,0xd3,0x7b]
+          vrndscalepd ymm2 {k7}, ymm3, {sae}, 123
+
+// CHECK: vrndscalepd ymm2 {k7} {z}, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0xf9,0x9f,0x09,0xd3,0x7b]
+          vrndscalepd ymm2 {k7} {z}, ymm3, {sae}, 123
+
+// CHECK: vrndscaleph ymm2, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0x78,0x18,0x08,0xd3,0x7b]
+          vrndscaleph ymm2, ymm3, {sae}, 123
+
+// CHECK: vrndscaleph ymm2 {k7}, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0x78,0x1f,0x08,0xd3,0x7b]
+          vrndscaleph ymm2 {k7}, ymm3, {sae}, 123
+
+// CHECK: vrndscaleph ymm2 {k7} {z}, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0x78,0x9f,0x08,0xd3,0x7b]
+          vrndscaleph ymm2 {k7} {z}, ymm3, {sae}, 123
+
+// CHECK: vrndscaleps ymm2, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0x79,0x18,0x08,0xd3,0x7b]
+          vrndscaleps ymm2, ymm3, {sae}, 123
+
+// CHECK: vrndscaleps ymm2 {k7}, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0x79,0x1f,0x08,0xd3,0x7b]
+          vrndscaleps ymm2 {k7}, ymm3, {sae}, 123
+
+// CHECK: vrndscaleps ymm2 {k7} {z}, ymm3, {sae}, 123
+// CHECK: encoding: [0x62,0xf3,0x79,0x9f,0x08,0xd3,0x7b]
+          vrndscaleps ymm2 {k7} {z}, ymm3, {sae}, 123
+
+// CHECK: vscalefpd ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x18,0x2c,0xd4]
+          vscalefpd ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vscalefpd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0x3f,0x2c,0xd4]
+          vscalefpd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vscalefpd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0xe1,0xff,0x2c,0xd4]
+          vscalefpd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vscalefph ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x18,0x2c,0xd4]
+          vscalefph ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vscalefph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0x3f,0x2c,0xd4]
+          vscalefph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vscalefph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf6,0x61,0xff,0x2c,0xd4]
+          vscalefph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vscalefps ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x18,0x2c,0xd4]
+          vscalefps ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vscalefps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0x3f,0x2c,0xd4]
+          vscalefps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vscalefps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf2,0x61,0xff,0x2c,0xd4]
+          vscalefps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vsqrtpd ymm2, ymm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf1,0xf9,0x18,0x51,0xd3]
+          vsqrtpd ymm2, ymm3, {rn-sae}
+
+// CHECK: vsqrtpd ymm2 {k7}, ymm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf1,0xf9,0x3f,0x51,0xd3]
+          vsqrtpd ymm2 {k7}, ymm3, {rd-sae}
+
+// CHECK: vsqrtpd ymm2 {k7} {z}, ymm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf1,0xf9,0xff,0x51,0xd3]
+          vsqrtpd ymm2 {k7} {z}, ymm3, {rz-sae}
+
+// CHECK: vsqrtph ymm2, ymm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x78,0x18,0x51,0xd3]
+          vsqrtph ymm2, ymm3, {rn-sae}
+
+// CHECK: vsqrtph ymm2 {k7}, ymm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf5,0x78,0x3f,0x51,0xd3]
+          vsqrtph ymm2 {k7}, ymm3, {rd-sae}
+
+// CHECK: vsqrtph ymm2 {k7} {z}, ymm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf5,0x78,0xff,0x51,0xd3]
+          vsqrtph ymm2 {k7} {z}, ymm3, {rz-sae}
+
+// CHECK: vsqrtps ymm2, ymm3, {rn-sae}
+// CHECK: encoding: [0x62,0xf1,0x78,0x18,0x51,0xd3]
+          vsqrtps ymm2, ymm3, {rn-sae}
+
+// CHECK: vsqrtps ymm2 {k7}, ymm3, {rd-sae}
+// CHECK: encoding: [0x62,0xf1,0x78,0x3f,0x51,0xd3]
+          vsqrtps ymm2 {k7}, ymm3, {rd-sae}
+
+// CHECK: vsqrtps ymm2 {k7} {z}, ymm3, {rz-sae}
+// CHECK: encoding: [0x62,0xf1,0x78,0xff,0x51,0xd3]
+          vsqrtps ymm2 {k7} {z}, ymm3, {rz-sae}
+
+// CHECK: vsubpd ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf1,0xe1,0x18,0x5c,0xd4]
+          vsubpd ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vsubpd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf1,0xe1,0x3f,0x5c,0xd4]
+          vsubpd ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vsubpd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf1,0xe1,0xff,0x5c,0xd4]
+          vsubpd ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vsubph ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x60,0x18,0x5c,0xd4]
+          vsubph ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vsubph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf5,0x60,0x3f,0x5c,0xd4]
+          vsubph ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vsubph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf5,0x60,0xff,0x5c,0xd4]
+          vsubph ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+
+// CHECK: vsubps ymm2, ymm3, ymm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf1,0x60,0x18,0x5c,0xd4]
+          vsubps ymm2, ymm3, ymm4, {rn-sae}
+
+// CHECK: vsubps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+// CHECK: encoding: [0x62,0xf1,0x60,0x3f,0x5c,0xd4]
+          vsubps ymm2 {k7}, ymm3, ymm4, {rd-sae}
+
+// CHECK: vsubps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}
+// CHECK: encoding: [0x62,0xf1,0x60,0xff,0x5c,0xd4]
+          vsubps ymm2 {k7} {z}, ymm3, ymm4, {rz-sae}

diff --git a/llvm/test/MC/X86/avx10_2ni-64-att.s b/llvm/test/MC/X86/avx10_2ni-64-att.s
index 8ee4bc3f64127..09566eb50ddad 100644
--- a/llvm/test/MC/X86/avx10_2ni-64-att.s
+++ b/llvm/test/MC/X86/avx10_2ni-64-att.s
@@ -147,3 +147,1743 @@
 // CHECK: vaddps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
 // CHECK: encoding: [0x62,0x81,0x40,0xf7,0x58,0xf0]
           vaddps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vcmppd $123, {sae}, %ymm24, %ymm23, %k5
+// CHECK: encoding: [0x62,0x91,0xc1,0x10,0xc2,0xe8,0x7b]
+          vcmppd $123, {sae}, %ymm24, %ymm23, %k5
+
+// CHECK: vcmppd $123, {sae}, %ymm24, %ymm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0x91,0xc1,0x17,0xc2,0xe8,0x7b]
+          vcmppd $123, {sae}, %ymm24, %ymm23, %k5 {%k7}
+
+// CHECK: vcmpph $123, {sae}, %ymm24, %ymm23, %k5
+// CHECK: encoding: [0x62,0x93,0x40,0x10,0xc2,0xe8,0x7b]
+          vcmpph $123, {sae}, %ymm24, %ymm23, %k5
+
+// CHECK: vcmpph $123, {sae}, %ymm24, %ymm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0x93,0x40,0x17,0xc2,0xe8,0x7b]
+          vcmpph $123, {sae}, %ymm24, %ymm23, %k5 {%k7}
+
+// CHECK: vcmpps $123, {sae}, %ymm24, %ymm23, %k5
+// CHECK: encoding: [0x62,0x91,0x40,0x10,0xc2,0xe8,0x7b]
+          vcmpps $123, {sae}, %ymm24, %ymm23, %k5
+
+// CHECK: vcmpps $123, {sae}, %ymm24, %ymm23, %k5 {%k7}
+// CHECK: encoding: [0x62,0x91,0x40,0x17,0xc2,0xe8,0x7b]
+          vcmpps $123, {sae}, %ymm24, %ymm23, %k5 {%k7}
+
+// CHECK: vcvtdq2ph {rn-sae}, %ymm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0x78,0x18,0x5b,0xf7]
+          vcvtdq2ph {rn-sae}, %ymm23, %xmm22
+
+// CHECK: vcvtdq2ph {rd-sae}, %ymm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x78,0x3f,0x5b,0xf7]
+          vcvtdq2ph {rd-sae}, %ymm23, %xmm22 {%k7}
+
+// CHECK: vcvtdq2ph {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x78,0xff,0x5b,0xf7]
+          vcvtdq2ph {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+
+// CHECK: vcvtdq2ps {rn-sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa1,0x78,0x18,0x5b,0xf7]
+          vcvtdq2ps {rn-sae}, %ymm23, %ymm22
+
+// CHECK: vcvtdq2ps {rd-sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa1,0x78,0x3f,0x5b,0xf7]
+          vcvtdq2ps {rd-sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vcvtdq2ps {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0x78,0xff,0x5b,0xf7]
+          vcvtdq2ps {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvtpd2dq {rn-sae}, %ymm23, %xmm22
+// CHECK: encoding: [0x62,0xa1,0xfb,0x18,0xe6,0xf7]
+          vcvtpd2dq {rn-sae}, %ymm23, %xmm22
+
+// CHECK: vcvtpd2dq {rd-sae}, %ymm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa1,0xfb,0x3f,0xe6,0xf7]
+          vcvtpd2dq {rd-sae}, %ymm23, %xmm22 {%k7}
+
+// CHECK: vcvtpd2dq {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0xfb,0xff,0xe6,0xf7]
+          vcvtpd2dq {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+
+// CHECK: vcvtpd2ph {rn-sae}, %ymm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0xf9,0x18,0x5a,0xf7]
+          vcvtpd2ph {rn-sae}, %ymm23, %xmm22
+
+// CHECK: vcvtpd2ph {rd-sae}, %ymm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0xf9,0x3f,0x5a,0xf7]
+          vcvtpd2ph {rd-sae}, %ymm23, %xmm22 {%k7}
+
+// CHECK: vcvtpd2ph {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0xf9,0xff,0x5a,0xf7]
+          vcvtpd2ph {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+
+// CHECK: vcvtpd2ps {rn-sae}, %ymm23, %xmm22
+// CHECK: encoding: [0x62,0xa1,0xf9,0x18,0x5a,0xf7]
+          vcvtpd2ps {rn-sae}, %ymm23, %xmm22
+
+// CHECK: vcvtpd2ps {rd-sae}, %ymm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa1,0xf9,0x3f,0x5a,0xf7]
+          vcvtpd2ps {rd-sae}, %ymm23, %xmm22 {%k7}
+
+// CHECK: vcvtpd2ps {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0xf9,0xff,0x5a,0xf7]
+          vcvtpd2ps {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+
+// CHECK: vcvtpd2qq {rn-sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa1,0xf9,0x18,0x7b,0xf7]
+          vcvtpd2qq {rn-sae}, %ymm23, %ymm22
+
+// CHECK: vcvtpd2qq {rd-sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa1,0xf9,0x3f,0x7b,0xf7]
+          vcvtpd2qq {rd-sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vcvtpd2qq {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0xf9,0xff,0x7b,0xf7]
+          vcvtpd2qq {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvtpd2udq {rn-sae}, %ymm23, %xmm22
+// CHECK: encoding: [0x62,0xa1,0xf8,0x18,0x79,0xf7]
+          vcvtpd2udq {rn-sae}, %ymm23, %xmm22
+
+// CHECK: vcvtpd2udq {rd-sae}, %ymm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa1,0xf8,0x3f,0x79,0xf7]
+          vcvtpd2udq {rd-sae}, %ymm23, %xmm22 {%k7}
+
+// CHECK: vcvtpd2udq {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0xf8,0xff,0x79,0xf7]
+          vcvtpd2udq {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+
+// CHECK: vcvtpd2uqq {rn-sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa1,0xf9,0x18,0x79,0xf7]
+          vcvtpd2uqq {rn-sae}, %ymm23, %ymm22
+
+// CHECK: vcvtpd2uqq {rd-sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa1,0xf9,0x3f,0x79,0xf7]
+          vcvtpd2uqq {rd-sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vcvtpd2uqq {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0xf9,0xff,0x79,0xf7]
+          vcvtpd2uqq {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvtph2dq {rn-sae}, %xmm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x79,0x18,0x5b,0xf7]
+          vcvtph2dq {rn-sae}, %xmm23, %ymm22
+
+// CHECK: vcvtph2dq {rd-sae}, %xmm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x79,0x3f,0x5b,0xf7]
+          vcvtph2dq {rd-sae}, %xmm23, %ymm22 {%k7}
+
+// CHECK: vcvtph2dq {rz-sae}, %xmm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x79,0xff,0x5b,0xf7]
+          vcvtph2dq {rz-sae}, %xmm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvtph2pd {sae}, %xmm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x78,0x18,0x5a,0xf7]
+          vcvtph2pd {sae}, %xmm23, %ymm22
+
+// CHECK: vcvtph2pd {sae}, %xmm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x78,0x1f,0x5a,0xf7]
+          vcvtph2pd {sae}, %xmm23, %ymm22 {%k7}
+
+// CHECK: vcvtph2pd {sae}, %xmm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x78,0x9f,0x5a,0xf7]
+          vcvtph2pd {sae}, %xmm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvtph2ps {sae}, %xmm23, %ymm22
+// CHECK: encoding: [0x62,0xa2,0x79,0x18,0x13,0xf7]
+          vcvtph2ps {sae}, %xmm23, %ymm22
+
+// CHECK: vcvtph2ps {sae}, %xmm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa2,0x79,0x1f,0x13,0xf7]
+          vcvtph2ps {sae}, %xmm23, %ymm22 {%k7}
+
+// CHECK: vcvtph2ps {sae}, %xmm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa2,0x79,0x9f,0x13,0xf7]
+          vcvtph2ps {sae}, %xmm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvtph2psx {sae}, %xmm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x79,0x18,0x13,0xf7]
+          vcvtph2psx {sae}, %xmm23, %ymm22
+
+// CHECK: vcvtph2psx {sae}, %xmm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa6,0x79,0x1f,0x13,0xf7]
+          vcvtph2psx {sae}, %xmm23, %ymm22 {%k7}
+
+// CHECK: vcvtph2psx {sae}, %xmm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa6,0x79,0x9f,0x13,0xf7]
+          vcvtph2psx {sae}, %xmm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvtph2qq {rn-sae}, %xmm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x79,0x18,0x7b,0xf7]
+          vcvtph2qq {rn-sae}, %xmm23, %ymm22
+
+// CHECK: vcvtph2qq {rd-sae}, %xmm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x79,0x3f,0x7b,0xf7]
+          vcvtph2qq {rd-sae}, %xmm23, %ymm22 {%k7}
+
+// CHECK: vcvtph2qq {rz-sae}, %xmm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x79,0xff,0x7b,0xf7]
+          vcvtph2qq {rz-sae}, %xmm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvtph2udq {rn-sae}, %xmm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x78,0x18,0x79,0xf7]
+          vcvtph2udq {rn-sae}, %xmm23, %ymm22
+
+// CHECK: vcvtph2udq {rd-sae}, %xmm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x78,0x3f,0x79,0xf7]
+          vcvtph2udq {rd-sae}, %xmm23, %ymm22 {%k7}
+
+// CHECK: vcvtph2udq {rz-sae}, %xmm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x78,0xff,0x79,0xf7]
+          vcvtph2udq {rz-sae}, %xmm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvtph2uqq {rn-sae}, %xmm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x79,0x18,0x79,0xf7]
+          vcvtph2uqq {rn-sae}, %xmm23, %ymm22
+
+// CHECK: vcvtph2uqq {rd-sae}, %xmm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x79,0x3f,0x79,0xf7]
+          vcvtph2uqq {rd-sae}, %xmm23, %ymm22 {%k7}
+
+// CHECK: vcvtph2uqq {rz-sae}, %xmm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x79,0xff,0x79,0xf7]
+          vcvtph2uqq {rz-sae}, %xmm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvtph2uw {rn-sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x78,0x18,0x7d,0xf7]
+          vcvtph2uw {rn-sae}, %ymm23, %ymm22
+
+// CHECK: vcvtph2uw {rd-sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x78,0x3f,0x7d,0xf7]
+          vcvtph2uw {rd-sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vcvtph2uw {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x78,0xff,0x7d,0xf7]
+          vcvtph2uw {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvtph2w {rn-sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x79,0x18,0x7d,0xf7]
+          vcvtph2w {rn-sae}, %ymm23, %ymm22
+
+// CHECK: vcvtph2w {rd-sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x79,0x3f,0x7d,0xf7]
+          vcvtph2w {rd-sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vcvtph2w {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x79,0xff,0x7d,0xf7]
+          vcvtph2w {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvtps2dq {rn-sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa1,0x79,0x18,0x5b,0xf7]
+          vcvtps2dq {rn-sae}, %ymm23, %ymm22
+
+// CHECK: vcvtps2dq {rd-sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa1,0x79,0x3f,0x5b,0xf7]
+          vcvtps2dq {rd-sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vcvtps2dq {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0x79,0xff,0x5b,0xf7]
+          vcvtps2dq {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvtps2pd {sae}, %xmm23, %ymm22
+// CHECK: encoding: [0x62,0xa1,0x78,0x18,0x5a,0xf7]
+          vcvtps2pd {sae}, %xmm23, %ymm22
+
+// CHECK: vcvtps2pd {sae}, %xmm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa1,0x78,0x1f,0x5a,0xf7]
+          vcvtps2pd {sae}, %xmm23, %ymm22 {%k7}
+
+// CHECK: vcvtps2pd {sae}, %xmm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0x78,0x9f,0x5a,0xf7]
+          vcvtps2pd {sae}, %xmm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvtps2ph $123, {sae}, %ymm23, %xmm22
+// CHECK: encoding: [0x62,0xa3,0x79,0x18,0x1d,0xfe,0x7b]
+          vcvtps2ph $123, {sae}, %ymm23, %xmm22
+
+// CHECK: vcvtps2ph $123, {sae}, %ymm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa3,0x79,0x1f,0x1d,0xfe,0x7b]
+          vcvtps2ph $123, {sae}, %ymm23, %xmm22 {%k7}
+
+// CHECK: vcvtps2ph $123, {sae}, %ymm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0x79,0x9f,0x1d,0xfe,0x7b]
+          vcvtps2ph $123, {sae}, %ymm23, %xmm22 {%k7} {z}
+
+// CHECK: vcvtps2phx {rn-sae}, %ymm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0x79,0x18,0x1d,0xf7]
+          vcvtps2phx {rn-sae}, %ymm23, %xmm22
+
+// CHECK: vcvtps2phx {rd-sae}, %ymm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x79,0x3f,0x1d,0xf7]
+          vcvtps2phx {rd-sae}, %ymm23, %xmm22 {%k7}
+
+// CHECK: vcvtps2phx {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x79,0xff,0x1d,0xf7]
+          vcvtps2phx {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+
+// CHECK: vcvtps2qq {rn-sae}, %xmm23, %ymm22
+// CHECK: encoding: [0x62,0xa1,0x79,0x18,0x7b,0xf7]
+          vcvtps2qq {rn-sae}, %xmm23, %ymm22
+
+// CHECK: vcvtps2qq {rd-sae}, %xmm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa1,0x79,0x3f,0x7b,0xf7]
+          vcvtps2qq {rd-sae}, %xmm23, %ymm22 {%k7}
+
+// CHECK: vcvtps2qq {rz-sae}, %xmm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0x79,0xff,0x7b,0xf7]
+          vcvtps2qq {rz-sae}, %xmm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvtps2udq {rn-sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa1,0x78,0x18,0x79,0xf7]
+          vcvtps2udq {rn-sae}, %ymm23, %ymm22
+
+// CHECK: vcvtps2udq {rd-sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa1,0x78,0x3f,0x79,0xf7]
+          vcvtps2udq {rd-sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vcvtps2udq {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0x78,0xff,0x79,0xf7]
+          vcvtps2udq {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvtps2uqq {rn-sae}, %xmm23, %ymm22
+// CHECK: encoding: [0x62,0xa1,0x79,0x18,0x79,0xf7]
+          vcvtps2uqq {rn-sae}, %xmm23, %ymm22
+
+// CHECK: vcvtps2uqq {rd-sae}, %xmm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa1,0x79,0x3f,0x79,0xf7]
+          vcvtps2uqq {rd-sae}, %xmm23, %ymm22 {%k7}
+
+// CHECK: vcvtps2uqq {rz-sae}, %xmm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0x79,0xff,0x79,0xf7]
+          vcvtps2uqq {rz-sae}, %xmm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvtqq2pd {rn-sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa1,0xfa,0x18,0xe6,0xf7]
+          vcvtqq2pd {rn-sae}, %ymm23, %ymm22
+
+// CHECK: vcvtqq2pd {rd-sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa1,0xfa,0x3f,0xe6,0xf7]
+          vcvtqq2pd {rd-sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vcvtqq2pd {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0xfa,0xff,0xe6,0xf7]
+          vcvtqq2pd {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvtqq2ph {rn-sae}, %ymm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0xf8,0x18,0x5b,0xf7]
+          vcvtqq2ph {rn-sae}, %ymm23, %xmm22
+
+// CHECK: vcvtqq2ph {rd-sae}, %ymm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0xf8,0x3f,0x5b,0xf7]
+          vcvtqq2ph {rd-sae}, %ymm23, %xmm22 {%k7}
+
+// CHECK: vcvtqq2ph {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0xf8,0xff,0x5b,0xf7]
+          vcvtqq2ph {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+
+// CHECK: vcvtqq2ps {rn-sae}, %ymm23, %xmm22
+// CHECK: encoding: [0x62,0xa1,0xf8,0x18,0x5b,0xf7]
+          vcvtqq2ps {rn-sae}, %ymm23, %xmm22
+
+// CHECK: vcvtqq2ps {rd-sae}, %ymm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa1,0xf8,0x3f,0x5b,0xf7]
+          vcvtqq2ps {rd-sae}, %ymm23, %xmm22 {%k7}
+
+// CHECK: vcvtqq2ps {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0xf8,0xff,0x5b,0xf7]
+          vcvtqq2ps {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+
+// CHECK: vcvttpd2dq {sae}, %ymm23, %xmm22
+// CHECK: encoding: [0x62,0xa1,0xf9,0x18,0xe6,0xf7]
+          vcvttpd2dq {sae}, %ymm23, %xmm22
+
+// CHECK: vcvttpd2dq {sae}, %ymm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa1,0xf9,0x1f,0xe6,0xf7]
+          vcvttpd2dq {sae}, %ymm23, %xmm22 {%k7}
+
+// CHECK: vcvttpd2dq {sae}, %ymm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0xf9,0x9f,0xe6,0xf7]
+          vcvttpd2dq {sae}, %ymm23, %xmm22 {%k7} {z}
+
+// CHECK: vcvttpd2qq {sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa1,0xf9,0x18,0x7a,0xf7]
+          vcvttpd2qq {sae}, %ymm23, %ymm22
+
+// CHECK: vcvttpd2qq {sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa1,0xf9,0x1f,0x7a,0xf7]
+          vcvttpd2qq {sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vcvttpd2qq {sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0xf9,0x9f,0x7a,0xf7]
+          vcvttpd2qq {sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvttpd2udq {sae}, %ymm23, %xmm22
+// CHECK: encoding: [0x62,0xa1,0xf8,0x18,0x78,0xf7]
+          vcvttpd2udq {sae}, %ymm23, %xmm22
+
+// CHECK: vcvttpd2udq {sae}, %ymm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa1,0xf8,0x1f,0x78,0xf7]
+          vcvttpd2udq {sae}, %ymm23, %xmm22 {%k7}
+
+// CHECK: vcvttpd2udq {sae}, %ymm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0xf8,0x9f,0x78,0xf7]
+          vcvttpd2udq {sae}, %ymm23, %xmm22 {%k7} {z}
+
+// CHECK: vcvttpd2uqq {sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa1,0xf9,0x18,0x78,0xf7]
+          vcvttpd2uqq {sae}, %ymm23, %ymm22
+
+// CHECK: vcvttpd2uqq {sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa1,0xf9,0x1f,0x78,0xf7]
+          vcvttpd2uqq {sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vcvttpd2uqq {sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0xf9,0x9f,0x78,0xf7]
+          vcvttpd2uqq {sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvttph2dq {sae}, %xmm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x7a,0x18,0x5b,0xf7]
+          vcvttph2dq {sae}, %xmm23, %ymm22
+
+// CHECK: vcvttph2dq {sae}, %xmm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x7a,0x1f,0x5b,0xf7]
+          vcvttph2dq {sae}, %xmm23, %ymm22 {%k7}
+
+// CHECK: vcvttph2dq {sae}, %xmm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x7a,0x9f,0x5b,0xf7]
+          vcvttph2dq {sae}, %xmm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvttph2qq {sae}, %xmm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x79,0x18,0x7a,0xf7]
+          vcvttph2qq {sae}, %xmm23, %ymm22
+
+// CHECK: vcvttph2qq {sae}, %xmm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x79,0x1f,0x7a,0xf7]
+          vcvttph2qq {sae}, %xmm23, %ymm22 {%k7}
+
+// CHECK: vcvttph2qq {sae}, %xmm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x79,0x9f,0x7a,0xf7]
+          vcvttph2qq {sae}, %xmm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvttph2udq {sae}, %xmm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x78,0x18,0x78,0xf7]
+          vcvttph2udq {sae}, %xmm23, %ymm22
+
+// CHECK: vcvttph2udq {sae}, %xmm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x78,0x1f,0x78,0xf7]
+          vcvttph2udq {sae}, %xmm23, %ymm22 {%k7}
+
+// CHECK: vcvttph2udq {sae}, %xmm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x78,0x9f,0x78,0xf7]
+          vcvttph2udq {sae}, %xmm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvttph2uqq {sae}, %xmm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x79,0x18,0x78,0xf7]
+          vcvttph2uqq {sae}, %xmm23, %ymm22
+
+// CHECK: vcvttph2uqq {sae}, %xmm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x79,0x1f,0x78,0xf7]
+          vcvttph2uqq {sae}, %xmm23, %ymm22 {%k7}
+
+// CHECK: vcvttph2uqq {sae}, %xmm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x79,0x9f,0x78,0xf7]
+          vcvttph2uqq {sae}, %xmm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvttph2uw {sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x78,0x18,0x7c,0xf7]
+          vcvttph2uw {sae}, %ymm23, %ymm22
+
+// CHECK: vcvttph2uw {sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x78,0x1f,0x7c,0xf7]
+          vcvttph2uw {sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vcvttph2uw {sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x78,0x9f,0x7c,0xf7]
+          vcvttph2uw {sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvttph2w {sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x79,0x18,0x7c,0xf7]
+          vcvttph2w {sae}, %ymm23, %ymm22
+
+// CHECK: vcvttph2w {sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x79,0x1f,0x7c,0xf7]
+          vcvttph2w {sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vcvttph2w {sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x79,0x9f,0x7c,0xf7]
+          vcvttph2w {sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvttps2dq {sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa1,0x7a,0x18,0x5b,0xf7]
+          vcvttps2dq {sae}, %ymm23, %ymm22
+
+// CHECK: vcvttps2dq {sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa1,0x7a,0x1f,0x5b,0xf7]
+          vcvttps2dq {sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vcvttps2dq {sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0x7a,0x9f,0x5b,0xf7]
+          vcvttps2dq {sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvttps2qq {sae}, %xmm23, %ymm22
+// CHECK: encoding: [0x62,0xa1,0x79,0x18,0x7a,0xf7]
+          vcvttps2qq {sae}, %xmm23, %ymm22
+
+// CHECK: vcvttps2qq {sae}, %xmm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa1,0x79,0x1f,0x7a,0xf7]
+          vcvttps2qq {sae}, %xmm23, %ymm22 {%k7}
+
+// CHECK: vcvttps2qq {sae}, %xmm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0x79,0x9f,0x7a,0xf7]
+          vcvttps2qq {sae}, %xmm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvttps2udq {sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa1,0x78,0x18,0x78,0xf7]
+          vcvttps2udq {sae}, %ymm23, %ymm22
+
+// CHECK: vcvttps2udq {sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa1,0x78,0x1f,0x78,0xf7]
+          vcvttps2udq {sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vcvttps2udq {sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0x78,0x9f,0x78,0xf7]
+          vcvttps2udq {sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvttps2uqq {sae}, %xmm23, %ymm22
+// CHECK: encoding: [0x62,0xa1,0x79,0x18,0x78,0xf7]
+          vcvttps2uqq {sae}, %xmm23, %ymm22
+
+// CHECK: vcvttps2uqq {sae}, %xmm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa1,0x79,0x1f,0x78,0xf7]
+          vcvttps2uqq {sae}, %xmm23, %ymm22 {%k7}
+
+// CHECK: vcvttps2uqq {sae}, %xmm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0x79,0x9f,0x78,0xf7]
+          vcvttps2uqq {sae}, %xmm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvtudq2ph {rn-sae}, %ymm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7b,0x18,0x7a,0xf7]
+          vcvtudq2ph {rn-sae}, %ymm23, %xmm22
+
+// CHECK: vcvtudq2ph {rd-sae}, %ymm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x7b,0x3f,0x7a,0xf7]
+          vcvtudq2ph {rd-sae}, %ymm23, %xmm22 {%k7}
+
+// CHECK: vcvtudq2ph {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x7b,0xff,0x7a,0xf7]
+          vcvtudq2ph {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+
+// CHECK: vcvtudq2ps {rn-sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa1,0x7b,0x18,0x7a,0xf7]
+          vcvtudq2ps {rn-sae}, %ymm23, %ymm22
+
+// CHECK: vcvtudq2ps {rd-sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa1,0x7b,0x3f,0x7a,0xf7]
+          vcvtudq2ps {rd-sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vcvtudq2ps {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0x7b,0xff,0x7a,0xf7]
+          vcvtudq2ps {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvtuqq2pd {rn-sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa1,0xfa,0x18,0x7a,0xf7]
+          vcvtuqq2pd {rn-sae}, %ymm23, %ymm22
+
+// CHECK: vcvtuqq2pd {rd-sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa1,0xfa,0x3f,0x7a,0xf7]
+          vcvtuqq2pd {rd-sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vcvtuqq2pd {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0xfa,0xff,0x7a,0xf7]
+          vcvtuqq2pd {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvtuqq2ph {rn-sae}, %ymm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0xfb,0x18,0x7a,0xf7]
+          vcvtuqq2ph {rn-sae}, %ymm23, %xmm22
+
+// CHECK: vcvtuqq2ph {rd-sae}, %ymm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0xfb,0x3f,0x7a,0xf7]
+          vcvtuqq2ph {rd-sae}, %ymm23, %xmm22 {%k7}
+
+// CHECK: vcvtuqq2ph {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0xfb,0xff,0x7a,0xf7]
+          vcvtuqq2ph {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+
+// CHECK: vcvtuqq2ps {rn-sae}, %ymm23, %xmm22
+// CHECK: encoding: [0x62,0xa1,0xfb,0x18,0x7a,0xf7]
+          vcvtuqq2ps {rn-sae}, %ymm23, %xmm22
+
+// CHECK: vcvtuqq2ps {rd-sae}, %ymm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa1,0xfb,0x3f,0x7a,0xf7]
+          vcvtuqq2ps {rd-sae}, %ymm23, %xmm22 {%k7}
+
+// CHECK: vcvtuqq2ps {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0xfb,0xff,0x7a,0xf7]
+          vcvtuqq2ps {rz-sae}, %ymm23, %xmm22 {%k7} {z}
+
+// CHECK: vcvtuw2ph {rn-sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x7b,0x18,0x7d,0xf7]
+          vcvtuw2ph {rn-sae}, %ymm23, %ymm22
+
+// CHECK: vcvtuw2ph {rd-sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x7b,0x3f,0x7d,0xf7]
+          vcvtuw2ph {rd-sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vcvtuw2ph {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x7b,0xff,0x7d,0xf7]
+          vcvtuw2ph {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvtw2ph {rn-sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x7a,0x18,0x7d,0xf7]
+          vcvtw2ph {rn-sae}, %ymm23, %ymm22
+
+// CHECK: vcvtw2ph {rd-sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x7a,0x3f,0x7d,0xf7]
+          vcvtw2ph {rd-sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vcvtw2ph {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x7a,0xff,0x7d,0xf7]
+          vcvtw2ph {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vdivpd {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x81,0xc1,0x10,0x5e,0xf0]
+          vdivpd {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vdivpd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x81,0xc1,0x37,0x5e,0xf0]
+          vdivpd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vdivpd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x81,0xc1,0xf7,0x5e,0xf0]
+          vdivpd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vdivph {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x85,0x40,0x10,0x5e,0xf0]
+          vdivph {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vdivph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x40,0x37,0x5e,0xf0]
+          vdivph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vdivph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x40,0xf7,0x5e,0xf0]
+          vdivph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vdivps {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x81,0x40,0x10,0x5e,0xf0]
+          vdivps {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vdivps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x81,0x40,0x37,0x5e,0xf0]
+          vdivps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vdivps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x81,0x40,0xf7,0x5e,0xf0]
+          vdivps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfcmaddcph {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x43,0x10,0x56,0xf0]
+          vfcmaddcph {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfcmaddcph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x43,0x37,0x56,0xf0]
+          vfcmaddcph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfcmaddcph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x43,0xf7,0x56,0xf0]
+          vfcmaddcph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfcmulcph {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x43,0x10,0xd6,0xf0]
+          vfcmulcph {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfcmulcph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x43,0x37,0xd6,0xf0]
+          vfcmulcph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfcmulcph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x43,0xf7,0xd6,0xf0]
+          vfcmulcph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfixupimmpd $123, {sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x83,0xc1,0x10,0x54,0xf0,0x7b]
+          vfixupimmpd $123, {sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfixupimmpd $123, {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x83,0xc1,0x17,0x54,0xf0,0x7b]
+          vfixupimmpd $123, {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfixupimmpd $123, {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x83,0xc1,0x97,0x54,0xf0,0x7b]
+          vfixupimmpd $123, {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfixupimmps $123, {sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x83,0x41,0x10,0x54,0xf0,0x7b]
+          vfixupimmps $123, {sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfixupimmps $123, {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x83,0x41,0x17,0x54,0xf0,0x7b]
+          vfixupimmps $123, {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfixupimmps $123, {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x83,0x41,0x97,0x54,0xf0,0x7b]
+          vfixupimmps $123, {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd132pd {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0xc1,0x10,0x98,0xf0]
+          vfmadd132pd {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmadd132pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0xc1,0x37,0x98,0xf0]
+          vfmadd132pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmadd132pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0xc1,0xf7,0x98,0xf0]
+          vfmadd132pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd132ph {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x41,0x10,0x98,0xf0]
+          vfmadd132ph {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmadd132ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x41,0x37,0x98,0xf0]
+          vfmadd132ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmadd132ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x41,0xf7,0x98,0xf0]
+          vfmadd132ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd132ps {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x41,0x10,0x98,0xf0]
+          vfmadd132ps {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmadd132ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0x41,0x37,0x98,0xf0]
+          vfmadd132ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmadd132ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x41,0xf7,0x98,0xf0]
+          vfmadd132ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd213pd {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0xc1,0x10,0xa8,0xf0]
+          vfmadd213pd {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmadd213pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0xc1,0x37,0xa8,0xf0]
+          vfmadd213pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmadd213pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0xc1,0xf7,0xa8,0xf0]
+          vfmadd213pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd213ph {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x41,0x10,0xa8,0xf0]
+          vfmadd213ph {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmadd213ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x41,0x37,0xa8,0xf0]
+          vfmadd213ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmadd213ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x41,0xf7,0xa8,0xf0]
+          vfmadd213ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd213ps {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x41,0x10,0xa8,0xf0]
+          vfmadd213ps {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmadd213ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0x41,0x37,0xa8,0xf0]
+          vfmadd213ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmadd213ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x41,0xf7,0xa8,0xf0]
+          vfmadd213ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd231pd {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0xc1,0x10,0xb8,0xf0]
+          vfmadd231pd {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmadd231pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0xc1,0x37,0xb8,0xf0]
+          vfmadd231pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmadd231pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0xc1,0xf7,0xb8,0xf0]
+          vfmadd231pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd231ph {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x41,0x10,0xb8,0xf0]
+          vfmadd231ph {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmadd231ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x41,0x37,0xb8,0xf0]
+          vfmadd231ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmadd231ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x41,0xf7,0xb8,0xf0]
+          vfmadd231ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd231ps {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x41,0x10,0xb8,0xf0]
+          vfmadd231ps {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmadd231ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0x41,0x37,0xb8,0xf0]
+          vfmadd231ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmadd231ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x41,0xf7,0xb8,0xf0]
+          vfmadd231ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmaddcph {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x42,0x10,0x56,0xf0]
+          vfmaddcph {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmaddcph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x42,0x37,0x56,0xf0]
+          vfmaddcph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmaddcph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x42,0xf7,0x56,0xf0]
+          vfmaddcph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmaddsub132pd {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0xc1,0x10,0x96,0xf0]
+          vfmaddsub132pd {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmaddsub132pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0xc1,0x37,0x96,0xf0]
+          vfmaddsub132pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmaddsub132pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0xc1,0xf7,0x96,0xf0]
+          vfmaddsub132pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmaddsub132ph {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x41,0x10,0x96,0xf0]
+          vfmaddsub132ph {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmaddsub132ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x41,0x37,0x96,0xf0]
+          vfmaddsub132ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmaddsub132ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x41,0xf7,0x96,0xf0]
+          vfmaddsub132ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmaddsub132ps {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x41,0x10,0x96,0xf0]
+          vfmaddsub132ps {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmaddsub132ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0x41,0x37,0x96,0xf0]
+          vfmaddsub132ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmaddsub132ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x41,0xf7,0x96,0xf0]
+          vfmaddsub132ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmaddsub213pd {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0xc1,0x10,0xa6,0xf0]
+          vfmaddsub213pd {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmaddsub213pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0xc1,0x37,0xa6,0xf0]
+          vfmaddsub213pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmaddsub213pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0xc1,0xf7,0xa6,0xf0]
+          vfmaddsub213pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmaddsub213ph {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x41,0x10,0xa6,0xf0]
+          vfmaddsub213ph {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmaddsub213ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x41,0x37,0xa6,0xf0]
+          vfmaddsub213ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmaddsub213ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x41,0xf7,0xa6,0xf0]
+          vfmaddsub213ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmaddsub213ps {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x41,0x10,0xa6,0xf0]
+          vfmaddsub213ps {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmaddsub213ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0x41,0x37,0xa6,0xf0]
+          vfmaddsub213ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmaddsub213ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x41,0xf7,0xa6,0xf0]
+          vfmaddsub213ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmaddsub231pd {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0xc1,0x10,0xb6,0xf0]
+          vfmaddsub231pd {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmaddsub231pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0xc1,0x37,0xb6,0xf0]
+          vfmaddsub231pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmaddsub231pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0xc1,0xf7,0xb6,0xf0]
+          vfmaddsub231pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmaddsub231ph {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x41,0x10,0xb6,0xf0]
+          vfmaddsub231ph {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmaddsub231ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x41,0x37,0xb6,0xf0]
+          vfmaddsub231ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmaddsub231ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x41,0xf7,0xb6,0xf0]
+          vfmaddsub231ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmaddsub231ps {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x41,0x10,0xb6,0xf0]
+          vfmaddsub231ps {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmaddsub231ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0x41,0x37,0xb6,0xf0]
+          vfmaddsub231ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmaddsub231ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x41,0xf7,0xb6,0xf0]
+          vfmaddsub231ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsub132pd {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0xc1,0x10,0x9a,0xf0]
+          vfmsub132pd {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmsub132pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0xc1,0x37,0x9a,0xf0]
+          vfmsub132pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsub132pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0xc1,0xf7,0x9a,0xf0]
+          vfmsub132pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsub132ph {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x41,0x10,0x9a,0xf0]
+          vfmsub132ph {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmsub132ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x41,0x37,0x9a,0xf0]
+          vfmsub132ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsub132ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x41,0xf7,0x9a,0xf0]
+          vfmsub132ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsub132ps {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x41,0x10,0x9a,0xf0]
+          vfmsub132ps {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmsub132ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0x41,0x37,0x9a,0xf0]
+          vfmsub132ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsub132ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x41,0xf7,0x9a,0xf0]
+          vfmsub132ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsub213pd {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0xc1,0x10,0xaa,0xf0]
+          vfmsub213pd {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmsub213pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0xc1,0x37,0xaa,0xf0]
+          vfmsub213pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsub213pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0xc1,0xf7,0xaa,0xf0]
+          vfmsub213pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsub213ph {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x41,0x10,0xaa,0xf0]
+          vfmsub213ph {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmsub213ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x41,0x37,0xaa,0xf0]
+          vfmsub213ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsub213ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x41,0xf7,0xaa,0xf0]
+          vfmsub213ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsub213ps {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x41,0x10,0xaa,0xf0]
+          vfmsub213ps {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmsub213ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0x41,0x37,0xaa,0xf0]
+          vfmsub213ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsub213ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x41,0xf7,0xaa,0xf0]
+          vfmsub213ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsub231pd {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0xc1,0x10,0xba,0xf0]
+          vfmsub231pd {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmsub231pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0xc1,0x37,0xba,0xf0]
+          vfmsub231pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsub231pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0xc1,0xf7,0xba,0xf0]
+          vfmsub231pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsub231ph {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x41,0x10,0xba,0xf0]
+          vfmsub231ph {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmsub231ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x41,0x37,0xba,0xf0]
+          vfmsub231ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsub231ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x41,0xf7,0xba,0xf0]
+          vfmsub231ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsub231ps {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x41,0x10,0xba,0xf0]
+          vfmsub231ps {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmsub231ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0x41,0x37,0xba,0xf0]
+          vfmsub231ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsub231ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x41,0xf7,0xba,0xf0]
+          vfmsub231ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsubadd132pd {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0xc1,0x10,0x97,0xf0]
+          vfmsubadd132pd {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmsubadd132pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0xc1,0x37,0x97,0xf0]
+          vfmsubadd132pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsubadd132pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0xc1,0xf7,0x97,0xf0]
+          vfmsubadd132pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsubadd132ph {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x41,0x10,0x97,0xf0]
+          vfmsubadd132ph {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmsubadd132ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x41,0x37,0x97,0xf0]
+          vfmsubadd132ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsubadd132ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x41,0xf7,0x97,0xf0]
+          vfmsubadd132ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsubadd132ps {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x41,0x10,0x97,0xf0]
+          vfmsubadd132ps {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmsubadd132ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0x41,0x37,0x97,0xf0]
+          vfmsubadd132ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsubadd132ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x41,0xf7,0x97,0xf0]
+          vfmsubadd132ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsubadd213pd {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0xc1,0x10,0xa7,0xf0]
+          vfmsubadd213pd {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmsubadd213pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0xc1,0x37,0xa7,0xf0]
+          vfmsubadd213pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsubadd213pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0xc1,0xf7,0xa7,0xf0]
+          vfmsubadd213pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsubadd213ph {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x41,0x10,0xa7,0xf0]
+          vfmsubadd213ph {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmsubadd213ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x41,0x37,0xa7,0xf0]
+          vfmsubadd213ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsubadd213ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x41,0xf7,0xa7,0xf0]
+          vfmsubadd213ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsubadd213ps {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x41,0x10,0xa7,0xf0]
+          vfmsubadd213ps {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmsubadd213ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0x41,0x37,0xa7,0xf0]
+          vfmsubadd213ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsubadd213ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x41,0xf7,0xa7,0xf0]
+          vfmsubadd213ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsubadd231pd {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0xc1,0x10,0xb7,0xf0]
+          vfmsubadd231pd {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmsubadd231pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0xc1,0x37,0xb7,0xf0]
+          vfmsubadd231pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsubadd231pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0xc1,0xf7,0xb7,0xf0]
+          vfmsubadd231pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsubadd231ph {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x41,0x10,0xb7,0xf0]
+          vfmsubadd231ph {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmsubadd231ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x41,0x37,0xb7,0xf0]
+          vfmsubadd231ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsubadd231ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x41,0xf7,0xb7,0xf0]
+          vfmsubadd231ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmsubadd231ps {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x41,0x10,0xb7,0xf0]
+          vfmsubadd231ps {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmsubadd231ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0x41,0x37,0xb7,0xf0]
+          vfmsubadd231ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmsubadd231ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x41,0xf7,0xb7,0xf0]
+          vfmsubadd231ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfmulcph {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x42,0x10,0xd6,0xf0]
+          vfmulcph {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfmulcph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x42,0x37,0xd6,0xf0]
+          vfmulcph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfmulcph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x42,0xf7,0xd6,0xf0]
+          vfmulcph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmadd132pd {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0xc1,0x10,0x9c,0xf0]
+          vfnmadd132pd {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmadd132pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0xc1,0x37,0x9c,0xf0]
+          vfnmadd132pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmadd132pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0xc1,0xf7,0x9c,0xf0]
+          vfnmadd132pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmadd132ph {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x41,0x10,0x9c,0xf0]
+          vfnmadd132ph {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmadd132ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x41,0x37,0x9c,0xf0]
+          vfnmadd132ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmadd132ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x41,0xf7,0x9c,0xf0]
+          vfnmadd132ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmadd132ps {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x41,0x10,0x9c,0xf0]
+          vfnmadd132ps {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmadd132ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0x41,0x37,0x9c,0xf0]
+          vfnmadd132ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmadd132ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x41,0xf7,0x9c,0xf0]
+          vfnmadd132ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmadd213pd {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0xc1,0x10,0xac,0xf0]
+          vfnmadd213pd {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmadd213pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0xc1,0x37,0xac,0xf0]
+          vfnmadd213pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmadd213pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0xc1,0xf7,0xac,0xf0]
+          vfnmadd213pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmadd213ph {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x41,0x10,0xac,0xf0]
+          vfnmadd213ph {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmadd213ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x41,0x37,0xac,0xf0]
+          vfnmadd213ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmadd213ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x41,0xf7,0xac,0xf0]
+          vfnmadd213ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmadd213ps {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x41,0x10,0xac,0xf0]
+          vfnmadd213ps {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmadd213ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0x41,0x37,0xac,0xf0]
+          vfnmadd213ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmadd213ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x41,0xf7,0xac,0xf0]
+          vfnmadd213ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmadd231pd {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0xc1,0x10,0xbc,0xf0]
+          vfnmadd231pd {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmadd231pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0xc1,0x37,0xbc,0xf0]
+          vfnmadd231pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmadd231pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0xc1,0xf7,0xbc,0xf0]
+          vfnmadd231pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmadd231ph {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x41,0x10,0xbc,0xf0]
+          vfnmadd231ph {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmadd231ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x41,0x37,0xbc,0xf0]
+          vfnmadd231ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmadd231ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x41,0xf7,0xbc,0xf0]
+          vfnmadd231ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmadd231ps {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x41,0x10,0xbc,0xf0]
+          vfnmadd231ps {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmadd231ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0x41,0x37,0xbc,0xf0]
+          vfnmadd231ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmadd231ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x41,0xf7,0xbc,0xf0]
+          vfnmadd231ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmsub132pd {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0xc1,0x10,0x9e,0xf0]
+          vfnmsub132pd {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmsub132pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0xc1,0x37,0x9e,0xf0]
+          vfnmsub132pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmsub132pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0xc1,0xf7,0x9e,0xf0]
+          vfnmsub132pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmsub132ph {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x41,0x10,0x9e,0xf0]
+          vfnmsub132ph {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmsub132ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x41,0x37,0x9e,0xf0]
+          vfnmsub132ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmsub132ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x41,0xf7,0x9e,0xf0]
+          vfnmsub132ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmsub132ps {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x41,0x10,0x9e,0xf0]
+          vfnmsub132ps {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmsub132ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0x41,0x37,0x9e,0xf0]
+          vfnmsub132ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmsub132ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x41,0xf7,0x9e,0xf0]
+          vfnmsub132ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmsub213pd {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0xc1,0x10,0xae,0xf0]
+          vfnmsub213pd {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmsub213pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0xc1,0x37,0xae,0xf0]
+          vfnmsub213pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmsub213pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0xc1,0xf7,0xae,0xf0]
+          vfnmsub213pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmsub213ph {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x41,0x10,0xae,0xf0]
+          vfnmsub213ph {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmsub213ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x41,0x37,0xae,0xf0]
+          vfnmsub213ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmsub213ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x41,0xf7,0xae,0xf0]
+          vfnmsub213ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmsub213ps {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x41,0x10,0xae,0xf0]
+          vfnmsub213ps {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmsub213ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0x41,0x37,0xae,0xf0]
+          vfnmsub213ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmsub213ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x41,0xf7,0xae,0xf0]
+          vfnmsub213ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmsub231pd {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0xc1,0x10,0xbe,0xf0]
+          vfnmsub231pd {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmsub231pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0xc1,0x37,0xbe,0xf0]
+          vfnmsub231pd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmsub231pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0xc1,0xf7,0xbe,0xf0]
+          vfnmsub231pd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmsub231ph {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x41,0x10,0xbe,0xf0]
+          vfnmsub231ph {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmsub231ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x41,0x37,0xbe,0xf0]
+          vfnmsub231ph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmsub231ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x41,0xf7,0xbe,0xf0]
+          vfnmsub231ph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vfnmsub231ps {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x41,0x10,0xbe,0xf0]
+          vfnmsub231ps {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vfnmsub231ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0x41,0x37,0xbe,0xf0]
+          vfnmsub231ps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vfnmsub231ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x41,0xf7,0xbe,0xf0]
+          vfnmsub231ps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vgetexppd {sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa2,0xf9,0x18,0x42,0xf7]
+          vgetexppd {sae}, %ymm23, %ymm22
+
+// CHECK: vgetexppd {sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa2,0xf9,0x1f,0x42,0xf7]
+          vgetexppd {sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vgetexppd {sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa2,0xf9,0x9f,0x42,0xf7]
+          vgetexppd {sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vgetexpph {sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa6,0x79,0x18,0x42,0xf7]
+          vgetexpph {sae}, %ymm23, %ymm22
+
+// CHECK: vgetexpph {sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa6,0x79,0x1f,0x42,0xf7]
+          vgetexpph {sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vgetexpph {sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa6,0x79,0x9f,0x42,0xf7]
+          vgetexpph {sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vgetexpps {sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa2,0x79,0x18,0x42,0xf7]
+          vgetexpps {sae}, %ymm23, %ymm22
+
+// CHECK: vgetexpps {sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa2,0x79,0x1f,0x42,0xf7]
+          vgetexpps {sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vgetexpps {sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa2,0x79,0x9f,0x42,0xf7]
+          vgetexpps {sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vgetmantpd $123, {sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa3,0xf9,0x18,0x26,0xf7,0x7b]
+          vgetmantpd $123, {sae}, %ymm23, %ymm22
+
+// CHECK: vgetmantpd $123, {sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa3,0xf9,0x1f,0x26,0xf7,0x7b]
+          vgetmantpd $123, {sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vgetmantpd $123, {sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0xf9,0x9f,0x26,0xf7,0x7b]
+          vgetmantpd $123, {sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vgetmantph $123, {sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa3,0x78,0x18,0x26,0xf7,0x7b]
+          vgetmantph $123, {sae}, %ymm23, %ymm22
+
+// CHECK: vgetmantph $123, {sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa3,0x78,0x1f,0x26,0xf7,0x7b]
+          vgetmantph $123, {sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vgetmantph $123, {sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0x78,0x9f,0x26,0xf7,0x7b]
+          vgetmantph $123, {sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vgetmantps $123, {sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa3,0x79,0x18,0x26,0xf7,0x7b]
+          vgetmantps $123, {sae}, %ymm23, %ymm22
+
+// CHECK: vgetmantps $123, {sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa3,0x79,0x1f,0x26,0xf7,0x7b]
+          vgetmantps $123, {sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vgetmantps $123, {sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0x79,0x9f,0x26,0xf7,0x7b]
+          vgetmantps $123, {sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vmaxpd {sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x81,0xc1,0x10,0x5f,0xf0]
+          vmaxpd {sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vmaxpd {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x81,0xc1,0x17,0x5f,0xf0]
+          vmaxpd {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vmaxpd {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x81,0xc1,0x97,0x5f,0xf0]
+          vmaxpd {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vmaxph {sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x85,0x40,0x10,0x5f,0xf0]
+          vmaxph {sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vmaxph {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x40,0x17,0x5f,0xf0]
+          vmaxph {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vmaxph {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x40,0x97,0x5f,0xf0]
+          vmaxph {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vmaxps {sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x81,0x40,0x10,0x5f,0xf0]
+          vmaxps {sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vmaxps {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x81,0x40,0x17,0x5f,0xf0]
+          vmaxps {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vmaxps {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x81,0x40,0x97,0x5f,0xf0]
+          vmaxps {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vminpd {sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x81,0xc1,0x10,0x5d,0xf0]
+          vminpd {sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vminpd {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x81,0xc1,0x17,0x5d,0xf0]
+          vminpd {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vminpd {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x81,0xc1,0x97,0x5d,0xf0]
+          vminpd {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vminph {sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x85,0x40,0x10,0x5d,0xf0]
+          vminph {sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vminph {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x40,0x17,0x5d,0xf0]
+          vminph {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vminph {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x40,0x97,0x5d,0xf0]
+          vminph {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vminps {sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x81,0x40,0x10,0x5d,0xf0]
+          vminps {sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vminps {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x81,0x40,0x17,0x5d,0xf0]
+          vminps {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vminps {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x81,0x40,0x97,0x5d,0xf0]
+          vminps {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vmulpd {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x81,0xc1,0x10,0x59,0xf0]
+          vmulpd {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vmulpd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x81,0xc1,0x37,0x59,0xf0]
+          vmulpd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vmulpd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x81,0xc1,0xf7,0x59,0xf0]
+          vmulpd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vmulph {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x85,0x40,0x10,0x59,0xf0]
+          vmulph {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vmulph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x40,0x37,0x59,0xf0]
+          vmulph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vmulph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x40,0xf7,0x59,0xf0]
+          vmulph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vmulps {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x81,0x40,0x10,0x59,0xf0]
+          vmulps {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vmulps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x81,0x40,0x37,0x59,0xf0]
+          vmulps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vmulps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x81,0x40,0xf7,0x59,0xf0]
+          vmulps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vrangepd $123, {sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x83,0xc1,0x10,0x50,0xf0,0x7b]
+          vrangepd $123, {sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vrangepd $123, {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x83,0xc1,0x17,0x50,0xf0,0x7b]
+          vrangepd $123, {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vrangepd $123, {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x83,0xc1,0x97,0x50,0xf0,0x7b]
+          vrangepd $123, {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vrangeps $123, {sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x83,0x41,0x10,0x50,0xf0,0x7b]
+          vrangeps $123, {sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vrangeps $123, {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x83,0x41,0x17,0x50,0xf0,0x7b]
+          vrangeps $123, {sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vrangeps $123, {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x83,0x41,0x97,0x50,0xf0,0x7b]
+          vrangeps $123, {sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vreducepd $123, {sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa3,0xf9,0x18,0x56,0xf7,0x7b]
+          vreducepd $123, {sae}, %ymm23, %ymm22
+
+// CHECK: vreducepd $123, {sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa3,0xf9,0x1f,0x56,0xf7,0x7b]
+          vreducepd $123, {sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vreducepd $123, {sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0xf9,0x9f,0x56,0xf7,0x7b]
+          vreducepd $123, {sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vreduceph $123, {sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa3,0x78,0x18,0x56,0xf7,0x7b]
+          vreduceph $123, {sae}, %ymm23, %ymm22
+
+// CHECK: vreduceph $123, {sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa3,0x78,0x1f,0x56,0xf7,0x7b]
+          vreduceph $123, {sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vreduceph $123, {sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0x78,0x9f,0x56,0xf7,0x7b]
+          vreduceph $123, {sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vreduceps $123, {sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa3,0x79,0x18,0x56,0xf7,0x7b]
+          vreduceps $123, {sae}, %ymm23, %ymm22
+
+// CHECK: vreduceps $123, {sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa3,0x79,0x1f,0x56,0xf7,0x7b]
+          vreduceps $123, {sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vreduceps $123, {sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0x79,0x9f,0x56,0xf7,0x7b]
+          vreduceps $123, {sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vrndscalepd $123, {sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa3,0xf9,0x18,0x09,0xf7,0x7b]
+          vrndscalepd $123, {sae}, %ymm23, %ymm22
+
+// CHECK: vrndscalepd $123, {sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa3,0xf9,0x1f,0x09,0xf7,0x7b]
+          vrndscalepd $123, {sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vrndscalepd $123, {sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0xf9,0x9f,0x09,0xf7,0x7b]
+          vrndscalepd $123, {sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vrndscaleph $123, {sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa3,0x78,0x18,0x08,0xf7,0x7b]
+          vrndscaleph $123, {sae}, %ymm23, %ymm22
+
+// CHECK: vrndscaleph $123, {sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa3,0x78,0x1f,0x08,0xf7,0x7b]
+          vrndscaleph $123, {sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vrndscaleph $123, {sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0x78,0x9f,0x08,0xf7,0x7b]
+          vrndscaleph $123, {sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vrndscaleps $123, {sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa3,0x79,0x18,0x08,0xf7,0x7b]
+          vrndscaleps $123, {sae}, %ymm23, %ymm22
+
+// CHECK: vrndscaleps $123, {sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa3,0x79,0x1f,0x08,0xf7,0x7b]
+          vrndscaleps $123, {sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vrndscaleps $123, {sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0x79,0x9f,0x08,0xf7,0x7b]
+          vrndscaleps $123, {sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vscalefpd {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0xc1,0x10,0x2c,0xf0]
+          vscalefpd {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vscalefpd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0xc1,0x37,0x2c,0xf0]
+          vscalefpd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vscalefpd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0xc1,0xf7,0x2c,0xf0]
+          vscalefpd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vscalefph {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x86,0x41,0x10,0x2c,0xf0]
+          vscalefph {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vscalefph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x86,0x41,0x37,0x2c,0xf0]
+          vscalefph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vscalefph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x86,0x41,0xf7,0x2c,0xf0]
+          vscalefph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vscalefps {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x41,0x10,0x2c,0xf0]
+          vscalefps {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vscalefps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x82,0x41,0x37,0x2c,0xf0]
+          vscalefps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vscalefps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x41,0xf7,0x2c,0xf0]
+          vscalefps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vsqrtpd {rn-sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa1,0xf9,0x18,0x51,0xf7]
+          vsqrtpd {rn-sae}, %ymm23, %ymm22
+
+// CHECK: vsqrtpd {rd-sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa1,0xf9,0x3f,0x51,0xf7]
+          vsqrtpd {rd-sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vsqrtpd {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0xf9,0xff,0x51,0xf7]
+          vsqrtpd {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vsqrtph {rn-sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x78,0x18,0x51,0xf7]
+          vsqrtph {rn-sae}, %ymm23, %ymm22
+
+// CHECK: vsqrtph {rd-sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x78,0x3f,0x51,0xf7]
+          vsqrtph {rd-sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vsqrtph {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x78,0xff,0x51,0xf7]
+          vsqrtph {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vsqrtps {rn-sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa1,0x78,0x18,0x51,0xf7]
+          vsqrtps {rn-sae}, %ymm23, %ymm22
+
+// CHECK: vsqrtps {rd-sae}, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa1,0x78,0x3f,0x51,0xf7]
+          vsqrtps {rd-sae}, %ymm23, %ymm22 {%k7}
+
+// CHECK: vsqrtps {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0x78,0xff,0x51,0xf7]
+          vsqrtps {rz-sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vsubpd {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x81,0xc1,0x10,0x5c,0xf0]
+          vsubpd {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vsubpd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x81,0xc1,0x37,0x5c,0xf0]
+          vsubpd {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vsubpd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x81,0xc1,0xf7,0x5c,0xf0]
+          vsubpd {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vsubph {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x85,0x40,0x10,0x5c,0xf0]
+          vsubph {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vsubph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x85,0x40,0x37,0x5c,0xf0]
+          vsubph {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vsubph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x85,0x40,0xf7,0x5c,0xf0]
+          vsubph {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vsubps {rn-sae}, %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x81,0x40,0x10,0x5c,0xf0]
+          vsubps {rn-sae}, %ymm24, %ymm23, %ymm22
+
+// CHECK: vsubps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0x81,0x40,0x37,0x5c,0xf0]
+          vsubps {rd-sae}, %ymm24, %ymm23, %ymm22 {%k7}
+
+// CHECK: vsubps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x81,0x40,0xf7,0x5c,0xf0]
+          vsubps {rz-sae}, %ymm24, %ymm23, %ymm22 {%k7} {z}

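(Aside for readers who want to reproduce the CHECK encodings above by hand: the RUN line of this test file is outside this excerpt, but any of these lines can be fed to llvm-mc directly. The -mattr flag name below is inferred from the "avx10.2-256" target feature used by the new builtins and may not even be required by the assembler.)

  echo 'vdivpd {rn-sae}, %ymm24, %ymm23, %ymm22' | \
    llvm-mc -triple=x86_64 -mattr=+avx10.2-256 --show-encoding -

This should print the instruction back together with the same encoding bytes as the corresponding CHECK line, [0x62,0x81,0xc1,0x10,0x5e,0xf0].
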
diff  --git a/llvm/utils/TableGen/X86DisassemblerTables.cpp b/llvm/utils/TableGen/X86DisassemblerTables.cpp
index b0acd4ea4224a..21c5e3297b23f 100644
--- a/llvm/utils/TableGen/X86DisassemblerTables.cpp
+++ b/llvm/utils/TableGen/X86DisassemblerTables.cpp
@@ -710,7 +710,7 @@ void DisassemblerTables::emitModRMDecision(raw_ostream &o1, raw_ostream &o2,
                                            unsigned &i1, unsigned &i2,
                                            unsigned &ModRMTableNum,
                                            ModRMDecision &decision) const {
-  static uint32_t sEntryNumber = 1;
+  static uint64_t sEntryNumber = 1;
   ModRMDecisionType dt = getDecisionType(decision);
 
   if (dt == MODRM_ONEENTRY && decision.instructionIDs[0] == 0) {
@@ -785,9 +785,9 @@ void DisassemblerTables::emitModRMDecision(raw_ostream &o1, raw_ostream &o2,
     break;
   }
 
-  // We assume that the index can fit into uint16_t.
-  assert(sEntryNumber < 65536U &&
-         "Index into ModRMDecision is too large for uint16_t!");
+  // We assume that the index can fit into uint32_t.
+  assert(sEntryNumber < -1U &&
+         "Index into ModRMDecision is too large for uint32_t!");
   (void)sEntryNumber;
 }
 
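A note on the TableGen change above, since the new bound reads oddly at first glance: sEntryNumber is now uint64_t, and the unsigned int literal -1U (0xFFFFFFFF) is widened to uint64_t by the usual arithmetic conversions before the comparison, so the assert fires once the counter reaches 0xFFFFFFFF and can no longer be used as a uint32_t table index. A minimal standalone sketch of that promotion (not part of the patch, just an illustration):

  #include <cassert>
  #include <cstdint>
  #include <iostream>

  int main() {
    // Mirrors the widened counter in emitModRMDecision(); this is the
    // largest value the new assert still accepts.
    uint64_t sEntryNumber = 0xFFFFFFFEull;
    // -1U is unsigned int 0xFFFFFFFF; it is converted to uint64_t
    // 4294967295 before the comparison, so this is a 32-bit range check.
    assert(sEntryNumber < -1U &&
           "Index into ModRMDecision is too large for uint32_t!");
    std::cout << "fits in uint32_t: "
              << static_cast<uint32_t>(sEntryNumber) << "\n";
    return 0;
  }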

