[llvm] 2379949 - [X86] AVX512FP16 instructions enabling 3/6
via llvm-commits (llvm-commits at lists.llvm.org)
Tue Aug 17 18:35:19 PDT 2021
Author: Wang, Pengfei
Date: 2021-08-18T09:03:41+08:00
New Revision: 2379949aadcee8d4028dec0508f88bda290636bc
URL: https://github.com/llvm/llvm-project/commit/2379949aadcee8d4028dec0508f88bda290636bc
DIFF: https://github.com/llvm/llvm-project/commit/2379949aadcee8d4028dec0508f88bda290636bc.diff
LOG: [X86] AVX512FP16 instructions enabling 3/6
Enable FP16 conversion instructions.
Ref.: https://software.intel.com/content/www/us/en/develop/download/intel-avx512-fp16-architecture-specification.html
Reviewed By: LuoYuanke
Differential Revision: https://reviews.llvm.org/D105265
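
For illustration only (not part of the patch): a minimal sketch of how a few of the conversion intrinsics declared below in avx512fp16intrin.h might be used. It assumes a compiler built with this patch and the -mavx512fp16 target feature; the demo_* helper names are hypothetical.

#include <immintrin.h>

/* 8 x double -> 8 x _Float16 (VCVTPD2PH), current rounding mode. */
static __m128h demo_cvtpd_ph(__m512d v) { return _mm512_cvtpd_ph(v); }

/* 32 x _Float16 -> 32 x signed 16-bit int (VCVTPH2W). */
static __m512i demo_cvtph_epi16(__m512h v) { return _mm512_cvtph_epi16(v); }

/* Scalar _Float16 -> i32 (VCVTSH2SI) with an explicit embedded rounding mode. */
static int demo_cvtsh_i32_rn(__m128h v) {
  return _mm_cvt_roundsh_i32(v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
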
Added:
llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-intrinsics.ll
llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll
llvm/test/CodeGen/X86/avx512fp16-cvt.ll
llvm/test/CodeGen/X86/cvt16-2.ll
llvm/test/CodeGen/X86/fp-strict-scalar-fptoint-fp16.ll
llvm/test/CodeGen/X86/fp-strict-scalar-inttofp-fp16.ll
llvm/test/CodeGen/X86/vec-strict-fptoint-128-fp16.ll
llvm/test/CodeGen/X86/vec-strict-fptoint-256-fp16.ll
llvm/test/CodeGen/X86/vec-strict-fptoint-512-fp16.ll
llvm/test/CodeGen/X86/vec-strict-inttofp-128-fp16.ll
llvm/test/CodeGen/X86/vec-strict-inttofp-256-fp16.ll
llvm/test/CodeGen/X86/vec-strict-inttofp-512-fp16.ll
Modified:
clang/include/clang/Basic/BuiltinsX86.def
clang/include/clang/Basic/BuiltinsX86_64.def
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/Headers/avx512fp16intrin.h
clang/lib/Headers/avx512vlfp16intrin.h
clang/lib/Sema/SemaChecking.cpp
clang/test/CodeGen/X86/avx512fp16-builtins.c
clang/test/CodeGen/X86/avx512vlfp16-builtins.c
llvm/include/llvm/IR/IntrinsicsX86.td
llvm/include/llvm/IR/RuntimeLibcalls.def
llvm/lib/CodeGen/TargetLoweringBase.cpp
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86InstrAVX512.td
llvm/lib/Target/X86/X86InstrFoldTables.cpp
llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
llvm/lib/Target/X86/X86InstrInfo.cpp
llvm/lib/Target/X86/X86InstrSSE.td
llvm/lib/Target/X86/X86IntrinsicsInfo.h
llvm/test/CodeGen/X86/avx512fp16-arith-intrinsics.ll
llvm/test/CodeGen/X86/avx512fp16-arith-vl-intrinsics.ll
llvm/test/CodeGen/X86/avx512fp16-arith.ll
llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll
llvm/test/CodeGen/X86/fp-strict-scalar-fp16.ll
llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16vl.ll
llvm/test/CodeGen/X86/vec-strict-128-fp16.ll
llvm/test/CodeGen/X86/vec-strict-256-fp16.ll
llvm/test/CodeGen/X86/vec-strict-512-fp16.ll
llvm/test/MC/Disassembler/X86/avx512fp16.txt
llvm/test/MC/Disassembler/X86/avx512fp16vl.txt
llvm/test/MC/X86/avx512fp16.s
llvm/test/MC/X86/avx512fp16vl.s
llvm/test/MC/X86/intel-syntax-avx512fp16.s
llvm/test/MC/X86/intel-syntax-avx512fp16vl.s
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 594415fe80692..a0926f230d46f 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -1876,6 +1876,84 @@ TARGET_BUILTIN(__builtin_ia32_cmpsh_mask, "UcV8xV8xIiUcIi", "ncV:128:", "avx512f
TARGET_BUILTIN(__builtin_ia32_loadsh128_mask, "V8xV8x*V8xUc", "nV:128:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_storesh128_mask, "vV8x*V8xUc", "nV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtpd2ph128_mask, "V8xV2dV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtpd2ph256_mask, "V8xV4dV8xUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtpd2ph512_mask, "V8xV8dV8xUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2pd128_mask, "V2dV8xV2dUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2pd256_mask, "V4dV8xV4dUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2pd512_mask, "V8dV8xV8dUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtsh2ss_round_mask, "V4fV4fV8xV4fUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtss2sh_round_mask, "V8xV8xV4fV8xUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtsd2sh_round_mask, "V8xV8xV2dV8xUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtsh2sd_round_mask, "V2dV2dV8xV2dUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2w128_mask, "V8sV8xV8sUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2w256_mask, "V16sV16xV16sUs", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2w512_mask, "V32sV32xV32sUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2w128_mask, "V8sV8xV8sUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2w256_mask, "V16sV16xV16sUs", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2w512_mask, "V32sV32xV32sUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtw2ph128_mask, "V8xV8sV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtw2ph256_mask, "V16xV16sV16xUs", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtw2ph512_mask, "V32xV32sV32xUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2uw128_mask, "V8UsV8xV8UsUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2uw256_mask, "V16UsV16xV16UsUs", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2uw512_mask, "V32UsV32xV32UsUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2uw128_mask, "V8UsV8xV8UsUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2uw256_mask, "V16UsV16xV16UsUs", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2uw512_mask, "V32UsV32xV32UsUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtuw2ph128_mask, "V8xV8UsV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtuw2ph256_mask, "V16xV16UsV16xUs", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtuw2ph512_mask, "V32xV32UsV32xUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2dq128_mask, "V4iV8xV4iUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2dq256_mask, "V8iV8xV8iUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2dq512_mask, "V16iV16xV16iUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2udq128_mask, "V4UiV8xV4UiUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2udq256_mask, "V8UiV8xV8UiUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2udq512_mask, "V16UiV16xV16UiUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtdq2ph128_mask, "V8xV4iV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtdq2ph256_mask, "V8xV8iV8xUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtdq2ph512_mask, "V16xV16iV16xUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtudq2ph128_mask, "V8xV4UiV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtudq2ph256_mask, "V8xV8UiV8xUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtudq2ph512_mask, "V16xV16UiV16xUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2dq128_mask, "V4iV8xV4iUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2dq256_mask, "V8iV8xV8iUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2dq512_mask, "V16iV16xV16iUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2udq128_mask, "V4UiV8xV4UiUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2udq256_mask, "V8UiV8xV8UiUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2udq512_mask, "V16UiV16xV16UiUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtqq2ph128_mask, "V8xV2OiV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtqq2ph256_mask, "V8xV4OiV8xUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtqq2ph512_mask, "V8xV8OiV8xUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2qq128_mask, "V2OiV8xV2OiUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2qq256_mask, "V4OiV8xV4OiUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2qq512_mask, "V8OiV8xV8OiUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtuqq2ph128_mask, "V8xV2UOiV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtuqq2ph256_mask, "V8xV4UOiV8xUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtuqq2ph512_mask, "V8xV8UOiV8xUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2uqq128_mask, "V2UOiV8xV2UOiUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2uqq256_mask, "V4UOiV8xV4UOiUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2uqq512_mask, "V8UOiV8xV8UOiUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2qq128_mask, "V2OiV8xV2OiUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2qq256_mask, "V4OiV8xV4OiUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2qq512_mask, "V8OiV8xV8OiUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2uqq128_mask, "V2UOiV8xV2UOiUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2uqq256_mask, "V4UOiV8xV4UOiUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2uqq512_mask, "V8UOiV8xV8UOiUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtsh2si32, "iV8xIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtsh2usi32, "UiV8xIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtusi2sh, "V8xV8xUiIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtsi2sh, "V8xV8xiIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttsh2si32, "iV8xIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttsh2usi32, "UiV8xIi", "ncV:128:", "avx512fp16")
+
+TARGET_BUILTIN(__builtin_ia32_vcvtph2psx128_mask, "V4fV8xV4fUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2psx256_mask, "V8fV8xV8fUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2psx512_mask, "V16fV16xV16fUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2phx128_mask, "V8xV4fV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2phx256_mask, "V8xV8fV8xUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2phx512_mask, "V16xV16fV16xUsIi", "ncV:512:", "avx512fp16")
+
// generic select intrinsics
TARGET_BUILTIN(__builtin_ia32_selectb_128, "V16cUsV16cV16c", "ncV:128:", "avx512bw,avx512vl")
TARGET_BUILTIN(__builtin_ia32_selectb_256, "V32cUiV32cV32c", "ncV:256:", "avx512bw,avx512vl")
diff --git a/clang/include/clang/Basic/BuiltinsX86_64.def b/clang/include/clang/Basic/BuiltinsX86_64.def
index ce2b1decdf6ca..e0c9bec9b4e00 100644
--- a/clang/include/clang/Basic/BuiltinsX86_64.def
+++ b/clang/include/clang/Basic/BuiltinsX86_64.def
@@ -92,6 +92,12 @@ TARGET_BUILTIN(__builtin_ia32_cvtsi2sd64, "V2dV2dOiIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_cvtsi2ss64, "V4fV4fOiIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_cvtusi2sd64, "V2dV2dUOiIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_cvtusi2ss64, "V4fV4fUOiIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvtsh2si64, "OiV8xIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtsh2usi64, "UOiV8xIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtusi642sh, "V8xV8xUOiIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtsi642sh, "V8xV8xOiIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttsh2si64, "OiV8xIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttsh2usi64, "UOiV8xIi", "ncV:128:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_directstore_u64, "vULi*ULi", "n", "movdiri")
// UINTR
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 536a0bae13afe..08f4f644eb10d 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -12723,10 +12723,16 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_cvtdq2ps512_mask:
case X86::BI__builtin_ia32_cvtqq2ps512_mask:
case X86::BI__builtin_ia32_cvtqq2pd512_mask:
+ case X86::BI__builtin_ia32_vcvtw2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
case X86::BI__builtin_ia32_cvtudq2ps512_mask:
case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
+ case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
case X86::BI__builtin_ia32_vfmaddss3:
diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h
index 58d7349c4905a..6a4a9d4a6c7eb 100644
--- a/clang/lib/Headers/avx512fp16intrin.h
+++ b/clang/lib/Headers/avx512fp16intrin.h
@@ -947,6 +947,996 @@ static __inline__ short __DEFAULT_FN_ATTRS128 _mm_cvtsi128_si16(__m128i __a) {
return __b[0];
}
+#define _mm512_cvt_roundpd_ph(A, R) \
+ ((__m128h)__builtin_ia32_vcvtpd2ph512_mask( \
+ (__v8df)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundpd_ph(W, U, A, R) \
+ ((__m128h)__builtin_ia32_vcvtpd2ph512_mask((__v8df)(A), (__v8hf)(W), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundpd_ph(U, A, R) \
+ ((__m128h)__builtin_ia32_vcvtpd2ph512_mask( \
+ (__v8df)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS512 _mm512_cvtpd_ph(__m512d __A) {
+ return (__m128h)__builtin_ia32_vcvtpd2ph512_mask(
+ (__v8df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m512d __A) {
+ return (__m128h)__builtin_ia32_vcvtpd2ph512_mask(
+ (__v8df)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtpd_ph(__mmask8 __U, __m512d __A) {
+ return (__m128h)__builtin_ia32_vcvtpd2ph512_mask(
+ (__v8df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundph_pd(A, R) \
+ ((__m512d)__builtin_ia32_vcvtph2pd512_mask( \
+ (__v8hf)(A), (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundph_pd(W, U, A, R) \
+ ((__m512d)__builtin_ia32_vcvtph2pd512_mask((__v8hf)(A), (__v8df)(W), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundph_pd(U, A, R) \
+ ((__m512d)__builtin_ia32_vcvtph2pd512_mask( \
+ (__v8hf)(A), (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R)))
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtph_pd(__m128h __A) {
+ return (__m512d)__builtin_ia32_vcvtph2pd512_mask(
+ (__v8hf)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtph_pd(__m512d __W, __mmask8 __U, __m128h __A) {
+ return (__m512d)__builtin_ia32_vcvtph2pd512_mask(
+ (__v8hf)__A, (__v8df)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
+ return (__m512d)__builtin_ia32_vcvtph2pd512_mask(
+ (__v8hf)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvt_roundsh_ss(A, B, R) \
+ ((__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)(A), (__v8hf)(B), \
+ (__v4sf)_mm_undefined_ps(), \
+ (__mmask8)(-1), (int)(R)))
+
+#define _mm_mask_cvt_roundsh_ss(W, U, A, B, R) \
+ ((__m128)__builtin_ia32_vcvtsh2ss_round_mask( \
+ (__v4sf)(A), (__v8hf)(B), (__v4sf)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_cvt_roundsh_ss(U, A, B, R) \
+ ((__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)(A), (__v8hf)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtsh_ss(__m128 __A,
+ __m128h __B) {
+ return (__m128)__builtin_ia32_vcvtsh2ss_round_mask(
+ (__v4sf)__A, (__v8hf)__B, (__v4sf)_mm_undefined_ps(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtsh_ss(__m128 __W,
+ __mmask8 __U,
+ __m128 __A,
+ __m128h __B) {
+ return (__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)__A, (__v8hf)__B,
+ (__v4sf)__W, (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsh_ss(__mmask8 __U,
+ __m128 __A,
+ __m128h __B) {
+ return (__m128)__builtin_ia32_vcvtsh2ss_round_mask(
+ (__v4sf)__A, (__v8hf)__B, (__v4sf)_mm_setzero_ps(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvt_roundss_sh(A, B, R) \
+ ((__m128h)__builtin_ia32_vcvtss2sh_round_mask((__v8hf)(A), (__v4sf)(B), \
+ (__v8hf)_mm_undefined_ph(), \
+ (__mmask8)(-1), (int)(R)))
+
+#define _mm_mask_cvt_roundss_sh(W, U, A, B, R) \
+ ((__m128h)__builtin_ia32_vcvtss2sh_round_mask( \
+ (__v8hf)(A), (__v4sf)(B), (__v8hf)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_cvt_roundss_sh(U, A, B, R) \
+ ((__m128h)__builtin_ia32_vcvtss2sh_round_mask((__v8hf)(A), (__v4sf)(B), \
+ (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtss_sh(__m128h __A,
+ __m128 __B) {
+ return (__m128h)__builtin_ia32_vcvtss2sh_round_mask(
+ (__v8hf)__A, (__v4sf)__B, (__v8hf)_mm_undefined_ph(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sh(__m128h __W,
+ __mmask8 __U,
+ __m128h __A,
+ __m128 __B) {
+ return (__m128h)__builtin_ia32_vcvtss2sh_round_mask(
+ (__v8hf)__A, (__v4sf)__B, (__v8hf)__W, (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sh(__mmask8 __U,
+ __m128h __A,
+ __m128 __B) {
+ return (__m128h)__builtin_ia32_vcvtss2sh_round_mask(
+ (__v8hf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvt_roundsd_sh(A, B, R) \
+ ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask((__v8hf)(A), (__v2df)(B), \
+ (__v8hf)_mm_undefined_ph(), \
+ (__mmask8)(-1), (int)(R)))
+
+#define _mm_mask_cvt_roundsd_sh(W, U, A, B, R) \
+ ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask( \
+ (__v8hf)(A), (__v2df)(B), (__v8hf)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_cvt_roundsd_sh(U, A, B, R) \
+ ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask((__v8hf)(A), (__v2df)(B), \
+ (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtsd_sh(__m128h __A,
+ __m128d __B) {
+ return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask(
+ (__v8hf)__A, (__v2df)__B, (__v8hf)_mm_undefined_ph(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtsd_sh(__m128h __W,
+ __mmask8 __U,
+ __m128h __A,
+ __m128d __B) {
+ return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask(
+ (__v8hf)__A, (__v2df)__B, (__v8hf)__W, (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtsd_sh(__mmask8 __U, __m128h __A, __m128d __B) {
+ return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask(
+ (__v8hf)__A, (__v2df)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvt_roundsh_sd(A, B, R) \
+ ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask((__v2df)(A), (__v8hf)(B), \
+ (__v2df)_mm_undefined_pd(), \
+ (__mmask8)(-1), (int)(R)))
+
+#define _mm_mask_cvt_roundsh_sd(W, U, A, B, R) \
+ ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask( \
+ (__v2df)(A), (__v8hf)(B), (__v2df)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_cvt_roundsh_sd(U, A, B, R) \
+ ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask((__v2df)(A), (__v8hf)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtsh_sd(__m128d __A,
+ __m128h __B) {
+ return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask(
+ (__v2df)__A, (__v8hf)__B, (__v2df)_mm_undefined_pd(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtsh_sd(__m128d __W,
+ __mmask8 __U,
+ __m128d __A,
+ __m128h __B) {
+ return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask(
+ (__v2df)__A, (__v8hf)__B, (__v2df)__W, (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtsh_sd(__mmask8 __U, __m128d __A, __m128h __B) {
+ return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask(
+ (__v2df)__A, (__v8hf)__B, (__v2df)_mm_setzero_pd(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundph_epi16(A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A), \
+ (__v32hi)_mm512_undefined_epi32(), \
+ (__mmask32)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundph_epi16(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A), (__v32hi)(W), \
+ (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundph_epi16(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A), \
+ (__v32hi)_mm512_setzero_epi32(), \
+ (__mmask32)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvtph_epi16(__m512h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2w512_mask(
+ (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtph_epi16(__m512i __W, __mmask32 __U, __m512h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2w512_mask(
+ (__v32hf)__A, (__v32hi)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtph_epi16(__mmask32 __U, __m512h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2w512_mask(
+ (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvtt_roundph_epi16(A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2w512_mask( \
+ (__v32hf)(A), (__v32hi)_mm512_undefined_epi32(), (__mmask32)(-1), \
+ (int)(R)))
+
+#define _mm512_mask_cvtt_roundph_epi16(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2w512_mask((__v32hf)(A), (__v32hi)(W), \
+ (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_cvtt_roundph_epi16(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2w512_mask((__v32hf)(A), \
+ (__v32hi)_mm512_setzero_epi32(), \
+ (__mmask32)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvttph_epi16(__m512h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2w512_mask(
+ (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvttph_epi16(__m512i __W, __mmask32 __U, __m512h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2w512_mask(
+ (__v32hf)__A, (__v32hi)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvttph_epi16(__mmask32 __U, __m512h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2w512_mask(
+ (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundepi16_ph(A, R) \
+ ((__m512h)__builtin_ia32_vcvtw2ph512_mask((__v32hi)(A), \
+ (__v32hf)_mm512_undefined_ph(), \
+ (__mmask32)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundepi16_ph(W, U, A, R) \
+ ((__m512h)__builtin_ia32_vcvtw2ph512_mask((__v32hi)(A), (__v32hf)(W), \
+ (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundepi16_ph(U, A, R) \
+ ((__m512h)__builtin_ia32_vcvtw2ph512_mask( \
+ (__v32hi)(A), (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_cvtepi16_ph(__m512i __A) {
+ return (__m512h)__builtin_ia32_vcvtw2ph512_mask(
+ (__v32hi)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtepi16_ph(__m512h __W, __mmask32 __U, __m512i __A) {
+ return (__m512h)__builtin_ia32_vcvtw2ph512_mask(
+ (__v32hi)__A, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtepi16_ph(__mmask32 __U, __m512i __A) {
+ return (__m512h)__builtin_ia32_vcvtw2ph512_mask(
+ (__v32hi)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundph_epu16(A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2uw512_mask( \
+ (__v32hf)(A), (__v32hu)_mm512_undefined_epi32(), (__mmask32)(-1), \
+ (int)(R)))
+
+#define _mm512_mask_cvt_roundph_epu16(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2uw512_mask((__v32hf)(A), (__v32hu)(W), \
+ (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundph_epu16(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2uw512_mask((__v32hf)(A), \
+ (__v32hu)_mm512_setzero_epi32(), \
+ (__mmask32)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvtph_epu16(__m512h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2uw512_mask(
+ (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtph_epu16(__m512i __W, __mmask32 __U, __m512h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2uw512_mask(
+ (__v32hf)__A, (__v32hu)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtph_epu16(__mmask32 __U, __m512h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2uw512_mask(
+ (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvtt_roundph_epu16(A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2uw512_mask( \
+ (__v32hf)(A), (__v32hu)_mm512_undefined_epi32(), (__mmask32)(-1), \
+ (int)(R)))
+
+#define _mm512_mask_cvtt_roundph_epu16(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2uw512_mask((__v32hf)(A), (__v32hu)(W), \
+ (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_cvtt_roundph_epu16(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2uw512_mask((__v32hf)(A), \
+ (__v32hu)_mm512_setzero_epi32(), \
+ (__mmask32)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvttph_epu16(__m512h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2uw512_mask(
+ (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvttph_epu16(__m512i __W, __mmask32 __U, __m512h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2uw512_mask(
+ (__v32hf)__A, (__v32hu)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvttph_epu16(__mmask32 __U, __m512h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2uw512_mask(
+ (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundepu16_ph(A, R) \
+ ((__m512h)__builtin_ia32_vcvtuw2ph512_mask((__v32hu)(A), \
+ (__v32hf)_mm512_undefined_ph(), \
+ (__mmask32)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundepu16_ph(W, U, A, R) \
+ ((__m512h)__builtin_ia32_vcvtuw2ph512_mask((__v32hu)(A), (__v32hf)(W), \
+ (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundepu16_ph(U, A, R) \
+ ((__m512h)__builtin_ia32_vcvtuw2ph512_mask( \
+ (__v32hu)(A), (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_cvtepu16_ph(__m512i __A) {
+ return (__m512h)__builtin_ia32_vcvtuw2ph512_mask(
+ (__v32hu)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtepu16_ph(__m512h __W, __mmask32 __U, __m512i __A) {
+ return (__m512h)__builtin_ia32_vcvtuw2ph512_mask(
+ (__v32hu)__A, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtepu16_ph(__mmask32 __U, __m512i __A) {
+ return (__m512h)__builtin_ia32_vcvtuw2ph512_mask(
+ (__v32hu)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundph_epi32(A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2dq512_mask( \
+ (__v16hf)(A), (__v16si)_mm512_undefined_epi32(), (__mmask16)(-1), \
+ (int)(R)))
+
+#define _mm512_mask_cvt_roundph_epi32(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2dq512_mask((__v16hf)(A), (__v16si)(W), \
+ (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundph_epi32(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2dq512_mask((__v16hf)(A), \
+ (__v16si)_mm512_setzero_epi32(), \
+ (__mmask16)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvtph_epi32(__m256h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2dq512_mask(
+ (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtph_epi32(__m512i __W, __mmask16 __U, __m256h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2dq512_mask(
+ (__v16hf)__A, (__v16si)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtph_epi32(__mmask16 __U, __m256h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2dq512_mask(
+ (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundph_epu32(A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2udq512_mask( \
+ (__v16hf)(A), (__v16su)_mm512_undefined_epi32(), (__mmask16)(-1), \
+ (int)(R)))
+
+#define _mm512_mask_cvt_roundph_epu32(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2udq512_mask((__v16hf)(A), (__v16su)(W), \
+ (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundph_epu32(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2udq512_mask((__v16hf)(A), \
+ (__v16su)_mm512_setzero_epi32(), \
+ (__mmask16)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvtph_epu32(__m256h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2udq512_mask(
+ (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtph_epu32(__m512i __W, __mmask16 __U, __m256h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2udq512_mask(
+ (__v16hf)__A, (__v16su)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtph_epu32(__mmask16 __U, __m256h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2udq512_mask(
+ (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundepi32_ph(A, R) \
+ ((__m256h)__builtin_ia32_vcvtdq2ph512_mask((__v16si)(A), \
+ (__v16hf)_mm256_undefined_ph(), \
+ (__mmask16)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundepi32_ph(W, U, A, R) \
+ ((__m256h)__builtin_ia32_vcvtdq2ph512_mask((__v16si)(A), (__v16hf)(W), \
+ (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundepi32_ph(U, A, R) \
+ ((__m256h)__builtin_ia32_vcvtdq2ph512_mask( \
+ (__v16si)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R)))
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS512
+_mm512_cvtepi32_ph(__m512i __A) {
+ return (__m256h)__builtin_ia32_vcvtdq2ph512_mask(
+ (__v16si)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtepi32_ph(__m256h __W, __mmask16 __U, __m512i __A) {
+ return (__m256h)__builtin_ia32_vcvtdq2ph512_mask(
+ (__v16si)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtepi32_ph(__mmask16 __U, __m512i __A) {
+ return (__m256h)__builtin_ia32_vcvtdq2ph512_mask(
+ (__v16si)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundepu32_ph(A, R) \
+ ((__m256h)__builtin_ia32_vcvtudq2ph512_mask((__v16su)(A), \
+ (__v16hf)_mm256_undefined_ph(), \
+ (__mmask16)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundepu32_ph(W, U, A, R) \
+ ((__m256h)__builtin_ia32_vcvtudq2ph512_mask((__v16su)(A), (__v16hf)(W), \
+ (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundepu32_ph(U, A, R) \
+ ((__m256h)__builtin_ia32_vcvtudq2ph512_mask( \
+ (__v16su)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R)))
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS512
+_mm512_cvtepu32_ph(__m512i __A) {
+ return (__m256h)__builtin_ia32_vcvtudq2ph512_mask(
+ (__v16su)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtepu32_ph(__m256h __W, __mmask16 __U, __m512i __A) {
+ return (__m256h)__builtin_ia32_vcvtudq2ph512_mask(
+ (__v16su)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtepu32_ph(__mmask16 __U, __m512i __A) {
+ return (__m256h)__builtin_ia32_vcvtudq2ph512_mask(
+ (__v16su)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvtt_roundph_epi32(A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2dq512_mask( \
+ (__v16hf)(A), (__v16si)_mm512_undefined_epi32(), (__mmask16)(-1), \
+ (int)(R)))
+
+#define _mm512_mask_cvtt_roundph_epi32(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2dq512_mask((__v16hf)(A), (__v16si)(W), \
+ (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_cvtt_roundph_epi32(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2dq512_mask((__v16hf)(A), \
+ (__v16si)_mm512_setzero_epi32(), \
+ (__mmask16)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvttph_epi32(__m256h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2dq512_mask(
+ (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvttph_epi32(__m512i __W, __mmask16 __U, __m256h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2dq512_mask(
+ (__v16hf)__A, (__v16si)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvttph_epi32(__mmask16 __U, __m256h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2dq512_mask(
+ (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvtt_roundph_epu32(A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2udq512_mask( \
+ (__v16hf)(A), (__v16su)_mm512_undefined_epi32(), (__mmask16)(-1), \
+ (int)(R)))
+
+#define _mm512_mask_cvtt_roundph_epu32(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2udq512_mask((__v16hf)(A), (__v16su)(W), \
+ (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_cvtt_roundph_epu32(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2udq512_mask( \
+ (__v16hf)(A), (__v16su)_mm512_setzero_epi32(), (__mmask16)(U), \
+ (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvttph_epu32(__m256h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2udq512_mask(
+ (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvttph_epu32(__m512i __W, __mmask16 __U, __m256h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2udq512_mask(
+ (__v16hf)__A, (__v16su)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvttph_epu32(__mmask16 __U, __m256h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2udq512_mask(
+ (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundepi64_ph(A, R) \
+ ((__m128h)__builtin_ia32_vcvtqq2ph512_mask( \
+ (__v8di)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundepi64_ph(W, U, A, R) \
+ ((__m128h)__builtin_ia32_vcvtqq2ph512_mask((__v8di)(A), (__v8hf)(W), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundepi64_ph(U, A, R) \
+ ((__m128h)__builtin_ia32_vcvtqq2ph512_mask( \
+ (__v8di)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS512
+_mm512_cvtepi64_ph(__m512i __A) {
+ return (__m128h)__builtin_ia32_vcvtqq2ph512_mask(
+ (__v8di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m512i __A) {
+ return (__m128h)__builtin_ia32_vcvtqq2ph512_mask(
+ (__v8di)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtepi64_ph(__mmask8 __U, __m512i __A) {
+ return (__m128h)__builtin_ia32_vcvtqq2ph512_mask(
+ (__v8di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundph_epi64(A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2qq512_mask((__v8hf)(A), \
+ (__v8di)_mm512_undefined_epi32(), \
+ (__mmask8)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundph_epi64(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2qq512_mask((__v8hf)(A), (__v8di)(W), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundph_epi64(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2qq512_mask( \
+ (__v8hf)(A), (__v8di)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvtph_epi64(__m128h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2qq512_mask(
+ (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtph_epi64(__m512i __W, __mmask8 __U, __m128h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2qq512_mask(
+ (__v8hf)__A, (__v8di)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2qq512_mask(
+ (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundepu64_ph(A, R) \
+ ((__m128h)__builtin_ia32_vcvtuqq2ph512_mask( \
+ (__v8du)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundepu64_ph(W, U, A, R) \
+ ((__m128h)__builtin_ia32_vcvtuqq2ph512_mask((__v8du)(A), (__v8hf)(W), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundepu64_ph(U, A, R) \
+ ((__m128h)__builtin_ia32_vcvtuqq2ph512_mask( \
+ (__v8du)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS512
+_mm512_cvtepu64_ph(__m512i __A) {
+ return (__m128h)__builtin_ia32_vcvtuqq2ph512_mask(
+ (__v8du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m512i __A) {
+ return (__m128h)__builtin_ia32_vcvtuqq2ph512_mask(
+ (__v8du)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtepu64_ph(__mmask8 __U, __m512i __A) {
+ return (__m128h)__builtin_ia32_vcvtuqq2ph512_mask(
+ (__v8du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundph_epu64(A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2uqq512_mask( \
+ (__v8hf)(A), (__v8du)_mm512_undefined_epi32(), (__mmask8)(-1), \
+ (int)(R)))
+
+#define _mm512_mask_cvt_roundph_epu64(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2uqq512_mask((__v8hf)(A), (__v8du)(W), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundph_epu64(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2uqq512_mask( \
+ (__v8hf)(A), (__v8du)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvtph_epu64(__m128h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2uqq512_mask(
+ (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtph_epu64(__m512i __W, __mmask8 __U, __m128h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2uqq512_mask(
+ (__v8hf)__A, (__v8du)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2uqq512_mask(
+ (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvtt_roundph_epi64(A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2qq512_mask( \
+ (__v8hf)(A), (__v8di)_mm512_undefined_epi32(), (__mmask8)(-1), \
+ (int)(R)))
+
+#define _mm512_mask_cvtt_roundph_epi64(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2qq512_mask((__v8hf)(A), (__v8di)(W), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm512_maskz_cvtt_roundph_epi64(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2qq512_mask( \
+ (__v8hf)(A), (__v8di)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvttph_epi64(__m128h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2qq512_mask(
+ (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvttph_epi64(__m512i __W, __mmask8 __U, __m128h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2qq512_mask(
+ (__v8hf)__A, (__v8di)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2qq512_mask(
+ (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvtt_roundph_epu64(A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2uqq512_mask( \
+ (__v8hf)(A), (__v8du)_mm512_undefined_epi32(), (__mmask8)(-1), \
+ (int)(R)))
+
+#define _mm512_mask_cvtt_roundph_epu64(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2uqq512_mask((__v8hf)(A), (__v8du)(W), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm512_maskz_cvtt_roundph_epu64(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2uqq512_mask( \
+ (__v8hf)(A), (__v8du)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvttph_epu64(__m128h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2uqq512_mask(
+ (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvttph_epu64(__m512i __W, __mmask8 __U, __m128h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2uqq512_mask(
+ (__v8hf)__A, (__v8du)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2uqq512_mask(
+ (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvt_roundsh_i32(A, R) \
+ ((int)__builtin_ia32_vcvtsh2si32((__v8hf)(A), (int)(R)))
+
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvtsh_i32(__m128h __A) {
+ return (int)__builtin_ia32_vcvtsh2si32((__v8hf)__A, _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvt_roundsh_u32(A, R) \
+ ((unsigned int)__builtin_ia32_vcvtsh2usi32((__v8hf)(A), (int)(R)))
+
+static __inline__ unsigned int __DEFAULT_FN_ATTRS128
+_mm_cvtsh_u32(__m128h __A) {
+ return (unsigned int)__builtin_ia32_vcvtsh2usi32((__v8hf)__A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __x86_64__
+#define _mm_cvt_roundsh_i64(A, R) \
+ ((long long)__builtin_ia32_vcvtsh2si64((__v8hf)(A), (int)(R)))
+
+static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvtsh_i64(__m128h __A) {
+ return (long long)__builtin_ia32_vcvtsh2si64((__v8hf)__A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvt_roundsh_u64(A, R) \
+ ((unsigned long long)__builtin_ia32_vcvtsh2usi64((__v8hf)(A), (int)(R)))
+
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
+_mm_cvtsh_u64(__m128h __A) {
+ return (unsigned long long)__builtin_ia32_vcvtsh2usi64(
+ (__v8hf)__A, _MM_FROUND_CUR_DIRECTION);
+}
+#endif // __x86_64__
+
+#define _mm_cvt_roundu32_sh(A, B, R) \
+ ((__m128h)__builtin_ia32_vcvtusi2sh((__v8hf)(A), (unsigned int)(B), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_cvtu32_sh(__m128h __A, unsigned int __B) {
+ __A[0] = __B;
+ return __A;
+}
+
+#ifdef __x86_64__
+#define _mm_cvt_roundu64_sh(A, B, R) \
+ ((__m128h)__builtin_ia32_vcvtusi642sh((__v8hf)(A), (unsigned long long)(B), \
+ (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_cvtu64_sh(__m128h __A, unsigned long long __B) {
+ __A[0] = __B;
+ return __A;
+}
+#endif
+
+#define _mm_cvt_roundi32_sh(A, B, R) \
+ ((__m128h)__builtin_ia32_vcvtsi2sh((__v8hf)(A), (int)(B), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvti32_sh(__m128h __A,
+ int __B) {
+ __A[0] = __B;
+ return __A;
+}
+
+#ifdef __x86_64__
+#define _mm_cvt_roundi64_sh(A, B, R) \
+ ((__m128h)__builtin_ia32_vcvtsi642sh((__v8hf)(A), (long long)(B), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvti64_sh(__m128h __A,
+ long long __B) {
+ __A[0] = __B;
+ return __A;
+}
+#endif
+
+#define _mm_cvtt_roundsh_i32(A, R) \
+ ((int)__builtin_ia32_vcvttsh2si32((__v8hf)(A), (int)(R)))
+
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsh_i32(__m128h __A) {
+ return (int)__builtin_ia32_vcvttsh2si32((__v8hf)__A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __x86_64__
+#define _mm_cvtt_roundsh_i64(A, R) \
+ ((long long)__builtin_ia32_vcvttsh2si64((__v8hf)(A), (int)(R)))
+
+static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvttsh_i64(__m128h __A) {
+ return (long long)__builtin_ia32_vcvttsh2si64((__v8hf)__A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+#endif
+
+#define _mm_cvtt_roundsh_u32(A, R) \
+ ((unsigned int)__builtin_ia32_vcvttsh2usi32((__v8hf)(A), (int)(R)))
+
+static __inline__ unsigned int __DEFAULT_FN_ATTRS128
+_mm_cvttsh_u32(__m128h __A) {
+ return (unsigned int)__builtin_ia32_vcvttsh2usi32((__v8hf)__A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __x86_64__
+#define _mm_cvtt_roundsh_u64(A, R) \
+ ((unsigned long long)__builtin_ia32_vcvttsh2usi64((__v8hf)(A), (int)(R)))
+
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
+_mm_cvttsh_u64(__m128h __A) {
+ return (unsigned long long)__builtin_ia32_vcvttsh2usi64(
+ (__v8hf)__A, _MM_FROUND_CUR_DIRECTION);
+}
+#endif
+
+#define _mm512_cvtx_roundph_ps(A, R) \
+ ((__m512)__builtin_ia32_vcvtph2psx512_mask((__v16hf)(A), \
+ (__v16sf)_mm512_undefined_ps(), \
+ (__mmask16)(-1), (int)(R)))
+
+#define _mm512_mask_cvtx_roundph_ps(W, U, A, R) \
+ ((__m512)__builtin_ia32_vcvtph2psx512_mask((__v16hf)(A), (__v16sf)(W), \
+ (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_cvtx_roundph_ps(U, A, R) \
+ ((__m512)__builtin_ia32_vcvtph2psx512_mask( \
+ (__v16hf)(A), (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (int)(R)))
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtxph_ps(__m256h __A) {
+ return (__m512)__builtin_ia32_vcvtph2psx512_mask(
+ (__v16hf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtxph_ps(__m512 __W, __mmask16 __U, __m256h __A) {
+ return (__m512)__builtin_ia32_vcvtph2psx512_mask(
+ (__v16hf)__A, (__v16sf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtxph_ps(__mmask16 __U, __m256h __A) {
+ return (__m512)__builtin_ia32_vcvtph2psx512_mask(
+ (__v16hf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvtx_roundps_ph(A, R) \
+ ((__m256h)__builtin_ia32_vcvtps2phx512_mask((__v16sf)(A), \
+ (__v16hf)_mm256_undefined_ph(), \
+ (__mmask16)(-1), (int)(R)))
+
+#define _mm512_mask_cvtx_roundps_ph(W, U, A, R) \
+ ((__m256h)__builtin_ia32_vcvtps2phx512_mask((__v16sf)(A), (__v16hf)(W), \
+ (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_cvtx_roundps_ph(U, A, R) \
+ ((__m256h)__builtin_ia32_vcvtps2phx512_mask( \
+ (__v16sf)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R)))
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS512 _mm512_cvtxps_ph(__m512 __A) {
+ return (__m256h)__builtin_ia32_vcvtps2phx512_mask(
+ (__v16sf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtxps_ph(__m256h __W, __mmask16 __U, __m512 __A) {
+ return (__m256h)__builtin_ia32_vcvtps2phx512_mask(
+ (__v16sf)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtxps_ph(__mmask16 __U, __m512 __A) {
+ return (__m256h)__builtin_ia32_vcvtps2phx512_mask(
+ (__v16sf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
static __inline__ _Float16 __DEFAULT_FN_ATTRS512
_mm512_reduce_add_ph(__m512h __W) {
return __builtin_ia32_reduce_fadd_ph512(-0.0f16, __W);
diff --git a/clang/lib/Headers/avx512vlfp16intrin.h b/clang/lib/Headers/avx512vlfp16intrin.h
index 0f23054e6fa10..ab2cf436ee16d 100644
--- a/clang/lib/Headers/avx512vlfp16intrin.h
+++ b/clang/lib/Headers/avx512vlfp16intrin.h
@@ -327,6 +327,772 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_abs_ph(__m128h __A) {
((__mmask8)__builtin_ia32_cmpph128_mask( \
(__v8hf)(__m128h)(a), (__v8hf)(__m128h)(b), (int)(p), (__mmask8)(m)))
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtpd_ph(__m128d __A) {
+ return (__m128h)__builtin_ia32_vcvtpd2ph128_mask(
+ (__v2df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_ph(__m128h __W,
+ __mmask8 __U,
+ __m128d __A) {
+ return (__m128h)__builtin_ia32_vcvtpd2ph128_mask((__v2df)__A, (__v8hf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtpd_ph(__mmask8 __U, __m128d __A) {
+ return (__m128h)__builtin_ia32_vcvtpd2ph128_mask(
+ (__v2df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtpd_ph(__m256d __A) {
+ return (__m128h)__builtin_ia32_vcvtpd2ph256_mask(
+ (__v4df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m256d __A) {
+ return (__m128h)__builtin_ia32_vcvtpd2ph256_mask((__v4df)__A, (__v8hf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtpd_ph(__mmask8 __U, __m256d __A) {
+ return (__m128h)__builtin_ia32_vcvtpd2ph256_mask(
+ (__v4df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtph_pd(__m128h __A) {
+ return (__m128d)__builtin_ia32_vcvtph2pd128_mask(
+ (__v8hf)__A, (__v2df)_mm_undefined_pd(), (__mmask8)-1);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_pd(__m128d __W,
+ __mmask8 __U,
+ __m128h __A) {
+ return (__m128d)__builtin_ia32_vcvtph2pd128_mask((__v8hf)__A, (__v2df)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
+ return (__m128d)__builtin_ia32_vcvtph2pd128_mask(
+ (__v8hf)__A, (__v2df)_mm_setzero_pd(), (__mmask8)__U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtph_pd(__m128h __A) {
+ return (__m256d)__builtin_ia32_vcvtph2pd256_mask(
+ (__v8hf)__A, (__v4df)_mm256_undefined_pd(), (__mmask8)-1);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtph_pd(__m256d __W, __mmask8 __U, __m128h __A) {
+ return (__m256d)__builtin_ia32_vcvtph2pd256_mask((__v8hf)__A, (__v4df)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
+ return (__m256d)__builtin_ia32_vcvtph2pd256_mask(
+ (__v8hf)__A, (__v4df)_mm256_setzero_pd(), (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi16(__m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2w128_mask(
+ (__v8hf)__A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvtph_epi16(__m128i __W, __mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2w128_mask((__v8hf)__A, (__v8hi)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtph_epi16(__mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2w128_mask(
+ (__v8hf)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvtph_epi16(__m256h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2w256_mask(
+ (__v16hf)__A, (__v16hi)_mm256_undefined_si256(), (__mmask16)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtph_epi16(__m256i __W, __mmask16 __U, __m256h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2w256_mask((__v16hf)__A, (__v16hi)__W,
+ (__mmask16)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtph_epi16(__mmask16 __U, __m256h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2w256_mask(
+ (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi16(__m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2w128_mask(
+ (__v8hf)__A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttph_epi16(__m128i __W, __mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2w128_mask((__v8hf)__A, (__v8hi)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttph_epi16(__mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2w128_mask(
+ (__v8hf)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttph_epi16(__m256h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2w256_mask(
+ (__v16hf)__A, (__v16hi)_mm256_undefined_si256(), (__mmask16)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvttph_epi16(__m256i __W, __mmask16 __U, __m256h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2w256_mask((__v16hf)__A, (__v16hi)__W,
+ (__mmask16)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvttph_epi16(__mmask16 __U, __m256h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2w256_mask(
+ (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi16_ph(__m128i __A) {
+ return (__m128h) __builtin_convertvector((__v8hi)__A, __v8hf);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_cvtepi16_ph(__m128h __W, __mmask8 __U, __m128i __A) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)__W);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtepi16_ph(__mmask8 __U, __m128i __A) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)_mm_setzero_ph());
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_cvtepi16_ph(__m256i __A) {
+ return (__m256h) __builtin_convertvector((__v16hi)__A, __v16hf);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtepi16_ph(__m256h __W, __mmask16 __U, __m256i __A) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ (__mmask16)__U, (__v16hf)_mm256_cvtepi16_ph(__A), (__v16hf)__W);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtepi16_ph(__mmask16 __U, __m256i __A) {
+ return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
+ (__v16hf)_mm256_cvtepi16_ph(__A),
+ (__v16hf)_mm256_setzero_ph());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu16(__m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2uw128_mask(
+ (__v8hf)__A, (__v8hu)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvtph_epu16(__m128i __W, __mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2uw128_mask((__v8hf)__A, (__v8hu)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtph_epu16(__mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2uw128_mask(
+ (__v8hf)__A, (__v8hu)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvtph_epu16(__m256h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2uw256_mask(
+ (__v16hf)__A, (__v16hu)_mm256_undefined_si256(), (__mmask16)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtph_epu16(__m256i __W, __mmask16 __U, __m256h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2uw256_mask((__v16hf)__A, (__v16hu)__W,
+ (__mmask16)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtph_epu16(__mmask16 __U, __m256h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2uw256_mask(
+ (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu16(__m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2uw128_mask(
+ (__v8hf)__A, (__v8hu)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttph_epu16(__m128i __W, __mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2uw128_mask((__v8hf)__A, (__v8hu)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttph_epu16(__mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2uw128_mask(
+ (__v8hf)__A, (__v8hu)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttph_epu16(__m256h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2uw256_mask(
+ (__v16hf)__A, (__v16hu)_mm256_undefined_si256(), (__mmask16)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvttph_epu16(__m256i __W, __mmask16 __U, __m256h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2uw256_mask((__v16hf)__A, (__v16hu)__W,
+ (__mmask16)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvttph_epu16(__mmask16 __U, __m256h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2uw256_mask(
+ (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu16_ph(__m128i __A) {
+ return (__m128h) __builtin_convertvector((__v8hu)__A, __v8hf);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_cvtepu16_ph(__m128h __W, __mmask8 __U, __m128i __A) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)__W);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtepu16_ph(__mmask8 __U, __m128i __A) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)_mm_setzero_ph());
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_cvtepu16_ph(__m256i __A) {
+ return (__m256h) __builtin_convertvector((__v16hu)__A, __v16hf);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtepu16_ph(__m256h __W, __mmask16 __U, __m256i __A) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ (__mmask16)__U, (__v16hf)_mm256_cvtepu16_ph(__A), (__v16hf)__W);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtepu16_ph(__mmask16 __U, __m256i __A) {
+ return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
+ (__v16hf)_mm256_cvtepu16_ph(__A),
+ (__v16hf)_mm256_setzero_ph());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi32(__m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2dq128_mask(
+ (__v8hf)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvtph_epi32(__m128i __W, __mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2dq128_mask((__v8hf)__A, (__v4si)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtph_epi32(__mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2dq128_mask(
+ (__v8hf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvtph_epi32(__m128h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2dq256_mask(
+ (__v8hf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtph_epi32(__m256i __W, __mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2dq256_mask((__v8hf)__A, (__v8si)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtph_epi32(__mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2dq256_mask(
+ (__v8hf)__A, (__v8si)_mm256_setzero_si256(), (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu32(__m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2udq128_mask(
+ (__v8hf)__A, (__v4su)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvtph_epu32(__m128i __W, __mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2udq128_mask((__v8hf)__A, (__v4su)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtph_epu32(__mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2udq128_mask(
+ (__v8hf)__A, (__v4su)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvtph_epu32(__m128h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2udq256_mask(
+ (__v8hf)__A, (__v8su)_mm256_undefined_si256(), (__mmask8)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtph_epu32(__m256i __W, __mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2udq256_mask((__v8hf)__A, (__v8su)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtph_epu32(__mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2udq256_mask(
+ (__v8hf)__A, (__v8su)_mm256_setzero_si256(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi32_ph(__m128i __A) {
+ return (__m128h)__builtin_ia32_vcvtdq2ph128_mask(
+ (__v4si)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m128i __A) {
+ return (__m128h)__builtin_ia32_vcvtdq2ph128_mask((__v4si)__A, (__v8hf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtepi32_ph(__mmask8 __U, __m128i __A) {
+ return (__m128h)__builtin_ia32_vcvtdq2ph128_mask(
+ (__v4si)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_cvtepi32_ph(__m256i __A) {
+ return (__m128h) __builtin_convertvector((__v8si)__A, __v8hf);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m256i __A) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)__W);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtepi32_ph(__mmask8 __U, __m256i __A) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)_mm_setzero_ph());
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu32_ph(__m128i __A) {
+ return (__m128h)__builtin_ia32_vcvtudq2ph128_mask(
+ (__v4su)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m128i __A) {
+ return (__m128h)__builtin_ia32_vcvtudq2ph128_mask((__v4su)__A, (__v8hf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtepu32_ph(__mmask8 __U, __m128i __A) {
+ return (__m128h)__builtin_ia32_vcvtudq2ph128_mask(
+ (__v4su)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_cvtepu32_ph(__m256i __A) {
+ return (__m128h) __builtin_convertvector((__v8su)__A, __v8hf);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m256i __A) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)__W);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtepu32_ph(__mmask8 __U, __m256i __A) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)_mm_setzero_ph());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi32(__m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2dq128_mask(
+ (__v8hf)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttph_epi32(__m128i __W, __mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2dq128_mask((__v8hf)__A, (__v4si)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttph_epi32(__mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2dq128_mask(
+ (__v8hf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttph_epi32(__m128h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2dq256_mask(
+ (__v8hf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvttph_epi32(__m256i __W, __mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2dq256_mask((__v8hf)__A, (__v8si)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvttph_epi32(__mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2dq256_mask(
+ (__v8hf)__A, (__v8si)_mm256_setzero_si256(), (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu32(__m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2udq128_mask(
+ (__v8hf)__A, (__v4su)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttph_epu32(__m128i __W, __mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2udq128_mask((__v8hf)__A, (__v4su)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttph_epu32(__mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2udq128_mask(
+ (__v8hf)__A, (__v4su)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttph_epu32(__m128h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2udq256_mask(
+ (__v8hf)__A, (__v8su)_mm256_undefined_si256(), (__mmask8)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvttph_epu32(__m256i __W, __mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2udq256_mask((__v8hf)__A, (__v8su)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvttph_epu32(__mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2udq256_mask(
+ (__v8hf)__A, (__v8su)_mm256_setzero_si256(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi64_ph(__m128i __A) {
+ return (__m128h)__builtin_ia32_vcvtqq2ph128_mask(
+ (__v2di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m128i __A) {
+ return (__m128h)__builtin_ia32_vcvtqq2ph128_mask((__v2di)__A, (__v8hf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtepi64_ph(__mmask8 __U, __m128i __A) {
+ return (__m128h)__builtin_ia32_vcvtqq2ph128_mask(
+ (__v2di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_cvtepi64_ph(__m256i __A) {
+ return (__m128h)__builtin_ia32_vcvtqq2ph256_mask(
+ (__v4di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m256i __A) {
+ return (__m128h)__builtin_ia32_vcvtqq2ph256_mask((__v4di)__A, (__v8hf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtepi64_ph(__mmask8 __U, __m256i __A) {
+ return (__m128h)__builtin_ia32_vcvtqq2ph256_mask(
+ (__v4di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi64(__m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2qq128_mask(
+ (__v8hf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvtph_epi64(__m128i __W, __mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2qq128_mask((__v8hf)__A, (__v2di)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2qq128_mask(
+ (__v8hf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvtph_epi64(__m128h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2qq256_mask(
+ (__v8hf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtph_epi64(__m256i __W, __mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2qq256_mask((__v8hf)__A, (__v4di)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2qq256_mask(
+ (__v8hf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu64_ph(__m128i __A) {
+ return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask(
+ (__v2du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m128i __A) {
+ return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask((__v2du)__A, (__v8hf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtepu64_ph(__mmask8 __U, __m128i __A) {
+ return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask(
+ (__v2du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_cvtepu64_ph(__m256i __A) {
+ return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask(
+ (__v4du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m256i __A) {
+ return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask((__v4du)__A, (__v8hf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtepu64_ph(__mmask8 __U, __m256i __A) {
+ return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask(
+ (__v4du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu64(__m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2uqq128_mask(
+ (__v8hf)__A, (__v2du)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvtph_epu64(__m128i __W, __mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2uqq128_mask((__v8hf)__A, (__v2du)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2uqq128_mask(
+ (__v8hf)__A, (__v2du)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvtph_epu64(__m128h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2uqq256_mask(
+ (__v8hf)__A, (__v4du)_mm256_undefined_si256(), (__mmask8)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtph_epu64(__m256i __W, __mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2uqq256_mask((__v8hf)__A, (__v4du)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2uqq256_mask(
+ (__v8hf)__A, (__v4du)_mm256_setzero_si256(), (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi64(__m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2qq128_mask(
+ (__v8hf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttph_epi64(__m128i __W, __mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2qq128_mask((__v8hf)__A, (__v2di)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2qq128_mask(
+ (__v8hf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttph_epi64(__m128h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2qq256_mask(
+ (__v8hf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvttph_epi64(__m256i __W, __mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2qq256_mask((__v8hf)__A, (__v4di)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2qq256_mask(
+ (__v8hf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu64(__m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2uqq128_mask(
+ (__v8hf)__A, (__v2du)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttph_epu64(__m128i __W, __mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2uqq128_mask((__v8hf)__A, (__v2du)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2uqq128_mask(
+ (__v8hf)__A, (__v2du)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttph_epu64(__m128h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2uqq256_mask(
+ (__v8hf)__A, (__v4du)_mm256_undefined_si256(), (__mmask8)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvttph_epu64(__m256i __W, __mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2uqq256_mask((__v8hf)__A, (__v4du)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2uqq256_mask(
+ (__v8hf)__A, (__v4du)_mm256_setzero_si256(), (__mmask8)__U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtxph_ps(__m128h __A) {
+ return (__m128)__builtin_ia32_vcvtph2psx128_mask(
+ (__v8hf)__A, (__v4sf)_mm_undefined_ps(), (__mmask8)-1);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtxph_ps(__m128 __W,
+ __mmask8 __U,
+ __m128h __A) {
+ return (__m128)__builtin_ia32_vcvtph2psx128_mask((__v8hf)__A, (__v4sf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtxph_ps(__mmask8 __U, __m128h __A) {
+ return (__m128)__builtin_ia32_vcvtph2psx128_mask(
+ (__v8hf)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)__U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtxph_ps(__m128h __A) {
+ return (__m256)__builtin_ia32_vcvtph2psx256_mask(
+ (__v8hf)__A, (__v8sf)_mm256_undefined_ps(), (__mmask8)-1);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtxph_ps(__m256 __W, __mmask8 __U, __m128h __A) {
+ return (__m256)__builtin_ia32_vcvtph2psx256_mask((__v8hf)__A, (__v8sf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtxph_ps(__mmask8 __U, __m128h __A) {
+ return (__m256)__builtin_ia32_vcvtph2psx256_mask(
+ (__v8hf)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtxps_ph(__m128 __A) {
+ return (__m128h)__builtin_ia32_vcvtps2phx128_mask(
+ (__v4sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtxps_ph(__m128h __W,
+ __mmask8 __U,
+ __m128 __A) {
+ return (__m128h)__builtin_ia32_vcvtps2phx128_mask((__v4sf)__A, (__v8hf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtxps_ph(__mmask8 __U, __m128 __A) {
+ return (__m128h)__builtin_ia32_vcvtps2phx128_mask(
+ (__v4sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtxps_ph(__m256 __A) {
+ return (__m128h)__builtin_ia32_vcvtps2phx256_mask(
+ (__v8sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtxps_ph(__m128h __W, __mmask8 __U, __m256 __A) {
+ return (__m128h)__builtin_ia32_vcvtps2phx256_mask((__v8sf)__A, (__v8hf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtxps_ph(__mmask8 __U, __m256 __A) {
+ return (__m128h)__builtin_ia32_vcvtps2phx256_mask(
+ (__v8sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_blend_ph(__mmask8 __U,
__m128h __A,
__m128h __W) {
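
(Usage sketch, not part of the patch: assuming a compiler that already supports
-mavx512fp16 -mavx512vl and the <immintrin.h> umbrella header, the asymmetric
vector widths of the new VL conversions pair up as below. For example,
_mm256_cvtph_epi32 widens the eight halves held in a __m128h to eight dwords in
a __m256i, while _mm256_cvtepi32_ph narrows a __m256i back down to a __m128h.)

#include <immintrin.h>

/* Sketch only: round-trips 8 x i32 through _Float16 using the intrinsics
   added in avx512vlfp16intrin.h above. Build with -mavx512fp16 -mavx512vl. */
static __m256i roundtrip_epi32_via_ph(__m256i v) {
  __m128h h = _mm256_cvtepi32_ph(v); /* 8 x i32  -> 8 x fp16 (__m128h) */
  return _mm256_cvtph_epi32(h);      /* 8 x fp16 -> 8 x i32  (__m256i) */
}
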
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 063fd38f97c46..69560027f330a 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3878,6 +3878,10 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_vcvttss2si64:
case X86::BI__builtin_ia32_vcvttss2usi32:
case X86::BI__builtin_ia32_vcvttss2usi64:
+ case X86::BI__builtin_ia32_vcvttsh2si32:
+ case X86::BI__builtin_ia32_vcvttsh2si64:
+ case X86::BI__builtin_ia32_vcvttsh2usi32:
+ case X86::BI__builtin_ia32_vcvttsh2usi64:
ArgNum = 1;
break;
case X86::BI__builtin_ia32_maxpd512:
@@ -3888,6 +3892,8 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_minph512:
ArgNum = 2;
break;
+ case X86::BI__builtin_ia32_vcvtph2pd512_mask:
+ case X86::BI__builtin_ia32_vcvtph2psx512_mask:
case X86::BI__builtin_ia32_cvtps2pd512_mask:
case X86::BI__builtin_ia32_cvttpd2dq512_mask:
case X86::BI__builtin_ia32_cvttpd2qq512_mask:
@@ -3897,6 +3903,12 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_cvttps2qq512_mask:
case X86::BI__builtin_ia32_cvttps2udq512_mask:
case X86::BI__builtin_ia32_cvttps2uqq512_mask:
+ case X86::BI__builtin_ia32_vcvttph2w512_mask:
+ case X86::BI__builtin_ia32_vcvttph2uw512_mask:
+ case X86::BI__builtin_ia32_vcvttph2dq512_mask:
+ case X86::BI__builtin_ia32_vcvttph2udq512_mask:
+ case X86::BI__builtin_ia32_vcvttph2qq512_mask:
+ case X86::BI__builtin_ia32_vcvttph2uqq512_mask:
case X86::BI__builtin_ia32_exp2pd_mask:
case X86::BI__builtin_ia32_exp2ps_mask:
case X86::BI__builtin_ia32_getexppd512_mask:
@@ -3916,6 +3928,8 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_cmpsd_mask:
case X86::BI__builtin_ia32_cmpss_mask:
case X86::BI__builtin_ia32_cmpsh_mask:
+ case X86::BI__builtin_ia32_vcvtsh2sd_round_mask:
+ case X86::BI__builtin_ia32_vcvtsh2ss_round_mask:
case X86::BI__builtin_ia32_cvtss2sd_round_mask:
case X86::BI__builtin_ia32_getexpsd128_round_mask:
case X86::BI__builtin_ia32_getexpss128_round_mask:
@@ -3965,6 +3979,10 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_vcvtss2si64:
case X86::BI__builtin_ia32_vcvtss2usi32:
case X86::BI__builtin_ia32_vcvtss2usi64:
+ case X86::BI__builtin_ia32_vcvtsh2si32:
+ case X86::BI__builtin_ia32_vcvtsh2si64:
+ case X86::BI__builtin_ia32_vcvtsh2usi32:
+ case X86::BI__builtin_ia32_vcvtsh2usi64:
case X86::BI__builtin_ia32_sqrtpd512:
case X86::BI__builtin_ia32_sqrtps512:
ArgNum = 1;
@@ -3988,11 +4006,17 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_cvtusi2sd64:
case X86::BI__builtin_ia32_cvtusi2ss32:
case X86::BI__builtin_ia32_cvtusi2ss64:
+ case X86::BI__builtin_ia32_vcvtusi2sh:
+ case X86::BI__builtin_ia32_vcvtusi642sh:
+ case X86::BI__builtin_ia32_vcvtsi2sh:
+ case X86::BI__builtin_ia32_vcvtsi642sh:
ArgNum = 2;
HasRC = true;
break;
case X86::BI__builtin_ia32_cvtdq2ps512_mask:
case X86::BI__builtin_ia32_cvtudq2ps512_mask:
+ case X86::BI__builtin_ia32_vcvtpd2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtps2phx512_mask:
case X86::BI__builtin_ia32_cvtpd2ps512_mask:
case X86::BI__builtin_ia32_cvtpd2dq512_mask:
case X86::BI__builtin_ia32_cvtpd2qq512_mask:
@@ -4006,6 +4030,18 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_cvtqq2ps512_mask:
case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
+ case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtw2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtph2w512_mask:
+ case X86::BI__builtin_ia32_vcvtph2uw512_mask:
+ case X86::BI__builtin_ia32_vcvtph2dq512_mask:
+ case X86::BI__builtin_ia32_vcvtph2udq512_mask:
+ case X86::BI__builtin_ia32_vcvtph2qq512_mask:
+ case X86::BI__builtin_ia32_vcvtph2uqq512_mask:
+ case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
ArgNum = 3;
HasRC = true;
break;
@@ -4026,6 +4062,8 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_scalefsd_round_mask:
case X86::BI__builtin_ia32_scalefss_round_mask:
case X86::BI__builtin_ia32_cvtsd2ss_round_mask:
+ case X86::BI__builtin_ia32_vcvtss2sh_round_mask:
+ case X86::BI__builtin_ia32_vcvtsd2sh_round_mask:
case X86::BI__builtin_ia32_sqrtsd_round_mask:
case X86::BI__builtin_ia32_sqrtss_round_mask:
case X86::BI__builtin_ia32_vfmaddsd3_mask:
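
(Usage sketch, not part of the patch: the SemaChecking additions above wire the
new 512-bit conversion builtins into the existing rounding/SAE validation, so
the rounding-capable forms take a full rounding-control immediate while the
truncating and widening forms accept SAE only. Assuming <immintrin.h> and
-mavx512fp16, the tests below exercise both shapes, roughly as follows.)

#include <immintrin.h>

/* Sketch only: immediates that pass the Sema check for the new conversions. */
static __m512i cvt_ph_to_w_rz(__m512h a) {
  /* Rounding-capable builtin: an RC mode plus _MM_FROUND_NO_EXC is accepted. */
  return _mm512_cvt_roundph_epi16(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

static __m512i cvtt_ph_to_w(__m512h a) {
  /* Truncating builtin: SAE only, i.e. _MM_FROUND_NO_EXC or
     _MM_FROUND_CUR_DIRECTION; a rounding-mode immediate here is rejected. */
  return _mm512_cvtt_roundph_epi16(a, _MM_FROUND_NO_EXC);
}
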
diff --git a/clang/test/CodeGen/X86/avx512fp16-builtins.c b/clang/test/CodeGen/X86/avx512fp16-builtins.c
index 4f627daff7e6c..d4fe44bc259ee 100644
--- a/clang/test/CodeGen/X86/avx512fp16-builtins.c
+++ b/clang/test/CodeGen/X86/avx512fp16-builtins.c
@@ -1542,6 +1542,1096 @@ __m128i test_mm_cvtsi16_si128(short A) {
return _mm_cvtsi16_si128(A);
}
+__m128h test_mm512_cvt_roundpd_ph(__m512d A) {
+ // CHECK-LABEL: test_mm512_cvt_roundpd_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.512
+ return _mm512_cvt_roundpd_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm512_mask_cvt_roundpd_ph(__m128h A, __mmask8 B, __m512d C) {
+ // CHECK-LABEL: test_mm512_mask_cvt_roundpd_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.512
+ return _mm512_mask_cvt_roundpd_ph(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm512_maskz_cvt_roundpd_ph(__mmask8 A, __m512d B) {
+ // CHECK-LABEL: test_mm512_maskz_cvt_roundpd_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.512
+ return _mm512_maskz_cvt_roundpd_ph(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm512_cvtpd_ph(__m512d A) {
+ // CHECK-LABEL: test_mm512_cvtpd_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.512
+ return _mm512_cvtpd_ph(A);
+}
+
+__m128h test_mm512_mask_cvtpd_ph(__m128h A, __mmask8 B, __m512d C) {
+ // CHECK-LABEL: test_mm512_mask_cvtpd_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.512
+ return _mm512_mask_cvtpd_ph(A, B, C);
+}
+
+__m128h test_mm512_maskz_cvtpd_ph(__mmask8 A, __m512d B) {
+ // CHECK-LABEL: test_mm512_maskz_cvtpd_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.512
+ return _mm512_maskz_cvtpd_ph(A, B);
+}
+
+__m512d test_mm512_cvt_roundph_pd(__m128h A) {
+ // CHECK-LABEL: test_mm512_cvt_roundph_pd
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.512
+ return _mm512_cvt_roundph_pd(A, _MM_FROUND_NO_EXC);
+}
+
+__m512d test_mm512_mask_cvt_roundph_pd(__m512d A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm512_mask_cvt_roundph_pd
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.512
+ return _mm512_mask_cvt_roundph_pd(A, B, C, _MM_FROUND_NO_EXC);
+}
+
+__m512d test_mm512_maskz_cvt_roundph_pd(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm512_maskz_cvt_roundph_pd
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.512
+ return _mm512_maskz_cvt_roundph_pd(A, B, _MM_FROUND_NO_EXC);
+}
+
+__m512d test_mm512_cvtph_pd(__m128h A) {
+ // CHECK-LABEL: test_mm512_cvtph_pd
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.512
+ return _mm512_cvtph_pd(A);
+}
+
+__m512d test_mm512_mask_cvtph_pd(__m512d A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm512_mask_cvtph_pd
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.512
+ return _mm512_mask_cvtph_pd(A, B, C);
+}
+
+__m512d test_mm512_maskz_cvtph_pd(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm512_maskz_cvtph_pd
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.512
+ return _mm512_maskz_cvtph_pd(A, B);
+}
+
+__m128 test_mm_cvt_roundsh_ss(__m128 A, __m128h B) {
+ // CHECK-LABEL: test_mm_cvt_roundsh_ss
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtsh2ss.round
+ return _mm_cvt_roundsh_ss(A, B, _MM_FROUND_NO_EXC);
+}
+
+__m128 test_mm_mask_cvt_roundsh_ss(__m128 A, __mmask8 B, __m128 C, __m128h D) {
+ // CHECK-LABEL: test_mm_mask_cvt_roundsh_ss
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtsh2ss.round
+ return _mm_mask_cvt_roundsh_ss(A, B, C, D, _MM_FROUND_NO_EXC);
+}
+
+__m128 test_mm_maskz_cvt_roundsh_ss(__mmask8 A, __m128 B, __m128h C) {
+ // CHECK-LABEL: test_mm_maskz_cvt_roundsh_ss
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtsh2ss.round
+ return _mm_maskz_cvt_roundsh_ss(A, B, C, _MM_FROUND_NO_EXC);
+}
+
+__m128 test_mm_cvtsh_ss(__m128 A, __m128h B) {
+ // CHECK-LABEL: test_mm_cvtsh_ss
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtsh2ss.round
+ return _mm_cvtsh_ss(A, B);
+}
+
+__m128 test_mm_mask_cvtsh_ss(__m128 A, __mmask8 B, __m128 C, __m128h D) {
+ // CHECK-LABEL: test_mm_mask_cvtsh_ss
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtsh2ss.round
+ return _mm_mask_cvtsh_ss(A, B, C, D);
+}
+
+__m128 test_mm_maskz_cvtsh_ss(__mmask8 A, __m128 B, __m128h C) {
+ // CHECK-LABEL: test_mm_maskz_cvtsh_ss
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtsh2ss.round
+ return _mm_maskz_cvtsh_ss(A, B, C);
+}
+
+__m128h test_mm_cvt_roundss_sh(__m128h A, __m128 B) {
+ // CHECK-LABEL: test_mm_cvt_roundss_sh
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtss2sh.round
+ return _mm_cvt_roundss_sh(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm_mask_cvt_roundss_sh(__m128h A, __mmask8 B, __m128h C, __m128 D) {
+ // CHECK-LABEL: test_mm_mask_cvt_roundss_sh
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtss2sh.round
+ return _mm_mask_cvt_roundss_sh(A, B, C, D, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm_maskz_cvt_roundss_sh(__mmask8 A, __m128h B, __m128 C) {
+ // CHECK-LABEL: test_mm_maskz_cvt_roundss_sh
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtss2sh.round
+ return _mm_maskz_cvt_roundss_sh(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm_cvtss_sh(__m128h A, __m128 B) {
+ // CHECK-LABEL: test_mm_cvtss_sh
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtss2sh.round
+ return _mm_cvtss_sh(A, B);
+}
+
+__m128h test_mm_mask_cvtss_sh(__m128h A, __mmask8 B, __m128h C, __m128 D) {
+ // CHECK-LABEL: test_mm_mask_cvtss_sh
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtss2sh.round
+ return _mm_mask_cvtss_sh(A, B, C, D);
+}
+
+__m128h test_mm_maskz_cvtss_sh(__mmask8 A, __m128h B, __m128 C) {
+ // CHECK-LABEL: test_mm_maskz_cvtss_sh
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtss2sh.round
+ return _mm_maskz_cvtss_sh(A, B, C);
+}
+
+__m128h test_mm_cvt_roundsd_sh(__m128h A, __m128d B) {
+ // CHECK-LABEL: test_mm_cvt_roundsd_sh
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtsd2sh.round
+ return _mm_cvt_roundsd_sh(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm_mask_cvt_roundsd_sh(__m128h A, __mmask8 B, __m128h C, __m128d D) {
+ // CHECK-LABEL: test_mm_mask_cvt_roundsd_sh
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtsd2sh.round
+ return _mm_mask_cvt_roundsd_sh(A, B, C, D, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm_maskz_cvt_roundsd_sh(__mmask8 A, __m128h B, __m128d C) {
+ // CHECK-LABEL: test_mm_maskz_cvt_roundsd_sh
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtsd2sh.round
+ return _mm_maskz_cvt_roundsd_sh(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm_cvtsd_sh(__m128h A, __m128d B) {
+ // CHECK-LABEL: test_mm_cvtsd_sh
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtsd2sh.round
+ return _mm_cvtsd_sh(A, B);
+}
+
+__m128h test_mm_mask_cvtsd_sh(__m128h A, __mmask8 B, __m128h C, __m128d D) {
+ // CHECK-LABEL: test_mm_mask_cvtsd_sh
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtsd2sh.round
+ return _mm_mask_cvtsd_sh(A, B, C, D);
+}
+
+__m128h test_mm_maskz_cvtsd_sh(__mmask8 A, __m128h B, __m128d C) {
+ // CHECK-LABEL: test_mm_maskz_cvtsd_sh
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtsd2sh.round
+ return _mm_maskz_cvtsd_sh(A, B, C);
+}
+
+__m128d test_mm_cvt_roundsh_sd(__m128d A, __m128h B) {
+ // CHECK-LABEL: test_mm_cvt_roundsh_sd
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtsh2sd.round
+ return _mm_cvt_roundsh_sd(A, B, _MM_FROUND_NO_EXC);
+}
+
+__m128d test_mm_mask_cvt_roundsh_sd(__m128d A, __mmask8 B, __m128d C, __m128h D) {
+ // CHECK-LABEL: test_mm_mask_cvt_roundsh_sd
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtsh2sd.round
+ return _mm_mask_cvt_roundsh_sd(A, B, C, D, _MM_FROUND_NO_EXC);
+}
+
+__m128d test_mm_maskz_cvt_roundsh_sd(__mmask8 A, __m128d B, __m128h C) {
+ // CHECK-LABEL: test_mm_maskz_cvt_roundsh_sd
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtsh2sd.round
+ return _mm_maskz_cvt_roundsh_sd(A, B, C, _MM_FROUND_NO_EXC);
+}
+
+__m128d test_mm_cvtsh_sd(__m128d A, __m128h B) {
+ // CHECK-LABEL: test_mm_cvtsh_sd
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtsh2sd.round
+ return _mm_cvtsh_sd(A, B);
+}
+
+__m128d test_mm_mask_cvtsh_sd(__m128d A, __mmask8 B, __m128d C, __m128h D) {
+ // CHECK-LABEL: test_mm_mask_cvtsh_sd
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtsh2sd.round
+ return _mm_mask_cvtsh_sd(A, B, C, D);
+}
+
+__m128d test_mm_maskz_cvtsh_sd(__mmask8 A, __m128d B, __m128h C) {
+ // CHECK-LABEL: test_mm_maskz_cvtsh_sd
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtsh2sd.round
+ return _mm_maskz_cvtsh_sd(A, B, C);
+}
+
+__m512i test_mm512_cvt_roundph_epi16(__m512h A) {
+ // CHECK-LABEL: test_mm512_cvt_roundph_epi16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.512
+ return _mm512_cvt_roundph_epi16(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvt_roundph_epi16(__m512i A, __mmask32 B, __m512h C) {
+ // CHECK-LABEL: test_mm512_mask_cvt_roundph_epi16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.512
+ return _mm512_mask_cvt_roundph_epi16(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvt_roundph_epi16(__mmask32 A, __m512h B) {
+ // CHECK-LABEL: test_mm512_maskz_cvt_roundph_epi16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.512
+ return _mm512_maskz_cvt_roundph_epi16(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvtph_epi16(__m512h A) {
+ // CHECK-LABEL: test_mm512_cvtph_epi16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.512
+ return _mm512_cvtph_epi16(A);
+}
+
+__m512i test_mm512_mask_cvtph_epi16(__m512i A, __mmask32 B, __m512h C) {
+ // CHECK-LABEL: test_mm512_mask_cvtph_epi16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.512
+ return _mm512_mask_cvtph_epi16(A, B, C);
+}
+
+__m512i test_mm512_maskz_cvtph_epi16(__mmask32 A, __m512h B) {
+ // CHECK-LABEL: test_mm512_maskz_cvtph_epi16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.512
+ return _mm512_maskz_cvtph_epi16(A, B);
+}
+
+__m512i test_mm512_cvtt_roundph_epi16(__m512h A) {
+ // CHECK-LABEL: test_mm512_cvtt_roundph_epi16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.512
+ return _mm512_cvtt_roundph_epi16(A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvtt_roundph_epi16(__m512i A, __mmask32 B, __m512h C) {
+ // CHECK-LABEL: test_mm512_mask_cvtt_roundph_epi16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.512
+ return _mm512_mask_cvtt_roundph_epi16(A, B, C, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvtt_roundph_epi16(__mmask32 A, __m512h B) {
+ // CHECK-LABEL: test_mm512_maskz_cvtt_roundph_epi16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.512
+ return _mm512_maskz_cvtt_roundph_epi16(A, B, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvttph_epi16(__m512h A) {
+ // CHECK-LABEL: test_mm512_cvttph_epi16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.512
+ return _mm512_cvttph_epi16(A);
+}
+
+__m512i test_mm512_mask_cvttph_epi16(__m512i A, __mmask32 B, __m512h C) {
+ // CHECK-LABEL: test_mm512_mask_cvttph_epi16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.512
+ return _mm512_mask_cvttph_epi16(A, B, C);
+}
+
+__m512i test_mm512_maskz_cvttph_epi16(__mmask32 A, __m512h B) {
+ // CHECK-LABEL: test_mm512_maskz_cvttph_epi16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.512
+ return _mm512_maskz_cvttph_epi16(A, B);
+}
+
+__m512h test_mm512_cvt_roundepi16_ph(__m512i A) {
+ // CHECK-LABEL: test_mm512_cvt_roundepi16_ph
+ // CHECK: @llvm.x86.avx512.sitofp.round.v32f16.v32i16
+ return _mm512_cvt_roundepi16_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512h test_mm512_mask_cvt_roundepi16_ph(__m512h A, __mmask32 B, __m512i C) {
+ // CHECK-LABEL: test_mm512_mask_cvt_roundepi16_ph
+ // CHECK: @llvm.x86.avx512.sitofp.round.v32f16.v32i16
+ return _mm512_mask_cvt_roundepi16_ph(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512h test_mm512_maskz_cvt_roundepi16_ph(__mmask32 A, __m512i B) {
+ // CHECK-LABEL: test_mm512_maskz_cvt_roundepi16_ph
+ // CHECK: @llvm.x86.avx512.sitofp.round.v32f16.v32i16
+ return _mm512_maskz_cvt_roundepi16_ph(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512h test_mm512_cvtepi16_ph(__m512i A) {
+ // CHECK-LABEL: test_mm512_cvtepi16_ph
+ // CHECK: %{{.*}} = sitofp <32 x i16> %{{.*}} to <32 x half>
+ return _mm512_cvtepi16_ph(A);
+}
+
+__m512h test_mm512_mask_cvtepi16_ph(__m512h A, __mmask32 B, __m512i C) {
+ // CHECK-LABEL: test_mm512_mask_cvtepi16_ph
+ // CHECK: %{{.*}} = sitofp <32 x i16> %{{.*}} to <32 x half>
+ return _mm512_mask_cvtepi16_ph(A, B, C);
+}
+
+__m512h test_mm512_maskz_cvtepi16_ph(__mmask32 A, __m512i B) {
+ // CHECK-LABEL: test_mm512_maskz_cvtepi16_ph
+ // CHECK: %{{.*}} = sitofp <32 x i16> %{{.*}} to <32 x half>
+ return _mm512_maskz_cvtepi16_ph(A, B);
+}
+
+__m512i test_mm512_cvt_roundph_epu16(__m512h A) {
+ // CHECK-LABEL: test_mm512_cvt_roundph_epu16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.512
+ return _mm512_cvt_roundph_epu16(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvt_roundph_epu16(__m512i A, __mmask32 B, __m512h C) {
+ // CHECK-LABEL: test_mm512_mask_cvt_roundph_epu16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.512
+ return _mm512_mask_cvt_roundph_epu16(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvt_roundph_epu16(__mmask32 A, __m512h B) {
+ // CHECK-LABEL: test_mm512_maskz_cvt_roundph_epu16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.512
+ return _mm512_maskz_cvt_roundph_epu16(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvtph_epu16(__m512h A) {
+ // CHECK-LABEL: test_mm512_cvtph_epu16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.512
+ return _mm512_cvtph_epu16(A);
+}
+
+__m512i test_mm512_mask_cvtph_epu16(__m512i A, __mmask32 B, __m512h C) {
+ // CHECK-LABEL: test_mm512_mask_cvtph_epu16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.512
+ return _mm512_mask_cvtph_epu16(A, B, C);
+}
+
+__m512i test_mm512_maskz_cvtph_epu16(__mmask32 A, __m512h B) {
+ // CHECK-LABEL: test_mm512_maskz_cvtph_epu16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.512
+ return _mm512_maskz_cvtph_epu16(A, B);
+}
+
+__m512i test_mm512_cvtt_roundph_epu16(__m512h A) {
+ // CHECK-LABEL: test_mm512_cvtt_roundph_epu16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.512
+ return _mm512_cvtt_roundph_epu16(A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvtt_roundph_epu16(__m512i A, __mmask32 B, __m512h C) {
+ // CHECK-LABEL: test_mm512_mask_cvtt_roundph_epu16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.512
+ return _mm512_mask_cvtt_roundph_epu16(A, B, C, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvtt_roundph_epu16(__mmask32 A, __m512h B) {
+ // CHECK-LABEL: test_mm512_maskz_cvtt_roundph_epu16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.512
+ return _mm512_maskz_cvtt_roundph_epu16(A, B, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvttph_epu16(__m512h A) {
+ // CHECK-LABEL: test_mm512_cvttph_epu16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.512
+ return _mm512_cvttph_epu16(A);
+}
+
+__m512i test_mm512_mask_cvttph_epu16(__m512i A, __mmask32 B, __m512h C) {
+ // CHECK-LABEL: test_mm512_mask_cvttph_epu16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.512
+ return _mm512_mask_cvttph_epu16(A, B, C);
+}
+
+__m512i test_mm512_maskz_cvttph_epu16(__mmask32 A, __m512h B) {
+ // CHECK-LABEL: test_mm512_maskz_cvttph_epu16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.512
+ return _mm512_maskz_cvttph_epu16(A, B);
+}
+
+__m512h test_mm512_cvt_roundepu16_ph(__m512i A) {
+ // CHECK-LABEL: test_mm512_cvt_roundepu16_ph
+ // CHECK: @llvm.x86.avx512.uitofp.round.v32f16.v32i16
+ return _mm512_cvt_roundepu16_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512h test_mm512_mask_cvt_roundepu16_ph(__m512h A, __mmask32 B, __m512i C) {
+ // CHECK-LABEL: test_mm512_mask_cvt_roundepu16_ph
+ // CHECK: @llvm.x86.avx512.uitofp.round.v32f16.v32i16
+ return _mm512_mask_cvt_roundepu16_ph(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512h test_mm512_maskz_cvt_roundepu16_ph(__mmask32 A, __m512i B) {
+ // CHECK-LABEL: test_mm512_maskz_cvt_roundepu16_ph
+ // CHECK: @llvm.x86.avx512.uitofp.round.v32f16.v32i16
+ return _mm512_maskz_cvt_roundepu16_ph(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512h test_mm512_cvtepu16_ph(__m512i A) {
+ // CHECK-LABEL: test_mm512_cvtepu16_ph
+ // CHECK: %{{.*}} = uitofp <32 x i16> %{{.*}} to <32 x half>
+ return _mm512_cvtepu16_ph(A);
+}
+
+__m512h test_mm512_mask_cvtepu16_ph(__m512h A, __mmask32 B, __m512i C) {
+ // CHECK-LABEL: test_mm512_mask_cvtepu16_ph
+ // CHECK: %{{.*}} = uitofp <32 x i16> %{{.*}} to <32 x half>
+ return _mm512_mask_cvtepu16_ph(A, B, C);
+}
+
+__m512h test_mm512_maskz_cvtepu16_ph(__mmask32 A, __m512i B) {
+ // CHECK-LABEL: test_mm512_maskz_cvtepu16_ph
+ // CHECK: %{{.*}} = uitofp <32 x i16> %{{.*}} to <32 x half>
+ return _mm512_maskz_cvtepu16_ph(A, B);
+}
+
+__m512i test_mm512_cvt_roundph_epi32(__m256h A) {
+ // CHECK-LABEL: test_mm512_cvt_roundph_epi32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.512
+ return _mm512_cvt_roundph_epi32(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvt_roundph_epi32(__m512i A, __mmask16 B, __m256h C) {
+ // CHECK-LABEL: test_mm512_mask_cvt_roundph_epi32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.512
+ return _mm512_mask_cvt_roundph_epi32(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvt_roundph_epi32(__mmask16 A, __m256h B) {
+ // CHECK-LABEL: test_mm512_maskz_cvt_roundph_epi32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.512
+ return _mm512_maskz_cvt_roundph_epi32(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvtph_epi32(__m256h A) {
+ // CHECK-LABEL: test_mm512_cvtph_epi32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.512
+ return _mm512_cvtph_epi32(A);
+}
+
+__m512i test_mm512_mask_cvtph_epi32(__m512i A, __mmask16 B, __m256h C) {
+ // CHECK-LABEL: test_mm512_mask_cvtph_epi32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.512
+ return _mm512_mask_cvtph_epi32(A, B, C);
+}
+
+__m512i test_mm512_maskz_cvtph_epi32(__mmask16 A, __m256h B) {
+ // CHECK-LABEL: test_mm512_maskz_cvtph_epi32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.512
+ return _mm512_maskz_cvtph_epi32(A, B);
+}
+
+__m512i test_mm512_cvt_roundph_epu32(__m256h A) {
+ // CHECK-LABEL: test_mm512_cvt_roundph_epu32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.512
+ return _mm512_cvt_roundph_epu32(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvt_roundph_epu32(__m512i A, __mmask16 B, __m256h C) {
+ // CHECK-LABEL: test_mm512_mask_cvt_roundph_epu32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.512
+ return _mm512_mask_cvt_roundph_epu32(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvt_roundph_epu32(__mmask16 A, __m256h B) {
+ // CHECK-LABEL: test_mm512_maskz_cvt_roundph_epu32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.512
+ return _mm512_maskz_cvt_roundph_epu32(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvtph_epu32(__m256h A) {
+ // CHECK-LABEL: test_mm512_cvtph_epu32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.512
+ return _mm512_cvtph_epu32(A);
+}
+
+__m512i test_mm512_mask_cvtph_epu32(__m512i A, __mmask16 B, __m256h C) {
+ // CHECK-LABEL: test_mm512_mask_cvtph_epu32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.512
+ return _mm512_mask_cvtph_epu32(A, B, C);
+}
+
+__m512i test_mm512_maskz_cvtph_epu32(__mmask16 A, __m256h B) {
+ // CHECK-LABEL: test_mm512_maskz_cvtph_epu32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.512
+ return _mm512_maskz_cvtph_epu32(A, B);
+}
+
+__m256h test_mm512_cvt_roundepi32_ph(__m512i A) {
+ // CHECK-LABEL: test_mm512_cvt_roundepi32_ph
+ // CHECK: @llvm.x86.avx512.sitofp.round.v16f16.v16i32
+ return _mm512_cvt_roundepi32_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm512_mask_cvt_roundepi32_ph(__m256h A, __mmask16 B, __m512i C) {
+ // CHECK-LABEL: test_mm512_mask_cvt_roundepi32_ph
+ // CHECK: @llvm.x86.avx512.sitofp.round.v16f16.v16i32
+ return _mm512_mask_cvt_roundepi32_ph(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm512_maskz_cvt_roundepi32_ph(__mmask16 A, __m512i B) {
+ // CHECK-LABEL: test_mm512_maskz_cvt_roundepi32_ph
+ // CHECK: @llvm.x86.avx512.sitofp.round.v16f16.v16i32
+ return _mm512_maskz_cvt_roundepi32_ph(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm512_cvtepi32_ph(__m512i A) {
+ // CHECK-LABEL: test_mm512_cvtepi32_ph
+ // CHECK: %{{.*}} = sitofp <16 x i32> %{{.*}} to <16 x half>
+ return _mm512_cvtepi32_ph(A);
+}
+
+__m256h test_mm512_mask_cvtepi32_ph(__m256h A, __mmask16 B, __m512i C) {
+ // CHECK-LABEL: test_mm512_mask_cvtepi32_ph
+ // CHECK: %{{.*}} = sitofp <16 x i32> %{{.*}} to <16 x half>
+ return _mm512_mask_cvtepi32_ph(A, B, C);
+}
+
+__m256h test_mm512_maskz_cvtepi32_ph(__mmask16 A, __m512i B) {
+ // CHECK-LABEL: test_mm512_maskz_cvtepi32_ph
+ // CHECK: %{{.*}} = sitofp <16 x i32> %{{.*}} to <16 x half>
+ return _mm512_maskz_cvtepi32_ph(A, B);
+}
+
+__m256h test_mm512_cvt_roundepu32_ph(__m512i A) {
+ // CHECK-LABEL: test_mm512_cvt_roundepu32_ph
+ // CHECK: @llvm.x86.avx512.uitofp.round.v16f16.v16i32
+ return _mm512_cvt_roundepu32_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm512_mask_cvt_roundepu32_ph(__m256h A, __mmask16 B, __m512i C) {
+ // CHECK-LABEL: test_mm512_mask_cvt_roundepu32_ph
+ // CHECK: @llvm.x86.avx512.uitofp.round.v16f16.v16i32
+ return _mm512_mask_cvt_roundepu32_ph(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm512_maskz_cvt_roundepu32_ph(__mmask16 A, __m512i B) {
+ // CHECK-LABEL: test_mm512_maskz_cvt_roundepu32_ph
+ // CHECK: @llvm.x86.avx512.uitofp.round.v16f16.v16i32
+ return _mm512_maskz_cvt_roundepu32_ph(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm512_cvtepu32_ph(__m512i A) {
+ // CHECK-LABEL: test_mm512_cvtepu32_ph
+ // CHECK: %{{.*}} = uitofp <16 x i32> %{{.*}} to <16 x half>
+ return _mm512_cvtepu32_ph(A);
+}
+
+__m256h test_mm512_mask_cvtepu32_ph(__m256h A, __mmask16 B, __m512i C) {
+ // CHECK-LABEL: test_mm512_mask_cvtepu32_ph
+ // CHECK: %{{.*}} = uitofp <16 x i32> %{{.*}} to <16 x half>
+ return _mm512_mask_cvtepu32_ph(A, B, C);
+}
+
+__m256h test_mm512_maskz_cvtepu32_ph(__mmask16 A, __m512i B) {
+ // CHECK-LABEL: test_mm512_maskz_cvtepu32_ph
+ // CHECK: %{{.*}} = uitofp <16 x i32> %{{.*}} to <16 x half>
+ return _mm512_maskz_cvtepu32_ph(A, B);
+}
+
+__m512i test_mm512_cvtt_roundph_epi32(__m256h A) {
+ // CHECK-LABEL: test_mm512_cvtt_roundph_epi32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.512
+ return _mm512_cvtt_roundph_epi32(A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvtt_roundph_epi32(__m512i A, __mmask16 B, __m256h C) {
+ // CHECK-LABEL: test_mm512_mask_cvtt_roundph_epi32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.512
+ return _mm512_mask_cvtt_roundph_epi32(A, B, C, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvtt_roundph_epi32(__mmask16 A, __m256h B) {
+ // CHECK-LABEL: test_mm512_maskz_cvtt_roundph_epi32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.512
+ return _mm512_maskz_cvtt_roundph_epi32(A, B, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvttph_epi32(__m256h A) {
+ // CHECK-LABEL: test_mm512_cvttph_epi32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.512
+ return _mm512_cvttph_epi32(A);
+}
+
+__m512i test_mm512_mask_cvttph_epi32(__m512i A, __mmask16 B, __m256h C) {
+ // CHECK-LABEL: test_mm512_mask_cvttph_epi32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.512
+ return _mm512_mask_cvttph_epi32(A, B, C);
+}
+
+__m512i test_mm512_maskz_cvttph_epi32(__mmask16 A, __m256h B) {
+ // CHECK-LABEL: test_mm512_maskz_cvttph_epi32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.512
+ return _mm512_maskz_cvttph_epi32(A, B);
+}
+
+__m512i test_mm512_cvtt_roundph_epu32(__m256h A) {
+ // CHECK-LABEL: test_mm512_cvtt_roundph_epu32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.512
+ return _mm512_cvtt_roundph_epu32(A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvtt_roundph_epu32(__m512i A, __mmask16 B, __m256h C) {
+ // CHECK-LABEL: test_mm512_mask_cvtt_roundph_epu32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.512
+ return _mm512_mask_cvtt_roundph_epu32(A, B, C, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvtt_roundph_epu32(__mmask16 A, __m256h B) {
+ // CHECK-LABEL: test_mm512_maskz_cvtt_roundph_epu32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.512
+ return _mm512_maskz_cvtt_roundph_epu32(A, B, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvttph_epu32(__m256h A) {
+ // CHECK-LABEL: test_mm512_cvttph_epu32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.512
+ return _mm512_cvttph_epu32(A);
+}
+
+__m512i test_mm512_mask_cvttph_epu32(__m512i A, __mmask16 B, __m256h C) {
+ // CHECK-LABEL: test_mm512_mask_cvttph_epu32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.512
+ return _mm512_mask_cvttph_epu32(A, B, C);
+}
+
+__m512i test_mm512_maskz_cvttph_epu32(__mmask16 A, __m256h B) {
+ // CHECK-LABEL: test_mm512_maskz_cvttph_epu32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.512
+ return _mm512_maskz_cvttph_epu32(A, B);
+}
+
+__m128h test_mm512_cvt_roundepi64_ph(__m512i A) {
+ // CHECK-LABEL: test_mm512_cvt_roundepi64_ph
+ // CHECK: @llvm.x86.avx512.sitofp.round.v8f16.v8i64
+ return _mm512_cvt_roundepi64_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm512_mask_cvt_roundepi64_ph(__m128h A, __mmask8 B, __m512i C) {
+ // CHECK-LABEL: test_mm512_mask_cvt_roundepi64_ph
+ // CHECK: @llvm.x86.avx512.sitofp.round.v8f16.v8i64
+ return _mm512_mask_cvt_roundepi64_ph(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm512_maskz_cvt_roundepi64_ph(__mmask8 A, __m512i B) {
+ // CHECK-LABEL: test_mm512_maskz_cvt_roundepi64_ph
+ // CHECK: @llvm.x86.avx512.sitofp.round.v8f16.v8i64
+ return _mm512_maskz_cvt_roundepi64_ph(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm512_cvtepi64_ph(__m512i A) {
+ // CHECK-LABEL: test_mm512_cvtepi64_ph
+ // CHECK: %{{.*}} = sitofp <8 x i64> %{{.*}} to <8 x half>
+ return _mm512_cvtepi64_ph(A);
+}
+
+__m128h test_mm512_mask_cvtepi64_ph(__m128h A, __mmask8 B, __m512i C) {
+ // CHECK-LABEL: test_mm512_mask_cvtepi64_ph
+ // CHECK: %{{.*}} = sitofp <8 x i64> %{{.*}} to <8 x half>
+ return _mm512_mask_cvtepi64_ph(A, B, C);
+}
+
+__m128h test_mm512_maskz_cvtepi64_ph(__mmask8 A, __m512i B) {
+ // CHECK-LABEL: test_mm512_maskz_cvtepi64_ph
+ // CHECK: %{{.*}} = sitofp <8 x i64> %{{.*}} to <8 x half>
+ return _mm512_maskz_cvtepi64_ph(A, B);
+}
+
+__m512i test_mm512_cvt_roundph_epi64(__m128h A) {
+ // CHECK-LABEL: test_mm512_cvt_roundph_epi64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.512
+ return _mm512_cvt_roundph_epi64(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvt_roundph_epi64(__m512i A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm512_mask_cvt_roundph_epi64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.512
+ return _mm512_mask_cvt_roundph_epi64(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvt_roundph_epi64(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm512_maskz_cvt_roundph_epi64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.512
+ return _mm512_maskz_cvt_roundph_epi64(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvtph_epi64(__m128h A) {
+ // CHECK-LABEL: test_mm512_cvtph_epi64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.512
+ return _mm512_cvtph_epi64(A);
+}
+
+__m512i test_mm512_mask_cvtph_epi64(__m512i A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm512_mask_cvtph_epi64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.512
+ return _mm512_mask_cvtph_epi64(A, B, C);
+}
+
+__m512i test_mm512_maskz_cvtph_epi64(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm512_maskz_cvtph_epi64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.512
+ return _mm512_maskz_cvtph_epi64(A, B);
+}
+
+__m128h test_mm512_cvt_roundepu64_ph(__m512i A) {
+ // CHECK-LABEL: test_mm512_cvt_roundepu64_ph
+ // CHECK: @llvm.x86.avx512.uitofp.round.v8f16.v8i64
+ return _mm512_cvt_roundepu64_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm512_mask_cvt_roundepu64_ph(__m128h A, __mmask8 B, __m512i C) {
+ // CHECK-LABEL: test_mm512_mask_cvt_roundepu64_ph
+ // CHECK: @llvm.x86.avx512.uitofp.round.v8f16.v8i64
+ return _mm512_mask_cvt_roundepu64_ph(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm512_maskz_cvt_roundepu64_ph(__mmask8 A, __m512i B) {
+ // CHECK-LABEL: test_mm512_maskz_cvt_roundepu64_ph
+ // CHECK: @llvm.x86.avx512.uitofp.round.v8f16.v8i64
+ return _mm512_maskz_cvt_roundepu64_ph(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm512_cvtepu64_ph(__m512i A) {
+ // CHECK-LABEL: test_mm512_cvtepu64_ph
+ // CHECK: %{{.*}} = uitofp <8 x i64> %{{.*}} to <8 x half>
+ return _mm512_cvtepu64_ph(A);
+}
+
+__m128h test_mm512_mask_cvtepu64_ph(__m128h A, __mmask8 B, __m512i C) {
+ // CHECK-LABEL: test_mm512_mask_cvtepu64_ph
+ // CHECK: %{{.*}} = uitofp <8 x i64> %{{.*}} to <8 x half>
+ return _mm512_mask_cvtepu64_ph(A, B, C);
+}
+
+__m128h test_mm512_maskz_cvtepu64_ph(__mmask8 A, __m512i B) {
+ // CHECK-LABEL: test_mm512_maskz_cvtepu64_ph
+ // CHECK: %{{.*}} = uitofp <8 x i64> %{{.*}} to <8 x half>
+ return _mm512_maskz_cvtepu64_ph(A, B);
+}
+
+__m512i test_mm512_cvt_roundph_epu64(__m128h A) {
+ // CHECK-LABEL: test_mm512_cvt_roundph_epu64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.512
+ return _mm512_cvt_roundph_epu64(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvt_roundph_epu64(__m512i A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm512_mask_cvt_roundph_epu64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.512
+ return _mm512_mask_cvt_roundph_epu64(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvt_roundph_epu64(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm512_maskz_cvt_roundph_epu64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.512
+ return _mm512_maskz_cvt_roundph_epu64(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvtph_epu64(__m128h A) {
+ // CHECK-LABEL: test_mm512_cvtph_epu64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.512
+ return _mm512_cvtph_epu64(A);
+}
+
+__m512i test_mm512_mask_cvtph_epu64(__m512i A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm512_mask_cvtph_epu64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.512
+ return _mm512_mask_cvtph_epu64(A, B, C);
+}
+
+__m512i test_mm512_maskz_cvtph_epu64(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm512_maskz_cvtph_epu64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.512
+ return _mm512_maskz_cvtph_epu64(A, B);
+}
+
+__m512i test_mm512_cvtt_roundph_epi64(__m128h A) {
+ // CHECK-LABEL: test_mm512_cvtt_roundph_epi64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.512
+ return _mm512_cvtt_roundph_epi64(A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvtt_roundph_epi64(__m512i A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm512_mask_cvtt_roundph_epi64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.512
+ return _mm512_mask_cvtt_roundph_epi64(A, B, C, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvtt_roundph_epi64(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm512_maskz_cvtt_roundph_epi64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.512
+ return _mm512_maskz_cvtt_roundph_epi64(A, B, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvttph_epi64(__m128h A) {
+ // CHECK-LABEL: test_mm512_cvttph_epi64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.512
+ return _mm512_cvttph_epi64(A);
+}
+
+__m512i test_mm512_mask_cvttph_epi64(__m512i A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm512_mask_cvttph_epi64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.512
+ return _mm512_mask_cvttph_epi64(A, B, C);
+}
+
+__m512i test_mm512_maskz_cvttph_epi64(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm512_maskz_cvttph_epi64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.512
+ return _mm512_maskz_cvttph_epi64(A, B);
+}
+
+__m512i test_mm512_cvtt_roundph_epu64(__m128h A) {
+ // CHECK-LABEL: test_mm512_cvtt_roundph_epu64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.512
+ return _mm512_cvtt_roundph_epu64(A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvtt_roundph_epu64(__m512i A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm512_mask_cvtt_roundph_epu64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.512
+ return _mm512_mask_cvtt_roundph_epu64(A, B, C, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvtt_roundph_epu64(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm512_maskz_cvtt_roundph_epu64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.512
+ return _mm512_maskz_cvtt_roundph_epu64(A, B, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvttph_epu64(__m128h A) {
+ // CHECK-LABEL: test_mm512_cvttph_epu64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.512
+ return _mm512_cvttph_epu64(A);
+}
+
+__m512i test_mm512_mask_cvttph_epu64(__m512i A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm512_mask_cvttph_epu64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.512
+ return _mm512_mask_cvttph_epu64(A, B, C);
+}
+
+__m512i test_mm512_maskz_cvttph_epu64(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm512_maskz_cvttph_epu64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.512
+ return _mm512_maskz_cvttph_epu64(A, B);
+}
+
+int test_mm_cvt_roundsh_i32(__m128h A) {
+ // CHECK-LABEL: test_mm_cvt_roundsh_i32
+ // CHECK: @llvm.x86.avx512fp16.vcvtsh2si32
+ return _mm_cvt_roundsh_i32(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+int test_mm_cvtsh_i32(__m128h A) {
+ // CHECK-LABEL: test_mm_cvtsh_i32
+ // CHECK: @llvm.x86.avx512fp16.vcvtsh2si32
+ return _mm_cvtsh_i32(A);
+}
+
+unsigned int test_mm_cvt_roundsh_u32(__m128h A) {
+ // CHECK-LABEL: test_mm_cvt_roundsh_u32
+ // CHECK: @llvm.x86.avx512fp16.vcvtsh2usi32
+ return _mm_cvt_roundsh_u32(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+unsigned int test_mm_cvtsh_u32(__m128h A) {
+ // CHECK-LABEL: test_mm_cvtsh_u32
+ // CHECK: @llvm.x86.avx512fp16.vcvtsh2usi32
+ return _mm_cvtsh_u32(A);
+}
+
+#ifdef __x86_64__
+long long test_mm_cvt_roundsh_i64(__m128h A) {
+ // CHECK-LABEL: test_mm_cvt_roundsh_i64
+ // CHECK: @llvm.x86.avx512fp16.vcvtsh2si64
+ return _mm_cvt_roundsh_i64(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+long long test_mm_cvtsh_i64(__m128h A) {
+ // CHECK-LABEL: test_mm_cvtsh_i64
+ // CHECK: @llvm.x86.avx512fp16.vcvtsh2si64
+ return _mm_cvtsh_i64(A);
+}
+
+unsigned long long test_mm_cvt_roundsh_u64(__m128h A) {
+ // CHECK-LABEL: test_mm_cvt_roundsh_u64
+ // CHECK: @llvm.x86.avx512fp16.vcvtsh2usi64
+ return _mm_cvt_roundsh_u64(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+unsigned long long test_mm_cvtsh_u64(__m128h A) {
+ // CHECK-LABEL: test_mm_cvtsh_u64
+ // CHECK: @llvm.x86.avx512fp16.vcvtsh2usi64
+ return _mm_cvtsh_u64(A);
+}
+#endif
+
+__m128h test_mm_cvt_roundu32_sh(__m128h A, unsigned int B) {
+ // CHECK-LABEL: test_mm_cvt_roundu32_sh
+ // CHECK: @llvm.x86.avx512fp16.vcvtusi2sh
+ return _mm_cvt_roundu32_sh(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm_cvtu32_sh(__m128h A, unsigned int B) {
+ // CHECK-LABEL: test_mm_cvtu32_sh
+ // CHECK: %{{.*}} = uitofp i32 %{{.*}} to half
+ return _mm_cvtu32_sh(A, B);
+}
+
+#ifdef __x86_64__
+__m128h test_mm_cvt_roundu64_sh(__m128h A, unsigned long long B) {
+ // CHECK-LABEL: test_mm_cvt_roundu64_sh
+ // CHECK: @llvm.x86.avx512fp16.vcvtusi642sh
+ return _mm_cvt_roundu64_sh(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm_cvtu64_sh(__m128h A, unsigned long long B) {
+ // CHECK-LABEL: test_mm_cvtu64_sh
+ // CHECK: %{{.*}} = uitofp i64 %{{.*}} to half
+ return _mm_cvtu64_sh(A, B);
+}
+#endif
+
+__m128h test_mm_cvt_roundi32_sh(__m128h A, int B) {
+ // CHECK-LABEL: test_mm_cvt_roundi32_sh
+ // CHECK: @llvm.x86.avx512fp16.vcvtsi2sh
+ return _mm_cvt_roundi32_sh(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm_cvti32_sh(__m128h A, int B) {
+ // CHECK-LABEL: test_mm_cvti32_sh
+ // CHECK: %{{.*}} = sitofp i32 %{{.*}} to half
+ return _mm_cvti32_sh(A, B);
+}
+
+#ifdef __x86_64__
+__m128h test_mm_cvt_roundi64_sh(__m128h A, long long B) {
+ // CHECK-LABEL: test_mm_cvt_roundi64_sh
+ // CHECK: @llvm.x86.avx512fp16.vcvtsi642sh
+ return _mm_cvt_roundi64_sh(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm_cvti64_sh(__m128h A, long long B) {
+ // CHECK-LABEL: test_mm_cvti64_sh
+ // CHECK: %{{.*}} = sitofp i64 %{{.*}} to half
+ return _mm_cvti64_sh(A, B);
+}
+#endif
+
+int test_mm_cvtt_roundsh_i32(__m128h A) {
+ // CHECK-LABEL: test_mm_cvtt_roundsh_i32
+ // CHECK: @llvm.x86.avx512fp16.vcvttsh2si32
+ return _mm_cvtt_roundsh_i32(A, _MM_FROUND_NO_EXC);
+}
+
+int test_mm_cvttsh_i32(__m128h A) {
+ // CHECK-LABEL: test_mm_cvttsh_i32
+ // CHECK: @llvm.x86.avx512fp16.vcvttsh2si32
+ return _mm_cvttsh_i32(A);
+}
+
+#ifdef __x86_64__
+long long test_mm_cvtt_roundsh_i64(__m128h A) {
+ // CHECK-LABEL: test_mm_cvtt_roundsh_i64
+ // CHECK: @llvm.x86.avx512fp16.vcvttsh2si64
+ return _mm_cvtt_roundsh_i64(A, _MM_FROUND_NO_EXC);
+}
+
+long long test_mm_cvttsh_i64(__m128h A) {
+ // CHECK-LABEL: test_mm_cvttsh_i64
+ // CHECK: @llvm.x86.avx512fp16.vcvttsh2si64
+ return _mm_cvttsh_i64(A);
+}
+#endif
+
+unsigned int test_mm_cvtt_roundsh_u32(__m128h A) {
+ // CHECK-LABEL: test_mm_cvtt_roundsh_u32
+ // CHECK: @llvm.x86.avx512fp16.vcvttsh2usi32
+ return _mm_cvtt_roundsh_u32(A, _MM_FROUND_NO_EXC);
+}
+
+unsigned int test_mm_cvttsh_u32(__m128h A) {
+ // CHECK-LABEL: test_mm_cvttsh_u32
+ // CHECK: @llvm.x86.avx512fp16.vcvttsh2usi32
+ return _mm_cvttsh_u32(A);
+}
+
+#ifdef __x86_64__
+unsigned long long test_mm_cvtt_roundsh_u64(__m128h A) {
+ // CHECK-LABEL: test_mm_cvtt_roundsh_u64
+ // CHECK: @llvm.x86.avx512fp16.vcvttsh2usi64
+ return _mm_cvtt_roundsh_u64(A, _MM_FROUND_NO_EXC);
+}
+
+unsigned long long test_mm_cvttsh_u64(__m128h A) {
+ // CHECK-LABEL: test_mm_cvttsh_u64
+ // CHECK: @llvm.x86.avx512fp16.vcvttsh2usi64
+ return _mm_cvttsh_u64(A);
+}
+#endif
+
+__m512 test_mm512_cvtx_roundph_ps(__m256h A) {
+ // CHECK-LABEL: test_mm512_cvtx_roundph_ps
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.512
+ return _mm512_cvtx_roundph_ps(A, _MM_FROUND_NO_EXC);
+}
+
+__m512 test_mm512_mask_cvtx_roundph_ps(__m512 A, __mmask16 B, __m256h C) {
+ // CHECK-LABEL: test_mm512_mask_cvtx_roundph_ps
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.512
+ return _mm512_mask_cvtx_roundph_ps(A, B, C, _MM_FROUND_NO_EXC);
+}
+
+__m512 test_mm512_maskz_cvtx_roundph_ps(__mmask16 A, __m256h B) {
+ // CHECK-LABEL: test_mm512_maskz_cvtx_roundph_ps
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.512
+ return _mm512_maskz_cvtx_roundph_ps(A, B, _MM_FROUND_NO_EXC);
+}
+
+__m512 test_mm512_cvtxph_ps(__m256h A) {
+ // CHECK-LABEL: test_mm512_cvtxph_ps
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.512
+ return _mm512_cvtxph_ps(A);
+}
+
+__m512 test_mm512_mask_cvtxph_ps(__m512 A, __mmask16 B, __m256h C) {
+ // CHECK-LABEL: test_mm512_mask_cvtxph_ps
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.512
+ return _mm512_mask_cvtxph_ps(A, B, C);
+}
+
+__m512 test_mm512_maskz_cvtxph_ps(__mmask16 A, __m256h B) {
+ // CHECK-LABEL: test_mm512_maskz_cvtxph_ps
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.512
+ return _mm512_maskz_cvtxph_ps(A, B);
+}
+
+__m256h test_mm512_cvtx_roundps_ph(__m512 A) {
+ // CHECK-LABEL: test_mm512_cvtx_roundps_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.512
+ return _mm512_cvtx_roundps_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm512_mask_cvtx_roundps_ph(__m256h A, __mmask16 B, __m512 C) {
+ // CHECK-LABEL: test_mm512_mask_cvtx_roundps_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.512
+ return _mm512_mask_cvtx_roundps_ph(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm512_maskz_cvtx_roundps_ph(__mmask16 A, __m512 B) {
+ // CHECK-LABEL: test_mm512_maskz_cvtx_roundps_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.512
+ return _mm512_maskz_cvtx_roundps_ph(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm512_cvtxps_ph(__m512 A) {
+ // CHECK-LABEL: test_mm512_cvtxps_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.512
+ return _mm512_cvtxps_ph(A);
+}
+
+__m256h test_mm512_mask_cvtxps_ph(__m256h A, __mmask16 B, __m512 C) {
+ // CHECK-LABEL: test_mm512_mask_cvtxps_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.512
+ return _mm512_mask_cvtxps_ph(A, B, C);
+}
+
+__m256h test_mm512_maskz_cvtxps_ph(__mmask16 A, __m512 B) {
+ // CHECK-LABEL: test_mm512_maskz_cvtxps_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.512
+ return _mm512_maskz_cvtxps_ph(A, B);
+}
+
_Float16 test_mm512_reduce_add_ph(__m512h __W) {
// CHECK-LABEL: @test_mm512_reduce_add_ph
// CHECK: call reassoc half @llvm.vector.reduce.fadd.v32f16(half 0xH8000, <32 x half> %{{.*}})
diff --git a/clang/test/CodeGen/X86/avx512vlfp16-builtins.c b/clang/test/CodeGen/X86/avx512vlfp16-builtins.c
index a4e3b1e2be941..0d020ccd1452f 100644
--- a/clang/test/CodeGen/X86/avx512vlfp16-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlfp16-builtins.c
@@ -1215,6 +1215,798 @@ __mmask8 test_mm_mask_cmp_ph_mask_true_us(__mmask8 m, __m128h a, __m128h b) {
return _mm_mask_cmp_ph_mask(m, a, b, _CMP_TRUE_US);
}
+__m128h test_mm_cvtpd_ph(__m128d A) {
+ // CHECK-LABEL: test_mm_cvtpd_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.128
+ return _mm_cvtpd_ph(A);
+}
+
+__m128h test_mm_mask_cvtpd_ph(__m128h A, __mmask8 B, __m128d C) {
+ // CHECK-LABEL: test_mm_mask_cvtpd_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.128
+ return _mm_mask_cvtpd_ph(A, B, C);
+}
+
+__m128h test_mm_maskz_cvtpd_ph(__mmask8 A, __m128d B) {
+ // CHECK-LABEL: test_mm_maskz_cvtpd_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.128
+ return _mm_maskz_cvtpd_ph(A, B);
+}
+
+__m128h test_mm256_cvtpd_ph(__m256d A) {
+ // CHECK-LABEL: test_mm256_cvtpd_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.256
+ return _mm256_cvtpd_ph(A);
+}
+
+__m128h test_mm256_mask_cvtpd_ph(__m128h A, __mmask8 B, __m256d C) {
+ // CHECK-LABEL: test_mm256_mask_cvtpd_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.256
+ return _mm256_mask_cvtpd_ph(A, B, C);
+}
+
+__m128h test_mm256_maskz_cvtpd_ph(__mmask8 A, __m256d B) {
+ // CHECK-LABEL: test_mm256_maskz_cvtpd_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.256
+ return _mm256_maskz_cvtpd_ph(A, B);
+}
+
+__m128d test_mm_cvtph_pd(__m128h A) {
+ // CHECK-LABEL: test_mm_cvtph_pd
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.128
+ return _mm_cvtph_pd(A);
+}
+
+__m128d test_mm_mask_cvtph_pd(__m128d A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm_mask_cvtph_pd
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.128
+ return _mm_mask_cvtph_pd(A, B, C);
+}
+
+__m128d test_mm_maskz_cvtph_pd(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm_maskz_cvtph_pd
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.128
+ return _mm_maskz_cvtph_pd(A, B);
+}
+
+__m256d test_mm256_cvtph_pd(__m128h A) {
+ // CHECK-LABEL: test_mm256_cvtph_pd
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.256
+ return _mm256_cvtph_pd(A);
+}
+
+__m256d test_mm256_mask_cvtph_pd(__m256d A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm256_mask_cvtph_pd
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.256
+ return _mm256_mask_cvtph_pd(A, B, C);
+}
+
+__m256d test_mm256_maskz_cvtph_pd(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm256_maskz_cvtph_pd
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.256
+ return _mm256_maskz_cvtph_pd(A, B);
+}
+
+__m128i test_mm_cvtph_epi16(__m128h A) {
+ // CHECK-LABEL: test_mm_cvtph_epi16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.128
+ return _mm_cvtph_epi16(A);
+}
+
+__m128i test_mm_mask_cvtph_epi16(__m128i A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm_mask_cvtph_epi16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.128
+ return _mm_mask_cvtph_epi16(A, B, C);
+}
+
+__m128i test_mm_maskz_cvtph_epi16(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm_maskz_cvtph_epi16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.128
+ return _mm_maskz_cvtph_epi16(A, B);
+}
+
+__m256i test_mm256_cvtph_epi16(__m256h A) {
+ // CHECK-LABEL: test_mm256_cvtph_epi16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.256
+ return _mm256_cvtph_epi16(A);
+}
+
+__m256i test_mm256_mask_cvtph_epi16(__m256i A, __mmask16 B, __m256h C) {
+ // CHECK-LABEL: test_mm256_mask_cvtph_epi16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.256
+ return _mm256_mask_cvtph_epi16(A, B, C);
+}
+
+__m256i test_mm256_maskz_cvtph_epi16(__mmask16 A, __m256h B) {
+ // CHECK-LABEL: test_mm256_maskz_cvtph_epi16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.256
+ return _mm256_maskz_cvtph_epi16(A, B);
+}
+
+__m128i test_mm_cvttph_epi16(__m128h A) {
+ // CHECK-LABEL: test_mm_cvttph_epi16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.128
+ return _mm_cvttph_epi16(A);
+}
+
+__m128i test_mm_mask_cvttph_epi16(__m128i A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm_mask_cvttph_epi16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.128
+ return _mm_mask_cvttph_epi16(A, B, C);
+}
+
+__m128i test_mm_maskz_cvttph_epi16(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm_maskz_cvttph_epi16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.128
+ return _mm_maskz_cvttph_epi16(A, B);
+}
+
+__m256i test_mm256_cvttph_epi16(__m256h A) {
+ // CHECK-LABEL: test_mm256_cvttph_epi16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.256
+ return _mm256_cvttph_epi16(A);
+}
+
+__m256i test_mm256_mask_cvttph_epi16(__m256i A, __mmask16 B, __m256h C) {
+ // CHECK-LABEL: test_mm256_mask_cvttph_epi16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.256
+ return _mm256_mask_cvttph_epi16(A, B, C);
+}
+
+__m256i test_mm256_maskz_cvttph_epi16(__mmask16 A, __m256h B) {
+ // CHECK-LABEL: test_mm256_maskz_cvttph_epi16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.256
+ return _mm256_maskz_cvttph_epi16(A, B);
+}
+
+__m128h test_mm_cvtepi16_ph(__m128i A) {
+ // CHECK-LABEL: test_mm_cvtepi16_ph
+ // CHECK: %{{.*}} = sitofp <8 x i16> %{{.*}} to <8 x half>
+ return _mm_cvtepi16_ph(A);
+}
+
+__m128h test_mm_mask_cvtepi16_ph(__m128h A, __mmask8 B, __m128i C) {
+ // CHECK-LABEL: test_mm_mask_cvtepi16_ph
+ // CHECK: %{{.*}} = sitofp <8 x i16> %{{.*}} to <8 x half>
+ return _mm_mask_cvtepi16_ph(A, B, C);
+}
+
+__m128h test_mm_maskz_cvtepi16_ph(__mmask8 A, __m128i B) {
+ // CHECK-LABEL: test_mm_maskz_cvtepi16_ph
+ // CHECK: %{{.*}} = sitofp <8 x i16> %{{.*}} to <8 x half>
+ return _mm_maskz_cvtepi16_ph(A, B);
+}
+
+__m256h test_mm256_cvtepi16_ph(__m256i A) {
+ // CHECK-LABEL: test_mm256_cvtepi16_ph
+ // CHECK: %{{.*}} = sitofp <16 x i16> %{{.*}} to <16 x half>
+ return _mm256_cvtepi16_ph(A);
+}
+
+__m256h test_mm256_mask_cvtepi16_ph(__m256h A, __mmask16 B, __m256i C) {
+ // CHECK-LABEL: test_mm256_mask_cvtepi16_ph
+ // CHECK: %{{.*}} = sitofp <16 x i16> %{{.*}} to <16 x half>
+ return _mm256_mask_cvtepi16_ph(A, B, C);
+}
+
+__m256h test_mm256_maskz_cvtepi16_ph(__mmask16 A, __m256i B) {
+ // CHECK-LABEL: test_mm256_maskz_cvtepi16_ph
+ // CHECK: %{{.*}} = sitofp <16 x i16> %{{.*}} to <16 x half>
+ return _mm256_maskz_cvtepi16_ph(A, B);
+}
+
+__m128i test_mm_cvtph_epu16(__m128h A) {
+ // CHECK-LABEL: test_mm_cvtph_epu16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.128
+ return _mm_cvtph_epu16(A);
+}
+
+__m128i test_mm_mask_cvtph_epu16(__m128i A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm_mask_cvtph_epu16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.128
+ return _mm_mask_cvtph_epu16(A, B, C);
+}
+
+__m128i test_mm_maskz_cvtph_epu16(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm_maskz_cvtph_epu16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.128
+ return _mm_maskz_cvtph_epu16(A, B);
+}
+
+__m256i test_mm256_cvtph_epu16(__m256h A) {
+ // CHECK-LABEL: test_mm256_cvtph_epu16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.256
+ return _mm256_cvtph_epu16(A);
+}
+
+__m256i test_mm256_mask_cvtph_epu16(__m256i A, __mmask16 B, __m256h C) {
+ // CHECK-LABEL: test_mm256_mask_cvtph_epu16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.256
+ return _mm256_mask_cvtph_epu16(A, B, C);
+}
+
+__m256i test_mm256_maskz_cvtph_epu16(__mmask16 A, __m256h B) {
+ // CHECK-LABEL: test_mm256_maskz_cvtph_epu16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.256
+ return _mm256_maskz_cvtph_epu16(A, B);
+}
+
+__m128i test_mm_cvttph_epu16(__m128h A) {
+ // CHECK-LABEL: test_mm_cvttph_epu16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.128
+ return _mm_cvttph_epu16(A);
+}
+
+__m128i test_mm_mask_cvttph_epu16(__m128i A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm_mask_cvttph_epu16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.128
+ return _mm_mask_cvttph_epu16(A, B, C);
+}
+
+__m128i test_mm_maskz_cvttph_epu16(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm_maskz_cvttph_epu16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.128
+ return _mm_maskz_cvttph_epu16(A, B);
+}
+
+__m256i test_mm256_cvttph_epu16(__m256h A) {
+ // CHECK-LABEL: test_mm256_cvttph_epu16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.256
+ return _mm256_cvttph_epu16(A);
+}
+
+__m256i test_mm256_mask_cvttph_epu16(__m256i A, __mmask16 B, __m256h C) {
+ // CHECK-LABEL: test_mm256_mask_cvttph_epu16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.256
+ return _mm256_mask_cvttph_epu16(A, B, C);
+}
+
+__m256i test_mm256_maskz_cvttph_epu16(__mmask16 A, __m256h B) {
+ // CHECK-LABEL: test_mm256_maskz_cvttph_epu16
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.256
+ return _mm256_maskz_cvttph_epu16(A, B);
+}
+
+__m128h test_mm_cvtepu16_ph(__m128i A) {
+ // CHECK-LABEL: test_mm_cvtepu16_ph
+ // CHECK: %{{.*}} = uitofp <8 x i16> %{{.*}} to <8 x half>
+ return _mm_cvtepu16_ph(A);
+}
+
+__m128h test_mm_mask_cvtepu16_ph(__m128h A, __mmask8 B, __m128i C) {
+ // CHECK-LABEL: test_mm_mask_cvtepu16_ph
+ // CHECK: %{{.*}} = uitofp <8 x i16> %{{.*}} to <8 x half>
+ return _mm_mask_cvtepu16_ph(A, B, C);
+}
+
+__m128h test_mm_maskz_cvtepu16_ph(__mmask8 A, __m128i B) {
+ // CHECK-LABEL: test_mm_maskz_cvtepu16_ph
+ // CHECK: %{{.*}} = uitofp <8 x i16> %{{.*}} to <8 x half>
+ return _mm_maskz_cvtepu16_ph(A, B);
+}
+
+__m256h test_mm256_cvtepu16_ph(__m256i A) {
+ // CHECK-LABEL: test_mm256_cvtepu16_ph
+ // CHECK: %{{.*}} = uitofp <16 x i16> %{{.*}} to <16 x half>
+ return _mm256_cvtepu16_ph(A);
+}
+
+__m256h test_mm256_mask_cvtepu16_ph(__m256h A, __mmask16 B, __m256i C) {
+ // CHECK-LABEL: test_mm256_mask_cvtepu16_ph
+ // CHECK: %{{.*}} = uitofp <16 x i16> %{{.*}} to <16 x half>
+ return _mm256_mask_cvtepu16_ph(A, B, C);
+}
+
+__m256h test_mm256_maskz_cvtepu16_ph(__mmask16 A, __m256i B) {
+ // CHECK-LABEL: test_mm256_maskz_cvtepu16_ph
+ // CHECK: %{{.*}} = uitofp <16 x i16> %{{.*}} to <16 x half>
+ return _mm256_maskz_cvtepu16_ph(A, B);
+}
+
+__m128i test_mm_cvtph_epi32(__m128h A) {
+ // CHECK-LABEL: test_mm_cvtph_epi32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.128
+ return _mm_cvtph_epi32(A);
+}
+
+__m128i test_mm_mask_cvtph_epi32(__m128i A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm_mask_cvtph_epi32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.128
+ return _mm_mask_cvtph_epi32(A, B, C);
+}
+
+__m128i test_mm_maskz_cvtph_epi32(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm_maskz_cvtph_epi32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.128
+ return _mm_maskz_cvtph_epi32(A, B);
+}
+
+__m256i test_mm256_cvtph_epi32(__m128h A) {
+ // CHECK-LABEL: test_mm256_cvtph_epi32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.256
+ return _mm256_cvtph_epi32(A);
+}
+
+__m256i test_mm256_mask_cvtph_epi32(__m256i A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm256_mask_cvtph_epi32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.256
+ return _mm256_mask_cvtph_epi32(A, B, C);
+}
+
+__m256i test_mm256_maskz_cvtph_epi32(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm256_maskz_cvtph_epi32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.256
+ return _mm256_maskz_cvtph_epi32(A, B);
+}
+
+__m128i test_mm_cvtph_epu32(__m128h A) {
+ // CHECK-LABEL: test_mm_cvtph_epu32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.128
+ return _mm_cvtph_epu32(A);
+}
+
+__m128i test_mm_mask_cvtph_epu32(__m128i A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm_mask_cvtph_epu32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.128
+ return _mm_mask_cvtph_epu32(A, B, C);
+}
+
+__m128i test_mm_maskz_cvtph_epu32(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm_maskz_cvtph_epu32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.128
+ return _mm_maskz_cvtph_epu32(A, B);
+}
+
+__m256i test_mm256_cvtph_epu32(__m128h A) {
+ // CHECK-LABEL: test_mm256_cvtph_epu32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.256
+ return _mm256_cvtph_epu32(A);
+}
+
+__m256i test_mm256_mask_cvtph_epu32(__m256i A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm256_mask_cvtph_epu32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.256
+ return _mm256_mask_cvtph_epu32(A, B, C);
+}
+
+__m256i test_mm256_maskz_cvtph_epu32(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm256_maskz_cvtph_epu32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.256
+ return _mm256_maskz_cvtph_epu32(A, B);
+}
+
+__m128h test_mm_cvtepi32_ph(__m128i A) {
+ // CHECK-LABEL: test_mm_cvtepi32_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtdq2ph.128
+ return _mm_cvtepi32_ph(A);
+}
+
+__m128h test_mm_mask_cvtepi32_ph(__m128h A, __mmask8 B, __m128i C) {
+ // CHECK-LABEL: test_mm_mask_cvtepi32_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtdq2ph.128
+ return _mm_mask_cvtepi32_ph(A, B, C);
+}
+
+__m128h test_mm_maskz_cvtepi32_ph(__mmask8 A, __m128i B) {
+ // CHECK-LABEL: test_mm_maskz_cvtepi32_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtdq2ph.128
+ return _mm_maskz_cvtepi32_ph(A, B);
+}
+
+__m128h test_mm256_cvtepi32_ph(__m256i A) {
+ // CHECK-LABEL: test_mm256_cvtepi32_ph
+ // CHECK: %{{.*}} = sitofp <8 x i32> %{{.*}} to <8 x half>
+ return _mm256_cvtepi32_ph(A);
+}
+
+__m128h test_mm256_mask_cvtepi32_ph(__m128h A, __mmask8 B, __m256i C) {
+ // CHECK-LABEL: test_mm256_mask_cvtepi32_ph
+ // CHECK: %{{.*}} = sitofp <8 x i32> %{{.*}} to <8 x half>
+ return _mm256_mask_cvtepi32_ph(A, B, C);
+}
+
+__m128h test_mm256_maskz_cvtepi32_ph(__mmask8 A, __m256i B) {
+ // CHECK-LABEL: test_mm256_maskz_cvtepi32_ph
+ // CHECK: %{{.*}} = sitofp <8 x i32> %{{.*}} to <8 x half>
+ return _mm256_maskz_cvtepi32_ph(A, B);
+}
+
+__m128h test_mm_cvtepu32_ph(__m128i A) {
+ // CHECK-LABEL: test_mm_cvtepu32_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtudq2ph.128
+ return _mm_cvtepu32_ph(A);
+}
+
+__m128h test_mm_mask_cvtepu32_ph(__m128h A, __mmask8 B, __m128i C) {
+ // CHECK-LABEL: test_mm_mask_cvtepu32_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtudq2ph.128
+ return _mm_mask_cvtepu32_ph(A, B, C);
+}
+
+__m128h test_mm_maskz_cvtepu32_ph(__mmask8 A, __m128i B) {
+ // CHECK-LABEL: test_mm_maskz_cvtepu32_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtudq2ph.128
+ return _mm_maskz_cvtepu32_ph(A, B);
+}
+
+__m128h test_mm256_cvtepu32_ph(__m256i A) {
+ // CHECK-LABEL: test_mm256_cvtepu32_ph
+ // CHECK: %{{.*}} = uitofp <8 x i32> %{{.*}} to <8 x half>
+ return _mm256_cvtepu32_ph(A);
+}
+
+__m128h test_mm256_mask_cvtepu32_ph(__m128h A, __mmask8 B, __m256i C) {
+ // CHECK-LABEL: test_mm256_mask_cvtepu32_ph
+ // CHECK: %{{.*}} = uitofp <8 x i32> %{{.*}} to <8 x half>
+ return _mm256_mask_cvtepu32_ph(A, B, C);
+}
+
+__m128h test_mm256_maskz_cvtepu32_ph(__mmask8 A, __m256i B) {
+ // CHECK-LABEL: test_mm256_maskz_cvtepu32_ph
+ // CHECK: %{{.*}} = uitofp <8 x i32> %{{.*}} to <8 x half>
+ return _mm256_maskz_cvtepu32_ph(A, B);
+}
+
+__m128i test_mm_cvttph_epi32(__m128h A) {
+ // CHECK-LABEL: test_mm_cvttph_epi32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.128
+ return _mm_cvttph_epi32(A);
+}
+
+__m128i test_mm_mask_cvttph_epi32(__m128i A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm_mask_cvttph_epi32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.128
+ return _mm_mask_cvttph_epi32(A, B, C);
+}
+
+__m128i test_mm_maskz_cvttph_epi32(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm_maskz_cvttph_epi32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.128
+ return _mm_maskz_cvttph_epi32(A, B);
+}
+
+__m256i test_mm256_cvttph_epi32(__m128h A) {
+ // CHECK-LABEL: test_mm256_cvttph_epi32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.256
+ return _mm256_cvttph_epi32(A);
+}
+
+__m256i test_mm256_mask_cvttph_epi32(__m256i A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm256_mask_cvttph_epi32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.256
+ return _mm256_mask_cvttph_epi32(A, B, C);
+}
+
+__m256i test_mm256_maskz_cvttph_epi32(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm256_maskz_cvttph_epi32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.256
+ return _mm256_maskz_cvttph_epi32(A, B);
+}
+
+__m128i test_mm_cvttph_epu32(__m128h A) {
+ // CHECK-LABEL: test_mm_cvttph_epu32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.128
+ return _mm_cvttph_epu32(A);
+}
+
+__m128i test_mm_mask_cvttph_epu32(__m128i A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm_mask_cvttph_epu32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.128
+ return _mm_mask_cvttph_epu32(A, B, C);
+}
+
+__m128i test_mm_maskz_cvttph_epu32(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm_maskz_cvttph_epu32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.128
+ return _mm_maskz_cvttph_epu32(A, B);
+}
+
+__m256i test_mm256_cvttph_epu32(__m128h A) {
+ // CHECK-LABEL: test_mm256_cvttph_epu32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.256
+ return _mm256_cvttph_epu32(A);
+}
+
+__m256i test_mm256_mask_cvttph_epu32(__m256i A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm256_mask_cvttph_epu32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.256
+ return _mm256_mask_cvttph_epu32(A, B, C);
+}
+
+__m256i test_mm256_maskz_cvttph_epu32(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm256_maskz_cvttph_epu32
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.256
+ return _mm256_maskz_cvttph_epu32(A, B);
+}
+
+__m128h test_mm_cvtepi64_ph(__m128i A) {
+ // CHECK-LABEL: test_mm_cvtepi64_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtqq2ph.128
+ return _mm_cvtepi64_ph(A);
+}
+
+__m128h test_mm_mask_cvtepi64_ph(__m128h A, __mmask8 B, __m128i C) {
+ // CHECK-LABEL: test_mm_mask_cvtepi64_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtqq2ph.128
+ return _mm_mask_cvtepi64_ph(A, B, C);
+}
+
+__m128h test_mm_maskz_cvtepi64_ph(__mmask8 A, __m128i B) {
+ // CHECK-LABEL: test_mm_maskz_cvtepi64_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtqq2ph.128
+ return _mm_maskz_cvtepi64_ph(A, B);
+}
+
+__m128h test_mm256_cvtepi64_ph(__m256i A) {
+ // CHECK-LABEL: test_mm256_cvtepi64_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtqq2ph.256
+ return _mm256_cvtepi64_ph(A);
+}
+
+__m128h test_mm256_mask_cvtepi64_ph(__m128h A, __mmask8 B, __m256i C) {
+ // CHECK-LABEL: test_mm256_mask_cvtepi64_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtqq2ph.256
+ return _mm256_mask_cvtepi64_ph(A, B, C);
+}
+
+__m128h test_mm256_maskz_cvtepi64_ph(__mmask8 A, __m256i B) {
+ // CHECK-LABEL: test_mm256_maskz_cvtepi64_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtqq2ph.256
+ return _mm256_maskz_cvtepi64_ph(A, B);
+}
+
+__m128i test_mm_cvtph_epi64(__m128h A) {
+ // CHECK-LABEL: test_mm_cvtph_epi64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.128
+ return _mm_cvtph_epi64(A);
+}
+
+__m128i test_mm_mask_cvtph_epi64(__m128i A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm_mask_cvtph_epi64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.128
+ return _mm_mask_cvtph_epi64(A, B, C);
+}
+
+__m128i test_mm_maskz_cvtph_epi64(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm_maskz_cvtph_epi64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.128
+ return _mm_maskz_cvtph_epi64(A, B);
+}
+
+__m256i test_mm256_cvtph_epi64(__m128h A) {
+ // CHECK-LABEL: test_mm256_cvtph_epi64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.256
+ return _mm256_cvtph_epi64(A);
+}
+
+__m256i test_mm256_mask_cvtph_epi64(__m256i A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm256_mask_cvtph_epi64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.256
+ return _mm256_mask_cvtph_epi64(A, B, C);
+}
+
+__m256i test_mm256_maskz_cvtph_epi64(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm256_maskz_cvtph_epi64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.256
+ return _mm256_maskz_cvtph_epi64(A, B);
+}
+
+__m128h test_mm_cvtepu64_ph(__m128i A) {
+ // CHECK-LABEL: test_mm_cvtepu64_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128
+ return _mm_cvtepu64_ph(A);
+}
+
+__m128h test_mm_mask_cvtepu64_ph(__m128h A, __mmask8 B, __m128i C) {
+ // CHECK-LABEL: test_mm_mask_cvtepu64_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128
+ return _mm_mask_cvtepu64_ph(A, B, C);
+}
+
+__m128h test_mm_maskz_cvtepu64_ph(__mmask8 A, __m128i B) {
+ // CHECK-LABEL: test_mm_maskz_cvtepu64_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128
+ return _mm_maskz_cvtepu64_ph(A, B);
+}
+
+__m128h test_mm256_cvtepu64_ph(__m256i A) {
+ // CHECK-LABEL: test_mm256_cvtepu64_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256
+ return _mm256_cvtepu64_ph(A);
+}
+
+__m128h test_mm256_mask_cvtepu64_ph(__m128h A, __mmask8 B, __m256i C) {
+ // CHECK-LABEL: test_mm256_mask_cvtepu64_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256
+ return _mm256_mask_cvtepu64_ph(A, B, C);
+}
+
+__m128h test_mm256_maskz_cvtepu64_ph(__mmask8 A, __m256i B) {
+ // CHECK-LABEL: test_mm256_maskz_cvtepu64_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256
+ return _mm256_maskz_cvtepu64_ph(A, B);
+}
+
+__m128i test_mm_cvtph_epu64(__m128h A) {
+ // CHECK-LABEL: test_mm_cvtph_epu64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.128
+ return _mm_cvtph_epu64(A);
+}
+
+__m128i test_mm_mask_cvtph_epu64(__m128i A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm_mask_cvtph_epu64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.128
+ return _mm_mask_cvtph_epu64(A, B, C);
+}
+
+__m128i test_mm_maskz_cvtph_epu64(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm_maskz_cvtph_epu64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.128
+ return _mm_maskz_cvtph_epu64(A, B);
+}
+
+__m256i test_mm256_cvtph_epu64(__m128h A) {
+ // CHECK-LABEL: test_mm256_cvtph_epu64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.256
+ return _mm256_cvtph_epu64(A);
+}
+
+__m256i test_mm256_mask_cvtph_epu64(__m256i A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm256_mask_cvtph_epu64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.256
+ return _mm256_mask_cvtph_epu64(A, B, C);
+}
+
+__m256i test_mm256_maskz_cvtph_epu64(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm256_maskz_cvtph_epu64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.256
+ return _mm256_maskz_cvtph_epu64(A, B);
+}
+
+__m128i test_mm_cvttph_epi64(__m128h A) {
+ // CHECK-LABEL: test_mm_cvttph_epi64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.128
+ return _mm_cvttph_epi64(A);
+}
+
+__m128i test_mm_mask_cvttph_epi64(__m128i A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm_mask_cvttph_epi64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.128
+ return _mm_mask_cvttph_epi64(A, B, C);
+}
+
+__m128i test_mm_maskz_cvttph_epi64(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm_maskz_cvttph_epi64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.128
+ return _mm_maskz_cvttph_epi64(A, B);
+}
+
+__m256i test_mm256_cvttph_epi64(__m128h A) {
+ // CHECK-LABEL: test_mm256_cvttph_epi64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.256
+ return _mm256_cvttph_epi64(A);
+}
+
+__m256i test_mm256_mask_cvttph_epi64(__m256i A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm256_mask_cvttph_epi64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.256
+ return _mm256_mask_cvttph_epi64(A, B, C);
+}
+
+__m256i test_mm256_maskz_cvttph_epi64(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm256_maskz_cvttph_epi64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.256
+ return _mm256_maskz_cvttph_epi64(A, B);
+}
+
+__m128i test_mm_cvttph_epu64(__m128h A) {
+ // CHECK-LABEL: test_mm_cvttph_epu64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.128
+ return _mm_cvttph_epu64(A);
+}
+
+__m128i test_mm_mask_cvttph_epu64(__m128i A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm_mask_cvttph_epu64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.128
+ return _mm_mask_cvttph_epu64(A, B, C);
+}
+
+__m128i test_mm_maskz_cvttph_epu64(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm_maskz_cvttph_epu64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.128
+ return _mm_maskz_cvttph_epu64(A, B);
+}
+
+__m256i test_mm256_cvttph_epu64(__m128h A) {
+ // CHECK-LABEL: test_mm256_cvttph_epu64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.256
+ return _mm256_cvttph_epu64(A);
+}
+
+__m256i test_mm256_mask_cvttph_epu64(__m256i A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm256_mask_cvttph_epu64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.256
+ return _mm256_mask_cvttph_epu64(A, B, C);
+}
+
+__m256i test_mm256_maskz_cvttph_epu64(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm256_maskz_cvttph_epu64
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.256
+ return _mm256_maskz_cvttph_epu64(A, B);
+}
+
+__m128 test_mm_cvtxph_ps(__m128h A) {
+ // CHECK-LABEL: test_mm_cvtxph_ps
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.128
+ return _mm_cvtxph_ps(A);
+}
+
+__m128 test_mm_mask_cvtxph_ps(__m128 A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm_mask_cvtxph_ps
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.128
+ return _mm_mask_cvtxph_ps(A, B, C);
+}
+
+__m128 test_mm_maskz_cvtxph_ps(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm_maskz_cvtxph_ps
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.128
+ return _mm_maskz_cvtxph_ps(A, B);
+}
+
+__m256 test_mm256_cvtxph_ps(__m128h A) {
+ // CHECK-LABEL: test_mm256_cvtxph_ps
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.256
+ return _mm256_cvtxph_ps(A);
+}
+
+__m256 test_mm256_mask_cvtxph_ps(__m256 A, __mmask8 B, __m128h C) {
+ // CHECK-LABEL: test_mm256_mask_cvtxph_ps
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.256
+ return _mm256_mask_cvtxph_ps(A, B, C);
+}
+
+__m256 test_mm256_maskz_cvtxph_ps(__mmask8 A, __m128h B) {
+ // CHECK-LABEL: test_mm256_maskz_cvtxph_ps
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.256
+ return _mm256_maskz_cvtxph_ps(A, B);
+}
+
+__m128h test_mm_cvtxps_ph(__m128 A) {
+ // CHECK-LABEL: test_mm_cvtxps_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.128
+ return _mm_cvtxps_ph(A);
+}
+
+__m128h test_mm_mask_cvtxps_ph(__m128h A, __mmask8 B, __m128 C) {
+ // CHECK-LABEL: test_mm_mask_cvtxps_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.128
+ return _mm_mask_cvtxps_ph(A, B, C);
+}
+
+__m128h test_mm_maskz_cvtxps_ph(__mmask8 A, __m128 B) {
+ // CHECK-LABEL: test_mm_maskz_cvtxps_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.128
+ return _mm_maskz_cvtxps_ph(A, B);
+}
+
+__m128h test_mm256_cvtxps_ph(__m256 A) {
+ // CHECK-LABEL: test_mm256_cvtxps_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.256
+ return _mm256_cvtxps_ph(A);
+}
+
+__m128h test_mm256_mask_cvtxps_ph(__m128h A, __mmask8 B, __m256 C) {
+ // CHECK-LABEL: test_mm256_mask_cvtxps_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.256
+ return _mm256_mask_cvtxps_ph(A, B, C);
+}
+
+__m128h test_mm256_maskz_cvtxps_ph(__mmask8 A, __m256 B) {
+ // CHECK-LABEL: test_mm256_maskz_cvtxps_ph
+ // CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.256
+ return _mm256_maskz_cvtxps_ph(A, B);
+}
+
__m128h test_mm_mask_blend_ph(__mmask8 __U, __m128h __A, __m128h __W) {
// CHECK-LABEL: @test_mm_mask_blend_ph
// CHECK: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index eba83493e686d..72e9c3404775d 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -5224,4 +5224,321 @@ let TargetPrefix = "x86" in {
Intrinsic<[ llvm_i32_ty ],
[ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>> ]>;
+
+ def int_x86_avx512fp16_mask_vcvtph2psx_128
+ : GCCBuiltin<"__builtin_ia32_vcvtph2psx128_mask">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v8f16_ty, llvm_v4f32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2psx_256
+ : GCCBuiltin<"__builtin_ia32_vcvtph2psx256_mask">,
+ Intrinsic<[ llvm_v8f32_ty ],
+ [ llvm_v8f16_ty, llvm_v8f32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2psx_512
+ : GCCBuiltin<"__builtin_ia32_vcvtph2psx512_mask">,
+ Intrinsic<[ llvm_v16f32_ty ],
+ [ llvm_v16f16_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvtps2phx_128
+ : GCCBuiltin<"__builtin_ia32_vcvtps2phx128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v4f32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtps2phx_256
+ : GCCBuiltin<"__builtin_ia32_vcvtps2phx256_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtps2phx_512
+ : GCCBuiltin<"__builtin_ia32_vcvtps2phx512_mask">,
+ Intrinsic<[ llvm_v16f16_ty ],
+ [ llvm_v16f32_ty, llvm_v16f16_ty, llvm_i16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvtpd2ph_128
+ : GCCBuiltin<"__builtin_ia32_vcvtpd2ph128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v2f64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtpd2ph_256
+ : GCCBuiltin<"__builtin_ia32_vcvtpd2ph256_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v4f64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtpd2ph_512
+ : GCCBuiltin<"__builtin_ia32_vcvtpd2ph512_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f64_ty, llvm_v8f16_ty, llvm_i8_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvtph2pd_128
+ : GCCBuiltin<"__builtin_ia32_vcvtph2pd128_mask">,
+ Intrinsic<[ llvm_v2f64_ty ],
+ [ llvm_v8f16_ty, llvm_v2f64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2pd_256
+ : GCCBuiltin<"__builtin_ia32_vcvtph2pd256_mask">,
+ Intrinsic<[ llvm_v4f64_ty ],
+ [ llvm_v8f16_ty, llvm_v4f64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2pd_512
+ : GCCBuiltin<"__builtin_ia32_vcvtph2pd512_mask">,
+ Intrinsic<[ llvm_v8f64_ty ],
+ [ llvm_v8f16_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvtsh2ss_round
+ : GCCBuiltin<"__builtin_ia32_vcvtsh2ss_round_mask">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v4f32_ty, llvm_v8f16_ty, llvm_v4f32_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_vcvtss2sh_round
+ : GCCBuiltin<"__builtin_ia32_vcvtss2sh_round_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v4f32_ty, llvm_v8f16_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_vcvtsd2sh_round
+ : GCCBuiltin<"__builtin_ia32_vcvtsd2sh_round_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v2f64_ty, llvm_v8f16_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_vcvtsh2sd_round
+ : GCCBuiltin<"__builtin_ia32_vcvtsh2sd_round_mask">,
+ Intrinsic<[ llvm_v2f64_ty ],
+ [ llvm_v2f64_ty, llvm_v8f16_ty, llvm_v2f64_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+
+ def int_x86_avx512fp16_mask_vcvtph2w_128
+ : GCCBuiltin<"__builtin_ia32_vcvtph2w128_mask">,
+ Intrinsic<[ llvm_v8i16_ty ],
+ [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2w_256
+ : GCCBuiltin<"__builtin_ia32_vcvtph2w256_mask">,
+ Intrinsic<[ llvm_v16i16_ty ],
+ [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2w_512
+ : GCCBuiltin<"__builtin_ia32_vcvtph2w512_mask">,
+ Intrinsic<[ llvm_v32i16_ty ],
+ [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvttph2w_128
+ : GCCBuiltin<"__builtin_ia32_vcvttph2w128_mask">,
+ Intrinsic<[ llvm_v8i16_ty ],
+ [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2w_256
+ : GCCBuiltin<"__builtin_ia32_vcvttph2w256_mask">,
+ Intrinsic<[ llvm_v16i16_ty ],
+ [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2w_512
+ : GCCBuiltin<"__builtin_ia32_vcvttph2w512_mask">,
+ Intrinsic<[ llvm_v32i16_ty ],
+ [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvtph2uw_128
+ : GCCBuiltin<"__builtin_ia32_vcvtph2uw128_mask">,
+ Intrinsic<[ llvm_v8i16_ty ],
+ [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2uw_256
+ : GCCBuiltin<"__builtin_ia32_vcvtph2uw256_mask">,
+ Intrinsic<[ llvm_v16i16_ty ],
+ [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2uw_512
+ : GCCBuiltin<"__builtin_ia32_vcvtph2uw512_mask">,
+ Intrinsic<[ llvm_v32i16_ty ],
+ [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvttph2uw_128
+ : GCCBuiltin<"__builtin_ia32_vcvttph2uw128_mask">,
+ Intrinsic<[ llvm_v8i16_ty ],
+ [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2uw_256
+ : GCCBuiltin<"__builtin_ia32_vcvttph2uw256_mask">,
+ Intrinsic<[ llvm_v16i16_ty ],
+ [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2uw_512
+ : GCCBuiltin<"__builtin_ia32_vcvttph2uw512_mask">,
+ Intrinsic<[ llvm_v32i16_ty ],
+ [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+
+ def int_x86_avx512fp16_mask_vcvtph2dq_128
+ : GCCBuiltin<"__builtin_ia32_vcvtph2dq128_mask">,
+ Intrinsic<[ llvm_v4i32_ty ],
+ [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2dq_256
+ : GCCBuiltin<"__builtin_ia32_vcvtph2dq256_mask">,
+ Intrinsic<[ llvm_v8i32_ty ],
+ [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2dq_512
+ : GCCBuiltin<"__builtin_ia32_vcvtph2dq512_mask">,
+ Intrinsic<[ llvm_v16i32_ty ],
+ [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvtph2udq_128
+ : GCCBuiltin<"__builtin_ia32_vcvtph2udq128_mask">,
+ Intrinsic<[ llvm_v4i32_ty ],
+ [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2udq_256
+ : GCCBuiltin<"__builtin_ia32_vcvtph2udq256_mask">,
+ Intrinsic<[ llvm_v8i32_ty ],
+ [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2udq_512
+ : GCCBuiltin<"__builtin_ia32_vcvtph2udq512_mask">,
+ Intrinsic<[ llvm_v16i32_ty ],
+ [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvtdq2ph_128
+ : GCCBuiltin<"__builtin_ia32_vcvtdq2ph128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v4i32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtudq2ph_128
+ : GCCBuiltin<"__builtin_ia32_vcvtudq2ph128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v4i32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2dq_128
+ : GCCBuiltin<"__builtin_ia32_vcvttph2dq128_mask">,
+ Intrinsic<[ llvm_v4i32_ty ],
+ [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2dq_256
+ : GCCBuiltin<"__builtin_ia32_vcvttph2dq256_mask">,
+ Intrinsic<[ llvm_v8i32_ty ],
+ [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2dq_512
+ : GCCBuiltin<"__builtin_ia32_vcvttph2dq512_mask">,
+ Intrinsic<[ llvm_v16i32_ty ],
+ [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvttph2udq_128
+ : GCCBuiltin<"__builtin_ia32_vcvttph2udq128_mask">,
+ Intrinsic<[ llvm_v4i32_ty ],
+ [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2udq_256
+ : GCCBuiltin<"__builtin_ia32_vcvttph2udq256_mask">,
+ Intrinsic<[ llvm_v8i32_ty ],
+ [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2udq_512
+ : GCCBuiltin<"__builtin_ia32_vcvttph2udq512_mask">,
+ Intrinsic<[ llvm_v16i32_ty ],
+ [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+
+ def int_x86_avx512fp16_mask_vcvtqq2ph_128
+ : GCCBuiltin<"__builtin_ia32_vcvtqq2ph128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v2i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtqq2ph_256
+ : GCCBuiltin<"__builtin_ia32_vcvtqq2ph256_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v4i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2qq_128
+ : GCCBuiltin<"__builtin_ia32_vcvtph2qq128_mask">,
+ Intrinsic<[ llvm_v2i64_ty ],
+ [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2qq_256
+ : GCCBuiltin<"__builtin_ia32_vcvtph2qq256_mask">,
+ Intrinsic<[ llvm_v4i64_ty ],
+ [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2qq_512
+ : GCCBuiltin<"__builtin_ia32_vcvtph2qq512_mask">,
+ Intrinsic<[ llvm_v8i64_ty ],
+ [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvtuqq2ph_128
+ : GCCBuiltin<"__builtin_ia32_vcvtuqq2ph128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v2i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtuqq2ph_256
+ : GCCBuiltin<"__builtin_ia32_vcvtuqq2ph256_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v4i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2uqq_128
+ : GCCBuiltin<"__builtin_ia32_vcvtph2uqq128_mask">,
+ Intrinsic<[ llvm_v2i64_ty ],
+ [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2uqq_256
+ : GCCBuiltin<"__builtin_ia32_vcvtph2uqq256_mask">,
+ Intrinsic<[ llvm_v4i64_ty ],
+ [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2uqq_512
+ : GCCBuiltin<"__builtin_ia32_vcvtph2uqq512_mask">,
+ Intrinsic<[ llvm_v8i64_ty ],
+ [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvttph2qq_128
+ : GCCBuiltin<"__builtin_ia32_vcvttph2qq128_mask">,
+ Intrinsic<[ llvm_v2i64_ty ],
+ [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2qq_256
+ : GCCBuiltin<"__builtin_ia32_vcvttph2qq256_mask">,
+ Intrinsic<[ llvm_v4i64_ty ],
+ [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2qq_512
+ : GCCBuiltin<"__builtin_ia32_vcvttph2qq512_mask">,
+ Intrinsic<[ llvm_v8i64_ty ],
+ [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvttph2uqq_128
+ : GCCBuiltin<"__builtin_ia32_vcvttph2uqq128_mask">,
+ Intrinsic<[ llvm_v2i64_ty ],
+ [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2uqq_256
+ : GCCBuiltin<"__builtin_ia32_vcvttph2uqq256_mask">,
+ Intrinsic<[ llvm_v4i64_ty ],
+ [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2uqq_512
+ : GCCBuiltin<"__builtin_ia32_vcvttph2uqq512_mask">,
+ Intrinsic<[ llvm_v8i64_ty ],
+ [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+
+ def int_x86_avx512fp16_vcvtsh2si32
+ : GCCBuiltin<"__builtin_ia32_vcvtsh2si32">,
+ Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_vcvtsh2usi32
+ : GCCBuiltin<"__builtin_ia32_vcvtsh2usi32">,
+ Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_vcvtsh2si64
+ : GCCBuiltin<"__builtin_ia32_vcvtsh2si64">,
+ Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_vcvtsh2usi64
+ : GCCBuiltin<"__builtin_ia32_vcvtsh2usi64">,
+ Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_vcvtusi2sh
+ : GCCBuiltin<"__builtin_ia32_vcvtusi2sh">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+ def int_x86_avx512fp16_vcvtusi642sh
+ : GCCBuiltin<"__builtin_ia32_vcvtusi642sh">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_i64_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+ def int_x86_avx512fp16_vcvtsi2sh
+ : GCCBuiltin<"__builtin_ia32_vcvtsi2sh">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+ def int_x86_avx512fp16_vcvtsi642sh
+ : GCCBuiltin<"__builtin_ia32_vcvtsi642sh">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_i64_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+ def int_x86_avx512fp16_vcvttsh2si32
+ : GCCBuiltin<"__builtin_ia32_vcvttsh2si32">,
+ Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_vcvttsh2si64
+ : GCCBuiltin<"__builtin_ia32_vcvttsh2si64">,
+ Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_vcvttsh2usi32
+ : GCCBuiltin<"__builtin_ia32_vcvttsh2usi32">,
+ Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_vcvttsh2usi64
+ : GCCBuiltin<"__builtin_ia32_vcvttsh2usi64">,
+ Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
}
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index c73172612b1e1..dd28780ac946f 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -287,6 +287,7 @@ HANDLE_LIBCALL(FPEXT_F80_F128, "__extendxftf2")
HANDLE_LIBCALL(FPEXT_F64_F128, "__extenddftf2")
HANDLE_LIBCALL(FPEXT_F32_F128, "__extendsftf2")
HANDLE_LIBCALL(FPEXT_F16_F128, "__extendhftf2")
+HANDLE_LIBCALL(FPEXT_F16_F80, "__extendhfxf2")
HANDLE_LIBCALL(FPEXT_F32_F64, "__extendsfdf2")
HANDLE_LIBCALL(FPEXT_F16_F64, "__extendhfdf2")
HANDLE_LIBCALL(FPEXT_F16_F32, "__gnu_h2f_ieee")
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 74946c09fad96..201697c37140f 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -237,6 +237,8 @@ RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
return FPEXT_F16_F32;
if (RetVT == MVT::f64)
return FPEXT_F16_F64;
+ if (RetVT == MVT::f80)
+ return FPEXT_F16_F80;
if (RetVT == MVT::f128)
return FPEXT_F16_F128;
} else if (OpVT == MVT::f32) {
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c191d4d19fa84..9d735c8b3b0af 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1931,6 +1931,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SETCC, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
+ if (isTypeLegal(MVT::f80)) {
+ setOperationAction(ISD::FP_EXTEND, MVT::f80, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Custom);
+ }
setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand);
setCondCodeAction(ISD::SETUNE, MVT::f16, Expand);
@@ -1939,8 +1946,31 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setGroup(MVT::v32f16);
addRegisterClass(MVT::v32f16, &X86::VR512RegClass);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32f16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v32i16, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v32i16, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v32i16, Legal);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v32i16, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Legal);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32f16, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v32i16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v32i16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v32i16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v32i16, Custom);
+ setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i8, MVT::v32i16);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i8,
+ MVT::v32i16);
+ setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i8, MVT::v32i16);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i8,
+ MVT::v32i16);
+ setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i1, MVT::v32i16);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i1,
+ MVT::v32i16);
+ setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i1, MVT::v32i16);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i1,
+ MVT::v32i16);
+
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f16, Legal);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32f16, Legal);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v32f16, Custom);
@@ -1960,6 +1990,21 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8f16, Legal);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16f16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i16, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i16, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Legal);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i16, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Legal);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i16, Legal);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i16, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f16, Legal);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal);
// INSERT_VECTOR_ELT v8f16 extended to VECTOR_SHUFFLE
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f16, Custom);
@@ -2001,6 +2046,37 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
}
+ if (Subtarget.hasFP16()) {
+ // vcvttph2[u]dq v4f16 -> v4i32/64, v2f16 -> v2i32/64
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2f16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2f16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f16, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4f16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4f16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f16, Custom);
+ // vcvt[u]dq2ph v4i32/64 -> v4f16, v2i32/64 -> v2f16
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2f16, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2f16, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4f16, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4f16, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f16, Custom);
+ // vcvtps2phx v4f32 -> v4f16, v2f32 -> v2f16
+ setOperationAction(ISD::FP_ROUND, MVT::v2f16, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f16, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f16, Custom);
+ // vcvtph2psx v4f16 -> v4f32, v2f16 -> v2f32
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f16, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f16, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f16, Custom);
+ }
+
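A minimal sketch of the narrow conversions these Custom actions cover (the function name and llc invocation are mine, not taken from the added tests; the expectation is that the v2f16 source is widened and a vcvttph2qq-style instruction is selected):

  ; llc -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16,+avx512vl
  define <2 x i64> @cvt_v2f16_v2i64(<2 x half> %a) {
    %r = fptosi <2 x half> %a to <2 x i64>
    ret <2 x i64> %r
  }
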
setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
@@ -19993,6 +20069,43 @@ static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG,
DAG.getIntPtrConstant(0, dl));
}
+// Try to use a packed vector operation to handle i64 on 32-bit targets.
+static SDValue LowerI64IntToFP16(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ assert((Op.getOpcode() == ISD::SINT_TO_FP ||
+ Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
+ Op.getOpcode() == ISD::STRICT_UINT_TO_FP ||
+ Op.getOpcode() == ISD::UINT_TO_FP) &&
+ "Unexpected opcode!");
+ bool IsStrict = Op->isStrictFPOpcode();
+ SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
+ MVT SrcVT = Src.getSimpleValueType();
+ MVT VT = Op.getSimpleValueType();
+
+ if (SrcVT != MVT::i64 || Subtarget.is64Bit() || VT != MVT::f16)
+ return SDValue();
+
+ // Pack the i64 into a vector, do the operation and extract.
+
+ assert(Subtarget.hasFP16() && "Expected FP16");
+
+ SDLoc dl(Op);
+ SDValue InVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Src);
+ if (IsStrict) {
+ SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, {MVT::v2f16, MVT::Other},
+ {Op.getOperand(0), InVec});
+ SDValue Chain = CvtVec.getValue(1);
+ SDValue Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
+ DAG.getIntPtrConstant(0, dl));
+ return DAG.getMergeValues({Value, Chain}, dl);
+ }
+
+ SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, MVT::v2f16, InVec);
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
+ DAG.getIntPtrConstant(0, dl));
+}
+
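The helper above only fires for a scalar i64 source converted to f16 on a 32-bit target; a sketch of such an input (illustrative name, and the triple/attribute string is an assumption):

  ; llc -mtriple=i686-unknown-unknown -mattr=+avx512fp16
  define half @cvt_i64_f16(i64 %a) {
    %r = sitofp i64 %a to half
    ret half %r
  }
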
static bool useVectorCast(unsigned Opcode, MVT FromVT, MVT ToVT,
const X86Subtarget &Subtarget) {
switch (Opcode) {
@@ -20245,6 +20358,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
return V;
+ if (SDValue V = LowerI64IntToFP16(Op, DAG, Subtarget))
+ return V;
// SSE doesn't have an i16 conversion so we need to promote.
if (SrcVT == MVT::i16 && (UseSSEReg || VT == MVT::f128)) {
@@ -20724,6 +20839,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
return V;
+ if (SDValue V = LowerI64IntToFP16(Op, DAG, Subtarget))
+ return V;
// The transform for i64->f64 isn't correct for 0 when rounding to negative
// infinity. It produces -0.0, so disable under strictfp.
@@ -21505,9 +21622,11 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
MVT VT = Op->getSimpleValueType(0);
SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
+ SDValue Chain = IsStrict ? Op->getOperand(0) : SDValue();
MVT SrcVT = Src.getSimpleValueType();
SDLoc dl(Op);
+ SDValue Res;
if (VT.isVector()) {
if (VT == MVT::v2i1 && SrcVT == MVT::v2f64) {
MVT ResVT = MVT::v4i32;
@@ -21532,10 +21651,8 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8f64, Tmp, Src,
DAG.getIntPtrConstant(0, dl));
}
- SDValue Res, Chain;
if (IsStrict) {
- Res =
- DAG.getNode(Opc, dl, {ResVT, MVT::Other}, {Op->getOperand(0), Src});
+ Res = DAG.getNode(Opc, dl, {ResVT, MVT::Other}, {Chain, Src});
Chain = Res.getValue(1);
} else {
Res = DAG.getNode(Opc, dl, ResVT, Src);
@@ -21549,6 +21666,67 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
return Res;
}
+ if (Subtarget.hasFP16() && SrcVT.getVectorElementType() == MVT::f16) {
+ if (VT == MVT::v8i16 || VT == MVT::v16i16 || VT == MVT::v32i16)
+ return Op;
+
+ MVT ResVT = VT;
+ MVT EleVT = VT.getVectorElementType();
+ if (EleVT != MVT::i64)
+ ResVT = EleVT == MVT::i32 ? MVT::v4i32 : MVT::v8i16;
+
+ if (SrcVT != MVT::v8f16) {
+ SDValue Tmp =
+ IsStrict ? DAG.getConstantFP(0.0, dl, SrcVT) : DAG.getUNDEF(SrcVT);
+ SmallVector<SDValue, 4> Ops(SrcVT == MVT::v2f16 ? 4 : 2, Tmp);
+ Ops[0] = Src;
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8f16, Ops);
+ }
+
+ if (IsStrict) {
+ Res = DAG.getNode(IsSigned ? X86ISD::STRICT_CVTTP2SI
+ : X86ISD::STRICT_CVTTP2UI,
+ dl, {ResVT, MVT::Other}, {Chain, Src});
+ Chain = Res.getValue(1);
+ } else {
+ Res = DAG.getNode(IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI, dl,
+ ResVT, Src);
+ }
+
+ // TODO: Need to add exception check code for strict FP.
+ if (EleVT.getSizeInBits() < 16) {
+ ResVT = MVT::getVectorVT(EleVT, 8);
+ Res = DAG.getNode(ISD::TRUNCATE, dl, ResVT, Res);
+ }
+
+ if (ResVT != VT)
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
+ DAG.getIntPtrConstant(0, dl));
+
+ if (IsStrict)
+ return DAG.getMergeValues({Res, Chain}, dl);
+ return Res;
+ }
+
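In the f16-source path above, results that are already vXi16 are returned unchanged (they map directly onto vcvttph2w/vcvttph2uw), while sources narrower than v8f16 get widened first; two hedged sketches with illustrative names:

  define <8 x i16> @cvt_v8f16_v8i16(<8 x half> %a) {
    %r = fptosi <8 x half> %a to <8 x i16>
    ret <8 x i16> %r
  }
  define <4 x i32> @cvt_v4f16_v4i32(<4 x half> %a) {
    %r = fptosi <4 x half> %a to <4 x i32>
    ret <4 x i32> %r
  }
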
+ if (VT == MVT::v8i16 && (SrcVT == MVT::v8f32 || SrcVT == MVT::v8f64)) {
+ if (IsStrict) {
+ Res = DAG.getNode(IsSigned ? ISD::STRICT_FP_TO_SINT
+ : ISD::STRICT_FP_TO_UINT,
+ dl, {MVT::v8i32, MVT::Other}, {Chain, Src});
+ Chain = Res.getValue(1);
+ } else {
+ Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl,
+ MVT::v8i32, Src);
+ }
+
+ // TODO: Need to add exception check code for strict FP.
+ Res = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i16, Res);
+
+ if (IsStrict)
+ return DAG.getMergeValues({Res, Chain}, dl);
+ return Res;
+ }
+
// v8f64->v8i32 is legal, but we need v8i32 to be custom for v8f32.
if (VT == MVT::v8i32 && SrcVT == MVT::v8f64) {
assert(!IsSigned && "Expected unsigned conversion!");
@@ -21572,10 +21750,9 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Tmp, Src,
DAG.getIntPtrConstant(0, dl));
- SDValue Res, Chain;
if (IsStrict) {
Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, dl, {ResVT, MVT::Other},
- {Op->getOperand(0), Src});
+ {Chain, Src});
Chain = Res.getValue(1);
} else {
Res = DAG.getNode(ISD::FP_TO_UINT, dl, ResVT, Src);
@@ -21603,10 +21780,9 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Tmp, Src,
DAG.getIntPtrConstant(0, dl));
- SDValue Res, Chain;
if (IsStrict) {
Res = DAG.getNode(Op.getOpcode(), dl, {MVT::v8i64, MVT::Other},
- {Op->getOperand(0), Src});
+ {Chain, Src});
Chain = Res.getValue(1);
} else {
Res = DAG.getNode(Op.getOpcode(), dl, MVT::v8i64, Src);
@@ -21631,7 +21807,7 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
SDValue Tmp = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8f32,
{Src, Zero, Zero, Zero});
Tmp = DAG.getNode(Op.getOpcode(), dl, {MVT::v8i64, MVT::Other},
- {Op->getOperand(0), Tmp});
+ {Chain, Tmp});
SDValue Chain = Tmp.getValue(1);
Tmp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Tmp,
DAG.getIntPtrConstant(0, dl));
@@ -21714,17 +21890,16 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
// FIXME: This does not generate an invalid exception if the input does not
// fit in i32. PR44019
if (Subtarget.is64Bit()) {
- SDValue Res, Chain;
if (IsStrict) {
- Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { MVT::i64, MVT::Other},
- { Op.getOperand(0), Src });
+ Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, {MVT::i64, MVT::Other},
+ {Chain, Src});
Chain = Res.getValue(1);
} else
Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i64, Src);
Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
if (IsStrict)
- return DAG.getMergeValues({ Res, Chain }, dl);
+ return DAG.getMergeValues({Res, Chain}, dl);
return Res;
}
@@ -21739,17 +21914,16 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
// fit in i16. PR44019
if (VT == MVT::i16 && (UseSSEReg || SrcVT == MVT::f128)) {
assert(IsSigned && "Expected i16 FP_TO_UINT to have been promoted!");
- SDValue Res, Chain;
if (IsStrict) {
- Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { MVT::i32, MVT::Other},
- { Op.getOperand(0), Src });
+ Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, {MVT::i32, MVT::Other},
+ {Chain, Src});
Chain = Res.getValue(1);
} else
Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
if (IsStrict)
- return DAG.getMergeValues({ Res, Chain }, dl);
+ return DAG.getMergeValues({Res, Chain}, dl);
return Res;
}
@@ -21765,7 +21939,6 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
else
LC = RTLIB::getFPTOUINT(SrcVT, VT);
- SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
MakeLibCallOptions CallOptions;
std::pair<SDValue, SDValue> Tmp = makeLibCall(DAG, LC, VT, Src, CallOptions,
SDLoc(Op), Chain);
@@ -21777,7 +21950,6 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
}
// Fall back to X87.
- SDValue Chain;
if (SDValue V = FP_TO_INTHelper(Op, DAG, IsSigned, Chain)) {
if (IsStrict)
return DAG.getMergeValues({V, Chain}, dl);
@@ -22004,6 +22176,35 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
if (VT == MVT::f128)
return SDValue();
+ if (VT == MVT::f80) {
+ if (SVT == MVT::f16) {
+ assert(Subtarget.hasFP16() && "Unexpected features!");
+ RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, VT);
+ MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Tmp =
+ makeLibCall(DAG, LC, VT, In, CallOptions, DL,
+ IsStrict ? Op.getOperand(0) : SDValue());
+ if (IsStrict)
+ return DAG.getMergeValues({Tmp.first, Tmp.second}, DL);
+ else
+ return Tmp.first;
+ }
+ return Op;
+ }
+
+ if (SVT.getVectorElementType() == MVT::f16) {
+ assert(Subtarget.hasFP16() && Subtarget.hasVLX() && "Unexpected features!");
+ if (SVT == MVT::v2f16)
+ In = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f16, In,
+ DAG.getUNDEF(MVT::v2f16));
+ SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8f16, In,
+ DAG.getUNDEF(MVT::v4f16));
+ if (IsStrict)
+ return DAG.getNode(X86ISD::STRICT_VFPEXT, DL, {VT, MVT::Other},
+ {Op->getOperand(0), Res});
+ return DAG.getNode(X86ISD::VFPEXT, DL, VT, Res);
+ }
+
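A sketch of the extend case handled just above, where the short f16 vector is concatenated out to v8f16 and lowered through X86ISD::VFPEXT (i.e. a vcvtph2psx-style select; name is illustrative):

  define <4 x float> @ext_v4f16_v4f32(<4 x half> %a) {
    %r = fpext <4 x half> %a to <4 x float>
    ret <4 x float> %r
  }
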
assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
SDValue Res =
@@ -22017,8 +22218,11 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
SDValue In = Op.getOperand(IsStrict ? 1 : 0);
- // It's legal except when f128 is involved
- if (In.getSimpleValueType() != MVT::f128)
+ MVT VT = Op.getSimpleValueType();
+ MVT SVT = In.getSimpleValueType();
+
+ // It's legal except when f128 is involved or we're converting f80->f16.
+ if (SVT != MVT::f128 && !(VT == MVT::f16 && SVT == MVT::f80))
return Op;
return SDValue();
@@ -31113,6 +31317,51 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue Src = N->getOperand(IsStrict ? 1 : 0);
EVT SrcVT = Src.getValueType();
+ if (VT.isVector() && Subtarget.hasFP16() &&
+ SrcVT.getVectorElementType() == MVT::f16) {
+ EVT EleVT = VT.getVectorElementType();
+ EVT ResVT = EleVT == MVT::i32 ? MVT::v4i32 : MVT::v8i16;
+
+ if (SrcVT != MVT::v8f16) {
+ SDValue Tmp =
+ IsStrict ? DAG.getConstantFP(0.0, dl, SrcVT) : DAG.getUNDEF(SrcVT);
+ SmallVector<SDValue, 4> Ops(SrcVT == MVT::v2f16 ? 4 : 2, Tmp);
+ Ops[0] = Src;
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8f16, Ops);
+ }
+
+ SDValue Res, Chain;
+ if (IsStrict) {
+ unsigned Opc =
+ IsSigned ? X86ISD::STRICT_CVTTP2SI : X86ISD::STRICT_CVTTP2UI;
+ Res =
+ DAG.getNode(Opc, dl, {ResVT, MVT::Other}, {N->getOperand(0), Src});
+ Chain = Res.getValue(1);
+ } else {
+ unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
+ Res = DAG.getNode(Opc, dl, ResVT, Src);
+ }
+
+ // TODO: Need to add exception check code for strict FP.
+ if (EleVT.getSizeInBits() < 16) {
+ MVT TmpVT = MVT::getVectorVT(EleVT.getSimpleVT(), 8);
+ Res = DAG.getNode(ISD::TRUNCATE, dl, TmpVT, Res);
+
+ // Now widen to 128 bits.
+ unsigned NumConcats = 128 / TmpVT.getSizeInBits();
+ MVT ConcatVT = MVT::getVectorVT(EleVT.getSimpleVT(), 8 * NumConcats);
+ SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(TmpVT));
+ ConcatOps[0] = Res;
+ Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT, ConcatOps);
+ }
+
+ Results.push_back(Res);
+ if (IsStrict)
+ Results.push_back(Chain);
+
+ return;
+ }
+
if (VT.isVector() && VT.getScalarSizeInBits() < 32) {
assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
"Unexpected type action!");
@@ -31287,9 +31536,31 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP ||
N->getOpcode() == ISD::STRICT_SINT_TO_FP;
EVT VT = N->getValueType(0);
+ SDValue Src = N->getOperand(IsStrict ? 1 : 0);
+ if (VT.getVectorElementType() == MVT::f16 && Subtarget.hasFP16() &&
+ Subtarget.hasVLX()) {
+ if (Src.getValueType().getVectorElementType() == MVT::i16)
+ return;
+
+ if (VT == MVT::v2f16 && Src.getValueType() == MVT::v2i32)
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
+ IsStrict ? DAG.getConstant(0, dl, MVT::v2i32)
+ : DAG.getUNDEF(MVT::v2i32));
+ if (IsStrict) {
+ unsigned Opc =
+ IsSigned ? X86ISD::STRICT_CVTSI2P : X86ISD::STRICT_CVTUI2P;
+ SDValue Res = DAG.getNode(Opc, dl, {MVT::v8f16, MVT::Other},
+ {N->getOperand(0), Src});
+ Results.push_back(Res);
+ Results.push_back(Res.getValue(1));
+ } else {
+ unsigned Opc = IsSigned ? X86ISD::CVTSI2P : X86ISD::CVTUI2P;
+ Results.push_back(DAG.getNode(Opc, dl, MVT::v8f16, Src));
+ }
+ return;
+ }
if (VT != MVT::v2f32)
return;
- SDValue Src = N->getOperand(IsStrict ? 1 : 0);
EVT SrcVT = Src.getValueType();
if (Subtarget.hasDQI() && Subtarget.hasVLX() && SrcVT == MVT::v2i64) {
if (IsStrict) {
@@ -31390,14 +31661,21 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::FP_ROUND: {
bool IsStrict = N->isStrictFPOpcode();
SDValue Src = N->getOperand(IsStrict ? 1 : 0);
+ EVT VT = N->getValueType(0);
+ EVT NewVT = VT.getVectorElementType() == MVT::f16 ? MVT::v8f16 : MVT::v4f32;
+ if (VT == MVT::v2f16 && Src.getValueType() == MVT::v2f32) {
+ SDValue Ext = IsStrict ? DAG.getConstantFP(0.0, dl, MVT::v2f32)
+ : DAG.getUNDEF(MVT::v2f32);
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, Ext);
+ }
if (!isTypeLegal(Src.getValueType()))
return;
SDValue V;
if (IsStrict)
- V = DAG.getNode(X86ISD::STRICT_VFPROUND, dl, {MVT::v4f32, MVT::Other},
- {N->getOperand(0), N->getOperand(1)});
+ V = DAG.getNode(X86ISD::STRICT_VFPROUND, dl, {NewVT, MVT::Other},
+ {N->getOperand(0), Src});
else
- V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
+ V = DAG.getNode(X86ISD::VFPROUND, dl, NewVT, Src);
Results.push_back(V);
if (IsStrict)
Results.push_back(V.getValue(1));
@@ -31409,6 +31687,21 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
// No other ValueType for FP_EXTEND should reach this point.
assert(N->getValueType(0) == MVT::v2f32 &&
"Do not know how to legalize this Node");
+ if (!Subtarget.hasFP16() || !Subtarget.hasVLX())
+ return;
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Src = N->getOperand(IsStrict ? 1 : 0);
+ SDValue Ext = IsStrict ? DAG.getConstantFP(0.0, dl, MVT::v2f16)
+ : DAG.getUNDEF(MVT::v2f16);
+ SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f16, Src, Ext);
+ if (IsStrict)
+ V = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::v4f32, MVT::Other},
+ {N->getOperand(0), V});
+ else
+ V = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, V);
+ Results.push_back(V);
+ if (IsStrict)
+ Results.push_back(V.getValue(1));
return;
}
case ISD::INTRINSIC_W_CHAIN: {
@@ -49415,10 +49708,31 @@ static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
EVT InVT = Op0.getValueType();
+ // UINT_TO_FP(vXi1~15) -> UINT_TO_FP(ZEXT(vXi1~15 to vXi16))
+ // UINT_TO_FP(vXi17~31) -> UINT_TO_FP(ZEXT(vXi17~31 to vXi32))
+ // UINT_TO_FP(vXi33~63) -> UINT_TO_FP(ZEXT(vXi33~63 to vXi64))
+ if (InVT.isVector() && VT.getVectorElementType() == MVT::f16) {
+ unsigned ScalarSize = InVT.getScalarSizeInBits();
+ if (ScalarSize == 16 || ScalarSize == 32 || ScalarSize >= 64)
+ return SDValue();
+ SDLoc dl(N);
+ EVT DstVT = EVT::getVectorVT(*DAG.getContext(),
+ ScalarSize < 16 ? MVT::i16
+ : ScalarSize < 32 ? MVT::i32
+ : MVT::i64,
+ InVT.getVectorNumElements());
+ SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0);
+ if (IsStrict)
+ return DAG.getNode(ISD::STRICT_UINT_TO_FP, dl, {VT, MVT::Other},
+ {N->getOperand(0), P});
+ return DAG.getNode(ISD::UINT_TO_FP, dl, VT, P);
+ }
+
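The combine above keeps f16 results on the smallest integer width that has a direct convert, so a v8i8 source only needs a zero-extend to v8i16 before a single vcvtuw2ph-style convert; a sketch under that assumption:

  define <8 x half> @cvt_v8i8_v8f16(<8 x i8> %a) {
    %r = uitofp <8 x i8> %a to <8 x half>
    ret <8 x half> %r
  }
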
// UINT_TO_FP(vXi1) -> SINT_TO_FP(ZEXT(vXi1 to vXi32))
// UINT_TO_FP(vXi8) -> SINT_TO_FP(ZEXT(vXi8 to vXi32))
// UINT_TO_FP(vXi16) -> SINT_TO_FP(ZEXT(vXi16 to vXi32))
- if (InVT.isVector() && InVT.getScalarSizeInBits() < 32) {
+ if (InVT.isVector() && InVT.getScalarSizeInBits() < 32 &&
+ VT.getScalarType() != MVT::f16) {
SDLoc dl(N);
EVT DstVT = InVT.changeVectorElementType(MVT::i32);
SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0);
@@ -49457,10 +49771,31 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
EVT InVT = Op0.getValueType();
+ // SINT_TO_FP(vXi1~15) -> SINT_TO_FP(SEXT(vXi1~15 to vXi16))
+ // SINT_TO_FP(vXi17~31) -> SINT_TO_FP(SEXT(vXi17~31 to vXi32))
+ // SINT_TO_FP(vXi33~63) -> SINT_TO_FP(SEXT(vXi33~63 to vXi64))
+ if (InVT.isVector() && VT.getVectorElementType() == MVT::f16) {
+ unsigned ScalarSize = InVT.getScalarSizeInBits();
+ if (ScalarSize == 16 || ScalarSize == 32 || ScalarSize >= 64)
+ return SDValue();
+ SDLoc dl(N);
+ EVT DstVT = EVT::getVectorVT(*DAG.getContext(),
+ ScalarSize < 16 ? MVT::i16
+ : ScalarSize < 32 ? MVT::i32
+ : MVT::i64,
+ InVT.getVectorNumElements());
+ SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
+ if (IsStrict)
+ return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
+ {N->getOperand(0), P});
+ return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
+ }
+
// SINT_TO_FP(vXi1) -> SINT_TO_FP(SEXT(vXi1 to vXi32))
// SINT_TO_FP(vXi8) -> SINT_TO_FP(SEXT(vXi8 to vXi32))
// SINT_TO_FP(vXi16) -> SINT_TO_FP(SEXT(vXi16 to vXi32))
- if (InVT.isVector() && InVT.getScalarSizeInBits() < 32) {
+ if (InVT.isVector() && InVT.getScalarSizeInBits() < 32 &&
+ VT.getScalarType() != MVT::f16) {
SDLoc dl(N);
EVT DstVT = InVT.changeVectorElementType(MVT::i32);
SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
@@ -51306,6 +51641,9 @@ static SDValue combineFP_EXTEND(SDNode *N, SelectionDAG &DAG,
if (!Subtarget.hasF16C() || Subtarget.useSoftFloat())
return SDValue();
+ if (Subtarget.hasFP16())
+ return SDValue();
+
bool IsStrict = N->isStrictFPOpcode();
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(IsStrict ? 1 : 0);
@@ -51414,6 +51752,9 @@ static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG,
if (!Subtarget.hasF16C() || Subtarget.useSoftFloat())
return SDValue();
+ if (Subtarget.hasFP16())
+ return SDValue();
+
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(0);
EVT SrcVT = Src.getValueType();
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 34621b4e68dbf..7a2b6ade1796c 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7531,8 +7531,8 @@ multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
X86VectorVTInfo DstVT, SDNode OpNode,
SDNode OpNodeRnd,
X86FoldableSchedWrite sched, string asm,
- string aliasStr> {
- let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
+ string aliasStr, Predicate prd = HasAVX512> {
+ let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in {
def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
@@ -7548,7 +7548,7 @@ multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
[(set DstVT.RC:$dst, (OpNode
(SrcVT.ScalarIntMemFrags addr:$src)))]>,
EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
- } // Predicates = [HasAVX512]
+ } // Predicates = [prd]
def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
@@ -7712,8 +7712,9 @@ def : Pat<(v2f64 (X86Movsd
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
SDNode OpNodeInt, SDNode OpNodeSAE,
- X86FoldableSchedWrite sched, string aliasStr>{
-let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in {
+ X86FoldableSchedWrite sched, string aliasStr,
+ Predicate prd = HasAVX512> {
+let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
let isCodeGenOnly = 1 in {
def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
@@ -7740,7 +7741,7 @@ let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in {
[(set _DstRC.RC:$dst,
(OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
-} //HasAVX512
+} // Predicates = [prd]
def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
@@ -7838,33 +7839,47 @@ multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInf
EVEX_4V, VEX_LIG, Sched<[sched]>,
EVEX_B, EVEX_RC;
}
-multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
+multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode OpNodeRnd,
X86FoldableSchedWrite sched,
- X86VectorVTInfo _src, X86VectorVTInfo _dst> {
- let Predicates = [HasAVX512], ExeDomain = SSEPackedSingle in {
+ X86VectorVTInfo _src, X86VectorVTInfo _dst,
+ Predicate prd = HasAVX512> {
+ let Predicates = [prd], ExeDomain = SSEPackedSingle in {
defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
- OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
+ OpNodeRnd, sched>, EVEX_CD8<_src.EltSize, CD8VT1>;
}
}
-multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
- SDNode OpNode, SDNode OpNodeSAE,
- X86FoldableSchedWrite sched,
- X86VectorVTInfo _src, X86VectorVTInfo _dst> {
- let Predicates = [HasAVX512], ExeDomain = SSEPackedSingle in {
+multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SDNode OpNodeSAE,
+ X86FoldableSchedWrite sched,
+ X86VectorVTInfo _src, X86VectorVTInfo _dst,
+ Predicate prd = HasAVX512> {
+ let Predicates = [prd], ExeDomain = SSEPackedSingle in {
defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
- EVEX_CD8<32, CD8VT1>, XS;
+ EVEX_CD8<_src.EltSize, CD8VT1>;
}
}
-defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86frounds,
+defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
X86froundsRnd, WriteCvtSD2SS, f64x_info,
- f32x_info>;
-defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts,
+ f32x_info>, XD, VEX_W;
+defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
X86fpextsSAE, WriteCvtSS2SD, f32x_info,
- f64x_info>;
+ f64x_info>, XS;
+defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
+ X86froundsRnd, WriteCvtSD2SS, f64x_info,
+ f16x_info, HasFP16>, T_MAP5XD, VEX_W;
+defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
+ X86fpextsSAE, WriteCvtSS2SD, f16x_info,
+ f64x_info, HasFP16>, T_MAP5XS;
+defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds,
+ X86froundsRnd, WriteCvtSD2SS, f32x_info,
+ f16x_info, HasFP16>, T_MAP5PS;
+defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts,
+ X86fpextsSAE, WriteCvtSS2SD, f16x_info,
+ f32x_info, HasFP16>, T_MAP6PS;
def : Pat<(f64 (any_fpextend FR32X:$src)),
(VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
@@ -7877,6 +7892,27 @@ def : Pat<(f32 (any_fpround FR64X:$src)),
(VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
Requires<[HasAVX512]>;
+def : Pat<(f32 (any_fpextend FR16X:$src)),
+ (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>,
+ Requires<[HasFP16]>;
+def : Pat<(f32 (any_fpextend (loadf16 addr:$src))),
+ (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasFP16, OptForSize]>;
+
+def : Pat<(f64 (any_fpextend FR16X:$src)),
+ (VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>,
+ Requires<[HasFP16]>;
+def : Pat<(f64 (any_fpextend (loadf16 addr:$src))),
+ (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasFP16, OptForSize]>;
+
+def : Pat<(f16 (any_fpround FR32X:$src)),
+ (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>,
+ Requires<[HasFP16]>;
+def : Pat<(f16 (any_fpround FR64X:$src)),
+ (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>,
+ Requires<[HasFP16]>;
+
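With the scalar patterns above, plain IR extends/truncates are expected to select vcvtsh2ss and vcvtss2sh under +avx512fp16 (function names are mine):

  define float @ext_f16_f32(half %a) {
    %r = fpext half %a to float
    ret float %r
  }
  define half @trunc_f32_f16(float %a) {
    %r = fptrunc float %a to half
    ret half %r
  }
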
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector
@@ -7990,39 +8026,82 @@ multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _
(_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
(_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
-// Extend Float to Double
-multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
- X86SchedWriteWidths sched> {
- let Predicates = [HasAVX512] in {
- defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
+// Extend [Float to Double, Half to Float]
+multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
+ X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
+ let Predicates = [prd] in {
+ defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info512, _src.info256,
any_fpextend, fpextend, sched.ZMM>,
- avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
X86vfpextSAE, sched.ZMM>, EVEX_V512;
}
- let Predicates = [HasVLX] in {
- defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
- X86any_vfpext, X86vfpext, sched.XMM, "{1to2}",
+ let Predicates = [prd, HasVLX] in {
+ defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
+ X86any_vfpext, X86vfpext, sched.XMM,
+ _dst.info128.BroadcastStr,
"", f64mem>, EVEX_V128;
- defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info,
- any_fpextend, fpextend, sched.YMM>, EVEX_V256;
- }
-}
-
-// Truncate Double to Float
-multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
- let Predicates = [HasAVX512] in {
- defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info,
+ defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
+ any_fpextend, fpextend, sched.YMM>, EVEX_V256;
+ }
+}
+
+// Truncate [Double to Float, Float to Half]
+multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
+ X86SchedWriteWidths sched, Predicate prd = HasAVX512,
+ PatFrag bcast128 = _src.info128.BroadcastLdFrag,
+ PatFrag bcast256 = _src.info256.BroadcastLdFrag,
+ PatFrag bcast512 = _src.info512.BroadcastLdFrag,
+ PatFrag loadVT128 = _src.info128.LdFrag,
+ PatFrag loadVT256 = _src.info256.LdFrag,
+ PatFrag loadVT512 = _src.info512.LdFrag,
+ RegisterClass maskRC128 = _src.info128.KRCWM,
+ RegisterClass maskRC256 = _src.info256.KRCWM,
+ RegisterClass maskRC512 = _src.info512.KRCWM> {
+ let Predicates = [prd] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
X86any_vfpround, X86vfpround, sched.ZMM>,
- avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
X86vfproundRnd, sched.ZMM>, EVEX_V512;
}
- let Predicates = [HasVLX] in {
- defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
- null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
- f128mem, VK2WM>, EVEX_V128;
- defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info,
+ let Predicates = [prd, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128,
+ null_frag, null_frag, sched.XMM,
+ _src.info128.BroadcastStr, "{x}",
+ f128mem, maskRC128>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256,
X86any_vfpround, X86vfpround,
- sched.YMM, "{1to4}", "{y}">, EVEX_V256;
+ sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256;
+
+ // Special patterns to allow use of X86vmfpround for masking. Instruction
+ // patterns have been disabled with null_frag.
+ def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))),
+ (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
+ def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
+ maskRC128:$mask),
+ (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
+ def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
+ maskRC128:$mask),
+ (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;
+
+ def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))),
+ (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
+ def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0),
+ maskRC128:$mask),
+ (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
+ def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV,
+ maskRC128:$mask),
+ (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;
+
+ def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))),
+ (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
+ def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
+ (_dst.info128.VT VR128X:$src0), maskRC128:$mask),
+ (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
+ def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
+ _dst.info128.ImmAllZerosV, maskRC128:$mask),
+ (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
}
def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
@@ -8066,40 +8145,185 @@ multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sc
VK4WM:$mask, f64mem:$src), 0, "att">;
}
-defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
+defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
+ avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
VEX_W, PD, EVEX_CD8<64, CD8VF>;
-defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
- PS, EVEX_CD8<32, CD8VH>;
+defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd",
+ avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
+ PS, EVEX_CD8<32, CD8VH>;
-let Predicates = [HasVLX] in {
+// Extend Half to Double
+multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info,
+ any_fpextend, fpextend, sched.ZMM>,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info,
+ X86vfpextSAE, sched.ZMM>, EVEX_V512;
+ def : Pat<(v8f64 (extloadv8f16 addr:$src)),
+ (!cast<Instruction>(NAME # "Zrm") addr:$src)>;
+ }
+ let Predicates = [HasFP16, HasVLX] in {
+ defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info,
+ X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "",
+ f32mem>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info,
+ X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "",
+ f64mem>, EVEX_V256;
+ }
+}
+
+// Truncate Double to Half
+multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info,
+ X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8f64_info,
+ X86vfproundRnd, sched.ZMM>, EVEX_V512;
+ }
+ let Predicates = [HasFP16, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag,
+ null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
+ VK2WM>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag,
+ null_frag, sched.YMM, "{1to4}", "{y}", f256mem,
+ VK4WM>, EVEX_V256;
+ }
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
+ VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
+ VK2WM:$mask, VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+ (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
+ VK2WM:$mask, VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
+ i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
+ "$dst {${mask}}, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
+ VK2WM:$mask, i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
+ VK2WM:$mask, i64mem:$src), 0, "att">;
+
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
+ VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
+ "$dst {${mask}}, $src}",
+ (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
+ VK4WM:$mask, VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, $src}",
+ (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
+ VK4WM:$mask, VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
+ i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
+ "$dst {${mask}}, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
+ VK4WM:$mask, i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
+ VK4WM:$mask, i64mem:$src), 0, "att">;
+
+ def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
+ VR512:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
+ "$dst {${mask}}, $src}",
+ (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
+ VK8WM:$mask, VR512:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, $src}",
+ (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
+ VK8WM:$mask, VR512:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
+ (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
+ i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
+ "$dst {${mask}}, ${src}{1to8}}",
+ (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
+ VK8WM:$mask, i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, ${src}{1to8}}",
+ (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
+ VK8WM:$mask, i64mem:$src), 0, "att">;
+}
+
+defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info,
+ avx512vl_f32_info, SchedWriteCvtPD2PS,
+ HasFP16>, T_MAP5PD, EVEX_CD8<32, CD8VF>;
+defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info,
+ avx512vl_f16_info, SchedWriteCvtPS2PD,
+ HasFP16>, T_MAP6PD, EVEX_CD8<16, CD8VH>;
+defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>,
+ VEX_W, T_MAP5PD, EVEX_CD8<64, CD8VF>;
+defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>,
+ T_MAP5PS, EVEX_CD8<16, CD8VQ>;
+
+let Predicates = [HasFP16, HasVLX] in {
// Special patterns to allow use of X86vmfpround for masking. Instruction
// patterns have been disabled with null_frag.
- def : Pat<(X86any_vfpround (v2f64 VR128X:$src)),
- (VCVTPD2PSZ128rr VR128X:$src)>;
- def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0),
+ def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))),
+ (VCVTPD2PHZ256rr VR256X:$src)>;
+ def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0),
+ VK4WM:$mask)),
+ (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
+ def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV,
+ VK4WM:$mask),
+ (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
+
+ def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))),
+ (VCVTPD2PHZ256rm addr:$src)>;
+ def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0),
+ VK4WM:$mask),
+ (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+ def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV,
+ VK4WM:$mask),
+ (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>;
+
+ def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))),
+ (VCVTPD2PHZ256rmb addr:$src)>;
+ def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
+ (v8f16 VR128X:$src0), VK4WM:$mask),
+ (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+ def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
+ v8f16x_info.ImmAllZerosV, VK4WM:$mask),
+ (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
+
+ def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))),
+ (VCVTPD2PHZ128rr VR128X:$src)>;
+ def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0),
VK2WM:$mask),
- (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
- def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV,
+ (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+ def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV,
VK2WM:$mask),
- (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
+ (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
- def : Pat<(X86any_vfpround (loadv2f64 addr:$src)),
- (VCVTPD2PSZ128rm addr:$src)>;
- def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0),
+ def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))),
+ (VCVTPD2PHZ128rm addr:$src)>;
+ def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0),
VK2WM:$mask),
- (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV,
+ (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86vmfpround (loadv2f64 addr:$src), v8f16x_info.ImmAllZerosV,
VK2WM:$mask),
- (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;
+ (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>;
- def : Pat<(X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src))),
- (VCVTPD2PSZ128rmb addr:$src)>;
+ def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))),
+ (VCVTPD2PHZ128rmb addr:$src)>;
def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
- (v4f32 VR128X:$src0), VK2WM:$mask),
- (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ (v8f16 VR128X:$src0), VK2WM:$mask),
+ (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
- v4f32x_info.ImmAllZerosV, VK2WM:$mask),
- (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
+ v8f16x_info.ImmAllZerosV, VK2WM:$mask),
+ (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
}
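
A packed double-to-half truncation of the kind these VCVTPD2PH patterns catch, assuming FP16 plus VLX; a 256-bit source should pick the "y"-suffixed form (sketch only):

  define <4 x half> @trunc_v4f64_v4f16(<4 x double> %a) {
    %r = fptrunc <4 x double> %a to <4 x half>
    ret <4 x half> %r
  }
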
// Convert Signed/Unsigned Doubleword to Double
@@ -8420,26 +8644,60 @@ multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpN
}
// Convert Signed/Unsigned Quadword to Float
-multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
- SDNode MaskOpNode, SDNode OpNodeRnd,
- X86SchedWriteWidths sched> {
- let Predicates = [HasDQI] in {
- defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
+// Also Convert Signed/Unsigned Doubleword to Half
+multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode, SDPatternOperator OpNode128,
+ SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd,
+ AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
+ X86SchedWriteWidths sched, Predicate prd = HasDQI> {
+ let Predicates = [prd] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode,
MaskOpNode, sched.ZMM>,
- avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
- let Predicates = [HasDQI, HasVLX] in {
+ let Predicates = [prd, HasVLX] in {
// we need "x"/"y" suffixes in order to distinguish between 128 and 256
// memory forms of these instructions in Asm Parser. They have the same
// dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
// due to the same reason.
- defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag,
- null_frag, sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag,
+ null_frag, sched.XMM, _src.info128.BroadcastStr,
+ "{x}", i128mem, _src.info128.KRCWM>,
EVEX_V128, NotEVEX2VEXConvertible;
- defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
- MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256,
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode,
+ MaskOpNode, sched.YMM, _src.info256.BroadcastStr,
+ "{y}">, EVEX_V256,
NotEVEX2VEXConvertible;
+
+ // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction
+ // patterns have been disabled with null_frag.
+ def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))),
+ (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
+ def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
+ _src.info128.KRCWM:$mask),
+ (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>;
+ def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
+ _src.info128.KRCWM:$mask),
+ (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>;
+
+ def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))),
+ (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
+ def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0),
+ _src.info128.KRCWM:$mask),
+ (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
+ def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV,
+ _src.info128.KRCWM:$mask),
+ (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>;
+
+ def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))),
+ (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
+ def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
+ (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask),
+ (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
+ def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
+ _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask),
+ (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>;
}
def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
@@ -8581,13 +8839,29 @@ defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
VEX_W, XS, EVEX_CD8<64, CD8VF>;
-defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", any_sint_to_fp,
- sint_to_fp, X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
- VEX_W, PS, EVEX_CD8<64, CD8VF>;
+defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp,
+ X86any_VSintToFP, X86VMSintToFP,
+ X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
+ SchedWriteCvtDQ2PS, HasFP16>,
+ T_MAP5PS, EVEX_CD8<32, CD8VF>;
+
+defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp,
+ X86any_VUintToFP, X86VMUintToFP,
+ X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
+ SchedWriteCvtDQ2PS, HasFP16>, T_MAP5XD,
+ EVEX_CD8<32, CD8VF>;
+
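For the doubleword-to-half definitions above, a 256-bit signed convert is the representative case (sketch; expected to select vcvtdq2ph):

  define <8 x half> @cvt_v8i32_v8f16(<8 x i32> %a) {
    %r = sitofp <8 x i32> %a to <8 x half>
    ret <8 x half> %r
  }
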
+defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp,
+ X86any_VSintToFP, X86VMSintToFP,
+ X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
+ SchedWriteCvtDQ2PS>, VEX_W, PS,
+ EVEX_CD8<64, CD8VF>;
-defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", any_uint_to_fp,
- uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PS>,
- VEX_W, XD, EVEX_CD8<64, CD8VF>;
+defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp,
+ X86any_VUintToFP, X86VMUintToFP,
+ X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
+ SchedWriteCvtDQ2PS>, VEX_W, XD,
+ EVEX_CD8<64, CD8VF>;
let Predicates = [HasVLX] in {
// Special patterns to allow use of X86mcvtp2Int for masking. Instruction
@@ -8777,66 +9051,6 @@ let Predicates = [HasVLX] in {
(VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
}
-let Predicates = [HasDQI, HasVLX] in {
- // Special patterns to allow use of X86VMSintToFP for masking. Instruction
- // patterns have been disabled with null_frag.
- def : Pat<(v4f32 (X86any_VSintToFP (v2i64 VR128X:$src))),
- (VCVTQQ2PSZ128rr VR128X:$src)>;
- def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
- VK2WM:$mask),
- (VCVTQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
- def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
- VK2WM:$mask),
- (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
-
- def : Pat<(v4f32 (X86any_VSintToFP (loadv2i64 addr:$src))),
- (VCVTQQ2PSZ128rm addr:$src)>;
- def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
- VK2WM:$mask),
- (VCVTQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
- VK2WM:$mask),
- (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
-
- def : Pat<(v4f32 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
- (VCVTQQ2PSZ128rmb addr:$src)>;
- def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
- (v4f32 VR128X:$src0), VK2WM:$mask),
- (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
- v4f32x_info.ImmAllZerosV, VK2WM:$mask),
- (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
-
- // Special patterns to allow use of X86VMUintToFP for masking. Instruction
- // patterns have been disabled with null_frag.
- def : Pat<(v4f32 (X86any_VUintToFP (v2i64 VR128X:$src))),
- (VCVTUQQ2PSZ128rr VR128X:$src)>;
- def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
- VK2WM:$mask),
- (VCVTUQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
- def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
- VK2WM:$mask),
- (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
-
- def : Pat<(v4f32 (X86any_VUintToFP (loadv2i64 addr:$src))),
- (VCVTUQQ2PSZ128rm addr:$src)>;
- def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
- VK2WM:$mask),
- (VCVTUQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
- VK2WM:$mask),
- (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
-
- def : Pat<(v4f32 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
- (VCVTUQQ2PSZ128rmb addr:$src)>;
- def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
- (v4f32 VR128X:$src0), VK2WM:$mask),
- (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
- v4f32x_info.ImmAllZerosV, VK2WM:$mask),
- (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
-}
-
//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//
@@ -12663,3 +12877,510 @@ let hasSideEffects = 0 in {
def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
"vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>;
}
+
+// Convert 16-bit float to i16/u16
+multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
+ AVX512VLVectorVTInfo _Dst,
+ AVX512VLVectorVTInfo _Src,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
+ OpNode, MaskOpNode, sched.ZMM>,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512,
+ OpNodeRnd, sched.ZMM>, EVEX_V512;
+ }
+ let Predicates = [HasFP16, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
+ OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
+ OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
+ }
+}
+
+// Convert 16-bit float to i16/u16 with truncation
+multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
+ AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
+ OpNode, MaskOpNode, sched.ZMM>,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512,
+ OpNodeRnd, sched.ZMM>, EVEX_V512;
+ }
+ let Predicates = [HasFP16, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
+ OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
+ OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
+ }
+}
+
+defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt,
+ X86cvtp2UIntRnd, avx512vl_i16_info,
+ avx512vl_f16_info, SchedWriteCvtPD2DQ>,
+ T_MAP5PS, EVEX_CD8<16, CD8VF>;
+defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp,
+ X86VUintToFpRnd, avx512vl_f16_info,
+ avx512vl_i16_info, SchedWriteCvtPD2DQ>,
+ T_MAP5XD, EVEX_CD8<16, CD8VF>;
+defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si,
+ X86cvttp2si, X86cvttp2siSAE,
+ avx512vl_i16_info, avx512vl_f16_info,
+ SchedWriteCvtPD2DQ>, T_MAP5PD, EVEX_CD8<16, CD8VF>;
+defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui,
+ X86cvttp2ui, X86cvttp2uiSAE,
+ avx512vl_i16_info, avx512vl_f16_info,
+ SchedWriteCvtPD2DQ>, T_MAP5PS, EVEX_CD8<16, CD8VF>;
+defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int,
+ X86cvtp2IntRnd, avx512vl_i16_info,
+ avx512vl_f16_info, SchedWriteCvtPD2DQ>,
+ T_MAP5PD, EVEX_CD8<16, CD8VF>;
+defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp,
+ X86VSintToFpRnd, avx512vl_f16_info,
+ avx512vl_i16_info, SchedWriteCvtPD2DQ>,
+ T_MAP5XS, EVEX_CD8<16, CD8VF>;
+
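The word-sized converts above are the fully legal vXi16 <-> vXf16 cases marked Legal in X86ISelLowering.cpp; for example (sketch, expected to select vcvtw2ph):

  define <16 x half> @cvt_v16i16_v16f16(<16 x i16> %a) {
    %r = sitofp <16 x i16> %a to <16 x half>
    ret <16 x half> %r
  }
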
+// Convert Half to Signed/Unsigned Doubleword
+multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
+ MaskOpNode, sched.ZMM>,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info,
+ OpNodeRnd, sched.ZMM>, EVEX_V512;
+ }
+ let Predicates = [HasFP16, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.YMM>, EVEX_V256;
+ }
+}
+
+// Convert Half to Signed/Unsigned Doubleword with truncation
+multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
+ MaskOpNode, sched.ZMM>,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info,
+ OpNodeRnd, sched.ZMM>, EVEX_V512;
+ }
+ let Predicates = [HasFP16, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.YMM>, EVEX_V256;
+ }
+}
+
+
+defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int,
+ X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
+ EVEX_CD8<16, CD8VH>;
+defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt,
+ X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PS,
+ EVEX_CD8<16, CD8VH>;
+
+defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si,
+ X86cvttp2si, X86cvttp2siSAE,
+ SchedWriteCvtPS2DQ>, T_MAP5XS,
+ EVEX_CD8<16, CD8VH>;
+
+defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui,
+ X86cvttp2ui, X86cvttp2uiSAE,
+ SchedWriteCvtPS2DQ>, T_MAP5PS,
+ EVEX_CD8<16, CD8VH>;
+
+// Convert Half to Signed/Unsigned Quadword
+multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.ZMM>,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
+ OpNodeRnd, sched.ZMM>, EVEX_V512;
+ }
+ let Predicates = [HasFP16, HasVLX] in {
+ // Explicitly specified broadcast string, since we take only 2 elements
+ // from v8f16x_info source
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.XMM, "{1to2}", "", f32mem>,
+ EVEX_V128;
+ // Explicitly specified broadcast string, since we take only 4 elements
+ // from v8f16x_info source
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.YMM, "{1to4}", "", f64mem>,
+ EVEX_V256;
+ }
+}
+
+// Convert Half to Signed/Unsigned Quadword with truncation
+multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.ZMM>,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
+ OpNodeRnd, sched.ZMM>, EVEX_V512;
+ }
+ let Predicates = [HasFP16, HasVLX] in {
+ // Explicitly specified broadcast string, since we take only 2 elements
+ // from v8f16x_info source
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, EVEX_V128;
+ // Explicitly specified broadcast string, since we take only 4 elements
+ // from v8f16x_info source
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, EVEX_V256;
+ }
+}
+
+defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
+ X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
+ EVEX_CD8<16, CD8VQ>;
+
+defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
+ X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
+ EVEX_CD8<16, CD8VQ>;
+
+defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si,
+ X86cvttp2si, X86cvttp2siSAE,
+ SchedWriteCvtPS2DQ>, T_MAP5PD,
+ EVEX_CD8<16, CD8VQ>;
+
+defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui,
+ X86cvttp2ui, X86cvttp2uiSAE,
+ SchedWriteCvtPS2DQ>, T_MAP5PD,
+ EVEX_CD8<16, CD8VQ>;
+
+// Convert Signed/Unsigned Quadword to Half
+multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
+ // we need "x"/"y"/"z" suffixes in order to distinguish between 128, 256 and
+ // 512 memory forms of these instructions in Asm Parser. They have the same
+ // dest type - 'v8f16x_info'. We also specify the broadcast string explicitly
+ // due to the same reason.
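+ // Illustrative example: all three memory forms write an XMM destination, so
+ // in AT&T syntax the source width is carried by the mnemonic suffix, e.g.
+ //   vcvtqq2phx (%rax), %xmm0   # 128-bit (v2i64) memory source
+ //   vcvtqq2phy (%rax), %xmm0   # 256-bit (v4i64) memory source
+ //   vcvtqq2phz (%rax), %xmm0   # 512-bit (v8i64) memory source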
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode,
+ MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
+ OpNodeRnd, sched.ZMM>, EVEX_V512;
+ }
+ let Predicates = [HasFP16, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
+ null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
+ i128mem, VK2WM>,
+ EVEX_V128, NotEVEX2VEXConvertible;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
+ null_frag, null_frag, sched.YMM, "{1to4}", "{y}",
+ i256mem, VK4WM>,
+ EVEX_V256, NotEVEX2VEXConvertible;
+ }
+
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
+ VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
+ VK2WM:$mask, VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+ (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
+ VK2WM:$mask, VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
+ i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
+ "$dst {${mask}}, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
+ VK2WM:$mask, i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
+ VK2WM:$mask, i64mem:$src), 0, "att">;
+
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
+ VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
+ "$dst {${mask}}, $src}",
+ (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
+ VK4WM:$mask, VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, $src}",
+ (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
+ VK4WM:$mask, VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
+ i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
+ "$dst {${mask}}, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
+ VK4WM:$mask, i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
+ VK4WM:$mask, i64mem:$src), 0, "att">;
+
+ def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
+ VR512:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
+ "$dst {${mask}}, $src}",
+ (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
+ VK8WM:$mask, VR512:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, $src}",
+ (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
+ VK8WM:$mask, VR512:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
+ (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
+ i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
+ "$dst {${mask}}, ${src}{1to8}}",
+ (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
+ VK8WM:$mask, i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, ${src}{1to8}}",
+ (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
+ VK8WM:$mask, i64mem:$src), 0, "att">;
+}
+
+defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
+ X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5PS,
+ EVEX_CD8<64, CD8VF>;
+
+defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
+ X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5XD,
+ EVEX_CD8<64, CD8VF>;
+
+// Convert half to signed/unsigned int 32/64
+defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
+ X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>,
+ T_MAP5XS, EVEX_CD8<16, CD8VT1>;
+defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
+ X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>,
+ T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
+defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
+ X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>,
+ T_MAP5XS, EVEX_CD8<16, CD8VT1>;
+defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
+ X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>,
+ T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
+
+defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
+ any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
+ "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
+defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
+ any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
+ "{q}", HasFP16>, VEX_W, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
+defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
+ any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
+ "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
+defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
+ any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
+ "{q}", HasFP16>, T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
+
+let Predicates = [HasFP16] in {
+ defm VCVTSI2SHZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
+ v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
+ T_MAP5XS, EVEX_CD8<32, CD8VT1>;
+ defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
+ v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">,
+ T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
+ defm VCVTUSI2SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
+ v8f16x_info, i32mem, loadi32,
+ "cvtusi2sh","l">, T_MAP5XS, EVEX_CD8<32, CD8VT1>;
+ defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
+ v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
+ T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
+ def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
+ (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
+
+ def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
+ (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
+
+
+ def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))),
+ (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
+ def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))),
+ (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
+
+ def : Pat<(f16 (any_sint_to_fp GR32:$src)),
+ (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
+ def : Pat<(f16 (any_sint_to_fp GR64:$src)),
+ (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
+
+ def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))),
+ (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
+ def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))),
+ (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
+
+ def : Pat<(f16 (any_uint_to_fp GR32:$src)),
+ (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
+ def : Pat<(f16 (any_uint_to_fp GR64:$src)),
+ (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
+
+ // Patterns used for matching vcvtsi2sh intrinsic sequences from clang
+ // which produce unnecessary vmovsh instructions
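+ // Roughly, the clang intrinsic inserts the converted scalar into lane 0 of
+ // an existing vector, e.g. (illustrative IR):
+ //   %c = sitofp i64 %b to half
+ //   %r = insertelement <8 x half> %a, half %c, i32 0
+ // which would otherwise select to a convert plus a vmovsh; the patterns
+ // below fold the whole sequence into a single *_Int form.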
+ def : Pat<(v8f16 (X86Movsh
+ (v8f16 VR128X:$dst),
+ (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
+ (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
+
+ def : Pat<(v8f16 (X86Movsh
+ (v8f16 VR128X:$dst),
+ (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
+ (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;
+
+ def : Pat<(v8f16 (X86Movsh
+ (v8f16 VR128X:$dst),
+ (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
+ (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
+
+ def : Pat<(v8f16 (X86Movsh
+ (v8f16 VR128X:$dst),
+ (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
+ (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;
+
+ def : Pat<(v8f16 (X86Movsh
+ (v8f16 VR128X:$dst),
+ (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
+ (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
+
+ def : Pat<(v8f16 (X86Movsh
+ (v8f16 VR128X:$dst),
+ (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
+ (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;
+
+ def : Pat<(v8f16 (X86Movsh
+ (v8f16 VR128X:$dst),
+ (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
+ (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
+
+ def : Pat<(v8f16 (X86Movsh
+ (v8f16 VR128X:$dst),
+ (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
+ (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
+} // Predicates = [HasFP16]
+
+let Predicates = [HasFP16, HasVLX] in {
+ // Special patterns to allow use of X86VMSintToFP for masking. Instruction
+ // patterns have been disabled with null_frag.
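+ // For reference, the selected opcodes below follow the usual AVX512 naming:
+ // "rr"/"rm"/"rmb" are the register, memory and broadcast-memory forms, and
+ // the "k"/"kz" suffixes are the merge-masking and zero-masking variants.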
+ def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
+ (VCVTQQ2PHZ256rr VR256X:$src)>;
+ def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
+ VK4WM:$mask),
+ (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
+ def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
+ VK4WM:$mask),
+ (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
+
+ def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
+ (VCVTQQ2PHZ256rm addr:$src)>;
+ def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
+ VK4WM:$mask),
+ (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+ def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
+ VK4WM:$mask),
+ (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
+
+ def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
+ (VCVTQQ2PHZ256rmb addr:$src)>;
+ def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
+ (v8f16 VR128X:$src0), VK4WM:$mask),
+ (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+ def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
+ v8f16x_info.ImmAllZerosV, VK4WM:$mask),
+ (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
+
+ def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
+ (VCVTQQ2PHZ128rr VR128X:$src)>;
+ def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+ def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+ def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
+ (VCVTQQ2PHZ128rm addr:$src)>;
+ def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
+ (VCVTQQ2PHZ128rmb addr:$src)>;
+ def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
+ (v8f16 VR128X:$src0), VK2WM:$mask),
+ (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
+ v8f16x_info.ImmAllZerosV, VK2WM:$mask),
+ (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
+
+ // Special patterns to allow use of X86VMUintToFP for masking. Instruction
+ // patterns have been disabled with null_frag.
+ def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
+ (VCVTUQQ2PHZ256rr VR256X:$src)>;
+ def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
+ VK4WM:$mask),
+ (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
+ def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
+ VK4WM:$mask),
+ (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
+
+ def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
+ (VCVTUQQ2PHZ256rm addr:$src)>;
+ def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
+ VK4WM:$mask),
+ (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+ def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
+ VK4WM:$mask),
+ (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
+
+ def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
+ (VCVTUQQ2PHZ256rmb addr:$src)>;
+ def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
+ (v8f16 VR128X:$src0), VK4WM:$mask),
+ (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+ def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
+ v8f16x_info.ImmAllZerosV, VK4WM:$mask),
+ (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
+
+ def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))),
+ (VCVTUQQ2PHZ128rr VR128X:$src)>;
+ def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+ def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+ def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))),
+ (VCVTUQQ2PHZ128rm addr:$src)>;
+ def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
+ (VCVTUQQ2PHZ128rmb addr:$src)>;
+ def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
+ (v8f16 VR128X:$src0), VK2WM:$mask),
+ (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
+ v8f16x_info.ImmAllZerosV, VK2WM:$mask),
+ (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
+}
diff --git a/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
index 251d66575080e..44007b34fcfe2 100644
--- a/llvm/lib/Target/X86/X86InstrFoldTables.cpp
+++ b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
@@ -4455,8 +4455,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VCVTNE2PS2BF16Z128rrk, X86::VCVTNE2PS2BF16Z128rmk, 0 },
{ X86::VCVTNE2PS2BF16Z256rrk, X86::VCVTNE2PS2BF16Z256rmk, 0 },
{ X86::VCVTNE2PS2BF16Zrrk, X86::VCVTNE2PS2BF16Zrmk, 0 },
+ { X86::VCVTSD2SHZrr_Intk, X86::VCVTSD2SHZrm_Intk, TB_NO_REVERSE },
{ X86::VCVTSD2SSZrr_Intk, X86::VCVTSD2SSZrm_Intk, TB_NO_REVERSE },
+ { X86::VCVTSH2SDZrr_Intk, X86::VCVTSH2SDZrm_Intk, TB_NO_REVERSE },
+ { X86::VCVTSH2SSZrr_Intk, X86::VCVTSH2SSZrm_Intk, TB_NO_REVERSE },
{ X86::VCVTSS2SDZrr_Intk, X86::VCVTSS2SDZrm_Intk, TB_NO_REVERSE },
+ { X86::VCVTSS2SHZrr_Intk, X86::VCVTSS2SHZrm_Intk, TB_NO_REVERSE },
{ X86::VDBPSADBWZ128rrik, X86::VDBPSADBWZ128rmik, 0 },
{ X86::VDBPSADBWZ256rrik, X86::VDBPSADBWZ256rmik, 0 },
{ X86::VDBPSADBWZrrik, X86::VDBPSADBWZrmik, 0 },
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index fe9fea40029b0..b562c360c359e 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -130,14 +130,12 @@ def X86vmtruncs : SDNode<"X86ISD::VMTRUNCS", SDTVmtrunc>;
def X86vmtruncus : SDNode<"X86ISD::VMTRUNCUS", SDTVmtrunc>;
def X86vfpext : SDNode<"X86ISD::VFPEXT",
- SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>,
- SDTCVecEltisVT<1, f32>,
- SDTCisSameSizeAs<0, 1>]>>;
+ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
+ SDTCisFP<1>, SDTCisVec<1>]>>;
def X86strict_vfpext : SDNode<"X86ISD::STRICT_VFPEXT",
- SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>,
- SDTCVecEltisVT<1, f32>,
- SDTCisSameSizeAs<0, 1>]>,
+ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
+ SDTCisFP<1>, SDTCisVec<1>]>,
[SDNPHasChain]>;
def X86any_vfpext : PatFrags<(ops node:$src),
@@ -145,13 +143,13 @@ def X86any_vfpext : PatFrags<(ops node:$src),
(X86vfpext node:$src)]>;
def X86vfpround: SDNode<"X86ISD::VFPROUND",
- SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
- SDTCVecEltisVT<1, f64>,
+ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
+ SDTCisFP<1>, SDTCisVec<1>,
SDTCisOpSmallerThanOp<0, 1>]>>;
def X86strict_vfpround: SDNode<"X86ISD::STRICT_VFPROUND",
- SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
- SDTCVecEltisVT<1, f64>,
+ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
+ SDTCisFP<1>, SDTCisVec<1>,
SDTCisOpSmallerThanOp<0, 1>]>,
[SDNPHasChain]>;
@@ -160,33 +158,32 @@ def X86any_vfpround : PatFrags<(ops node:$src),
(X86vfpround node:$src)]>;
def X86frounds : SDNode<"X86ISD::VFPROUNDS",
- SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
+ SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisVec<0>,
SDTCisSameAs<0, 1>,
- SDTCVecEltisVT<2, f64>,
+ SDTCisFP<2>, SDTCisVec<2>,
SDTCisSameSizeAs<0, 2>]>>;
def X86froundsRnd: SDNode<"X86ISD::VFPROUNDS_RND",
- SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>,
+ SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisVec<0>,
SDTCisSameAs<0, 1>,
- SDTCVecEltisVT<2, f64>,
+ SDTCisFP<2>, SDTCisVec<2>,
SDTCisSameSizeAs<0, 2>,
SDTCisVT<3, i32>]>>;
def X86fpexts : SDNode<"X86ISD::VFPEXTS",
- SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>,
+ SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisVec<0>,
SDTCisSameAs<0, 1>,
- SDTCVecEltisVT<2, f32>,
+ SDTCisFP<2>, SDTCisVec<2>,
SDTCisSameSizeAs<0, 2>]>>;
def X86fpextsSAE : SDNode<"X86ISD::VFPEXTS_SAE",
- SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>,
+ SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisVec<0>,
SDTCisSameAs<0, 1>,
- SDTCVecEltisVT<2, f32>,
+ SDTCisFP<2>, SDTCisVec<2>,
SDTCisSameSizeAs<0, 2>]>>;
def X86vmfpround: SDNode<"X86ISD::VMFPROUND",
- SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>,
- SDTCVecEltisVT<1, f64>,
- SDTCisSameSizeAs<0, 1>,
+ SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisVec<0>,
+ SDTCisFP<1>, SDTCisVec<1>,
SDTCisSameAs<0, 2>,
SDTCVecEltisVT<3, i1>,
SDTCisSameNumEltsAs<1, 3>]>>;
@@ -709,7 +706,6 @@ def X86cvtp2UInt : SDNode<"X86ISD::CVTP2UI", SDTFloatToInt>;
// Masked versions of above
def SDTMVintToFP: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisFP<0>, SDTCisInt<1>,
- SDTCisSameSizeAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCVecEltisVT<3, i1>,
SDTCisSameNumEltsAs<1, 3>]>;
@@ -757,12 +753,12 @@ def X86mcvtps2ph : SDNode<"X86ISD::MCVTPS2PH",
SDTCVecEltisVT<4, i1>,
SDTCisSameNumEltsAs<1, 4>]> >;
def X86vfpextSAE : SDNode<"X86ISD::VFPEXT_SAE",
- SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>,
- SDTCVecEltisVT<1, f32>,
+ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
+ SDTCisFP<1>, SDTCisVec<1>,
SDTCisOpSmallerThanOp<1, 0>]>>;
def X86vfproundRnd: SDNode<"X86ISD::VFPROUND_RND",
- SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
- SDTCVecEltisVT<1, f64>,
+ SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisVec<0>,
+ SDTCisFP<1>, SDTCisVec<1>,
SDTCisOpSmallerThanOp<0, 1>,
SDTCisVT<2, i32>]>>;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index ab5a34181cc3a..a57a956e8135e 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -5177,6 +5177,26 @@ static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum,
case X86::VCVTUSI642SDZrr_Int:
case X86::VCVTUSI642SDZrrb_Int:
case X86::VCVTUSI642SDZrm_Int:
+ case X86::VCVTSI2SHZrr:
+ case X86::VCVTSI2SHZrm:
+ case X86::VCVTSI2SHZrr_Int:
+ case X86::VCVTSI2SHZrrb_Int:
+ case X86::VCVTSI2SHZrm_Int:
+ case X86::VCVTSI642SHZrr:
+ case X86::VCVTSI642SHZrm:
+ case X86::VCVTSI642SHZrr_Int:
+ case X86::VCVTSI642SHZrrb_Int:
+ case X86::VCVTSI642SHZrm_Int:
+ case X86::VCVTUSI2SHZrr:
+ case X86::VCVTUSI2SHZrm:
+ case X86::VCVTUSI2SHZrr_Int:
+ case X86::VCVTUSI2SHZrrb_Int:
+ case X86::VCVTUSI2SHZrm_Int:
+ case X86::VCVTUSI642SHZrr:
+ case X86::VCVTUSI642SHZrm:
+ case X86::VCVTUSI642SHZrr_Int:
+ case X86::VCVTUSI642SHZrrb_Int:
+ case X86::VCVTUSI642SHZrm_Int:
// Load folding won't effect the undef register update since the input is
// a GPR.
return OpNum == 1 && !ForLoadFold;
@@ -5278,6 +5298,26 @@ static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum,
case X86::VSQRTSDZrb_Int:
case X86::VSQRTSDZm:
case X86::VSQRTSDZm_Int:
+ case X86::VCVTSD2SHZrr:
+ case X86::VCVTSD2SHZrr_Int:
+ case X86::VCVTSD2SHZrrb_Int:
+ case X86::VCVTSD2SHZrm:
+ case X86::VCVTSD2SHZrm_Int:
+ case X86::VCVTSS2SHZrr:
+ case X86::VCVTSS2SHZrr_Int:
+ case X86::VCVTSS2SHZrrb_Int:
+ case X86::VCVTSS2SHZrm:
+ case X86::VCVTSS2SHZrm_Int:
+ case X86::VCVTSH2SDZrr:
+ case X86::VCVTSH2SDZrr_Int:
+ case X86::VCVTSH2SDZrrb_Int:
+ case X86::VCVTSH2SDZrm:
+ case X86::VCVTSH2SDZrm_Int:
+ case X86::VCVTSH2SSZrr:
+ case X86::VCVTSH2SSZrr_Int:
+ case X86::VCVTSH2SSZrrb_Int:
+ case X86::VCVTSH2SSZrm:
+ case X86::VCVTSH2SSZrm_Int:
return OpNum == 1;
case X86::VMOVSSZrrk:
case X86::VMOVSDZrrk:
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 70d7cecce31bd..3cf6ad248e9c5 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -1747,20 +1747,20 @@ let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
// XMM only
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (X86any_vfpround (v2f64 VR128:$src)))]>,
+ [(set VR128:$dst, (v4f32 (X86any_vfpround (v2f64 VR128:$src))))]>,
VEX, Sched<[WriteCvtPD2PS]>, VEX_WIG;
def VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps{x}\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (X86any_vfpround (loadv2f64 addr:$src)))]>,
+ [(set VR128:$dst, (v4f32 (X86any_vfpround (loadv2f64 addr:$src))))]>,
VEX, Sched<[WriteCvtPD2PS.Folded]>, VEX_WIG;
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (X86any_vfpround VR256:$src))]>,
+ [(set VR128:$dst, (v4f32 (X86any_vfpround (v4f64 VR256:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtPD2PSY]>, VEX_WIG;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (X86any_vfpround (loadv4f64 addr:$src)))]>,
+ [(set VR128:$dst, (v4f32 (X86any_vfpround (loadv4f64 addr:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtPD2PSY.Folded]>, VEX_WIG;
} // Predicates = [HasAVX, NoVLX]
@@ -1771,11 +1771,11 @@ def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (X86any_vfpround (v2f64 VR128:$src)))]>,
+ [(set VR128:$dst, (v4f32 (X86any_vfpround (v2f64 VR128:$src))))]>,
Sched<[WriteCvtPD2PS]>, SIMD_EXC;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (X86any_vfpround (memopv2f64 addr:$src)))]>,
+ [(set VR128:$dst, (v4f32 (X86any_vfpround (memopv2f64 addr:$src))))]>,
Sched<[WriteCvtPD2PS.Folded]>, SIMD_EXC;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 8c33624e28f0d..db7e42b20cb14 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -1006,6 +1006,117 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::FMULS, X86ISD::FMULS_RND),
X86_INTRINSIC_DATA(avx512fp16_mask_sub_sh_round, INTR_TYPE_SCALAR_MASK,
X86ISD::FSUBS, X86ISD::FSUBS_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtdq2ph_128, TRUNCATE_TO_REG,
+ X86ISD::CVTSI2P, X86ISD::MCVTSI2P),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtpd2ph_128, TRUNCATE_TO_REG,
+ X86ISD::VFPROUND, X86ISD::VMFPROUND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtpd2ph_256, TRUNCATE_TO_REG,
+ X86ISD::VFPROUND, X86ISD::VMFPROUND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtpd2ph_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VFPROUND, X86ISD::VFPROUND_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2dq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2dq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2dq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2pd_128, INTR_TYPE_1OP_MASK, X86ISD::VFPEXT, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2pd_256, INTR_TYPE_1OP_MASK, X86ISD::VFPEXT, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2pd_512, INTR_TYPE_1OP_MASK_SAE,
+ ISD::FP_EXTEND, X86ISD::VFPEXT_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2psx_128, INTR_TYPE_1OP_MASK, X86ISD::VFPEXT, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2psx_256, INTR_TYPE_1OP_MASK, ISD::FP_EXTEND, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2psx_512, INTR_TYPE_1OP_MASK_SAE,
+ ISD::FP_EXTEND, X86ISD::VFPEXT_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2qq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2qq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2qq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2udq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2udq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2udq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uqq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uqq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uqq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uw_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uw_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uw_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2w_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2w_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2w_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtps2phx_128, TRUNCATE_TO_REG,
+ X86ISD::VFPROUND, X86ISD::VMFPROUND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtps2phx_256, INTR_TYPE_1OP_MASK, X86ISD::VFPROUND, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtps2phx_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VFPROUND, X86ISD::VFPROUND_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtqq2ph_128, TRUNCATE_TO_REG,
+ X86ISD::CVTSI2P, X86ISD::MCVTSI2P),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtqq2ph_256, TRUNCATE_TO_REG,
+ X86ISD::CVTSI2P, X86ISD::MCVTSI2P),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtsd2sh_round, INTR_TYPE_SCALAR_MASK_RND,
+ X86ISD::VFPROUNDS, X86ISD::VFPROUNDS_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtsh2sd_round, INTR_TYPE_SCALAR_MASK_SAE,
+ X86ISD::VFPEXTS, X86ISD::VFPEXTS_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtsh2ss_round, INTR_TYPE_SCALAR_MASK_SAE,
+ X86ISD::VFPEXTS, X86ISD::VFPEXTS_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtss2sh_round, INTR_TYPE_SCALAR_MASK_RND,
+ X86ISD::VFPROUNDS, X86ISD::VFPROUNDS_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2dq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2dq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2dq_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2qq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2qq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2qq_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2udq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2udq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2udq_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uqq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uqq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uqq_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uw_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uw_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uw_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2w_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2w_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2w_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtudq2ph_128, TRUNCATE_TO_REG,
+ X86ISD::CVTUI2P, X86ISD::MCVTUI2P),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtuqq2ph_128, TRUNCATE_TO_REG,
+ X86ISD::CVTUI2P, X86ISD::MCVTUI2P),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtuqq2ph_256, TRUNCATE_TO_REG,
+ X86ISD::CVTUI2P, X86ISD::MCVTUI2P),
X86_INTRINSIC_DATA(avx512fp16_max_ph_128, INTR_TYPE_2OP, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(avx512fp16_max_ph_256, INTR_TYPE_2OP, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(avx512fp16_max_ph_512, INTR_TYPE_2OP_SAE, X86ISD::FMAX, X86ISD::FMAX_SAE),
@@ -1015,6 +1126,23 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512fp16_mul_ph_512, INTR_TYPE_2OP, ISD::FMUL, X86ISD::FMUL_RND),
X86_INTRINSIC_DATA(avx512fp16_sub_ph_512, INTR_TYPE_2OP, ISD::FSUB, X86ISD::FSUB_RND),
X86_INTRINSIC_DATA(avx512fp16_vcomi_sh, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
+ /* FP16 scalar convert instructions */
+ X86_INTRINSIC_DATA(avx512fp16_vcvtsh2si32, INTR_TYPE_1OP, X86ISD::CVTS2SI, X86ISD::CVTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_vcvtsh2si64, INTR_TYPE_1OP, X86ISD::CVTS2SI, X86ISD::CVTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_vcvtsh2usi32, INTR_TYPE_1OP, X86ISD::CVTS2UI, X86ISD::CVTS2UI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_vcvtsh2usi64, INTR_TYPE_1OP, X86ISD::CVTS2UI, X86ISD::CVTS2UI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_vcvtsi2sh, INTR_TYPE_2OP,
+ X86ISD::SCALAR_SINT_TO_FP, X86ISD::SCALAR_SINT_TO_FP_RND),
+ X86_INTRINSIC_DATA(avx512fp16_vcvtsi642sh, INTR_TYPE_2OP,
+ X86ISD::SCALAR_SINT_TO_FP, X86ISD::SCALAR_SINT_TO_FP_RND),
+ X86_INTRINSIC_DATA(avx512fp16_vcvttsh2si32, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_vcvttsh2si64, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_vcvttsh2usi32, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_vcvttsh2usi64, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_vcvtusi2sh, INTR_TYPE_2OP,
+ X86ISD::SCALAR_UINT_TO_FP, X86ISD::SCALAR_UINT_TO_FP_RND),
+ X86_INTRINSIC_DATA(avx512fp16_vcvtusi642sh, INTR_TYPE_2OP,
+ X86ISD::SCALAR_UINT_TO_FP, X86ISD::SCALAR_UINT_TO_FP_RND),
X86_INTRINSIC_DATA(bmi_bextr_32, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
X86_INTRINSIC_DATA(bmi_bextr_64, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
X86_INTRINSIC_DATA(bmi_bzhi_32, INTR_TYPE_2OP, X86ISD::BZHI, 0),
diff --git a/llvm/test/CodeGen/X86/avx512fp16-arith-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-arith-intrinsics.ll
index 2a0433cd23071..8148585f8d793 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-arith-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-arith-intrinsics.ll
@@ -282,3 +282,364 @@ define <32 x half> @test_int_x86_avx512fp16_maskz_max_ph_512_sae(<32 x half> %x1
%res1 = select <32 x i1> %mask, <32 x half> %res0, <32 x half> zeroinitializer
ret <32 x half> %res1
}
+
+declare <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half>, <8 x double>, i8, i32)
+
+define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd(<8 x half> %x0, <8 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2pd %xmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> %x0, <8 x double> %x1, i8 %x2, i32 4)
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_sae(<8 x half> %x0, <8 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_sae:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2pd {sae}, %xmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> %x0, <8 x double> %x1, i8 %x2, i32 8)
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_nomask(<8 x half> %x0, <8 x double> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2pd %xmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> %x0, <8 x double> %x1, i8 -1, i32 4)
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_load(<8 x half>* %px0, <8 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtph2pd (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %x0 = load <8 x half>, <8 x half>* %px0, align 16
+ %res = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> %x0, <8 x double> %x1, i8 %x2, i32 4)
+ ret <8 x double> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double>, <8 x half>, i8, i32)
+
+define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph(<8 x double> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtpd2ph %zmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double> %x0, <8 x half> %x1, i8 %x2, i32 4)
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_r(<8 x double> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_r:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtpd2ph {rz-sae}, %zmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double> %x0, <8 x half> %x1, i8 %x2, i32 11)
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_load(<8 x double>* %px0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtpd2phz (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %x0 = load <8 x double>, <8 x double>* %px0, align 64
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double> %x0, <8 x half> %x1, i8 %x2, i32 4)
+ ret <8 x half> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half>, <4 x float>, <8 x half>, i8, i32)
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_ss2sh_round:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtss2sh %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 %x3, i32 4)
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round_r(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_ss2sh_round_r:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtss2sh {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 %x3, i32 11)
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round_nomask(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_ss2sh_round_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtss2sh %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 -1, i32 4)
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round_z(<8 x half> %x0, <4 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_ss2sh_round_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtss2sh %xmm1, %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> %x0, <4 x float> %x1, <8 x half> zeroinitializer, i8 %x2, i32 4)
+ ret <8 x half> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half>, <2 x double>, <8 x half>, i8, i32)
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sd2sh_round:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtsd2sh %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 %x3, i32 4)
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round_r(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sd2sh_round_r:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtsd2sh {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 %x3, i32 11)
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round_nomask(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sd2sh_round_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtsd2sh %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 -1, i32 4)
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round_z(<8 x half> %x0, <2 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sd2sh_round_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtsd2sh %xmm1, %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> %x0, <2 x double> %x1, <8 x half> zeroinitializer, i8 %x2, i32 4)
+ ret <8 x half> %res
+}
+
+declare <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float>, <8 x half>, <4 x float>, i8, i32)
+
+define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2ss_round:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 %x3, i32 4)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round_r(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2ss_round_r:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtsh2ss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 %x3, i32 8)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round_nomask(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2ss_round_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 -1, i32 4)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round_z(<4 x float> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2ss_round_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> %x0, <8 x half> %x1, <4 x float> zeroinitializer, i8 %x2, i32 4)
+ ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double>, <8 x half>, <2 x double>, i8, i32)
+
+define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2sd_round:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtsh2sd %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 %x3, i32 4)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round_r(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2sd_round_r:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtsh2sd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovaps %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 %x3, i32 8)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round_nomask(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2sd_round_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtsh2sd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 -1, i32 4)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round_z(<2 x double> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2sd_round_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtsh2sd %xmm1, %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> %x0, <8 x half> %x1, <2 x double> zeroinitializer, i8 %x2, i32 4)
+ ret <2 x double> %res
+}
+
+declare <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half>, <16 x float>, i16, i32)
+
+define <16 x float> @test_int_x86_avx512_cvt_ph2psx_512(<16 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2psx %ymm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> undef, i16 -1, i32 4)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_int_x86_avx512_mask_cvt_ph2psx_512(<16 x half> %x0, <16 x float> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2psx %ymm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> %x1, i16 %x2, i32 4)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_512(<16 x half> %x0, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2psx %ymm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> zeroinitializer, i16 %x2, i32 4)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_int_x86_avx512_cvt_ph2psx_512r(<16 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_512r:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2psx {sae}, %ymm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> undef, i16 -1, i32 8)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_int_x86_avx512_mask_cvt_ph2psx_512r(<16 x half> %x0, <16 x float> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_512r:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2psx {sae}, %ymm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> %x1, i16 %x2, i32 8)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_512r(<16 x half> %x0, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_512r:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2psx {sae}, %ymm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> zeroinitializer, i16 %x2, i32 8)
+ ret <16 x float> %res
+}
+
+declare <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float>, <16 x half>, i16, i32)
+
+define <16 x half> @test_int_x86_avx512_cvt_ps2phx_512(<16 x float> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvt_ps2phx_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtps2phx %zmm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> undef, i16 -1, i32 4)
+ ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512_mask_cvt_ps2phx_512(<16 x float> %x0, <16 x half> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtps2phx %zmm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovaps %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> %x1, i16 %x2, i32 4)
+ ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512_maskz_cvt_ps2phx_512(<16 x float> %x0, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ps2phx_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtps2phx %zmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> zeroinitializer, i16 %x2, i32 4)
+ ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512_mask_cvt_ps2phx_512r(<16 x float> %x0, <16 x half> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_512r:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtps2phx {rd-sae}, %zmm0, %ymm1 {%k1}
+; CHECK-NEXT: vcvtps2phx {ru-sae}, %zmm0, %ymm0
+; CHECK-NEXT: vaddph %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> %x1, i16 %x2, i32 9)
+ %res1 = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> %x1, i16 -1, i32 10)
+ %res2 = fadd <16 x half> %res, %res1
+ ret <16 x half> %res2
+}
diff --git a/llvm/test/CodeGen/X86/avx512fp16-arith-vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-arith-vl-intrinsics.ll
index da79411006d18..d827206318e76 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-arith-vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-arith-vl-intrinsics.ll
@@ -402,3 +402,403 @@ define <16 x half> @test_min_ph_256_2(<16 x half> %x1, <16 x half> %x2) {
%res0 = call <16 x half> @llvm.x86.avx512fp16.min.ph.256(<16 x half> %x1, <16 x half> %x2)
ret <16 x half> %res0
}
+
+declare <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half>, <4 x double>, i8)
+
+define <4 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_256(<8 x half> %x0, <4 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2pd %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovaps %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 %x2)
+ ret <4 x double> %res
+}
+
+define <4 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_256_nomask(<8 x half> %x0, <4 x double> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_256_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2pd %xmm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 -1)
+ ret <4 x double> %res
+}
+
+declare <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half>, <2 x double>, i8)
+
+define <2 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_128(<8 x half> %x0, <2 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2pd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 %x2)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_128_nomask(<8 x half> %x0, <2 x double> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_128_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2pd %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 -1)
+ ret <2 x double> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double>, <8 x half>, i8)
+
+define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_256(<4 x double> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtpd2ph %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_256_load(<4 x double>* %px0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_256_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtpd2phy (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %x0 = load <4 x double>, <4 x double>* %px0, align 32
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
+ ret <8 x half> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double>, <8 x half>, i8)
+
+define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_128(<2 x double> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtpd2ph %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_128_load(<2 x double>* %px0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_128_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtpd2phx (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %x0 = load <2 x double>, <2 x double>* %px0, align 16
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
+ ret <8 x half> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half>, <4 x i32>, i8)
+
+define <4 x i32> @test_int_x86_avx512_cvt_ph2udq_128(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2udq %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2udq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_128(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2udq %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
+ ret <4 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half>, <8 x i32>, i8)
+
+define <8 x i32> @test_int_x86_avx512_cvt_ph2udq_256(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2udq %xmm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2udq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovaps %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_256(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2udq %xmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
+ ret <8 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half>, <4 x i32>, i8)
+
+define <4 x i32> @test_int_x86_avx512_cvtt_ph2dq_128(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2dq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_128(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
+ ret <4 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half>, <8 x i32>, i8)
+
+define <8 x i32> @test_int_x86_avx512_cvtt_ph2dq_256(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2dq %xmm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2dq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovaps %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_256(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2dq %xmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
+ ret <8 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half>, <4 x i32>, i8)
+
+define <4 x i32> @test_int_x86_avx512_cvtt_ph2udq_128(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2udq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_128(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
+ ret <4 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half>, <8 x i32>, i8)
+
+define <8 x i32> @test_int_x86_avx512_cvtt_ph2udq_256(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2udq %xmm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2udq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovaps %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_256(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2udq %xmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
+ ret <8 x i32> %res
+}
+
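+; 128- and 256-bit conversions between half and single precision
+; (vcvtph2psx / vcvtps2phx), exercised without a mask, with a merge mask
+; (pass-through operand %x1), and with a zero mask.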
+declare <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half>, <4 x float>, i8)
+
+define <4 x float> @test_int_x86_avx512_cvt_ph2psx_128(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2psx %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> undef, i8 -1)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_int_x86_avx512_mask_cvt_ph2psx_128(<8 x half> %x0, <4 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2psx %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> %x1, i8 %x2)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_128(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2psx %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> zeroinitializer, i8 %x2)
+ ret <4 x float> %res
+}
+
+declare <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half>, <8 x float>, i8)
+
+define <8 x float> @test_int_x86_avx512_cvt_ph2psx_256(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2psx %xmm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> undef, i8 -1)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_int_x86_avx512_mask_cvt_ph2psx_256(<8 x half> %x0, <8 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2psx %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovaps %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> %x1, i8 %x2)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_256(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2psx %xmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> zeroinitializer, i8 %x2)
+ ret <8 x float> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float>, <8 x half>, i8)
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_ps2phx_128(<4 x float> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtps2phx %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvtps2phx %xmm0, %xmm0
+; CHECK-NEXT: vaddph %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float> %x0, <8 x half> %x1, i8 %x2)
+ %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float> %x0, <8 x half> %x1, i8 -1)
+ %res2 = fadd <8 x half> %res, %res1
+ ret <8 x half> %res2
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float>, <8 x half>, i8)
+
+define <8 x half> @test_int_x86_avx512_cvt_ps2phx_256(<8 x float> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvt_ps2phx_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtps2phx %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> undef, i8 -1)
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_ps2phx_256(<8 x float> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtps2phx %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> %x1, i8 %x2)
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_maskz_cvt_ps2phx_256(<8 x float> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ps2phx_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtps2phx %ymm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> zeroinitializer, i8 %x2)
+ ret <8 x half> %res
+}
diff --git a/llvm/test/CodeGen/X86/avx512fp16-arith.ll b/llvm/test/CodeGen/X86/avx512fp16-arith.ll
index 1b234387c07c2..194b1c48c3846 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-arith.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-arith.ll
@@ -274,6 +274,68 @@ entry:
ret <32 x i1> %0
}
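+; Regression tests: with AVX512FP16 enabled, blends of v8f16 add/sub results
+; and fptosi/fptoui of v8f32/v8f64 to v8i16 must still select the existing
+; AVX-512 patterns (vpblendw, vcvttps2[u]dq / vcvttpd2[u]dq + vpmovdw).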
+define <8 x half> @regression_test1(<8 x half> %x, <8 x half> %y) #0 {
+; CHECK-LABEL: regression_test1:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: vsubph %xmm1, %xmm0, %xmm2
+; CHECK-NEXT: vaddph %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3],xmm2[4],xmm0[5],xmm2[6],xmm0[7]
+; CHECK-NEXT: retq
+entry:
+ %a = fsub <8 x half> %x, %y
+ %b = fadd <8 x half> %x, %y
+ %c = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+ ret <8 x half> %c
+}
+
+define <8 x i16> @regression_test2(<8 x float> %x) #0 {
+; CHECK-LABEL: regression_test2:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: vcvttps2udq %ymm0, %ymm0
+; CHECK-NEXT: vpmovdw %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %a = fptoui <8 x float> %x to <8 x i16>
+ ret <8 x i16> %a
+}
+
+define <8 x i16> @regression_test3(<8 x float> %x) #0 {
+; CHECK-LABEL: regression_test3:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
+; CHECK-NEXT: vpmovdw %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %a = fptosi <8 x float> %x to <8 x i16>
+ ret <8 x i16> %a
+}
+
+define <8 x i16> @regression_test4(<8 x double> %x) #0 {
+; CHECK-LABEL: regression_test4:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: vcvttpd2udq %zmm0, %ymm0
+; CHECK-NEXT: vpmovdw %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %a = fptoui <8 x double> %x to <8 x i16>
+ ret <8 x i16> %a
+}
+
+define <8 x i16> @regression_test5(<8 x double> %x) #0 {
+; CHECK-LABEL: regression_test5:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: vcvttpd2dq %zmm0, %ymm0
+; CHECK-NEXT: vpmovdw %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %a = fptosi <8 x double> %x to <8 x i16>
+ ret <8 x i16> %a
+}
+
define <8 x i1> @fcmp_v8f16(<8 x half> %a, <8 x half> %b)
; CHECK-LABEL: fcmp_v8f16:
; CHECK: ## %bb.0: ## %entry
diff --git a/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-intrinsics.ll
new file mode 100644
index 0000000000000..e1bf6e3ba01ae
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-intrinsics.ll
@@ -0,0 +1,549 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512fp16 | FileCheck %s
+
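+; 512-bit word <-> half conversions. Masked sitofp/uitofp is expressed as a
+; bitcast of the i32 mask to <32 x i1> followed by a select; vcvt[t]ph2[u]w
+; uses the mask-intrinsic form. The trailing i32 selects the rounding mode:
+; 4 = current direction, 8 = {sae}, 9 = {rd-sae}, 10 = {ru-sae}.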
+declare <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16>, i32)
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtw2ph %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %msk = bitcast i32 %mask to <32 x i1>
+ %res0 = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
+ %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
+ ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_2(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtw2ph %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %msk = bitcast i32 %mask to <32 x i1>
+ %res0 = sitofp <32 x i16> %arg0 to <32 x half>
+ %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
+ ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_b(i16* %arg0, <32 x half> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtw2ph (%rdi){1to32}, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %msk = bitcast i32 %mask to <32 x i1>
+ %scalar = load i16, i16* %arg0
+ %scalar_in_vector = insertelement <32 x i16> undef, i16 %scalar, i32 0
+ %val = shufflevector <32 x i16> %scalar_in_vector, <32 x i16> undef, <32 x i32> zeroinitializer
+ %res0 = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %val, i32 4)
+ %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
+ ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_b_2(i16* %arg0, <32 x half> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_b_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtw2ph (%rdi){1to32}, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %msk = bitcast i32 %mask to <32 x i1>
+ %scalar = load i16, i16* %arg0
+ %scalar_in_vector = insertelement <32 x i16> undef, i16 %scalar, i32 0
+ %val = shufflevector <32 x i16> %scalar_in_vector, <32 x i16> undef, <32 x i32> zeroinitializer
+ %res0 = sitofp <32 x i16> %val to <32 x half>
+ %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
+ ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_r(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_r:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtw2ph {ru-sae}, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %msk = bitcast i32 %mask to <32 x i1>
+ %res0 = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 10)
+ %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
+ ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_nomask(<32 x i16> %arg0) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtw2ph %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
+ ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_nomask_2(<32 x i16> %arg0) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_nomask_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtw2ph %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = sitofp <32 x i16> %arg0 to <32 x half>
+ ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_z(<32 x i16> %arg0, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtw2ph %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %msk = bitcast i32 %mask to <32 x i1>
+ %res0 = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
+ %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> zeroinitializer
+ ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_z_2(<32 x i16> %arg0, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_z_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtw2ph %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %msk = bitcast i32 %mask to <32 x i1>
+ %res0 = sitofp <32 x i16> %arg0 to <32 x half>
+ %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> zeroinitializer
+ ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_load(<32 x i16>* %arg0, <32 x half> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtw2ph (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %msk = bitcast i32 %mask to <32 x i1>
+ %val = load <32 x i16>, <32 x i16>* %arg0
+ %res0 = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %val, i32 4)
+ %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
+ ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_load_2(<32 x i16>* %arg0, <32 x half> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_load_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtw2ph (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %msk = bitcast i32 %mask to <32 x i1>
+ %val = load <32 x i16>, <32 x i16>* %arg0
+ %res0 = sitofp <32 x i16> %val to <32 x half>
+ %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
+ ret <32 x half> %res
+}
+
+declare <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half>, <32 x i16>, i32, i32)
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2w %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 4)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_b(half* %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtph2w (%rdi){1to32}, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load half, half* %arg0
+ %scalar_in_vector = insertelement <32 x half> undef, half %scalar, i32 0
+ %val = shufflevector <32 x half> %scalar_in_vector, <32 x half> undef, <32 x i32> zeroinitializer
+ %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_r(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_r:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2w {rd-sae}, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 9)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_nomask(<32 x half> %arg0, <32 x i16> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2w %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 -1, i32 4)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_z(<32 x half> %arg0, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2w %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %arg0, <32 x i16> zeroinitializer, i32 %mask, i32 4)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_load(<32 x half>* %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtph2w (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %val = load <32 x half>, <32 x half>* %arg0
+ %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
+ ret <32 x i16> %res
+}
+
+
+declare <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16>, i32)
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtuw2ph %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %msk = bitcast i32 %mask to <32 x i1>
+ %res0 = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
+ %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
+ ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_2(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtuw2ph %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %msk = bitcast i32 %mask to <32 x i1>
+ %res0 = uitofp <32 x i16> %arg0 to <32 x half>
+ %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
+ ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_b(i16* %arg0, <32 x half> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtuw2ph (%rdi){1to32}, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %msk = bitcast i32 %mask to <32 x i1>
+ %scalar = load i16, i16* %arg0
+ %scalar_in_vector = insertelement <32 x i16> undef, i16 %scalar, i32 0
+ %val = shufflevector <32 x i16> %scalar_in_vector, <32 x i16> undef, <32 x i32> zeroinitializer
+ %res0 = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %val, i32 4)
+ %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
+ ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_b_2(i16* %arg0, <32 x half> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_b_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtuw2ph (%rdi){1to32}, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %msk = bitcast i32 %mask to <32 x i1>
+ %scalar = load i16, i16* %arg0
+ %scalar_in_vector = insertelement <32 x i16> undef, i16 %scalar, i32 0
+ %val = shufflevector <32 x i16> %scalar_in_vector, <32 x i16> undef, <32 x i32> zeroinitializer
+ %res0 = uitofp <32 x i16> %val to <32 x half>
+ %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
+ ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_r(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_r:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtuw2ph {ru-sae}, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %msk = bitcast i32 %mask to <32 x i1>
+ %res0 = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 10)
+ %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
+ ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_nomask(<32 x i16> %arg0) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtuw2ph %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
+ ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_nomask_2(<32 x i16> %arg0) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_nomask_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtuw2ph %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = uitofp <32 x i16> %arg0 to <32 x half>
+ ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_z(<32 x i16> %arg0, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtuw2ph %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %msk = bitcast i32 %mask to <32 x i1>
+ %res0 = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
+ %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> zeroinitializer
+ ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_z_2(<32 x i16> %arg0, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_z_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtuw2ph %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %msk = bitcast i32 %mask to <32 x i1>
+ %res0 = uitofp <32 x i16> %arg0 to <32 x half>
+ %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> zeroinitializer
+ ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_load(<32 x i16>* %arg0, <32 x half> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtuw2ph (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %msk = bitcast i32 %mask to <32 x i1>
+ %val = load <32 x i16>, <32 x i16>* %arg0
+ %res0 = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %val, i32 4)
+ %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
+ ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_load_2(<32 x i16>* %arg0, <32 x half> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_load_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtuw2ph (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %msk = bitcast i32 %mask to <32 x i1>
+ %val = load <32 x i16>, <32 x i16>* %arg0
+ %res0 = uitofp <32 x i16> %val to <32 x half>
+ %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
+ ret <32 x half> %res
+}
+
+declare <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half>, <32 x i16>, i32, i32)
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2uw %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 4)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_b(half* %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtph2uw (%rdi){1to32}, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load half, half* %arg0
+ %scalar_in_vector = insertelement <32 x half> undef, half %scalar, i32 0
+ %val = shufflevector <32 x half> %scalar_in_vector, <32 x half> undef, <32 x i32> zeroinitializer
+ %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_r(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_r:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2uw {rd-sae}, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 9)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_nomask(<32 x half> %arg0, <32 x i16> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2uw %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 -1, i32 4)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_z(<32 x half> %arg0, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2uw %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %arg0, <32 x i16> zeroinitializer, i32 %mask, i32 4)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_load(<32 x half>* %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtph2uw (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %val = load <32 x half>, <32 x half>* %arg0
+ %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
+ ret <32 x i16> %res
+}
+
+declare <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half>, <32 x i16>, i32, i32)
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2w %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 4)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_b(half* %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvttph2w (%rdi){1to32}, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load half, half* %arg0
+ %scalar_in_vector = insertelement <32 x half> undef, half %scalar, i32 0
+ %val = shufflevector <32 x half> %scalar_in_vector, <32 x half> undef, <32 x i32> zeroinitializer
+ %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_sae(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_sae:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2w {sae}, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 8)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_nomask(<32 x half> %arg0, <32 x i16> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2w %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 -1, i32 4)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_z(<32 x half> %arg0, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2w %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %arg0, <32 x i16> zeroinitializer, i32 %mask, i32 4)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_load(<32 x half>* %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvttph2w (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %val = load <32 x half>, <32 x half>* %arg0
+ %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
+ ret <32 x i16> %res
+}
+
+declare <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half>, <32 x i16>, i32, i32)
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2uw %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 4)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_b(half* %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvttph2uw (%rdi){1to32}, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load half, half* %arg0
+ %scalar_in_vector = insertelement <32 x half> undef, half %scalar, i32 0
+ %val = shufflevector <32 x half> %scalar_in_vector, <32 x half> undef, <32 x i32> zeroinitializer
+ %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_sae(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_sae:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2uw {sae}, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 8)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_nomask(<32 x half> %arg0, <32 x i16> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2uw %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 -1, i32 4)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_z(<32 x half> %arg0, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2uw %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %arg0, <32 x i16> zeroinitializer, i32 %mask, i32 4)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_load(<32 x half>* %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvttph2uw (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %val = load <32 x half>, <32 x half>* %arg0
+ %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
+ ret <32 x i16> %res
+}
diff --git a/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll
new file mode 100644
index 0000000000000..d17b677276b99
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll
@@ -0,0 +1,770 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512vl -mattr=+avx512fp16 | FileCheck %s
+
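+; 128- and 256-bit variants of the word <-> half conversions. There is no
+; rounding operand at these widths; masking is modelled with a bitcast +
+; select for the [su]itofp direction and with the mask intrinsics for
+; vcvt[t]ph2[u]w.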
+define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256(<16 x i16> %arg0, <16 x half> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtw2ph %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovaps %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %msk = bitcast i16 %mask to <16 x i1>
+ %res0 = sitofp <16 x i16> %arg0 to <16 x half>
+ %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
+ ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256_b(i16* %arg0, <16 x half> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtw2ph (%rdi){1to16}, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %msk = bitcast i16 %mask to <16 x i1>
+ %scalar = load i16, i16* %arg0
+ %scalar_in_vector = insertelement <16 x i16> undef, i16 %scalar, i32 0
+ %val = shufflevector <16 x i16> %scalar_in_vector, <16 x i16> undef, <16 x i32> zeroinitializer
+ %res0 = sitofp <16 x i16> %val to <16 x half>
+ %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
+ ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256_nomask(<16 x i16> %arg0, <16 x half> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtw2ph %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = sitofp <16 x i16> %arg0 to <16 x half>
+ ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256_z(<16 x i16> %arg0, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtw2ph %ymm0, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %msk = bitcast i16 %mask to <16 x i1>
+ %res0 = sitofp <16 x i16> %arg0 to <16 x half>
+ %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
+ ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256_load(<16 x i16>* %arg0, <16 x half> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtw2ph (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %msk = bitcast i16 %mask to <16 x i1>
+ %val = load <16 x i16>, <16 x i16>* %arg0
+ %res0 = sitofp <16 x i16> %val to <16 x half>
+ %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
+ ret <16 x half> %res
+}
+
+declare <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half>, <16 x i16>, i16)
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2w %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovaps %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256_b(half* %arg0, <16 x i16> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtph2w (%rdi){1to16}, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load half, half* %arg0
+ %scalar_in_vector = insertelement <16 x half> undef, half %scalar, i32 0
+ %val = shufflevector <16 x half> %scalar_in_vector, <16 x half> undef, <16 x i32> zeroinitializer
+ %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256_nomask(<16 x half> %arg0, <16 x i16> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2w %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %arg0, <16 x i16> %arg1, i16 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256_z(<16 x half> %arg0, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2w %ymm0, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %arg0, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256_load(<16 x half>* %arg0, <16 x i16> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtph2w (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %val = load <16 x half>, <16 x half>* %arg0
+ %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256(<16 x i16> %arg0, <16 x half> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtuw2ph %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovaps %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %msk = bitcast i16 %mask to <16 x i1>
+ %res0 = uitofp <16 x i16> %arg0 to <16 x half>
+ %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
+ ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256_b(i16* %arg0, <16 x half> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtuw2ph (%rdi){1to16}, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %msk = bitcast i16 %mask to <16 x i1>
+ %scalar = load i16, i16* %arg0
+ %scalar_in_vector = insertelement <16 x i16> undef, i16 %scalar, i32 0
+ %val = shufflevector <16 x i16> %scalar_in_vector, <16 x i16> undef, <16 x i32> zeroinitializer
+ %res0 = uitofp <16 x i16> %val to <16 x half>
+ %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
+ ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256_nomask(<16 x i16> %arg0, <16 x half> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtuw2ph %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = uitofp <16 x i16> %arg0 to <16 x half>
+ ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256_z(<16 x i16> %arg0, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtuw2ph %ymm0, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %msk = bitcast i16 %mask to <16 x i1>
+ %res0 = uitofp <16 x i16> %arg0 to <16 x half>
+ %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
+ ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256_load(<16 x i16>* %arg0, <16 x half> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtuw2ph (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %msk = bitcast i16 %mask to <16 x i1>
+ %val = load <16 x i16>, <16 x i16>* %arg0
+ %res0 = uitofp <16 x i16> %val to <16 x half>
+ %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
+ ret <16 x half> %res
+}
+
+declare <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half>, <16 x i16>, i16)
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2uw %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovaps %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256_b(half* %arg0, <16 x i16> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtph2uw (%rdi){1to16}, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load half, half* %arg0
+ %scalar_in_vector = insertelement <16 x half> undef, half %scalar, i32 0
+ %val = shufflevector <16 x half> %scalar_in_vector, <16 x half> undef, <16 x i32> zeroinitializer
+ %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256_nomask(<16 x half> %arg0, <16 x i16> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2uw %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %arg0, <16 x i16> %arg1, i16 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256_z(<16 x half> %arg0, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2uw %ymm0, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %arg0, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256_load(<16 x half>* %arg0, <16 x i16> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtph2uw (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %val = load <16 x half>, <16 x half>* %arg0
+ %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
+ ret <16 x i16> %res
+}
+
+declare <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half>, <16 x i16>, i16)
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2w %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovaps %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256_b(half* %arg0, <16 x i16> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvttph2w (%rdi){1to16}, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load half, half* %arg0
+ %scalar_in_vector = insertelement <16 x half> undef, half %scalar, i32 0
+ %val = shufflevector <16 x half> %scalar_in_vector, <16 x half> undef, <16 x i32> zeroinitializer
+ %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256_nomask(<16 x half> %arg0, <16 x i16> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2w %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %arg0, <16 x i16> %arg1, i16 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256_z(<16 x half> %arg0, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2w %ymm0, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %arg0, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256_load(<16 x half>* %arg0, <16 x i16> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvttph2w (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %val = load <16 x half>, <16 x half>* %arg0
+ %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
+ ret <16 x i16> %res
+}
+
+declare <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half>, <16 x i16>, i16)
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2uw %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovaps %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256_b(half* %arg0, <16 x i16> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvttph2uw (%rdi){1to16}, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load half, half* %arg0
+ %scalar_in_vector = insertelement <16 x half> undef, half %scalar, i32 0
+ %val = shufflevector <16 x half> %scalar_in_vector, <16 x half> undef, <16 x i32> zeroinitializer
+ %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256_nomask(<16 x half> %arg0, <16 x i16> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2uw %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %arg0, <16 x i16> %arg1, i16 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256_z(<16 x half> %arg0, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2uw %ymm0, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %arg0, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256_load(<16 x half>* %arg0, <16 x i16> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvttph2uw (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %val = load <16 x half>, <16 x half>* %arg0
+ %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128(<8 x i16> %arg0, <8 x half> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtw2ph %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %msk = bitcast i8 %mask to <8 x i1>
+ %res0 = sitofp <8 x i16> %arg0 to <8 x half>
+ %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128_b(i16* %arg0, <8 x half> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtw2ph (%rdi){1to8}, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %msk = bitcast i8 %mask to <8 x i1>
+ %scalar = load i16, i16* %arg0
+ %scalar_in_vector = insertelement <8 x i16> undef, i16 %scalar, i32 0
+ %val = shufflevector <8 x i16> %scalar_in_vector, <8 x i16> undef, <8 x i32> zeroinitializer
+ %res0 = sitofp <8 x i16> %val to <8 x half>
+ %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128_nomask(<8 x i16> %arg0, <8 x half> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtw2ph %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = sitofp <8 x i16> %arg0 to <8 x half>
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128_z(<8 x i16> %arg0, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtw2ph %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %msk = bitcast i8 %mask to <8 x i1>
+ %res0 = sitofp <8 x i16> %arg0 to <8 x half>
+ %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128_load(<8 x i16>* %arg0, <8 x half> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtw2ph (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %msk = bitcast i8 %mask to <8 x i1>
+ %val = load <8 x i16>, <8 x i16>* %arg0
+ %res0 = sitofp <8 x i16> %val to <8 x half>
+ %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
+ ret <8 x half> %res
+}
+
+declare <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half>, <8 x i16>, i8)
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2w %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128_b(half* %arg0, <8 x i16> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtph2w (%rdi){1to8}, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load half, half* %arg0
+ %scalar_in_vector = insertelement <8 x half> undef, half %scalar, i32 0
+ %val = shufflevector <8 x half> %scalar_in_vector, <8 x half> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128_nomask(<8 x half> %arg0, <8 x i16> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2w %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %arg0, <8 x i16> %arg1, i8 -1)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128_z(<8 x half> %arg0, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2w %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %arg0, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128_load(<8 x half>* %arg0, <8 x i16> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtph2w (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %val = load <8 x half>, <8 x half>* %arg0
+ %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
+ ret <8 x i16> %res
+}
+
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128(<8 x i16> %arg0, <8 x half> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtuw2ph %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %msk = bitcast i8 %mask to <8 x i1>
+ %res0 = uitofp <8 x i16> %arg0 to <8 x half>
+ %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128_b(i16* %arg0, <8 x half> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtuw2ph (%rdi){1to8}, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %msk = bitcast i8 %mask to <8 x i1>
+ %scalar = load i16, i16* %arg0
+ %scalar_in_vector = insertelement <8 x i16> undef, i16 %scalar, i32 0
+ %val = shufflevector <8 x i16> %scalar_in_vector, <8 x i16> undef, <8 x i32> zeroinitializer
+ %res0 = uitofp <8 x i16> %val to <8 x half>
+ %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128_nomask(<8 x i16> %arg0, <8 x half> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtuw2ph %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = uitofp <8 x i16> %arg0 to <8 x half>
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128_z(<8 x i16> %arg0, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtuw2ph %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %msk = bitcast i8 %mask to <8 x i1>
+ %res0 = uitofp <8 x i16> %arg0 to <8 x half>
+ %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128_load(<8 x i16>* %arg0, <8 x half> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtuw2ph (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %msk = bitcast i8 %mask to <8 x i1>
+ %val = load <8 x i16>, <8 x i16>* %arg0
+ %res0 = uitofp <8 x i16> %val to <8 x half>
+ %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
+ ret <8 x half> %res
+}
+
+declare <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half>, <8 x i16>, i8)
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2uw %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128_b(half* %arg0, <8 x i16> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtph2uw (%rdi){1to8}, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load half, half* %arg0
+ %scalar_in_vector = insertelement <8 x half> undef, half %scalar, i32 0
+ %val = shufflevector <8 x half> %scalar_in_vector, <8 x half> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128_nomask(<8 x half> %arg0, <8 x i16> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2uw %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %arg0, <8 x i16> %arg1, i8 -1)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128_z(<8 x half> %arg0, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2uw %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %arg0, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128_load(<8 x half>* %arg0, <8 x i16> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtph2uw (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %val = load <8 x half>, <8 x half>* %arg0
+ %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
+ ret <8 x i16> %res
+}
+
+declare <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half>, <8 x i16>, i8)
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2w %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128_b(half* %arg0, <8 x i16> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvttph2w (%rdi){1to8}, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load half, half* %arg0
+ %scalar_in_vector = insertelement <8 x half> undef, half %scalar, i32 0
+ %val = shufflevector <8 x half> %scalar_in_vector, <8 x half> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128_nomask(<8 x half> %arg0, <8 x i16> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %arg0, <8 x i16> %arg1, i8 -1)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128_z(<8 x half> %arg0, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2w %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %arg0, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128_load(<8 x half>* %arg0, <8 x i16> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvttph2w (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %val = load <8 x half>, <8 x half>* %arg0
+ %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
+ ret <8 x i16> %res
+}
+
+declare <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half>, <8 x i16>, i8)
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2uw %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128_b(half* %arg0, <8 x i16> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvttph2uw (%rdi){1to8}, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %scalar = load half, half* %arg0
+ %scalar_in_vector = insertelement <8 x half> undef, half %scalar, i32 0
+ %val = shufflevector <8 x half> %scalar_in_vector, <8 x half> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128_nomask(<8 x half> %arg0, <8 x i16> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %arg0, <8 x i16> %arg1, i8 -1)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128_z(<8 x half> %arg0, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %arg0, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128_load(<8 x half>* %arg0, <8 x i16> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvttph2uw (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %val = load <8 x half>, <8 x half>* %arg0
+ %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <4 x half> @test_u16tofp4(<4 x i16> %arg0) {
+; CHECK-LABEL: test_u16tofp4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtuw2ph %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = uitofp <4 x i16> %arg0 to <4 x half>
+ ret <4 x half> %res
+}
+
+define <2 x half> @test_s16tofp2(<2 x i16> %arg0) {
+; CHECK-LABEL: test_s16tofp2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtw2ph %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = sitofp <2 x i16> %arg0 to <2 x half>
+ ret <2 x half> %res
+}
+
+define <4 x half> @test_u8tofp4(<4 x i8> %arg0) {
+; CHECK-LABEL: test_u8tofp4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; CHECK-NEXT: vcvtuw2ph %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = uitofp <4 x i8> %arg0 to <4 x half>
+ ret <4 x half> %res
+}
+
+define <2 x half> @test_s8tofp2(<2 x i8> %arg0) {
+; CHECK-LABEL: test_s8tofp2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpmovsxbw %xmm0, %xmm0
+; CHECK-NEXT: vcvtw2ph %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = sitofp <2 x i8> %arg0 to <2 x half>
+ ret <2 x half> %res
+}
+
+define <2 x half> @test_u1tofp2(<2 x i1> %arg0) {
+; CHECK-LABEL: test_u1tofp2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; CHECK-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT: vcvtuw2ph %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = uitofp <2 x i1> %arg0 to <2 x half>
+ ret <2 x half> %res
+}
+
+define <4 x half> @test_s17tofp4(<4 x i17> %arg0) {
+; CHECK-LABEL: test_s17tofp4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpslld $15, %xmm0, %xmm0
+; CHECK-NEXT: vpsrad $15, %xmm0, %xmm0
+; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = sitofp <4 x i17> %arg0 to <4 x half>
+ ret <4 x half> %res
+}
+
+define <2 x half> @test_u33tofp2(<2 x i33> %arg0) {
+; CHECK-LABEL: test_u33tofp2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT: vcvtuqq2ph %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = uitofp <2 x i33> %arg0 to <2 x half>
+ ret <2 x half> %res
+}
diff --git a/llvm/test/CodeGen/X86/avx512fp16-cvt.ll b/llvm/test/CodeGen/X86/avx512fp16-cvt.ll
new file mode 100644
index 0000000000000..e19ea8426e8ad
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512fp16-cvt.ll
@@ -0,0 +1,1029 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
+
+define half @f32tof16(float %b) nounwind {
+; X64-LABEL: f32tof16:
+; X64: # %bb.0:
+; X64-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: f32tof16:
+; X86: # %bb.0:
+; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
+; X86-NEXT: retl
+ %a = fptrunc float %b to half
+ ret half %a
+}
+
+define half @f64tof16(double %b) nounwind {
+; X64-LABEL: f64tof16:
+; X64: # %bb.0:
+; X64-NEXT: vcvtsd2sh %xmm0, %xmm0, %xmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: f64tof16:
+; X86: # %bb.0:
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vcvtsd2sh %xmm0, %xmm0, %xmm0
+; X86-NEXT: retl
+ %a = fptrunc double %b to half
+ ret half %a
+}
+
+define <16 x half> @f32to16f16(<16 x float> %b) nounwind {
+; CHECK-LABEL: f32to16f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtps2phx %zmm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %a = fptrunc <16 x float> %b to <16 x half>
+ ret <16 x half> %a
+}
+
+define <8 x half> @f32to8f16(<8 x float> %b) {
+; CHECK-LABEL: f32to8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtps2phx %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: ret{{[l|q]}}
+ %a = fptrunc <8 x float> %b to <8 x half>
+ ret <8 x half> %a
+}
+
+define <4 x half> @f32to4f16(<4 x float> %b) {
+; CHECK-LABEL: f32to4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtps2phx %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %a = fptrunc <4 x float> %b to <4 x half>
+ ret <4 x half> %a
+}
+
+define <2 x half> @f32to2f16(<2 x float> %b) {
+; CHECK-LABEL: f32to2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtps2phx %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %a = fptrunc <2 x float> %b to <2 x half>
+ ret <2 x half> %a
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float>, <8 x half>, i8)
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float>, <8 x half>, i8)
+
+define <8 x half> @f32to4f16_mask(<4 x float> %a, <8 x half> %b, i8 %mask) {
+; X64-LABEL: f32to4f16_mask:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1
+; X64-NEXT: vcvtps2phx %xmm0, %xmm1 {%k1}
+; X64-NEXT: vmovaps %xmm1, %xmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: f32to4f16_mask:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
+; X86-NEXT: vcvtps2phx %xmm0, %xmm1 {%k1}
+; X86-NEXT: vmovaps %xmm1, %xmm0
+; X86-NEXT: retl
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float> %a, <8 x half> %b, i8 %mask)
+ ret <8 x half> %res
+}
+
+define <8 x half> @f32to8f16_mask(<8 x float> %a, <8 x half> %b, i8 %mask) {
+; X64-LABEL: f32to8f16_mask:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1
+; X64-NEXT: vcvtps2phx %ymm0, %xmm1 {%k1}
+; X64-NEXT: vmovaps %xmm1, %xmm0
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+;
+; X86-LABEL: f32to8f16_mask:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
+; X86-NEXT: vcvtps2phx %ymm0, %xmm1 {%k1}
+; X86-NEXT: vmovaps %xmm1, %xmm0
+; X86-NEXT: vzeroupper
+; X86-NEXT: retl
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %a, <8 x half> %b, i8 %mask)
+ ret <8 x half> %res
+}
+
+define <8 x half> @f32to8f16_mask2(<8 x float> %b, <8 x i1> %mask) {
+; CHECK-LABEL: f32to8f16_mask2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpsllw $15, %xmm1, %xmm1
+; CHECK-NEXT: vpmovw2m %xmm1, %k1
+; CHECK-NEXT: vcvtps2phx %ymm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: ret{{[l|q]}}
+ %a = fptrunc <8 x float> %b to <8 x half>
+ %c = select <8 x i1>%mask, <8 x half>%a, <8 x half> zeroinitializer
+ ret <8 x half> %c
+}
+
+define <16 x half> @f32to16f16_mask(<16 x float> %b, <16 x i1> %mask) {
+; CHECK-LABEL: f32to16f16_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpsllw $7, %xmm1, %xmm1
+; CHECK-NEXT: vpmovb2m %xmm1, %k1
+; CHECK-NEXT: vcvtps2phx %zmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT: ret{{[l|q]}}
+ %a = fptrunc <16 x float> %b to <16 x half>
+ %c = select <16 x i1>%mask, <16 x half>%a, <16 x half> zeroinitializer
+ ret <16 x half> %c
+}
+
+define float @f16tof32(half %b) nounwind {
+; X64-LABEL: f16tof32:
+; X64: # %bb.0:
+; X64-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: f16tof32:
+; X86: # %bb.0:
+; X86-NEXT: pushl %eax
+; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
+; X86-NEXT: vmovss %xmm0, (%esp)
+; X86-NEXT: flds (%esp)
+; X86-NEXT: popl %eax
+; X86-NEXT: retl
+ %a = fpext half %b to float
+ ret float %a
+}
+
+define double @f16tof64(half %b) nounwind {
+; X64-LABEL: f16tof64:
+; X64: # %bb.0:
+; X64-NEXT: vcvtsh2sd %xmm0, %xmm0, %xmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: f16tof64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: vmovsh 8(%ebp), %xmm0
+; X86-NEXT: vcvtsh2sd %xmm0, %xmm0, %xmm0
+; X86-NEXT: vmovsd %xmm0, (%esp)
+; X86-NEXT: fldl (%esp)
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+ %a = fpext half %b to double
+ ret double %a
+}
+
+define <16 x float> @f16to16f32(<16 x half> %b) nounwind {
+; CHECK-LABEL: f16to16f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2psx %ymm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %a = fpext <16 x half> %b to <16 x float>
+ ret <16 x float> %a
+}
+
+define <8 x float> @f16to8f32(<8 x half> %b) nounwind {
+; CHECK-LABEL: f16to8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2psx %xmm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %a = fpext <8 x half> %b to <8 x float>
+ ret <8 x float> %a
+}
+
+define <4 x float> @f16to4f32(<4 x half> %b) nounwind {
+; CHECK-LABEL: f16to4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2psx %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %a = fpext <4 x half> %b to <4 x float>
+ ret <4 x float> %a
+}
+
+define <2 x float> @f16to2f32(<2 x half> %b) nounwind {
+; CHECK-LABEL: f16to2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2psx %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %a = fpext <2 x half> %b to <2 x float>
+ ret <2 x float> %a
+}
+
+define <16 x float> @f16to16f32_mask(<16 x half> %b, <16 x float> %b1, <16 x float> %a1) {
+; CHECK-LABEL: f16to16f32_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpltps %zmm2, %zmm1, %k1
+; CHECK-NEXT: vcvtph2psx %ymm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: ret{{[l|q]}}
+ %a = fpext <16 x half> %b to <16 x float>
+ %mask = fcmp ogt <16 x float> %a1, %b1
+ %c = select <16 x i1> %mask, <16 x float> %a, <16 x float> zeroinitializer
+ ret <16 x float> %c
+}
+
+define <8 x float> @f16to8f32_mask(<8 x half> %b, <8 x float> %b1, <8 x float> %a1) {
+; CHECK-LABEL: f16to8f32_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpltps %ymm2, %ymm1, %k1
+; CHECK-NEXT: vcvtph2psx %xmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT: ret{{[l|q]}}
+ %a = fpext <8 x half> %b to <8 x float>
+ %mask = fcmp ogt <8 x float> %a1, %b1
+ %c = select <8 x i1> %mask, <8 x float> %a, <8 x float> zeroinitializer
+ ret <8 x float> %c
+}
+
+define <4 x float> @f16to4f32_mask(<4 x half> %b, <4 x float> %b1, <4 x float> %a1) {
+; CHECK-LABEL: f16to4f32_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpltps %xmm2, %xmm1, %k1
+; CHECK-NEXT: vcvtph2psx %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: ret{{[l|q]}}
+ %a = fpext <4 x half> %b to <4 x float>
+ %mask = fcmp ogt <4 x float> %a1, %b1
+ %c = select <4 x i1> %mask, <4 x float> %a, <4 x float> zeroinitializer
+ ret <4 x float> %c
+}
+
+define <2 x float> @f16to2f32_mask(<2 x half> %b, <2 x float> %b1, <2 x float> %a1) {
+; CHECK-LABEL: f16to2f32_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpltps %xmm2, %xmm1, %k1
+; CHECK-NEXT: vcvtph2psx %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: ret{{[l|q]}}
+ %a = fpext <2 x half> %b to <2 x float>
+ %mask = fcmp ogt <2 x float> %a1, %b1
+ %c = select <2 x i1> %mask, <2 x float> %a, <2 x float> zeroinitializer
+ ret <2 x float> %c
+}
+
+define <2 x double> @f16to2f64(<2 x half> %b) nounwind {
+; CHECK-LABEL: f16to2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2pd %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %a = fpext <2 x half> %b to <2 x double>
+ ret <2 x double> %a
+}
+
+define <2 x double> @f16to2f64_mask(<2 x half> %b, <2 x double> %b1, <2 x double> %a1) {
+; CHECK-LABEL: f16to2f64_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpltpd %xmm2, %xmm1, %k1
+; CHECK-NEXT: vcvtph2pd %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: ret{{[l|q]}}
+ %a = fpext <2 x half> %b to <2 x double>
+ %mask = fcmp ogt <2 x double> %a1, %b1
+ %c = select <2 x i1> %mask, <2 x double> %a, <2 x double> zeroinitializer
+ ret <2 x double> %c
+}
+
+define <4 x double> @f16to4f64(<4 x half> %b) nounwind {
+; CHECK-LABEL: f16to4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2pd %xmm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %a = fpext <4 x half> %b to <4 x double>
+ ret <4 x double> %a
+}
+
+define <4 x double> @f16to4f64_mask(<4 x half> %b, <4 x double> %b1, <4 x double> %a1) {
+; CHECK-LABEL: f16to4f64_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpltpd %ymm2, %ymm1, %k1
+; CHECK-NEXT: vcvtph2pd %xmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT: ret{{[l|q]}}
+ %a = fpext <4 x half> %b to <4 x double>
+ %mask = fcmp ogt <4 x double> %a1, %b1
+ %c = select <4 x i1> %mask, <4 x double> %a, <4 x double> zeroinitializer
+ ret <4 x double> %c
+}
+
+define <8 x double> @f16to8f64(<8 x half> %b) nounwind {
+; CHECK-LABEL: f16to8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2pd %xmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %a = fpext <8 x half> %b to <8 x double>
+ ret <8 x double> %a
+}
+
+define <8 x double> @f16to8f64_mask(<8 x half> %b, <8 x double> %b1, <8 x double> %a1) {
+; CHECK-LABEL: f16to8f64_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpltpd %zmm2, %zmm1, %k1
+; CHECK-NEXT: vcvtph2pd %xmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: ret{{[l|q]}}
+ %a = fpext <8 x half> %b to <8 x double>
+ %mask = fcmp ogt <8 x double> %a1, %b1
+ %c = select <8 x i1> %mask, <8 x double> %a, <8 x double> zeroinitializer
+ ret <8 x double> %c
+}
+
+define <2 x half> @f64to2f16(<2 x double> %b) {
+; CHECK-LABEL: f64to2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtpd2ph %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %a = fptrunc <2 x double> %b to <2 x half>
+ ret <2 x half> %a
+}
+
+define <4 x half> @f64to4f16(<4 x double> %b) {
+; CHECK-LABEL: f64to4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtpd2ph %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: ret{{[l|q]}}
+ %a = fptrunc <4 x double> %b to <4 x half>
+ ret <4 x half> %a
+}
+
+define <8 x half> @f64to8f16(<8 x double> %b) {
+; CHECK-LABEL: f64to8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtpd2ph %zmm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: ret{{[l|q]}}
+ %a = fptrunc <8 x double> %b to <8 x half>
+ ret <8 x half> %a
+}
+
+define float @extload_f16_f32(half* %x) {
+; X64-LABEL: extload_f16_f32:
+; X64: # %bb.0:
+; X64-NEXT: vmovsh (%rdi), %xmm0
+; X64-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: extload_f16_f32:
+; X86: # %bb.0:
+; X86-NEXT: pushl %eax
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: vmovsh (%eax), %xmm0
+; X86-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
+; X86-NEXT: vmovss %xmm0, (%esp)
+; X86-NEXT: flds (%esp)
+; X86-NEXT: popl %eax
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+ %a = load half, half* %x
+ %b = fpext half %a to float
+ ret float %b
+}
+
+define double @extload_f16_f64(half* %x) {
+; X64-LABEL: extload_f16_f64:
+; X64: # %bb.0:
+; X64-NEXT: vmovsh (%rdi), %xmm0
+; X64-NEXT: vcvtsh2sd %xmm0, %xmm0, %xmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: extload_f16_f64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: vmovsh (%eax), %xmm0
+; X86-NEXT: vcvtsh2sd %xmm0, %xmm0, %xmm0
+; X86-NEXT: vmovsd %xmm0, (%esp)
+; X86-NEXT: fldl (%esp)
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: .cfi_def_cfa %esp, 4
+; X86-NEXT: retl
+ %a = load half, half* %x
+ %b = fpext half %a to double
+ ret double %b
+}
+
+define float @extload_f16_f32_optsize(half* %x) optsize {
+; X64-LABEL: extload_f16_f32_optsize:
+; X64: # %bb.0:
+; X64-NEXT: vcvtsh2ss (%rdi), %xmm0, %xmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: extload_f16_f32_optsize:
+; X86: # %bb.0:
+; X86-NEXT: pushl %eax
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: vcvtsh2ss (%eax), %xmm0, %xmm0
+; X86-NEXT: vmovss %xmm0, (%esp)
+; X86-NEXT: flds (%esp)
+; X86-NEXT: popl %eax
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+ %a = load half, half* %x
+ %b = fpext half %a to float
+ ret float %b
+}
+
+define double @extload_f16_f64_optsize(half* %x) optsize {
+; X64-LABEL: extload_f16_f64_optsize:
+; X64: # %bb.0:
+; X64-NEXT: vcvtsh2sd (%rdi), %xmm0, %xmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: extload_f16_f64_optsize:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: vcvtsh2sd (%eax), %xmm0, %xmm0
+; X86-NEXT: vmovsd %xmm0, (%esp)
+; X86-NEXT: fldl (%esp)
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: .cfi_def_cfa %esp, 4
+; X86-NEXT: retl
+ %a = load half, half* %x
+ %b = fpext half %a to double
+ ret double %b
+}
+
+define <16 x float> @extload_v16f16_v16f32(<16 x half>* %x) {
+; X64-LABEL: extload_v16f16_v16f32:
+; X64: # %bb.0:
+; X64-NEXT: vcvtph2psx (%rdi), %zmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: extload_v16f16_v16f32:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: vcvtph2psx (%eax), %zmm0
+; X86-NEXT: retl
+ %a = load <16 x half>, <16 x half>* %x
+ %b = fpext <16 x half> %a to <16 x float>
+ ret <16 x float> %b
+}
+
+define <8 x float> @extload_v8f16_v8f32(<8 x half>* %x) {
+; X64-LABEL: extload_v8f16_v8f32:
+; X64: # %bb.0:
+; X64-NEXT: vcvtph2psx (%rdi), %ymm0
+; X64-NEXT: retq
+;
+; X86-LABEL: extload_v8f16_v8f32:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: vcvtph2psx (%eax), %ymm0
+; X86-NEXT: retl
+ %a = load <8 x half>, <8 x half>* %x
+ %b = fpext <8 x half> %a to <8 x float>
+ ret <8 x float> %b
+}
+
+define <4 x float> @extload_v4f16_v4f32(<4 x half>* %x) {
+; X64-LABEL: extload_v4f16_v4f32:
+; X64: # %bb.0:
+; X64-NEXT: vcvtph2psx (%rdi), %xmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: extload_v4f16_v4f32:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: vcvtph2psx (%eax), %xmm0
+; X86-NEXT: retl
+ %a = load <4 x half>, <4 x half>* %x
+ %b = fpext <4 x half> %a to <4 x float>
+ ret <4 x float> %b
+}
+
+define <8 x double> @extload_v8f16_v8f64(<8 x half>* %x) {
+; X64-LABEL: extload_v8f16_v8f64:
+; X64: # %bb.0:
+; X64-NEXT: vcvtph2pd (%rdi), %zmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: extload_v8f16_v8f64:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: vcvtph2pd (%eax), %zmm0
+; X86-NEXT: retl
+ %a = load <8 x half>, <8 x half>* %x
+ %b = fpext <8 x half> %a to <8 x double>
+ ret <8 x double> %b
+}
+
+define <4 x double> @extload_v4f16_v4f64(<4 x half>* %x) {
+; X64-LABEL: extload_v4f16_v4f64:
+; X64: # %bb.0:
+; X64-NEXT: vcvtph2pd (%rdi), %ymm0
+; X64-NEXT: retq
+;
+; X86-LABEL: extload_v4f16_v4f64:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: vcvtph2pd (%eax), %ymm0
+; X86-NEXT: retl
+ %a = load <4 x half>, <4 x half>* %x
+ %b = fpext <4 x half> %a to <4 x double>
+ ret <4 x double> %b
+}
+
+define <2 x double> @extload_v2f16_v2f64(<2 x half>* %x) {
+; X64-LABEL: extload_v2f16_v2f64:
+; X64: # %bb.0:
+; X64-NEXT: vcvtph2pd (%rdi), %xmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: extload_v2f16_v2f64:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: vcvtph2pd (%eax), %xmm0
+; X86-NEXT: retl
+ %a = load <2 x half>, <2 x half>* %x
+ %b = fpext <2 x half> %a to <2 x double>
+ ret <2 x double> %b
+}
+
+define half @s8_to_half(i8 %x) {
+; X64-LABEL: s8_to_half:
+; X64: # %bb.0:
+; X64-NEXT: movsbl %dil, %eax
+; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: s8_to_half:
+; X86: # %bb.0:
+; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
+; X86-NEXT: retl
+ %a = sitofp i8 %x to half
+ ret half %a
+}
+
+define half @s16_to_half(i16 %x) {
+; X64-LABEL: s16_to_half:
+; X64: # %bb.0:
+; X64-NEXT: movswl %di, %eax
+; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: s16_to_half:
+; X86: # %bb.0:
+; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
+; X86-NEXT: retl
+ %a = sitofp i16 %x to half
+ ret half %a
+}
+
+define half @s32_to_half(i32 %x) {
+; X64-LABEL: s32_to_half:
+; X64: # %bb.0:
+; X64-NEXT: vcvtsi2sh %edi, %xmm0, %xmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: s32_to_half:
+; X86: # %bb.0:
+; X86-NEXT: vcvtsi2shl {{[0-9]+}}(%esp), %xmm0, %xmm0
+; X86-NEXT: retl
+ %a = sitofp i32 %x to half
+ ret half %a
+}
+
+define half @s64_to_half(i64 %x) {
+; X64-LABEL: s64_to_half:
+; X64: # %bb.0:
+; X64-NEXT: vcvtsi2sh %rdi, %xmm0, %xmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: s64_to_half:
+; X86: # %bb.0:
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vcvtqq2ph %xmm0, %xmm0
+; X86-NEXT: retl
+ %a = sitofp i64 %x to half
+ ret half %a
+}
+
+define half @s128_to_half(i128 %x) {
+; X64-LABEL: s128_to_half:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: callq __floattihf@PLT
+; X64-NEXT: popq %rax
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: retq
+;
+; X86-LABEL: s128_to_half:
+; X86: # %bb.0:
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: .cfi_def_cfa_offset 20
+; X86-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: vmovups %xmm0, (%esp)
+; X86-NEXT: calll __floattihf
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+ %a = sitofp i128 %x to half
+ ret half %a
+}
+
+define half @u8_to_half(i8 %x) {
+; X64-LABEL: u8_to_half:
+; X64: # %bb.0:
+; X64-NEXT: movzbl %dil, %eax
+; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: u8_to_half:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
+; X86-NEXT: retl
+ %a = uitofp i8 %x to half
+ ret half %a
+}
+
+define half @u16_to_half(i16 %x) {
+; X64-LABEL: u16_to_half:
+; X64: # %bb.0:
+; X64-NEXT: movzwl %di, %eax
+; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: u16_to_half:
+; X86: # %bb.0:
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
+; X86-NEXT: retl
+ %a = uitofp i16 %x to half
+ ret half %a
+}
+
+define half @u32_to_half(i32 %x) {
+; X64-LABEL: u32_to_half:
+; X64: # %bb.0:
+; X64-NEXT: vcvtusi2sh %edi, %xmm0, %xmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: u32_to_half:
+; X86: # %bb.0:
+; X86-NEXT: vcvtusi2shl {{[0-9]+}}(%esp), %xmm0, %xmm0
+; X86-NEXT: retl
+ %a = uitofp i32 %x to half
+ ret half %a
+}
+
+define half @u64_to_half(i64 %x) {
+; X64-LABEL: u64_to_half:
+; X64: # %bb.0:
+; X64-NEXT: vcvtusi2sh %rdi, %xmm0, %xmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: u64_to_half:
+; X86: # %bb.0:
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vcvtuqq2ph %xmm0, %xmm0
+; X86-NEXT: retl
+ %a = uitofp i64 %x to half
+ ret half %a
+}
+
+define half @u128_to_half(i128 %x) {
+; X64-LABEL: u128_to_half:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: callq __floatuntihf@PLT
+; X64-NEXT: popq %rax
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: retq
+;
+; X86-LABEL: u128_to_half:
+; X86: # %bb.0:
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: .cfi_def_cfa_offset 20
+; X86-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: vmovups %xmm0, (%esp)
+; X86-NEXT: calll __floatuntihf
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+ %a = uitofp i128 %x to half
+ ret half %a
+}
+
+define i8 @half_to_s8(half %x) {
+; X64-LABEL: half_to_s8:
+; X64: # %bb.0:
+; X64-NEXT: vcvttsh2si %xmm0, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: retq
+;
+; X86-LABEL: half_to_s8:
+; X86: # %bb.0:
+; X86-NEXT: vcvttsh2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: retl
+ %a = fptosi half %x to i8
+ ret i8 %a
+}
+
+define i16 @half_to_s16(half %x) {
+; X64-LABEL: half_to_s16:
+; X64: # %bb.0:
+; X64-NEXT: vcvttsh2si %xmm0, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+;
+; X86-LABEL: half_to_s16:
+; X86: # %bb.0:
+; X86-NEXT: vcvttsh2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: retl
+ %a = fptosi half %x to i16
+ ret i16 %a
+}
+
+define i32 @half_to_s32(half %x) {
+; X64-LABEL: half_to_s32:
+; X64: # %bb.0:
+; X64-NEXT: vcvttsh2si %xmm0, %eax
+; X64-NEXT: retq
+;
+; X86-LABEL: half_to_s32:
+; X86: # %bb.0:
+; X86-NEXT: vcvttsh2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT: retl
+ %a = fptosi half %x to i32
+ ret i32 %a
+}
+
+define i64 @half_to_s64(half %x) {
+; X64-LABEL: half_to_s64:
+; X64: # %bb.0:
+; X64-NEXT: vcvttsh2si %xmm0, %rax
+; X64-NEXT: retq
+;
+; X86-LABEL: half_to_s64:
+; X86: # %bb.0:
+; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: vcvttph2qq %xmm0, %xmm0
+; X86-NEXT: vmovd %xmm0, %eax
+; X86-NEXT: vpextrd $1, %xmm0, %edx
+; X86-NEXT: retl
+ %a = fptosi half %x to i64
+ ret i64 %a
+}
+
+define i128 @half_to_s128(half %x) {
+; X64-LABEL: half_to_s128:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: callq __fixhfti@PLT
+; X64-NEXT: popq %rcx
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: retq
+;
+; X86-LABEL: half_to_s128:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $32, %esp
+; X86-NEXT: .cfi_offset %esi, -12
+; X86-NEXT: movl 8(%ebp), %esi
+; X86-NEXT: vmovsh 12(%ebp), %xmm0
+; X86-NEXT: vmovsh %xmm0, {{[0-9]+}}(%esp)
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: calll __fixhfti
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: vmovups %xmm0, (%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: leal -4(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %ebp
+; X86-NEXT: .cfi_def_cfa %esp, 4
+; X86-NEXT: retl $4
+ %a = fptosi half %x to i128
+ ret i128 %a
+}
+
+define i8 @half_to_u8(half %x) {
+; X64-LABEL: half_to_u8:
+; X64: # %bb.0:
+; X64-NEXT: vcvttsh2si %xmm0, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: retq
+;
+; X86-LABEL: half_to_u8:
+; X86: # %bb.0:
+; X86-NEXT: vcvttsh2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: retl
+ %a = fptoui half %x to i8
+ ret i8 %a
+}
+
+define i16 @half_to_u16(half %x) {
+; X64-LABEL: half_to_u16:
+; X64: # %bb.0:
+; X64-NEXT: vcvttsh2si %xmm0, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+;
+; X86-LABEL: half_to_u16:
+; X86: # %bb.0:
+; X86-NEXT: vcvttsh2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: retl
+ %a = fptoui half %x to i16
+ ret i16 %a
+}
+
+define i32 @half_to_u32(half %x) {
+; X64-LABEL: half_to_u32:
+; X64: # %bb.0:
+; X64-NEXT: vcvttsh2usi %xmm0, %eax
+; X64-NEXT: retq
+;
+; X86-LABEL: half_to_u32:
+; X86: # %bb.0:
+; X86-NEXT: vcvttsh2usi {{[0-9]+}}(%esp), %eax
+; X86-NEXT: retl
+ %a = fptoui half %x to i32
+ ret i32 %a
+}
+
+define i64 @half_to_u64(half %x) {
+; X64-LABEL: half_to_u64:
+; X64: # %bb.0:
+; X64-NEXT: vcvttsh2usi %xmm0, %rax
+; X64-NEXT: retq
+;
+; X86-LABEL: half_to_u64:
+; X86: # %bb.0:
+; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: vcvttph2uqq %xmm0, %xmm0
+; X86-NEXT: vmovd %xmm0, %eax
+; X86-NEXT: vpextrd $1, %xmm0, %edx
+; X86-NEXT: retl
+ %a = fptoui half %x to i64
+ ret i64 %a
+}
+
+define i128 @half_to_u128(half %x) {
+; X64-LABEL: half_to_u128:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: callq __fixunshfti@PLT
+; X64-NEXT: popq %rcx
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: retq
+;
+; X86-LABEL: half_to_u128:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $32, %esp
+; X86-NEXT: .cfi_offset %esi, -12
+; X86-NEXT: movl 8(%ebp), %esi
+; X86-NEXT: vmovsh 12(%ebp), %xmm0
+; X86-NEXT: vmovsh %xmm0, {{[0-9]+}}(%esp)
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: calll __fixunshfti
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: vmovups %xmm0, (%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: leal -4(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %ebp
+; X86-NEXT: .cfi_def_cfa %esp, 4
+; X86-NEXT: retl $4
+ %a = fptoui half %x to i128
+ ret i128 %a
+}
+
+define x86_fp80 @half_to_f80(half %x) nounwind {
+; X64-LABEL: half_to_f80:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: callq __extendhfxf2@PLT
+; X64-NEXT: popq %rax
+; X64-NEXT: retq
+;
+; X86-LABEL: half_to_f80:
+; X86: # %bb.0:
+; X86-NEXT: pushl %eax
+; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: vmovsh %xmm0, (%esp)
+; X86-NEXT: calll __extendhfxf2
+; X86-NEXT: popl %eax
+; X86-NEXT: retl
+ %a = fpext half %x to x86_fp80
+ ret x86_fp80 %a
+}
+
+define half @f80_to_half(x86_fp80 %x) nounwind {
+; X64-LABEL: f80_to_half:
+; X64: # %bb.0:
+; X64-NEXT: subq $24, %rsp
+; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: fstpt (%rsp)
+; X64-NEXT: callq __truncxfhf2@PLT
+; X64-NEXT: addq $24, %rsp
+; X64-NEXT: retq
+;
+; X86-LABEL: f80_to_half:
+; X86: # %bb.0:
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: fstpt (%esp)
+; X86-NEXT: calll __truncxfhf2
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: retl
+ %a = fptrunc x86_fp80 %x to half
+ ret half %a
+}
+
+; FIXME: We're doing a two-step conversion here on 32-bit:
+; first from f16->f32, then f32->f128. This occurs because
+; common code in LegalizeFloatTypes assumes there are no
+; libcalls from f16 to any type other than f32. Changing this
+; may break other non-x86 targets. The code generated here
+; should still be functional.
+define fp128 @half_to_f128(half %x) nounwind {
+; X64-LABEL: half_to_f128:
+; X64: # %bb.0:
+; X64-NEXT: jmp __extendhftf2@PLT # TAILCALL
+;
+; X86-LABEL: half_to_f128:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $32, %esp
+; X86-NEXT: movl 8(%ebp), %esi
+; X86-NEXT: vmovsh 12(%ebp), %xmm0
+; X86-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
+; X86-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: calll __extendsftf2
+; X86-NEXT: subl $4, %esp
+; X86-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: vmovaps %xmm0, (%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: leal -4(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl $4
+ %a = fpext half %x to fp128
+ ret fp128 %a
+}
+
+define half @f128_to_half(fp128 %x) nounwind {
+; X64-LABEL: f128_to_half:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: callq __trunctfhf2@PLT
+; X64-NEXT: popq %rax
+; X64-NEXT: retq
+;
+; X86-LABEL: f128_to_half:
+; X86: # %bb.0:
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: vmovups %xmm0, (%esp)
+; X86-NEXT: calll __trunctfhf2
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: retl
+ %a = fptrunc fp128 %x to half
+ ret half %a
+}
diff --git a/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
index eb6511e0edc73..cb31baf9a82ea 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
@@ -187,3 +187,506 @@ define i8 @test_int_x86_avx512_mask_cmp_sh_all(<8 x half> %x0, <8 x half> %x1, i
%res13 = and i8 %res11, %res12
ret i8 %res13
}
+
+declare <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32>, i32)
+
+define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512(<16 x i32> %x0, <16 x half> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtdq2ph %zmm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovaps %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %mask = bitcast i16 %x2 to <16 x i1>
+ %res0 = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
+ %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> %x1
+ ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_r(<16 x i32> %x0, <16 x half> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512_r:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtdq2ph {ru-sae}, %zmm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovaps %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %mask = bitcast i16 %x2 to <16 x i1>
+ %res0 = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 10)
+ %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> %x1
+ ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_nomask(<16 x i32> %x0, <16 x half> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtdq2ph %zmm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
+ ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_z(<16 x i32> %x0, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtdq2ph %zmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %mask = bitcast i16 %x2 to <16 x i1>
+ %res0 = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
+ %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> zeroinitializer
+ ret <16 x half> %res
+}
+
+define <16 x half> @sint_to_fp_16i32_to_16f16(<16 x i32> %x) {
+; CHECK-LABEL: sint_to_fp_16i32_to_16f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtdq2ph %zmm0, %ymm0
+; CHECK-NEXT: retq
+ %res = sitofp <16 x i32> %x to <16 x half>
+ ret <16 x half> %res
+}
+
+declare <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32>, i32)
+
+define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_r(<16 x i32> %x0, <16 x half> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_512_r:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtudq2ph {ru-sae}, %zmm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovaps %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %mask = bitcast i16 %x2 to <16 x i1>
+ %res0 = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 10)
+ %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> %x1
+ ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_nomask(<16 x i32> %x0, <16 x half> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_512_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtudq2ph %zmm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
+ ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_z(<16 x i32> %x0, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_512_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtudq2ph %zmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %mask = bitcast i16 %x2 to <16 x i1>
+ %res0 = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
+ %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> zeroinitializer
+ ret <16 x half> %res
+}
+
+define <16 x half> @uint_to_fp_16i32_to_16f16(<16 x i32> %x) {
+; CHECK-LABEL: uint_to_fp_16i32_to_16f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtudq2ph %zmm0, %ymm0
+; CHECK-NEXT: retq
+ %res = uitofp <16 x i32> %x to <16 x half>
+ ret <16 x half> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half>, <16 x i32>, i16, i32)
+
+define <16 x i32> @test_int_x86_avx512_mask_cvt_ph2dq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2dq_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2dq {ru-sae}, %ymm0, %zmm1 {%k1}
+; CHECK-NEXT: vcvtph2dq {rn-sae}, %ymm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 10)
+ %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
+ %res2 = add <16 x i32> %res, %res1
+ ret <16 x i32> %res2
+}
+
+declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.512(<16 x half>, <16 x i32>, i16, i32)
+
+define <16 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2udq {ru-sae}, %ymm0, %zmm1 {%k1}
+; CHECK-NEXT: vcvtph2udq {rn-sae}, %ymm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 10)
+ %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
+ %res2 = add <16 x i32> %res, %res1
+ ret <16 x i32> %res2
+}
+
+declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.512(<16 x half>, <16 x i32>, i16, i32)
+
+define <16 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2dq %ymm0, %zmm1 {%k1}
+; CHECK-NEXT: vcvttph2dq {sae}, %ymm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 4)
+ %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
+ %res2 = add <16 x i32> %res, %res1
+ ret <16 x i32> %res2
+}
+
+declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.512(<16 x half>, <16 x i32>, i16, i32)
+
+define <16 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2udq %ymm0, %zmm1 {%k1}
+; CHECK-NEXT: vcvttph2udq {sae}, %ymm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 4)
+ %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
+ %res2 = add <16 x i32> %res, %res1
+ ret <16 x i32> %res2
+}
+
+declare <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64>, i32)
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtqq2ph %zmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %mask = bitcast i8 %x2 to <8 x i1>
+ %res0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
+ %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_r(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512_r:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtqq2ph {ru-sae}, %zmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %mask = bitcast i8 %x2 to <8 x i1>
+ %res0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 10)
+ %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_nomask(<8 x i64> %x0, <8 x half> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtqq2ph %zmm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_z(<8 x i64> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtqq2ph %zmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %mask = bitcast i8 %x2 to <8 x i1>
+ %res0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
+ %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
+ ret <8 x half> %res
+}
+
+declare <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64>, i32)
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtuqq2ph %zmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %mask = bitcast i8 %x2 to <8 x i1>
+ %res0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
+ %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_r(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512_r:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtuqq2ph {ru-sae}, %zmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %mask = bitcast i8 %x2 to <8 x i1>
+ %res0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 10)
+ %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_nomask(<8 x i64> %x0, <8 x half> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtuqq2ph %zmm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_z(<8 x i64> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtuqq2ph %zmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %mask = bitcast i8 %x2 to <8 x i1>
+ %res0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
+ %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
+ ret <8 x half> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2qq.512(<8 x half>, <8 x i64>, i8, i32)
+
+define <8 x i64> @test_int_x86_avx512_mask_cvt_ph2qq_512(<8 x half> %x0, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2qq_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2qq {ru-sae}, %xmm0, %zmm1 {%k1}
+; CHECK-NEXT: vcvtph2qq {rn-sae}, %xmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2qq.512(<8 x half> %x0, <8 x i64> %x1, i8 %x2, i32 10)
+ %res1 = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2qq.512(<8 x half> %x0, <8 x i64> %x1, i8 -1, i32 8)
+ %res2 = add <8 x i64> %res, %res1
+ ret <8 x i64> %res2
+}
+
+declare <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2uqq.512(<8 x half>, <8 x i64>, i8, i32)
+
+define <8 x i64> @test_int_x86_avx512_mask_cvt_ph2uqq_512(<8 x half> %x0, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2uqq_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2uqq {ru-sae}, %xmm0, %zmm1 {%k1}
+; CHECK-NEXT: vcvtph2uqq {rn-sae}, %xmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 %x2, i32 10)
+ %res1 = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 -1, i32 8)
+ %res2 = add <8 x i64> %res, %res1
+ ret <8 x i64> %res2
+}
+
+declare <8 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.512(<8 x half>, <8 x i64>, i8, i32)
+
+define <8 x i64> @test_int_x86_avx512_mask_cvtt_ph2uqq_512(<8 x half> %x0, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2uqq_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2uqq {sae}, %xmm0, %zmm1 {%k1}
+; CHECK-NEXT: vcvttph2uqq %xmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 %x2, i32 8)
+ %res1 = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 -1, i32 4)
+ %res2 = add <8 x i64> %res, %res1
+ ret <8 x i64> %res2
+}
+
+declare i32 @llvm.x86.avx512fp16.vcvtsh2si32(<8 x half>, i32)
+
+define i32 @test_x86_avx512fp16_vcvtsh2si32(<8 x half> %arg0) {
+; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2si32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtsh2si %xmm0, %ecx
+; CHECK-NEXT: vcvtsh2si {rz-sae}, %xmm0, %eax
+; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: retq
+ %res1 = call i32 @llvm.x86.avx512fp16.vcvtsh2si32(<8 x half> %arg0, i32 4)
+ %res2 = call i32 @llvm.x86.avx512fp16.vcvtsh2si32(<8 x half> %arg0, i32 11)
+ %res = add i32 %res1, %res2
+ ret i32 %res
+}
+
+declare i64 @llvm.x86.avx512fp16.vcvtsh2si64(<8 x half>, i32)
+
+define i64 @test_x86_avx512fp16_vcvtsh2si64(<8 x half> %arg0) {
+; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2si64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtsh2si %xmm0, %rcx
+; CHECK-NEXT: vcvtsh2si {ru-sae}, %xmm0, %rax
+; CHECK-NEXT: addq %rcx, %rax
+; CHECK-NEXT: retq
+ %res1 = call i64 @llvm.x86.avx512fp16.vcvtsh2si64(<8 x half> %arg0, i32 4)
+ %res2 = call i64 @llvm.x86.avx512fp16.vcvtsh2si64(<8 x half> %arg0, i32 10)
+ %res = add i64 %res1, %res2
+ ret i64 %res
+}
+
+declare i32 @llvm.x86.avx512fp16.vcvttsh2si32(<8 x half>, i32)
+
+define i32 @test_x86_avx512fp16_vcvttsh2si32(<8 x half> %arg0) {
+; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2si32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttsh2si %xmm0, %ecx
+; CHECK-NEXT: vcvttsh2si {sae}, %xmm0, %eax
+; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: retq
+ %res1 = call i32 @llvm.x86.avx512fp16.vcvttsh2si32(<8 x half> %arg0, i32 4)
+ %res2 = call i32 @llvm.x86.avx512fp16.vcvttsh2si32(<8 x half> %arg0, i32 8)
+ %res = add i32 %res1, %res2
+ ret i32 %res
+}
+
+declare i64 @llvm.x86.avx512fp16.vcvttsh2si64(<8 x half>, i32)
+
+define i64 @test_x86_avx512fp16_vcvttsh2si64(<8 x half> %arg0) {
+; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2si64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttsh2si %xmm0, %rcx
+; CHECK-NEXT: vcvttsh2si {sae}, %xmm0, %rax
+; CHECK-NEXT: addq %rcx, %rax
+; CHECK-NEXT: retq
+ %res1 = call i64 @llvm.x86.avx512fp16.vcvttsh2si64(<8 x half> %arg0, i32 4)
+ %res2 = call i64 @llvm.x86.avx512fp16.vcvttsh2si64(<8 x half> %arg0, i32 8)
+ %res = add i64 %res1, %res2
+ ret i64 %res
+}
+
+
+declare i32 @llvm.x86.avx512fp16.vcvtsh2usi32(<8 x half>, i32)
+
+define i32 @test_x86_avx512fp16_vcvtsh2usi32(<8 x half> %arg0) {
+; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2usi32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtsh2usi %xmm0, %ecx
+; CHECK-NEXT: vcvtsh2usi {rd-sae}, %xmm0, %eax
+; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: retq
+ %res1 = call i32 @llvm.x86.avx512fp16.vcvtsh2usi32(<8 x half> %arg0, i32 4)
+ %res2 = call i32 @llvm.x86.avx512fp16.vcvtsh2usi32(<8 x half> %arg0, i32 9)
+ %res = add i32 %res1, %res2
+ ret i32 %res
+}
+
+
+declare i64 @llvm.x86.avx512fp16.vcvtsh2usi64(<8 x half>, i32)
+
+define i64 @test_x86_avx512fp16_vcvtsh2usi64(<8 x half> %arg0) {
+; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2usi64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtsh2usi %xmm0, %rcx
+; CHECK-NEXT: vcvtsh2usi {ru-sae}, %xmm0, %rax
+; CHECK-NEXT: addq %rcx, %rax
+; CHECK-NEXT: retq
+ %res1 = call i64 @llvm.x86.avx512fp16.vcvtsh2usi64(<8 x half> %arg0, i32 4)
+ %res2 = call i64 @llvm.x86.avx512fp16.vcvtsh2usi64(<8 x half> %arg0, i32 10)
+ %res = add i64 %res1, %res2
+ ret i64 %res
+}
+
+declare i32 @llvm.x86.avx512fp16.vcvttsh2usi32(<8 x half>, i32)
+
+define i32 @test_x86_avx512fp16_vcvttsh2usi32(<8 x half> %arg0) {
+; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2usi32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttsh2usi %xmm0, %ecx
+; CHECK-NEXT: vcvttsh2usi {sae}, %xmm0, %eax
+; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: retq
+ %res1 = call i32 @llvm.x86.avx512fp16.vcvttsh2usi32(<8 x half> %arg0, i32 4)
+ %res2 = call i32 @llvm.x86.avx512fp16.vcvttsh2usi32(<8 x half> %arg0, i32 8)
+ %res = add i32 %res1, %res2
+ ret i32 %res
+}
+
+declare i64 @llvm.x86.avx512fp16.vcvttsh2usi64(<8 x half>, i32)
+
+define i64 @test_x86_avx512fp16_vcvttsh2usi64(<8 x half> %arg0) {
+; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2usi64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttsh2usi %xmm0, %rcx
+; CHECK-NEXT: vcvttsh2usi {sae}, %xmm0, %rax
+; CHECK-NEXT: addq %rcx, %rax
+; CHECK-NEXT: retq
+ %res1 = call i64 @llvm.x86.avx512fp16.vcvttsh2usi64(<8 x half> %arg0, i32 4)
+ %res2 = call i64 @llvm.x86.avx512fp16.vcvttsh2usi64(<8 x half> %arg0, i32 8)
+ %res = add i64 %res1, %res2
+ ret i64 %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.vcvtsi2sh(<8 x half>, i32, i32)
+
+define <8 x half> @test_x86_avx512fp16_vcvtsi2sh(<8 x half> %arg0, i32 %arg1) {
+; CHECK-LABEL: test_x86_avx512fp16_vcvtsi2sh:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtsi2sh %edi, %xmm0, %xmm1
+; CHECK-NEXT: vcvtsi2sh %edi, {rd-sae}, %xmm0, %xmm0
+; CHECK-NEXT: vaddph %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi2sh(<8 x half> %arg0, i32 %arg1, i32 4)
+ %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi2sh(<8 x half> %arg0, i32 %arg1, i32 9)
+ %res = fadd <8 x half> %res1, %res2
+ ret <8 x half> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.vcvtsi642sh(<8 x half>, i64, i32)
+
+define <8 x half> @test_x86_avx512fp16_vcvtsi642sh(<8 x half> %arg0, i64 %arg1) {
+; CHECK-LABEL: test_x86_avx512fp16_vcvtsi642sh:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtsi2sh %rdi, %xmm0, %xmm1
+; CHECK-NEXT: vcvtsi2sh %rdi, {rn-sae}, %xmm0, %xmm0
+; CHECK-NEXT: vaddph %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi642sh(<8 x half> %arg0, i64 %arg1, i32 4)
+ %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi642sh(<8 x half> %arg0, i64 %arg1, i32 8)
+ %res = fadd <8 x half> %res1, %res2
+ ret <8 x half> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.vcvtusi2sh(<8 x half>, i32, i32)
+
+define <8 x half> @test_x86_avx512fp16_vcvtusi2sh(<8 x half> %arg0, i32 %arg1) {
+; CHECK-LABEL: test_x86_avx512fp16_vcvtusi2sh:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtusi2sh %edi, %xmm0, %xmm1
+; CHECK-NEXT: vcvtusi2sh %edi, {rd-sae}, %xmm0, %xmm0
+; CHECK-NEXT: vaddph %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi2sh(<8 x half> %arg0, i32 %arg1, i32 4)
+ %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi2sh(<8 x half> %arg0, i32 %arg1, i32 9)
+ %res = fadd <8 x half> %res1, %res2
+ ret <8 x half> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.vcvtusi642sh(<8 x half>, i64, i32)
+
+define <8 x half> @test_x86_avx512fp16_vcvtusi642sh(<8 x half> %arg0, i64 %arg1) {
+; CHECK-LABEL: test_x86_avx512fp16_vcvtusi642sh:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtusi2sh %rdi, %xmm0, %xmm1
+; CHECK-NEXT: vcvtusi2sh %rdi, {rd-sae}, %xmm0, %xmm0
+; CHECK-NEXT: vaddph %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi642sh(<8 x half> %arg0, i64 %arg1, i32 4)
+ %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi642sh(<8 x half> %arg0, i64 %arg1, i32 9)
+ %res = fadd <8 x half> %res1, %res2
+ ret <8 x half> %res
+}
diff --git a/llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll
index 42a5eecadda67..6bfd69ef5db97 100644
--- a/llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll
@@ -23,3 +23,925 @@ entry:
%0 = bitcast <8 x i16> %vecinit7.i to <2 x i64>
ret <2 x i64> %0
}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_256(<8 x i32> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtdq2ph %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %mask = bitcast i8 %x2 to <8 x i1>
+ %res0 = sitofp <8 x i32> %x0 to <8 x half>
+ %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_256_z(<8 x i32> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_256_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtdq2ph %ymm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %mask = bitcast i8 %x2 to <8 x i1>
+ %res0 = sitofp <8 x i32> %x0 to <8 x half>
+ %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
+ ret <8 x half> %res
+}
+
+define <8 x half> @sint_to_fp_8i32_to_8f16(<8 x i32> %x) {
+; CHECK-LABEL: sint_to_fp_8i32_to_8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtdq2ph %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %res = sitofp <8 x i32> %x to <8 x half>
+ ret <8 x half> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32>, <8 x half>, i8)
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_128(<4 x i32> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 %x2)
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_128_nomask(<4 x i32> %x0, <8 x half> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_128_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 -1)
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_128_z(<4 x i32> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_128_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32> %x0, <8 x half> zeroinitializer, i8 %x2)
+ ret <8 x half> %res
+}
+
+define <4 x half> @sint_to_fp_4i32_to_4f16(<4 x i32> %x) {
+; CHECK-LABEL: sint_to_fp_4i32_to_4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = sitofp <4 x i32> %x to <4 x half>
+ ret <4 x half> %res
+}
+
+define <2 x half> @sint_to_fp_2i32_to_2f16(<2 x i32> %x) {
+; CHECK-LABEL: sint_to_fp_2i32_to_2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = sitofp <2 x i32> %x to <2 x half>
+ ret <2 x half> %res
+}
+
+define <4 x i32> @fp_to_sint_4f16_to_4i32(<4 x half> %x) {
+; CHECK-LABEL: fp_to_sint_4f16_to_4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = fptosi <4 x half> %x to <4 x i32>
+ ret <4 x i32> %res
+}
+
+define <2 x i32> @fp_to_sint_2f16_to_2i32(<2 x half> %x) {
+; CHECK-LABEL: fp_to_sint_2f16_to_2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = fptosi <2 x half> %x to <2 x i32>
+ ret <2 x i32> %res
+}
+
+define <2 x i16> @fp_to_sint_2f16_to_2i16(<2 x half> %x) {
+; CHECK-LABEL: fp_to_sint_2f16_to_2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = fptosi <2 x half> %x to <2 x i16>
+ ret <2 x i16> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_256(<8 x i32> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtudq2ph %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %mask = bitcast i8 %x2 to <8 x i1>
+ %res0 = uitofp <8 x i32> %x0 to <8 x half>
+ %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_256_z(<8 x i32> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_256_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtudq2ph %ymm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %mask = bitcast i8 %x2 to <8 x i1>
+ %res0 = uitofp <8 x i32> %x0 to <8 x half>
+ %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
+ ret <8 x half> %res
+}
+
+define <8 x half> @uint_to_fp_8i32_to_8f16(<8 x i32> %x) {
+; CHECK-LABEL: uint_to_fp_8i32_to_8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtudq2ph %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %res = uitofp <8 x i32> %x to <8 x half>
+ ret <8 x half> %res
+}
+
+define <8 x i32> @fp_to_uint_8f16_to_8i32(<8 x half> %x) {
+; CHECK-LABEL: fp_to_uint_8f16_to_8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2udq %xmm0, %ymm0
+; CHECK-NEXT: retq
+ %res = fptoui <8 x half> %x to <8 x i32>
+ ret <8 x i32> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32>, <8 x half>, i8)
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_128(<4 x i32> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 %x2)
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_128_nomask(<4 x i32> %x0, <8 x half> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_128_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 -1)
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_128_z(<4 x i32> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_128_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32> %x0, <8 x half> zeroinitializer, i8 %x2)
+ ret <8 x half> %res
+}
+
+define <4 x half> @uint_to_fp_4i32_to_4f16(<4 x i32> %x) {
+; CHECK-LABEL: uint_to_fp_4i32_to_4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = uitofp <4 x i32> %x to <4 x half>
+ ret <4 x half> %res
+}
+
+define <2 x half> @uint_to_fp_2i32_to_2f16(<2 x i32> %x) {
+; CHECK-LABEL: uint_to_fp_2i32_to_2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = uitofp <2 x i32> %x to <2 x half>
+ ret <2 x half> %res
+}
+
+define <4 x i32> @fp_to_uint_4f16_to_4i32(<4 x half> %x) {
+; CHECK-LABEL: fp_to_uint_4f16_to_4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = fptoui <4 x half> %x to <4 x i32>
+ ret <4 x i32> %res
+}
+
+define <2 x i32> @fp_to_uint_2f16_to_2i32(<2 x half> %x) {
+; CHECK-LABEL: fp_to_uint_2f16_to_2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = fptoui <2 x half> %x to <2 x i32>
+ ret <2 x i32> %res
+}
+
+define <2 x i16> @fp_to_uint_2f16_to_2i16(<2 x half> %x) {
+; CHECK-LABEL: fp_to_uint_2f16_to_2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = fptoui <2 x half> %x to <2 x i16>
+ ret <2 x i16> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half>, <4 x i32>, i8)
+
+define <4 x i32> @test_int_x86_avx512_cvt_ph2dq_128(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvt_ph2dq_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2dq %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_avx512_mask_cvt_ph2dq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2dq_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2dq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_avx512_maskz_cvt_ph2dq_128(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2dq_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2dq %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
+ ret <4 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half>, <8 x i32>, i8)
+
+define <8 x i32> @test_int_x86_avx512_cvt_ph2dq_256(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvt_ph2dq_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2dq %xmm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_avx512_mask_cvt_ph2dq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2dq_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2dq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovaps %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_avx512_maskz_cvt_ph2dq_256(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2dq_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2dq %xmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
+ ret <8 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half>, <4 x i32>, i8)
+
+define <4 x i32> @test_int_x86_avx512_cvt_ph2udq_128(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2udq %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2udq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_128(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2udq %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
+ ret <4 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half>, <8 x i32>, i8)
+
+define <8 x i32> @test_int_x86_avx512_cvt_ph2udq_256(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2udq %xmm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2udq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovaps %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_256(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2udq %xmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
+ ret <8 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half>, <4 x i32>, i8)
+
+define <4 x i32> @test_int_x86_avx512_cvtt_ph2dq_128(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2dq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_128(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
+ ret <4 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half>, <8 x i32>, i8)
+
+define <8 x i32> @test_int_x86_avx512_cvtt_ph2dq_256(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2dq %xmm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2dq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovaps %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_256(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2dq %xmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
+ ret <8 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half>, <4 x i32>, i8)
+
+define <4 x i32> @test_int_x86_avx512_cvtt_ph2udq_128(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2udq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_128(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
+ ret <4 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half>, <8 x i32>, i8)
+
+define <8 x i32> @test_int_x86_avx512_cvtt_ph2udq_256(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2udq %xmm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2udq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovaps %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_256(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2udq %xmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
+ ret <8 x i32> %res
+}
+
+declare <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half>, <4 x double>, i8)
+
+define <4 x double> @test_int_x86_avx512_mask_cvt_ph2pd_256(<8 x half> %x0, <4 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2pd %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovaps %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 %x2)
+ ret <4 x double> %res
+}
+
+define <4 x double> @test_int_x86_avx512_mask_cvt_ph2pd_256_nomask(<8 x half> %x0, <4 x double> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_256_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2pd %xmm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 -1)
+ ret <4 x double> %res
+}
+
+declare <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half>, <2 x double>, i8)
+
+define <2 x double> @test_int_x86_avx512_mask_cvt_ph2pd_128(<8 x half> %x0, <2 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtph2pd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 %x2)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_int_x86_avx512_mask_cvt_ph2pd_128_nomask(<8 x half> %x0, <2 x double> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_128_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2pd %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 -1)
+ ret <2 x double> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double>, <8 x half>, i8)
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_256(<4 x double> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtpd2ph %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_256_load(<4 x double>* %px0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_256_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtpd2phy (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %x0 = load <4 x double>, <4 x double>* %px0, align 32
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
+ ret <8 x half> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double>, <8 x half>, i8)
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_128(<2 x double> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtpd2ph %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_128_load(<2 x double>* %px0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_128_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vcvtpd2phx (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %x0 = load <2 x double>, <2 x double>* %px0, align 16
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
+ ret <8 x half> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64>, <8 x half>, i8)
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_256(<4 x i64> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtqq2ph %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 %x2)
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_256_nomask(<4 x i64> %x0, <8 x half> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_256_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtqq2ph %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 -1)
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_256_z(<4 x i64> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_256_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtqq2ph %ymm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64> %x0, <8 x half> zeroinitializer, i8 %x2)
+ ret <8 x half> %res
+}
+
+define <4 x half> @sint_to_fp_4i64_to_4f16(<4 x i64> %x) {
+; CHECK-LABEL: sint_to_fp_4i64_to_4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtqq2ph %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %res = sitofp <4 x i64> %x to <4 x half>
+ ret <4 x half> %res
+}
+
+define <4 x i64> @fp_to_sint_4f16_to_4i64(<4 x half> %x) {
+; CHECK-LABEL: fp_to_sint_4f16_to_4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2qq %xmm0, %ymm0
+; CHECK-NEXT: retq
+ %res = fptosi <4 x half> %x to <4 x i64>
+ ret <4 x i64> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64>, <8 x half>, i8)
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_128(<2 x i64> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtqq2ph %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 %x2)
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_128_nomask(<2 x i64> %x0, <8 x half> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_128_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtqq2ph %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 -1)
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_128_z(<2 x i64> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_128_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtqq2ph %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64> %x0, <8 x half> zeroinitializer, i8 %x2)
+ ret <8 x half> %res
+}
+
+define <2 x half> @sint_to_fp_2i64_to_2f16(<2 x i64> %x) {
+; CHECK-LABEL: sint_to_fp_2i64_to_2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtqq2ph %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = sitofp <2 x i64> %x to <2 x half>
+ ret <2 x half> %res
+}
+
+define <2 x i64> @fp_to_sint_2f16_to_2i64(<2 x half> %x) {
+; CHECK-LABEL: fp_to_sint_2f16_to_2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2qq %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = fptosi <2 x half> %x to <2 x i64>
+ ret <2 x i64> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64>, <8 x half>, i8)
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_256(<4 x i64> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtuqq2ph %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 %x2)
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_256_nomask(<4 x i64> %x0, <8 x half> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_256_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtuqq2ph %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 -1)
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_256_z(<4 x i64> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_256_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtuqq2ph %ymm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64> %x0, <8 x half> zeroinitializer, i8 %x2)
+ ret <8 x half> %res
+}
+
+define <4 x half> @uint_to_fp_4i64_to_4f16(<4 x i64> %x) {
+; CHECK-LABEL: uint_to_fp_4i64_to_4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtuqq2ph %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %res = uitofp <4 x i64> %x to <4 x half>
+ ret <4 x half> %res
+}
+
+define <4 x i64> @fp_to_uint_4f16_to_4i64(<4 x half> %x) {
+; CHECK-LABEL: fp_to_uint_4f16_to_4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2uqq %xmm0, %ymm0
+; CHECK-NEXT: retq
+ %res = fptoui <4 x half> %x to <4 x i64>
+ ret <4 x i64> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64>, <8 x half>, i8)
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_128(<2 x i64> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtuqq2ph %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 %x2)
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_128_nomask(<2 x i64> %x0, <8 x half> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_128_nomask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtuqq2ph %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 -1)
+ ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_128_z(<2 x i64> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_128_z:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvtuqq2ph %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64> %x0, <8 x half> zeroinitializer, i8 %x2)
+ ret <8 x half> %res
+}
+
+define <2 x half> @uint_to_fp_2i64_to_2f16(<2 x i64> %x) {
+; CHECK-LABEL: uint_to_fp_2i64_to_2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtuqq2ph %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = uitofp <2 x i64> %x to <2 x half>
+ ret <2 x half> %res
+}
+
+define <2 x i64> @fp_to_uint_2f16_to_2i64(<2 x half> %x) {
+; CHECK-LABEL: fp_to_uint_2f16_to_2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2uqq %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = fptoui <2 x half> %x to <2 x i64>
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half>, <2 x i64>, i8)
+
+define <2 x i64> @test_int_x86_avx512_cvtt_ph2qq_128(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2qq_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2qq %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half> %x0, <2 x i64> undef, i8 -1)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_int_x86_avx512_mask_cvtt_ph2qq_128(<8 x half> %x0, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2qq_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2qq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half> %x0, <2 x i64> %x1, i8 %x2)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ph2qq_128(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2qq_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2qq %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half> %x0, <2 x i64> zeroinitializer, i8 %x2)
+ ret <2 x i64> %res
+}
+
+declare <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half>, <4 x i64>, i8)
+
+define <4 x i64> @test_int_x86_avx512_cvtt_ph2qq_256(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2qq_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2qq %xmm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half> %x0, <4 x i64> undef, i8 -1)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_int_x86_avx512_mask_cvtt_ph2qq_256(<8 x half> %x0, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2qq_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2qq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovaps %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half> %x0, <4 x i64> %x1, i8 %x2)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_int_x86_avx512_maskz_cvtt_ph2qq_256(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2qq_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2qq %xmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half> %x0, <4 x i64> zeroinitializer, i8 %x2)
+ ret <4 x i64> %res
+}
+
+declare <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half>, <2 x i64>, i8)
+
+define <2 x i64> @test_int_x86_avx512_cvtt_ph2uqq_128(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2uqq_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2uqq %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half> %x0, <2 x i64> undef, i8 -1)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_int_x86_avx512_mask_cvtt_ph2uqq_128(<8 x half> %x0, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2uqq_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2uqq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half> %x0, <2 x i64> %x1, i8 %x2)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ph2uqq_128(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2uqq_128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2uqq %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half> %x0, <2 x i64> zeroinitializer, i8 %x2)
+ ret <2 x i64> %res
+}
+
+declare <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half>, <4 x i64>, i8)
+
+define <4 x i64> @test_int_x86_avx512_cvtt_ph2uqq_256(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2uqq_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2uqq %xmm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half> %x0, <4 x i64> undef, i8 -1)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_int_x86_avx512_mask_cvtt_ph2uqq_256(<8 x half> %x0, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2uqq_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2uqq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vmovaps %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half> %x0, <4 x i64> %x1, i8 %x2)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_int_x86_avx512_maskz_cvtt_ph2uqq_256(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2uqq_256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vcvttph2uqq %xmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half> %x0, <4 x i64> zeroinitializer, i8 %x2)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/X86/cvt16-2.ll b/llvm/test/CodeGen/X86/cvt16-2.ll
new file mode 100644
index 0000000000000..67111e838cab8
--- /dev/null
+++ b/llvm/test/CodeGen/X86/cvt16-2.ll
@@ -0,0 +1,157 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-avx512fp16 | FileCheck %s -check-prefix=LIBCALL
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512fp16 | FileCheck %s -check-prefix=FP16
+
+define void @test1(float %src, i16* %dest) {
+; LIBCALL-LABEL: test1:
+; LIBCALL: # %bb.0:
+; LIBCALL-NEXT: pushq %rbx
+; LIBCALL-NEXT: .cfi_def_cfa_offset 16
+; LIBCALL-NEXT: .cfi_offset %rbx, -16
+; LIBCALL-NEXT: movq %rdi, %rbx
+; LIBCALL-NEXT: callq __gnu_f2h_ieee@PLT
+; LIBCALL-NEXT: movw %ax, (%rbx)
+; LIBCALL-NEXT: popq %rbx
+; LIBCALL-NEXT: .cfi_def_cfa_offset 8
+; LIBCALL-NEXT: retq
+;
+; FP16-LABEL: test1:
+; FP16: # %bb.0:
+; FP16-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
+; FP16-NEXT: vmovsh %xmm0, (%rdi)
+; FP16-NEXT: retq
+ %1 = tail call i16 @llvm.convert.to.fp16.f32(float %src)
+ store i16 %1, i16* %dest, align 2
+ ret void
+}
+
+define float @test2(i16* nocapture %src) {
+; LIBCALL-LABEL: test2:
+; LIBCALL: # %bb.0:
+; LIBCALL-NEXT: movzwl (%rdi), %edi
+; LIBCALL-NEXT: jmp __gnu_h2f_ieee@PLT # TAILCALL
+;
+; FP16-LABEL: test2:
+; FP16: # %bb.0:
+; FP16-NEXT: vmovsh (%rdi), %xmm0
+; FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
+; FP16-NEXT: retq
+ %1 = load i16, i16* %src, align 2
+ %2 = tail call float @llvm.convert.from.fp16.f32(i16 %1)
+ ret float %2
+}
+
+define float @test3(float %src) nounwind uwtable readnone {
+; LIBCALL-LABEL: test3:
+; LIBCALL: # %bb.0:
+; LIBCALL-NEXT: pushq %rax
+; LIBCALL-NEXT: .cfi_def_cfa_offset 16
+; LIBCALL-NEXT: callq __gnu_f2h_ieee@PLT
+; LIBCALL-NEXT: movzwl %ax, %edi
+; LIBCALL-NEXT: popq %rax
+; LIBCALL-NEXT: .cfi_def_cfa_offset 8
+; LIBCALL-NEXT: jmp __gnu_h2f_ieee@PLT # TAILCALL
+;
+; FP16-LABEL: test3:
+; FP16: # %bb.0:
+; FP16-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
+; FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
+; FP16-NEXT: retq
+ %1 = tail call i16 @llvm.convert.to.fp16.f32(float %src)
+ %2 = tail call float @llvm.convert.from.fp16.f32(i16 %1)
+ ret float %2
+}
+
+; FIXME: Should it be __extendhfdf2?
+define double @test4(i16* nocapture %src) {
+; LIBCALL-LABEL: test4:
+; LIBCALL: # %bb.0:
+; LIBCALL-NEXT: pushq %rax
+; LIBCALL-NEXT: .cfi_def_cfa_offset 16
+; LIBCALL-NEXT: movzwl (%rdi), %edi
+; LIBCALL-NEXT: callq __gnu_h2f_ieee@PLT
+; LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0
+; LIBCALL-NEXT: popq %rax
+; LIBCALL-NEXT: .cfi_def_cfa_offset 8
+; LIBCALL-NEXT: retq
+;
+; FP16-LABEL: test4:
+; FP16: # %bb.0:
+; FP16-NEXT: vmovsh (%rdi), %xmm0
+; FP16-NEXT: vcvtsh2sd %xmm0, %xmm0, %xmm0
+; FP16-NEXT: retq
+ %1 = load i16, i16* %src, align 2
+ %2 = tail call double @llvm.convert.from.fp16.f64(i16 %1)
+ ret double %2
+}
+
+define i16 @test5(double %src) {
+; LIBCALL-LABEL: test5:
+; LIBCALL: # %bb.0:
+; LIBCALL-NEXT: jmp __truncdfhf2@PLT # TAILCALL
+;
+; FP16-LABEL: test5:
+; FP16: # %bb.0:
+; FP16-NEXT: vcvtsd2sh %xmm0, %xmm0, %xmm0
+; FP16-NEXT: vmovw %xmm0, %eax
+; FP16-NEXT: # kill: def $ax killed $ax killed $eax
+; FP16-NEXT: retq
+ %val = tail call i16 @llvm.convert.to.fp16.f64(double %src)
+ ret i16 %val
+}
+
+; FIXME: Should it be __extendhfxf2?
+define x86_fp80 @test6(i16* nocapture %src) {
+; LIBCALL-LABEL: test6:
+; LIBCALL: # %bb.0:
+; LIBCALL-NEXT: pushq %rax
+; LIBCALL-NEXT: .cfi_def_cfa_offset 16
+; LIBCALL-NEXT: movzwl (%rdi), %edi
+; LIBCALL-NEXT: callq __gnu_h2f_ieee@PLT
+; LIBCALL-NEXT: movss %xmm0, {{[0-9]+}}(%rsp)
+; LIBCALL-NEXT: flds {{[0-9]+}}(%rsp)
+; LIBCALL-NEXT: popq %rax
+; LIBCALL-NEXT: .cfi_def_cfa_offset 8
+; LIBCALL-NEXT: retq
+;
+; FP16-LABEL: test6:
+; FP16: # %bb.0:
+; FP16-NEXT: pushq %rax
+; FP16-NEXT: .cfi_def_cfa_offset 16
+; FP16-NEXT: vmovsh (%rdi), %xmm0
+; FP16-NEXT: callq __extendhfxf2@PLT
+; FP16-NEXT: popq %rax
+; FP16-NEXT: .cfi_def_cfa_offset 8
+; FP16-NEXT: retq
+ %1 = load i16, i16* %src, align 2
+ %2 = tail call x86_fp80 @llvm.convert.from.fp16.f80(i16 %1)
+ ret x86_fp80 %2
+}
+
+define i16 @test7(x86_fp80 %src) {
+; LIBCALL-LABEL: test7:
+; LIBCALL: # %bb.0:
+; LIBCALL-NEXT: jmp __truncxfhf2@PLT # TAILCALL
+;
+; FP16-LABEL: test7:
+; FP16: # %bb.0:
+; FP16-NEXT: subq $24, %rsp
+; FP16-NEXT: .cfi_def_cfa_offset 32
+; FP16-NEXT: fldt {{[0-9]+}}(%rsp)
+; FP16-NEXT: fstpt (%rsp)
+; FP16-NEXT: callq __truncxfhf2@PLT
+; FP16-NEXT: vmovw %xmm0, %eax
+; FP16-NEXT: # kill: def $ax killed $ax killed $eax
+; FP16-NEXT: addq $24, %rsp
+; FP16-NEXT: .cfi_def_cfa_offset 8
+; FP16-NEXT: retq
+ %val = tail call i16 @llvm.convert.to.fp16.f80(x86_fp80 %src)
+ ret i16 %val
+}
+
+declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
+declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
+declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone
+declare i16 @llvm.convert.to.fp16.f64(double) nounwind readnone
+declare x86_fp80 @llvm.convert.from.fp16.f80(i16) nounwind readnone
+declare i16 @llvm.convert.to.fp16.f80(x86_fp80) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-fp16.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-fp16.ll
index 9d58a262dcc6c..a2c289f1a26e4 100644
--- a/llvm/test/CodeGen/X86/fp-strict-scalar-fp16.ll
+++ b/llvm/test/CodeGen/X86/fp-strict-scalar-fp16.ll
@@ -6,6 +6,10 @@ declare half @llvm.experimental.constrained.fadd.f16(half, half, metadata, metad
declare half @llvm.experimental.constrained.fsub.f16(half, half, metadata, metadata)
declare half @llvm.experimental.constrained.fmul.f16(half, half, metadata, metadata)
declare half @llvm.experimental.constrained.fdiv.f16(half, half, metadata, metadata)
+declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata)
+declare double @llvm.experimental.constrained.fpext.f64.f16(half, metadata)
+declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata)
+declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata)
define half @fadd_f16(half %a, half %b) nounwind strictfp {
; X86-LABEL: fadd_f16:
@@ -75,4 +79,98 @@ define half @fdiv_f16(half %a, half %b) nounwind strictfp {
ret half %ret
}
+define void @fpext_f16_to_f32(half* %val, float* %ret) nounwind strictfp {
+; X86-LABEL: fpext_f16_to_f32:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: vmovsh (%ecx), %xmm0
+; X86-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
+; X86-NEXT: vmovss %xmm0, (%eax)
+; X86-NEXT: retl
+;
+; X64-LABEL: fpext_f16_to_f32:
+; X64: # %bb.0:
+; X64-NEXT: vmovsh (%rdi), %xmm0
+; X64-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
+; X64-NEXT: vmovss %xmm0, (%rsi)
+; X64-NEXT: retq
+ %1 = load half, half* %val, align 4
+ %res = call float @llvm.experimental.constrained.fpext.f32.f16(half %1,
+ metadata !"fpexcept.strict") #0
+ store float %res, float* %ret, align 8
+ ret void
+}
+
+define void @fpext_f16_to_f64(half* %val, double* %ret) nounwind strictfp {
+; X86-LABEL: fpext_f16_to_f64:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: vmovsh (%ecx), %xmm0
+; X86-NEXT: vcvtsh2sd %xmm0, %xmm0, %xmm0
+; X86-NEXT: vmovsd %xmm0, (%eax)
+; X86-NEXT: retl
+;
+; X64-LABEL: fpext_f16_to_f64:
+; X64: # %bb.0:
+; X64-NEXT: vmovsh (%rdi), %xmm0
+; X64-NEXT: vcvtsh2sd %xmm0, %xmm0, %xmm0
+; X64-NEXT: vmovsd %xmm0, (%rsi)
+; X64-NEXT: retq
+ %1 = load half, half* %val, align 4
+ %res = call double @llvm.experimental.constrained.fpext.f64.f16(half %1,
+ metadata !"fpexcept.strict") #0
+ store double %res, double* %ret, align 8
+ ret void
+}
+
+define void @fptrunc_float_to_f16(float* %val, half *%ret) nounwind strictfp {
+; X86-LABEL: fptrunc_float_to_f16:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
+; X86-NEXT: vmovsh %xmm0, (%eax)
+; X86-NEXT: retl
+;
+; X64-LABEL: fptrunc_float_to_f16:
+; X64: # %bb.0:
+; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
+; X64-NEXT: vmovsh %xmm0, (%rsi)
+; X64-NEXT: retq
+ %1 = load float, float* %val, align 8
+ %res = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %1,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ store half %res, half* %ret, align 4
+ ret void
+}
+
+define void @fptrunc_double_to_f16(double* %val, half *%ret) nounwind strictfp {
+; X86-LABEL: fptrunc_double_to_f16:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vcvtsd2sh %xmm0, %xmm0, %xmm0
+; X86-NEXT: vmovsh %xmm0, (%eax)
+; X86-NEXT: retl
+;
+; X64-LABEL: fptrunc_double_to_f16:
+; X64: # %bb.0:
+; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: vcvtsd2sh %xmm0, %xmm0, %xmm0
+; X64-NEXT: vmovsh %xmm0, (%rsi)
+; X64-NEXT: retq
+ %1 = load double, double* %val, align 8
+ %res = call half @llvm.experimental.constrained.fptrunc.f16.f64(double %1,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ store half %res, half* %ret, align 4
+ ret void
+}
+
attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint-fp16.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint-fp16.ll
new file mode 100644
index 0000000000000..9ea19ca318816
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint-fp16.ll
@@ -0,0 +1,184 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X64
+
+declare i1 @llvm.experimental.constrained.fptosi.i1.f16(half, metadata)
+declare i8 @llvm.experimental.constrained.fptosi.i8.f16(half, metadata)
+declare i16 @llvm.experimental.constrained.fptosi.i16.f16(half, metadata)
+declare i32 @llvm.experimental.constrained.fptosi.i32.f16(half, metadata)
+declare i64 @llvm.experimental.constrained.fptosi.i64.f16(half, metadata)
+declare i1 @llvm.experimental.constrained.fptoui.i1.f16(half, metadata)
+declare i8 @llvm.experimental.constrained.fptoui.i8.f16(half, metadata)
+declare i16 @llvm.experimental.constrained.fptoui.i16.f16(half, metadata)
+declare i32 @llvm.experimental.constrained.fptoui.i32.f16(half, metadata)
+declare i64 @llvm.experimental.constrained.fptoui.i64.f16(half, metadata)
+
+define i1 @fptosi_f16toi1(half %x) #0 {
+; X86-LABEL: fptosi_f16toi1:
+; X86: # %bb.0:
+; X86-NEXT: vcvttsh2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: fptosi_f16toi1:
+; X64: # %bb.0:
+; X64-NEXT: vcvttsh2si %xmm0, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: retq
+ %result = call i1 @llvm.experimental.constrained.fptosi.i1.f16(half %x,
+ metadata !"fpexcept.strict") #0
+ ret i1 %result
+}
+
+define i8 @fptosi_f16toi8(half %x) #0 {
+; X86-LABEL: fptosi_f16toi8:
+; X86: # %bb.0:
+; X86-NEXT: vcvttsh2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: fptosi_f16toi8:
+; X64: # %bb.0:
+; X64-NEXT: vcvttsh2si %xmm0, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: retq
+ %result = call i8 @llvm.experimental.constrained.fptosi.i8.f16(half %x,
+ metadata !"fpexcept.strict") #0
+ ret i8 %result
+}
+
+define i16 @fptosi_f16toi16(half %x) #0 {
+; X86-LABEL: fptosi_f16toi16:
+; X86: # %bb.0:
+; X86-NEXT: vcvttsh2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: fptosi_f16toi16:
+; X64: # %bb.0:
+; X64-NEXT: vcvttsh2si %xmm0, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+ %result = call i16 @llvm.experimental.constrained.fptosi.i16.f16(half %x,
+ metadata !"fpexcept.strict") #0
+ ret i16 %result
+}
+
+define i32 @fptosi_f16toi32(half %x) #0 {
+; X86-LABEL: fptosi_f16toi32:
+; X86: # %bb.0:
+; X86-NEXT: vcvttsh2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: fptosi_f16toi32:
+; X64: # %bb.0:
+; X64-NEXT: vcvttsh2si %xmm0, %eax
+; X64-NEXT: retq
+ %result = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %x,
+ metadata !"fpexcept.strict") #0
+ ret i32 %result
+}
+
+define i64 @fptosi_f16toi64(half %x) #0 {
+; X86-LABEL: fptosi_f16toi64:
+; X86: # %bb.0:
+; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: vcvttph2qq %xmm0, %xmm0
+; X86-NEXT: vmovd %xmm0, %eax
+; X86-NEXT: vpextrd $1, %xmm0, %edx
+; X86-NEXT: retl
+;
+; X64-LABEL: fptosi_f16toi64:
+; X64: # %bb.0:
+; X64-NEXT: vcvttsh2si %xmm0, %rax
+; X64-NEXT: retq
+ %result = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %x,
+ metadata !"fpexcept.strict") #0
+ ret i64 %result
+}
+
+define i1 @fptoui_f16toi1(half %x) #0 {
+; X86-LABEL: fptoui_f16toi1:
+; X86: # %bb.0:
+; X86-NEXT: vcvttsh2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: fptoui_f16toi1:
+; X64: # %bb.0:
+; X64-NEXT: vcvttsh2si %xmm0, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: retq
+ %result = call i1 @llvm.experimental.constrained.fptoui.i1.f16(half %x,
+ metadata !"fpexcept.strict") #0
+ ret i1 %result
+}
+
+define i8 @fptoui_f16toi8(half %x) #0 {
+; X86-LABEL: fptoui_f16toi8:
+; X86: # %bb.0:
+; X86-NEXT: vcvttsh2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: fptoui_f16toi8:
+; X64: # %bb.0:
+; X64-NEXT: vcvttsh2si %xmm0, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: retq
+ %result = call i8 @llvm.experimental.constrained.fptoui.i8.f16(half %x,
+ metadata !"fpexcept.strict") #0
+ ret i8 %result
+}
+
+define i16 @fptoui_f16toi16(half %x) #0 {
+; X86-LABEL: fptoui_f16toi16:
+; X86: # %bb.0:
+; X86-NEXT: vcvttsh2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: fptoui_f16toi16:
+; X64: # %bb.0:
+; X64-NEXT: vcvttsh2si %xmm0, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+ %result = call i16 @llvm.experimental.constrained.fptoui.i16.f16(half %x,
+ metadata !"fpexcept.strict") #0
+ ret i16 %result
+}
+
+define i32 @fptoui_f16toi32(half %x) #0 {
+; X86-LABEL: fptoui_f16toi32:
+; X86: # %bb.0:
+; X86-NEXT: vcvttsh2usi {{[0-9]+}}(%esp), %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: fptoui_f16toi32:
+; X64: # %bb.0:
+; X64-NEXT: vcvttsh2usi %xmm0, %eax
+; X64-NEXT: retq
+ %result = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x,
+ metadata !"fpexcept.strict") #0
+ ret i32 %result
+}
+
+define i64 @fptoui_f16toi64(half %x) #0 {
+; X86-LABEL: fptoui_f16toi64:
+; X86: # %bb.0:
+; X86-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: vcvttph2uqq %xmm0, %xmm0
+; X86-NEXT: vmovd %xmm0, %eax
+; X86-NEXT: vpextrd $1, %xmm0, %edx
+; X86-NEXT: retl
+;
+; X64-LABEL: fptoui_f16toi64:
+; X64: # %bb.0:
+; X64-NEXT: vcvttsh2usi %xmm0, %rax
+; X64-NEXT: retq
+ %result = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %x,
+ metadata !"fpexcept.strict") #0
+ ret i64 %result
+}
+
+attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp-fp16.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp-fp16.ll
new file mode 100644
index 0000000000000..58b6068ea53ac
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp-fp16.ll
@@ -0,0 +1,197 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X64
+
+declare half @llvm.experimental.constrained.sitofp.f16.i1(i1, metadata, metadata)
+declare half @llvm.experimental.constrained.sitofp.f16.i8(i8, metadata, metadata)
+declare half @llvm.experimental.constrained.sitofp.f16.i16(i16, metadata, metadata)
+declare half @llvm.experimental.constrained.sitofp.f16.i32(i32, metadata, metadata)
+declare half @llvm.experimental.constrained.sitofp.f16.i64(i64, metadata, metadata)
+declare half @llvm.experimental.constrained.uitofp.f16.i1(i1, metadata, metadata)
+declare half @llvm.experimental.constrained.uitofp.f16.i8(i8, metadata, metadata)
+declare half @llvm.experimental.constrained.uitofp.f16.i16(i16, metadata, metadata)
+declare half @llvm.experimental.constrained.uitofp.f16.i32(i32, metadata, metadata)
+declare half @llvm.experimental.constrained.uitofp.f16.i64(i64, metadata, metadata)
+
+define half @sitofp_i1tof16(i1 %x) #0 {
+; X86-LABEL: sitofp_i1tof16:
+; X86: # %bb.0:
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: andb $1, %al
+; X86-NEXT: negb %al
+; X86-NEXT: movsbl %al, %eax
+; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: sitofp_i1tof16:
+; X64: # %bb.0:
+; X64-NEXT: andb $1, %dil
+; X64-NEXT: negb %dil
+; X64-NEXT: movsbl %dil, %eax
+; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
+; X64-NEXT: retq
+ %result = call half @llvm.experimental.constrained.sitofp.f16.i1(i1 %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret half %result
+}
+
+define half @sitofp_i8tof16(i8 %x) #0 {
+; X86-LABEL: sitofp_i8tof16:
+; X86: # %bb.0:
+; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: sitofp_i8tof16:
+; X64: # %bb.0:
+; X64-NEXT: movsbl %dil, %eax
+; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
+; X64-NEXT: retq
+ %result = call half @llvm.experimental.constrained.sitofp.f16.i8(i8 %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret half %result
+}
+
+define half @sitofp_i16tof16(i16 %x) #0 {
+; X86-LABEL: sitofp_i16tof16:
+; X86: # %bb.0:
+; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: sitofp_i16tof16:
+; X64: # %bb.0:
+; X64-NEXT: movswl %di, %eax
+; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
+; X64-NEXT: retq
+ %result = call half @llvm.experimental.constrained.sitofp.f16.i16(i16 %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret half %result
+}
+
+define half @sitofp_i32tof16(i32 %x) #0 {
+; X86-LABEL: sitofp_i32tof16:
+; X86: # %bb.0:
+; X86-NEXT: vcvtsi2shl {{[0-9]+}}(%esp), %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: sitofp_i32tof16:
+; X64: # %bb.0:
+; X64-NEXT: vcvtsi2sh %edi, %xmm0, %xmm0
+; X64-NEXT: retq
+ %result = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret half %result
+}
+
+define half @sitofp_i64tof16(i64 %x) #0 {
+; X86-LABEL: sitofp_i64tof16:
+; X86: # %bb.0:
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vcvtqq2ph %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: sitofp_i64tof16:
+; X64: # %bb.0:
+; X64-NEXT: vcvtsi2sh %rdi, %xmm0, %xmm0
+; X64-NEXT: retq
+ %result = call half @llvm.experimental.constrained.sitofp.f16.i64(i64 %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret half %result
+}
+
+define half @uitofp_i1tof16(i1 %x) #0 {
+; X86-LABEL: uitofp_i1tof16:
+; X86: # %bb.0:
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: andb $1, %al
+; X86-NEXT: movzbl %al, %eax
+; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: uitofp_i1tof16:
+; X64: # %bb.0:
+; X64-NEXT: andl $1, %edi
+; X64-NEXT: vcvtsi2sh %edi, %xmm0, %xmm0
+; X64-NEXT: retq
+ %result = call half @llvm.experimental.constrained.uitofp.f16.i1(i1 %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret half %result
+}
+
+define half @uitofp_i8tof16(i8 %x) #0 {
+; X86-LABEL: uitofp_i8tof16:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: uitofp_i8tof16:
+; X64: # %bb.0:
+; X64-NEXT: movzbl %dil, %eax
+; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
+; X64-NEXT: retq
+ %result = call half @llvm.experimental.constrained.uitofp.f16.i8(i8 %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret half %result
+}
+
+define half @uitofp_i16tof16(i16 %x) #0 {
+; X86-LABEL: uitofp_i16tof16:
+; X86: # %bb.0:
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: uitofp_i16tof16:
+; X64: # %bb.0:
+; X64-NEXT: movzwl %di, %eax
+; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
+; X64-NEXT: retq
+ %result = call half @llvm.experimental.constrained.uitofp.f16.i16(i16 %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret half %result
+}
+
+define half @uitofp_i32tof16(i32 %x) #0 {
+; X86-LABEL: uitofp_i32tof16:
+; X86: # %bb.0:
+; X86-NEXT: vcvtusi2shl {{[0-9]+}}(%esp), %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: uitofp_i32tof16:
+; X64: # %bb.0:
+; X64-NEXT: vcvtusi2sh %edi, %xmm0, %xmm0
+; X64-NEXT: retq
+ %result = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret half %result
+}
+
+define half @uitofp_i64tof16(i64 %x) #0 {
+; X86-LABEL: uitofp_i64tof16:
+; X86: # %bb.0:
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vcvtuqq2ph %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: uitofp_i64tof16:
+; X64: # %bb.0:
+; X64-NEXT: vcvtusi2sh %rdi, %xmm0, %xmm0
+; X64-NEXT: retq
+ %result = call half @llvm.experimental.constrained.uitofp.f16.i64(i64 %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret half %result
+}
+
+attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16vl.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16vl.ll
index bd9706839943a..fab7059dd959d 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16vl.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16vl.ll
@@ -144,5 +144,21 @@ define <16 x half> @stack_fold_mulph_ymm(<16 x half> %a0, <16 x half> %a1) {
ret <16 x half> %2
}
+define <8 x half> @stack_fold_subph(<8 x half> %a0, <8 x half> %a1) {
+ ;CHECK-LABEL: stack_fold_subph
+ ;CHECK: vsubph {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = fsub <8 x half> %a0, %a1
+ ret <8 x half> %2
+}
+
+define <16 x half> @stack_fold_subph_ymm(<16 x half> %a0, <16 x half> %a1) {
+ ;CHECK-LABEL: stack_fold_subph_ymm
+ ;CHECK: vsubph {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+ %2 = fsub <16 x half> %a0, %a1
+ ret <16 x half> %2
+}
+
attributes #0 = { "unsafe-fp-math"="false" }
attributes #1 = { "unsafe-fp-math"="true" }
diff --git a/llvm/test/CodeGen/X86/vec-strict-128-fp16.ll b/llvm/test/CodeGen/X86/vec-strict-128-fp16.ll
index 7657b769fa1b7..7171ac32336d3 100644
--- a/llvm/test/CodeGen/X86/vec-strict-128-fp16.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-128-fp16.ll
@@ -6,6 +6,16 @@ declare <8 x half> @llvm.experimental.constrained.fadd.v8f16(<8 x half>, <8 x ha
declare <8 x half> @llvm.experimental.constrained.fsub.v8f16(<8 x half>, <8 x half>, metadata, metadata)
declare <8 x half> @llvm.experimental.constrained.fmul.v8f16(<8 x half>, <8 x half>, metadata, metadata)
declare <8 x half> @llvm.experimental.constrained.fdiv.v8f16(<8 x half>, <8 x half>, metadata, metadata)
+declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata)
+declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata)
+declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata)
+declare <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float>, metadata, metadata)
+declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(<2 x double>, metadata, metadata)
+declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata)
+declare double @llvm.experimental.constrained.fpext.f64.f16(half, metadata)
+declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata)
+declare <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half>, metadata)
+declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f16(<2 x half>, metadata)
define <8 x half> @f2(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-LABEL: f2:
@@ -51,4 +61,130 @@ define <8 x half> @f8(<8 x half> %a, <8 x half> %b) #0 {
ret <8 x half> %ret
}
+define <8 x half> @f11(<2 x double> %a0, <8 x half> %a1) #0 {
+; CHECK-LABEL: f11:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtsd2sh %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vmovsh %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ext = extractelement <2 x double> %a0, i32 0
+ %cvt = call half @llvm.experimental.constrained.fptrunc.f16.f64(double %ext,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ %res = insertelement <8 x half> %a1, half %cvt, i32 0
+ ret <8 x half> %res
+}
+
+define <2 x double> @f12(<2 x double> %a0, <8 x half> %a1) #0 {
+; CHECK-LABEL: f12:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtsh2sd %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; CHECK-NEXT: ret{{[l|q]}}
+ %ext = extractelement <8 x half> %a1, i32 0
+ %cvt = call double @llvm.experimental.constrained.fpext.f64.f16(half %ext,
+ metadata !"fpexcept.strict") #0
+ %res = insertelement <2 x double> %a0, double %cvt, i32 0
+ ret <2 x double> %res
+}
+
+define <2 x double> @f15(<2 x half> %a) #0 {
+; CHECK-LABEL: f15:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2pd %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f16(
+ <2 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <2 x double> %ret
+}
+
+define <2 x half> @f16(<2 x double> %a) #0 {
+; CHECK-LABEL: f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtpd2ph %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(
+ <2 x double> %a,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <2 x half> %ret
+}
+
+define <8 x half> @f17(<4 x float> %a0, <8 x half> %a1) #0 {
+; CHECK-LABEL: f17:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vmovsh %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ext = extractelement <4 x float> %a0, i32 0
+ %cvt = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %ext,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ %res = insertelement <8 x half> %a1, half %cvt, i32 0
+ ret <8 x half> %res
+}
+
+define <4 x float> @f18(<4 x float> %a0, <8 x half> %a1) #0 {
+; CHECK-LABEL: f18:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; CHECK-NEXT: ret{{[l|q]}}
+ %ext = extractelement <8 x half> %a1, i32 0
+ %cvt = call float @llvm.experimental.constrained.fpext.f32.f16(half %ext,
+ metadata !"fpexcept.strict") #0
+ %res = insertelement <4 x float> %a0, float %cvt, i32 0
+ ret <4 x float> %res
+}
+
+define <2 x float> @f19(<2 x half> %a) #0 {
+; CHECK-LABEL: f19:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
+; CHECK-NEXT: vcvtph2psx %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(
+ <2 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <2 x float> %ret
+}
+
+define <4 x float> @f20(<4 x half> %a) #0 {
+; CHECK-LABEL: f20:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2psx %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(
+ <4 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <4 x float> %ret
+}
+
+define <2 x half> @f21(<2 x float> %a) #0 {
+; CHECK-LABEL: f21:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-NEXT: vcvtps2phx %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(
+ <2 x float> %a,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <2 x half> %ret
+}
+
+define <4 x half> @f22(<4 x float> %a) #0 {
+; CHECK-LABEL: f22:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtps2phx %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(
+ <4 x float> %a,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x half> %ret
+}
+
attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/X86/vec-strict-256-fp16.ll b/llvm/test/CodeGen/X86/vec-strict-256-fp16.ll
index d94003aab9daa..8b78a5b5c492c 100644
--- a/llvm/test/CodeGen/X86/vec-strict-256-fp16.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-256-fp16.ll
@@ -6,6 +6,10 @@ declare <16 x half> @llvm.experimental.constrained.fadd.v16f16(<16 x half>, <16
declare <16 x half> @llvm.experimental.constrained.fsub.v16f16(<16 x half>, <16 x half>, metadata, metadata)
declare <16 x half> @llvm.experimental.constrained.fmul.v16f16(<16 x half>, <16 x half>, metadata, metadata)
declare <16 x half> @llvm.experimental.constrained.fdiv.v16f16(<16 x half>, <16 x half>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f16(<4 x half>, metadata)
+declare <8 x float> @llvm.experimental.constrained.fpext.v8f32.v8f16(<8 x half>, metadata)
+declare <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f64(<4 x double>, metadata, metadata)
+declare <8 x half> @llvm.experimental.constrained.fptrunc.v8f16.v8f32(<8 x float>, metadata, metadata)
define <16 x half> @f2(<16 x half> %a, <16 x half> %b) #0 {
; CHECK-LABEL: f2:
@@ -51,4 +55,52 @@ define <16 x half> @f8(<16 x half> %a, <16 x half> %b) #0 {
ret <16 x half> %ret
}
+define <4 x double> @f11(<4 x half> %a) #0 {
+; CHECK-LABEL: f11:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2pd %xmm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f16(
+ <4 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <4 x double> %ret
+}
+
+define <4 x half> @f12(<4 x double> %a) #0 {
+; CHECK-LABEL: f12:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtpd2ph %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f64(
+ <4 x double> %a,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x half> %ret
+}
+
+define <8 x float> @f14(<8 x half> %a) #0 {
+; CHECK-LABEL: f14:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2psx %xmm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <8 x float> @llvm.experimental.constrained.fpext.v8f32.v8f16(
+ <8 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <8 x float> %ret
+}
+
+define <8 x half> @f15(<8 x float> %a) #0 {
+; CHECK-LABEL: f15:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtps2phx %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <8 x half> @llvm.experimental.constrained.fptrunc.v8f16.v8f32(
+ <8 x float> %a,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <8 x half> %ret
+}
+
attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/X86/vec-strict-512-fp16.ll b/llvm/test/CodeGen/X86/vec-strict-512-fp16.ll
index 4a5c8ca00b5f7..0a25d1c9d3d01 100644
--- a/llvm/test/CodeGen/X86/vec-strict-512-fp16.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-512-fp16.ll
@@ -6,6 +6,10 @@ declare <32 x half> @llvm.experimental.constrained.fadd.v32f16(<32 x half>, <32
declare <32 x half> @llvm.experimental.constrained.fsub.v32f16(<32 x half>, <32 x half>, metadata, metadata)
declare <32 x half> @llvm.experimental.constrained.fmul.v32f16(<32 x half>, <32 x half>, metadata, metadata)
declare <32 x half> @llvm.experimental.constrained.fdiv.v32f16(<32 x half>, <32 x half>, metadata, metadata)
+declare <8 x double> @llvm.experimental.constrained.fpext.v8f64.v8f16(<8 x half>, metadata)
+declare <16 x float> @llvm.experimental.constrained.fpext.v16f32.v16f16(<16 x half>, metadata)
+declare <8 x half> @llvm.experimental.constrained.fptrunc.v8f16.v8f64(<8 x double>, metadata, metadata)
+declare <16 x half> @llvm.experimental.constrained.fptrunc.v16f16.v16f32(<16 x float>, metadata, metadata)
define <32 x half> @f2(<32 x half> %a, <32 x half> %b) #0 {
; CHECK-LABEL: f2:
@@ -51,4 +55,51 @@ define <32 x half> @f8(<32 x half> %a, <32 x half> %b) #0 {
ret <32 x half> %ret
}
+define <8 x double> @f11(<8 x half> %a) #0 {
+; CHECK-LABEL: f11:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2pd %xmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <8 x double> @llvm.experimental.constrained.fpext.v8f64.v8f16(
+ <8 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <8 x double> %ret
+}
+
+define <8 x half> @f12(<8 x double> %a) #0 {
+; CHECK-LABEL: f12:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtpd2ph %zmm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <8 x half> @llvm.experimental.constrained.fptrunc.v8f16.v8f64(
+ <8 x double> %a,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <8 x half> %ret
+}
+
+define <16 x float> @f14(<16 x half> %a) #0 {
+; CHECK-LABEL: f14:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtph2psx %ymm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <16 x float> @llvm.experimental.constrained.fpext.v16f32.v16f16(
+ <16 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <16 x float> %ret
+}
+
+define <16 x half> @f15(<16 x float> %a) #0 {
+; CHECK-LABEL: f15:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtps2phx %zmm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <16 x half> @llvm.experimental.constrained.fptrunc.v16f16.v16f32(
+ <16 x float> %a,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <16 x half> %ret
+}
+
attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-128-fp16.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-128-fp16.ll
new file mode 100644
index 0000000000000..441fd8926acd0
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-128-fp16.ll
@@ -0,0 +1,323 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512fp16,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK
+
+declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f16(<2 x half>, metadata)
+declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f16(<2 x half>, metadata)
+declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f16(<2 x half>, metadata)
+declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f16(<2 x half>, metadata)
+declare <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f16(<2 x half>, metadata)
+declare <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f16(<2 x half>, metadata)
+declare <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f16(<2 x half>, metadata)
+declare <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f16(<2 x half>, metadata)
+declare <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f16(<2 x half>, metadata)
+declare <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f16(<2 x half>, metadata)
+declare <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f16(<4 x half>, metadata)
+declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f16(<4 x half>, metadata)
+declare <4 x i16> @llvm.experimental.constrained.fptosi.v4i16.v4f16(<4 x half>, metadata)
+declare <4 x i16> @llvm.experimental.constrained.fptoui.v4i16.v4f16(<4 x half>, metadata)
+declare <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f16(<4 x half>, metadata)
+declare <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f16(<4 x half>, metadata)
+declare <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f16(<4 x half>, metadata)
+declare <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f16(<4 x half>, metadata)
+declare <8 x i16> @llvm.experimental.constrained.fptosi.v8i16.v8f16(<8 x half>, metadata)
+declare <8 x i16> @llvm.experimental.constrained.fptoui.v8i16.v8f16(<8 x half>, metadata)
+declare <8 x i8> @llvm.experimental.constrained.fptosi.v8i8.v8f16(<8 x half>, metadata)
+declare <8 x i8> @llvm.experimental.constrained.fptoui.v8i8.v8f16(<8 x half>, metadata)
+declare <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f16(<8 x half>, metadata)
+declare <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f16(<8 x half>, metadata)
+
+define <2 x i64> @strict_vector_fptosi_v2f16_to_v2i64(<2 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v2f16_to_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; CHECK-NEXT: vcvttph2qq %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f16(<2 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <2 x i64> %ret
+}
+
+define <2 x i64> @strict_vector_fptoui_v2f16_to_v2i64(<2 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v2f16_to_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; CHECK-NEXT: vcvttph2uqq %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f16(<2 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <2 x i64> %ret
+}
+
+define <2 x i32> @strict_vector_fptosi_v2f16_to_v2i32(<2 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v2f16_to_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f16(<2 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <2 x i32> %ret
+}
+
+define <2 x i32> @strict_vector_fptoui_v2f16_to_v2i32(<2 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v2f16_to_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f16(<2 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <2 x i32> %ret
+}
+
+define <2 x i16> @strict_vector_fptosi_v2f16_to_v2i16(<2 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v2f16_to_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f16(<2 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <2 x i16> %ret
+}
+
+define <2 x i16> @strict_vector_fptoui_v2f16_to_v2i16(<2 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v2f16_to_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f16(<2 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <2 x i16> %ret
+}
+
+define <2 x i8> @strict_vector_fptosi_v2f16_to_v2i8(<2 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v2f16_to_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
+; CHECK-NEXT: vpmovwb %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f16(<2 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <2 x i8> %ret
+}
+
+define <2 x i8> @strict_vector_fptoui_v2f16_to_v2i8(<2 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v2f16_to_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
+; CHECK-NEXT: vpmovwb %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f16(<2 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <2 x i8> %ret
+}
+
+define <2 x i1> @strict_vector_fptosi_v2f16_to_v2i1(<2 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v2f16_to_v2i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
+; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0
+; CHECK-NEXT: vpmovw2m %xmm0, %k0
+; CHECK-NEXT: vpmovm2q %k0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f16(<2 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <2 x i1> %ret
+}
+
+define <2 x i1> @strict_vector_fptoui_v2f16_to_v2i1(<2 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v2f16_to_v2i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
+; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0
+; CHECK-NEXT: vpmovw2m %xmm0, %k0
+; CHECK-NEXT: vpmovm2q %k0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f16(<2 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <2 x i1> %ret
+}
+
+define <4 x i32> @strict_vector_fptosi_v4f16_to_v4i32(<4 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v4f16_to_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f16(<4 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <4 x i32> %ret
+}
+
+define <4 x i32> @strict_vector_fptoui_v4f16_to_v4i32(<4 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v4f16_to_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f16(<4 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <4 x i32> %ret
+}
+
+define <4 x i16> @strict_vector_fptosi_v4f16_to_v4i16(<4 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v4f16_to_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <4 x i16> @llvm.experimental.constrained.fptosi.v4i16.v4f16(<4 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <4 x i16> %ret
+}
+
+define <4 x i16> @strict_vector_fptoui_v4f16_to_v4i16(<4 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v4f16_to_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <4 x i16> @llvm.experimental.constrained.fptoui.v4i16.v4f16(<4 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <4 x i16> %ret
+}
+
+define <4 x i8> @strict_vector_fptosi_v4f16_to_v4i8(<4 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v4f16_to_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
+; CHECK-NEXT: vpmovwb %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f16(<4 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <4 x i8> %ret
+}
+
+define <4 x i8> @strict_vector_fptoui_v4f16_to_v4i8(<4 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v4f16_to_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
+; CHECK-NEXT: vpmovwb %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f16(<4 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <4 x i8> %ret
+}
+
+define <4 x i1> @strict_vector_fptosi_v4f16_to_v4i1(<4 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v4f16_to_v4i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
+; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0
+; CHECK-NEXT: vpmovw2m %xmm0, %k0
+; CHECK-NEXT: vpmovm2d %k0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f16(<4 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <4 x i1> %ret
+}
+
+define <4 x i1> @strict_vector_fptoui_v4f16_to_v4i1(<4 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v4f16_to_v4i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
+; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0
+; CHECK-NEXT: vpmovw2m %xmm0, %k0
+; CHECK-NEXT: vpmovm2d %k0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f16(<4 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <4 x i1> %ret
+}
+
+define <8 x i16> @strict_vector_fptosi_v8f16_to_v8i16(<8 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v8f16_to_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <8 x i16> @llvm.experimental.constrained.fptosi.v8i16.v8f16(<8 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <8 x i16> %ret
+}
+
+define <8 x i16> @strict_vector_fptoui_v8f16_to_v8i16(<8 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v8f16_to_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <8 x i16> @llvm.experimental.constrained.fptoui.v8i16.v8f16(<8 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <8 x i16> %ret
+}
+
+define <8 x i8> @strict_vector_fptosi_v8f16_to_v8i8(<8 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v8f16_to_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2w %xmm0, %xmm0
+; CHECK-NEXT: vpmovwb %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <8 x i8> @llvm.experimental.constrained.fptosi.v8i8.v8f16(<8 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <8 x i8> %ret
+}
+
+define <8 x i8> @strict_vector_fptoui_v8f16_to_v8i8(<8 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v8f16_to_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0
+; CHECK-NEXT: vpmovwb %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <8 x i8> @llvm.experimental.constrained.fptoui.v8i8.v8f16(<8 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <8 x i8> %ret
+}
+
+define <8 x i1> @strict_vector_fptosi_v8f16_to_v8i1(<8 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v8f16_to_v8i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2dq %xmm0, %ymm0
+; CHECK-NEXT: vpmovd2m %ymm0, %k0
+; CHECK-NEXT: vpmovm2w %k0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f16(<8 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <8 x i1> %ret
+}
+
+define <8 x i1> @strict_vector_fptoui_v8f16_to_v8i1(<8 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v8f16_to_v8i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2dq %xmm0, %ymm0
+; CHECK-NEXT: vpslld $31, %ymm0, %ymm0
+; CHECK-NEXT: vpmovd2m %ymm0, %k0
+; CHECK-NEXT: vpmovm2w %k0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f16(<8 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <8 x i1> %ret
+}
+
+attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-256-fp16.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-256-fp16.ll
new file mode 100644
index 0000000000000..bc0dd022bfae4
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-256-fp16.ll
@@ -0,0 +1,132 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512fp16,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK
+
+
+declare <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f16(<4 x half>, metadata)
+declare <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f16(<4 x half>, metadata)
+declare <8 x i32> @llvm.experimental.constrained.fptosi.v8i32.v8f16(<8 x half>, metadata)
+declare <8 x i32> @llvm.experimental.constrained.fptoui.v8i32.v8f16(<8 x half>, metadata)
+declare <16 x i16> @llvm.experimental.constrained.fptosi.v16i16.v16f16(<16 x half>, metadata)
+declare <16 x i16> @llvm.experimental.constrained.fptoui.v16i16.v16f16(<16 x half>, metadata)
+declare <16 x i8> @llvm.experimental.constrained.fptosi.v16i8.v16f16(<16 x half>, metadata)
+declare <16 x i8> @llvm.experimental.constrained.fptoui.v16i8.v16f16(<16 x half>, metadata)
+declare <16 x i1> @llvm.experimental.constrained.fptosi.v16i1.v16f16(<16 x half>, metadata)
+declare <16 x i1> @llvm.experimental.constrained.fptoui.v16i1.v16f16(<16 x half>, metadata)
+
+define <4 x i64> @strict_vector_fptosi_v4f16_to_v4i64(<4 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v4f16_to_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-NEXT: vcvttph2qq %xmm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f16(<4 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <4 x i64> %ret
+}
+
+define <4 x i64> @strict_vector_fptoui_v4f16_to_v4i64(<4 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v4f16_to_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-NEXT: vcvttph2uqq %xmm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f16(<4 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <4 x i64> %ret
+}
+
+define <8 x i32> @strict_vector_fptosi_v8f16_to_v8i32(<8 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v8f16_to_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2dq %xmm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <8 x i32> @llvm.experimental.constrained.fptosi.v8i32.v8f16(<8 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <8 x i32> %ret
+}
+
+define <8 x i32> @strict_vector_fptoui_v8f16_to_v8i32(<8 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v8f16_to_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2udq %xmm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <8 x i32> @llvm.experimental.constrained.fptoui.v8i32.v8f16(<8 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <8 x i32> %ret
+}
+
+define <16 x i16> @strict_vector_fptosi_v16f16_to_v16i16(<16 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v16f16_to_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2dq %ymm0, %zmm0
+; CHECK-NEXT: vpmovdw %zmm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <16 x i16> @llvm.experimental.constrained.fptosi.v16i16.v16f16(<16 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <16 x i16> %ret
+}
+
+define <16 x i16> @strict_vector_fptoui_v16f16_to_v16i16(<16 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v16f16_to_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2dq %ymm0, %zmm0
+; CHECK-NEXT: vpmovdw %zmm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <16 x i16> @llvm.experimental.constrained.fptoui.v16i16.v16f16(<16 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <16 x i16> %ret
+}
+
+define <16 x i8> @strict_vector_fptosi_v16f16_to_v16i8(<16 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v16f16_to_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2dq %ymm0, %zmm0
+; CHECK-NEXT: vpmovdb %zmm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <16 x i8> @llvm.experimental.constrained.fptosi.v16i8.v16f16(<16 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <16 x i8> %ret
+}
+
+define <16 x i8> @strict_vector_fptoui_v16f16_to_v16i8(<16 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v16f16_to_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2dq %ymm0, %zmm0
+; CHECK-NEXT: vpmovdb %zmm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <16 x i8> @llvm.experimental.constrained.fptoui.v16i8.v16f16(<16 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <16 x i8> %ret
+}
+
+define <16 x i1> @strict_vector_fptosi_v16f16_to_v16i1(<16 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v16f16_to_v16i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2dq %ymm0, %zmm0
+; CHECK-NEXT: vpmovd2m %zmm0, %k0
+; CHECK-NEXT: vpmovm2b %k0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <16 x i1> @llvm.experimental.constrained.fptosi.v16i1.v16f16(<16 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <16 x i1> %ret
+}
+
+define <16 x i1> @strict_vector_fptoui_v16f16_to_v16i1(<16 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v16f16_to_v16i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2dq %ymm0, %zmm0
+; CHECK-NEXT: vpslld $31, %zmm0, %zmm0
+; CHECK-NEXT: vpmovd2m %zmm0, %k0
+; CHECK-NEXT: vpmovm2b %k0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <16 x i1> @llvm.experimental.constrained.fptoui.v16i1.v16f16(<16 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <16 x i1> %ret
+}
+
+attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-512-fp16.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-512-fp16.ll
new file mode 100644
index 0000000000000..dc8823710291e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-512-fp16.ll
@@ -0,0 +1,124 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512fp16 -O3 | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16 -O3 | FileCheck %s --check-prefixes=CHECK
+
+
+declare <8 x i64> @llvm.experimental.constrained.fptosi.v8i64.v8f16(<8 x half>, metadata)
+declare <8 x i64> @llvm.experimental.constrained.fptoui.v8i64.v8f16(<8 x half>, metadata)
+declare <16 x i32> @llvm.experimental.constrained.fptosi.v16i32.v16f16(<16 x half>, metadata)
+declare <16 x i32> @llvm.experimental.constrained.fptoui.v16i32.v16f16(<16 x half>, metadata)
+declare <32 x i16> @llvm.experimental.constrained.fptosi.v32i16.v32f16(<32 x half>, metadata)
+declare <32 x i16> @llvm.experimental.constrained.fptoui.v32i16.v32f16(<32 x half>, metadata)
+declare <32 x i8> @llvm.experimental.constrained.fptosi.v32i8.v32f16(<32 x half>, metadata)
+declare <32 x i8> @llvm.experimental.constrained.fptoui.v32i8.v32f16(<32 x half>, metadata)
+declare <32 x i1> @llvm.experimental.constrained.fptosi.v32i1.v32f16(<32 x half>, metadata)
+declare <32 x i1> @llvm.experimental.constrained.fptoui.v32i1.v32f16(<32 x half>, metadata)
+
+define <8 x i64> @strict_vector_fptosi_v8f16_to_v8i64(<8 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v8f16_to_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2qq %xmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <8 x i64> @llvm.experimental.constrained.fptosi.v8i64.v8f16(<8 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <8 x i64> %ret
+}
+
+define <8 x i64> @strict_vector_fptoui_v8f16_to_v8i64(<8 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v8f16_to_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2uqq %xmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <8 x i64> @llvm.experimental.constrained.fptoui.v8i64.v8f16(<8 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <8 x i64> %ret
+}
+
+define <16 x i32> @strict_vector_fptosi_v16f16_to_v16i32(<16 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v16f16_to_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2dq %ymm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <16 x i32> @llvm.experimental.constrained.fptosi.v16i32.v16f16(<16 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <16 x i32> %ret
+}
+
+define <16 x i32> @strict_vector_fptoui_v16f16_to_v16i32(<16 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v16f16_to_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2udq %ymm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <16 x i32> @llvm.experimental.constrained.fptoui.v16i32.v16f16(<16 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <16 x i32> %ret
+}
+
+define <32 x i16> @strict_vector_fptosi_v32f16_to_v32i16(<32 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v32f16_to_v32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2w %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <32 x i16> @llvm.experimental.constrained.fptosi.v32i16.v32f16(<32 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <32 x i16> %ret
+}
+
+define <32 x i16> @strict_vector_fptoui_v32f16_to_v32i16(<32 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v32f16_to_v32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2uw %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <32 x i16> @llvm.experimental.constrained.fptoui.v32i16.v32f16(<32 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <32 x i16> %ret
+}
+
+define <32 x i8> @strict_vector_fptosi_v32f16_to_v32i8(<32 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v32f16_to_v32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2w %zmm0, %zmm0
+; CHECK-NEXT: vpmovwb %zmm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <32 x i8> @llvm.experimental.constrained.fptosi.v32i8.v32f16(<32 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <32 x i8> %ret
+}
+
+define <32 x i8> @strict_vector_fptoui_v32f16_to_v32i8(<32 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v32f16_to_v32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2w %zmm0, %zmm0
+; CHECK-NEXT: vpmovwb %zmm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <32 x i8> @llvm.experimental.constrained.fptoui.v32i8.v32f16(<32 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <32 x i8> %ret
+}
+
+define <32 x i1> @strict_vector_fptosi_v32f16_to_v32i1(<32 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v32f16_to_v32i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2w %zmm0, %zmm0
+; CHECK-NEXT: vpmovw2m %zmm0, %k0
+; CHECK-NEXT: vpmovm2b %k0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <32 x i1> @llvm.experimental.constrained.fptosi.v32i1.v32f16(<32 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <32 x i1> %ret
+}
+
+define <32 x i1> @strict_vector_fptoui_v32f16_to_v32i1(<32 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v32f16_to_v32i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttph2w %zmm0, %zmm0
+; CHECK-NEXT: vpsllw $15, %zmm0, %zmm0
+; CHECK-NEXT: vpmovw2m %zmm0, %k0
+; CHECK-NEXT: vpmovm2b %k0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %ret = call <32 x i1> @llvm.experimental.constrained.fptoui.v32i1.v32f16(<32 x half> %a,
+ metadata !"fpexcept.strict") #0
+ ret <32 x i1> %ret
+}
+
+attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-128-fp16.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-128-fp16.ll
new file mode 100644
index 0000000000000..7e10ab56faae1
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-128-fp16.ll
@@ -0,0 +1,137 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512fp16,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,X64
+
+declare <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i1(<8 x i1>, metadata, metadata)
+declare <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i1(<8 x i1>, metadata, metadata)
+declare <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i8(<8 x i8>, metadata, metadata)
+declare <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i8(<8 x i8>, metadata, metadata)
+declare <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i16(<8 x i16>, metadata, metadata)
+declare <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i16(<8 x i16>, metadata, metadata)
+declare <4 x half> @llvm.experimental.constrained.sitofp.v4f16.v4i32(<4 x i32>, metadata, metadata)
+declare <4 x half> @llvm.experimental.constrained.uitofp.v4f16.v4i32(<4 x i32>, metadata, metadata)
+declare <2 x half> @llvm.experimental.constrained.sitofp.v2f16.v2i64(<2 x i64>, metadata, metadata)
+declare <2 x half> @llvm.experimental.constrained.uitofp.v2f16.v2i64(<2 x i64>, metadata, metadata)
+
+define <4 x half> @sitofp_v4i32_v4f16(<4 x i32> %x) #0 {
+; CHECK-LABEL: sitofp_v4i32_v4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %result = call <4 x half> @llvm.experimental.constrained.sitofp.v4f16.v4i32(<4 x i32> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x half> %result
+}
+
+define <4 x half> @uitofp_v4i32_v4f16(<4 x i32> %x) #0 {
+; CHECK-LABEL: uitofp_v4i32_v4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %result = call <4 x half> @llvm.experimental.constrained.uitofp.v4f16.v4i32(<4 x i32> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x half> %result
+}
+
+define <2 x half> @sitofp_v2i64_v2f16(<2 x i64> %x) #0 {
+; CHECK-LABEL: sitofp_v2i64_v2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtqq2ph %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %result = call <2 x half> @llvm.experimental.constrained.sitofp.v2f16.v2i64(<2 x i64> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <2 x half> %result
+}
+
+define <2 x half> @uitofp_v2i64_v2f16(<2 x i64> %x) #0 {
+; CHECK-LABEL: uitofp_v2i64_v2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtuqq2ph %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %result = call <2 x half> @llvm.experimental.constrained.uitofp.v2f16.v2i64(<2 x i64> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <2 x half> %result
+}
+
+define <8 x half> @sitofp_v8i1_v8f16(<8 x i1> %x) #0 {
+; CHECK-LABEL: sitofp_v8i1_v8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0
+; CHECK-NEXT: vpsraw $15, %xmm0, %xmm0
+; CHECK-NEXT: vcvtw2ph %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %result = call <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i1(<8 x i1> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <8 x half> %result
+}
+
+define <8 x half> @uitofp_v8i1_v8f16(<8 x i1> %x) #0 {
+; X86-LABEL: uitofp_v8i1_v8f16:
+; X86: # %bb.0:
+; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-NEXT: vcvtuw2ph %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: uitofp_v8i1_v8f16:
+; X64: # %bb.0:
+; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vcvtuw2ph %xmm0, %xmm0
+; X64-NEXT: retq
+ %result = call <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i1(<8 x i1> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <8 x half> %result
+}
+
+define <8 x half> @sitofp_v8i8_v8f16(<8 x i8> %x) #0 {
+; CHECK-LABEL: sitofp_v8i8_v8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpmovsxbw %xmm0, %xmm0
+; CHECK-NEXT: vcvtw2ph %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %result = call <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i8(<8 x i8> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <8 x half> %result
+}
+
+define <8 x half> @uitofp_v8i8_v8f16(<8 x i8> %x) #0 {
+; CHECK-LABEL: uitofp_v8i8_v8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; CHECK-NEXT: vcvtuw2ph %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %result = call <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i8(<8 x i8> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <8 x half> %result
+}
+
+define <8 x half> @sitofp_v8i16_v8f16(<8 x i16> %x) #0 {
+; CHECK-LABEL: sitofp_v8i16_v8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtw2ph %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %result = call <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i16(<8 x i16> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <8 x half> %result
+}
+
+define <8 x half> @uitofp_v8i16_v8f16(<8 x i16> %x) #0 {
+; CHECK-LABEL: uitofp_v8i16_v8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtuw2ph %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %result = call <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i16(<8 x i16> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <8 x half> %result
+}
+
+attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-256-fp16.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-256-fp16.ll
new file mode 100644
index 0000000000000..9eaef5a772fd7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-256-fp16.ll
@@ -0,0 +1,144 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512fp16,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,X64
+
+declare <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i1(<16 x i1>, metadata, metadata)
+declare <16 x half> @llvm.experimental.constrained.uitofp.v16f16.v16i1(<16 x i1>, metadata, metadata)
+declare <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i8(<16 x i8>, metadata, metadata)
+declare <16 x half> @llvm.experimental.constrained.uitofp.v16f16.v16i8(<16 x i8>, metadata, metadata)
+declare <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i16(<16 x i16>, metadata, metadata)
+declare <16 x half> @llvm.experimental.constrained.uitofp.v16f16.v16i16(<16 x i16>, metadata, metadata)
+declare <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i32(<8 x i32>, metadata, metadata)
+declare <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i32(<8 x i32>, metadata, metadata)
+declare <4 x half> @llvm.experimental.constrained.sitofp.v4f16.v4i64(<4 x i64>, metadata, metadata)
+declare <4 x half> @llvm.experimental.constrained.uitofp.v4f16.v4i64(<4 x i64>, metadata, metadata)
+
+define <16 x half> @sitofp_v16i1_v16f16(<16 x i1> %x) #0 {
+; CHECK-LABEL: sitofp_v16i1_v16f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; CHECK-NEXT: vpsllw $15, %ymm0, %ymm0
+; CHECK-NEXT: vpsraw $15, %ymm0, %ymm0
+; CHECK-NEXT: vcvtw2ph %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %result = call <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i1(<16 x i1> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <16 x half> %result
+}
+
+define <16 x half> @uitofp_v16i1_v16f16(<16 x i1> %x) #0 {
+; X86-LABEL: uitofp_v16i1_v16f16:
+; X86: # %bb.0:
+; X86-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; X86-NEXT: vcvtuw2ph %ymm0, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: uitofp_v16i1_v16f16:
+; X64: # %bb.0:
+; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; X64-NEXT: vcvtuw2ph %ymm0, %ymm0
+; X64-NEXT: retq
+ %result = call <16 x half> @llvm.experimental.constrained.uitofp.v16f16.v16i1(<16 x i1> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <16 x half> %result
+}
+
+define <16 x half> @sitofp_v16i8_v16f16(<16 x i8> %x) #0 {
+; CHECK-LABEL: sitofp_v16i8_v16f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0
+; CHECK-NEXT: vcvtw2ph %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %result = call <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i8(<16 x i8> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <16 x half> %result
+}
+
+define <16 x half> @uitofp_v16i8_v16f16(<16 x i8> %x) #0 {
+; CHECK-LABEL: uitofp_v16i8_v16f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; CHECK-NEXT: vcvtuw2ph %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %result = call <16 x half> @llvm.experimental.constrained.uitofp.v16f16.v16i8(<16 x i8> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <16 x half> %result
+}
+
+define <16 x half> @sitofp_v16i16_v16f16(<16 x i16> %x) #0 {
+; CHECK-LABEL: sitofp_v16i16_v16f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtw2ph %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %result = call <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i16(<16 x i16> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <16 x half> %result
+}
+
+define <16 x half> @uitofp_v16i16_v16f16(<16 x i16> %x) #0 {
+; CHECK-LABEL: uitofp_v16i16_v16f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtuw2ph %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %result = call <16 x half> @llvm.experimental.constrained.uitofp.v16f16.v16i16(<16 x i16> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <16 x half> %result
+}
+
+define <8 x half> @sitofp_v8i32_v8f16(<8 x i32> %x) #0 {
+; CHECK-LABEL: sitofp_v8i32_v8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtdq2ph %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: ret{{[l|q]}}
+ %result = call <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i32(<8 x i32> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <8 x half> %result
+}
+
+define <8 x half> @uitofp_v8i32_v8f16(<8 x i32> %x) #0 {
+; CHECK-LABEL: uitofp_v8i32_v8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtudq2ph %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: ret{{[l|q]}}
+ %result = call <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i32(<8 x i32> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <8 x half> %result
+}
+
+define <4 x half> @sitofp_v4i64_v4f16(<4 x i64> %x) #0 {
+; CHECK-LABEL: sitofp_v4i64_v4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtqq2ph %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: ret{{[l|q]}}
+ %result = call <4 x half> @llvm.experimental.constrained.sitofp.v4f16.v4i64(<4 x i64> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x half> %result
+}
+
+define <4 x half> @uitofp_v4i64_v4f16(<4 x i64> %x) #0 {
+; CHECK-LABEL: uitofp_v4i64_v4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtuqq2ph %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: ret{{[l|q]}}
+ %result = call <4 x half> @llvm.experimental.constrained.uitofp.v4f16.v4i64(<4 x i64> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x half> %result
+}
+
+attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-512-fp16.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-512-fp16.ll
new file mode 100644
index 0000000000000..c807af0932b56
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-512-fp16.ll
@@ -0,0 +1,142 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512fp16 -O3 | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16 -O3 | FileCheck %s --check-prefixes=CHECK,X64
+
+declare <32 x half> @llvm.experimental.constrained.sitofp.v32f16.v32i1(<32 x i1>, metadata, metadata)
+declare <32 x half> @llvm.experimental.constrained.uitofp.v32f16.v32i1(<32 x i1>, metadata, metadata)
+declare <32 x half> @llvm.experimental.constrained.sitofp.v32f16.v32i8(<32 x i8>, metadata, metadata)
+declare <32 x half> @llvm.experimental.constrained.uitofp.v32f16.v32i8(<32 x i8>, metadata, metadata)
+declare <32 x half> @llvm.experimental.constrained.sitofp.v32f16.v32i16(<32 x i16>, metadata, metadata)
+declare <32 x half> @llvm.experimental.constrained.uitofp.v32f16.v32i16(<32 x i16>, metadata, metadata)
+declare <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i32(<16 x i32>, metadata, metadata)
+declare <16 x half> @llvm.experimental.constrained.uitofp.v16f16.v16i32(<16 x i32>, metadata, metadata)
+declare <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i64(<8 x i64>, metadata, metadata)
+declare <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i64(<8 x i64>, metadata, metadata)
+
+define <32 x half> @sitofp_v32i1_v32f16(<32 x i1> %x) #0 {
+; CHECK-LABEL: sitofp_v32i1_v32f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; CHECK-NEXT: vpsllw $15, %zmm0, %zmm0
+; CHECK-NEXT: vpsraw $15, %zmm0, %zmm0
+; CHECK-NEXT: vcvtw2ph %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %result = call <32 x half> @llvm.experimental.constrained.sitofp.v32f16.v32i1(<32 x i1> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <32 x half> %result
+}
+
+define <32 x half> @uitofp_v32i1_v32f16(<32 x i1> %x) #0 {
+; X86-LABEL: uitofp_v32i1_v32f16:
+; X86: # %bb.0:
+; X86-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; X86-NEXT: vcvtuw2ph %zmm0, %zmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: uitofp_v32i1_v32f16:
+; X64: # %bb.0:
+; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; X64-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; X64-NEXT: vcvtuw2ph %zmm0, %zmm0
+; X64-NEXT: retq
+ %result = call <32 x half> @llvm.experimental.constrained.uitofp.v32f16.v32i1(<32 x i1> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <32 x half> %result
+}
+
+define <32 x half> @sitofp_v32i8_v32f16(<32 x i8> %x) #0 {
+; CHECK-LABEL: sitofp_v32i8_v32f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpmovsxbw %ymm0, %zmm0
+; CHECK-NEXT: vcvtw2ph %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %result = call <32 x half> @llvm.experimental.constrained.sitofp.v32f16.v32i8(<32 x i8> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <32 x half> %result
+}
+
+define <32 x half> @uitofp_v32i8_v32f16(<32 x i8> %x) #0 {
+; CHECK-LABEL: uitofp_v32i8_v32f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; CHECK-NEXT: vcvtuw2ph %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %result = call <32 x half> @llvm.experimental.constrained.uitofp.v32f16.v32i8(<32 x i8> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <32 x half> %result
+}
+
+define <32 x half> @sitofp_v32i16_v32f16(<32 x i16> %x) #0 {
+; CHECK-LABEL: sitofp_v32i16_v32f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtw2ph %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %result = call <32 x half> @llvm.experimental.constrained.sitofp.v32f16.v32i16(<32 x i16> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <32 x half> %result
+}
+
+define <32 x half> @uitofp_v32i16_v32f16(<32 x i16> %x) #0 {
+; CHECK-LABEL: uitofp_v32i16_v32f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtuw2ph %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %result = call <32 x half> @llvm.experimental.constrained.uitofp.v32f16.v32i16(<32 x i16> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <32 x half> %result
+}
+
+define <16 x half> @sitofp_v16i32_v16f16(<16 x i32> %x) #0 {
+; CHECK-LABEL: sitofp_v16i32_v16f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtdq2ph %zmm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %result = call <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i32(<16 x i32> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <16 x half> %result
+}
+
+define <16 x half> @uitofp_v16i32_v16f16(<16 x i32> %x) #0 {
+; CHECK-LABEL: uitofp_v16i32_v16f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtudq2ph %zmm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %result = call <16 x half> @llvm.experimental.constrained.uitofp.v16f16.v16i32(<16 x i32> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <16 x half> %result
+}
+
+define <8 x half> @sitofp_v8i64_v8f16(<8 x i64> %x) #0 {
+; CHECK-LABEL: sitofp_v8i64_v8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtqq2ph %zmm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: ret{{[l|q]}}
+ %result = call <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i64(<8 x i64> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <8 x half> %result
+}
+
+define <8 x half> @uitofp_v8i64_v8f16(<8 x i64> %x) #0 {
+; CHECK-LABEL: uitofp_v8i64_v8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvtuqq2ph %zmm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: ret{{[l|q]}}
+ %result = call <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i64(<8 x i64> %x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <8 x half> %result
+}
+
+attributes #0 = { strictfp }
diff --git a/llvm/test/MC/Disassembler/X86/avx512fp16.txt b/llvm/test/MC/Disassembler/X86/avx512fp16.txt
index 6ba043ecd1be2..8115431808335 100644
--- a/llvm/test/MC/Disassembler/X86/avx512fp16.txt
+++ b/llvm/test/MC/Disassembler/X86/avx512fp16.txt
@@ -460,3 +460,899 @@
# ATT: vucomish -256(%rdx), %xmm30
# INTEL: vucomish xmm30, word ptr [rdx - 256]
0x62,0x65,0x7c,0x08,0x2e,0x72,0x80
+
+# ATT: vcvtdq2ph %zmm29, %ymm30
+# INTEL: vcvtdq2ph ymm30, zmm29
+0x62,0x05,0x7c,0x48,0x5b,0xf5
+
+# ATT: vcvtdq2ph {rn-sae}, %zmm29, %ymm30
+# INTEL: vcvtdq2ph ymm30, zmm29, {rn-sae}
+0x62,0x05,0x7c,0x18,0x5b,0xf5
+
+# ATT: vcvtdq2ph 268435456(%rbp,%r14,8), %ymm30 {%k7}
+# INTEL: vcvtdq2ph ymm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7c,0x4f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvtdq2ph (%r9){1to16}, %ymm30
+# INTEL: vcvtdq2ph ymm30, dword ptr [r9]{1to16}
+0x62,0x45,0x7c,0x58,0x5b,0x31
+
+# ATT: vcvtdq2ph 8128(%rcx), %ymm30
+# INTEL: vcvtdq2ph ymm30, zmmword ptr [rcx + 8128]
+0x62,0x65,0x7c,0x48,0x5b,0x71,0x7f
+
+# ATT: vcvtdq2ph -512(%rdx){1to16}, %ymm30 {%k7} {z}
+# INTEL: vcvtdq2ph ymm30 {k7} {z}, dword ptr [rdx - 512]{1to16}
+0x62,0x65,0x7c,0xdf,0x5b,0x72,0x80
+
+# ATT: vcvtpd2ph %zmm29, %xmm30
+# INTEL: vcvtpd2ph xmm30, zmm29
+0x62,0x05,0xfd,0x48,0x5a,0xf5
+
+# ATT: vcvtpd2ph {rn-sae}, %zmm29, %xmm30
+# INTEL: vcvtpd2ph xmm30, zmm29, {rn-sae}
+0x62,0x05,0xfd,0x18,0x5a,0xf5
+
+# ATT: vcvtpd2phz 268435456(%rbp,%r14,8), %xmm30 {%k7}
+# INTEL: vcvtpd2ph xmm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0xfd,0x4f,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvtpd2ph (%r9){1to8}, %xmm30
+# INTEL: vcvtpd2ph xmm30, qword ptr [r9]{1to8}
+0x62,0x45,0xfd,0x58,0x5a,0x31
+
+# ATT: vcvtpd2phz 8128(%rcx), %xmm30
+# INTEL: vcvtpd2ph xmm30, zmmword ptr [rcx + 8128]
+0x62,0x65,0xfd,0x48,0x5a,0x71,0x7f
+
+# ATT: vcvtpd2ph -1024(%rdx){1to8}, %xmm30 {%k7} {z}
+# INTEL: vcvtpd2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to8}
+0x62,0x65,0xfd,0xdf,0x5a,0x72,0x80
+
+# ATT: vcvtph2dq %ymm29, %zmm30
+# INTEL: vcvtph2dq zmm30, ymm29
+0x62,0x05,0x7d,0x48,0x5b,0xf5
+
+# ATT: vcvtph2dq {rn-sae}, %ymm29, %zmm30
+# INTEL: vcvtph2dq zmm30, ymm29, {rn-sae}
+0x62,0x05,0x7d,0x18,0x5b,0xf5
+
+# ATT: vcvtph2dq 268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvtph2dq zmm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7d,0x4f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvtph2dq (%r9){1to16}, %zmm30
+# INTEL: vcvtph2dq zmm30, word ptr [r9]{1to16}
+0x62,0x45,0x7d,0x58,0x5b,0x31
+
+# ATT: vcvtph2dq 4064(%rcx), %zmm30
+# INTEL: vcvtph2dq zmm30, ymmword ptr [rcx + 4064]
+0x62,0x65,0x7d,0x48,0x5b,0x71,0x7f
+
+# ATT: vcvtph2dq -256(%rdx){1to16}, %zmm30 {%k7} {z}
+# INTEL: vcvtph2dq zmm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+0x62,0x65,0x7d,0xdf,0x5b,0x72,0x80
+
+# ATT: vcvtph2pd %xmm29, %zmm30
+# INTEL: vcvtph2pd zmm30, xmm29
+0x62,0x05,0x7c,0x48,0x5a,0xf5
+
+# ATT: vcvtph2pd {sae}, %xmm29, %zmm30
+# INTEL: vcvtph2pd zmm30, xmm29, {sae}
+0x62,0x05,0x7c,0x18,0x5a,0xf5
+
+# ATT: vcvtph2pd 268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvtph2pd zmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7c,0x4f,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvtph2pd (%r9){1to8}, %zmm30
+# INTEL: vcvtph2pd zmm30, word ptr [r9]{1to8}
+0x62,0x45,0x7c,0x58,0x5a,0x31
+
+# ATT: vcvtph2pd 2032(%rcx), %zmm30
+# INTEL: vcvtph2pd zmm30, xmmword ptr [rcx + 2032]
+0x62,0x65,0x7c,0x48,0x5a,0x71,0x7f
+
+# ATT: vcvtph2pd -256(%rdx){1to8}, %zmm30 {%k7} {z}
+# INTEL: vcvtph2pd zmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+0x62,0x65,0x7c,0xdf,0x5a,0x72,0x80
+
+# ATT: vcvtph2psx %ymm29, %zmm30
+# INTEL: vcvtph2psx zmm30, ymm29
+0x62,0x06,0x7d,0x48,0x13,0xf5
+
+# ATT: vcvtph2psx {sae}, %ymm29, %zmm30
+# INTEL: vcvtph2psx zmm30, ymm29, {sae}
+0x62,0x06,0x7d,0x18,0x13,0xf5
+
+# ATT: vcvtph2psx 268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvtph2psx zmm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x26,0x7d,0x4f,0x13,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvtph2psx (%r9){1to16}, %zmm30
+# INTEL: vcvtph2psx zmm30, word ptr [r9]{1to16}
+0x62,0x46,0x7d,0x58,0x13,0x31
+
+# ATT: vcvtph2psx 4064(%rcx), %zmm30
+# INTEL: vcvtph2psx zmm30, ymmword ptr [rcx + 4064]
+0x62,0x66,0x7d,0x48,0x13,0x71,0x7f
+
+# ATT: vcvtph2psx -256(%rdx){1to16}, %zmm30 {%k7} {z}
+# INTEL: vcvtph2psx zmm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+0x62,0x66,0x7d,0xdf,0x13,0x72,0x80
+
+# ATT: vcvtph2qq %xmm29, %zmm30
+# INTEL: vcvtph2qq zmm30, xmm29
+0x62,0x05,0x7d,0x48,0x7b,0xf5
+
+# ATT: vcvtph2qq {rn-sae}, %xmm29, %zmm30
+# INTEL: vcvtph2qq zmm30, xmm29, {rn-sae}
+0x62,0x05,0x7d,0x18,0x7b,0xf5
+
+# ATT: vcvtph2qq 268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvtph2qq zmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7d,0x4f,0x7b,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvtph2qq (%r9){1to8}, %zmm30
+# INTEL: vcvtph2qq zmm30, word ptr [r9]{1to8}
+0x62,0x45,0x7d,0x58,0x7b,0x31
+
+# ATT: vcvtph2qq 2032(%rcx), %zmm30
+# INTEL: vcvtph2qq zmm30, xmmword ptr [rcx + 2032]
+0x62,0x65,0x7d,0x48,0x7b,0x71,0x7f
+
+# ATT: vcvtph2qq -256(%rdx){1to8}, %zmm30 {%k7} {z}
+# INTEL: vcvtph2qq zmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+0x62,0x65,0x7d,0xdf,0x7b,0x72,0x80
+
+# ATT: vcvtph2udq %ymm29, %zmm30
+# INTEL: vcvtph2udq zmm30, ymm29
+0x62,0x05,0x7c,0x48,0x79,0xf5
+
+# ATT: vcvtph2udq {rn-sae}, %ymm29, %zmm30
+# INTEL: vcvtph2udq zmm30, ymm29, {rn-sae}
+0x62,0x05,0x7c,0x18,0x79,0xf5
+
+# ATT: vcvtph2udq 268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvtph2udq zmm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7c,0x4f,0x79,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvtph2udq (%r9){1to16}, %zmm30
+# INTEL: vcvtph2udq zmm30, word ptr [r9]{1to16}
+0x62,0x45,0x7c,0x58,0x79,0x31
+
+# ATT: vcvtph2udq 4064(%rcx), %zmm30
+# INTEL: vcvtph2udq zmm30, ymmword ptr [rcx + 4064]
+0x62,0x65,0x7c,0x48,0x79,0x71,0x7f
+
+# ATT: vcvtph2udq -256(%rdx){1to16}, %zmm30 {%k7} {z}
+# INTEL: vcvtph2udq zmm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+0x62,0x65,0x7c,0xdf,0x79,0x72,0x80
+
+# ATT: vcvtph2uqq %xmm29, %zmm30
+# INTEL: vcvtph2uqq zmm30, xmm29
+0x62,0x05,0x7d,0x48,0x79,0xf5
+
+# ATT: vcvtph2uqq {rn-sae}, %xmm29, %zmm30
+# INTEL: vcvtph2uqq zmm30, xmm29, {rn-sae}
+0x62,0x05,0x7d,0x18,0x79,0xf5
+
+# ATT: vcvtph2uqq 268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvtph2uqq zmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7d,0x4f,0x79,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvtph2uqq (%r9){1to8}, %zmm30
+# INTEL: vcvtph2uqq zmm30, word ptr [r9]{1to8}
+0x62,0x45,0x7d,0x58,0x79,0x31
+
+# ATT: vcvtph2uqq 2032(%rcx), %zmm30
+# INTEL: vcvtph2uqq zmm30, xmmword ptr [rcx + 2032]
+0x62,0x65,0x7d,0x48,0x79,0x71,0x7f
+
+# ATT: vcvtph2uqq -256(%rdx){1to8}, %zmm30 {%k7} {z}
+# INTEL: vcvtph2uqq zmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+0x62,0x65,0x7d,0xdf,0x79,0x72,0x80
+
+# ATT: vcvtph2uw %zmm29, %zmm30
+# INTEL: vcvtph2uw zmm30, zmm29
+0x62,0x05,0x7c,0x48,0x7d,0xf5
+
+# ATT: vcvtph2uw {rn-sae}, %zmm29, %zmm30
+# INTEL: vcvtph2uw zmm30, zmm29, {rn-sae}
+0x62,0x05,0x7c,0x18,0x7d,0xf5
+
+# ATT: vcvtph2uw 268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvtph2uw zmm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7c,0x4f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvtph2uw (%r9){1to32}, %zmm30
+# INTEL: vcvtph2uw zmm30, word ptr [r9]{1to32}
+0x62,0x45,0x7c,0x58,0x7d,0x31
+
+# ATT: vcvtph2uw 8128(%rcx), %zmm30
+# INTEL: vcvtph2uw zmm30, zmmword ptr [rcx + 8128]
+0x62,0x65,0x7c,0x48,0x7d,0x71,0x7f
+
+# ATT: vcvtph2uw -256(%rdx){1to32}, %zmm30 {%k7} {z}
+# INTEL: vcvtph2uw zmm30 {k7} {z}, word ptr [rdx - 256]{1to32}
+0x62,0x65,0x7c,0xdf,0x7d,0x72,0x80
+
+# ATT: vcvtph2w %zmm29, %zmm30
+# INTEL: vcvtph2w zmm30, zmm29
+0x62,0x05,0x7d,0x48,0x7d,0xf5
+
+# ATT: vcvtph2w {rn-sae}, %zmm29, %zmm30
+# INTEL: vcvtph2w zmm30, zmm29, {rn-sae}
+0x62,0x05,0x7d,0x18,0x7d,0xf5
+
+# ATT: vcvtph2w 268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvtph2w zmm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7d,0x4f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvtph2w (%r9){1to32}, %zmm30
+# INTEL: vcvtph2w zmm30, word ptr [r9]{1to32}
+0x62,0x45,0x7d,0x58,0x7d,0x31
+
+# ATT: vcvtph2w 8128(%rcx), %zmm30
+# INTEL: vcvtph2w zmm30, zmmword ptr [rcx + 8128]
+0x62,0x65,0x7d,0x48,0x7d,0x71,0x7f
+
+# ATT: vcvtph2w -256(%rdx){1to32}, %zmm30 {%k7} {z}
+# INTEL: vcvtph2w zmm30 {k7} {z}, word ptr [rdx - 256]{1to32}
+0x62,0x65,0x7d,0xdf,0x7d,0x72,0x80
+
+# ATT: vcvtps2phx %zmm29, %ymm30
+# INTEL: vcvtps2phx ymm30, zmm29
+0x62,0x05,0x7d,0x48,0x1d,0xf5
+
+# ATT: vcvtps2phx {rn-sae}, %zmm29, %ymm30
+# INTEL: vcvtps2phx ymm30, zmm29, {rn-sae}
+0x62,0x05,0x7d,0x18,0x1d,0xf5
+
+# ATT: vcvtps2phx 268435456(%rbp,%r14,8), %ymm30 {%k7}
+# INTEL: vcvtps2phx ymm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7d,0x4f,0x1d,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvtps2phx (%r9){1to16}, %ymm30
+# INTEL: vcvtps2phx ymm30, dword ptr [r9]{1to16}
+0x62,0x45,0x7d,0x58,0x1d,0x31
+
+# ATT: vcvtps2phx 8128(%rcx), %ymm30
+# INTEL: vcvtps2phx ymm30, zmmword ptr [rcx + 8128]
+0x62,0x65,0x7d,0x48,0x1d,0x71,0x7f
+
+# ATT: vcvtps2phx -512(%rdx){1to16}, %ymm30 {%k7} {z}
+# INTEL: vcvtps2phx ymm30 {k7} {z}, dword ptr [rdx - 512]{1to16}
+0x62,0x65,0x7d,0xdf,0x1d,0x72,0x80
+
+# ATT: vcvtqq2ph %zmm29, %xmm30
+# INTEL: vcvtqq2ph xmm30, zmm29
+0x62,0x05,0xfc,0x48,0x5b,0xf5
+
+# ATT: vcvtqq2ph {rn-sae}, %zmm29, %xmm30
+# INTEL: vcvtqq2ph xmm30, zmm29, {rn-sae}
+0x62,0x05,0xfc,0x18,0x5b,0xf5
+
+# ATT: vcvtqq2phz 268435456(%rbp,%r14,8), %xmm30 {%k7}
+# INTEL: vcvtqq2ph xmm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0xfc,0x4f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvtqq2ph (%r9){1to8}, %xmm30
+# INTEL: vcvtqq2ph xmm30, qword ptr [r9]{1to8}
+0x62,0x45,0xfc,0x58,0x5b,0x31
+
+# ATT: vcvtqq2phz 8128(%rcx), %xmm30
+# INTEL: vcvtqq2ph xmm30, zmmword ptr [rcx + 8128]
+0x62,0x65,0xfc,0x48,0x5b,0x71,0x7f
+
+# ATT: vcvtqq2ph -1024(%rdx){1to8}, %xmm30 {%k7} {z}
+# INTEL: vcvtqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to8}
+0x62,0x65,0xfc,0xdf,0x5b,0x72,0x80
+
+# ATT: vcvtsd2sh %xmm28, %xmm29, %xmm30
+# INTEL: vcvtsd2sh xmm30, xmm29, xmm28
+0x62,0x05,0x97,0x00,0x5a,0xf4
+
+# ATT: vcvtsd2sh {rn-sae}, %xmm28, %xmm29, %xmm30
+# INTEL: vcvtsd2sh xmm30, xmm29, xmm28, {rn-sae}
+0x62,0x05,0x97,0x10,0x5a,0xf4
+
+# ATT: vcvtsd2sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
+# INTEL: vcvtsd2sh xmm30 {k7}, xmm29, qword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x97,0x07,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvtsd2sh (%r9), %xmm29, %xmm30
+# INTEL: vcvtsd2sh xmm30, xmm29, qword ptr [r9]
+0x62,0x45,0x97,0x00,0x5a,0x31
+
+# ATT: vcvtsd2sh 1016(%rcx), %xmm29, %xmm30
+# INTEL: vcvtsd2sh xmm30, xmm29, qword ptr [rcx + 1016]
+0x62,0x65,0x97,0x00,0x5a,0x71,0x7f
+
+# ATT: vcvtsd2sh -1024(%rdx), %xmm29, %xmm30 {%k7} {z}
+# INTEL: vcvtsd2sh xmm30 {k7} {z}, xmm29, qword ptr [rdx - 1024]
+0x62,0x65,0x97,0x87,0x5a,0x72,0x80
+
+# ATT: vcvtsh2sd %xmm28, %xmm29, %xmm30
+# INTEL: vcvtsh2sd xmm30, xmm29, xmm28
+0x62,0x05,0x16,0x00,0x5a,0xf4
+
+# ATT: vcvtsh2sd {sae}, %xmm28, %xmm29, %xmm30
+# INTEL: vcvtsh2sd xmm30, xmm29, xmm28, {sae}
+0x62,0x05,0x16,0x10,0x5a,0xf4
+
+# ATT: vcvtsh2sd 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
+# INTEL: vcvtsh2sd xmm30 {k7}, xmm29, word ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x16,0x07,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvtsh2sd (%r9), %xmm29, %xmm30
+# INTEL: vcvtsh2sd xmm30, xmm29, word ptr [r9]
+0x62,0x45,0x16,0x00,0x5a,0x31
+
+# ATT: vcvtsh2sd 254(%rcx), %xmm29, %xmm30
+# INTEL: vcvtsh2sd xmm30, xmm29, word ptr [rcx + 254]
+0x62,0x65,0x16,0x00,0x5a,0x71,0x7f
+
+# ATT: vcvtsh2sd -256(%rdx), %xmm29, %xmm30 {%k7} {z}
+# INTEL: vcvtsh2sd xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]
+0x62,0x65,0x16,0x87,0x5a,0x72,0x80
+
+# ATT: vcvtsh2si %xmm30, %edx
+# INTEL: vcvtsh2si edx, xmm30
+0x62,0x95,0x7e,0x08,0x2d,0xd6
+
+# ATT: vcvtsh2si {rn-sae}, %xmm30, %edx
+# INTEL: vcvtsh2si edx, xmm30, {rn-sae}
+0x62,0x95,0x7e,0x18,0x2d,0xd6
+
+# ATT: vcvtsh2si %xmm30, %r12
+# INTEL: vcvtsh2si r12, xmm30
+0x62,0x15,0xfe,0x08,0x2d,0xe6
+
+# ATT: vcvtsh2si {rn-sae}, %xmm30, %r12
+# INTEL: vcvtsh2si r12, xmm30, {rn-sae}
+0x62,0x15,0xfe,0x18,0x2d,0xe6
+
+# ATT: vcvtsh2si 268435456(%rbp,%r14,8), %edx
+# INTEL: vcvtsh2si edx, word ptr [rbp + 8*r14 + 268435456]
+0x62,0xb5,0x7e,0x08,0x2d,0x94,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvtsh2si (%r9), %edx
+# INTEL: vcvtsh2si edx, word ptr [r9]
+0x62,0xd5,0x7e,0x08,0x2d,0x11
+
+# ATT: vcvtsh2si 254(%rcx), %edx
+# INTEL: vcvtsh2si edx, word ptr [rcx + 254]
+0x62,0xf5,0x7e,0x08,0x2d,0x51,0x7f
+
+# ATT: vcvtsh2si -256(%rdx), %edx
+# INTEL: vcvtsh2si edx, word ptr [rdx - 256]
+0x62,0xf5,0x7e,0x08,0x2d,0x52,0x80
+
+# ATT: vcvtsh2si 268435456(%rbp,%r14,8), %r12
+# INTEL: vcvtsh2si r12, word ptr [rbp + 8*r14 + 268435456]
+0x62,0x35,0xfe,0x08,0x2d,0xa4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvtsh2si (%r9), %r12
+# INTEL: vcvtsh2si r12, word ptr [r9]
+0x62,0x55,0xfe,0x08,0x2d,0x21
+
+# ATT: vcvtsh2si 254(%rcx), %r12
+# INTEL: vcvtsh2si r12, word ptr [rcx + 254]
+0x62,0x75,0xfe,0x08,0x2d,0x61,0x7f
+
+# ATT: vcvtsh2si -256(%rdx), %r12
+# INTEL: vcvtsh2si r12, word ptr [rdx - 256]
+0x62,0x75,0xfe,0x08,0x2d,0x62,0x80
+
+# ATT: vcvtsh2ss %xmm28, %xmm29, %xmm30
+# INTEL: vcvtsh2ss xmm30, xmm29, xmm28
+0x62,0x06,0x14,0x00,0x13,0xf4
+
+# ATT: vcvtsh2ss {sae}, %xmm28, %xmm29, %xmm30
+# INTEL: vcvtsh2ss xmm30, xmm29, xmm28, {sae}
+0x62,0x06,0x14,0x10,0x13,0xf4
+
+# ATT: vcvtsh2ss 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
+# INTEL: vcvtsh2ss xmm30 {k7}, xmm29, word ptr [rbp + 8*r14 + 268435456]
+0x62,0x26,0x14,0x07,0x13,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvtsh2ss (%r9), %xmm29, %xmm30
+# INTEL: vcvtsh2ss xmm30, xmm29, word ptr [r9]
+0x62,0x46,0x14,0x00,0x13,0x31
+
+# ATT: vcvtsh2ss 254(%rcx), %xmm29, %xmm30
+# INTEL: vcvtsh2ss xmm30, xmm29, word ptr [rcx + 254]
+0x62,0x66,0x14,0x00,0x13,0x71,0x7f
+
+# ATT: vcvtsh2ss -256(%rdx), %xmm29, %xmm30 {%k7} {z}
+# INTEL: vcvtsh2ss xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]
+0x62,0x66,0x14,0x87,0x13,0x72,0x80
+
+# ATT: vcvtsh2usi %xmm30, %edx
+# INTEL: vcvtsh2usi edx, xmm30
+0x62,0x95,0x7e,0x08,0x79,0xd6
+
+# ATT: vcvtsh2usi {rn-sae}, %xmm30, %edx
+# INTEL: vcvtsh2usi edx, xmm30, {rn-sae}
+0x62,0x95,0x7e,0x18,0x79,0xd6
+
+# ATT: vcvtsh2usi %xmm30, %r12
+# INTEL: vcvtsh2usi r12, xmm30
+0x62,0x15,0xfe,0x08,0x79,0xe6
+
+# ATT: vcvtsh2usi {rn-sae}, %xmm30, %r12
+# INTEL: vcvtsh2usi r12, xmm30, {rn-sae}
+0x62,0x15,0xfe,0x18,0x79,0xe6
+
+# ATT: vcvtsh2usi 268435456(%rbp,%r14,8), %edx
+# INTEL: vcvtsh2usi edx, word ptr [rbp + 8*r14 + 268435456]
+0x62,0xb5,0x7e,0x08,0x79,0x94,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvtsh2usi (%r9), %edx
+# INTEL: vcvtsh2usi edx, word ptr [r9]
+0x62,0xd5,0x7e,0x08,0x79,0x11
+
+# ATT: vcvtsh2usi 254(%rcx), %edx
+# INTEL: vcvtsh2usi edx, word ptr [rcx + 254]
+0x62,0xf5,0x7e,0x08,0x79,0x51,0x7f
+
+# ATT: vcvtsh2usi -256(%rdx), %edx
+# INTEL: vcvtsh2usi edx, word ptr [rdx - 256]
+0x62,0xf5,0x7e,0x08,0x79,0x52,0x80
+
+# ATT: vcvtsh2usi 268435456(%rbp,%r14,8), %r12
+# INTEL: vcvtsh2usi r12, word ptr [rbp + 8*r14 + 268435456]
+0x62,0x35,0xfe,0x08,0x79,0xa4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvtsh2usi (%r9), %r12
+# INTEL: vcvtsh2usi r12, word ptr [r9]
+0x62,0x55,0xfe,0x08,0x79,0x21
+
+# ATT: vcvtsh2usi 254(%rcx), %r12
+# INTEL: vcvtsh2usi r12, word ptr [rcx + 254]
+0x62,0x75,0xfe,0x08,0x79,0x61,0x7f
+
+# ATT: vcvtsh2usi -256(%rdx), %r12
+# INTEL: vcvtsh2usi r12, word ptr [rdx - 256]
+0x62,0x75,0xfe,0x08,0x79,0x62,0x80
+
+# ATT: vcvtsi2sh %r12, %xmm29, %xmm30
+# INTEL: vcvtsi2sh xmm30, xmm29, r12
+0x62,0x45,0x96,0x00,0x2a,0xf4
+
+# ATT: vcvtsi2sh %r12, {rn-sae}, %xmm29, %xmm30
+# INTEL: vcvtsi2sh xmm30, xmm29, {rn-sae}, r12
+0x62,0x45,0x96,0x10,0x2a,0xf4
+
+# ATT: vcvtsi2sh %edx, %xmm29, %xmm30
+# INTEL: vcvtsi2sh xmm30, xmm29, edx
+0x62,0x65,0x16,0x00,0x2a,0xf2
+
+# ATT: vcvtsi2sh %edx, {rn-sae}, %xmm29, %xmm30
+# INTEL: vcvtsi2sh xmm30, xmm29, {rn-sae}, edx
+0x62,0x65,0x16,0x10,0x2a,0xf2
+
+# ATT: vcvtsi2shl 268435456(%rbp,%r14,8), %xmm29, %xmm30
+# INTEL: vcvtsi2sh xmm30, xmm29, dword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x16,0x00,0x2a,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvtsi2shl (%r9), %xmm29, %xmm30
+# INTEL: vcvtsi2sh xmm30, xmm29, dword ptr [r9]
+0x62,0x45,0x16,0x00,0x2a,0x31
+
+# ATT: vcvtsi2shl 508(%rcx), %xmm29, %xmm30
+# INTEL: vcvtsi2sh xmm30, xmm29, dword ptr [rcx + 508]
+0x62,0x65,0x16,0x00,0x2a,0x71,0x7f
+
+# ATT: vcvtsi2shl -512(%rdx), %xmm29, %xmm30
+# INTEL: vcvtsi2sh xmm30, xmm29, dword ptr [rdx - 512]
+0x62,0x65,0x16,0x00,0x2a,0x72,0x80
+
+# ATT: vcvtsi2shq 1016(%rcx), %xmm29, %xmm30
+# INTEL: vcvtsi2sh xmm30, xmm29, qword ptr [rcx + 1016]
+0x62,0x65,0x96,0x00,0x2a,0x71,0x7f
+
+# ATT: vcvtsi2shq -1024(%rdx), %xmm29, %xmm30
+# INTEL: vcvtsi2sh xmm30, xmm29, qword ptr [rdx - 1024]
+0x62,0x65,0x96,0x00,0x2a,0x72,0x80
+
+# ATT: vcvtss2sh %xmm28, %xmm29, %xmm30
+# INTEL: vcvtss2sh xmm30, xmm29, xmm28
+0x62,0x05,0x14,0x00,0x1d,0xf4
+
+# ATT: vcvtss2sh {rn-sae}, %xmm28, %xmm29, %xmm30
+# INTEL: vcvtss2sh xmm30, xmm29, xmm28, {rn-sae}
+0x62,0x05,0x14,0x10,0x1d,0xf4
+
+# ATT: vcvtss2sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
+# INTEL: vcvtss2sh xmm30 {k7}, xmm29, dword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x14,0x07,0x1d,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvtss2sh (%r9), %xmm29, %xmm30
+# INTEL: vcvtss2sh xmm30, xmm29, dword ptr [r9]
+0x62,0x45,0x14,0x00,0x1d,0x31
+
+# ATT: vcvtss2sh 508(%rcx), %xmm29, %xmm30
+# INTEL: vcvtss2sh xmm30, xmm29, dword ptr [rcx + 508]
+0x62,0x65,0x14,0x00,0x1d,0x71,0x7f
+
+# ATT: vcvtss2sh -512(%rdx), %xmm29, %xmm30 {%k7} {z}
+# INTEL: vcvtss2sh xmm30 {k7} {z}, xmm29, dword ptr [rdx - 512]
+0x62,0x65,0x14,0x87,0x1d,0x72,0x80
+
+# ATT: vcvttph2dq %ymm29, %zmm30
+# INTEL: vcvttph2dq zmm30, ymm29
+0x62,0x05,0x7e,0x48,0x5b,0xf5
+
+# ATT: vcvttph2dq {sae}, %ymm29, %zmm30
+# INTEL: vcvttph2dq zmm30, ymm29, {sae}
+0x62,0x05,0x7e,0x18,0x5b,0xf5
+
+# ATT: vcvttph2dq 268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvttph2dq zmm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7e,0x4f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttph2dq (%r9){1to16}, %zmm30
+# INTEL: vcvttph2dq zmm30, word ptr [r9]{1to16}
+0x62,0x45,0x7e,0x58,0x5b,0x31
+
+# ATT: vcvttph2dq 4064(%rcx), %zmm30
+# INTEL: vcvttph2dq zmm30, ymmword ptr [rcx + 4064]
+0x62,0x65,0x7e,0x48,0x5b,0x71,0x7f
+
+# ATT: vcvttph2dq -256(%rdx){1to16}, %zmm30 {%k7} {z}
+# INTEL: vcvttph2dq zmm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+0x62,0x65,0x7e,0xdf,0x5b,0x72,0x80
+
+# ATT: vcvttph2qq %xmm29, %zmm30
+# INTEL: vcvttph2qq zmm30, xmm29
+0x62,0x05,0x7d,0x48,0x7a,0xf5
+
+# ATT: vcvttph2qq {sae}, %xmm29, %zmm30
+# INTEL: vcvttph2qq zmm30, xmm29, {sae}
+0x62,0x05,0x7d,0x18,0x7a,0xf5
+
+# ATT: vcvttph2qq 268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvttph2qq zmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7d,0x4f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttph2qq (%r9){1to8}, %zmm30
+# INTEL: vcvttph2qq zmm30, word ptr [r9]{1to8}
+0x62,0x45,0x7d,0x58,0x7a,0x31
+
+# ATT: vcvttph2qq 2032(%rcx), %zmm30
+# INTEL: vcvttph2qq zmm30, xmmword ptr [rcx + 2032]
+0x62,0x65,0x7d,0x48,0x7a,0x71,0x7f
+
+# ATT: vcvttph2qq -256(%rdx){1to8}, %zmm30 {%k7} {z}
+# INTEL: vcvttph2qq zmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+0x62,0x65,0x7d,0xdf,0x7a,0x72,0x80
+
+# ATT: vcvttph2udq %ymm29, %zmm30
+# INTEL: vcvttph2udq zmm30, ymm29
+0x62,0x05,0x7c,0x48,0x78,0xf5
+
+# ATT: vcvttph2udq {sae}, %ymm29, %zmm30
+# INTEL: vcvttph2udq zmm30, ymm29, {sae}
+0x62,0x05,0x7c,0x18,0x78,0xf5
+
+# ATT: vcvttph2udq 268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvttph2udq zmm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7c,0x4f,0x78,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttph2udq (%r9){1to16}, %zmm30
+# INTEL: vcvttph2udq zmm30, word ptr [r9]{1to16}
+0x62,0x45,0x7c,0x58,0x78,0x31
+
+# ATT: vcvttph2udq 4064(%rcx), %zmm30
+# INTEL: vcvttph2udq zmm30, ymmword ptr [rcx + 4064]
+0x62,0x65,0x7c,0x48,0x78,0x71,0x7f
+
+# ATT: vcvttph2udq -256(%rdx){1to16}, %zmm30 {%k7} {z}
+# INTEL: vcvttph2udq zmm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+0x62,0x65,0x7c,0xdf,0x78,0x72,0x80
+
+# ATT: vcvttph2uqq %xmm29, %zmm30
+# INTEL: vcvttph2uqq zmm30, xmm29
+0x62,0x05,0x7d,0x48,0x78,0xf5
+
+# ATT: vcvttph2uqq {sae}, %xmm29, %zmm30
+# INTEL: vcvttph2uqq zmm30, xmm29, {sae}
+0x62,0x05,0x7d,0x18,0x78,0xf5
+
+# ATT: vcvttph2uqq 268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvttph2uqq zmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7d,0x4f,0x78,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttph2uqq (%r9){1to8}, %zmm30
+# INTEL: vcvttph2uqq zmm30, word ptr [r9]{1to8}
+0x62,0x45,0x7d,0x58,0x78,0x31
+
+# ATT: vcvttph2uqq 2032(%rcx), %zmm30
+# INTEL: vcvttph2uqq zmm30, xmmword ptr [rcx + 2032]
+0x62,0x65,0x7d,0x48,0x78,0x71,0x7f
+
+# ATT: vcvttph2uqq -256(%rdx){1to8}, %zmm30 {%k7} {z}
+# INTEL: vcvttph2uqq zmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+0x62,0x65,0x7d,0xdf,0x78,0x72,0x80
+
+# ATT: vcvttph2uw %zmm29, %zmm30
+# INTEL: vcvttph2uw zmm30, zmm29
+0x62,0x05,0x7c,0x48,0x7c,0xf5
+
+# ATT: vcvttph2uw {sae}, %zmm29, %zmm30
+# INTEL: vcvttph2uw zmm30, zmm29, {sae}
+0x62,0x05,0x7c,0x18,0x7c,0xf5
+
+# ATT: vcvttph2uw 268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvttph2uw zmm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7c,0x4f,0x7c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttph2uw (%r9){1to32}, %zmm30
+# INTEL: vcvttph2uw zmm30, word ptr [r9]{1to32}
+0x62,0x45,0x7c,0x58,0x7c,0x31
+
+# ATT: vcvttph2uw 8128(%rcx), %zmm30
+# INTEL: vcvttph2uw zmm30, zmmword ptr [rcx + 8128]
+0x62,0x65,0x7c,0x48,0x7c,0x71,0x7f
+
+# ATT: vcvttph2uw -256(%rdx){1to32}, %zmm30 {%k7} {z}
+# INTEL: vcvttph2uw zmm30 {k7} {z}, word ptr [rdx - 256]{1to32}
+0x62,0x65,0x7c,0xdf,0x7c,0x72,0x80
+
+# ATT: vcvttph2w %zmm29, %zmm30
+# INTEL: vcvttph2w zmm30, zmm29
+0x62,0x05,0x7d,0x48,0x7c,0xf5
+
+# ATT: vcvttph2w {sae}, %zmm29, %zmm30
+# INTEL: vcvttph2w zmm30, zmm29, {sae}
+0x62,0x05,0x7d,0x18,0x7c,0xf5
+
+# ATT: vcvttph2w 268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvttph2w zmm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7d,0x4f,0x7c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttph2w (%r9){1to32}, %zmm30
+# INTEL: vcvttph2w zmm30, word ptr [r9]{1to32}
+0x62,0x45,0x7d,0x58,0x7c,0x31
+
+# ATT: vcvttph2w 8128(%rcx), %zmm30
+# INTEL: vcvttph2w zmm30, zmmword ptr [rcx + 8128]
+0x62,0x65,0x7d,0x48,0x7c,0x71,0x7f
+
+# ATT: vcvttph2w -256(%rdx){1to32}, %zmm30 {%k7} {z}
+# INTEL: vcvttph2w zmm30 {k7} {z}, word ptr [rdx - 256]{1to32}
+0x62,0x65,0x7d,0xdf,0x7c,0x72,0x80
+
+# ATT: vcvttsh2si %xmm30, %edx
+# INTEL: vcvttsh2si edx, xmm30
+0x62,0x95,0x7e,0x08,0x2c,0xd6
+
+# ATT: vcvttsh2si {sae}, %xmm30, %edx
+# INTEL: vcvttsh2si edx, xmm30, {sae}
+0x62,0x95,0x7e,0x18,0x2c,0xd6
+
+# ATT: vcvttsh2si %xmm30, %r12
+# INTEL: vcvttsh2si r12, xmm30
+0x62,0x15,0xfe,0x08,0x2c,0xe6
+
+# ATT: vcvttsh2si {sae}, %xmm30, %r12
+# INTEL: vcvttsh2si r12, xmm30, {sae}
+0x62,0x15,0xfe,0x18,0x2c,0xe6
+
+# ATT: vcvttsh2si 268435456(%rbp,%r14,8), %edx
+# INTEL: vcvttsh2si edx, word ptr [rbp + 8*r14 + 268435456]
+0x62,0xb5,0x7e,0x08,0x2c,0x94,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttsh2si (%r9), %edx
+# INTEL: vcvttsh2si edx, word ptr [r9]
+0x62,0xd5,0x7e,0x08,0x2c,0x11
+
+# ATT: vcvttsh2si 254(%rcx), %edx
+# INTEL: vcvttsh2si edx, word ptr [rcx + 254]
+0x62,0xf5,0x7e,0x08,0x2c,0x51,0x7f
+
+# ATT: vcvttsh2si -256(%rdx), %edx
+# INTEL: vcvttsh2si edx, word ptr [rdx - 256]
+0x62,0xf5,0x7e,0x08,0x2c,0x52,0x80
+
+# ATT: vcvttsh2si 268435456(%rbp,%r14,8), %r12
+# INTEL: vcvttsh2si r12, word ptr [rbp + 8*r14 + 268435456]
+0x62,0x35,0xfe,0x08,0x2c,0xa4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttsh2si (%r9), %r12
+# INTEL: vcvttsh2si r12, word ptr [r9]
+0x62,0x55,0xfe,0x08,0x2c,0x21
+
+# ATT: vcvttsh2si 254(%rcx), %r12
+# INTEL: vcvttsh2si r12, word ptr [rcx + 254]
+0x62,0x75,0xfe,0x08,0x2c,0x61,0x7f
+
+# ATT: vcvttsh2si -256(%rdx), %r12
+# INTEL: vcvttsh2si r12, word ptr [rdx - 256]
+0x62,0x75,0xfe,0x08,0x2c,0x62,0x80
+
+# ATT: vcvttsh2usi %xmm30, %edx
+# INTEL: vcvttsh2usi edx, xmm30
+0x62,0x95,0x7e,0x08,0x78,0xd6
+
+# ATT: vcvttsh2usi {sae}, %xmm30, %edx
+# INTEL: vcvttsh2usi edx, xmm30, {sae}
+0x62,0x95,0x7e,0x18,0x78,0xd6
+
+# ATT: vcvttsh2usi %xmm30, %r12
+# INTEL: vcvttsh2usi r12, xmm30
+0x62,0x15,0xfe,0x08,0x78,0xe6
+
+# ATT: vcvttsh2usi {sae}, %xmm30, %r12
+# INTEL: vcvttsh2usi r12, xmm30, {sae}
+0x62,0x15,0xfe,0x18,0x78,0xe6
+
+# ATT: vcvttsh2usi 268435456(%rbp,%r14,8), %edx
+# INTEL: vcvttsh2usi edx, word ptr [rbp + 8*r14 + 268435456]
+0x62,0xb5,0x7e,0x08,0x78,0x94,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttsh2usi (%r9), %edx
+# INTEL: vcvttsh2usi edx, word ptr [r9]
+0x62,0xd5,0x7e,0x08,0x78,0x11
+
+# ATT: vcvttsh2usi 254(%rcx), %edx
+# INTEL: vcvttsh2usi edx, word ptr [rcx + 254]
+0x62,0xf5,0x7e,0x08,0x78,0x51,0x7f
+
+# ATT: vcvttsh2usi -256(%rdx), %edx
+# INTEL: vcvttsh2usi edx, word ptr [rdx - 256]
+0x62,0xf5,0x7e,0x08,0x78,0x52,0x80
+
+# ATT: vcvttsh2usi 268435456(%rbp,%r14,8), %r12
+# INTEL: vcvttsh2usi r12, word ptr [rbp + 8*r14 + 268435456]
+0x62,0x35,0xfe,0x08,0x78,0xa4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttsh2usi (%r9), %r12
+# INTEL: vcvttsh2usi r12, word ptr [r9]
+0x62,0x55,0xfe,0x08,0x78,0x21
+
+# ATT: vcvttsh2usi 254(%rcx), %r12
+# INTEL: vcvttsh2usi r12, word ptr [rcx + 254]
+0x62,0x75,0xfe,0x08,0x78,0x61,0x7f
+
+# ATT: vcvttsh2usi -256(%rdx), %r12
+# INTEL: vcvttsh2usi r12, word ptr [rdx - 256]
+0x62,0x75,0xfe,0x08,0x78,0x62,0x80
+
+# ATT: vcvtudq2ph %zmm29, %ymm30
+# INTEL: vcvtudq2ph ymm30, zmm29
+0x62,0x05,0x7f,0x48,0x7a,0xf5
+
+# ATT: vcvtudq2ph {rn-sae}, %zmm29, %ymm30
+# INTEL: vcvtudq2ph ymm30, zmm29, {rn-sae}
+0x62,0x05,0x7f,0x18,0x7a,0xf5
+
+# ATT: vcvtudq2ph 268435456(%rbp,%r14,8), %ymm30 {%k7}
+# INTEL: vcvtudq2ph ymm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7f,0x4f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvtudq2ph (%r9){1to16}, %ymm30
+# INTEL: vcvtudq2ph ymm30, dword ptr [r9]{1to16}
+0x62,0x45,0x7f,0x58,0x7a,0x31
+
+# ATT: vcvtudq2ph 8128(%rcx), %ymm30
+# INTEL: vcvtudq2ph ymm30, zmmword ptr [rcx + 8128]
+0x62,0x65,0x7f,0x48,0x7a,0x71,0x7f
+
+# ATT: vcvtudq2ph -512(%rdx){1to16}, %ymm30 {%k7} {z}
+# INTEL: vcvtudq2ph ymm30 {k7} {z}, dword ptr [rdx - 512]{1to16}
+0x62,0x65,0x7f,0xdf,0x7a,0x72,0x80
+
+# ATT: vcvtuqq2ph %zmm29, %xmm30
+# INTEL: vcvtuqq2ph xmm30, zmm29
+0x62,0x05,0xff,0x48,0x7a,0xf5
+
+# ATT: vcvtuqq2ph {rn-sae}, %zmm29, %xmm30
+# INTEL: vcvtuqq2ph xmm30, zmm29, {rn-sae}
+0x62,0x05,0xff,0x18,0x7a,0xf5
+
+# ATT: vcvtuqq2phz 268435456(%rbp,%r14,8), %xmm30 {%k7}
+# INTEL: vcvtuqq2ph xmm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0xff,0x4f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvtuqq2ph (%r9){1to8}, %xmm30
+# INTEL: vcvtuqq2ph xmm30, qword ptr [r9]{1to8}
+0x62,0x45,0xff,0x58,0x7a,0x31
+
+# ATT: vcvtuqq2phz 8128(%rcx), %xmm30
+# INTEL: vcvtuqq2ph xmm30, zmmword ptr [rcx + 8128]
+0x62,0x65,0xff,0x48,0x7a,0x71,0x7f
+
+# ATT: vcvtuqq2ph -1024(%rdx){1to8}, %xmm30 {%k7} {z}
+# INTEL: vcvtuqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to8}
+0x62,0x65,0xff,0xdf,0x7a,0x72,0x80
+
+# ATT: vcvtusi2sh %r12, %xmm29, %xmm30
+# INTEL: vcvtusi2sh xmm30, xmm29, r12
+0x62,0x45,0x96,0x00,0x7b,0xf4
+
+# ATT: vcvtusi2sh %r12, {rn-sae}, %xmm29, %xmm30
+# INTEL: vcvtusi2sh xmm30, xmm29, {rn-sae}, r12
+0x62,0x45,0x96,0x10,0x7b,0xf4
+
+# ATT: vcvtusi2sh %edx, %xmm29, %xmm30
+# INTEL: vcvtusi2sh xmm30, xmm29, edx
+0x62,0x65,0x16,0x00,0x7b,0xf2
+
+# ATT: vcvtusi2sh %edx, {rn-sae}, %xmm29, %xmm30
+# INTEL: vcvtusi2sh xmm30, xmm29, {rn-sae}, edx
+0x62,0x65,0x16,0x10,0x7b,0xf2
+
+# ATT: vcvtusi2shl 268435456(%rbp,%r14,8), %xmm29, %xmm30
+# INTEL: vcvtusi2sh xmm30, xmm29, dword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x16,0x00,0x7b,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvtusi2shl (%r9), %xmm29, %xmm30
+# INTEL: vcvtusi2sh xmm30, xmm29, dword ptr [r9]
+0x62,0x45,0x16,0x00,0x7b,0x31
+
+# ATT: vcvtusi2shl 508(%rcx), %xmm29, %xmm30
+# INTEL: vcvtusi2sh xmm30, xmm29, dword ptr [rcx + 508]
+0x62,0x65,0x16,0x00,0x7b,0x71,0x7f
+
+# ATT: vcvtusi2shl -512(%rdx), %xmm29, %xmm30
+# INTEL: vcvtusi2sh xmm30, xmm29, dword ptr [rdx - 512]
+0x62,0x65,0x16,0x00,0x7b,0x72,0x80
+
+# ATT: vcvtusi2shq 1016(%rcx), %xmm29, %xmm30
+# INTEL: vcvtusi2sh xmm30, xmm29, qword ptr [rcx + 1016]
+0x62,0x65,0x96,0x00,0x7b,0x71,0x7f
+
+# ATT: vcvtusi2shq -1024(%rdx), %xmm29, %xmm30
+# INTEL: vcvtusi2sh xmm30, xmm29, qword ptr [rdx - 1024]
+0x62,0x65,0x96,0x00,0x7b,0x72,0x80
+
+# ATT: vcvtuw2ph %zmm29, %zmm30
+# INTEL: vcvtuw2ph zmm30, zmm29
+0x62,0x05,0x7f,0x48,0x7d,0xf5
+
+# ATT: vcvtuw2ph {rn-sae}, %zmm29, %zmm30
+# INTEL: vcvtuw2ph zmm30, zmm29, {rn-sae}
+0x62,0x05,0x7f,0x18,0x7d,0xf5
+
+# ATT: vcvtuw2ph 268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvtuw2ph zmm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7f,0x4f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvtuw2ph (%r9){1to32}, %zmm30
+# INTEL: vcvtuw2ph zmm30, word ptr [r9]{1to32}
+0x62,0x45,0x7f,0x58,0x7d,0x31
+
+# ATT: vcvtuw2ph 8128(%rcx), %zmm30
+# INTEL: vcvtuw2ph zmm30, zmmword ptr [rcx + 8128]
+0x62,0x65,0x7f,0x48,0x7d,0x71,0x7f
+
+# ATT: vcvtuw2ph -256(%rdx){1to32}, %zmm30 {%k7} {z}
+# INTEL: vcvtuw2ph zmm30 {k7} {z}, word ptr [rdx - 256]{1to32}
+0x62,0x65,0x7f,0xdf,0x7d,0x72,0x80
+
+# ATT: vcvtw2ph %zmm29, %zmm30
+# INTEL: vcvtw2ph zmm30, zmm29
+0x62,0x05,0x7e,0x48,0x7d,0xf5
+
+# ATT: vcvtw2ph {rn-sae}, %zmm29, %zmm30
+# INTEL: vcvtw2ph zmm30, zmm29, {rn-sae}
+0x62,0x05,0x7e,0x18,0x7d,0xf5
+
+# ATT: vcvtw2ph 268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvtw2ph zmm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7e,0x4f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvtw2ph (%r9){1to32}, %zmm30
+# INTEL: vcvtw2ph zmm30, word ptr [r9]{1to32}
+0x62,0x45,0x7e,0x58,0x7d,0x31
+
+# ATT: vcvtw2ph 8128(%rcx), %zmm30
+# INTEL: vcvtw2ph zmm30, zmmword ptr [rcx + 8128]
+0x62,0x65,0x7e,0x48,0x7d,0x71,0x7f
+
+# ATT: vcvtw2ph -256(%rdx){1to32}, %zmm30 {%k7} {z}
+# INTEL: vcvtw2ph zmm30 {k7} {z}, word ptr [rdx - 256]{1to32}
+0x62,0x65,0x7e,0xdf,0x7d,0x72,0x80
diff --git a/llvm/test/MC/Disassembler/X86/avx512fp16vl.txt b/llvm/test/MC/Disassembler/X86/avx512fp16vl.txt
index 362215492e1b3..63acd5be1946f 100644
--- a/llvm/test/MC/Disassembler/X86/avx512fp16vl.txt
+++ b/llvm/test/MC/Disassembler/X86/avx512fp16vl.txt
@@ -280,3 +280,859 @@
# ATT: vsubph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
# INTEL: vsubph xmm6 {k7} {z}, xmm5, word ptr [edx - 256]{1to8}
0x62,0xf5,0x54,0x9f,0x5c,0x72,0x80
+
+# ATT: vcvtdq2ph %xmm5, %xmm6
+# INTEL: vcvtdq2ph xmm6, xmm5
+0x62,0xf5,0x7c,0x08,0x5b,0xf5
+
+# ATT: vcvtdq2ph %ymm5, %xmm6
+# INTEL: vcvtdq2ph xmm6, ymm5
+0x62,0xf5,0x7c,0x28,0x5b,0xf5
+
+# ATT: vcvtdq2phx 268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtdq2ph xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7c,0x0f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvtdq2ph (%ecx){1to4}, %xmm6
+# INTEL: vcvtdq2ph xmm6, dword ptr [ecx]{1to4}
+0x62,0xf5,0x7c,0x18,0x5b,0x31
+
+# ATT: vcvtdq2phx 2032(%ecx), %xmm6
+# INTEL: vcvtdq2ph xmm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7c,0x08,0x5b,0x71,0x7f
+
+# ATT: vcvtdq2ph -512(%edx){1to4}, %xmm6 {%k7} {z}
+# INTEL: vcvtdq2ph xmm6 {k7} {z}, dword ptr [edx - 512]{1to4}
+0x62,0xf5,0x7c,0x9f,0x5b,0x72,0x80
+
+# ATT: vcvtdq2ph (%ecx){1to8}, %xmm6
+# INTEL: vcvtdq2ph xmm6, dword ptr [ecx]{1to8}
+0x62,0xf5,0x7c,0x38,0x5b,0x31
+
+# ATT: vcvtdq2phy 4064(%ecx), %xmm6
+# INTEL: vcvtdq2ph xmm6, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x7c,0x28,0x5b,0x71,0x7f
+
+# ATT: vcvtdq2ph -512(%edx){1to8}, %xmm6 {%k7} {z}
+# INTEL: vcvtdq2ph xmm6 {k7} {z}, dword ptr [edx - 512]{1to8}
+0x62,0xf5,0x7c,0xbf,0x5b,0x72,0x80
+
+# ATT: vcvtpd2ph %xmm5, %xmm6
+# INTEL: vcvtpd2ph xmm6, xmm5
+0x62,0xf5,0xfd,0x08,0x5a,0xf5
+
+# ATT: vcvtpd2ph %ymm5, %xmm6
+# INTEL: vcvtpd2ph xmm6, ymm5
+0x62,0xf5,0xfd,0x28,0x5a,0xf5
+
+# ATT: vcvtpd2phx 268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtpd2ph xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0xfd,0x0f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvtpd2ph (%ecx){1to2}, %xmm6
+# INTEL: vcvtpd2ph xmm6, qword ptr [ecx]{1to2}
+0x62,0xf5,0xfd,0x18,0x5a,0x31
+
+# ATT: vcvtpd2phx 2032(%ecx), %xmm6
+# INTEL: vcvtpd2ph xmm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0xfd,0x08,0x5a,0x71,0x7f
+
+# ATT: vcvtpd2ph -1024(%edx){1to2}, %xmm6 {%k7} {z}
+# INTEL: vcvtpd2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to2}
+0x62,0xf5,0xfd,0x9f,0x5a,0x72,0x80
+
+# ATT: vcvtpd2ph (%ecx){1to4}, %xmm6
+# INTEL: vcvtpd2ph xmm6, qword ptr [ecx]{1to4}
+0x62,0xf5,0xfd,0x38,0x5a,0x31
+
+# ATT: vcvtpd2phy 4064(%ecx), %xmm6
+# INTEL: vcvtpd2ph xmm6, ymmword ptr [ecx + 4064]
+0x62,0xf5,0xfd,0x28,0x5a,0x71,0x7f
+
+# ATT: vcvtpd2ph -1024(%edx){1to4}, %xmm6 {%k7} {z}
+# INTEL: vcvtpd2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to4}
+0x62,0xf5,0xfd,0xbf,0x5a,0x72,0x80
+
+# ATT: vcvtph2dq %xmm5, %xmm6
+# INTEL: vcvtph2dq xmm6, xmm5
+0x62,0xf5,0x7d,0x08,0x5b,0xf5
+
+# ATT: vcvtph2dq %xmm5, %ymm6
+# INTEL: vcvtph2dq ymm6, xmm5
+0x62,0xf5,0x7d,0x28,0x5b,0xf5
+
+# ATT: vcvtph2dq 268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtph2dq xmm6 {k7}, qword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x0f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvtph2dq (%ecx){1to4}, %xmm6
+# INTEL: vcvtph2dq xmm6, word ptr [ecx]{1to4}
+0x62,0xf5,0x7d,0x18,0x5b,0x31
+
+# ATT: vcvtph2dq 1016(%ecx), %xmm6
+# INTEL: vcvtph2dq xmm6, qword ptr [ecx + 1016]
+0x62,0xf5,0x7d,0x08,0x5b,0x71,0x7f
+
+# ATT: vcvtph2dq -256(%edx){1to4}, %xmm6 {%k7} {z}
+# INTEL: vcvtph2dq xmm6 {k7} {z}, word ptr [edx - 256]{1to4}
+0x62,0xf5,0x7d,0x9f,0x5b,0x72,0x80
+
+# ATT: vcvtph2dq 268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvtph2dq ymm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x2f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvtph2dq (%ecx){1to8}, %ymm6
+# INTEL: vcvtph2dq ymm6, word ptr [ecx]{1to8}
+0x62,0xf5,0x7d,0x38,0x5b,0x31
+
+# ATT: vcvtph2dq 2032(%ecx), %ymm6
+# INTEL: vcvtph2dq ymm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7d,0x28,0x5b,0x71,0x7f
+
+# ATT: vcvtph2dq -256(%edx){1to8}, %ymm6 {%k7} {z}
+# INTEL: vcvtph2dq ymm6 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x7d,0xbf,0x5b,0x72,0x80
+
+# ATT: vcvtph2pd %xmm5, %xmm6
+# INTEL: vcvtph2pd xmm6, xmm5
+0x62,0xf5,0x7c,0x08,0x5a,0xf5
+
+# ATT: vcvtph2pd %xmm5, %ymm6
+# INTEL: vcvtph2pd ymm6, xmm5
+0x62,0xf5,0x7c,0x28,0x5a,0xf5
+
+# ATT: vcvtph2pd 268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtph2pd xmm6 {k7}, dword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7c,0x0f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvtph2pd (%ecx){1to2}, %xmm6
+# INTEL: vcvtph2pd xmm6, word ptr [ecx]{1to2}
+0x62,0xf5,0x7c,0x18,0x5a,0x31
+
+# ATT: vcvtph2pd 508(%ecx), %xmm6
+# INTEL: vcvtph2pd xmm6, dword ptr [ecx + 508]
+0x62,0xf5,0x7c,0x08,0x5a,0x71,0x7f
+
+# ATT: vcvtph2pd -256(%edx){1to2}, %xmm6 {%k7} {z}
+# INTEL: vcvtph2pd xmm6 {k7} {z}, word ptr [edx - 256]{1to2}
+0x62,0xf5,0x7c,0x9f,0x5a,0x72,0x80
+
+# ATT: vcvtph2pd 268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvtph2pd ymm6 {k7}, qword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7c,0x2f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvtph2pd (%ecx){1to4}, %ymm6
+# INTEL: vcvtph2pd ymm6, word ptr [ecx]{1to4}
+0x62,0xf5,0x7c,0x38,0x5a,0x31
+
+# ATT: vcvtph2pd 1016(%ecx), %ymm6
+# INTEL: vcvtph2pd ymm6, qword ptr [ecx + 1016]
+0x62,0xf5,0x7c,0x28,0x5a,0x71,0x7f
+
+# ATT: vcvtph2pd -256(%edx){1to4}, %ymm6 {%k7} {z}
+# INTEL: vcvtph2pd ymm6 {k7} {z}, word ptr [edx - 256]{1to4}
+0x62,0xf5,0x7c,0xbf,0x5a,0x72,0x80
+
+# ATT: vcvtph2psx %xmm5, %xmm6
+# INTEL: vcvtph2psx xmm6, xmm5
+0x62,0xf6,0x7d,0x08,0x13,0xf5
+
+# ATT: vcvtph2psx %xmm5, %ymm6
+# INTEL: vcvtph2psx ymm6, xmm5
+0x62,0xf6,0x7d,0x28,0x13,0xf5
+
+# ATT: vcvtph2psx 268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtph2psx xmm6 {k7}, qword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x7d,0x0f,0x13,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvtph2psx (%ecx){1to4}, %xmm6
+# INTEL: vcvtph2psx xmm6, word ptr [ecx]{1to4}
+0x62,0xf6,0x7d,0x18,0x13,0x31
+
+# ATT: vcvtph2psx 1016(%ecx), %xmm6
+# INTEL: vcvtph2psx xmm6, qword ptr [ecx + 1016]
+0x62,0xf6,0x7d,0x08,0x13,0x71,0x7f
+
+# ATT: vcvtph2psx -256(%edx){1to4}, %xmm6 {%k7} {z}
+# INTEL: vcvtph2psx xmm6 {k7} {z}, word ptr [edx - 256]{1to4}
+0x62,0xf6,0x7d,0x9f,0x13,0x72,0x80
+
+# ATT: vcvtph2psx 268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvtph2psx ymm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x7d,0x2f,0x13,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvtph2psx (%ecx){1to8}, %ymm6
+# INTEL: vcvtph2psx ymm6, word ptr [ecx]{1to8}
+0x62,0xf6,0x7d,0x38,0x13,0x31
+
+# ATT: vcvtph2psx 2032(%ecx), %ymm6
+# INTEL: vcvtph2psx ymm6, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x7d,0x28,0x13,0x71,0x7f
+
+# ATT: vcvtph2psx -256(%edx){1to8}, %ymm6 {%k7} {z}
+# INTEL: vcvtph2psx ymm6 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x7d,0xbf,0x13,0x72,0x80
+
+# ATT: vcvtph2qq %xmm5, %xmm6
+# INTEL: vcvtph2qq xmm6, xmm5
+0x62,0xf5,0x7d,0x08,0x7b,0xf5
+
+# ATT: vcvtph2qq %xmm5, %ymm6
+# INTEL: vcvtph2qq ymm6, xmm5
+0x62,0xf5,0x7d,0x28,0x7b,0xf5
+
+# ATT: vcvtph2qq 268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtph2qq xmm6 {k7}, dword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x0f,0x7b,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvtph2qq (%ecx){1to2}, %xmm6
+# INTEL: vcvtph2qq xmm6, word ptr [ecx]{1to2}
+0x62,0xf5,0x7d,0x18,0x7b,0x31
+
+# ATT: vcvtph2qq 508(%ecx), %xmm6
+# INTEL: vcvtph2qq xmm6, dword ptr [ecx + 508]
+0x62,0xf5,0x7d,0x08,0x7b,0x71,0x7f
+
+# ATT: vcvtph2qq -256(%edx){1to2}, %xmm6 {%k7} {z}
+# INTEL: vcvtph2qq xmm6 {k7} {z}, word ptr [edx - 256]{1to2}
+0x62,0xf5,0x7d,0x9f,0x7b,0x72,0x80
+
+# ATT: vcvtph2qq 268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvtph2qq ymm6 {k7}, qword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x2f,0x7b,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvtph2qq (%ecx){1to4}, %ymm6
+# INTEL: vcvtph2qq ymm6, word ptr [ecx]{1to4}
+0x62,0xf5,0x7d,0x38,0x7b,0x31
+
+# ATT: vcvtph2qq 1016(%ecx), %ymm6
+# INTEL: vcvtph2qq ymm6, qword ptr [ecx + 1016]
+0x62,0xf5,0x7d,0x28,0x7b,0x71,0x7f
+
+# ATT: vcvtph2qq -256(%edx){1to4}, %ymm6 {%k7} {z}
+# INTEL: vcvtph2qq ymm6 {k7} {z}, word ptr [edx - 256]{1to4}
+0x62,0xf5,0x7d,0xbf,0x7b,0x72,0x80
+
+# ATT: vcvtph2udq %xmm5, %xmm6
+# INTEL: vcvtph2udq xmm6, xmm5
+0x62,0xf5,0x7c,0x08,0x79,0xf5
+
+# ATT: vcvtph2udq %xmm5, %ymm6
+# INTEL: vcvtph2udq ymm6, xmm5
+0x62,0xf5,0x7c,0x28,0x79,0xf5
+
+# ATT: vcvtph2udq 268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtph2udq xmm6 {k7}, qword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7c,0x0f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvtph2udq (%ecx){1to4}, %xmm6
+# INTEL: vcvtph2udq xmm6, word ptr [ecx]{1to4}
+0x62,0xf5,0x7c,0x18,0x79,0x31
+
+# ATT: vcvtph2udq 1016(%ecx), %xmm6
+# INTEL: vcvtph2udq xmm6, qword ptr [ecx + 1016]
+0x62,0xf5,0x7c,0x08,0x79,0x71,0x7f
+
+# ATT: vcvtph2udq -256(%edx){1to4}, %xmm6 {%k7} {z}
+# INTEL: vcvtph2udq xmm6 {k7} {z}, word ptr [edx - 256]{1to4}
+0x62,0xf5,0x7c,0x9f,0x79,0x72,0x80
+
+# ATT: vcvtph2udq 268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvtph2udq ymm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7c,0x2f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvtph2udq (%ecx){1to8}, %ymm6
+# INTEL: vcvtph2udq ymm6, word ptr [ecx]{1to8}
+0x62,0xf5,0x7c,0x38,0x79,0x31
+
+# ATT: vcvtph2udq 2032(%ecx), %ymm6
+# INTEL: vcvtph2udq ymm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7c,0x28,0x79,0x71,0x7f
+
+# ATT: vcvtph2udq -256(%edx){1to8}, %ymm6 {%k7} {z}
+# INTEL: vcvtph2udq ymm6 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x7c,0xbf,0x79,0x72,0x80
+
+# ATT: vcvtph2uqq %xmm5, %xmm6
+# INTEL: vcvtph2uqq xmm6, xmm5
+0x62,0xf5,0x7d,0x08,0x79,0xf5
+
+# ATT: vcvtph2uqq %xmm5, %ymm6
+# INTEL: vcvtph2uqq ymm6, xmm5
+0x62,0xf5,0x7d,0x28,0x79,0xf5
+
+# ATT: vcvtph2uqq 268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtph2uqq xmm6 {k7}, dword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x0f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvtph2uqq (%ecx){1to2}, %xmm6
+# INTEL: vcvtph2uqq xmm6, word ptr [ecx]{1to2}
+0x62,0xf5,0x7d,0x18,0x79,0x31
+
+# ATT: vcvtph2uqq 508(%ecx), %xmm6
+# INTEL: vcvtph2uqq xmm6, dword ptr [ecx + 508]
+0x62,0xf5,0x7d,0x08,0x79,0x71,0x7f
+
+# ATT: vcvtph2uqq -256(%edx){1to2}, %xmm6 {%k7} {z}
+# INTEL: vcvtph2uqq xmm6 {k7} {z}, word ptr [edx - 256]{1to2}
+0x62,0xf5,0x7d,0x9f,0x79,0x72,0x80
+
+# ATT: vcvtph2uqq 268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvtph2uqq ymm6 {k7}, qword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x2f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvtph2uqq (%ecx){1to4}, %ymm6
+# INTEL: vcvtph2uqq ymm6, word ptr [ecx]{1to4}
+0x62,0xf5,0x7d,0x38,0x79,0x31
+
+# ATT: vcvtph2uqq 1016(%ecx), %ymm6
+# INTEL: vcvtph2uqq ymm6, qword ptr [ecx + 1016]
+0x62,0xf5,0x7d,0x28,0x79,0x71,0x7f
+
+# ATT: vcvtph2uqq -256(%edx){1to4}, %ymm6 {%k7} {z}
+# INTEL: vcvtph2uqq ymm6 {k7} {z}, word ptr [edx - 256]{1to4}
+0x62,0xf5,0x7d,0xbf,0x79,0x72,0x80
+
+# ATT: vcvtph2uw %xmm5, %xmm6
+# INTEL: vcvtph2uw xmm6, xmm5
+0x62,0xf5,0x7c,0x08,0x7d,0xf5
+
+# ATT: vcvtph2uw %ymm5, %ymm6
+# INTEL: vcvtph2uw ymm6, ymm5
+0x62,0xf5,0x7c,0x28,0x7d,0xf5
+
+# ATT: vcvtph2uw 268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtph2uw xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7c,0x0f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvtph2uw (%ecx){1to8}, %xmm6
+# INTEL: vcvtph2uw xmm6, word ptr [ecx]{1to8}
+0x62,0xf5,0x7c,0x18,0x7d,0x31
+
+# ATT: vcvtph2uw 2032(%ecx), %xmm6
+# INTEL: vcvtph2uw xmm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7c,0x08,0x7d,0x71,0x7f
+
+# ATT: vcvtph2uw -256(%edx){1to8}, %xmm6 {%k7} {z}
+# INTEL: vcvtph2uw xmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x7c,0x9f,0x7d,0x72,0x80
+
+# ATT: vcvtph2uw 268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvtph2uw ymm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7c,0x2f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvtph2uw (%ecx){1to16}, %ymm6
+# INTEL: vcvtph2uw ymm6, word ptr [ecx]{1to16}
+0x62,0xf5,0x7c,0x38,0x7d,0x31
+
+# ATT: vcvtph2uw 4064(%ecx), %ymm6
+# INTEL: vcvtph2uw ymm6, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x7c,0x28,0x7d,0x71,0x7f
+
+# ATT: vcvtph2uw -256(%edx){1to16}, %ymm6 {%k7} {z}
+# INTEL: vcvtph2uw ymm6 {k7} {z}, word ptr [edx - 256]{1to16}
+0x62,0xf5,0x7c,0xbf,0x7d,0x72,0x80
+
+# ATT: vcvtph2w %xmm5, %xmm6
+# INTEL: vcvtph2w xmm6, xmm5
+0x62,0xf5,0x7d,0x08,0x7d,0xf5
+
+# ATT: vcvtph2w %ymm5, %ymm6
+# INTEL: vcvtph2w ymm6, ymm5
+0x62,0xf5,0x7d,0x28,0x7d,0xf5
+
+# ATT: vcvtph2w 268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtph2w xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x0f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvtph2w (%ecx){1to8}, %xmm6
+# INTEL: vcvtph2w xmm6, word ptr [ecx]{1to8}
+0x62,0xf5,0x7d,0x18,0x7d,0x31
+
+# ATT: vcvtph2w 2032(%ecx), %xmm6
+# INTEL: vcvtph2w xmm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7d,0x08,0x7d,0x71,0x7f
+
+# ATT: vcvtph2w -256(%edx){1to8}, %xmm6 {%k7} {z}
+# INTEL: vcvtph2w xmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x7d,0x9f,0x7d,0x72,0x80
+
+# ATT: vcvtph2w 268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvtph2w ymm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x2f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvtph2w (%ecx){1to16}, %ymm6
+# INTEL: vcvtph2w ymm6, word ptr [ecx]{1to16}
+0x62,0xf5,0x7d,0x38,0x7d,0x31
+
+# ATT: vcvtph2w 4064(%ecx), %ymm6
+# INTEL: vcvtph2w ymm6, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x7d,0x28,0x7d,0x71,0x7f
+
+# ATT: vcvtph2w -256(%edx){1to16}, %ymm6 {%k7} {z}
+# INTEL: vcvtph2w ymm6 {k7} {z}, word ptr [edx - 256]{1to16}
+0x62,0xf5,0x7d,0xbf,0x7d,0x72,0x80
+
+# ATT: vcvtps2phx %xmm5, %xmm6
+# INTEL: vcvtps2phx xmm6, xmm5
+0x62,0xf5,0x7d,0x08,0x1d,0xf5
+
+# ATT: vcvtps2phx %ymm5, %xmm6
+# INTEL: vcvtps2phx xmm6, ymm5
+0x62,0xf5,0x7d,0x28,0x1d,0xf5
+
+# ATT: vcvtps2phxx 268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtps2phx xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x0f,0x1d,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvtps2phx (%ecx){1to4}, %xmm6
+# INTEL: vcvtps2phx xmm6, dword ptr [ecx]{1to4}
+0x62,0xf5,0x7d,0x18,0x1d,0x31
+
+# ATT: vcvtps2phxx 2032(%ecx), %xmm6
+# INTEL: vcvtps2phx xmm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7d,0x08,0x1d,0x71,0x7f
+
+# ATT: vcvtps2phx -512(%edx){1to4}, %xmm6 {%k7} {z}
+# INTEL: vcvtps2phx xmm6 {k7} {z}, dword ptr [edx - 512]{1to4}
+0x62,0xf5,0x7d,0x9f,0x1d,0x72,0x80
+
+# ATT: vcvtps2phx (%ecx){1to8}, %xmm6
+# INTEL: vcvtps2phx xmm6, dword ptr [ecx]{1to8}
+0x62,0xf5,0x7d,0x38,0x1d,0x31
+
+# ATT: vcvtps2phxy 4064(%ecx), %xmm6
+# INTEL: vcvtps2phx xmm6, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x7d,0x28,0x1d,0x71,0x7f
+
+# ATT: vcvtps2phx -512(%edx){1to8}, %xmm6 {%k7} {z}
+# INTEL: vcvtps2phx xmm6 {k7} {z}, dword ptr [edx - 512]{1to8}
+0x62,0xf5,0x7d,0xbf,0x1d,0x72,0x80
+
+# ATT: vcvtqq2ph %xmm5, %xmm6
+# INTEL: vcvtqq2ph xmm6, xmm5
+0x62,0xf5,0xfc,0x08,0x5b,0xf5
+
+# ATT: vcvtqq2ph %ymm5, %xmm6
+# INTEL: vcvtqq2ph xmm6, ymm5
+0x62,0xf5,0xfc,0x28,0x5b,0xf5
+
+# ATT: vcvtqq2phx 268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtqq2ph xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0xfc,0x0f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvtqq2ph (%ecx){1to2}, %xmm6
+# INTEL: vcvtqq2ph xmm6, qword ptr [ecx]{1to2}
+0x62,0xf5,0xfc,0x18,0x5b,0x31
+
+# ATT: vcvtqq2phx 2032(%ecx), %xmm6
+# INTEL: vcvtqq2ph xmm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0xfc,0x08,0x5b,0x71,0x7f
+
+# ATT: vcvtqq2ph -1024(%edx){1to2}, %xmm6 {%k7} {z}
+# INTEL: vcvtqq2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to2}
+0x62,0xf5,0xfc,0x9f,0x5b,0x72,0x80
+
+# ATT: vcvtqq2ph (%ecx){1to4}, %xmm6
+# INTEL: vcvtqq2ph xmm6, qword ptr [ecx]{1to4}
+0x62,0xf5,0xfc,0x38,0x5b,0x31
+
+# ATT: vcvtqq2phy 4064(%ecx), %xmm6
+# INTEL: vcvtqq2ph xmm6, ymmword ptr [ecx + 4064]
+0x62,0xf5,0xfc,0x28,0x5b,0x71,0x7f
+
+# ATT: vcvtqq2ph -1024(%edx){1to4}, %xmm6 {%k7} {z}
+# INTEL: vcvtqq2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to4}
+0x62,0xf5,0xfc,0xbf,0x5b,0x72,0x80
+
+# ATT: vcvttph2dq %xmm5, %xmm6
+# INTEL: vcvttph2dq xmm6, xmm5
+0x62,0xf5,0x7e,0x08,0x5b,0xf5
+
+# ATT: vcvttph2dq %xmm5, %ymm6
+# INTEL: vcvttph2dq ymm6, xmm5
+0x62,0xf5,0x7e,0x28,0x5b,0xf5
+
+# ATT: vcvttph2dq 268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvttph2dq xmm6 {k7}, qword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7e,0x0f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttph2dq (%ecx){1to4}, %xmm6
+# INTEL: vcvttph2dq xmm6, word ptr [ecx]{1to4}
+0x62,0xf5,0x7e,0x18,0x5b,0x31
+
+# ATT: vcvttph2dq 1016(%ecx), %xmm6
+# INTEL: vcvttph2dq xmm6, qword ptr [ecx + 1016]
+0x62,0xf5,0x7e,0x08,0x5b,0x71,0x7f
+
+# ATT: vcvttph2dq -256(%edx){1to4}, %xmm6 {%k7} {z}
+# INTEL: vcvttph2dq xmm6 {k7} {z}, word ptr [edx - 256]{1to4}
+0x62,0xf5,0x7e,0x9f,0x5b,0x72,0x80
+
+# ATT: vcvttph2dq 268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvttph2dq ymm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7e,0x2f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttph2dq (%ecx){1to8}, %ymm6
+# INTEL: vcvttph2dq ymm6, word ptr [ecx]{1to8}
+0x62,0xf5,0x7e,0x38,0x5b,0x31
+
+# ATT: vcvttph2dq 2032(%ecx), %ymm6
+# INTEL: vcvttph2dq ymm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7e,0x28,0x5b,0x71,0x7f
+
+# ATT: vcvttph2dq -256(%edx){1to8}, %ymm6 {%k7} {z}
+# INTEL: vcvttph2dq ymm6 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x7e,0xbf,0x5b,0x72,0x80
+
+# ATT: vcvttph2qq %xmm5, %xmm6
+# INTEL: vcvttph2qq xmm6, xmm5
+0x62,0xf5,0x7d,0x08,0x7a,0xf5
+
+# ATT: vcvttph2qq %xmm5, %ymm6
+# INTEL: vcvttph2qq ymm6, xmm5
+0x62,0xf5,0x7d,0x28,0x7a,0xf5
+
+# ATT: vcvttph2qq 268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvttph2qq xmm6 {k7}, dword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x0f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttph2qq (%ecx){1to2}, %xmm6
+# INTEL: vcvttph2qq xmm6, word ptr [ecx]{1to2}
+0x62,0xf5,0x7d,0x18,0x7a,0x31
+
+# ATT: vcvttph2qq 508(%ecx), %xmm6
+# INTEL: vcvttph2qq xmm6, dword ptr [ecx + 508]
+0x62,0xf5,0x7d,0x08,0x7a,0x71,0x7f
+
+# ATT: vcvttph2qq -256(%edx){1to2}, %xmm6 {%k7} {z}
+# INTEL: vcvttph2qq xmm6 {k7} {z}, word ptr [edx - 256]{1to2}
+0x62,0xf5,0x7d,0x9f,0x7a,0x72,0x80
+
+# ATT: vcvttph2qq 268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvttph2qq ymm6 {k7}, qword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x2f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttph2qq (%ecx){1to4}, %ymm6
+# INTEL: vcvttph2qq ymm6, word ptr [ecx]{1to4}
+0x62,0xf5,0x7d,0x38,0x7a,0x31
+
+# ATT: vcvttph2qq 1016(%ecx), %ymm6
+# INTEL: vcvttph2qq ymm6, qword ptr [ecx + 1016]
+0x62,0xf5,0x7d,0x28,0x7a,0x71,0x7f
+
+# ATT: vcvttph2qq -256(%edx){1to4}, %ymm6 {%k7} {z}
+# INTEL: vcvttph2qq ymm6 {k7} {z}, word ptr [edx - 256]{1to4}
+0x62,0xf5,0x7d,0xbf,0x7a,0x72,0x80
+
+# ATT: vcvttph2udq %xmm5, %xmm6
+# INTEL: vcvttph2udq xmm6, xmm5
+0x62,0xf5,0x7c,0x08,0x78,0xf5
+
+# ATT: vcvttph2udq %xmm5, %ymm6
+# INTEL: vcvttph2udq ymm6, xmm5
+0x62,0xf5,0x7c,0x28,0x78,0xf5
+
+# ATT: vcvttph2udq 268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvttph2udq xmm6 {k7}, qword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7c,0x0f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttph2udq (%ecx){1to4}, %xmm6
+# INTEL: vcvttph2udq xmm6, word ptr [ecx]{1to4}
+0x62,0xf5,0x7c,0x18,0x78,0x31
+
+# ATT: vcvttph2udq 1016(%ecx), %xmm6
+# INTEL: vcvttph2udq xmm6, qword ptr [ecx + 1016]
+0x62,0xf5,0x7c,0x08,0x78,0x71,0x7f
+
+# ATT: vcvttph2udq -256(%edx){1to4}, %xmm6 {%k7} {z}
+# INTEL: vcvttph2udq xmm6 {k7} {z}, word ptr [edx - 256]{1to4}
+0x62,0xf5,0x7c,0x9f,0x78,0x72,0x80
+
+# ATT: vcvttph2udq 268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvttph2udq ymm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7c,0x2f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttph2udq (%ecx){1to8}, %ymm6
+# INTEL: vcvttph2udq ymm6, word ptr [ecx]{1to8}
+0x62,0xf5,0x7c,0x38,0x78,0x31
+
+# ATT: vcvttph2udq 2032(%ecx), %ymm6
+# INTEL: vcvttph2udq ymm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7c,0x28,0x78,0x71,0x7f
+
+# ATT: vcvttph2udq -256(%edx){1to8}, %ymm6 {%k7} {z}
+# INTEL: vcvttph2udq ymm6 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x7c,0xbf,0x78,0x72,0x80
+
+# ATT: vcvttph2uqq %xmm5, %xmm6
+# INTEL: vcvttph2uqq xmm6, xmm5
+0x62,0xf5,0x7d,0x08,0x78,0xf5
+
+# ATT: vcvttph2uqq %xmm5, %ymm6
+# INTEL: vcvttph2uqq ymm6, xmm5
+0x62,0xf5,0x7d,0x28,0x78,0xf5
+
+# ATT: vcvttph2uqq 268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvttph2uqq xmm6 {k7}, dword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x0f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttph2uqq (%ecx){1to2}, %xmm6
+# INTEL: vcvttph2uqq xmm6, word ptr [ecx]{1to2}
+0x62,0xf5,0x7d,0x18,0x78,0x31
+
+# ATT: vcvttph2uqq 508(%ecx), %xmm6
+# INTEL: vcvttph2uqq xmm6, dword ptr [ecx + 508]
+0x62,0xf5,0x7d,0x08,0x78,0x71,0x7f
+
+# ATT: vcvttph2uqq -256(%edx){1to2}, %xmm6 {%k7} {z}
+# INTEL: vcvttph2uqq xmm6 {k7} {z}, word ptr [edx - 256]{1to2}
+0x62,0xf5,0x7d,0x9f,0x78,0x72,0x80
+
+# ATT: vcvttph2uqq 268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvttph2uqq ymm6 {k7}, qword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x2f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttph2uqq (%ecx){1to4}, %ymm6
+# INTEL: vcvttph2uqq ymm6, word ptr [ecx]{1to4}
+0x62,0xf5,0x7d,0x38,0x78,0x31
+
+# ATT: vcvttph2uqq 1016(%ecx), %ymm6
+# INTEL: vcvttph2uqq ymm6, qword ptr [ecx + 1016]
+0x62,0xf5,0x7d,0x28,0x78,0x71,0x7f
+
+# ATT: vcvttph2uqq -256(%edx){1to4}, %ymm6 {%k7} {z}
+# INTEL: vcvttph2uqq ymm6 {k7} {z}, word ptr [edx - 256]{1to4}
+0x62,0xf5,0x7d,0xbf,0x78,0x72,0x80
+
+# ATT: vcvttph2uw %xmm5, %xmm6
+# INTEL: vcvttph2uw xmm6, xmm5
+0x62,0xf5,0x7c,0x08,0x7c,0xf5
+
+# ATT: vcvttph2uw %ymm5, %ymm6
+# INTEL: vcvttph2uw ymm6, ymm5
+0x62,0xf5,0x7c,0x28,0x7c,0xf5
+
+# ATT: vcvttph2uw 268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvttph2uw xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7c,0x0f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttph2uw (%ecx){1to8}, %xmm6
+# INTEL: vcvttph2uw xmm6, word ptr [ecx]{1to8}
+0x62,0xf5,0x7c,0x18,0x7c,0x31
+
+# ATT: vcvttph2uw 2032(%ecx), %xmm6
+# INTEL: vcvttph2uw xmm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7c,0x08,0x7c,0x71,0x7f
+
+# ATT: vcvttph2uw -256(%edx){1to8}, %xmm6 {%k7} {z}
+# INTEL: vcvttph2uw xmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x7c,0x9f,0x7c,0x72,0x80
+
+# ATT: vcvttph2uw 268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvttph2uw ymm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7c,0x2f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttph2uw (%ecx){1to16}, %ymm6
+# INTEL: vcvttph2uw ymm6, word ptr [ecx]{1to16}
+0x62,0xf5,0x7c,0x38,0x7c,0x31
+
+# ATT: vcvttph2uw 4064(%ecx), %ymm6
+# INTEL: vcvttph2uw ymm6, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x7c,0x28,0x7c,0x71,0x7f
+
+# ATT: vcvttph2uw -256(%edx){1to16}, %ymm6 {%k7} {z}
+# INTEL: vcvttph2uw ymm6 {k7} {z}, word ptr [edx - 256]{1to16}
+0x62,0xf5,0x7c,0xbf,0x7c,0x72,0x80
+
+# ATT: vcvttph2w %xmm5, %xmm6
+# INTEL: vcvttph2w xmm6, xmm5
+0x62,0xf5,0x7d,0x08,0x7c,0xf5
+
+# ATT: vcvttph2w %ymm5, %ymm6
+# INTEL: vcvttph2w ymm6, ymm5
+0x62,0xf5,0x7d,0x28,0x7c,0xf5
+
+# ATT: vcvttph2w 268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvttph2w xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x0f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttph2w (%ecx){1to8}, %xmm6
+# INTEL: vcvttph2w xmm6, word ptr [ecx]{1to8}
+0x62,0xf5,0x7d,0x18,0x7c,0x31
+
+# ATT: vcvttph2w 2032(%ecx), %xmm6
+# INTEL: vcvttph2w xmm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7d,0x08,0x7c,0x71,0x7f
+
+# ATT: vcvttph2w -256(%edx){1to8}, %xmm6 {%k7} {z}
+# INTEL: vcvttph2w xmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x7d,0x9f,0x7c,0x72,0x80
+
+# ATT: vcvttph2w 268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvttph2w ymm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x2f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttph2w (%ecx){1to16}, %ymm6
+# INTEL: vcvttph2w ymm6, word ptr [ecx]{1to16}
+0x62,0xf5,0x7d,0x38,0x7c,0x31
+
+# ATT: vcvttph2w 4064(%ecx), %ymm6
+# INTEL: vcvttph2w ymm6, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x7d,0x28,0x7c,0x71,0x7f
+
+# ATT: vcvttph2w -256(%edx){1to16}, %ymm6 {%k7} {z}
+# INTEL: vcvttph2w ymm6 {k7} {z}, word ptr [edx - 256]{1to16}
+0x62,0xf5,0x7d,0xbf,0x7c,0x72,0x80
+
+# ATT: vcvtudq2ph %xmm5, %xmm6
+# INTEL: vcvtudq2ph xmm6, xmm5
+0x62,0xf5,0x7f,0x08,0x7a,0xf5
+
+# ATT: vcvtudq2ph %ymm5, %xmm6
+# INTEL: vcvtudq2ph xmm6, ymm5
+0x62,0xf5,0x7f,0x28,0x7a,0xf5
+
+# ATT: vcvtudq2phx 268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtudq2ph xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7f,0x0f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvtudq2ph (%ecx){1to4}, %xmm6
+# INTEL: vcvtudq2ph xmm6, dword ptr [ecx]{1to4}
+0x62,0xf5,0x7f,0x18,0x7a,0x31
+
+# ATT: vcvtudq2phx 2032(%ecx), %xmm6
+# INTEL: vcvtudq2ph xmm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7f,0x08,0x7a,0x71,0x7f
+
+# ATT: vcvtudq2ph -512(%edx){1to4}, %xmm6 {%k7} {z}
+# INTEL: vcvtudq2ph xmm6 {k7} {z}, dword ptr [edx - 512]{1to4}
+0x62,0xf5,0x7f,0x9f,0x7a,0x72,0x80
+
+# ATT: vcvtudq2ph (%ecx){1to8}, %xmm6
+# INTEL: vcvtudq2ph xmm6, dword ptr [ecx]{1to8}
+0x62,0xf5,0x7f,0x38,0x7a,0x31
+
+# ATT: vcvtudq2phy 4064(%ecx), %xmm6
+# INTEL: vcvtudq2ph xmm6, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x7f,0x28,0x7a,0x71,0x7f
+
+# ATT: vcvtudq2ph -512(%edx){1to8}, %xmm6 {%k7} {z}
+# INTEL: vcvtudq2ph xmm6 {k7} {z}, dword ptr [edx - 512]{1to8}
+0x62,0xf5,0x7f,0xbf,0x7a,0x72,0x80
+
+# ATT: vcvtuqq2ph %xmm5, %xmm6
+# INTEL: vcvtuqq2ph xmm6, xmm5
+0x62,0xf5,0xff,0x08,0x7a,0xf5
+
+# ATT: vcvtuqq2ph %ymm5, %xmm6
+# INTEL: vcvtuqq2ph xmm6, ymm5
+0x62,0xf5,0xff,0x28,0x7a,0xf5
+
+# ATT: vcvtuqq2phx 268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtuqq2ph xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0xff,0x0f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvtuqq2ph (%ecx){1to2}, %xmm6
+# INTEL: vcvtuqq2ph xmm6, qword ptr [ecx]{1to2}
+0x62,0xf5,0xff,0x18,0x7a,0x31
+
+# ATT: vcvtuqq2phx 2032(%ecx), %xmm6
+# INTEL: vcvtuqq2ph xmm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0xff,0x08,0x7a,0x71,0x7f
+
+# ATT: vcvtuqq2ph -1024(%edx){1to2}, %xmm6 {%k7} {z}
+# INTEL: vcvtuqq2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to2}
+0x62,0xf5,0xff,0x9f,0x7a,0x72,0x80
+
+# ATT: vcvtuqq2ph (%ecx){1to4}, %xmm6
+# INTEL: vcvtuqq2ph xmm6, qword ptr [ecx]{1to4}
+0x62,0xf5,0xff,0x38,0x7a,0x31
+
+# ATT: vcvtuqq2phy 4064(%ecx), %xmm6
+# INTEL: vcvtuqq2ph xmm6, ymmword ptr [ecx + 4064]
+0x62,0xf5,0xff,0x28,0x7a,0x71,0x7f
+
+# ATT: vcvtuqq2ph -1024(%edx){1to4}, %xmm6 {%k7} {z}
+# INTEL: vcvtuqq2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to4}
+0x62,0xf5,0xff,0xbf,0x7a,0x72,0x80
+
+# ATT: vcvtuw2ph %xmm5, %xmm6
+# INTEL: vcvtuw2ph xmm6, xmm5
+0x62,0xf5,0x7f,0x08,0x7d,0xf5
+
+# ATT: vcvtuw2ph %ymm5, %ymm6
+# INTEL: vcvtuw2ph ymm6, ymm5
+0x62,0xf5,0x7f,0x28,0x7d,0xf5
+
+# ATT: vcvtuw2ph 268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtuw2ph xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7f,0x0f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvtuw2ph (%ecx){1to8}, %xmm6
+# INTEL: vcvtuw2ph xmm6, word ptr [ecx]{1to8}
+0x62,0xf5,0x7f,0x18,0x7d,0x31
+
+# ATT: vcvtuw2ph 2032(%ecx), %xmm6
+# INTEL: vcvtuw2ph xmm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7f,0x08,0x7d,0x71,0x7f
+
+# ATT: vcvtuw2ph -256(%edx){1to8}, %xmm6 {%k7} {z}
+# INTEL: vcvtuw2ph xmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x7f,0x9f,0x7d,0x72,0x80
+
+# ATT: vcvtuw2ph 268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvtuw2ph ymm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7f,0x2f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvtuw2ph (%ecx){1to16}, %ymm6
+# INTEL: vcvtuw2ph ymm6, word ptr [ecx]{1to16}
+0x62,0xf5,0x7f,0x38,0x7d,0x31
+
+# ATT: vcvtuw2ph 4064(%ecx), %ymm6
+# INTEL: vcvtuw2ph ymm6, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x7f,0x28,0x7d,0x71,0x7f
+
+# ATT: vcvtuw2ph -256(%edx){1to16}, %ymm6 {%k7} {z}
+# INTEL: vcvtuw2ph ymm6 {k7} {z}, word ptr [edx - 256]{1to16}
+0x62,0xf5,0x7f,0xbf,0x7d,0x72,0x80
+
+# ATT: vcvtw2ph %xmm5, %xmm6
+# INTEL: vcvtw2ph xmm6, xmm5
+0x62,0xf5,0x7e,0x08,0x7d,0xf5
+
+# ATT: vcvtw2ph %ymm5, %ymm6
+# INTEL: vcvtw2ph ymm6, ymm5
+0x62,0xf5,0x7e,0x28,0x7d,0xf5
+
+# ATT: vcvtw2ph 268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtw2ph xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7e,0x0f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvtw2ph (%ecx){1to8}, %xmm6
+# INTEL: vcvtw2ph xmm6, word ptr [ecx]{1to8}
+0x62,0xf5,0x7e,0x18,0x7d,0x31
+
+# ATT: vcvtw2ph 2032(%ecx), %xmm6
+# INTEL: vcvtw2ph xmm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7e,0x08,0x7d,0x71,0x7f
+
+# ATT: vcvtw2ph -256(%edx){1to8}, %xmm6 {%k7} {z}
+# INTEL: vcvtw2ph xmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x7e,0x9f,0x7d,0x72,0x80
+
+# ATT: vcvtw2ph 268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvtw2ph ymm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7e,0x2f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvtw2ph (%ecx){1to16}, %ymm6
+# INTEL: vcvtw2ph ymm6, word ptr [ecx]{1to16}
+0x62,0xf5,0x7e,0x38,0x7d,0x31
+
+# ATT: vcvtw2ph 4064(%ecx), %ymm6
+# INTEL: vcvtw2ph ymm6, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x7e,0x28,0x7d,0x71,0x7f
+
+# ATT: vcvtw2ph -256(%edx){1to16}, %ymm6 {%k7} {z}
+# INTEL: vcvtw2ph ymm6 {k7} {z}, word ptr [edx - 256]{1to16}
+0x62,0xf5,0x7e,0xbf,0x7d,0x72,0x80
diff --git a/llvm/test/MC/X86/avx512fp16.s b/llvm/test/MC/X86/avx512fp16.s
index c45d0956faa1c..1ca659f29acea 100644
--- a/llvm/test/MC/X86/avx512fp16.s
+++ b/llvm/test/MC/X86/avx512fp16.s
@@ -459,3 +459,899 @@
// CHECK: vucomish -256(%rdx), %xmm30
// CHECK: encoding: [0x62,0x65,0x7c,0x08,0x2e,0x72,0x80]
vucomish -256(%rdx), %xmm30
+
+// CHECK: vcvtdq2ph %zmm29, %ymm30
+// CHECK: encoding: [0x62,0x05,0x7c,0x48,0x5b,0xf5]
+ vcvtdq2ph %zmm29, %ymm30
+
+// CHECK: vcvtdq2ph {rn-sae}, %zmm29, %ymm30
+// CHECK: encoding: [0x62,0x05,0x7c,0x18,0x5b,0xf5]
+ vcvtdq2ph {rn-sae}, %zmm29, %ymm30
+
+// CHECK: vcvtdq2ph 268435456(%rbp,%r14,8), %ymm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7c,0x4f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtdq2ph 268435456(%rbp,%r14,8), %ymm30 {%k7}
+
+// CHECK: vcvtdq2ph (%r9){1to16}, %ymm30
+// CHECK: encoding: [0x62,0x45,0x7c,0x58,0x5b,0x31]
+ vcvtdq2ph (%r9){1to16}, %ymm30
+
+// CHECK: vcvtdq2ph 8128(%rcx), %ymm30
+// CHECK: encoding: [0x62,0x65,0x7c,0x48,0x5b,0x71,0x7f]
+ vcvtdq2ph 8128(%rcx), %ymm30
+
+// CHECK: vcvtdq2ph -512(%rdx){1to16}, %ymm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7c,0xdf,0x5b,0x72,0x80]
+ vcvtdq2ph -512(%rdx){1to16}, %ymm30 {%k7} {z}
+
+// CHECK: vcvtpd2ph %zmm29, %xmm30
+// CHECK: encoding: [0x62,0x05,0xfd,0x48,0x5a,0xf5]
+ vcvtpd2ph %zmm29, %xmm30
+
+// CHECK: vcvtpd2ph {rn-sae}, %zmm29, %xmm30
+// CHECK: encoding: [0x62,0x05,0xfd,0x18,0x5a,0xf5]
+ vcvtpd2ph {rn-sae}, %zmm29, %xmm30
+
+// CHECK: vcvtpd2phz 268435456(%rbp,%r14,8), %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0xfd,0x4f,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtpd2phz 268435456(%rbp,%r14,8), %xmm30 {%k7}
+
+// CHECK: vcvtpd2ph (%r9){1to8}, %xmm30
+// CHECK: encoding: [0x62,0x45,0xfd,0x58,0x5a,0x31]
+ vcvtpd2ph (%r9){1to8}, %xmm30
+
+// CHECK: vcvtpd2phz 8128(%rcx), %xmm30
+// CHECK: encoding: [0x62,0x65,0xfd,0x48,0x5a,0x71,0x7f]
+ vcvtpd2phz 8128(%rcx), %xmm30
+
+// CHECK: vcvtpd2ph -1024(%rdx){1to8}, %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0xfd,0xdf,0x5a,0x72,0x80]
+ vcvtpd2ph -1024(%rdx){1to8}, %xmm30 {%k7} {z}
+
+// CHECK: vcvtph2dq %ymm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x48,0x5b,0xf5]
+ vcvtph2dq %ymm29, %zmm30
+
+// CHECK: vcvtph2dq {rn-sae}, %ymm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x18,0x5b,0xf5]
+ vcvtph2dq {rn-sae}, %ymm29, %zmm30
+
+// CHECK: vcvtph2dq 268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7d,0x4f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtph2dq 268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvtph2dq (%r9){1to16}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7d,0x58,0x5b,0x31]
+ vcvtph2dq (%r9){1to16}, %zmm30
+
+// CHECK: vcvtph2dq 4064(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7d,0x48,0x5b,0x71,0x7f]
+ vcvtph2dq 4064(%rcx), %zmm30
+
+// CHECK: vcvtph2dq -256(%rdx){1to16}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7d,0xdf,0x5b,0x72,0x80]
+ vcvtph2dq -256(%rdx){1to16}, %zmm30 {%k7} {z}
+
+// CHECK: vcvtph2pd %xmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7c,0x48,0x5a,0xf5]
+ vcvtph2pd %xmm29, %zmm30
+
+// CHECK: vcvtph2pd {sae}, %xmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7c,0x18,0x5a,0xf5]
+ vcvtph2pd {sae}, %xmm29, %zmm30
+
+// CHECK: vcvtph2pd 268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7c,0x4f,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtph2pd 268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvtph2pd (%r9){1to8}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7c,0x58,0x5a,0x31]
+ vcvtph2pd (%r9){1to8}, %zmm30
+
+// CHECK: vcvtph2pd 2032(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7c,0x48,0x5a,0x71,0x7f]
+ vcvtph2pd 2032(%rcx), %zmm30
+
+// CHECK: vcvtph2pd -256(%rdx){1to8}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7c,0xdf,0x5a,0x72,0x80]
+ vcvtph2pd -256(%rdx){1to8}, %zmm30 {%k7} {z}
+
+// CHECK: vcvtph2psx %ymm29, %zmm30
+// CHECK: encoding: [0x62,0x06,0x7d,0x48,0x13,0xf5]
+ vcvtph2psx %ymm29, %zmm30
+
+// CHECK: vcvtph2psx {sae}, %ymm29, %zmm30
+// CHECK: encoding: [0x62,0x06,0x7d,0x18,0x13,0xf5]
+ vcvtph2psx {sae}, %ymm29, %zmm30
+
+// CHECK: vcvtph2psx 268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x26,0x7d,0x4f,0x13,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtph2psx 268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvtph2psx (%r9){1to16}, %zmm30
+// CHECK: encoding: [0x62,0x46,0x7d,0x58,0x13,0x31]
+ vcvtph2psx (%r9){1to16}, %zmm30
+
+// CHECK: vcvtph2psx 4064(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x66,0x7d,0x48,0x13,0x71,0x7f]
+ vcvtph2psx 4064(%rcx), %zmm30
+
+// CHECK: vcvtph2psx -256(%rdx){1to16}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x66,0x7d,0xdf,0x13,0x72,0x80]
+ vcvtph2psx -256(%rdx){1to16}, %zmm30 {%k7} {z}
+
+// CHECK: vcvtph2qq %xmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x48,0x7b,0xf5]
+ vcvtph2qq %xmm29, %zmm30
+
+// CHECK: vcvtph2qq {rn-sae}, %xmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x18,0x7b,0xf5]
+ vcvtph2qq {rn-sae}, %xmm29, %zmm30
+
+// CHECK: vcvtph2qq 268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7d,0x4f,0x7b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtph2qq 268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvtph2qq (%r9){1to8}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7d,0x58,0x7b,0x31]
+ vcvtph2qq (%r9){1to8}, %zmm30
+
+// CHECK: vcvtph2qq 2032(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7d,0x48,0x7b,0x71,0x7f]
+ vcvtph2qq 2032(%rcx), %zmm30
+
+// CHECK: vcvtph2qq -256(%rdx){1to8}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7d,0xdf,0x7b,0x72,0x80]
+ vcvtph2qq -256(%rdx){1to8}, %zmm30 {%k7} {z}
+
+// CHECK: vcvtph2udq %ymm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7c,0x48,0x79,0xf5]
+ vcvtph2udq %ymm29, %zmm30
+
+// CHECK: vcvtph2udq {rn-sae}, %ymm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7c,0x18,0x79,0xf5]
+ vcvtph2udq {rn-sae}, %ymm29, %zmm30
+
+// CHECK: vcvtph2udq 268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7c,0x4f,0x79,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtph2udq 268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvtph2udq (%r9){1to16}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7c,0x58,0x79,0x31]
+ vcvtph2udq (%r9){1to16}, %zmm30
+
+// CHECK: vcvtph2udq 4064(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7c,0x48,0x79,0x71,0x7f]
+ vcvtph2udq 4064(%rcx), %zmm30
+
+// CHECK: vcvtph2udq -256(%rdx){1to16}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7c,0xdf,0x79,0x72,0x80]
+ vcvtph2udq -256(%rdx){1to16}, %zmm30 {%k7} {z}
+
+// CHECK: vcvtph2uqq %xmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x48,0x79,0xf5]
+ vcvtph2uqq %xmm29, %zmm30
+
+// CHECK: vcvtph2uqq {rn-sae}, %xmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x18,0x79,0xf5]
+ vcvtph2uqq {rn-sae}, %xmm29, %zmm30
+
+// CHECK: vcvtph2uqq 268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7d,0x4f,0x79,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtph2uqq 268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvtph2uqq (%r9){1to8}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7d,0x58,0x79,0x31]
+ vcvtph2uqq (%r9){1to8}, %zmm30
+
+// CHECK: vcvtph2uqq 2032(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7d,0x48,0x79,0x71,0x7f]
+ vcvtph2uqq 2032(%rcx), %zmm30
+
+// CHECK: vcvtph2uqq -256(%rdx){1to8}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7d,0xdf,0x79,0x72,0x80]
+ vcvtph2uqq -256(%rdx){1to8}, %zmm30 {%k7} {z}
+
+// CHECK: vcvtph2uw %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7c,0x48,0x7d,0xf5]
+ vcvtph2uw %zmm29, %zmm30
+
+// CHECK: vcvtph2uw {rn-sae}, %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7c,0x18,0x7d,0xf5]
+ vcvtph2uw {rn-sae}, %zmm29, %zmm30
+
+// CHECK: vcvtph2uw 268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7c,0x4f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtph2uw 268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvtph2uw (%r9){1to32}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7c,0x58,0x7d,0x31]
+ vcvtph2uw (%r9){1to32}, %zmm30
+
+// CHECK: vcvtph2uw 8128(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7c,0x48,0x7d,0x71,0x7f]
+ vcvtph2uw 8128(%rcx), %zmm30
+
+// CHECK: vcvtph2uw -256(%rdx){1to32}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7c,0xdf,0x7d,0x72,0x80]
+ vcvtph2uw -256(%rdx){1to32}, %zmm30 {%k7} {z}
+
+// CHECK: vcvtph2w %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x48,0x7d,0xf5]
+ vcvtph2w %zmm29, %zmm30
+
+// CHECK: vcvtph2w {rn-sae}, %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x18,0x7d,0xf5]
+ vcvtph2w {rn-sae}, %zmm29, %zmm30
+
+// CHECK: vcvtph2w 268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7d,0x4f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtph2w 268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvtph2w (%r9){1to32}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7d,0x58,0x7d,0x31]
+ vcvtph2w (%r9){1to32}, %zmm30
+
+// CHECK: vcvtph2w 8128(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7d,0x48,0x7d,0x71,0x7f]
+ vcvtph2w 8128(%rcx), %zmm30
+
+// CHECK: vcvtph2w -256(%rdx){1to32}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7d,0xdf,0x7d,0x72,0x80]
+ vcvtph2w -256(%rdx){1to32}, %zmm30 {%k7} {z}
+
+// CHECK: vcvtps2phx %zmm29, %ymm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x48,0x1d,0xf5]
+ vcvtps2phx %zmm29, %ymm30
+
+// CHECK: vcvtps2phx {rn-sae}, %zmm29, %ymm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x18,0x1d,0xf5]
+ vcvtps2phx {rn-sae}, %zmm29, %ymm30
+
+// CHECK: vcvtps2phx 268435456(%rbp,%r14,8), %ymm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7d,0x4f,0x1d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtps2phx 268435456(%rbp,%r14,8), %ymm30 {%k7}
+
+// CHECK: vcvtps2phx (%r9){1to16}, %ymm30
+// CHECK: encoding: [0x62,0x45,0x7d,0x58,0x1d,0x31]
+ vcvtps2phx (%r9){1to16}, %ymm30
+
+// CHECK: vcvtps2phx 8128(%rcx), %ymm30
+// CHECK: encoding: [0x62,0x65,0x7d,0x48,0x1d,0x71,0x7f]
+ vcvtps2phx 8128(%rcx), %ymm30
+
+// CHECK: vcvtps2phx -512(%rdx){1to16}, %ymm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7d,0xdf,0x1d,0x72,0x80]
+ vcvtps2phx -512(%rdx){1to16}, %ymm30 {%k7} {z}
+
+// CHECK: vcvtqq2ph %zmm29, %xmm30
+// CHECK: encoding: [0x62,0x05,0xfc,0x48,0x5b,0xf5]
+ vcvtqq2ph %zmm29, %xmm30
+
+// CHECK: vcvtqq2ph {rn-sae}, %zmm29, %xmm30
+// CHECK: encoding: [0x62,0x05,0xfc,0x18,0x5b,0xf5]
+ vcvtqq2ph {rn-sae}, %zmm29, %xmm30
+
+// CHECK: vcvtqq2phz 268435456(%rbp,%r14,8), %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0xfc,0x4f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtqq2phz 268435456(%rbp,%r14,8), %xmm30 {%k7}
+
+// CHECK: vcvtqq2ph (%r9){1to8}, %xmm30
+// CHECK: encoding: [0x62,0x45,0xfc,0x58,0x5b,0x31]
+ vcvtqq2ph (%r9){1to8}, %xmm30
+
+// CHECK: vcvtqq2phz 8128(%rcx), %xmm30
+// CHECK: encoding: [0x62,0x65,0xfc,0x48,0x5b,0x71,0x7f]
+ vcvtqq2phz 8128(%rcx), %xmm30
+
+// CHECK: vcvtqq2ph -1024(%rdx){1to8}, %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0xfc,0xdf,0x5b,0x72,0x80]
+ vcvtqq2ph -1024(%rdx){1to8}, %xmm30 {%k7} {z}
+
+// CHECK: vcvtsd2sh %xmm28, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x05,0x97,0x00,0x5a,0xf4]
+ vcvtsd2sh %xmm28, %xmm29, %xmm30
+
+// CHECK: vcvtsd2sh {rn-sae}, %xmm28, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x05,0x97,0x10,0x5a,0xf4]
+ vcvtsd2sh {rn-sae}, %xmm28, %xmm29, %xmm30
+
+// CHECK: vcvtsd2sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x97,0x07,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtsd2sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
+
+// CHECK: vcvtsd2sh (%r9), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x45,0x97,0x00,0x5a,0x31]
+ vcvtsd2sh (%r9), %xmm29, %xmm30
+
+// CHECK: vcvtsd2sh 1016(%rcx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x97,0x00,0x5a,0x71,0x7f]
+ vcvtsd2sh 1016(%rcx), %xmm29, %xmm30
+
+// CHECK: vcvtsd2sh -1024(%rdx), %xmm29, %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x97,0x87,0x5a,0x72,0x80]
+ vcvtsd2sh -1024(%rdx), %xmm29, %xmm30 {%k7} {z}
+
+// CHECK: vcvtsh2sd %xmm28, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x05,0x16,0x00,0x5a,0xf4]
+ vcvtsh2sd %xmm28, %xmm29, %xmm30
+
+// CHECK: vcvtsh2sd {sae}, %xmm28, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x05,0x16,0x10,0x5a,0xf4]
+ vcvtsh2sd {sae}, %xmm28, %xmm29, %xmm30
+
+// CHECK: vcvtsh2sd 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x16,0x07,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtsh2sd 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
+
+// CHECK: vcvtsh2sd (%r9), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x45,0x16,0x00,0x5a,0x31]
+ vcvtsh2sd (%r9), %xmm29, %xmm30
+
+// CHECK: vcvtsh2sd 254(%rcx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x16,0x00,0x5a,0x71,0x7f]
+ vcvtsh2sd 254(%rcx), %xmm29, %xmm30
+
+// CHECK: vcvtsh2sd -256(%rdx), %xmm29, %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x16,0x87,0x5a,0x72,0x80]
+ vcvtsh2sd -256(%rdx), %xmm29, %xmm30 {%k7} {z}
+
+// CHECK: vcvtsh2si %xmm30, %edx
+// CHECK: encoding: [0x62,0x95,0x7e,0x08,0x2d,0xd6]
+ vcvtsh2si %xmm30, %edx
+
+// CHECK: vcvtsh2si {rn-sae}, %xmm30, %edx
+// CHECK: encoding: [0x62,0x95,0x7e,0x18,0x2d,0xd6]
+ vcvtsh2si {rn-sae}, %xmm30, %edx
+
+// CHECK: vcvtsh2si %xmm30, %r12
+// CHECK: encoding: [0x62,0x15,0xfe,0x08,0x2d,0xe6]
+ vcvtsh2si %xmm30, %r12
+
+// CHECK: vcvtsh2si {rn-sae}, %xmm30, %r12
+// CHECK: encoding: [0x62,0x15,0xfe,0x18,0x2d,0xe6]
+ vcvtsh2si {rn-sae}, %xmm30, %r12
+
+// CHECK: vcvtsh2si 268435456(%rbp,%r14,8), %edx
+// CHECK: encoding: [0x62,0xb5,0x7e,0x08,0x2d,0x94,0xf5,0x00,0x00,0x00,0x10]
+ vcvtsh2si 268435456(%rbp,%r14,8), %edx
+
+// CHECK: vcvtsh2si (%r9), %edx
+// CHECK: encoding: [0x62,0xd5,0x7e,0x08,0x2d,0x11]
+ vcvtsh2si (%r9), %edx
+
+// CHECK: vcvtsh2si 254(%rcx), %edx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2d,0x51,0x7f]
+ vcvtsh2si 254(%rcx), %edx
+
+// CHECK: vcvtsh2si -256(%rdx), %edx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2d,0x52,0x80]
+ vcvtsh2si -256(%rdx), %edx
+
+// CHECK: vcvtsh2si 268435456(%rbp,%r14,8), %r12
+// CHECK: encoding: [0x62,0x35,0xfe,0x08,0x2d,0xa4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtsh2si 268435456(%rbp,%r14,8), %r12
+
+// CHECK: vcvtsh2si (%r9), %r12
+// CHECK: encoding: [0x62,0x55,0xfe,0x08,0x2d,0x21]
+ vcvtsh2si (%r9), %r12
+
+// CHECK: vcvtsh2si 254(%rcx), %r12
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x2d,0x61,0x7f]
+ vcvtsh2si 254(%rcx), %r12
+
+// CHECK: vcvtsh2si -256(%rdx), %r12
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x2d,0x62,0x80]
+ vcvtsh2si -256(%rdx), %r12
+
+// CHECK: vcvtsh2ss %xmm28, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x06,0x14,0x00,0x13,0xf4]
+ vcvtsh2ss %xmm28, %xmm29, %xmm30
+
+// CHECK: vcvtsh2ss {sae}, %xmm28, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x06,0x14,0x10,0x13,0xf4]
+ vcvtsh2ss {sae}, %xmm28, %xmm29, %xmm30
+
+// CHECK: vcvtsh2ss 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x26,0x14,0x07,0x13,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtsh2ss 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
+
+// CHECK: vcvtsh2ss (%r9), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x46,0x14,0x00,0x13,0x31]
+ vcvtsh2ss (%r9), %xmm29, %xmm30
+
+// CHECK: vcvtsh2ss 254(%rcx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x66,0x14,0x00,0x13,0x71,0x7f]
+ vcvtsh2ss 254(%rcx), %xmm29, %xmm30
+
+// CHECK: vcvtsh2ss -256(%rdx), %xmm29, %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x66,0x14,0x87,0x13,0x72,0x80]
+ vcvtsh2ss -256(%rdx), %xmm29, %xmm30 {%k7} {z}
+
+// CHECK: vcvtsh2usi %xmm30, %edx
+// CHECK: encoding: [0x62,0x95,0x7e,0x08,0x79,0xd6]
+ vcvtsh2usi %xmm30, %edx
+
+// CHECK: vcvtsh2usi {rn-sae}, %xmm30, %edx
+// CHECK: encoding: [0x62,0x95,0x7e,0x18,0x79,0xd6]
+ vcvtsh2usi {rn-sae}, %xmm30, %edx
+
+// CHECK: vcvtsh2usi %xmm30, %r12
+// CHECK: encoding: [0x62,0x15,0xfe,0x08,0x79,0xe6]
+ vcvtsh2usi %xmm30, %r12
+
+// CHECK: vcvtsh2usi {rn-sae}, %xmm30, %r12
+// CHECK: encoding: [0x62,0x15,0xfe,0x18,0x79,0xe6]
+ vcvtsh2usi {rn-sae}, %xmm30, %r12
+
+// CHECK: vcvtsh2usi 268435456(%rbp,%r14,8), %edx
+// CHECK: encoding: [0x62,0xb5,0x7e,0x08,0x79,0x94,0xf5,0x00,0x00,0x00,0x10]
+ vcvtsh2usi 268435456(%rbp,%r14,8), %edx
+
+// CHECK: vcvtsh2usi (%r9), %edx
+// CHECK: encoding: [0x62,0xd5,0x7e,0x08,0x79,0x11]
+ vcvtsh2usi (%r9), %edx
+
+// CHECK: vcvtsh2usi 254(%rcx), %edx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x79,0x51,0x7f]
+ vcvtsh2usi 254(%rcx), %edx
+
+// CHECK: vcvtsh2usi -256(%rdx), %edx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x79,0x52,0x80]
+ vcvtsh2usi -256(%rdx), %edx
+
+// CHECK: vcvtsh2usi 268435456(%rbp,%r14,8), %r12
+// CHECK: encoding: [0x62,0x35,0xfe,0x08,0x79,0xa4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtsh2usi 268435456(%rbp,%r14,8), %r12
+
+// CHECK: vcvtsh2usi (%r9), %r12
+// CHECK: encoding: [0x62,0x55,0xfe,0x08,0x79,0x21]
+ vcvtsh2usi (%r9), %r12
+
+// CHECK: vcvtsh2usi 254(%rcx), %r12
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x79,0x61,0x7f]
+ vcvtsh2usi 254(%rcx), %r12
+
+// CHECK: vcvtsh2usi -256(%rdx), %r12
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x79,0x62,0x80]
+ vcvtsh2usi -256(%rdx), %r12
+
+// CHECK: vcvtsi2sh %r12, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x45,0x96,0x00,0x2a,0xf4]
+ vcvtsi2sh %r12, %xmm29, %xmm30
+
+// CHECK: vcvtsi2sh %r12, {rn-sae}, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x45,0x96,0x10,0x2a,0xf4]
+ vcvtsi2sh %r12, {rn-sae}, %xmm29, %xmm30
+
+// CHECK: vcvtsi2sh %edx, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x16,0x00,0x2a,0xf2]
+ vcvtsi2sh %edx, %xmm29, %xmm30
+
+// CHECK: vcvtsi2sh %edx, {rn-sae}, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x16,0x10,0x2a,0xf2]
+ vcvtsi2sh %edx, {rn-sae}, %xmm29, %xmm30
+
+// CHECK: vcvtsi2shl 268435456(%rbp,%r14,8), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x25,0x16,0x00,0x2a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtsi2shl 268435456(%rbp,%r14,8), %xmm29, %xmm30
+
+// CHECK: vcvtsi2shl (%r9), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x45,0x16,0x00,0x2a,0x31]
+ vcvtsi2shl (%r9), %xmm29, %xmm30
+
+// CHECK: vcvtsi2shl 508(%rcx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x16,0x00,0x2a,0x71,0x7f]
+ vcvtsi2shl 508(%rcx), %xmm29, %xmm30
+
+// CHECK: vcvtsi2shl -512(%rdx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x16,0x00,0x2a,0x72,0x80]
+ vcvtsi2shl -512(%rdx), %xmm29, %xmm30
+
+// CHECK: vcvtsi2shq 1016(%rcx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x96,0x00,0x2a,0x71,0x7f]
+ vcvtsi2shq 1016(%rcx), %xmm29, %xmm30
+
+// CHECK: vcvtsi2shq -1024(%rdx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x96,0x00,0x2a,0x72,0x80]
+ vcvtsi2shq -1024(%rdx), %xmm29, %xmm30
+
+// CHECK: vcvtss2sh %xmm28, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x05,0x14,0x00,0x1d,0xf4]
+ vcvtss2sh %xmm28, %xmm29, %xmm30
+
+// CHECK: vcvtss2sh {rn-sae}, %xmm28, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x05,0x14,0x10,0x1d,0xf4]
+ vcvtss2sh {rn-sae}, %xmm28, %xmm29, %xmm30
+
+// CHECK: vcvtss2sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x14,0x07,0x1d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtss2sh 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
+
+// CHECK: vcvtss2sh (%r9), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x45,0x14,0x00,0x1d,0x31]
+ vcvtss2sh (%r9), %xmm29, %xmm30
+
+// CHECK: vcvtss2sh 508(%rcx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x14,0x00,0x1d,0x71,0x7f]
+ vcvtss2sh 508(%rcx), %xmm29, %xmm30
+
+// CHECK: vcvtss2sh -512(%rdx), %xmm29, %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x14,0x87,0x1d,0x72,0x80]
+ vcvtss2sh -512(%rdx), %xmm29, %xmm30 {%k7} {z}
+
+// CHECK: vcvttph2dq %ymm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7e,0x48,0x5b,0xf5]
+ vcvttph2dq %ymm29, %zmm30
+
+// CHECK: vcvttph2dq {sae}, %ymm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7e,0x18,0x5b,0xf5]
+ vcvttph2dq {sae}, %ymm29, %zmm30
+
+// CHECK: vcvttph2dq 268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7e,0x4f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttph2dq 268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvttph2dq (%r9){1to16}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7e,0x58,0x5b,0x31]
+ vcvttph2dq (%r9){1to16}, %zmm30
+
+// CHECK: vcvttph2dq 4064(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7e,0x48,0x5b,0x71,0x7f]
+ vcvttph2dq 4064(%rcx), %zmm30
+
+// CHECK: vcvttph2dq -256(%rdx){1to16}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7e,0xdf,0x5b,0x72,0x80]
+ vcvttph2dq -256(%rdx){1to16}, %zmm30 {%k7} {z}
+
+// CHECK: vcvttph2qq %xmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x48,0x7a,0xf5]
+ vcvttph2qq %xmm29, %zmm30
+
+// CHECK: vcvttph2qq {sae}, %xmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x18,0x7a,0xf5]
+ vcvttph2qq {sae}, %xmm29, %zmm30
+
+// CHECK: vcvttph2qq 268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7d,0x4f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttph2qq 268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvttph2qq (%r9){1to8}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7d,0x58,0x7a,0x31]
+ vcvttph2qq (%r9){1to8}, %zmm30
+
+// CHECK: vcvttph2qq 2032(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7d,0x48,0x7a,0x71,0x7f]
+ vcvttph2qq 2032(%rcx), %zmm30
+
+// CHECK: vcvttph2qq -256(%rdx){1to8}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7d,0xdf,0x7a,0x72,0x80]
+ vcvttph2qq -256(%rdx){1to8}, %zmm30 {%k7} {z}
+
+// CHECK: vcvttph2udq %ymm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7c,0x48,0x78,0xf5]
+ vcvttph2udq %ymm29, %zmm30
+
+// CHECK: vcvttph2udq {sae}, %ymm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7c,0x18,0x78,0xf5]
+ vcvttph2udq {sae}, %ymm29, %zmm30
+
+// CHECK: vcvttph2udq 268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7c,0x4f,0x78,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttph2udq 268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvttph2udq (%r9){1to16}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7c,0x58,0x78,0x31]
+ vcvttph2udq (%r9){1to16}, %zmm30
+
+// CHECK: vcvttph2udq 4064(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7c,0x48,0x78,0x71,0x7f]
+ vcvttph2udq 4064(%rcx), %zmm30
+
+// CHECK: vcvttph2udq -256(%rdx){1to16}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7c,0xdf,0x78,0x72,0x80]
+ vcvttph2udq -256(%rdx){1to16}, %zmm30 {%k7} {z}
+
+// CHECK: vcvttph2uqq %xmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x48,0x78,0xf5]
+ vcvttph2uqq %xmm29, %zmm30
+
+// CHECK: vcvttph2uqq {sae}, %xmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x18,0x78,0xf5]
+ vcvttph2uqq {sae}, %xmm29, %zmm30
+
+// CHECK: vcvttph2uqq 268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7d,0x4f,0x78,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttph2uqq 268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvttph2uqq (%r9){1to8}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7d,0x58,0x78,0x31]
+ vcvttph2uqq (%r9){1to8}, %zmm30
+
+// CHECK: vcvttph2uqq 2032(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7d,0x48,0x78,0x71,0x7f]
+ vcvttph2uqq 2032(%rcx), %zmm30
+
+// CHECK: vcvttph2uqq -256(%rdx){1to8}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7d,0xdf,0x78,0x72,0x80]
+ vcvttph2uqq -256(%rdx){1to8}, %zmm30 {%k7} {z}
+
+// CHECK: vcvttph2uw %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7c,0x48,0x7c,0xf5]
+ vcvttph2uw %zmm29, %zmm30
+
+// CHECK: vcvttph2uw {sae}, %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7c,0x18,0x7c,0xf5]
+ vcvttph2uw {sae}, %zmm29, %zmm30
+
+// CHECK: vcvttph2uw 268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7c,0x4f,0x7c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttph2uw 268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvttph2uw (%r9){1to32}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7c,0x58,0x7c,0x31]
+ vcvttph2uw (%r9){1to32}, %zmm30
+
+// CHECK: vcvttph2uw 8128(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7c,0x48,0x7c,0x71,0x7f]
+ vcvttph2uw 8128(%rcx), %zmm30
+
+// CHECK: vcvttph2uw -256(%rdx){1to32}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7c,0xdf,0x7c,0x72,0x80]
+ vcvttph2uw -256(%rdx){1to32}, %zmm30 {%k7} {z}
+
+// CHECK: vcvttph2w %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x48,0x7c,0xf5]
+ vcvttph2w %zmm29, %zmm30
+
+// CHECK: vcvttph2w {sae}, %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x18,0x7c,0xf5]
+ vcvttph2w {sae}, %zmm29, %zmm30
+
+// CHECK: vcvttph2w 268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7d,0x4f,0x7c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttph2w 268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvttph2w (%r9){1to32}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7d,0x58,0x7c,0x31]
+ vcvttph2w (%r9){1to32}, %zmm30
+
+// CHECK: vcvttph2w 8128(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7d,0x48,0x7c,0x71,0x7f]
+ vcvttph2w 8128(%rcx), %zmm30
+
+// CHECK: vcvttph2w -256(%rdx){1to32}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7d,0xdf,0x7c,0x72,0x80]
+ vcvttph2w -256(%rdx){1to32}, %zmm30 {%k7} {z}
+
+// CHECK: vcvttsh2si %xmm30, %edx
+// CHECK: encoding: [0x62,0x95,0x7e,0x08,0x2c,0xd6]
+ vcvttsh2si %xmm30, %edx
+
+// CHECK: vcvttsh2si {sae}, %xmm30, %edx
+// CHECK: encoding: [0x62,0x95,0x7e,0x18,0x2c,0xd6]
+ vcvttsh2si {sae}, %xmm30, %edx
+
+// CHECK: vcvttsh2si %xmm30, %r12
+// CHECK: encoding: [0x62,0x15,0xfe,0x08,0x2c,0xe6]
+ vcvttsh2si %xmm30, %r12
+
+// CHECK: vcvttsh2si {sae}, %xmm30, %r12
+// CHECK: encoding: [0x62,0x15,0xfe,0x18,0x2c,0xe6]
+ vcvttsh2si {sae}, %xmm30, %r12
+
+// CHECK: vcvttsh2si 268435456(%rbp,%r14,8), %edx
+// CHECK: encoding: [0x62,0xb5,0x7e,0x08,0x2c,0x94,0xf5,0x00,0x00,0x00,0x10]
+ vcvttsh2si 268435456(%rbp,%r14,8), %edx
+
+// CHECK: vcvttsh2si (%r9), %edx
+// CHECK: encoding: [0x62,0xd5,0x7e,0x08,0x2c,0x11]
+ vcvttsh2si (%r9), %edx
+
+// CHECK: vcvttsh2si 254(%rcx), %edx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2c,0x51,0x7f]
+ vcvttsh2si 254(%rcx), %edx
+
+// CHECK: vcvttsh2si -256(%rdx), %edx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2c,0x52,0x80]
+ vcvttsh2si -256(%rdx), %edx
+
+// CHECK: vcvttsh2si 268435456(%rbp,%r14,8), %r12
+// CHECK: encoding: [0x62,0x35,0xfe,0x08,0x2c,0xa4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttsh2si 268435456(%rbp,%r14,8), %r12
+
+// CHECK: vcvttsh2si (%r9), %r12
+// CHECK: encoding: [0x62,0x55,0xfe,0x08,0x2c,0x21]
+ vcvttsh2si (%r9), %r12
+
+// CHECK: vcvttsh2si 254(%rcx), %r12
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x2c,0x61,0x7f]
+ vcvttsh2si 254(%rcx), %r12
+
+// CHECK: vcvttsh2si -256(%rdx), %r12
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x2c,0x62,0x80]
+ vcvttsh2si -256(%rdx), %r12
+
+// CHECK: vcvttsh2usi %xmm30, %edx
+// CHECK: encoding: [0x62,0x95,0x7e,0x08,0x78,0xd6]
+ vcvttsh2usi %xmm30, %edx
+
+// CHECK: vcvttsh2usi {sae}, %xmm30, %edx
+// CHECK: encoding: [0x62,0x95,0x7e,0x18,0x78,0xd6]
+ vcvttsh2usi {sae}, %xmm30, %edx
+
+// CHECK: vcvttsh2usi %xmm30, %r12
+// CHECK: encoding: [0x62,0x15,0xfe,0x08,0x78,0xe6]
+ vcvttsh2usi %xmm30, %r12
+
+// CHECK: vcvttsh2usi {sae}, %xmm30, %r12
+// CHECK: encoding: [0x62,0x15,0xfe,0x18,0x78,0xe6]
+ vcvttsh2usi {sae}, %xmm30, %r12
+
+// CHECK: vcvttsh2usi 268435456(%rbp,%r14,8), %edx
+// CHECK: encoding: [0x62,0xb5,0x7e,0x08,0x78,0x94,0xf5,0x00,0x00,0x00,0x10]
+ vcvttsh2usi 268435456(%rbp,%r14,8), %edx
+
+// CHECK: vcvttsh2usi (%r9), %edx
+// CHECK: encoding: [0x62,0xd5,0x7e,0x08,0x78,0x11]
+ vcvttsh2usi (%r9), %edx
+
+// CHECK: vcvttsh2usi 254(%rcx), %edx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x78,0x51,0x7f]
+ vcvttsh2usi 254(%rcx), %edx
+
+// CHECK: vcvttsh2usi -256(%rdx), %edx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x78,0x52,0x80]
+ vcvttsh2usi -256(%rdx), %edx
+
+// CHECK: vcvttsh2usi 268435456(%rbp,%r14,8), %r12
+// CHECK: encoding: [0x62,0x35,0xfe,0x08,0x78,0xa4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttsh2usi 268435456(%rbp,%r14,8), %r12
+
+// CHECK: vcvttsh2usi (%r9), %r12
+// CHECK: encoding: [0x62,0x55,0xfe,0x08,0x78,0x21]
+ vcvttsh2usi (%r9), %r12
+
+// CHECK: vcvttsh2usi 254(%rcx), %r12
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x78,0x61,0x7f]
+ vcvttsh2usi 254(%rcx), %r12
+
+// CHECK: vcvttsh2usi -256(%rdx), %r12
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x78,0x62,0x80]
+ vcvttsh2usi -256(%rdx), %r12
+
+// CHECK: vcvtudq2ph %zmm29, %ymm30
+// CHECK: encoding: [0x62,0x05,0x7f,0x48,0x7a,0xf5]
+ vcvtudq2ph %zmm29, %ymm30
+
+// CHECK: vcvtudq2ph {rn-sae}, %zmm29, %ymm30
+// CHECK: encoding: [0x62,0x05,0x7f,0x18,0x7a,0xf5]
+ vcvtudq2ph {rn-sae}, %zmm29, %ymm30
+
+// CHECK: vcvtudq2ph 268435456(%rbp,%r14,8), %ymm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7f,0x4f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtudq2ph 268435456(%rbp,%r14,8), %ymm30 {%k7}
+
+// CHECK: vcvtudq2ph (%r9){1to16}, %ymm30
+// CHECK: encoding: [0x62,0x45,0x7f,0x58,0x7a,0x31]
+ vcvtudq2ph (%r9){1to16}, %ymm30
+
+// CHECK: vcvtudq2ph 8128(%rcx), %ymm30
+// CHECK: encoding: [0x62,0x65,0x7f,0x48,0x7a,0x71,0x7f]
+ vcvtudq2ph 8128(%rcx), %ymm30
+
+// CHECK: vcvtudq2ph -512(%rdx){1to16}, %ymm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7f,0xdf,0x7a,0x72,0x80]
+ vcvtudq2ph -512(%rdx){1to16}, %ymm30 {%k7} {z}
+
+// CHECK: vcvtuqq2ph %zmm29, %xmm30
+// CHECK: encoding: [0x62,0x05,0xff,0x48,0x7a,0xf5]
+ vcvtuqq2ph %zmm29, %xmm30
+
+// CHECK: vcvtuqq2ph {rn-sae}, %zmm29, %xmm30
+// CHECK: encoding: [0x62,0x05,0xff,0x18,0x7a,0xf5]
+ vcvtuqq2ph {rn-sae}, %zmm29, %xmm30
+
+// CHECK: vcvtuqq2phz 268435456(%rbp,%r14,8), %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0xff,0x4f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtuqq2phz 268435456(%rbp,%r14,8), %xmm30 {%k7}
+
+// CHECK: vcvtuqq2ph (%r9){1to8}, %xmm30
+// CHECK: encoding: [0x62,0x45,0xff,0x58,0x7a,0x31]
+ vcvtuqq2ph (%r9){1to8}, %xmm30
+
+// CHECK: vcvtuqq2phz 8128(%rcx), %xmm30
+// CHECK: encoding: [0x62,0x65,0xff,0x48,0x7a,0x71,0x7f]
+ vcvtuqq2phz 8128(%rcx), %xmm30
+
+// CHECK: vcvtuqq2ph -1024(%rdx){1to8}, %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0xff,0xdf,0x7a,0x72,0x80]
+ vcvtuqq2ph -1024(%rdx){1to8}, %xmm30 {%k7} {z}
+
+// CHECK: vcvtusi2sh %r12, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x45,0x96,0x00,0x7b,0xf4]
+ vcvtusi2sh %r12, %xmm29, %xmm30
+
+// CHECK: vcvtusi2sh %r12, {rn-sae}, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x45,0x96,0x10,0x7b,0xf4]
+ vcvtusi2sh %r12, {rn-sae}, %xmm29, %xmm30
+
+// CHECK: vcvtusi2sh %edx, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x16,0x00,0x7b,0xf2]
+ vcvtusi2sh %edx, %xmm29, %xmm30
+
+// CHECK: vcvtusi2sh %edx, {rn-sae}, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x16,0x10,0x7b,0xf2]
+ vcvtusi2sh %edx, {rn-sae}, %xmm29, %xmm30
+
+// CHECK: vcvtusi2shl 268435456(%rbp,%r14,8), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x25,0x16,0x00,0x7b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtusi2shl 268435456(%rbp,%r14,8), %xmm29, %xmm30
+
+// CHECK: vcvtusi2shl (%r9), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x45,0x16,0x00,0x7b,0x31]
+ vcvtusi2shl (%r9), %xmm29, %xmm30
+
+// CHECK: vcvtusi2shl 508(%rcx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x16,0x00,0x7b,0x71,0x7f]
+ vcvtusi2shl 508(%rcx), %xmm29, %xmm30
+
+// CHECK: vcvtusi2shl -512(%rdx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x16,0x00,0x7b,0x72,0x80]
+ vcvtusi2shl -512(%rdx), %xmm29, %xmm30
+
+// CHECK: vcvtusi2shq 1016(%rcx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x96,0x00,0x7b,0x71,0x7f]
+ vcvtusi2shq 1016(%rcx), %xmm29, %xmm30
+
+// CHECK: vcvtusi2shq -1024(%rdx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x96,0x00,0x7b,0x72,0x80]
+ vcvtusi2shq -1024(%rdx), %xmm29, %xmm30
+
+// CHECK: vcvtuw2ph %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7f,0x48,0x7d,0xf5]
+ vcvtuw2ph %zmm29, %zmm30
+
+// CHECK: vcvtuw2ph {rn-sae}, %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7f,0x18,0x7d,0xf5]
+ vcvtuw2ph {rn-sae}, %zmm29, %zmm30
+
+// CHECK: vcvtuw2ph 268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7f,0x4f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtuw2ph 268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvtuw2ph (%r9){1to32}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7f,0x58,0x7d,0x31]
+ vcvtuw2ph (%r9){1to32}, %zmm30
+
+// CHECK: vcvtuw2ph 8128(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7f,0x48,0x7d,0x71,0x7f]
+ vcvtuw2ph 8128(%rcx), %zmm30
+
+// CHECK: vcvtuw2ph -256(%rdx){1to32}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7f,0xdf,0x7d,0x72,0x80]
+ vcvtuw2ph -256(%rdx){1to32}, %zmm30 {%k7} {z}
+
+// CHECK: vcvtw2ph %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7e,0x48,0x7d,0xf5]
+ vcvtw2ph %zmm29, %zmm30
+
+// CHECK: vcvtw2ph {rn-sae}, %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7e,0x18,0x7d,0xf5]
+ vcvtw2ph {rn-sae}, %zmm29, %zmm30
+
+// CHECK: vcvtw2ph 268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7e,0x4f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtw2ph 268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvtw2ph (%r9){1to32}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7e,0x58,0x7d,0x31]
+ vcvtw2ph (%r9){1to32}, %zmm30
+
+// CHECK: vcvtw2ph 8128(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7e,0x48,0x7d,0x71,0x7f]
+ vcvtw2ph 8128(%rcx), %zmm30
+
+// CHECK: vcvtw2ph -256(%rdx){1to32}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7e,0xdf,0x7d,0x72,0x80]
+ vcvtw2ph -256(%rdx){1to32}, %zmm30 {%k7} {z}
diff --git a/llvm/test/MC/X86/avx512fp16vl.s b/llvm/test/MC/X86/avx512fp16vl.s
index e0ce1b996e906..466af9663d21a 100644
--- a/llvm/test/MC/X86/avx512fp16vl.s
+++ b/llvm/test/MC/X86/avx512fp16vl.s
@@ -279,3 +279,859 @@
// CHECK: vsubph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
// CHECK: encoding: [0x62,0xf5,0x54,0x9f,0x5c,0x72,0x80]
vsubph -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
+
+// CHECK: vcvtdq2ph %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x5b,0xf5]
+ vcvtdq2ph %xmm5, %xmm6
+
+// CHECK: vcvtdq2ph %ymm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x5b,0xf5]
+ vcvtdq2ph %ymm5, %xmm6
+
+// CHECK: vcvtdq2phx 268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x0f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtdq2phx 268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtdq2ph (%ecx){1to4}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x5b,0x31]
+ vcvtdq2ph (%ecx){1to4}, %xmm6
+
+// CHECK: vcvtdq2phx 2032(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x5b,0x71,0x7f]
+ vcvtdq2phx 2032(%ecx), %xmm6
+
+// CHECK: vcvtdq2ph -512(%edx){1to4}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x9f,0x5b,0x72,0x80]
+ vcvtdq2ph -512(%edx){1to4}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtdq2ph (%ecx){1to8}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x38,0x5b,0x31]
+ vcvtdq2ph (%ecx){1to8}, %xmm6
+
+// CHECK: vcvtdq2phy 4064(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x5b,0x71,0x7f]
+ vcvtdq2phy 4064(%ecx), %xmm6
+
+// CHECK: vcvtdq2ph -512(%edx){1to8}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xbf,0x5b,0x72,0x80]
+ vcvtdq2ph -512(%edx){1to8}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtpd2ph %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0xfd,0x08,0x5a,0xf5]
+ vcvtpd2ph %xmm5, %xmm6
+
+// CHECK: vcvtpd2ph %ymm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0xfd,0x28,0x5a,0xf5]
+ vcvtpd2ph %ymm5, %xmm6
+
+// CHECK: vcvtpd2phx 268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x0f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtpd2phx 268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtpd2ph (%ecx){1to2}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0xfd,0x18,0x5a,0x31]
+ vcvtpd2ph (%ecx){1to2}, %xmm6
+
+// CHECK: vcvtpd2phx 2032(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0xfd,0x08,0x5a,0x71,0x7f]
+ vcvtpd2phx 2032(%ecx), %xmm6
+
+// CHECK: vcvtpd2ph -1024(%edx){1to2}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x9f,0x5a,0x72,0x80]
+ vcvtpd2ph -1024(%edx){1to2}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtpd2ph (%ecx){1to4}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0xfd,0x38,0x5a,0x31]
+ vcvtpd2ph (%ecx){1to4}, %xmm6
+
+// CHECK: vcvtpd2phy 4064(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0xfd,0x28,0x5a,0x71,0x7f]
+ vcvtpd2phy 4064(%ecx), %xmm6
+
+// CHECK: vcvtpd2ph -1024(%edx){1to4}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfd,0xbf,0x5a,0x72,0x80]
+ vcvtpd2ph -1024(%edx){1to4}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtph2dq %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x5b,0xf5]
+ vcvtph2dq %xmm5, %xmm6
+
+// CHECK: vcvtph2dq %xmm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x5b,0xf5]
+ vcvtph2dq %xmm5, %ymm6
+
+// CHECK: vcvtph2dq 268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtph2dq 268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtph2dq (%ecx){1to4}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x5b,0x31]
+ vcvtph2dq (%ecx){1to4}, %xmm6
+
+// CHECK: vcvtph2dq 1016(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x5b,0x71,0x7f]
+ vcvtph2dq 1016(%ecx), %xmm6
+
+// CHECK: vcvtph2dq -256(%edx){1to4}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x5b,0x72,0x80]
+ vcvtph2dq -256(%edx){1to4}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtph2dq 268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtph2dq 268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvtph2dq (%ecx){1to8}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x5b,0x31]
+ vcvtph2dq (%ecx){1to8}, %ymm6
+
+// CHECK: vcvtph2dq 2032(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x5b,0x71,0x7f]
+ vcvtph2dq 2032(%ecx), %ymm6
+
+// CHECK: vcvtph2dq -256(%edx){1to8}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x5b,0x72,0x80]
+ vcvtph2dq -256(%edx){1to8}, %ymm6 {%k7} {z}
+
+// CHECK: vcvtph2pd %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x5a,0xf5]
+ vcvtph2pd %xmm5, %xmm6
+
+// CHECK: vcvtph2pd %xmm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x5a,0xf5]
+ vcvtph2pd %xmm5, %ymm6
+
+// CHECK: vcvtph2pd 268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x0f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtph2pd 268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtph2pd (%ecx){1to2}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x5a,0x31]
+ vcvtph2pd (%ecx){1to2}, %xmm6
+
+// CHECK: vcvtph2pd 508(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x5a,0x71,0x7f]
+ vcvtph2pd 508(%ecx), %xmm6
+
+// CHECK: vcvtph2pd -256(%edx){1to2}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x9f,0x5a,0x72,0x80]
+ vcvtph2pd -256(%edx){1to2}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtph2pd 268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x2f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtph2pd 268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvtph2pd (%ecx){1to4}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x38,0x5a,0x31]
+ vcvtph2pd (%ecx){1to4}, %ymm6
+
+// CHECK: vcvtph2pd 1016(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x5a,0x71,0x7f]
+ vcvtph2pd 1016(%ecx), %ymm6
+
+// CHECK: vcvtph2pd -256(%edx){1to4}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xbf,0x5a,0x72,0x80]
+ vcvtph2pd -256(%edx){1to4}, %ymm6 {%k7} {z}
+
+// CHECK: vcvtph2psx %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf6,0x7d,0x08,0x13,0xf5]
+ vcvtph2psx %xmm5, %xmm6
+
+// CHECK: vcvtph2psx %xmm5, %ymm6
+// CHECK: encoding: [0x62,0xf6,0x7d,0x28,0x13,0xf5]
+ vcvtph2psx %xmm5, %ymm6
+
+// CHECK: vcvtph2psx 268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x7d,0x0f,0x13,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtph2psx 268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtph2psx (%ecx){1to4}, %xmm6
+// CHECK: encoding: [0x62,0xf6,0x7d,0x18,0x13,0x31]
+ vcvtph2psx (%ecx){1to4}, %xmm6
+
+// CHECK: vcvtph2psx 1016(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf6,0x7d,0x08,0x13,0x71,0x7f]
+ vcvtph2psx 1016(%ecx), %xmm6
+
+// CHECK: vcvtph2psx -256(%edx){1to4}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7d,0x9f,0x13,0x72,0x80]
+ vcvtph2psx -256(%edx){1to4}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtph2psx 268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x7d,0x2f,0x13,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtph2psx 268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvtph2psx (%ecx){1to8}, %ymm6
+// CHECK: encoding: [0x62,0xf6,0x7d,0x38,0x13,0x31]
+ vcvtph2psx (%ecx){1to8}, %ymm6
+
+// CHECK: vcvtph2psx 2032(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf6,0x7d,0x28,0x13,0x71,0x7f]
+ vcvtph2psx 2032(%ecx), %ymm6
+
+// CHECK: vcvtph2psx -256(%edx){1to8}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7d,0xbf,0x13,0x72,0x80]
+ vcvtph2psx -256(%edx){1to8}, %ymm6 {%k7} {z}
+
+// CHECK: vcvtph2qq %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7b,0xf5]
+ vcvtph2qq %xmm5, %xmm6
+
+// CHECK: vcvtph2qq %xmm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x7b,0xf5]
+ vcvtph2qq %xmm5, %ymm6
+
+// CHECK: vcvtph2qq 268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x7b,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtph2qq 268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtph2qq (%ecx){1to2}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x7b,0x31]
+ vcvtph2qq (%ecx){1to2}, %xmm6
+
+// CHECK: vcvtph2qq 508(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7b,0x71,0x7f]
+ vcvtph2qq 508(%ecx), %xmm6
+
+// CHECK: vcvtph2qq -256(%edx){1to2}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x7b,0x72,0x80]
+ vcvtph2qq -256(%edx){1to2}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtph2qq 268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x7b,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtph2qq 268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvtph2qq (%ecx){1to4}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x7b,0x31]
+ vcvtph2qq (%ecx){1to4}, %ymm6
+
+// CHECK: vcvtph2qq 1016(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x7b,0x71,0x7f]
+ vcvtph2qq 1016(%ecx), %ymm6
+
+// CHECK: vcvtph2qq -256(%edx){1to4}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x7b,0x72,0x80]
+ vcvtph2qq -256(%edx){1to4}, %ymm6 {%k7} {z}
+
+// CHECK: vcvtph2udq %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x79,0xf5]
+ vcvtph2udq %xmm5, %xmm6
+
+// CHECK: vcvtph2udq %xmm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x79,0xf5]
+ vcvtph2udq %xmm5, %ymm6
+
+// CHECK: vcvtph2udq 268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x0f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtph2udq 268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtph2udq (%ecx){1to4}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x79,0x31]
+ vcvtph2udq (%ecx){1to4}, %xmm6
+
+// CHECK: vcvtph2udq 1016(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x79,0x71,0x7f]
+ vcvtph2udq 1016(%ecx), %xmm6
+
+// CHECK: vcvtph2udq -256(%edx){1to4}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x9f,0x79,0x72,0x80]
+ vcvtph2udq -256(%edx){1to4}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtph2udq 268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x2f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtph2udq 268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvtph2udq (%ecx){1to8}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x38,0x79,0x31]
+ vcvtph2udq (%ecx){1to8}, %ymm6
+
+// CHECK: vcvtph2udq 2032(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x79,0x71,0x7f]
+ vcvtph2udq 2032(%ecx), %ymm6
+
+// CHECK: vcvtph2udq -256(%edx){1to8}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xbf,0x79,0x72,0x80]
+ vcvtph2udq -256(%edx){1to8}, %ymm6 {%k7} {z}
+
+// CHECK: vcvtph2uqq %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x79,0xf5]
+ vcvtph2uqq %xmm5, %xmm6
+
+// CHECK: vcvtph2uqq %xmm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x79,0xf5]
+ vcvtph2uqq %xmm5, %ymm6
+
+// CHECK: vcvtph2uqq 268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtph2uqq 268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtph2uqq (%ecx){1to2}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x79,0x31]
+ vcvtph2uqq (%ecx){1to2}, %xmm6
+
+// CHECK: vcvtph2uqq 508(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x79,0x71,0x7f]
+ vcvtph2uqq 508(%ecx), %xmm6
+
+// CHECK: vcvtph2uqq -256(%edx){1to2}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x79,0x72,0x80]
+ vcvtph2uqq -256(%edx){1to2}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtph2uqq 268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtph2uqq 268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvtph2uqq (%ecx){1to4}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x79,0x31]
+ vcvtph2uqq (%ecx){1to4}, %ymm6
+
+// CHECK: vcvtph2uqq 1016(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x79,0x71,0x7f]
+ vcvtph2uqq 1016(%ecx), %ymm6
+
+// CHECK: vcvtph2uqq -256(%edx){1to4}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x79,0x72,0x80]
+ vcvtph2uqq -256(%edx){1to4}, %ymm6 {%k7} {z}
+
+// CHECK: vcvtph2uw %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x7d,0xf5]
+ vcvtph2uw %xmm5, %xmm6
+
+// CHECK: vcvtph2uw %ymm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x7d,0xf5]
+ vcvtph2uw %ymm5, %ymm6
+
+// CHECK: vcvtph2uw 268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x0f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtph2uw 268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtph2uw (%ecx){1to8}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x7d,0x31]
+ vcvtph2uw (%ecx){1to8}, %xmm6
+
+// CHECK: vcvtph2uw 2032(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x7d,0x71,0x7f]
+ vcvtph2uw 2032(%ecx), %xmm6
+
+// CHECK: vcvtph2uw -256(%edx){1to8}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x9f,0x7d,0x72,0x80]
+ vcvtph2uw -256(%edx){1to8}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtph2uw 268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x2f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtph2uw 268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvtph2uw (%ecx){1to16}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x38,0x7d,0x31]
+ vcvtph2uw (%ecx){1to16}, %ymm6
+
+// CHECK: vcvtph2uw 4064(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x7d,0x71,0x7f]
+ vcvtph2uw 4064(%ecx), %ymm6
+
+// CHECK: vcvtph2uw -256(%edx){1to16}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xbf,0x7d,0x72,0x80]
+ vcvtph2uw -256(%edx){1to16}, %ymm6 {%k7} {z}
+
+// CHECK: vcvtph2w %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7d,0xf5]
+ vcvtph2w %xmm5, %xmm6
+
+// CHECK: vcvtph2w %ymm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x7d,0xf5]
+ vcvtph2w %ymm5, %ymm6
+
+// CHECK: vcvtph2w 268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtph2w 268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtph2w (%ecx){1to8}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x7d,0x31]
+ vcvtph2w (%ecx){1to8}, %xmm6
+
+// CHECK: vcvtph2w 2032(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7d,0x71,0x7f]
+ vcvtph2w 2032(%ecx), %xmm6
+
+// CHECK: vcvtph2w -256(%edx){1to8}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x7d,0x72,0x80]
+ vcvtph2w -256(%edx){1to8}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtph2w 268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtph2w 268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvtph2w (%ecx){1to16}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x7d,0x31]
+ vcvtph2w (%ecx){1to16}, %ymm6
+
+// CHECK: vcvtph2w 4064(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x7d,0x71,0x7f]
+ vcvtph2w 4064(%ecx), %ymm6
+
+// CHECK: vcvtph2w -256(%edx){1to16}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x7d,0x72,0x80]
+ vcvtph2w -256(%edx){1to16}, %ymm6 {%k7} {z}
+
+// CHECK: vcvtps2phx %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x1d,0xf5]
+ vcvtps2phx %xmm5, %xmm6
+
+// CHECK: vcvtps2phx %ymm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x1d,0xf5]
+ vcvtps2phx %ymm5, %xmm6
+
+// CHECK: vcvtps2phxx 268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x1d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtps2phxx 268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtps2phx (%ecx){1to4}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x1d,0x31]
+ vcvtps2phx (%ecx){1to4}, %xmm6
+
+// CHECK: vcvtps2phxx 2032(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x1d,0x71,0x7f]
+ vcvtps2phxx 2032(%ecx), %xmm6
+
+// CHECK: vcvtps2phx -512(%edx){1to4}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x1d,0x72,0x80]
+ vcvtps2phx -512(%edx){1to4}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtps2phx (%ecx){1to8}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x1d,0x31]
+ vcvtps2phx (%ecx){1to8}, %xmm6
+
+// CHECK: vcvtps2phxy 4064(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x1d,0x71,0x7f]
+ vcvtps2phxy 4064(%ecx), %xmm6
+
+// CHECK: vcvtps2phx -512(%edx){1to8}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x1d,0x72,0x80]
+ vcvtps2phx -512(%edx){1to8}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtqq2ph %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0xfc,0x08,0x5b,0xf5]
+ vcvtqq2ph %xmm5, %xmm6
+
+// CHECK: vcvtqq2ph %ymm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0xfc,0x28,0x5b,0xf5]
+ vcvtqq2ph %ymm5, %xmm6
+
+// CHECK: vcvtqq2phx 268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x0f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtqq2phx 268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtqq2ph (%ecx){1to2}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0xfc,0x18,0x5b,0x31]
+ vcvtqq2ph (%ecx){1to2}, %xmm6
+
+// CHECK: vcvtqq2phx 2032(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0xfc,0x08,0x5b,0x71,0x7f]
+ vcvtqq2phx 2032(%ecx), %xmm6
+
+// CHECK: vcvtqq2ph -1024(%edx){1to2}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x9f,0x5b,0x72,0x80]
+ vcvtqq2ph -1024(%edx){1to2}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtqq2ph (%ecx){1to4}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0xfc,0x38,0x5b,0x31]
+ vcvtqq2ph (%ecx){1to4}, %xmm6
+
+// CHECK: vcvtqq2phy 4064(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0xfc,0x28,0x5b,0x71,0x7f]
+ vcvtqq2phy 4064(%ecx), %xmm6
+
+// CHECK: vcvtqq2ph -1024(%edx){1to4}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfc,0xbf,0x5b,0x72,0x80]
+ vcvtqq2ph -1024(%edx){1to4}, %xmm6 {%k7} {z}
+
+// CHECK: vcvttph2dq %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x5b,0xf5]
+ vcvttph2dq %xmm5, %xmm6
+
+// CHECK: vcvttph2dq %xmm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x28,0x5b,0xf5]
+ vcvttph2dq %xmm5, %ymm6
+
+// CHECK: vcvttph2dq 268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x0f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvttph2dq 268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvttph2dq (%ecx){1to4}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x5b,0x31]
+ vcvttph2dq (%ecx){1to4}, %xmm6
+
+// CHECK: vcvttph2dq 1016(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x5b,0x71,0x7f]
+ vcvttph2dq 1016(%ecx), %xmm6
+
+// CHECK: vcvttph2dq -256(%edx){1to4}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x9f,0x5b,0x72,0x80]
+ vcvttph2dq -256(%edx){1to4}, %xmm6 {%k7} {z}
+
+// CHECK: vcvttph2dq 268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x2f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvttph2dq 268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvttph2dq (%ecx){1to8}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x38,0x5b,0x31]
+ vcvttph2dq (%ecx){1to8}, %ymm6
+
+// CHECK: vcvttph2dq 2032(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x28,0x5b,0x71,0x7f]
+ vcvttph2dq 2032(%ecx), %ymm6
+
+// CHECK: vcvttph2dq -256(%edx){1to8}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7e,0xbf,0x5b,0x72,0x80]
+ vcvttph2dq -256(%edx){1to8}, %ymm6 {%k7} {z}
+
+// CHECK: vcvttph2qq %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7a,0xf5]
+ vcvttph2qq %xmm5, %xmm6
+
+// CHECK: vcvttph2qq %xmm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x7a,0xf5]
+ vcvttph2qq %xmm5, %ymm6
+
+// CHECK: vcvttph2qq 268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvttph2qq 268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvttph2qq (%ecx){1to2}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x7a,0x31]
+ vcvttph2qq (%ecx){1to2}, %xmm6
+
+// CHECK: vcvttph2qq 508(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7a,0x71,0x7f]
+ vcvttph2qq 508(%ecx), %xmm6
+
+// CHECK: vcvttph2qq -256(%edx){1to2}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x7a,0x72,0x80]
+ vcvttph2qq -256(%edx){1to2}, %xmm6 {%k7} {z}
+
+// CHECK: vcvttph2qq 268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvttph2qq 268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvttph2qq (%ecx){1to4}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x7a,0x31]
+ vcvttph2qq (%ecx){1to4}, %ymm6
+
+// CHECK: vcvttph2qq 1016(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x7a,0x71,0x7f]
+ vcvttph2qq 1016(%ecx), %ymm6
+
+// CHECK: vcvttph2qq -256(%edx){1to4}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x7a,0x72,0x80]
+ vcvttph2qq -256(%edx){1to4}, %ymm6 {%k7} {z}
+
+// CHECK: vcvttph2udq %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x78,0xf5]
+ vcvttph2udq %xmm5, %xmm6
+
+// CHECK: vcvttph2udq %xmm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x78,0xf5]
+ vcvttph2udq %xmm5, %ymm6
+
+// CHECK: vcvttph2udq 268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x0f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvttph2udq 268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvttph2udq (%ecx){1to4}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x78,0x31]
+ vcvttph2udq (%ecx){1to4}, %xmm6
+
+// CHECK: vcvttph2udq 1016(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x78,0x71,0x7f]
+ vcvttph2udq 1016(%ecx), %xmm6
+
+// CHECK: vcvttph2udq -256(%edx){1to4}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x9f,0x78,0x72,0x80]
+ vcvttph2udq -256(%edx){1to4}, %xmm6 {%k7} {z}
+
+// CHECK: vcvttph2udq 268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x2f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvttph2udq 268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvttph2udq (%ecx){1to8}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x38,0x78,0x31]
+ vcvttph2udq (%ecx){1to8}, %ymm6
+
+// CHECK: vcvttph2udq 2032(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x78,0x71,0x7f]
+ vcvttph2udq 2032(%ecx), %ymm6
+
+// CHECK: vcvttph2udq -256(%edx){1to8}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xbf,0x78,0x72,0x80]
+ vcvttph2udq -256(%edx){1to8}, %ymm6 {%k7} {z}
+
+// CHECK: vcvttph2uqq %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x78,0xf5]
+ vcvttph2uqq %xmm5, %xmm6
+
+// CHECK: vcvttph2uqq %xmm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x78,0xf5]
+ vcvttph2uqq %xmm5, %ymm6
+
+// CHECK: vcvttph2uqq 268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvttph2uqq 268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvttph2uqq (%ecx){1to2}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x78,0x31]
+ vcvttph2uqq (%ecx){1to2}, %xmm6
+
+// CHECK: vcvttph2uqq 508(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x78,0x71,0x7f]
+ vcvttph2uqq 508(%ecx), %xmm6
+
+// CHECK: vcvttph2uqq -256(%edx){1to2}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x78,0x72,0x80]
+ vcvttph2uqq -256(%edx){1to2}, %xmm6 {%k7} {z}
+
+// CHECK: vcvttph2uqq 268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvttph2uqq 268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvttph2uqq (%ecx){1to4}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x78,0x31]
+ vcvttph2uqq (%ecx){1to4}, %ymm6
+
+// CHECK: vcvttph2uqq 1016(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x78,0x71,0x7f]
+ vcvttph2uqq 1016(%ecx), %ymm6
+
+// CHECK: vcvttph2uqq -256(%edx){1to4}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x78,0x72,0x80]
+ vcvttph2uqq -256(%edx){1to4}, %ymm6 {%k7} {z}
+
+// CHECK: vcvttph2uw %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x7c,0xf5]
+ vcvttph2uw %xmm5, %xmm6
+
+// CHECK: vcvttph2uw %ymm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x7c,0xf5]
+ vcvttph2uw %ymm5, %ymm6
+
+// CHECK: vcvttph2uw 268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x0f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvttph2uw 268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvttph2uw (%ecx){1to8}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x7c,0x31]
+ vcvttph2uw (%ecx){1to8}, %xmm6
+
+// CHECK: vcvttph2uw 2032(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x7c,0x71,0x7f]
+ vcvttph2uw 2032(%ecx), %xmm6
+
+// CHECK: vcvttph2uw -256(%edx){1to8}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x9f,0x7c,0x72,0x80]
+ vcvttph2uw -256(%edx){1to8}, %xmm6 {%k7} {z}
+
+// CHECK: vcvttph2uw 268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x2f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvttph2uw 268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvttph2uw (%ecx){1to16}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x38,0x7c,0x31]
+ vcvttph2uw (%ecx){1to16}, %ymm6
+
+// CHECK: vcvttph2uw 4064(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x7c,0x71,0x7f]
+ vcvttph2uw 4064(%ecx), %ymm6
+
+// CHECK: vcvttph2uw -256(%edx){1to16}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xbf,0x7c,0x72,0x80]
+ vcvttph2uw -256(%edx){1to16}, %ymm6 {%k7} {z}
+
+// CHECK: vcvttph2w %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7c,0xf5]
+ vcvttph2w %xmm5, %xmm6
+
+// CHECK: vcvttph2w %ymm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x7c,0xf5]
+ vcvttph2w %ymm5, %ymm6
+
+// CHECK: vcvttph2w 268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvttph2w 268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvttph2w (%ecx){1to8}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x7c,0x31]
+ vcvttph2w (%ecx){1to8}, %xmm6
+
+// CHECK: vcvttph2w 2032(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7c,0x71,0x7f]
+ vcvttph2w 2032(%ecx), %xmm6
+
+// CHECK: vcvttph2w -256(%edx){1to8}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x7c,0x72,0x80]
+ vcvttph2w -256(%edx){1to8}, %xmm6 {%k7} {z}
+
+// CHECK: vcvttph2w 268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvttph2w 268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvttph2w (%ecx){1to16}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x7c,0x31]
+ vcvttph2w (%ecx){1to16}, %ymm6
+
+// CHECK: vcvttph2w 4064(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x7c,0x71,0x7f]
+ vcvttph2w 4064(%ecx), %ymm6
+
+// CHECK: vcvttph2w -256(%edx){1to16}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x7c,0x72,0x80]
+ vcvttph2w -256(%edx){1to16}, %ymm6 {%k7} {z}
+
+// CHECK: vcvtudq2ph %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x7a,0xf5]
+ vcvtudq2ph %xmm5, %xmm6
+
+// CHECK: vcvtudq2ph %ymm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7f,0x28,0x7a,0xf5]
+ vcvtudq2ph %ymm5, %xmm6
+
+// CHECK: vcvtudq2phx 268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7f,0x0f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtudq2phx 268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtudq2ph (%ecx){1to4}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7f,0x18,0x7a,0x31]
+ vcvtudq2ph (%ecx){1to4}, %xmm6
+
+// CHECK: vcvtudq2phx 2032(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x7a,0x71,0x7f]
+ vcvtudq2phx 2032(%ecx), %xmm6
+
+// CHECK: vcvtudq2ph -512(%edx){1to4}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7f,0x9f,0x7a,0x72,0x80]
+ vcvtudq2ph -512(%edx){1to4}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtudq2ph (%ecx){1to8}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7f,0x38,0x7a,0x31]
+ vcvtudq2ph (%ecx){1to8}, %xmm6
+
+// CHECK: vcvtudq2phy 4064(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7f,0x28,0x7a,0x71,0x7f]
+ vcvtudq2phy 4064(%ecx), %xmm6
+
+// CHECK: vcvtudq2ph -512(%edx){1to8}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7f,0xbf,0x7a,0x72,0x80]
+ vcvtudq2ph -512(%edx){1to8}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtuqq2ph %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0xff,0x08,0x7a,0xf5]
+ vcvtuqq2ph %xmm5, %xmm6
+
+// CHECK: vcvtuqq2ph %ymm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0xff,0x28,0x7a,0xf5]
+ vcvtuqq2ph %ymm5, %xmm6
+
+// CHECK: vcvtuqq2phx 268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0xff,0x0f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtuqq2phx 268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtuqq2ph (%ecx){1to2}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0xff,0x18,0x7a,0x31]
+ vcvtuqq2ph (%ecx){1to2}, %xmm6
+
+// CHECK: vcvtuqq2phx 2032(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0xff,0x08,0x7a,0x71,0x7f]
+ vcvtuqq2phx 2032(%ecx), %xmm6
+
+// CHECK: vcvtuqq2ph -1024(%edx){1to2}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xff,0x9f,0x7a,0x72,0x80]
+ vcvtuqq2ph -1024(%edx){1to2}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtuqq2ph (%ecx){1to4}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0xff,0x38,0x7a,0x31]
+ vcvtuqq2ph (%ecx){1to4}, %xmm6
+
+// CHECK: vcvtuqq2phy 4064(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0xff,0x28,0x7a,0x71,0x7f]
+ vcvtuqq2phy 4064(%ecx), %xmm6
+
+// CHECK: vcvtuqq2ph -1024(%edx){1to4}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xff,0xbf,0x7a,0x72,0x80]
+ vcvtuqq2ph -1024(%edx){1to4}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtuw2ph %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x7d,0xf5]
+ vcvtuw2ph %xmm5, %xmm6
+
+// CHECK: vcvtuw2ph %ymm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7f,0x28,0x7d,0xf5]
+ vcvtuw2ph %ymm5, %ymm6
+
+// CHECK: vcvtuw2ph 268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7f,0x0f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtuw2ph 268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtuw2ph (%ecx){1to8}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7f,0x18,0x7d,0x31]
+ vcvtuw2ph (%ecx){1to8}, %xmm6
+
+// CHECK: vcvtuw2ph 2032(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x7d,0x71,0x7f]
+ vcvtuw2ph 2032(%ecx), %xmm6
+
+// CHECK: vcvtuw2ph -256(%edx){1to8}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7f,0x9f,0x7d,0x72,0x80]
+ vcvtuw2ph -256(%edx){1to8}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtuw2ph 268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7f,0x2f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtuw2ph 268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvtuw2ph (%ecx){1to16}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7f,0x38,0x7d,0x31]
+ vcvtuw2ph (%ecx){1to16}, %ymm6
+
+// CHECK: vcvtuw2ph 4064(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7f,0x28,0x7d,0x71,0x7f]
+ vcvtuw2ph 4064(%ecx), %ymm6
+
+// CHECK: vcvtuw2ph -256(%edx){1to16}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7f,0xbf,0x7d,0x72,0x80]
+ vcvtuw2ph -256(%edx){1to16}, %ymm6 {%k7} {z}
+
+// CHECK: vcvtw2ph %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x7d,0xf5]
+ vcvtw2ph %xmm5, %xmm6
+
+// CHECK: vcvtw2ph %ymm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x28,0x7d,0xf5]
+ vcvtw2ph %ymm5, %ymm6
+
+// CHECK: vcvtw2ph 268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x0f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtw2ph 268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtw2ph (%ecx){1to8}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x7d,0x31]
+ vcvtw2ph (%ecx){1to8}, %xmm6
+
+// CHECK: vcvtw2ph 2032(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x7d,0x71,0x7f]
+ vcvtw2ph 2032(%ecx), %xmm6
+
+// CHECK: vcvtw2ph -256(%edx){1to8}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x9f,0x7d,0x72,0x80]
+ vcvtw2ph -256(%edx){1to8}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtw2ph 268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x2f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtw2ph 268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvtw2ph (%ecx){1to16}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x38,0x7d,0x31]
+ vcvtw2ph (%ecx){1to16}, %ymm6
+
+// CHECK: vcvtw2ph 4064(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x28,0x7d,0x71,0x7f]
+ vcvtw2ph 4064(%ecx), %ymm6
+
+// CHECK: vcvtw2ph -256(%edx){1to16}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7e,0xbf,0x7d,0x72,0x80]
+ vcvtw2ph -256(%edx){1to16}, %ymm6 {%k7} {z}
diff --git a/llvm/test/MC/X86/intel-syntax-avx512fp16.s b/llvm/test/MC/X86/intel-syntax-avx512fp16.s
index 5d95bc82375a0..4b842f9bc622c 100644
--- a/llvm/test/MC/X86/intel-syntax-avx512fp16.s
+++ b/llvm/test/MC/X86/intel-syntax-avx512fp16.s
@@ -459,3 +459,771 @@
// CHECK: vucomish xmm6, word ptr [edx - 256]
// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x2e,0x72,0x80]
vucomish xmm6, word ptr [edx - 256]
+
+// CHECK: vcvtdq2ph ymm6, zmm5
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x5b,0xf5]
+ vcvtdq2ph ymm6, zmm5
+
+// CHECK: vcvtdq2ph ymm6, zmm5, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x5b,0xf5]
+ vcvtdq2ph ymm6, zmm5, {rn-sae}
+
+// CHECK: vcvtdq2ph ymm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x4f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtdq2ph ymm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtdq2ph ymm6, dword ptr [ecx]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x58,0x5b,0x31]
+ vcvtdq2ph ymm6, dword ptr [ecx]{1to16}
+
+// CHECK: vcvtdq2ph ymm6, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x5b,0x71,0x7f]
+ vcvtdq2ph ymm6, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvtdq2ph ymm6 {k7} {z}, dword ptr [edx - 512]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xdf,0x5b,0x72,0x80]
+ vcvtdq2ph ymm6 {k7} {z}, dword ptr [edx - 512]{1to16}
+
+// CHECK: vcvtpd2ph xmm6, zmm5
+// CHECK: encoding: [0x62,0xf5,0xfd,0x48,0x5a,0xf5]
+ vcvtpd2ph xmm6, zmm5
+
+// CHECK: vcvtpd2ph xmm6, zmm5, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x18,0x5a,0xf5]
+ vcvtpd2ph xmm6, zmm5, {rn-sae}
+
+// CHECK: vcvtpd2ph xmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0xfd,0x4f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtpd2ph xmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtpd2ph xmm6, qword ptr [ecx]{1to8}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x58,0x5a,0x31]
+ vcvtpd2ph xmm6, qword ptr [ecx]{1to8}
+
+// CHECK: vcvtpd2ph xmm6, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0xfd,0x48,0x5a,0x71,0x7f]
+ vcvtpd2ph xmm6, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvtpd2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to8}
+// CHECK: encoding: [0x62,0xf5,0xfd,0xdf,0x5a,0x72,0x80]
+ vcvtpd2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to8}
+
+// CHECK: vcvtph2dq zmm6, ymm5
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x5b,0xf5]
+ vcvtph2dq zmm6, ymm5
+
+// CHECK: vcvtph2dq zmm6, ymm5, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x5b,0xf5]
+ vcvtph2dq zmm6, ymm5, {rn-sae}
+
+// CHECK: vcvtph2dq zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtph2dq zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtph2dq zmm6, word ptr [ecx]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x5b,0x31]
+ vcvtph2dq zmm6, word ptr [ecx]{1to16}
+
+// CHECK: vcvtph2dq zmm6, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x5b,0x71,0x7f]
+ vcvtph2dq zmm6, ymmword ptr [ecx + 4064]
+
+// CHECK: vcvtph2dq zmm6 {k7} {z}, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x5b,0x72,0x80]
+ vcvtph2dq zmm6 {k7} {z}, word ptr [edx - 256]{1to16}
+
+// CHECK: vcvtph2pd zmm6, xmm5
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x5a,0xf5]
+ vcvtph2pd zmm6, xmm5
+
+// CHECK: vcvtph2pd zmm6, xmm5, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x5a,0xf5]
+ vcvtph2pd zmm6, xmm5, {sae}
+
+// CHECK: vcvtph2pd zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x4f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtph2pd zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtph2pd zmm6, word ptr [ecx]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x58,0x5a,0x31]
+ vcvtph2pd zmm6, word ptr [ecx]{1to8}
+
+// CHECK: vcvtph2pd zmm6, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x5a,0x71,0x7f]
+ vcvtph2pd zmm6, xmmword ptr [ecx + 2032]
+
+// CHECK: vcvtph2pd zmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xdf,0x5a,0x72,0x80]
+ vcvtph2pd zmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+
+// CHECK: vcvtph2psx zmm6, ymm5
+// CHECK: encoding: [0x62,0xf6,0x7d,0x48,0x13,0xf5]
+ vcvtph2psx zmm6, ymm5
+
+// CHECK: vcvtph2psx zmm6, ymm5, {sae}
+// CHECK: encoding: [0x62,0xf6,0x7d,0x18,0x13,0xf5]
+ vcvtph2psx zmm6, ymm5, {sae}
+
+// CHECK: vcvtph2psx zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x7d,0x4f,0x13,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtph2psx zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtph2psx zmm6, word ptr [ecx]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x7d,0x58,0x13,0x31]
+ vcvtph2psx zmm6, word ptr [ecx]{1to16}
+
+// CHECK: vcvtph2psx zmm6, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x7d,0x48,0x13,0x71,0x7f]
+ vcvtph2psx zmm6, ymmword ptr [ecx + 4064]
+
+// CHECK: vcvtph2psx zmm6 {k7} {z}, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x7d,0xdf,0x13,0x72,0x80]
+ vcvtph2psx zmm6 {k7} {z}, word ptr [edx - 256]{1to16}
+
+// CHECK: vcvtph2qq zmm6, xmm5
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x7b,0xf5]
+ vcvtph2qq zmm6, xmm5
+
+// CHECK: vcvtph2qq zmm6, xmm5, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x7b,0xf5]
+ vcvtph2qq zmm6, xmm5, {rn-sae}
+
+// CHECK: vcvtph2qq zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x7b,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtph2qq zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtph2qq zmm6, word ptr [ecx]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x7b,0x31]
+ vcvtph2qq zmm6, word ptr [ecx]{1to8}
+
+// CHECK: vcvtph2qq zmm6, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x7b,0x71,0x7f]
+ vcvtph2qq zmm6, xmmword ptr [ecx + 2032]
+
+// CHECK: vcvtph2qq zmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x7b,0x72,0x80]
+ vcvtph2qq zmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+
+// CHECK: vcvtph2udq zmm6, ymm5
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x79,0xf5]
+ vcvtph2udq zmm6, ymm5
+
+// CHECK: vcvtph2udq zmm6, ymm5, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x79,0xf5]
+ vcvtph2udq zmm6, ymm5, {rn-sae}
+
+// CHECK: vcvtph2udq zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x4f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtph2udq zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtph2udq zmm6, word ptr [ecx]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x58,0x79,0x31]
+ vcvtph2udq zmm6, word ptr [ecx]{1to16}
+
+// CHECK: vcvtph2udq zmm6, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x79,0x71,0x7f]
+ vcvtph2udq zmm6, ymmword ptr [ecx + 4064]
+
+// CHECK: vcvtph2udq zmm6 {k7} {z}, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xdf,0x79,0x72,0x80]
+ vcvtph2udq zmm6 {k7} {z}, word ptr [edx - 256]{1to16}
+
+// CHECK: vcvtph2uqq zmm6, xmm5
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x79,0xf5]
+ vcvtph2uqq zmm6, xmm5
+
+// CHECK: vcvtph2uqq zmm6, xmm5, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x79,0xf5]
+ vcvtph2uqq zmm6, xmm5, {rn-sae}
+
+// CHECK: vcvtph2uqq zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtph2uqq zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtph2uqq zmm6, word ptr [ecx]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x79,0x31]
+ vcvtph2uqq zmm6, word ptr [ecx]{1to8}
+
+// CHECK: vcvtph2uqq zmm6, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x79,0x71,0x7f]
+ vcvtph2uqq zmm6, xmmword ptr [ecx + 2032]
+
+// CHECK: vcvtph2uqq zmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x79,0x72,0x80]
+ vcvtph2uqq zmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+
+// CHECK: vcvtph2uw zmm6, zmm5
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x7d,0xf5]
+ vcvtph2uw zmm6, zmm5
+
+// CHECK: vcvtph2uw zmm6, zmm5, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x7d,0xf5]
+ vcvtph2uw zmm6, zmm5, {rn-sae}
+
+// CHECK: vcvtph2uw zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x4f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtph2uw zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtph2uw zmm6, word ptr [ecx]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x58,0x7d,0x31]
+ vcvtph2uw zmm6, word ptr [ecx]{1to32}
+
+// CHECK: vcvtph2uw zmm6, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x7d,0x71,0x7f]
+ vcvtph2uw zmm6, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvtph2uw zmm6 {k7} {z}, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xdf,0x7d,0x72,0x80]
+ vcvtph2uw zmm6 {k7} {z}, word ptr [edx - 256]{1to32}
+
+// CHECK: vcvtph2w zmm6, zmm5
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x7d,0xf5]
+ vcvtph2w zmm6, zmm5
+
+// CHECK: vcvtph2w zmm6, zmm5, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x7d,0xf5]
+ vcvtph2w zmm6, zmm5, {rn-sae}
+
+// CHECK: vcvtph2w zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtph2w zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtph2w zmm6, word ptr [ecx]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x7d,0x31]
+ vcvtph2w zmm6, word ptr [ecx]{1to32}
+
+// CHECK: vcvtph2w zmm6, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x7d,0x71,0x7f]
+ vcvtph2w zmm6, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvtph2w zmm6 {k7} {z}, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x7d,0x72,0x80]
+ vcvtph2w zmm6 {k7} {z}, word ptr [edx - 256]{1to32}
+
+// CHECK: vcvtps2phx ymm6, zmm5
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x1d,0xf5]
+ vcvtps2phx ymm6, zmm5
+
+// CHECK: vcvtps2phx ymm6, zmm5, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x1d,0xf5]
+ vcvtps2phx ymm6, zmm5, {rn-sae}
+
+// CHECK: vcvtps2phx ymm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x1d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtps2phx ymm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtps2phx ymm6, dword ptr [ecx]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x1d,0x31]
+ vcvtps2phx ymm6, dword ptr [ecx]{1to16}
+
+// CHECK: vcvtps2phx ymm6, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x1d,0x71,0x7f]
+ vcvtps2phx ymm6, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvtps2phx ymm6 {k7} {z}, dword ptr [edx - 512]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x1d,0x72,0x80]
+ vcvtps2phx ymm6 {k7} {z}, dword ptr [edx - 512]{1to16}
+
+// CHECK: vcvtqq2ph xmm6, zmm5
+// CHECK: encoding: [0x62,0xf5,0xfc,0x48,0x5b,0xf5]
+ vcvtqq2ph xmm6, zmm5
+
+// CHECK: vcvtqq2ph xmm6, zmm5, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x18,0x5b,0xf5]
+ vcvtqq2ph xmm6, zmm5, {rn-sae}
+
+// CHECK: vcvtqq2ph xmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0xfc,0x4f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtqq2ph xmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtqq2ph xmm6, qword ptr [ecx]{1to8}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x58,0x5b,0x31]
+ vcvtqq2ph xmm6, qword ptr [ecx]{1to8}
+
+// CHECK: vcvtqq2ph xmm6, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0xfc,0x48,0x5b,0x71,0x7f]
+ vcvtqq2ph xmm6, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvtqq2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to8}
+// CHECK: encoding: [0x62,0xf5,0xfc,0xdf,0x5b,0x72,0x80]
+ vcvtqq2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to8}
+
+// CHECK: vcvtsd2sh xmm6, xmm5, xmm4
+// CHECK: encoding: [0x62,0xf5,0xd7,0x08,0x5a,0xf4]
+ vcvtsd2sh xmm6, xmm5, xmm4
+
+// CHECK: vcvtsd2sh xmm6, xmm5, xmm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0xd7,0x18,0x5a,0xf4]
+ vcvtsd2sh xmm6, xmm5, xmm4, {rn-sae}
+
+// CHECK: vcvtsd2sh xmm6 {k7}, xmm5, qword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0xd7,0x0f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtsd2sh xmm6 {k7}, xmm5, qword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtsd2sh xmm6, xmm5, qword ptr [ecx]
+// CHECK: encoding: [0x62,0xf5,0xd7,0x08,0x5a,0x31]
+ vcvtsd2sh xmm6, xmm5, qword ptr [ecx]
+
+// CHECK: vcvtsd2sh xmm6, xmm5, qword ptr [ecx + 1016]
+// CHECK: encoding: [0x62,0xf5,0xd7,0x08,0x5a,0x71,0x7f]
+ vcvtsd2sh xmm6, xmm5, qword ptr [ecx + 1016]
+
+// CHECK: vcvtsd2sh xmm6 {k7} {z}, xmm5, qword ptr [edx - 1024]
+// CHECK: encoding: [0x62,0xf5,0xd7,0x8f,0x5a,0x72,0x80]
+ vcvtsd2sh xmm6 {k7} {z}, xmm5, qword ptr [edx - 1024]
+
+// CHECK: vcvtsh2sd xmm6, xmm5, xmm4
+// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x5a,0xf4]
+ vcvtsh2sd xmm6, xmm5, xmm4
+
+// CHECK: vcvtsh2sd xmm6, xmm5, xmm4, {sae}
+// CHECK: encoding: [0x62,0xf5,0x56,0x18,0x5a,0xf4]
+ vcvtsh2sd xmm6, xmm5, xmm4, {sae}
+
+// CHECK: vcvtsh2sd xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x56,0x0f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtsh2sd xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtsh2sd xmm6, xmm5, word ptr [ecx]
+// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x5a,0x31]
+ vcvtsh2sd xmm6, xmm5, word ptr [ecx]
+
+// CHECK: vcvtsh2sd xmm6, xmm5, word ptr [ecx + 254]
+// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x5a,0x71,0x7f]
+ vcvtsh2sd xmm6, xmm5, word ptr [ecx + 254]
+
+// CHECK: vcvtsh2sd xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
+// CHECK: encoding: [0x62,0xf5,0x56,0x8f,0x5a,0x72,0x80]
+ vcvtsh2sd xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
+
+// CHECK: vcvtsh2si edx, xmm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2d,0xd6]
+ vcvtsh2si edx, xmm6
+
+// CHECK: vcvtsh2si edx, xmm6, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x2d,0xd6]
+ vcvtsh2si edx, xmm6, {rn-sae}
+
+// CHECK: vcvtsh2si edx, word ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvtsh2si edx, word ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtsh2si edx, word ptr [ecx]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2d,0x11]
+ vcvtsh2si edx, word ptr [ecx]
+
+// CHECK: vcvtsh2si edx, word ptr [ecx + 254]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2d,0x51,0x7f]
+ vcvtsh2si edx, word ptr [ecx + 254]
+
+// CHECK: vcvtsh2si edx, word ptr [edx - 256]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2d,0x52,0x80]
+ vcvtsh2si edx, word ptr [edx - 256]
+
+// CHECK: vcvtsh2ss xmm6, xmm5, xmm4
+// CHECK: encoding: [0x62,0xf6,0x54,0x08,0x13,0xf4]
+ vcvtsh2ss xmm6, xmm5, xmm4
+
+// CHECK: vcvtsh2ss xmm6, xmm5, xmm4, {sae}
+// CHECK: encoding: [0x62,0xf6,0x54,0x18,0x13,0xf4]
+ vcvtsh2ss xmm6, xmm5, xmm4, {sae}
+
+// CHECK: vcvtsh2ss xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x54,0x0f,0x13,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtsh2ss xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtsh2ss xmm6, xmm5, word ptr [ecx]
+// CHECK: encoding: [0x62,0xf6,0x54,0x08,0x13,0x31]
+ vcvtsh2ss xmm6, xmm5, word ptr [ecx]
+
+// CHECK: vcvtsh2ss xmm6, xmm5, word ptr [ecx + 254]
+// CHECK: encoding: [0x62,0xf6,0x54,0x08,0x13,0x71,0x7f]
+ vcvtsh2ss xmm6, xmm5, word ptr [ecx + 254]
+
+// CHECK: vcvtsh2ss xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
+// CHECK: encoding: [0x62,0xf6,0x54,0x8f,0x13,0x72,0x80]
+ vcvtsh2ss xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
+
+// CHECK: vcvtsh2usi edx, xmm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x79,0xd6]
+ vcvtsh2usi edx, xmm6
+
+// CHECK: vcvtsh2usi edx, xmm6, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x79,0xd6]
+ vcvtsh2usi edx, xmm6, {rn-sae}
+
+// CHECK: vcvtsh2usi edx, word ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x79,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvtsh2usi edx, word ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtsh2usi edx, word ptr [ecx]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x79,0x11]
+ vcvtsh2usi edx, word ptr [ecx]
+
+// CHECK: vcvtsh2usi edx, word ptr [ecx + 254]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x79,0x51,0x7f]
+ vcvtsh2usi edx, word ptr [ecx + 254]
+
+// CHECK: vcvtsh2usi edx, word ptr [edx - 256]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x79,0x52,0x80]
+ vcvtsh2usi edx, word ptr [edx - 256]
+
+// CHECK: vcvtsi2sh xmm6, xmm5, edx
+// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x2a,0xf2]
+ vcvtsi2sh xmm6, xmm5, edx
+
+// CHECK: vcvtsi2sh xmm6, xmm5, {rn-sae}, edx
+// CHECK: encoding: [0x62,0xf5,0x56,0x18,0x2a,0xf2]
+ vcvtsi2sh xmm6, xmm5, {rn-sae}, edx
+
+// CHECK: vcvtsi2sh xmm6, xmm5, dword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x2a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtsi2sh xmm6, xmm5, dword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtsi2sh xmm6, xmm5, dword ptr [ecx]
+// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x2a,0x31]
+ vcvtsi2sh xmm6, xmm5, dword ptr [ecx]
+
+// CHECK: vcvtsi2sh xmm6, xmm5, dword ptr [ecx + 508]
+// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x2a,0x71,0x7f]
+ vcvtsi2sh xmm6, xmm5, dword ptr [ecx + 508]
+
+// CHECK: vcvtsi2sh xmm6, xmm5, dword ptr [edx - 512]
+// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x2a,0x72,0x80]
+ vcvtsi2sh xmm6, xmm5, dword ptr [edx - 512]
+
+// CHECK: vcvtss2sh xmm6, xmm5, xmm4
+// CHECK: encoding: [0x62,0xf5,0x54,0x08,0x1d,0xf4]
+ vcvtss2sh xmm6, xmm5, xmm4
+
+// CHECK: vcvtss2sh xmm6, xmm5, xmm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x54,0x18,0x1d,0xf4]
+ vcvtss2sh xmm6, xmm5, xmm4, {rn-sae}
+
+// CHECK: vcvtss2sh xmm6 {k7}, xmm5, dword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x54,0x0f,0x1d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtss2sh xmm6 {k7}, xmm5, dword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtss2sh xmm6, xmm5, dword ptr [ecx]
+// CHECK: encoding: [0x62,0xf5,0x54,0x08,0x1d,0x31]
+ vcvtss2sh xmm6, xmm5, dword ptr [ecx]
+
+// CHECK: vcvtss2sh xmm6, xmm5, dword ptr [ecx + 508]
+// CHECK: encoding: [0x62,0xf5,0x54,0x08,0x1d,0x71,0x7f]
+ vcvtss2sh xmm6, xmm5, dword ptr [ecx + 508]
+
+// CHECK: vcvtss2sh xmm6 {k7} {z}, xmm5, dword ptr [edx - 512]
+// CHECK: encoding: [0x62,0xf5,0x54,0x8f,0x1d,0x72,0x80]
+ vcvtss2sh xmm6 {k7} {z}, xmm5, dword ptr [edx - 512]
+
+// CHECK: vcvttph2dq zmm6, ymm5
+// CHECK: encoding: [0x62,0xf5,0x7e,0x48,0x5b,0xf5]
+ vcvttph2dq zmm6, ymm5
+
+// CHECK: vcvttph2dq zmm6, ymm5, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x5b,0xf5]
+ vcvttph2dq zmm6, ymm5, {sae}
+
+// CHECK: vcvttph2dq zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x4f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvttph2dq zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttph2dq zmm6, word ptr [ecx]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x58,0x5b,0x31]
+ vcvttph2dq zmm6, word ptr [ecx]{1to16}
+
+// CHECK: vcvttph2dq zmm6, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x48,0x5b,0x71,0x7f]
+ vcvttph2dq zmm6, ymmword ptr [ecx + 4064]
+
+// CHECK: vcvttph2dq zmm6 {k7} {z}, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7e,0xdf,0x5b,0x72,0x80]
+ vcvttph2dq zmm6 {k7} {z}, word ptr [edx - 256]{1to16}
+
+// CHECK: vcvttph2qq zmm6, xmm5
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x7a,0xf5]
+ vcvttph2qq zmm6, xmm5
+
+// CHECK: vcvttph2qq zmm6, xmm5, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x7a,0xf5]
+ vcvttph2qq zmm6, xmm5, {sae}
+
+// CHECK: vcvttph2qq zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvttph2qq zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttph2qq zmm6, word ptr [ecx]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x7a,0x31]
+ vcvttph2qq zmm6, word ptr [ecx]{1to8}
+
+// CHECK: vcvttph2qq zmm6, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x7a,0x71,0x7f]
+ vcvttph2qq zmm6, xmmword ptr [ecx + 2032]
+
+// CHECK: vcvttph2qq zmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x7a,0x72,0x80]
+ vcvttph2qq zmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+
+// CHECK: vcvttph2udq zmm6, ymm5
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x78,0xf5]
+ vcvttph2udq zmm6, ymm5
+
+// CHECK: vcvttph2udq zmm6, ymm5, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x78,0xf5]
+ vcvttph2udq zmm6, ymm5, {sae}
+
+// CHECK: vcvttph2udq zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x4f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvttph2udq zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttph2udq zmm6, word ptr [ecx]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x58,0x78,0x31]
+ vcvttph2udq zmm6, word ptr [ecx]{1to16}
+
+// CHECK: vcvttph2udq zmm6, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x78,0x71,0x7f]
+ vcvttph2udq zmm6, ymmword ptr [ecx + 4064]
+
+// CHECK: vcvttph2udq zmm6 {k7} {z}, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xdf,0x78,0x72,0x80]
+ vcvttph2udq zmm6 {k7} {z}, word ptr [edx - 256]{1to16}
+
+// CHECK: vcvttph2uqq zmm6, xmm5
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x78,0xf5]
+ vcvttph2uqq zmm6, xmm5
+
+// CHECK: vcvttph2uqq zmm6, xmm5, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x78,0xf5]
+ vcvttph2uqq zmm6, xmm5, {sae}
+
+// CHECK: vcvttph2uqq zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvttph2uqq zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttph2uqq zmm6, word ptr [ecx]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x78,0x31]
+ vcvttph2uqq zmm6, word ptr [ecx]{1to8}
+
+// CHECK: vcvttph2uqq zmm6, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x78,0x71,0x7f]
+ vcvttph2uqq zmm6, xmmword ptr [ecx + 2032]
+
+// CHECK: vcvttph2uqq zmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x78,0x72,0x80]
+ vcvttph2uqq zmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+
+// CHECK: vcvttph2uw zmm6, zmm5
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x7c,0xf5]
+ vcvttph2uw zmm6, zmm5
+
+// CHECK: vcvttph2uw zmm6, zmm5, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x7c,0xf5]
+ vcvttph2uw zmm6, zmm5, {sae}
+
+// CHECK: vcvttph2uw zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x4f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvttph2uw zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttph2uw zmm6, word ptr [ecx]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x58,0x7c,0x31]
+ vcvttph2uw zmm6, word ptr [ecx]{1to32}
+
+// CHECK: vcvttph2uw zmm6, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x7c,0x71,0x7f]
+ vcvttph2uw zmm6, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvttph2uw zmm6 {k7} {z}, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xdf,0x7c,0x72,0x80]
+ vcvttph2uw zmm6 {k7} {z}, word ptr [edx - 256]{1to32}
+
+// CHECK: vcvttph2w zmm6, zmm5
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x7c,0xf5]
+ vcvttph2w zmm6, zmm5
+
+// CHECK: vcvttph2w zmm6, zmm5, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x7c,0xf5]
+ vcvttph2w zmm6, zmm5, {sae}
+
+// CHECK: vcvttph2w zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvttph2w zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttph2w zmm6, word ptr [ecx]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x7c,0x31]
+ vcvttph2w zmm6, word ptr [ecx]{1to32}
+
+// CHECK: vcvttph2w zmm6, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x7c,0x71,0x7f]
+ vcvttph2w zmm6, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvttph2w zmm6 {k7} {z}, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x7c,0x72,0x80]
+ vcvttph2w zmm6 {k7} {z}, word ptr [edx - 256]{1to32}
+
+// CHECK: vcvttsh2si edx, xmm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2c,0xd6]
+ vcvttsh2si edx, xmm6
+
+// CHECK: vcvttsh2si edx, xmm6, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x2c,0xd6]
+ vcvttsh2si edx, xmm6, {sae}
+
+// CHECK: vcvttsh2si edx, word ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttsh2si edx, word ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttsh2si edx, word ptr [ecx]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2c,0x11]
+ vcvttsh2si edx, word ptr [ecx]
+
+// CHECK: vcvttsh2si edx, word ptr [ecx + 254]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2c,0x51,0x7f]
+ vcvttsh2si edx, word ptr [ecx + 254]
+
+// CHECK: vcvttsh2si edx, word ptr [edx - 256]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2c,0x52,0x80]
+ vcvttsh2si edx, word ptr [edx - 256]
+
+// CHECK: vcvttsh2usi edx, xmm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x78,0xd6]
+ vcvttsh2usi edx, xmm6
+
+// CHECK: vcvttsh2usi edx, xmm6, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x78,0xd6]
+ vcvttsh2usi edx, xmm6, {sae}
+
+// CHECK: vcvttsh2usi edx, word ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x78,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttsh2usi edx, word ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttsh2usi edx, word ptr [ecx]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x78,0x11]
+ vcvttsh2usi edx, word ptr [ecx]
+
+// CHECK: vcvttsh2usi edx, word ptr [ecx + 254]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x78,0x51,0x7f]
+ vcvttsh2usi edx, word ptr [ecx + 254]
+
+// CHECK: vcvttsh2usi edx, word ptr [edx - 256]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x78,0x52,0x80]
+ vcvttsh2usi edx, word ptr [edx - 256]
+
+// CHECK: vcvtudq2ph ymm6, zmm5
+// CHECK: encoding: [0x62,0xf5,0x7f,0x48,0x7a,0xf5]
+ vcvtudq2ph ymm6, zmm5
+
+// CHECK: vcvtudq2ph ymm6, zmm5, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x7f,0x18,0x7a,0xf5]
+ vcvtudq2ph ymm6, zmm5, {rn-sae}
+
+// CHECK: vcvtudq2ph ymm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x4f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtudq2ph ymm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtudq2ph ymm6, dword ptr [ecx]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7f,0x58,0x7a,0x31]
+ vcvtudq2ph ymm6, dword ptr [ecx]{1to16}
+
+// CHECK: vcvtudq2ph ymm6, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x48,0x7a,0x71,0x7f]
+ vcvtudq2ph ymm6, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvtudq2ph ymm6 {k7} {z}, dword ptr [edx - 512]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7f,0xdf,0x7a,0x72,0x80]
+ vcvtudq2ph ymm6 {k7} {z}, dword ptr [edx - 512]{1to16}
+
+// CHECK: vcvtuqq2ph xmm6, zmm5
+// CHECK: encoding: [0x62,0xf5,0xff,0x48,0x7a,0xf5]
+ vcvtuqq2ph xmm6, zmm5
+
+// CHECK: vcvtuqq2ph xmm6, zmm5, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0xff,0x18,0x7a,0xf5]
+ vcvtuqq2ph xmm6, zmm5, {rn-sae}
+
+// CHECK: vcvtuqq2ph xmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0xff,0x4f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtuqq2ph xmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtuqq2ph xmm6, qword ptr [ecx]{1to8}
+// CHECK: encoding: [0x62,0xf5,0xff,0x58,0x7a,0x31]
+ vcvtuqq2ph xmm6, qword ptr [ecx]{1to8}
+
+// CHECK: vcvtuqq2ph xmm6, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0xff,0x48,0x7a,0x71,0x7f]
+ vcvtuqq2ph xmm6, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvtuqq2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to8}
+// CHECK: encoding: [0x62,0xf5,0xff,0xdf,0x7a,0x72,0x80]
+ vcvtuqq2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to8}
+
+// CHECK: vcvtusi2sh xmm6, xmm5, edx
+// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x7b,0xf2]
+ vcvtusi2sh xmm6, xmm5, edx
+
+// CHECK: vcvtusi2sh xmm6, xmm5, {rn-sae}, edx
+// CHECK: encoding: [0x62,0xf5,0x56,0x18,0x7b,0xf2]
+ vcvtusi2sh xmm6, xmm5, {rn-sae}, edx
+
+// CHECK: vcvtusi2sh xmm6, xmm5, dword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x7b,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtusi2sh xmm6, xmm5, dword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtusi2sh xmm6, xmm5, dword ptr [ecx]
+// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x7b,0x31]
+ vcvtusi2sh xmm6, xmm5, dword ptr [ecx]
+
+// CHECK: vcvtusi2sh xmm6, xmm5, dword ptr [ecx + 508]
+// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x7b,0x71,0x7f]
+ vcvtusi2sh xmm6, xmm5, dword ptr [ecx + 508]
+
+// CHECK: vcvtusi2sh xmm6, xmm5, dword ptr [edx - 512]
+// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x7b,0x72,0x80]
+ vcvtusi2sh xmm6, xmm5, dword ptr [edx - 512]
+
+// CHECK: vcvtuw2ph zmm6, zmm5
+// CHECK: encoding: [0x62,0xf5,0x7f,0x48,0x7d,0xf5]
+ vcvtuw2ph zmm6, zmm5
+
+// CHECK: vcvtuw2ph zmm6, zmm5, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x7f,0x18,0x7d,0xf5]
+ vcvtuw2ph zmm6, zmm5, {rn-sae}
+
+// CHECK: vcvtuw2ph zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x4f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtuw2ph zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtuw2ph zmm6, word ptr [ecx]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7f,0x58,0x7d,0x31]
+ vcvtuw2ph zmm6, word ptr [ecx]{1to32}
+
+// CHECK: vcvtuw2ph zmm6, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x48,0x7d,0x71,0x7f]
+ vcvtuw2ph zmm6, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvtuw2ph zmm6 {k7} {z}, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7f,0xdf,0x7d,0x72,0x80]
+ vcvtuw2ph zmm6 {k7} {z}, word ptr [edx - 256]{1to32}
+
+// CHECK: vcvtw2ph zmm6, zmm5
+// CHECK: encoding: [0x62,0xf5,0x7e,0x48,0x7d,0xf5]
+ vcvtw2ph zmm6, zmm5
+
+// CHECK: vcvtw2ph zmm6, zmm5, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x7d,0xf5]
+ vcvtw2ph zmm6, zmm5, {rn-sae}
+
+// CHECK: vcvtw2ph zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x4f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtw2ph zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtw2ph zmm6, word ptr [ecx]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x58,0x7d,0x31]
+ vcvtw2ph zmm6, word ptr [ecx]{1to32}
+
+// CHECK: vcvtw2ph zmm6, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x48,0x7d,0x71,0x7f]
+ vcvtw2ph zmm6, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvtw2ph zmm6 {k7} {z}, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7e,0xdf,0x7d,0x72,0x80]
+ vcvtw2ph zmm6 {k7} {z}, word ptr [edx - 256]{1to32}
diff --git a/llvm/test/MC/X86/intel-syntax-avx512fp16vl.s b/llvm/test/MC/X86/intel-syntax-avx512fp16vl.s
index d6ccd32bbfc16..5c53fc376e1cc 100644
--- a/llvm/test/MC/X86/intel-syntax-avx512fp16vl.s
+++ b/llvm/test/MC/X86/intel-syntax-avx512fp16vl.s
@@ -279,3 +279,859 @@
// CHECK: vsubph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
// CHECK: encoding: [0x62,0x65,0x14,0x97,0x5c,0x72,0x80]
vsubph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
+
+// CHECK: vcvtdq2ph xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7c,0x08,0x5b,0xf5]
+ vcvtdq2ph xmm30, xmm29
+
+// CHECK: vcvtdq2ph xmm30, ymm29
+// CHECK: encoding: [0x62,0x05,0x7c,0x28,0x5b,0xf5]
+ vcvtdq2ph xmm30, ymm29
+
+// CHECK: vcvtdq2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7c,0x0f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtdq2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtdq2ph xmm30, dword ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0x7c,0x18,0x5b,0x31]
+ vcvtdq2ph xmm30, dword ptr [r9]{1to4}
+
+// CHECK: vcvtdq2ph xmm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0x7c,0x08,0x5b,0x71,0x7f]
+ vcvtdq2ph xmm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvtdq2ph xmm30 {k7} {z}, dword ptr [rdx - 512]{1to4}
+// CHECK: encoding: [0x62,0x65,0x7c,0x9f,0x5b,0x72,0x80]
+ vcvtdq2ph xmm30 {k7} {z}, dword ptr [rdx - 512]{1to4}
+
+// CHECK: vcvtdq2ph xmm30, dword ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x45,0x7c,0x38,0x5b,0x31]
+ vcvtdq2ph xmm30, dword ptr [r9]{1to8}
+
+// CHECK: vcvtdq2ph xmm30, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0x65,0x7c,0x28,0x5b,0x71,0x7f]
+ vcvtdq2ph xmm30, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvtdq2ph xmm30 {k7} {z}, dword ptr [rdx - 512]{1to8}
+// CHECK: encoding: [0x62,0x65,0x7c,0xbf,0x5b,0x72,0x80]
+ vcvtdq2ph xmm30 {k7} {z}, dword ptr [rdx - 512]{1to8}
+
+// CHECK: vcvtpd2ph xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0xfd,0x08,0x5a,0xf5]
+ vcvtpd2ph xmm30, xmm29
+
+// CHECK: vcvtpd2ph xmm30, ymm29
+// CHECK: encoding: [0x62,0x05,0xfd,0x28,0x5a,0xf5]
+ vcvtpd2ph xmm30, ymm29
+
+// CHECK: vcvtpd2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0xfd,0x0f,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtpd2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtpd2ph xmm30, qword ptr [r9]{1to2}
+// CHECK: encoding: [0x62,0x45,0xfd,0x18,0x5a,0x31]
+ vcvtpd2ph xmm30, qword ptr [r9]{1to2}
+
+// CHECK: vcvtpd2ph xmm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0xfd,0x08,0x5a,0x71,0x7f]
+ vcvtpd2ph xmm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvtpd2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to2}
+// CHECK: encoding: [0x62,0x65,0xfd,0x9f,0x5a,0x72,0x80]
+ vcvtpd2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to2}
+
+// CHECK: vcvtpd2ph xmm30, qword ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0xfd,0x38,0x5a,0x31]
+ vcvtpd2ph xmm30, qword ptr [r9]{1to4}
+
+// CHECK: vcvtpd2ph xmm30, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0x65,0xfd,0x28,0x5a,0x71,0x7f]
+ vcvtpd2ph xmm30, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvtpd2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to4}
+// CHECK: encoding: [0x62,0x65,0xfd,0xbf,0x5a,0x72,0x80]
+ vcvtpd2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to4}
+
+// CHECK: vcvtph2dq xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x08,0x5b,0xf5]
+ vcvtph2dq xmm30, xmm29
+
+// CHECK: vcvtph2dq ymm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x28,0x5b,0xf5]
+ vcvtph2dq ymm30, xmm29
+
+// CHECK: vcvtph2dq xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x0f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtph2dq xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2dq xmm30, word ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0x7d,0x18,0x5b,0x31]
+ vcvtph2dq xmm30, word ptr [r9]{1to4}
+
+// CHECK: vcvtph2dq xmm30, qword ptr [rcx + 1016]
+// CHECK: encoding: [0x62,0x65,0x7d,0x08,0x5b,0x71,0x7f]
+ vcvtph2dq xmm30, qword ptr [rcx + 1016]
+
+// CHECK: vcvtph2dq xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+// CHECK: encoding: [0x62,0x65,0x7d,0x9f,0x5b,0x72,0x80]
+ vcvtph2dq xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+
+// CHECK: vcvtph2dq ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x2f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtph2dq ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2dq ymm30, word ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x45,0x7d,0x38,0x5b,0x31]
+ vcvtph2dq ymm30, word ptr [r9]{1to8}
+
+// CHECK: vcvtph2dq ymm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0x7d,0x28,0x5b,0x71,0x7f]
+ vcvtph2dq ymm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvtph2dq ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0x65,0x7d,0xbf,0x5b,0x72,0x80]
+ vcvtph2dq ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vcvtph2pd xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7c,0x08,0x5a,0xf5]
+ vcvtph2pd xmm30, xmm29
+
+// CHECK: vcvtph2pd ymm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7c,0x28,0x5a,0xf5]
+ vcvtph2pd ymm30, xmm29
+
+// CHECK: vcvtph2pd xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7c,0x0f,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtph2pd xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2pd xmm30, word ptr [r9]{1to2}
+// CHECK: encoding: [0x62,0x45,0x7c,0x18,0x5a,0x31]
+ vcvtph2pd xmm30, word ptr [r9]{1to2}
+
+// CHECK: vcvtph2pd xmm30, dword ptr [rcx + 508]
+// CHECK: encoding: [0x62,0x65,0x7c,0x08,0x5a,0x71,0x7f]
+ vcvtph2pd xmm30, dword ptr [rcx + 508]
+
+// CHECK: vcvtph2pd xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}
+// CHECK: encoding: [0x62,0x65,0x7c,0x9f,0x5a,0x72,0x80]
+ vcvtph2pd xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}
+
+// CHECK: vcvtph2pd ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7c,0x2f,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtph2pd ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2pd ymm30, word ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0x7c,0x38,0x5a,0x31]
+ vcvtph2pd ymm30, word ptr [r9]{1to4}
+
+// CHECK: vcvtph2pd ymm30, qword ptr [rcx + 1016]
+// CHECK: encoding: [0x62,0x65,0x7c,0x28,0x5a,0x71,0x7f]
+ vcvtph2pd ymm30, qword ptr [rcx + 1016]
+
+// CHECK: vcvtph2pd ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+// CHECK: encoding: [0x62,0x65,0x7c,0xbf,0x5a,0x72,0x80]
+ vcvtph2pd ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+
+// CHECK: vcvtph2psx xmm30, xmm29
+// CHECK: encoding: [0x62,0x06,0x7d,0x08,0x13,0xf5]
+ vcvtph2psx xmm30, xmm29
+
+// CHECK: vcvtph2psx ymm30, xmm29
+// CHECK: encoding: [0x62,0x06,0x7d,0x28,0x13,0xf5]
+ vcvtph2psx ymm30, xmm29
+
+// CHECK: vcvtph2psx xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x26,0x7d,0x0f,0x13,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtph2psx xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2psx xmm30, word ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x46,0x7d,0x18,0x13,0x31]
+ vcvtph2psx xmm30, word ptr [r9]{1to4}
+
+// CHECK: vcvtph2psx xmm30, qword ptr [rcx + 1016]
+// CHECK: encoding: [0x62,0x66,0x7d,0x08,0x13,0x71,0x7f]
+ vcvtph2psx xmm30, qword ptr [rcx + 1016]
+
+// CHECK: vcvtph2psx xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+// CHECK: encoding: [0x62,0x66,0x7d,0x9f,0x13,0x72,0x80]
+ vcvtph2psx xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+
+// CHECK: vcvtph2psx ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x26,0x7d,0x2f,0x13,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtph2psx ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2psx ymm30, word ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x46,0x7d,0x38,0x13,0x31]
+ vcvtph2psx ymm30, word ptr [r9]{1to8}
+
+// CHECK: vcvtph2psx ymm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x66,0x7d,0x28,0x13,0x71,0x7f]
+ vcvtph2psx ymm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvtph2psx ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0x66,0x7d,0xbf,0x13,0x72,0x80]
+ vcvtph2psx ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vcvtph2qq xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x08,0x7b,0xf5]
+ vcvtph2qq xmm30, xmm29
+
+// CHECK: vcvtph2qq ymm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x28,0x7b,0xf5]
+ vcvtph2qq ymm30, xmm29
+
+// CHECK: vcvtph2qq xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x0f,0x7b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtph2qq xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2qq xmm30, word ptr [r9]{1to2}
+// CHECK: encoding: [0x62,0x45,0x7d,0x18,0x7b,0x31]
+ vcvtph2qq xmm30, word ptr [r9]{1to2}
+
+// CHECK: vcvtph2qq xmm30, dword ptr [rcx + 508]
+// CHECK: encoding: [0x62,0x65,0x7d,0x08,0x7b,0x71,0x7f]
+ vcvtph2qq xmm30, dword ptr [rcx + 508]
+
+// CHECK: vcvtph2qq xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}
+// CHECK: encoding: [0x62,0x65,0x7d,0x9f,0x7b,0x72,0x80]
+ vcvtph2qq xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}
+
+// CHECK: vcvtph2qq ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x2f,0x7b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtph2qq ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2qq ymm30, word ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0x7d,0x38,0x7b,0x31]
+ vcvtph2qq ymm30, word ptr [r9]{1to4}
+
+// CHECK: vcvtph2qq ymm30, qword ptr [rcx + 1016]
+// CHECK: encoding: [0x62,0x65,0x7d,0x28,0x7b,0x71,0x7f]
+ vcvtph2qq ymm30, qword ptr [rcx + 1016]
+
+// CHECK: vcvtph2qq ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+// CHECK: encoding: [0x62,0x65,0x7d,0xbf,0x7b,0x72,0x80]
+ vcvtph2qq ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+
+// CHECK: vcvtph2udq xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7c,0x08,0x79,0xf5]
+ vcvtph2udq xmm30, xmm29
+
+// CHECK: vcvtph2udq ymm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7c,0x28,0x79,0xf5]
+ vcvtph2udq ymm30, xmm29
+
+// CHECK: vcvtph2udq xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7c,0x0f,0x79,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtph2udq xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2udq xmm30, word ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0x7c,0x18,0x79,0x31]
+ vcvtph2udq xmm30, word ptr [r9]{1to4}
+
+// CHECK: vcvtph2udq xmm30, qword ptr [rcx + 1016]
+// CHECK: encoding: [0x62,0x65,0x7c,0x08,0x79,0x71,0x7f]
+ vcvtph2udq xmm30, qword ptr [rcx + 1016]
+
+// CHECK: vcvtph2udq xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+// CHECK: encoding: [0x62,0x65,0x7c,0x9f,0x79,0x72,0x80]
+ vcvtph2udq xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+
+// CHECK: vcvtph2udq ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7c,0x2f,0x79,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtph2udq ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2udq ymm30, word ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x45,0x7c,0x38,0x79,0x31]
+ vcvtph2udq ymm30, word ptr [r9]{1to8}
+
+// CHECK: vcvtph2udq ymm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0x7c,0x28,0x79,0x71,0x7f]
+ vcvtph2udq ymm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvtph2udq ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0x65,0x7c,0xbf,0x79,0x72,0x80]
+ vcvtph2udq ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vcvtph2uqq xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x08,0x79,0xf5]
+ vcvtph2uqq xmm30, xmm29
+
+// CHECK: vcvtph2uqq ymm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x28,0x79,0xf5]
+ vcvtph2uqq ymm30, xmm29
+
+// CHECK: vcvtph2uqq xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x0f,0x79,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtph2uqq xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2uqq xmm30, word ptr [r9]{1to2}
+// CHECK: encoding: [0x62,0x45,0x7d,0x18,0x79,0x31]
+ vcvtph2uqq xmm30, word ptr [r9]{1to2}
+
+// CHECK: vcvtph2uqq xmm30, dword ptr [rcx + 508]
+// CHECK: encoding: [0x62,0x65,0x7d,0x08,0x79,0x71,0x7f]
+ vcvtph2uqq xmm30, dword ptr [rcx + 508]
+
+// CHECK: vcvtph2uqq xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}
+// CHECK: encoding: [0x62,0x65,0x7d,0x9f,0x79,0x72,0x80]
+ vcvtph2uqq xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}
+
+// CHECK: vcvtph2uqq ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x2f,0x79,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtph2uqq ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2uqq ymm30, word ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0x7d,0x38,0x79,0x31]
+ vcvtph2uqq ymm30, word ptr [r9]{1to4}
+
+// CHECK: vcvtph2uqq ymm30, qword ptr [rcx + 1016]
+// CHECK: encoding: [0x62,0x65,0x7d,0x28,0x79,0x71,0x7f]
+ vcvtph2uqq ymm30, qword ptr [rcx + 1016]
+
+// CHECK: vcvtph2uqq ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+// CHECK: encoding: [0x62,0x65,0x7d,0xbf,0x79,0x72,0x80]
+ vcvtph2uqq ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+
+// CHECK: vcvtph2uw xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7c,0x08,0x7d,0xf5]
+ vcvtph2uw xmm30, xmm29
+
+// CHECK: vcvtph2uw ymm30, ymm29
+// CHECK: encoding: [0x62,0x05,0x7c,0x28,0x7d,0xf5]
+ vcvtph2uw ymm30, ymm29
+
+// CHECK: vcvtph2uw xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7c,0x0f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtph2uw xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2uw xmm30, word ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x45,0x7c,0x18,0x7d,0x31]
+ vcvtph2uw xmm30, word ptr [r9]{1to8}
+
+// CHECK: vcvtph2uw xmm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0x7c,0x08,0x7d,0x71,0x7f]
+ vcvtph2uw xmm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvtph2uw xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0x65,0x7c,0x9f,0x7d,0x72,0x80]
+ vcvtph2uw xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vcvtph2uw ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7c,0x2f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtph2uw ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2uw ymm30, word ptr [r9]{1to16}
+// CHECK: encoding: [0x62,0x45,0x7c,0x38,0x7d,0x31]
+ vcvtph2uw ymm30, word ptr [r9]{1to16}
+
+// CHECK: vcvtph2uw ymm30, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0x65,0x7c,0x28,0x7d,0x71,0x7f]
+ vcvtph2uw ymm30, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvtph2uw ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0x65,0x7c,0xbf,0x7d,0x72,0x80]
+ vcvtph2uw ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+
+// CHECK: vcvtph2w xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x08,0x7d,0xf5]
+ vcvtph2w xmm30, xmm29
+
+// CHECK: vcvtph2w ymm30, ymm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x28,0x7d,0xf5]
+ vcvtph2w ymm30, ymm29
+
+// CHECK: vcvtph2w xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x0f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtph2w xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2w xmm30, word ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x45,0x7d,0x18,0x7d,0x31]
+ vcvtph2w xmm30, word ptr [r9]{1to8}
+
+// CHECK: vcvtph2w xmm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0x7d,0x08,0x7d,0x71,0x7f]
+ vcvtph2w xmm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvtph2w xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0x65,0x7d,0x9f,0x7d,0x72,0x80]
+ vcvtph2w xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vcvtph2w ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x2f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtph2w ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2w ymm30, word ptr [r9]{1to16}
+// CHECK: encoding: [0x62,0x45,0x7d,0x38,0x7d,0x31]
+ vcvtph2w ymm30, word ptr [r9]{1to16}
+
+// CHECK: vcvtph2w ymm30, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0x65,0x7d,0x28,0x7d,0x71,0x7f]
+ vcvtph2w ymm30, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvtph2w ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0x65,0x7d,0xbf,0x7d,0x72,0x80]
+ vcvtph2w ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+
+// CHECK: vcvtps2phx xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x08,0x1d,0xf5]
+ vcvtps2phx xmm30, xmm29
+
+// CHECK: vcvtps2phx xmm30, ymm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x28,0x1d,0xf5]
+ vcvtps2phx xmm30, ymm29
+
+// CHECK: vcvtps2phx xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x0f,0x1d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtps2phx xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtps2phx xmm30, dword ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0x7d,0x18,0x1d,0x31]
+ vcvtps2phx xmm30, dword ptr [r9]{1to4}
+
+// CHECK: vcvtps2phx xmm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0x7d,0x08,0x1d,0x71,0x7f]
+ vcvtps2phx xmm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvtps2phx xmm30 {k7} {z}, dword ptr [rdx - 512]{1to4}
+// CHECK: encoding: [0x62,0x65,0x7d,0x9f,0x1d,0x72,0x80]
+ vcvtps2phx xmm30 {k7} {z}, dword ptr [rdx - 512]{1to4}
+
+// CHECK: vcvtps2phx xmm30, dword ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x45,0x7d,0x38,0x1d,0x31]
+ vcvtps2phx xmm30, dword ptr [r9]{1to8}
+
+// CHECK: vcvtps2phx xmm30, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0x65,0x7d,0x28,0x1d,0x71,0x7f]
+ vcvtps2phx xmm30, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvtps2phx xmm30 {k7} {z}, dword ptr [rdx - 512]{1to8}
+// CHECK: encoding: [0x62,0x65,0x7d,0xbf,0x1d,0x72,0x80]
+ vcvtps2phx xmm30 {k7} {z}, dword ptr [rdx - 512]{1to8}
+
+// CHECK: vcvtqq2ph xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0xfc,0x08,0x5b,0xf5]
+ vcvtqq2ph xmm30, xmm29
+
+// CHECK: vcvtqq2ph xmm30, ymm29
+// CHECK: encoding: [0x62,0x05,0xfc,0x28,0x5b,0xf5]
+ vcvtqq2ph xmm30, ymm29
+
+// CHECK: vcvtqq2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0xfc,0x0f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtqq2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtqq2ph xmm30, qword ptr [r9]{1to2}
+// CHECK: encoding: [0x62,0x45,0xfc,0x18,0x5b,0x31]
+ vcvtqq2ph xmm30, qword ptr [r9]{1to2}
+
+// CHECK: vcvtqq2ph xmm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0xfc,0x08,0x5b,0x71,0x7f]
+ vcvtqq2ph xmm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvtqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to2}
+// CHECK: encoding: [0x62,0x65,0xfc,0x9f,0x5b,0x72,0x80]
+ vcvtqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to2}
+
+// CHECK: vcvtqq2ph xmm30, qword ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0xfc,0x38,0x5b,0x31]
+ vcvtqq2ph xmm30, qword ptr [r9]{1to4}
+
+// CHECK: vcvtqq2ph xmm30, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0x65,0xfc,0x28,0x5b,0x71,0x7f]
+ vcvtqq2ph xmm30, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvtqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to4}
+// CHECK: encoding: [0x62,0x65,0xfc,0xbf,0x5b,0x72,0x80]
+ vcvtqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to4}
+
+// CHECK: vcvttph2dq xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7e,0x08,0x5b,0xf5]
+ vcvttph2dq xmm30, xmm29
+
+// CHECK: vcvttph2dq ymm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7e,0x28,0x5b,0xf5]
+ vcvttph2dq ymm30, xmm29
+
+// CHECK: vcvttph2dq xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7e,0x0f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttph2dq xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttph2dq xmm30, word ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0x7e,0x18,0x5b,0x31]
+ vcvttph2dq xmm30, word ptr [r9]{1to4}
+
+// CHECK: vcvttph2dq xmm30, qword ptr [rcx + 1016]
+// CHECK: encoding: [0x62,0x65,0x7e,0x08,0x5b,0x71,0x7f]
+ vcvttph2dq xmm30, qword ptr [rcx + 1016]
+
+// CHECK: vcvttph2dq xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+// CHECK: encoding: [0x62,0x65,0x7e,0x9f,0x5b,0x72,0x80]
+ vcvttph2dq xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+
+// CHECK: vcvttph2dq ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7e,0x2f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttph2dq ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttph2dq ymm30, word ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x45,0x7e,0x38,0x5b,0x31]
+ vcvttph2dq ymm30, word ptr [r9]{1to8}
+
+// CHECK: vcvttph2dq ymm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0x7e,0x28,0x5b,0x71,0x7f]
+ vcvttph2dq ymm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvttph2dq ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0x65,0x7e,0xbf,0x5b,0x72,0x80]
+ vcvttph2dq ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vcvttph2qq xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x08,0x7a,0xf5]
+ vcvttph2qq xmm30, xmm29
+
+// CHECK: vcvttph2qq ymm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x28,0x7a,0xf5]
+ vcvttph2qq ymm30, xmm29
+
+// CHECK: vcvttph2qq xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x0f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttph2qq xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttph2qq xmm30, word ptr [r9]{1to2}
+// CHECK: encoding: [0x62,0x45,0x7d,0x18,0x7a,0x31]
+ vcvttph2qq xmm30, word ptr [r9]{1to2}
+
+// CHECK: vcvttph2qq xmm30, dword ptr [rcx + 508]
+// CHECK: encoding: [0x62,0x65,0x7d,0x08,0x7a,0x71,0x7f]
+ vcvttph2qq xmm30, dword ptr [rcx + 508]
+
+// CHECK: vcvttph2qq xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}
+// CHECK: encoding: [0x62,0x65,0x7d,0x9f,0x7a,0x72,0x80]
+ vcvttph2qq xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}
+
+// CHECK: vcvttph2qq ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x2f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttph2qq ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttph2qq ymm30, word ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0x7d,0x38,0x7a,0x31]
+ vcvttph2qq ymm30, word ptr [r9]{1to4}
+
+// CHECK: vcvttph2qq ymm30, qword ptr [rcx + 1016]
+// CHECK: encoding: [0x62,0x65,0x7d,0x28,0x7a,0x71,0x7f]
+ vcvttph2qq ymm30, qword ptr [rcx + 1016]
+
+// CHECK: vcvttph2qq ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+// CHECK: encoding: [0x62,0x65,0x7d,0xbf,0x7a,0x72,0x80]
+ vcvttph2qq ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+
+// CHECK: vcvttph2udq xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7c,0x08,0x78,0xf5]
+ vcvttph2udq xmm30, xmm29
+
+// CHECK: vcvttph2udq ymm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7c,0x28,0x78,0xf5]
+ vcvttph2udq ymm30, xmm29
+
+// CHECK: vcvttph2udq xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7c,0x0f,0x78,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttph2udq xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttph2udq xmm30, word ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0x7c,0x18,0x78,0x31]
+ vcvttph2udq xmm30, word ptr [r9]{1to4}
+
+// CHECK: vcvttph2udq xmm30, qword ptr [rcx + 1016]
+// CHECK: encoding: [0x62,0x65,0x7c,0x08,0x78,0x71,0x7f]
+ vcvttph2udq xmm30, qword ptr [rcx + 1016]
+
+// CHECK: vcvttph2udq xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+// CHECK: encoding: [0x62,0x65,0x7c,0x9f,0x78,0x72,0x80]
+ vcvttph2udq xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+
+// CHECK: vcvttph2udq ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7c,0x2f,0x78,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttph2udq ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttph2udq ymm30, word ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x45,0x7c,0x38,0x78,0x31]
+ vcvttph2udq ymm30, word ptr [r9]{1to8}
+
+// CHECK: vcvttph2udq ymm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0x7c,0x28,0x78,0x71,0x7f]
+ vcvttph2udq ymm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvttph2udq ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0x65,0x7c,0xbf,0x78,0x72,0x80]
+ vcvttph2udq ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vcvttph2uqq xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x08,0x78,0xf5]
+ vcvttph2uqq xmm30, xmm29
+
+// CHECK: vcvttph2uqq ymm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x28,0x78,0xf5]
+ vcvttph2uqq ymm30, xmm29
+
+// CHECK: vcvttph2uqq xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x0f,0x78,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttph2uqq xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttph2uqq xmm30, word ptr [r9]{1to2}
+// CHECK: encoding: [0x62,0x45,0x7d,0x18,0x78,0x31]
+ vcvttph2uqq xmm30, word ptr [r9]{1to2}
+
+// CHECK: vcvttph2uqq xmm30, dword ptr [rcx + 508]
+// CHECK: encoding: [0x62,0x65,0x7d,0x08,0x78,0x71,0x7f]
+ vcvttph2uqq xmm30, dword ptr [rcx + 508]
+
+// CHECK: vcvttph2uqq xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}
+// CHECK: encoding: [0x62,0x65,0x7d,0x9f,0x78,0x72,0x80]
+ vcvttph2uqq xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}
+
+// CHECK: vcvttph2uqq ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x2f,0x78,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttph2uqq ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttph2uqq ymm30, word ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0x7d,0x38,0x78,0x31]
+ vcvttph2uqq ymm30, word ptr [r9]{1to4}
+
+// CHECK: vcvttph2uqq ymm30, qword ptr [rcx + 1016]
+// CHECK: encoding: [0x62,0x65,0x7d,0x28,0x78,0x71,0x7f]
+ vcvttph2uqq ymm30, qword ptr [rcx + 1016]
+
+// CHECK: vcvttph2uqq ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+// CHECK: encoding: [0x62,0x65,0x7d,0xbf,0x78,0x72,0x80]
+ vcvttph2uqq ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+
+// CHECK: vcvttph2uw xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7c,0x08,0x7c,0xf5]
+ vcvttph2uw xmm30, xmm29
+
+// CHECK: vcvttph2uw ymm30, ymm29
+// CHECK: encoding: [0x62,0x05,0x7c,0x28,0x7c,0xf5]
+ vcvttph2uw ymm30, ymm29
+
+// CHECK: vcvttph2uw xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7c,0x0f,0x7c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttph2uw xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttph2uw xmm30, word ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x45,0x7c,0x18,0x7c,0x31]
+ vcvttph2uw xmm30, word ptr [r9]{1to8}
+
+// CHECK: vcvttph2uw xmm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0x7c,0x08,0x7c,0x71,0x7f]
+ vcvttph2uw xmm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvttph2uw xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0x65,0x7c,0x9f,0x7c,0x72,0x80]
+ vcvttph2uw xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vcvttph2uw ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7c,0x2f,0x7c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttph2uw ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttph2uw ymm30, word ptr [r9]{1to16}
+// CHECK: encoding: [0x62,0x45,0x7c,0x38,0x7c,0x31]
+ vcvttph2uw ymm30, word ptr [r9]{1to16}
+
+// CHECK: vcvttph2uw ymm30, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0x65,0x7c,0x28,0x7c,0x71,0x7f]
+ vcvttph2uw ymm30, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvttph2uw ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0x65,0x7c,0xbf,0x7c,0x72,0x80]
+ vcvttph2uw ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+
+// CHECK: vcvttph2w xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x08,0x7c,0xf5]
+ vcvttph2w xmm30, xmm29
+
+// CHECK: vcvttph2w ymm30, ymm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x28,0x7c,0xf5]
+ vcvttph2w ymm30, ymm29
+
+// CHECK: vcvttph2w xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x0f,0x7c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttph2w xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttph2w xmm30, word ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x45,0x7d,0x18,0x7c,0x31]
+ vcvttph2w xmm30, word ptr [r9]{1to8}
+
+// CHECK: vcvttph2w xmm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0x7d,0x08,0x7c,0x71,0x7f]
+ vcvttph2w xmm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvttph2w xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0x65,0x7d,0x9f,0x7c,0x72,0x80]
+ vcvttph2w xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vcvttph2w ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x2f,0x7c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttph2w ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttph2w ymm30, word ptr [r9]{1to16}
+// CHECK: encoding: [0x62,0x45,0x7d,0x38,0x7c,0x31]
+ vcvttph2w ymm30, word ptr [r9]{1to16}
+
+// CHECK: vcvttph2w ymm30, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0x65,0x7d,0x28,0x7c,0x71,0x7f]
+ vcvttph2w ymm30, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvttph2w ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0x65,0x7d,0xbf,0x7c,0x72,0x80]
+ vcvttph2w ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+
+// CHECK: vcvtudq2ph xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7f,0x08,0x7a,0xf5]
+ vcvtudq2ph xmm30, xmm29
+
+// CHECK: vcvtudq2ph xmm30, ymm29
+// CHECK: encoding: [0x62,0x05,0x7f,0x28,0x7a,0xf5]
+ vcvtudq2ph xmm30, ymm29
+
+// CHECK: vcvtudq2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7f,0x0f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtudq2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtudq2ph xmm30, dword ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0x7f,0x18,0x7a,0x31]
+ vcvtudq2ph xmm30, dword ptr [r9]{1to4}
+
+// CHECK: vcvtudq2ph xmm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0x7f,0x08,0x7a,0x71,0x7f]
+ vcvtudq2ph xmm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvtudq2ph xmm30 {k7} {z}, dword ptr [rdx - 512]{1to4}
+// CHECK: encoding: [0x62,0x65,0x7f,0x9f,0x7a,0x72,0x80]
+ vcvtudq2ph xmm30 {k7} {z}, dword ptr [rdx - 512]{1to4}
+
+// CHECK: vcvtudq2ph xmm30, dword ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x45,0x7f,0x38,0x7a,0x31]
+ vcvtudq2ph xmm30, dword ptr [r9]{1to8}
+
+// CHECK: vcvtudq2ph xmm30, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0x65,0x7f,0x28,0x7a,0x71,0x7f]
+ vcvtudq2ph xmm30, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvtudq2ph xmm30 {k7} {z}, dword ptr [rdx - 512]{1to8}
+// CHECK: encoding: [0x62,0x65,0x7f,0xbf,0x7a,0x72,0x80]
+ vcvtudq2ph xmm30 {k7} {z}, dword ptr [rdx - 512]{1to8}
+
+// CHECK: vcvtuqq2ph xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0xff,0x08,0x7a,0xf5]
+ vcvtuqq2ph xmm30, xmm29
+
+// CHECK: vcvtuqq2ph xmm30, ymm29
+// CHECK: encoding: [0x62,0x05,0xff,0x28,0x7a,0xf5]
+ vcvtuqq2ph xmm30, ymm29
+
+// CHECK: vcvtuqq2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0xff,0x0f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtuqq2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtuqq2ph xmm30, qword ptr [r9]{1to2}
+// CHECK: encoding: [0x62,0x45,0xff,0x18,0x7a,0x31]
+ vcvtuqq2ph xmm30, qword ptr [r9]{1to2}
+
+// CHECK: vcvtuqq2ph xmm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0xff,0x08,0x7a,0x71,0x7f]
+ vcvtuqq2ph xmm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvtuqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to2}
+// CHECK: encoding: [0x62,0x65,0xff,0x9f,0x7a,0x72,0x80]
+ vcvtuqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to2}
+
+// CHECK: vcvtuqq2ph xmm30, qword ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0xff,0x38,0x7a,0x31]
+ vcvtuqq2ph xmm30, qword ptr [r9]{1to4}
+
+// CHECK: vcvtuqq2ph xmm30, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0x65,0xff,0x28,0x7a,0x71,0x7f]
+ vcvtuqq2ph xmm30, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvtuqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to4}
+// CHECK: encoding: [0x62,0x65,0xff,0xbf,0x7a,0x72,0x80]
+ vcvtuqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to4}
+
+// CHECK: vcvtuw2ph xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7f,0x08,0x7d,0xf5]
+ vcvtuw2ph xmm30, xmm29
+
+// CHECK: vcvtuw2ph ymm30, ymm29
+// CHECK: encoding: [0x62,0x05,0x7f,0x28,0x7d,0xf5]
+ vcvtuw2ph ymm30, ymm29
+
+// CHECK: vcvtuw2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7f,0x0f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtuw2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtuw2ph xmm30, word ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x45,0x7f,0x18,0x7d,0x31]
+ vcvtuw2ph xmm30, word ptr [r9]{1to8}
+
+// CHECK: vcvtuw2ph xmm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0x7f,0x08,0x7d,0x71,0x7f]
+ vcvtuw2ph xmm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvtuw2ph xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0x65,0x7f,0x9f,0x7d,0x72,0x80]
+ vcvtuw2ph xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vcvtuw2ph ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7f,0x2f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtuw2ph ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtuw2ph ymm30, word ptr [r9]{1to16}
+// CHECK: encoding: [0x62,0x45,0x7f,0x38,0x7d,0x31]
+ vcvtuw2ph ymm30, word ptr [r9]{1to16}
+
+// CHECK: vcvtuw2ph ymm30, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0x65,0x7f,0x28,0x7d,0x71,0x7f]
+ vcvtuw2ph ymm30, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvtuw2ph ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0x65,0x7f,0xbf,0x7d,0x72,0x80]
+ vcvtuw2ph ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+
+// CHECK: vcvtw2ph xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7e,0x08,0x7d,0xf5]
+ vcvtw2ph xmm30, xmm29
+
+// CHECK: vcvtw2ph ymm30, ymm29
+// CHECK: encoding: [0x62,0x05,0x7e,0x28,0x7d,0xf5]
+ vcvtw2ph ymm30, ymm29
+
+// CHECK: vcvtw2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7e,0x0f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtw2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtw2ph xmm30, word ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x45,0x7e,0x18,0x7d,0x31]
+ vcvtw2ph xmm30, word ptr [r9]{1to8}
+
+// CHECK: vcvtw2ph xmm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0x7e,0x08,0x7d,0x71,0x7f]
+ vcvtw2ph xmm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvtw2ph xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0x65,0x7e,0x9f,0x7d,0x72,0x80]
+ vcvtw2ph xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vcvtw2ph ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7e,0x2f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtw2ph ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtw2ph ymm30, word ptr [r9]{1to16}
+// CHECK: encoding: [0x62,0x45,0x7e,0x38,0x7d,0x31]
+ vcvtw2ph ymm30, word ptr [r9]{1to16}
+
+// CHECK: vcvtw2ph ymm30, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0x65,0x7e,0x28,0x7d,0x71,0x7f]
+ vcvtw2ph ymm30, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvtw2ph ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0x65,0x7e,0xbf,0x7d,0x72,0x80]
+ vcvtw2ph ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}