[clang] 2379949 - [X86] AVX512FP16 instructions enabling 3/6

via cfe-commits cfe-commits at lists.llvm.org
Tue Aug 17 18:35:15 PDT 2021


Author: Wang, Pengfei
Date: 2021-08-18T09:03:41+08:00
New Revision: 2379949aadcee8d4028dec0508f88bda290636bc

URL: https://github.com/llvm/llvm-project/commit/2379949aadcee8d4028dec0508f88bda290636bc
DIFF: https://github.com/llvm/llvm-project/commit/2379949aadcee8d4028dec0508f88bda290636bc.diff

LOG: [X86] AVX512FP16 instructions enabling 3/6

Enable FP16 conversion instructions.

Ref.: https://software.intel.com/content/www/us/en/develop/download/intel-avx512-fp16-architecture-specification.html
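
For reference, a minimal usage sketch of a few of the conversion intrinsics added here (the function and variable names below are illustrative only). It assumes immintrin.h from a compiler build carrying this patch and compilation with -mavx512fp16:

    #include <immintrin.h>

    __m128h fp16_convert_demo(__m512d pd, __m512i epi16, __m128h sh) {
      // VCVTPD2PH: 8 x double -> 8 x _Float16.
      __m128h h = _mm512_cvtpd_ph(pd);
      // Same conversion with an explicit rounding mode via the *_round_* form.
      __m128h hr = _mm512_cvt_roundpd_ph(pd, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
      // VCVTW2PH: 32 x i16 -> 32 x _Float16.
      __m512h wh = _mm512_cvtepi16_ph(epi16);
      // VCVTPH2PD: widen 8 x _Float16 back to 8 x double.
      __m512d back = _mm512_cvtph_pd(h);
      // VCVTSH2SI: scalar _Float16 -> signed 32-bit integer.
      int i = _mm_cvtsh_i32(sh);
      (void)hr; (void)wh; (void)back; (void)i;
      return h;
    }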

Reviewed By: LuoYuanke

Differential Revision: https://reviews.llvm.org/D105265

Added: 
    llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-intrinsics.ll
    llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll
    llvm/test/CodeGen/X86/avx512fp16-cvt.ll
    llvm/test/CodeGen/X86/cvt16-2.ll
    llvm/test/CodeGen/X86/fp-strict-scalar-fptoint-fp16.ll
    llvm/test/CodeGen/X86/fp-strict-scalar-inttofp-fp16.ll
    llvm/test/CodeGen/X86/vec-strict-fptoint-128-fp16.ll
    llvm/test/CodeGen/X86/vec-strict-fptoint-256-fp16.ll
    llvm/test/CodeGen/X86/vec-strict-fptoint-512-fp16.ll
    llvm/test/CodeGen/X86/vec-strict-inttofp-128-fp16.ll
    llvm/test/CodeGen/X86/vec-strict-inttofp-256-fp16.ll
    llvm/test/CodeGen/X86/vec-strict-inttofp-512-fp16.ll

Modified: 
    clang/include/clang/Basic/BuiltinsX86.def
    clang/include/clang/Basic/BuiltinsX86_64.def
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/lib/Headers/avx512fp16intrin.h
    clang/lib/Headers/avx512vlfp16intrin.h
    clang/lib/Sema/SemaChecking.cpp
    clang/test/CodeGen/X86/avx512fp16-builtins.c
    clang/test/CodeGen/X86/avx512vlfp16-builtins.c
    llvm/include/llvm/IR/IntrinsicsX86.td
    llvm/include/llvm/IR/RuntimeLibcalls.def
    llvm/lib/CodeGen/TargetLoweringBase.cpp
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/lib/Target/X86/X86InstrAVX512.td
    llvm/lib/Target/X86/X86InstrFoldTables.cpp
    llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
    llvm/lib/Target/X86/X86InstrInfo.cpp
    llvm/lib/Target/X86/X86InstrSSE.td
    llvm/lib/Target/X86/X86IntrinsicsInfo.h
    llvm/test/CodeGen/X86/avx512fp16-arith-intrinsics.ll
    llvm/test/CodeGen/X86/avx512fp16-arith-vl-intrinsics.ll
    llvm/test/CodeGen/X86/avx512fp16-arith.ll
    llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
    llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll
    llvm/test/CodeGen/X86/fp-strict-scalar-fp16.ll
    llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16vl.ll
    llvm/test/CodeGen/X86/vec-strict-128-fp16.ll
    llvm/test/CodeGen/X86/vec-strict-256-fp16.ll
    llvm/test/CodeGen/X86/vec-strict-512-fp16.ll
    llvm/test/MC/Disassembler/X86/avx512fp16.txt
    llvm/test/MC/Disassembler/X86/avx512fp16vl.txt
    llvm/test/MC/X86/avx512fp16.s
    llvm/test/MC/X86/avx512fp16vl.s
    llvm/test/MC/X86/intel-syntax-avx512fp16.s
    llvm/test/MC/X86/intel-syntax-avx512fp16vl.s

Removed: 
    


################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 594415fe80692..a0926f230d46f 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -1876,6 +1876,84 @@ TARGET_BUILTIN(__builtin_ia32_cmpsh_mask, "UcV8xV8xIiUcIi", "ncV:128:", "avx512f
 TARGET_BUILTIN(__builtin_ia32_loadsh128_mask, "V8xV8x*V8xUc", "nV:128:", "avx512fp16")
 TARGET_BUILTIN(__builtin_ia32_storesh128_mask, "vV8x*V8xUc", "nV:128:", "avx512fp16")
 
+TARGET_BUILTIN(__builtin_ia32_vcvtpd2ph128_mask, "V8xV2dV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtpd2ph256_mask, "V8xV4dV8xUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtpd2ph512_mask, "V8xV8dV8xUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2pd128_mask, "V2dV8xV2dUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2pd256_mask, "V4dV8xV4dUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2pd512_mask, "V8dV8xV8dUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtsh2ss_round_mask, "V4fV4fV8xV4fUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtss2sh_round_mask, "V8xV8xV4fV8xUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtsd2sh_round_mask, "V8xV8xV2dV8xUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtsh2sd_round_mask, "V2dV2dV8xV2dUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2w128_mask, "V8sV8xV8sUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2w256_mask, "V16sV16xV16sUs", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2w512_mask, "V32sV32xV32sUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2w128_mask, "V8sV8xV8sUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2w256_mask, "V16sV16xV16sUs", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2w512_mask, "V32sV32xV32sUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtw2ph128_mask, "V8xV8sV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtw2ph256_mask, "V16xV16sV16xUs", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtw2ph512_mask, "V32xV32sV32xUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2uw128_mask, "V8UsV8xV8UsUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2uw256_mask, "V16UsV16xV16UsUs", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2uw512_mask, "V32UsV32xV32UsUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2uw128_mask, "V8UsV8xV8UsUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2uw256_mask, "V16UsV16xV16UsUs", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2uw512_mask, "V32UsV32xV32UsUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtuw2ph128_mask, "V8xV8UsV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtuw2ph256_mask, "V16xV16UsV16xUs", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtuw2ph512_mask, "V32xV32UsV32xUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2dq128_mask, "V4iV8xV4iUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2dq256_mask, "V8iV8xV8iUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2dq512_mask, "V16iV16xV16iUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2udq128_mask, "V4UiV8xV4UiUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2udq256_mask, "V8UiV8xV8UiUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2udq512_mask, "V16UiV16xV16UiUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtdq2ph128_mask, "V8xV4iV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtdq2ph256_mask, "V8xV8iV8xUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtdq2ph512_mask, "V16xV16iV16xUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtudq2ph128_mask, "V8xV4UiV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtudq2ph256_mask, "V8xV8UiV8xUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtudq2ph512_mask, "V16xV16UiV16xUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2dq128_mask, "V4iV8xV4iUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2dq256_mask, "V8iV8xV8iUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2dq512_mask, "V16iV16xV16iUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2udq128_mask, "V4UiV8xV4UiUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2udq256_mask, "V8UiV8xV8UiUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2udq512_mask, "V16UiV16xV16UiUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtqq2ph128_mask, "V8xV2OiV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtqq2ph256_mask, "V8xV4OiV8xUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtqq2ph512_mask, "V8xV8OiV8xUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2qq128_mask, "V2OiV8xV2OiUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2qq256_mask, "V4OiV8xV4OiUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2qq512_mask, "V8OiV8xV8OiUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtuqq2ph128_mask, "V8xV2UOiV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtuqq2ph256_mask, "V8xV4UOiV8xUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtuqq2ph512_mask, "V8xV8UOiV8xUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2uqq128_mask, "V2UOiV8xV2UOiUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2uqq256_mask, "V4UOiV8xV4UOiUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2uqq512_mask, "V8UOiV8xV8UOiUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2qq128_mask, "V2OiV8xV2OiUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2qq256_mask, "V4OiV8xV4OiUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2qq512_mask, "V8OiV8xV8OiUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2uqq128_mask, "V2UOiV8xV2UOiUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2uqq256_mask, "V4UOiV8xV4UOiUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2uqq512_mask, "V8UOiV8xV8UOiUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtsh2si32, "iV8xIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtsh2usi32, "UiV8xIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtusi2sh, "V8xV8xUiIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtsi2sh, "V8xV8xiIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttsh2si32, "iV8xIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttsh2usi32, "UiV8xIi", "ncV:128:", "avx512fp16")
+
+TARGET_BUILTIN(__builtin_ia32_vcvtph2psx128_mask, "V4fV8xV4fUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2psx256_mask, "V8fV8xV8fUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2psx512_mask, "V16fV16xV16fUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2phx128_mask, "V8xV4fV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2phx256_mask, "V8xV8fV8xUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2phx512_mask, "V16xV16fV16xUsIi", "ncV:512:", "avx512fp16")
+
 // generic select intrinsics
 TARGET_BUILTIN(__builtin_ia32_selectb_128, "V16cUsV16cV16c", "ncV:128:", "avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_selectb_256, "V32cUiV32cV32c", "ncV:256:", "avx512bw,avx512vl")

diff --git a/clang/include/clang/Basic/BuiltinsX86_64.def b/clang/include/clang/Basic/BuiltinsX86_64.def
index ce2b1decdf6ca..e0c9bec9b4e00 100644
--- a/clang/include/clang/Basic/BuiltinsX86_64.def
+++ b/clang/include/clang/Basic/BuiltinsX86_64.def
@@ -92,6 +92,12 @@ TARGET_BUILTIN(__builtin_ia32_cvtsi2sd64, "V2dV2dOiIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cvtsi2ss64, "V4fV4fOiIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cvtusi2sd64, "V2dV2dUOiIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cvtusi2ss64, "V4fV4fUOiIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvtsh2si64, "OiV8xIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtsh2usi64, "UOiV8xIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtusi642sh, "V8xV8xUOiIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtsi642sh, "V8xV8xOiIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttsh2si64, "OiV8xIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttsh2usi64, "UOiV8xIi", "ncV:128:", "avx512fp16")
 TARGET_BUILTIN(__builtin_ia32_directstore_u64, "vULi*ULi", "n", "movdiri")
 
 // UINTR

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 536a0bae13afe..08f4f644eb10d 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -12723,10 +12723,16 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_cvtdq2ps512_mask:
   case X86::BI__builtin_ia32_cvtqq2ps512_mask:
   case X86::BI__builtin_ia32_cvtqq2pd512_mask:
+  case X86::BI__builtin_ia32_vcvtw2ph512_mask:
+  case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
+  case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
     return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
   case X86::BI__builtin_ia32_cvtudq2ps512_mask:
   case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
   case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
+  case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
+  case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
+  case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
     return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
 
   case X86::BI__builtin_ia32_vfmaddss3:

diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h
index 58d7349c4905a..6a4a9d4a6c7eb 100644
--- a/clang/lib/Headers/avx512fp16intrin.h
+++ b/clang/lib/Headers/avx512fp16intrin.h
@@ -947,6 +947,996 @@ static __inline__ short __DEFAULT_FN_ATTRS128 _mm_cvtsi128_si16(__m128i __a) {
   return __b[0];
 }
 
+#define _mm512_cvt_roundpd_ph(A, R)                                            \
+  ((__m128h)__builtin_ia32_vcvtpd2ph512_mask(                                  \
+      (__v8df)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundpd_ph(W, U, A, R)                                 \
+  ((__m128h)__builtin_ia32_vcvtpd2ph512_mask((__v8df)(A), (__v8hf)(W),         \
+                                             (__mmask8)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundpd_ph(U, A, R)                                   \
+  ((__m128h)__builtin_ia32_vcvtpd2ph512_mask(                                  \
+      (__v8df)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS512 _mm512_cvtpd_ph(__m512d __A) {
+  return (__m128h)__builtin_ia32_vcvtpd2ph512_mask(
+      (__v8df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m512d __A) {
+  return (__m128h)__builtin_ia32_vcvtpd2ph512_mask(
+      (__v8df)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtpd_ph(__mmask8 __U, __m512d __A) {
+  return (__m128h)__builtin_ia32_vcvtpd2ph512_mask(
+      (__v8df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundph_pd(A, R)                                            \
+  ((__m512d)__builtin_ia32_vcvtph2pd512_mask(                                  \
+      (__v8hf)(A), (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundph_pd(W, U, A, R)                                 \
+  ((__m512d)__builtin_ia32_vcvtph2pd512_mask((__v8hf)(A), (__v8df)(W),         \
+                                             (__mmask8)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundph_pd(U, A, R)                                   \
+  ((__m512d)__builtin_ia32_vcvtph2pd512_mask(                                  \
+      (__v8hf)(A), (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R)))
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtph_pd(__m128h __A) {
+  return (__m512d)__builtin_ia32_vcvtph2pd512_mask(
+      (__v8hf)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtph_pd(__m512d __W, __mmask8 __U, __m128h __A) {
+  return (__m512d)__builtin_ia32_vcvtph2pd512_mask(
+      (__v8hf)__A, (__v8df)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
+  return (__m512d)__builtin_ia32_vcvtph2pd512_mask(
+      (__v8hf)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvt_roundsh_ss(A, B, R)                                            \
+  ((__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)(A), (__v8hf)(B),       \
+                                               (__v4sf)_mm_undefined_ps(),     \
+                                               (__mmask8)(-1), (int)(R)))
+
+#define _mm_mask_cvt_roundsh_ss(W, U, A, B, R)                                 \
+  ((__m128)__builtin_ia32_vcvtsh2ss_round_mask(                                \
+      (__v4sf)(A), (__v8hf)(B), (__v4sf)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_cvt_roundsh_ss(U, A, B, R)                                   \
+  ((__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)(A), (__v8hf)(B),       \
+                                               (__v4sf)_mm_setzero_ps(),       \
+                                               (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtsh_ss(__m128 __A,
+                                                            __m128h __B) {
+  return (__m128)__builtin_ia32_vcvtsh2ss_round_mask(
+      (__v4sf)__A, (__v8hf)__B, (__v4sf)_mm_undefined_ps(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtsh_ss(__m128 __W,
+                                                                 __mmask8 __U,
+                                                                 __m128 __A,
+                                                                 __m128h __B) {
+  return (__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)__A, (__v8hf)__B,
+                                                     (__v4sf)__W, (__mmask8)__U,
+                                                     _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsh_ss(__mmask8 __U,
+                                                                  __m128 __A,
+                                                                  __m128h __B) {
+  return (__m128)__builtin_ia32_vcvtsh2ss_round_mask(
+      (__v4sf)__A, (__v8hf)__B, (__v4sf)_mm_setzero_ps(), (__mmask8)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvt_roundss_sh(A, B, R)                                            \
+  ((__m128h)__builtin_ia32_vcvtss2sh_round_mask((__v8hf)(A), (__v4sf)(B),      \
+                                                (__v8hf)_mm_undefined_ph(),    \
+                                                (__mmask8)(-1), (int)(R)))
+
+#define _mm_mask_cvt_roundss_sh(W, U, A, B, R)                                 \
+  ((__m128h)__builtin_ia32_vcvtss2sh_round_mask(                               \
+      (__v8hf)(A), (__v4sf)(B), (__v8hf)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_cvt_roundss_sh(U, A, B, R)                                   \
+  ((__m128h)__builtin_ia32_vcvtss2sh_round_mask((__v8hf)(A), (__v4sf)(B),      \
+                                                (__v8hf)_mm_setzero_ph(),      \
+                                                (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtss_sh(__m128h __A,
+                                                             __m128 __B) {
+  return (__m128h)__builtin_ia32_vcvtss2sh_round_mask(
+      (__v8hf)__A, (__v4sf)__B, (__v8hf)_mm_undefined_ph(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sh(__m128h __W,
+                                                                  __mmask8 __U,
+                                                                  __m128h __A,
+                                                                  __m128 __B) {
+  return (__m128h)__builtin_ia32_vcvtss2sh_round_mask(
+      (__v8hf)__A, (__v4sf)__B, (__v8hf)__W, (__mmask8)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sh(__mmask8 __U,
+                                                                   __m128h __A,
+                                                                   __m128 __B) {
+  return (__m128h)__builtin_ia32_vcvtss2sh_round_mask(
+      (__v8hf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvt_roundsd_sh(A, B, R)                                            \
+  ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask((__v8hf)(A), (__v2df)(B),      \
+                                                (__v8hf)_mm_undefined_ph(),    \
+                                                (__mmask8)(-1), (int)(R)))
+
+#define _mm_mask_cvt_roundsd_sh(W, U, A, B, R)                                 \
+  ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask(                               \
+      (__v8hf)(A), (__v2df)(B), (__v8hf)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_cvt_roundsd_sh(U, A, B, R)                                   \
+  ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask((__v8hf)(A), (__v2df)(B),      \
+                                                (__v8hf)_mm_setzero_ph(),      \
+                                                (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtsd_sh(__m128h __A,
+                                                             __m128d __B) {
+  return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask(
+      (__v8hf)__A, (__v2df)__B, (__v8hf)_mm_undefined_ph(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtsd_sh(__m128h __W,
+                                                                  __mmask8 __U,
+                                                                  __m128h __A,
+                                                                  __m128d __B) {
+  return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask(
+      (__v8hf)__A, (__v2df)__B, (__v8hf)__W, (__mmask8)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtsd_sh(__mmask8 __U, __m128h __A, __m128d __B) {
+  return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask(
+      (__v8hf)__A, (__v2df)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvt_roundsh_sd(A, B, R)                                            \
+  ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask((__v2df)(A), (__v8hf)(B),      \
+                                                (__v2df)_mm_undefined_pd(),    \
+                                                (__mmask8)(-1), (int)(R)))
+
+#define _mm_mask_cvt_roundsh_sd(W, U, A, B, R)                                 \
+  ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask(                               \
+      (__v2df)(A), (__v8hf)(B), (__v2df)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_cvt_roundsh_sd(U, A, B, R)                                   \
+  ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask((__v2df)(A), (__v8hf)(B),      \
+                                                (__v2df)_mm_setzero_pd(),      \
+                                                (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtsh_sd(__m128d __A,
+                                                             __m128h __B) {
+  return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask(
+      (__v2df)__A, (__v8hf)__B, (__v2df)_mm_undefined_pd(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtsh_sd(__m128d __W,
+                                                                  __mmask8 __U,
+                                                                  __m128d __A,
+                                                                  __m128h __B) {
+  return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask(
+      (__v2df)__A, (__v8hf)__B, (__v2df)__W, (__mmask8)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtsh_sd(__mmask8 __U, __m128d __A, __m128h __B) {
+  return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask(
+      (__v2df)__A, (__v8hf)__B, (__v2df)_mm_setzero_pd(), (__mmask8)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundph_epi16(A, R)                                         \
+  ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A),                      \
+                                            (__v32hi)_mm512_undefined_epi32(), \
+                                            (__mmask32)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundph_epi16(W, U, A, R)                              \
+  ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A), (__v32hi)(W),        \
+                                            (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundph_epi16(U, A, R)                                \
+  ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A),                      \
+                                            (__v32hi)_mm512_setzero_epi32(),   \
+                                            (__mmask32)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvtph_epi16(__m512h __A) {
+  return (__m512i)__builtin_ia32_vcvtph2w512_mask(
+      (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)-1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtph_epi16(__m512i __W, __mmask32 __U, __m512h __A) {
+  return (__m512i)__builtin_ia32_vcvtph2w512_mask(
+      (__v32hf)__A, (__v32hi)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtph_epi16(__mmask32 __U, __m512h __A) {
+  return (__m512i)__builtin_ia32_vcvtph2w512_mask(
+      (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvtt_roundph_epi16(A, R)                                        \
+  ((__m512i)__builtin_ia32_vcvttph2w512_mask(                                  \
+      (__v32hf)(A), (__v32hi)_mm512_undefined_epi32(), (__mmask32)(-1),        \
+      (int)(R)))
+
+#define _mm512_mask_cvtt_roundph_epi16(W, U, A, R)                             \
+  ((__m512i)__builtin_ia32_vcvttph2w512_mask((__v32hf)(A), (__v32hi)(W),       \
+                                             (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_cvtt_roundph_epi16(U, A, R)                               \
+  ((__m512i)__builtin_ia32_vcvttph2w512_mask((__v32hf)(A),                     \
+                                             (__v32hi)_mm512_setzero_epi32(),  \
+                                             (__mmask32)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvttph_epi16(__m512h __A) {
+  return (__m512i)__builtin_ia32_vcvttph2w512_mask(
+      (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)-1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvttph_epi16(__m512i __W, __mmask32 __U, __m512h __A) {
+  return (__m512i)__builtin_ia32_vcvttph2w512_mask(
+      (__v32hf)__A, (__v32hi)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvttph_epi16(__mmask32 __U, __m512h __A) {
+  return (__m512i)__builtin_ia32_vcvttph2w512_mask(
+      (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundepi16_ph(A, R)                                         \
+  ((__m512h)__builtin_ia32_vcvtw2ph512_mask((__v32hi)(A),                      \
+                                            (__v32hf)_mm512_undefined_ph(),    \
+                                            (__mmask32)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundepi16_ph(W, U, A, R)                              \
+  ((__m512h)__builtin_ia32_vcvtw2ph512_mask((__v32hi)(A), (__v32hf)(W),        \
+                                            (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundepi16_ph(U, A, R)                                \
+  ((__m512h)__builtin_ia32_vcvtw2ph512_mask(                                   \
+      (__v32hi)(A), (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_cvtepi16_ph(__m512i __A) {
+  return (__m512h)__builtin_ia32_vcvtw2ph512_mask(
+      (__v32hi)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)-1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtepi16_ph(__m512h __W, __mmask32 __U, __m512i __A) {
+  return (__m512h)__builtin_ia32_vcvtw2ph512_mask(
+      (__v32hi)__A, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtepi16_ph(__mmask32 __U, __m512i __A) {
+  return (__m512h)__builtin_ia32_vcvtw2ph512_mask(
+      (__v32hi)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundph_epu16(A, R)                                         \
+  ((__m512i)__builtin_ia32_vcvtph2uw512_mask(                                  \
+      (__v32hf)(A), (__v32hu)_mm512_undefined_epi32(), (__mmask32)(-1),        \
+      (int)(R)))
+
+#define _mm512_mask_cvt_roundph_epu16(W, U, A, R)                              \
+  ((__m512i)__builtin_ia32_vcvtph2uw512_mask((__v32hf)(A), (__v32hu)(W),       \
+                                             (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundph_epu16(U, A, R)                                \
+  ((__m512i)__builtin_ia32_vcvtph2uw512_mask((__v32hf)(A),                     \
+                                             (__v32hu)_mm512_setzero_epi32(),  \
+                                             (__mmask32)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvtph_epu16(__m512h __A) {
+  return (__m512i)__builtin_ia32_vcvtph2uw512_mask(
+      (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)-1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtph_epu16(__m512i __W, __mmask32 __U, __m512h __A) {
+  return (__m512i)__builtin_ia32_vcvtph2uw512_mask(
+      (__v32hf)__A, (__v32hu)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtph_epu16(__mmask32 __U, __m512h __A) {
+  return (__m512i)__builtin_ia32_vcvtph2uw512_mask(
+      (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvtt_roundph_epu16(A, R)                                        \
+  ((__m512i)__builtin_ia32_vcvttph2uw512_mask(                                 \
+      (__v32hf)(A), (__v32hu)_mm512_undefined_epi32(), (__mmask32)(-1),        \
+      (int)(R)))
+
+#define _mm512_mask_cvtt_roundph_epu16(W, U, A, R)                             \
+  ((__m512i)__builtin_ia32_vcvttph2uw512_mask((__v32hf)(A), (__v32hu)(W),      \
+                                              (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_cvtt_roundph_epu16(U, A, R)                               \
+  ((__m512i)__builtin_ia32_vcvttph2uw512_mask((__v32hf)(A),                    \
+                                              (__v32hu)_mm512_setzero_epi32(), \
+                                              (__mmask32)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvttph_epu16(__m512h __A) {
+  return (__m512i)__builtin_ia32_vcvttph2uw512_mask(
+      (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)-1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvttph_epu16(__m512i __W, __mmask32 __U, __m512h __A) {
+  return (__m512i)__builtin_ia32_vcvttph2uw512_mask(
+      (__v32hf)__A, (__v32hu)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvttph_epu16(__mmask32 __U, __m512h __A) {
+  return (__m512i)__builtin_ia32_vcvttph2uw512_mask(
+      (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundepu16_ph(A, R)                                         \
+  ((__m512h)__builtin_ia32_vcvtuw2ph512_mask((__v32hu)(A),                     \
+                                             (__v32hf)_mm512_undefined_ph(),   \
+                                             (__mmask32)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundepu16_ph(W, U, A, R)                              \
+  ((__m512h)__builtin_ia32_vcvtuw2ph512_mask((__v32hu)(A), (__v32hf)(W),       \
+                                             (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundepu16_ph(U, A, R)                                \
+  ((__m512h)__builtin_ia32_vcvtuw2ph512_mask(                                  \
+      (__v32hu)(A), (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_cvtepu16_ph(__m512i __A) {
+  return (__m512h)__builtin_ia32_vcvtuw2ph512_mask(
+      (__v32hu)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)-1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtepu16_ph(__m512h __W, __mmask32 __U, __m512i __A) {
+  return (__m512h)__builtin_ia32_vcvtuw2ph512_mask(
+      (__v32hu)__A, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtepu16_ph(__mmask32 __U, __m512i __A) {
+  return (__m512h)__builtin_ia32_vcvtuw2ph512_mask(
+      (__v32hu)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundph_epi32(A, R)                                         \
+  ((__m512i)__builtin_ia32_vcvtph2dq512_mask(                                  \
+      (__v16hf)(A), (__v16si)_mm512_undefined_epi32(), (__mmask16)(-1),        \
+      (int)(R)))
+
+#define _mm512_mask_cvt_roundph_epi32(W, U, A, R)                              \
+  ((__m512i)__builtin_ia32_vcvtph2dq512_mask((__v16hf)(A), (__v16si)(W),       \
+                                             (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundph_epi32(U, A, R)                                \
+  ((__m512i)__builtin_ia32_vcvtph2dq512_mask((__v16hf)(A),                     \
+                                             (__v16si)_mm512_setzero_epi32(),  \
+                                             (__mmask16)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvtph_epi32(__m256h __A) {
+  return (__m512i)__builtin_ia32_vcvtph2dq512_mask(
+      (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)-1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtph_epi32(__m512i __W, __mmask16 __U, __m256h __A) {
+  return (__m512i)__builtin_ia32_vcvtph2dq512_mask(
+      (__v16hf)__A, (__v16si)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtph_epi32(__mmask16 __U, __m256h __A) {
+  return (__m512i)__builtin_ia32_vcvtph2dq512_mask(
+      (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundph_epu32(A, R)                                         \
+  ((__m512i)__builtin_ia32_vcvtph2udq512_mask(                                 \
+      (__v16hf)(A), (__v16su)_mm512_undefined_epi32(), (__mmask16)(-1),        \
+      (int)(R)))
+
+#define _mm512_mask_cvt_roundph_epu32(W, U, A, R)                              \
+  ((__m512i)__builtin_ia32_vcvtph2udq512_mask((__v16hf)(A), (__v16su)(W),      \
+                                              (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundph_epu32(U, A, R)                                \
+  ((__m512i)__builtin_ia32_vcvtph2udq512_mask((__v16hf)(A),                    \
+                                              (__v16su)_mm512_setzero_epi32(), \
+                                              (__mmask16)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvtph_epu32(__m256h __A) {
+  return (__m512i)__builtin_ia32_vcvtph2udq512_mask(
+      (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)-1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtph_epu32(__m512i __W, __mmask16 __U, __m256h __A) {
+  return (__m512i)__builtin_ia32_vcvtph2udq512_mask(
+      (__v16hf)__A, (__v16su)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtph_epu32(__mmask16 __U, __m256h __A) {
+  return (__m512i)__builtin_ia32_vcvtph2udq512_mask(
+      (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundepi32_ph(A, R)                                         \
+  ((__m256h)__builtin_ia32_vcvtdq2ph512_mask((__v16si)(A),                     \
+                                             (__v16hf)_mm256_undefined_ph(),   \
+                                             (__mmask16)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundepi32_ph(W, U, A, R)                              \
+  ((__m256h)__builtin_ia32_vcvtdq2ph512_mask((__v16si)(A), (__v16hf)(W),       \
+                                             (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundepi32_ph(U, A, R)                                \
+  ((__m256h)__builtin_ia32_vcvtdq2ph512_mask(                                  \
+      (__v16si)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R)))
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS512
+_mm512_cvtepi32_ph(__m512i __A) {
+  return (__m256h)__builtin_ia32_vcvtdq2ph512_mask(
+      (__v16si)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtepi32_ph(__m256h __W, __mmask16 __U, __m512i __A) {
+  return (__m256h)__builtin_ia32_vcvtdq2ph512_mask(
+      (__v16si)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtepi32_ph(__mmask16 __U, __m512i __A) {
+  return (__m256h)__builtin_ia32_vcvtdq2ph512_mask(
+      (__v16si)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundepu32_ph(A, R)                                         \
+  ((__m256h)__builtin_ia32_vcvtudq2ph512_mask((__v16su)(A),                    \
+                                              (__v16hf)_mm256_undefined_ph(),  \
+                                              (__mmask16)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundepu32_ph(W, U, A, R)                              \
+  ((__m256h)__builtin_ia32_vcvtudq2ph512_mask((__v16su)(A), (__v16hf)(W),      \
+                                              (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundepu32_ph(U, A, R)                                \
+  ((__m256h)__builtin_ia32_vcvtudq2ph512_mask(                                 \
+      (__v16su)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R)))
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS512
+_mm512_cvtepu32_ph(__m512i __A) {
+  return (__m256h)__builtin_ia32_vcvtudq2ph512_mask(
+      (__v16su)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtepu32_ph(__m256h __W, __mmask16 __U, __m512i __A) {
+  return (__m256h)__builtin_ia32_vcvtudq2ph512_mask(
+      (__v16su)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtepu32_ph(__mmask16 __U, __m512i __A) {
+  return (__m256h)__builtin_ia32_vcvtudq2ph512_mask(
+      (__v16su)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvtt_roundph_epi32(A, R)                                        \
+  ((__m512i)__builtin_ia32_vcvttph2dq512_mask(                                 \
+      (__v16hf)(A), (__v16si)_mm512_undefined_epi32(), (__mmask16)(-1),        \
+      (int)(R)))
+
+#define _mm512_mask_cvtt_roundph_epi32(W, U, A, R)                             \
+  ((__m512i)__builtin_ia32_vcvttph2dq512_mask((__v16hf)(A), (__v16si)(W),      \
+                                              (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_cvtt_roundph_epi32(U, A, R)                               \
+  ((__m512i)__builtin_ia32_vcvttph2dq512_mask((__v16hf)(A),                    \
+                                              (__v16si)_mm512_setzero_epi32(), \
+                                              (__mmask16)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvttph_epi32(__m256h __A) {
+  return (__m512i)__builtin_ia32_vcvttph2dq512_mask(
+      (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)-1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvttph_epi32(__m512i __W, __mmask16 __U, __m256h __A) {
+  return (__m512i)__builtin_ia32_vcvttph2dq512_mask(
+      (__v16hf)__A, (__v16si)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvttph_epi32(__mmask16 __U, __m256h __A) {
+  return (__m512i)__builtin_ia32_vcvttph2dq512_mask(
+      (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvtt_roundph_epu32(A, R)                                        \
+  ((__m512i)__builtin_ia32_vcvttph2udq512_mask(                                \
+      (__v16hf)(A), (__v16su)_mm512_undefined_epi32(), (__mmask16)(-1),        \
+      (int)(R)))
+
+#define _mm512_mask_cvtt_roundph_epu32(W, U, A, R)                             \
+  ((__m512i)__builtin_ia32_vcvttph2udq512_mask((__v16hf)(A), (__v16su)(W),     \
+                                               (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_cvtt_roundph_epu32(U, A, R)                               \
+  ((__m512i)__builtin_ia32_vcvttph2udq512_mask(                                \
+      (__v16hf)(A), (__v16su)_mm512_setzero_epi32(), (__mmask16)(U),           \
+      (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvttph_epu32(__m256h __A) {
+  return (__m512i)__builtin_ia32_vcvttph2udq512_mask(
+      (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)-1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvttph_epu32(__m512i __W, __mmask16 __U, __m256h __A) {
+  return (__m512i)__builtin_ia32_vcvttph2udq512_mask(
+      (__v16hf)__A, (__v16su)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvttph_epu32(__mmask16 __U, __m256h __A) {
+  return (__m512i)__builtin_ia32_vcvttph2udq512_mask(
+      (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundepi64_ph(A, R)                                         \
+  ((__m128h)__builtin_ia32_vcvtqq2ph512_mask(                                  \
+      (__v8di)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundepi64_ph(W, U, A, R)                              \
+  ((__m128h)__builtin_ia32_vcvtqq2ph512_mask((__v8di)(A), (__v8hf)(W),         \
+                                             (__mmask8)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundepi64_ph(U, A, R)                                \
+  ((__m128h)__builtin_ia32_vcvtqq2ph512_mask(                                  \
+      (__v8di)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS512
+_mm512_cvtepi64_ph(__m512i __A) {
+  return (__m128h)__builtin_ia32_vcvtqq2ph512_mask(
+      (__v8di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m512i __A) {
+  return (__m128h)__builtin_ia32_vcvtqq2ph512_mask(
+      (__v8di)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtepi64_ph(__mmask8 __U, __m512i __A) {
+  return (__m128h)__builtin_ia32_vcvtqq2ph512_mask(
+      (__v8di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundph_epi64(A, R)                                         \
+  ((__m512i)__builtin_ia32_vcvtph2qq512_mask((__v8hf)(A),                      \
+                                             (__v8di)_mm512_undefined_epi32(), \
+                                             (__mmask8)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundph_epi64(W, U, A, R)                              \
+  ((__m512i)__builtin_ia32_vcvtph2qq512_mask((__v8hf)(A), (__v8di)(W),         \
+                                             (__mmask8)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundph_epi64(U, A, R)                                \
+  ((__m512i)__builtin_ia32_vcvtph2qq512_mask(                                  \
+      (__v8hf)(A), (__v8di)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvtph_epi64(__m128h __A) {
+  return (__m512i)__builtin_ia32_vcvtph2qq512_mask(
+      (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtph_epi64(__m512i __W, __mmask8 __U, __m128h __A) {
+  return (__m512i)__builtin_ia32_vcvtph2qq512_mask(
+      (__v8hf)__A, (__v8di)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) {
+  return (__m512i)__builtin_ia32_vcvtph2qq512_mask(
+      (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundepu64_ph(A, R)                                         \
+  ((__m128h)__builtin_ia32_vcvtuqq2ph512_mask(                                 \
+      (__v8du)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundepu64_ph(W, U, A, R)                              \
+  ((__m128h)__builtin_ia32_vcvtuqq2ph512_mask((__v8du)(A), (__v8hf)(W),        \
+                                              (__mmask8)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundepu64_ph(U, A, R)                                \
+  ((__m128h)__builtin_ia32_vcvtuqq2ph512_mask(                                 \
+      (__v8du)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS512
+_mm512_cvtepu64_ph(__m512i __A) {
+  return (__m128h)__builtin_ia32_vcvtuqq2ph512_mask(
+      (__v8du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m512i __A) {
+  return (__m128h)__builtin_ia32_vcvtuqq2ph512_mask(
+      (__v8du)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtepu64_ph(__mmask8 __U, __m512i __A) {
+  return (__m128h)__builtin_ia32_vcvtuqq2ph512_mask(
+      (__v8du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundph_epu64(A, R)                                         \
+  ((__m512i)__builtin_ia32_vcvtph2uqq512_mask(                                 \
+      (__v8hf)(A), (__v8du)_mm512_undefined_epi32(), (__mmask8)(-1),           \
+      (int)(R)))
+
+#define _mm512_mask_cvt_roundph_epu64(W, U, A, R)                              \
+  ((__m512i)__builtin_ia32_vcvtph2uqq512_mask((__v8hf)(A), (__v8du)(W),        \
+                                              (__mmask8)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundph_epu64(U, A, R)                                \
+  ((__m512i)__builtin_ia32_vcvtph2uqq512_mask(                                 \
+      (__v8hf)(A), (__v8du)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvtph_epu64(__m128h __A) {
+  return (__m512i)__builtin_ia32_vcvtph2uqq512_mask(
+      (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtph_epu64(__m512i __W, __mmask8 __U, __m128h __A) {
+  return (__m512i)__builtin_ia32_vcvtph2uqq512_mask(
+      (__v8hf)__A, (__v8du)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) {
+  return (__m512i)__builtin_ia32_vcvtph2uqq512_mask(
+      (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvtt_roundph_epi64(A, R)                                        \
+  ((__m512i)__builtin_ia32_vcvttph2qq512_mask(                                 \
+      (__v8hf)(A), (__v8di)_mm512_undefined_epi32(), (__mmask8)(-1),           \
+      (int)(R)))
+
+#define _mm512_mask_cvtt_roundph_epi64(W, U, A, R)                             \
+  ((__m512i)__builtin_ia32_vcvttph2qq512_mask((__v8hf)(A), (__v8di)(W),        \
+                                              (__mmask8)(U), (int)(R)))
+
+#define _mm512_maskz_cvtt_roundph_epi64(U, A, R)                               \
+  ((__m512i)__builtin_ia32_vcvttph2qq512_mask(                                 \
+      (__v8hf)(A), (__v8di)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvttph_epi64(__m128h __A) {
+  return (__m512i)__builtin_ia32_vcvttph2qq512_mask(
+      (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvttph_epi64(__m512i __W, __mmask8 __U, __m128h __A) {
+  return (__m512i)__builtin_ia32_vcvttph2qq512_mask(
+      (__v8hf)__A, (__v8di)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) {
+  return (__m512i)__builtin_ia32_vcvttph2qq512_mask(
+      (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvtt_roundph_epu64(A, R)                                        \
+  ((__m512i)__builtin_ia32_vcvttph2uqq512_mask(                                \
+      (__v8hf)(A), (__v8du)_mm512_undefined_epi32(), (__mmask8)(-1),           \
+      (int)(R)))
+
+#define _mm512_mask_cvtt_roundph_epu64(W, U, A, R)                             \
+  ((__m512i)__builtin_ia32_vcvttph2uqq512_mask((__v8hf)(A), (__v8du)(W),       \
+                                               (__mmask8)(U), (int)(R)))
+
+#define _mm512_maskz_cvtt_roundph_epu64(U, A, R)                               \
+  ((__m512i)__builtin_ia32_vcvttph2uqq512_mask(                                \
+      (__v8hf)(A), (__v8du)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvttph_epu64(__m128h __A) {
+  return (__m512i)__builtin_ia32_vcvttph2uqq512_mask(
+      (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvttph_epu64(__m512i __W, __mmask8 __U, __m128h __A) {
+  return (__m512i)__builtin_ia32_vcvttph2uqq512_mask(
+      (__v8hf)__A, (__v8du)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) {
+  return (__m512i)__builtin_ia32_vcvttph2uqq512_mask(
+      (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvt_roundsh_i32(A, R)                                              \
+  ((int)__builtin_ia32_vcvtsh2si32((__v8hf)(A), (int)(R)))
+
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvtsh_i32(__m128h __A) {
+  return (int)__builtin_ia32_vcvtsh2si32((__v8hf)__A, _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvt_roundsh_u32(A, R)                                              \
+  ((unsigned int)__builtin_ia32_vcvtsh2usi32((__v8hf)(A), (int)(R)))
+
+static __inline__ unsigned int __DEFAULT_FN_ATTRS128
+_mm_cvtsh_u32(__m128h __A) {
+  return (unsigned int)__builtin_ia32_vcvtsh2usi32((__v8hf)__A,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __x86_64__
+#define _mm_cvt_roundsh_i64(A, R)                                              \
+  ((long long)__builtin_ia32_vcvtsh2si64((__v8hf)(A), (int)(R)))
+
+static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvtsh_i64(__m128h __A) {
+  return (long long)__builtin_ia32_vcvtsh2si64((__v8hf)__A,
+                                               _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvt_roundsh_u64(A, R)                                              \
+  ((unsigned long long)__builtin_ia32_vcvtsh2usi64((__v8hf)(A), (int)(R)))
+
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
+_mm_cvtsh_u64(__m128h __A) {
+  return (unsigned long long)__builtin_ia32_vcvtsh2usi64(
+      (__v8hf)__A, _MM_FROUND_CUR_DIRECTION);
+}
+#endif // __x86_64__
+
+#define _mm_cvt_roundu32_sh(A, B, R)                                           \
+  ((__m128h)__builtin_ia32_vcvtusi2sh((__v8hf)(A), (unsigned int)(B), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_cvtu32_sh(__m128h __A, unsigned int __B) {
+  __A[0] = __B;
+  return __A;
+}
+
+#ifdef __x86_64__
+#define _mm_cvt_roundu64_sh(A, B, R)                                           \
+  ((__m128h)__builtin_ia32_vcvtusi642sh((__v8hf)(A), (unsigned long long)(B),  \
+                                        (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_cvtu64_sh(__m128h __A, unsigned long long __B) {
+  __A[0] = __B;
+  return __A;
+}
+#endif
+
+#define _mm_cvt_roundi32_sh(A, B, R)                                           \
+  ((__m128h)__builtin_ia32_vcvtsi2sh((__v8hf)(A), (int)(B), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvti32_sh(__m128h __A,
+                                                              int __B) {
+  __A[0] = __B;
+  return __A;
+}
+
+#ifdef __x86_64__
+#define _mm_cvt_roundi64_sh(A, B, R)                                           \
+  ((__m128h)__builtin_ia32_vcvtsi642sh((__v8hf)(A), (long long)(B), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvti64_sh(__m128h __A,
+                                                              long long __B) {
+  __A[0] = __B;
+  return __A;
+}
+#endif
+
+#define _mm_cvtt_roundsh_i32(A, R)                                             \
+  ((int)__builtin_ia32_vcvttsh2si32((__v8hf)(A), (int)(R)))
+
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsh_i32(__m128h __A) {
+  return (int)__builtin_ia32_vcvttsh2si32((__v8hf)__A,
+                                          _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __x86_64__
+#define _mm_cvtt_roundsh_i64(A, R)                                             \
+  ((long long)__builtin_ia32_vcvttsh2si64((__v8hf)(A), (int)(R)))
+
+static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvttsh_i64(__m128h __A) {
+  return (long long)__builtin_ia32_vcvttsh2si64((__v8hf)__A,
+                                                _MM_FROUND_CUR_DIRECTION);
+}
+#endif
+
+#define _mm_cvtt_roundsh_u32(A, R)                                             \
+  ((unsigned int)__builtin_ia32_vcvttsh2usi32((__v8hf)(A), (int)(R)))
+
+static __inline__ unsigned int __DEFAULT_FN_ATTRS128
+_mm_cvttsh_u32(__m128h __A) {
+  return (unsigned int)__builtin_ia32_vcvttsh2usi32((__v8hf)__A,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __x86_64__
+#define _mm_cvtt_roundsh_u64(A, R)                                             \
+  ((unsigned long long)__builtin_ia32_vcvttsh2usi64((__v8hf)(A), (int)(R)))
+
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
+_mm_cvttsh_u64(__m128h __A) {
+  return (unsigned long long)__builtin_ia32_vcvttsh2usi64(
+      (__v8hf)__A, _MM_FROUND_CUR_DIRECTION);
+}
+#endif
+
+#define _mm512_cvtx_roundph_ps(A, R)                                           \
+  ((__m512)__builtin_ia32_vcvtph2psx512_mask((__v16hf)(A),                     \
+                                             (__v16sf)_mm512_undefined_ps(),   \
+                                             (__mmask16)(-1), (int)(R)))
+
+#define _mm512_mask_cvtx_roundph_ps(W, U, A, R)                                \
+  ((__m512)__builtin_ia32_vcvtph2psx512_mask((__v16hf)(A), (__v16sf)(W),       \
+                                             (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_cvtx_roundph_ps(U, A, R)                                  \
+  ((__m512)__builtin_ia32_vcvtph2psx512_mask(                                  \
+      (__v16hf)(A), (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (int)(R)))
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtxph_ps(__m256h __A) {
+  return (__m512)__builtin_ia32_vcvtph2psx512_mask(
+      (__v16hf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)-1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtxph_ps(__m512 __W, __mmask16 __U, __m256h __A) {
+  return (__m512)__builtin_ia32_vcvtph2psx512_mask(
+      (__v16hf)__A, (__v16sf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtxph_ps(__mmask16 __U, __m256h __A) {
+  return (__m512)__builtin_ia32_vcvtph2psx512_mask(
+      (__v16hf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvtx_roundps_ph(A, R)                                           \
+  ((__m256h)__builtin_ia32_vcvtps2phx512_mask((__v16sf)(A),                    \
+                                              (__v16hf)_mm256_undefined_ph(),  \
+                                              (__mmask16)(-1), (int)(R)))
+
+#define _mm512_mask_cvtx_roundps_ph(W, U, A, R)                                \
+  ((__m256h)__builtin_ia32_vcvtps2phx512_mask((__v16sf)(A), (__v16hf)(W),      \
+                                              (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_cvtx_roundps_ph(U, A, R)                                  \
+  ((__m256h)__builtin_ia32_vcvtps2phx512_mask(                                 \
+      (__v16sf)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R)))
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS512 _mm512_cvtxps_ph(__m512 __A) {
+  return (__m256h)__builtin_ia32_vcvtps2phx512_mask(
+      (__v16sf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtxps_ph(__m256h __W, __mmask16 __U, __m512 __A) {
+  return (__m256h)__builtin_ia32_vcvtps2phx512_mask(
+      (__v16sf)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtxps_ph(__mmask16 __U, __m512 __A) {
+  return (__m256h)__builtin_ia32_vcvtps2phx512_mask(
+      (__v16sf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
 static __inline__ _Float16 __DEFAULT_FN_ATTRS512
 _mm512_reduce_add_ph(__m512h __W) {
   return __builtin_ia32_reduce_fadd_ph512(-0.0f16, __W);

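As a rough usage sketch (not part of the diff above), the new scalar FP16
conversion intrinsics added to avx512fp16intrin.h can be exercised like this;
it assumes <immintrin.h> and -mavx512fp16, and the helper function names are
made up for illustration:

  #include <immintrin.h>

  // Truncating conversion (vcvttsh2si) of the low fp16 element to a
  // signed 32-bit integer.
  static int low_sh_to_i32(__m128h a) {
    return _mm_cvttsh_i32(a);
  }

  // Convert a 32-bit integer and place it in the low fp16 element,
  // using the current rounding direction (vcvtsi2sh).
  static __m128h i32_to_low_sh(__m128h a, int v) {
    return _mm_cvti32_sh(a, v);
  }
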
diff  --git a/clang/lib/Headers/avx512vlfp16intrin.h b/clang/lib/Headers/avx512vlfp16intrin.h
index 0f23054e6fa10..ab2cf436ee16d 100644
--- a/clang/lib/Headers/avx512vlfp16intrin.h
+++ b/clang/lib/Headers/avx512vlfp16intrin.h
@@ -327,6 +327,772 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_abs_ph(__m128h __A) {
   ((__mmask8)__builtin_ia32_cmpph128_mask(                                     \
       (__v8hf)(__m128h)(a), (__v8hf)(__m128h)(b), (int)(p), (__mmask8)(m)))
 
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtpd_ph(__m128d __A) {
+  return (__m128h)__builtin_ia32_vcvtpd2ph128_mask(
+      (__v2df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_ph(__m128h __W,
+                                                                  __mmask8 __U,
+                                                                  __m128d __A) {
+  return (__m128h)__builtin_ia32_vcvtpd2ph128_mask((__v2df)__A, (__v8hf)__W,
+                                                   (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtpd_ph(__mmask8 __U, __m128d __A) {
+  return (__m128h)__builtin_ia32_vcvtpd2ph128_mask(
+      (__v2df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtpd_ph(__m256d __A) {
+  return (__m128h)__builtin_ia32_vcvtpd2ph256_mask(
+      (__v4df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m256d __A) {
+  return (__m128h)__builtin_ia32_vcvtpd2ph256_mask((__v4df)__A, (__v8hf)__W,
+                                                   (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtpd_ph(__mmask8 __U, __m256d __A) {
+  return (__m128h)__builtin_ia32_vcvtpd2ph256_mask(
+      (__v4df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtph_pd(__m128h __A) {
+  return (__m128d)__builtin_ia32_vcvtph2pd128_mask(
+      (__v8hf)__A, (__v2df)_mm_undefined_pd(), (__mmask8)-1);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_pd(__m128d __W,
+                                                                  __mmask8 __U,
+                                                                  __m128h __A) {
+  return (__m128d)__builtin_ia32_vcvtph2pd128_mask((__v8hf)__A, (__v2df)__W,
+                                                   (__mmask8)__U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
+  return (__m128d)__builtin_ia32_vcvtph2pd128_mask(
+      (__v8hf)__A, (__v2df)_mm_setzero_pd(), (__mmask8)__U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtph_pd(__m128h __A) {
+  return (__m256d)__builtin_ia32_vcvtph2pd256_mask(
+      (__v8hf)__A, (__v4df)_mm256_undefined_pd(), (__mmask8)-1);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtph_pd(__m256d __W, __mmask8 __U, __m128h __A) {
+  return (__m256d)__builtin_ia32_vcvtph2pd256_mask((__v8hf)__A, (__v4df)__W,
+                                                   (__mmask8)__U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
+  return (__m256d)__builtin_ia32_vcvtph2pd256_mask(
+      (__v8hf)__A, (__v4df)_mm256_setzero_pd(), (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi16(__m128h __A) {
+  return (__m128i)__builtin_ia32_vcvtph2w128_mask(
+      (__v8hf)__A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvtph_epi16(__m128i __W, __mmask8 __U, __m128h __A) {
+  return (__m128i)__builtin_ia32_vcvtph2w128_mask((__v8hf)__A, (__v8hi)__W,
+                                                  (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtph_epi16(__mmask8 __U, __m128h __A) {
+  return (__m128i)__builtin_ia32_vcvtph2w128_mask(
+      (__v8hf)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvtph_epi16(__m256h __A) {
+  return (__m256i)__builtin_ia32_vcvtph2w256_mask(
+      (__v16hf)__A, (__v16hi)_mm256_undefined_si256(), (__mmask16)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtph_epi16(__m256i __W, __mmask16 __U, __m256h __A) {
+  return (__m256i)__builtin_ia32_vcvtph2w256_mask((__v16hf)__A, (__v16hi)__W,
+                                                  (__mmask16)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtph_epi16(__mmask16 __U, __m256h __A) {
+  return (__m256i)__builtin_ia32_vcvtph2w256_mask(
+      (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi16(__m128h __A) {
+  return (__m128i)__builtin_ia32_vcvttph2w128_mask(
+      (__v8hf)__A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttph_epi16(__m128i __W, __mmask8 __U, __m128h __A) {
+  return (__m128i)__builtin_ia32_vcvttph2w128_mask((__v8hf)__A, (__v8hi)__W,
+                                                   (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttph_epi16(__mmask8 __U, __m128h __A) {
+  return (__m128i)__builtin_ia32_vcvttph2w128_mask(
+      (__v8hf)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttph_epi16(__m256h __A) {
+  return (__m256i)__builtin_ia32_vcvttph2w256_mask(
+      (__v16hf)__A, (__v16hi)_mm256_undefined_si256(), (__mmask16)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvttph_epi16(__m256i __W, __mmask16 __U, __m256h __A) {
+  return (__m256i)__builtin_ia32_vcvttph2w256_mask((__v16hf)__A, (__v16hi)__W,
+                                                   (__mmask16)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvttph_epi16(__mmask16 __U, __m256h __A) {
+  return (__m256i)__builtin_ia32_vcvttph2w256_mask(
+      (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi16_ph(__m128i __A) {
+  return (__m128h) __builtin_convertvector((__v8hi)__A, __v8hf);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_cvtepi16_ph(__m128h __W, __mmask8 __U, __m128i __A) {
+  return (__m128h)__builtin_ia32_selectph_128(
+      (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)__W);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtepi16_ph(__mmask8 __U, __m128i __A) {
+  return (__m128h)__builtin_ia32_selectph_128(
+      (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)_mm_setzero_ph());
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_cvtepi16_ph(__m256i __A) {
+  return (__m256h) __builtin_convertvector((__v16hi)__A, __v16hf);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtepi16_ph(__m256h __W, __mmask16 __U, __m256i __A) {
+  return (__m256h)__builtin_ia32_selectph_256(
+      (__mmask16)__U, (__v16hf)_mm256_cvtepi16_ph(__A), (__v16hf)__W);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtepi16_ph(__mmask16 __U, __m256i __A) {
+  return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
+                                              (__v16hf)_mm256_cvtepi16_ph(__A),
+                                              (__v16hf)_mm256_setzero_ph());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu16(__m128h __A) {
+  return (__m128i)__builtin_ia32_vcvtph2uw128_mask(
+      (__v8hf)__A, (__v8hu)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvtph_epu16(__m128i __W, __mmask8 __U, __m128h __A) {
+  return (__m128i)__builtin_ia32_vcvtph2uw128_mask((__v8hf)__A, (__v8hu)__W,
+                                                   (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtph_epu16(__mmask8 __U, __m128h __A) {
+  return (__m128i)__builtin_ia32_vcvtph2uw128_mask(
+      (__v8hf)__A, (__v8hu)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvtph_epu16(__m256h __A) {
+  return (__m256i)__builtin_ia32_vcvtph2uw256_mask(
+      (__v16hf)__A, (__v16hu)_mm256_undefined_si256(), (__mmask16)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtph_epu16(__m256i __W, __mmask16 __U, __m256h __A) {
+  return (__m256i)__builtin_ia32_vcvtph2uw256_mask((__v16hf)__A, (__v16hu)__W,
+                                                   (__mmask16)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtph_epu16(__mmask16 __U, __m256h __A) {
+  return (__m256i)__builtin_ia32_vcvtph2uw256_mask(
+      (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu16(__m128h __A) {
+  return (__m128i)__builtin_ia32_vcvttph2uw128_mask(
+      (__v8hf)__A, (__v8hu)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttph_epu16(__m128i __W, __mmask8 __U, __m128h __A) {
+  return (__m128i)__builtin_ia32_vcvttph2uw128_mask((__v8hf)__A, (__v8hu)__W,
+                                                    (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttph_epu16(__mmask8 __U, __m128h __A) {
+  return (__m128i)__builtin_ia32_vcvttph2uw128_mask(
+      (__v8hf)__A, (__v8hu)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttph_epu16(__m256h __A) {
+  return (__m256i)__builtin_ia32_vcvttph2uw256_mask(
+      (__v16hf)__A, (__v16hu)_mm256_undefined_si256(), (__mmask16)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvttph_epu16(__m256i __W, __mmask16 __U, __m256h __A) {
+  return (__m256i)__builtin_ia32_vcvttph2uw256_mask((__v16hf)__A, (__v16hu)__W,
+                                                    (__mmask16)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvttph_epu16(__mmask16 __U, __m256h __A) {
+  return (__m256i)__builtin_ia32_vcvttph2uw256_mask(
+      (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu16_ph(__m128i __A) {
+  return (__m128h) __builtin_convertvector((__v8hu)__A, __v8hf);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_cvtepu16_ph(__m128h __W, __mmask8 __U, __m128i __A) {
+  return (__m128h)__builtin_ia32_selectph_128(
+      (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)__W);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtepu16_ph(__mmask8 __U, __m128i __A) {
+  return (__m128h)__builtin_ia32_selectph_128(
+      (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)_mm_setzero_ph());
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_cvtepu16_ph(__m256i __A) {
+  return (__m256h) __builtin_convertvector((__v16hu)__A, __v16hf);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtepu16_ph(__m256h __W, __mmask16 __U, __m256i __A) {
+  return (__m256h)__builtin_ia32_selectph_256(
+      (__mmask16)__U, (__v16hf)_mm256_cvtepu16_ph(__A), (__v16hf)__W);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtepu16_ph(__mmask16 __U, __m256i __A) {
+  return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
+                                              (__v16hf)_mm256_cvtepu16_ph(__A),
+                                              (__v16hf)_mm256_setzero_ph());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi32(__m128h __A) {
+  return (__m128i)__builtin_ia32_vcvtph2dq128_mask(
+      (__v8hf)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvtph_epi32(__m128i __W, __mmask8 __U, __m128h __A) {
+  return (__m128i)__builtin_ia32_vcvtph2dq128_mask((__v8hf)__A, (__v4si)__W,
+                                                   (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtph_epi32(__mmask8 __U, __m128h __A) {
+  return (__m128i)__builtin_ia32_vcvtph2dq128_mask(
+      (__v8hf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvtph_epi32(__m128h __A) {
+  return (__m256i)__builtin_ia32_vcvtph2dq256_mask(
+      (__v8hf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtph_epi32(__m256i __W, __mmask8 __U, __m128h __A) {
+  return (__m256i)__builtin_ia32_vcvtph2dq256_mask((__v8hf)__A, (__v8si)__W,
+                                                   (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtph_epi32(__mmask8 __U, __m128h __A) {
+  return (__m256i)__builtin_ia32_vcvtph2dq256_mask(
+      (__v8hf)__A, (__v8si)_mm256_setzero_si256(), (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu32(__m128h __A) {
+  return (__m128i)__builtin_ia32_vcvtph2udq128_mask(
+      (__v8hf)__A, (__v4su)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvtph_epu32(__m128i __W, __mmask8 __U, __m128h __A) {
+  return (__m128i)__builtin_ia32_vcvtph2udq128_mask((__v8hf)__A, (__v4su)__W,
+                                                    (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtph_epu32(__mmask8 __U, __m128h __A) {
+  return (__m128i)__builtin_ia32_vcvtph2udq128_mask(
+      (__v8hf)__A, (__v4su)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvtph_epu32(__m128h __A) {
+  return (__m256i)__builtin_ia32_vcvtph2udq256_mask(
+      (__v8hf)__A, (__v8su)_mm256_undefined_si256(), (__mmask8)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtph_epu32(__m256i __W, __mmask8 __U, __m128h __A) {
+  return (__m256i)__builtin_ia32_vcvtph2udq256_mask((__v8hf)__A, (__v8su)__W,
+                                                    (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtph_epu32(__mmask8 __U, __m128h __A) {
+  return (__m256i)__builtin_ia32_vcvtph2udq256_mask(
+      (__v8hf)__A, (__v8su)_mm256_setzero_si256(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi32_ph(__m128i __A) {
+  return (__m128h)__builtin_ia32_vcvtdq2ph128_mask(
+      (__v4si)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m128i __A) {
+  return (__m128h)__builtin_ia32_vcvtdq2ph128_mask((__v4si)__A, (__v8hf)__W,
+                                                   (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtepi32_ph(__mmask8 __U, __m128i __A) {
+  return (__m128h)__builtin_ia32_vcvtdq2ph128_mask(
+      (__v4si)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_cvtepi32_ph(__m256i __A) {
+  return (__m128h) __builtin_convertvector((__v8si)__A, __v8hf);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m256i __A) {
+  return (__m128h)__builtin_ia32_selectph_128(
+      (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)__W);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtepi32_ph(__mmask8 __U, __m256i __A) {
+  return (__m128h)__builtin_ia32_selectph_128(
+      (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)_mm_setzero_ph());
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu32_ph(__m128i __A) {
+  return (__m128h)__builtin_ia32_vcvtudq2ph128_mask(
+      (__v4su)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m128i __A) {
+  return (__m128h)__builtin_ia32_vcvtudq2ph128_mask((__v4su)__A, (__v8hf)__W,
+                                                    (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtepu32_ph(__mmask8 __U, __m128i __A) {
+  return (__m128h)__builtin_ia32_vcvtudq2ph128_mask(
+      (__v4su)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_cvtepu32_ph(__m256i __A) {
+  return (__m128h) __builtin_convertvector((__v8su)__A, __v8hf);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m256i __A) {
+  return (__m128h)__builtin_ia32_selectph_128(
+      (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)__W);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtepu32_ph(__mmask8 __U, __m256i __A) {
+  return (__m128h)__builtin_ia32_selectph_128(
+      (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)_mm_setzero_ph());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi32(__m128h __A) {
+  return (__m128i)__builtin_ia32_vcvttph2dq128_mask(
+      (__v8hf)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttph_epi32(__m128i __W, __mmask8 __U, __m128h __A) {
+  return (__m128i)__builtin_ia32_vcvttph2dq128_mask((__v8hf)__A, (__v4si)__W,
+                                                    (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttph_epi32(__mmask8 __U, __m128h __A) {
+  return (__m128i)__builtin_ia32_vcvttph2dq128_mask(
+      (__v8hf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttph_epi32(__m128h __A) {
+  return (__m256i)__builtin_ia32_vcvttph2dq256_mask(
+      (__v8hf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvttph_epi32(__m256i __W, __mmask8 __U, __m128h __A) {
+  return (__m256i)__builtin_ia32_vcvttph2dq256_mask((__v8hf)__A, (__v8si)__W,
+                                                    (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvttph_epi32(__mmask8 __U, __m128h __A) {
+  return (__m256i)__builtin_ia32_vcvttph2dq256_mask(
+      (__v8hf)__A, (__v8si)_mm256_setzero_si256(), (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu32(__m128h __A) {
+  return (__m128i)__builtin_ia32_vcvttph2udq128_mask(
+      (__v8hf)__A, (__v4su)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttph_epu32(__m128i __W, __mmask8 __U, __m128h __A) {
+  return (__m128i)__builtin_ia32_vcvttph2udq128_mask((__v8hf)__A, (__v4su)__W,
+                                                     (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttph_epu32(__mmask8 __U, __m128h __A) {
+  return (__m128i)__builtin_ia32_vcvttph2udq128_mask(
+      (__v8hf)__A, (__v4su)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttph_epu32(__m128h __A) {
+  return (__m256i)__builtin_ia32_vcvttph2udq256_mask(
+      (__v8hf)__A, (__v8su)_mm256_undefined_si256(), (__mmask8)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvttph_epu32(__m256i __W, __mmask8 __U, __m128h __A) {
+  return (__m256i)__builtin_ia32_vcvttph2udq256_mask((__v8hf)__A, (__v8su)__W,
+                                                     (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvttph_epu32(__mmask8 __U, __m128h __A) {
+  return (__m256i)__builtin_ia32_vcvttph2udq256_mask(
+      (__v8hf)__A, (__v8su)_mm256_setzero_si256(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi64_ph(__m128i __A) {
+  return (__m128h)__builtin_ia32_vcvtqq2ph128_mask(
+      (__v2di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m128i __A) {
+  return (__m128h)__builtin_ia32_vcvtqq2ph128_mask((__v2di)__A, (__v8hf)__W,
+                                                   (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtepi64_ph(__mmask8 __U, __m128i __A) {
+  return (__m128h)__builtin_ia32_vcvtqq2ph128_mask(
+      (__v2di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_cvtepi64_ph(__m256i __A) {
+  return (__m128h)__builtin_ia32_vcvtqq2ph256_mask(
+      (__v4di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m256i __A) {
+  return (__m128h)__builtin_ia32_vcvtqq2ph256_mask((__v4di)__A, (__v8hf)__W,
+                                                   (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtepi64_ph(__mmask8 __U, __m256i __A) {
+  return (__m128h)__builtin_ia32_vcvtqq2ph256_mask(
+      (__v4di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi64(__m128h __A) {
+  return (__m128i)__builtin_ia32_vcvtph2qq128_mask(
+      (__v8hf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvtph_epi64(__m128i __W, __mmask8 __U, __m128h __A) {
+  return (__m128i)__builtin_ia32_vcvtph2qq128_mask((__v8hf)__A, (__v2di)__W,
+                                                   (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) {
+  return (__m128i)__builtin_ia32_vcvtph2qq128_mask(
+      (__v8hf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvtph_epi64(__m128h __A) {
+  return (__m256i)__builtin_ia32_vcvtph2qq256_mask(
+      (__v8hf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtph_epi64(__m256i __W, __mmask8 __U, __m128h __A) {
+  return (__m256i)__builtin_ia32_vcvtph2qq256_mask((__v8hf)__A, (__v4di)__W,
+                                                   (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) {
+  return (__m256i)__builtin_ia32_vcvtph2qq256_mask(
+      (__v8hf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu64_ph(__m128i __A) {
+  return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask(
+      (__v2du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m128i __A) {
+  return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask((__v2du)__A, (__v8hf)__W,
+                                                    (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtepu64_ph(__mmask8 __U, __m128i __A) {
+  return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask(
+      (__v2du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_cvtepu64_ph(__m256i __A) {
+  return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask(
+      (__v4du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m256i __A) {
+  return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask((__v4du)__A, (__v8hf)__W,
+                                                    (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtepu64_ph(__mmask8 __U, __m256i __A) {
+  return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask(
+      (__v4du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu64(__m128h __A) {
+  return (__m128i)__builtin_ia32_vcvtph2uqq128_mask(
+      (__v8hf)__A, (__v2du)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvtph_epu64(__m128i __W, __mmask8 __U, __m128h __A) {
+  return (__m128i)__builtin_ia32_vcvtph2uqq128_mask((__v8hf)__A, (__v2du)__W,
+                                                    (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) {
+  return (__m128i)__builtin_ia32_vcvtph2uqq128_mask(
+      (__v8hf)__A, (__v2du)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvtph_epu64(__m128h __A) {
+  return (__m256i)__builtin_ia32_vcvtph2uqq256_mask(
+      (__v8hf)__A, (__v4du)_mm256_undefined_si256(), (__mmask8)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtph_epu64(__m256i __W, __mmask8 __U, __m128h __A) {
+  return (__m256i)__builtin_ia32_vcvtph2uqq256_mask((__v8hf)__A, (__v4du)__W,
+                                                    (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) {
+  return (__m256i)__builtin_ia32_vcvtph2uqq256_mask(
+      (__v8hf)__A, (__v4du)_mm256_setzero_si256(), (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi64(__m128h __A) {
+  return (__m128i)__builtin_ia32_vcvttph2qq128_mask(
+      (__v8hf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttph_epi64(__m128i __W, __mmask8 __U, __m128h __A) {
+  return (__m128i)__builtin_ia32_vcvttph2qq128_mask((__v8hf)__A, (__v2di)__W,
+                                                    (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) {
+  return (__m128i)__builtin_ia32_vcvttph2qq128_mask(
+      (__v8hf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttph_epi64(__m128h __A) {
+  return (__m256i)__builtin_ia32_vcvttph2qq256_mask(
+      (__v8hf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvttph_epi64(__m256i __W, __mmask8 __U, __m128h __A) {
+  return (__m256i)__builtin_ia32_vcvttph2qq256_mask((__v8hf)__A, (__v4di)__W,
+                                                    (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) {
+  return (__m256i)__builtin_ia32_vcvttph2qq256_mask(
+      (__v8hf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu64(__m128h __A) {
+  return (__m128i)__builtin_ia32_vcvttph2uqq128_mask(
+      (__v8hf)__A, (__v2du)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttph_epu64(__m128i __W, __mmask8 __U, __m128h __A) {
+  return (__m128i)__builtin_ia32_vcvttph2uqq128_mask((__v8hf)__A, (__v2du)__W,
+                                                     (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) {
+  return (__m128i)__builtin_ia32_vcvttph2uqq128_mask(
+      (__v8hf)__A, (__v2du)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttph_epu64(__m128h __A) {
+  return (__m256i)__builtin_ia32_vcvttph2uqq256_mask(
+      (__v8hf)__A, (__v4du)_mm256_undefined_si256(), (__mmask8)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvttph_epu64(__m256i __W, __mmask8 __U, __m128h __A) {
+  return (__m256i)__builtin_ia32_vcvttph2uqq256_mask((__v8hf)__A, (__v4du)__W,
+                                                     (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) {
+  return (__m256i)__builtin_ia32_vcvttph2uqq256_mask(
+      (__v8hf)__A, (__v4du)_mm256_setzero_si256(), (__mmask8)__U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtxph_ps(__m128h __A) {
+  return (__m128)__builtin_ia32_vcvtph2psx128_mask(
+      (__v8hf)__A, (__v4sf)_mm_undefined_ps(), (__mmask8)-1);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtxph_ps(__m128 __W,
+                                                                  __mmask8 __U,
+                                                                  __m128h __A) {
+  return (__m128)__builtin_ia32_vcvtph2psx128_mask((__v8hf)__A, (__v4sf)__W,
+                                                   (__mmask8)__U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtxph_ps(__mmask8 __U, __m128h __A) {
+  return (__m128)__builtin_ia32_vcvtph2psx128_mask(
+      (__v8hf)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)__U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtxph_ps(__m128h __A) {
+  return (__m256)__builtin_ia32_vcvtph2psx256_mask(
+      (__v8hf)__A, (__v8sf)_mm256_undefined_ps(), (__mmask8)-1);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtxph_ps(__m256 __W, __mmask8 __U, __m128h __A) {
+  return (__m256)__builtin_ia32_vcvtph2psx256_mask((__v8hf)__A, (__v8sf)__W,
+                                                   (__mmask8)__U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtxph_ps(__mmask8 __U, __m128h __A) {
+  return (__m256)__builtin_ia32_vcvtph2psx256_mask(
+      (__v8hf)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtxps_ph(__m128 __A) {
+  return (__m128h)__builtin_ia32_vcvtps2phx128_mask(
+      (__v4sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtxps_ph(__m128h __W,
+                                                                   __mmask8 __U,
+                                                                   __m128 __A) {
+  return (__m128h)__builtin_ia32_vcvtps2phx128_mask((__v4sf)__A, (__v8hf)__W,
+                                                    (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtxps_ph(__mmask8 __U, __m128 __A) {
+  return (__m128h)__builtin_ia32_vcvtps2phx128_mask(
+      (__v4sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtxps_ph(__m256 __A) {
+  return (__m128h)__builtin_ia32_vcvtps2phx256_mask(
+      (__v8sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtxps_ph(__m128h __W, __mmask8 __U, __m256 __A) {
+  return (__m128h)__builtin_ia32_vcvtps2phx256_mask((__v8sf)__A, (__v8hf)__W,
+                                                    (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtxps_ph(__mmask8 __U, __m256 __A) {
+  return (__m128h)__builtin_ia32_vcvtps2phx256_mask(
+      (__v8sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_blend_ph(__mmask8 __U,
                                                                   __m128h __A,
                                                                   __m128h __W) {

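For reference, a small usage sketch (not part of the diff) of the packed
AVX512-VL conversions declared above; it assumes <immintrin.h> and
-mavx512fp16 -mavx512vl, and the function names are hypothetical:

  #include <immintrin.h>

  // vcvtph2dq: convert the low four fp16 elements to 32-bit signed ints.
  static __m128i ph_to_epi32(__m128h a) {
    return _mm_cvtph_epi32(a);
  }

  // Convert eight 32-bit signed ints to eight fp16 values (xmm result).
  static __m128h epi32_to_ph(__m256i a) {
    return _mm256_cvtepi32_ph(a);
  }

  // Zero-masked truncating conversion (vcvttph2dq): lanes whose mask bit
  // is clear are zeroed in the result.
  static __m128i masked_trunc_ph_to_epi32(__mmask8 k, __m128h a) {
    return _mm_maskz_cvttph_epi32(k, a);
  }
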
diff  --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 063fd38f97c46..69560027f330a 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3878,6 +3878,10 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_vcvttss2si64:
   case X86::BI__builtin_ia32_vcvttss2usi32:
   case X86::BI__builtin_ia32_vcvttss2usi64:
+  case X86::BI__builtin_ia32_vcvttsh2si32:
+  case X86::BI__builtin_ia32_vcvttsh2si64:
+  case X86::BI__builtin_ia32_vcvttsh2usi32:
+  case X86::BI__builtin_ia32_vcvttsh2usi64:
     ArgNum = 1;
     break;
   case X86::BI__builtin_ia32_maxpd512:
@@ -3888,6 +3892,8 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_minph512:
     ArgNum = 2;
     break;
+  case X86::BI__builtin_ia32_vcvtph2pd512_mask:
+  case X86::BI__builtin_ia32_vcvtph2psx512_mask:
   case X86::BI__builtin_ia32_cvtps2pd512_mask:
   case X86::BI__builtin_ia32_cvttpd2dq512_mask:
   case X86::BI__builtin_ia32_cvttpd2qq512_mask:
@@ -3897,6 +3903,12 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_cvttps2qq512_mask:
   case X86::BI__builtin_ia32_cvttps2udq512_mask:
   case X86::BI__builtin_ia32_cvttps2uqq512_mask:
+  case X86::BI__builtin_ia32_vcvttph2w512_mask:
+  case X86::BI__builtin_ia32_vcvttph2uw512_mask:
+  case X86::BI__builtin_ia32_vcvttph2dq512_mask:
+  case X86::BI__builtin_ia32_vcvttph2udq512_mask:
+  case X86::BI__builtin_ia32_vcvttph2qq512_mask:
+  case X86::BI__builtin_ia32_vcvttph2uqq512_mask:
   case X86::BI__builtin_ia32_exp2pd_mask:
   case X86::BI__builtin_ia32_exp2ps_mask:
   case X86::BI__builtin_ia32_getexppd512_mask:
@@ -3916,6 +3928,8 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_cmpsd_mask:
   case X86::BI__builtin_ia32_cmpss_mask:
   case X86::BI__builtin_ia32_cmpsh_mask:
+  case X86::BI__builtin_ia32_vcvtsh2sd_round_mask:
+  case X86::BI__builtin_ia32_vcvtsh2ss_round_mask:
   case X86::BI__builtin_ia32_cvtss2sd_round_mask:
   case X86::BI__builtin_ia32_getexpsd128_round_mask:
   case X86::BI__builtin_ia32_getexpss128_round_mask:
@@ -3965,6 +3979,10 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_vcvtss2si64:
   case X86::BI__builtin_ia32_vcvtss2usi32:
   case X86::BI__builtin_ia32_vcvtss2usi64:
+  case X86::BI__builtin_ia32_vcvtsh2si32:
+  case X86::BI__builtin_ia32_vcvtsh2si64:
+  case X86::BI__builtin_ia32_vcvtsh2usi32:
+  case X86::BI__builtin_ia32_vcvtsh2usi64:
   case X86::BI__builtin_ia32_sqrtpd512:
   case X86::BI__builtin_ia32_sqrtps512:
     ArgNum = 1;
@@ -3988,11 +4006,17 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_cvtusi2sd64:
   case X86::BI__builtin_ia32_cvtusi2ss32:
   case X86::BI__builtin_ia32_cvtusi2ss64:
+  case X86::BI__builtin_ia32_vcvtusi2sh:
+  case X86::BI__builtin_ia32_vcvtusi642sh:
+  case X86::BI__builtin_ia32_vcvtsi2sh:
+  case X86::BI__builtin_ia32_vcvtsi642sh:
     ArgNum = 2;
     HasRC = true;
     break;
   case X86::BI__builtin_ia32_cvtdq2ps512_mask:
   case X86::BI__builtin_ia32_cvtudq2ps512_mask:
+  case X86::BI__builtin_ia32_vcvtpd2ph512_mask:
+  case X86::BI__builtin_ia32_vcvtps2phx512_mask:
   case X86::BI__builtin_ia32_cvtpd2ps512_mask:
   case X86::BI__builtin_ia32_cvtpd2dq512_mask:
   case X86::BI__builtin_ia32_cvtpd2qq512_mask:
@@ -4006,6 +4030,18 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_cvtqq2ps512_mask:
   case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
   case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
+  case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
+  case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
+  case X86::BI__builtin_ia32_vcvtw2ph512_mask:
+  case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
+  case X86::BI__builtin_ia32_vcvtph2w512_mask:
+  case X86::BI__builtin_ia32_vcvtph2uw512_mask:
+  case X86::BI__builtin_ia32_vcvtph2dq512_mask:
+  case X86::BI__builtin_ia32_vcvtph2udq512_mask:
+  case X86::BI__builtin_ia32_vcvtph2qq512_mask:
+  case X86::BI__builtin_ia32_vcvtph2uqq512_mask:
+  case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
+  case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
     ArgNum = 3;
     HasRC = true;
     break;
@@ -4026,6 +4062,8 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_scalefsd_round_mask:
   case X86::BI__builtin_ia32_scalefss_round_mask:
   case X86::BI__builtin_ia32_cvtsd2ss_round_mask:
+  case X86::BI__builtin_ia32_vcvtss2sh_round_mask:
+  case X86::BI__builtin_ia32_vcvtsd2sh_round_mask:
   case X86::BI__builtin_ia32_sqrtsd_round_mask:
   case X86::BI__builtin_ia32_sqrtss_round_mask:
   case X86::BI__builtin_ia32_vfmaddsd3_mask:

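The SemaChecking additions above extend the existing rounding/SAE immediate
check to the new FP16 conversion builtins, so the rounding argument of the
*_round_* forms has to be a constant expression. A minimal sketch of valid
usage (not part of the diff; assumes <immintrin.h> and -mavx512fp16, with
hypothetical function names):

  #include <immintrin.h>

  static __m128h i32_to_sh_rz(__m128h a, int v) {
    // The rounding control must be an immediate such as this constant.
    return _mm_cvt_roundi32_sh(a, v, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
  }

  static unsigned int sh_to_u32_rn(__m128h a) {
    return _mm_cvt_roundsh_u32(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
  }
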
diff  --git a/clang/test/CodeGen/X86/avx512fp16-builtins.c b/clang/test/CodeGen/X86/avx512fp16-builtins.c
index 4f627daff7e6c..d4fe44bc259ee 100644
--- a/clang/test/CodeGen/X86/avx512fp16-builtins.c
+++ b/clang/test/CodeGen/X86/avx512fp16-builtins.c
@@ -1542,6 +1542,1096 @@ __m128i test_mm_cvtsi16_si128(short A) {
   return _mm_cvtsi16_si128(A);
 }
 
+__m128h test_mm512_cvt_roundpd_ph(__m512d A) {
+  // CHECK-LABEL: test_mm512_cvt_roundpd_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.512
+  return _mm512_cvt_roundpd_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm512_mask_cvt_roundpd_ph(__m128h A, __mmask8 B, __m512d C) {
+  // CHECK-LABEL: test_mm512_mask_cvt_roundpd_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.512
+  return _mm512_mask_cvt_roundpd_ph(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm512_maskz_cvt_roundpd_ph(__mmask8 A, __m512d B) {
+  // CHECK-LABEL: test_mm512_maskz_cvt_roundpd_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.512
+  return _mm512_maskz_cvt_roundpd_ph(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm512_cvtpd_ph(__m512d A) {
+  // CHECK-LABEL: test_mm512_cvtpd_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.512
+  return _mm512_cvtpd_ph(A);
+}
+
+__m128h test_mm512_mask_cvtpd_ph(__m128h A, __mmask8 B, __m512d C) {
+  // CHECK-LABEL: test_mm512_mask_cvtpd_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.512
+  return _mm512_mask_cvtpd_ph(A, B, C);
+}
+
+__m128h test_mm512_maskz_cvtpd_ph(__mmask8 A, __m512d B) {
+  // CHECK-LABEL: test_mm512_maskz_cvtpd_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.512
+  return _mm512_maskz_cvtpd_ph(A, B);
+}
+
+__m512d test_mm512_cvt_roundph_pd(__m128h A) {
+  // CHECK-LABEL: test_mm512_cvt_roundph_pd
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.512
+  return _mm512_cvt_roundph_pd(A, _MM_FROUND_NO_EXC);
+}
+
+__m512d test_mm512_mask_cvt_roundph_pd(__m512d A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm512_mask_cvt_roundph_pd
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.512
+  return _mm512_mask_cvt_roundph_pd(A, B, C, _MM_FROUND_NO_EXC);
+}
+
+__m512d test_mm512_maskz_cvt_roundph_pd(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm512_maskz_cvt_roundph_pd
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.512
+  return _mm512_maskz_cvt_roundph_pd(A, B, _MM_FROUND_NO_EXC);
+}
+
+__m512d test_mm512_cvtph_pd(__m128h A) {
+  // CHECK-LABEL: test_mm512_cvtph_pd
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.512
+  return _mm512_cvtph_pd(A);
+}
+
+__m512d test_mm512_mask_cvtph_pd(__m512d A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm512_mask_cvtph_pd
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.512
+  return _mm512_mask_cvtph_pd(A, B, C);
+}
+
+__m512d test_mm512_maskz_cvtph_pd(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm512_maskz_cvtph_pd
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.512
+  return _mm512_maskz_cvtph_pd(A, B);
+}
+
+__m128 test_mm_cvt_roundsh_ss(__m128 A, __m128h B) {
+  // CHECK-LABEL: test_mm_cvt_roundsh_ss
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtsh2ss.round
+  return _mm_cvt_roundsh_ss(A, B, _MM_FROUND_NO_EXC);
+}
+
+__m128 test_mm_mask_cvt_roundsh_ss(__m128 A, __mmask8 B, __m128 C, __m128h D) {
+  // CHECK-LABEL: test_mm_mask_cvt_roundsh_ss
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtsh2ss.round
+  return _mm_mask_cvt_roundsh_ss(A, B, C, D, _MM_FROUND_NO_EXC);
+}
+
+__m128 test_mm_maskz_cvt_roundsh_ss(__mmask8 A, __m128 B, __m128h C) {
+  // CHECK-LABEL: test_mm_maskz_cvt_roundsh_ss
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtsh2ss.round
+  return _mm_maskz_cvt_roundsh_ss(A, B, C, _MM_FROUND_NO_EXC);
+}
+
+__m128 test_mm_cvtsh_ss(__m128 A, __m128h B) {
+  // CHECK-LABEL: test_mm_cvtsh_ss
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtsh2ss.round
+  return _mm_cvtsh_ss(A, B);
+}
+
+__m128 test_mm_mask_cvtsh_ss(__m128 A, __mmask8 B, __m128 C, __m128h D) {
+  // CHECK-LABEL: test_mm_mask_cvtsh_ss
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtsh2ss.round
+  return _mm_mask_cvtsh_ss(A, B, C, D);
+}
+
+__m128 test_mm_maskz_cvtsh_ss(__mmask8 A, __m128 B, __m128h C) {
+  // CHECK-LABEL: test_mm_maskz_cvtsh_ss
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtsh2ss.round
+  return _mm_maskz_cvtsh_ss(A, B, C);
+}
+
+__m128h test_mm_cvt_roundss_sh(__m128h A, __m128 B) {
+  // CHECK-LABEL: test_mm_cvt_roundss_sh
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtss2sh.round
+  return _mm_cvt_roundss_sh(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm_mask_cvt_roundss_sh(__m128h A, __mmask8 B, __m128h C, __m128 D) {
+  // CHECK-LABEL: test_mm_mask_cvt_roundss_sh
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtss2sh.round
+  return _mm_mask_cvt_roundss_sh(A, B, C, D, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm_maskz_cvt_roundss_sh(__mmask8 A, __m128h B, __m128 C) {
+  // CHECK-LABEL: test_mm_maskz_cvt_roundss_sh
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtss2sh.round
+  return _mm_maskz_cvt_roundss_sh(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm_cvtss_sh(__m128h A, __m128 B) {
+  // CHECK-LABEL: test_mm_cvtss_sh
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtss2sh.round
+  return _mm_cvtss_sh(A, B);
+}
+
+__m128h test_mm_mask_cvtss_sh(__m128h A, __mmask8 B, __m128h C, __m128 D) {
+  // CHECK-LABEL: test_mm_mask_cvtss_sh
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtss2sh.round
+  return _mm_mask_cvtss_sh(A, B, C, D);
+}
+
+__m128h test_mm_maskz_cvtss_sh(__mmask8 A, __m128h B, __m128 C) {
+  // CHECK-LABEL: test_mm_maskz_cvtss_sh
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtss2sh.round
+  return _mm_maskz_cvtss_sh(A, B, C);
+}
+
+__m128h test_mm_cvt_roundsd_sh(__m128h A, __m128d B) {
+  // CHECK-LABEL: test_mm_cvt_roundsd_sh
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtsd2sh.round
+  return _mm_cvt_roundsd_sh(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm_mask_cvt_roundsd_sh(__m128h A, __mmask8 B, __m128h C, __m128d D) {
+  // CHECK-LABEL: test_mm_mask_cvt_roundsd_sh
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtsd2sh.round
+  return _mm_mask_cvt_roundsd_sh(A, B, C, D, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm_maskz_cvt_roundsd_sh(__mmask8 A, __m128h B, __m128d C) {
+  // CHECK-LABEL: test_mm_maskz_cvt_roundsd_sh
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtsd2sh.round
+  return _mm_maskz_cvt_roundsd_sh(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm_cvtsd_sh(__m128h A, __m128d B) {
+  // CHECK-LABEL: test_mm_cvtsd_sh
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtsd2sh.round
+  return _mm_cvtsd_sh(A, B);
+}
+
+__m128h test_mm_mask_cvtsd_sh(__m128h A, __mmask8 B, __m128h C, __m128d D) {
+  // CHECK-LABEL: test_mm_mask_cvtsd_sh
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtsd2sh.round
+  return _mm_mask_cvtsd_sh(A, B, C, D);
+}
+
+__m128h test_mm_maskz_cvtsd_sh(__mmask8 A, __m128h B, __m128d C) {
+  // CHECK-LABEL: test_mm_maskz_cvtsd_sh
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtsd2sh.round
+  return _mm_maskz_cvtsd_sh(A, B, C);
+}
+
+__m128d test_mm_cvt_roundsh_sd(__m128d A, __m128h B) {
+  // CHECK-LABEL: test_mm_cvt_roundsh_sd
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtsh2sd.round
+  return _mm_cvt_roundsh_sd(A, B, _MM_FROUND_NO_EXC);
+}
+
+__m128d test_mm_mask_cvt_roundsh_sd(__m128d A, __mmask8 B, __m128d C, __m128h D) {
+  // CHECK-LABEL: test_mm_mask_cvt_roundsh_sd
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtsh2sd.round
+  return _mm_mask_cvt_roundsh_sd(A, B, C, D, _MM_FROUND_NO_EXC);
+}
+
+__m128d test_mm_maskz_cvt_roundsh_sd(__mmask8 A, __m128d B, __m128h C) {
+  // CHECK-LABEL: test_mm_maskz_cvt_roundsh_sd
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtsh2sd.round
+  return _mm_maskz_cvt_roundsh_sd(A, B, C, _MM_FROUND_NO_EXC);
+}
+
+__m128d test_mm_cvtsh_sd(__m128d A, __m128h B) {
+  // CHECK-LABEL: test_mm_cvtsh_sd
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtsh2sd.round
+  return _mm_cvtsh_sd(A, B);
+}
+
+__m128d test_mm_mask_cvtsh_sd(__m128d A, __mmask8 B, __m128d C, __m128h D) {
+  // CHECK-LABEL: test_mm_mask_cvtsh_sd
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtsh2sd.round
+  return _mm_mask_cvtsh_sd(A, B, C, D);
+}
+
+__m128d test_mm_maskz_cvtsh_sd(__mmask8 A, __m128d B, __m128h C) {
+  // CHECK-LABEL: test_mm_maskz_cvtsh_sd
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtsh2sd.round
+  return _mm_maskz_cvtsh_sd(A, B, C);
+}
+
+__m512i test_mm512_cvt_roundph_epi16(__m512h A) {
+  // CHECK-LABEL: test_mm512_cvt_roundph_epi16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.512
+  return _mm512_cvt_roundph_epi16(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvt_roundph_epi16(__m512i A, __mmask32 B, __m512h C) {
+  // CHECK-LABEL: test_mm512_mask_cvt_roundph_epi16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.512
+  return _mm512_mask_cvt_roundph_epi16(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvt_roundph_epi16(__mmask32 A, __m512h B) {
+  // CHECK-LABEL: test_mm512_maskz_cvt_roundph_epi16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.512
+  return _mm512_maskz_cvt_roundph_epi16(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvtph_epi16(__m512h A) {
+  // CHECK-LABEL: test_mm512_cvtph_epi16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.512
+  return _mm512_cvtph_epi16(A);
+}
+
+__m512i test_mm512_mask_cvtph_epi16(__m512i A, __mmask32 B, __m512h C) {
+  // CHECK-LABEL: test_mm512_mask_cvtph_epi16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.512
+  return _mm512_mask_cvtph_epi16(A, B, C);
+}
+
+__m512i test_mm512_maskz_cvtph_epi16(__mmask32 A, __m512h B) {
+  // CHECK-LABEL: test_mm512_maskz_cvtph_epi16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.512
+  return _mm512_maskz_cvtph_epi16(A, B);
+}
+
+__m512i test_mm512_cvtt_roundph_epi16(__m512h A) {
+  // CHECK-LABEL: test_mm512_cvtt_roundph_epi16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.512
+  return _mm512_cvtt_roundph_epi16(A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvtt_roundph_epi16(__m512i A, __mmask32 B, __m512h C) {
+  // CHECK-LABEL: test_mm512_mask_cvtt_roundph_epi16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.512
+  return _mm512_mask_cvtt_roundph_epi16(A, B, C, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvtt_roundph_epi16(__mmask32 A, __m512h B) {
+  // CHECK-LABEL: test_mm512_maskz_cvtt_roundph_epi16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.512
+  return _mm512_maskz_cvtt_roundph_epi16(A, B, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvttph_epi16(__m512h A) {
+  // CHECK-LABEL: test_mm512_cvttph_epi16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.512
+  return _mm512_cvttph_epi16(A);
+}
+
+__m512i test_mm512_mask_cvttph_epi16(__m512i A, __mmask32 B, __m512h C) {
+  // CHECK-LABEL: test_mm512_mask_cvttph_epi16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.512
+  return _mm512_mask_cvttph_epi16(A, B, C);
+}
+
+__m512i test_mm512_maskz_cvttph_epi16(__mmask32 A, __m512h B) {
+  // CHECK-LABEL: test_mm512_maskz_cvttph_epi16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.512
+  return _mm512_maskz_cvttph_epi16(A, B);
+}
+
+__m512h test_mm512_cvt_roundepi16_ph(__m512i A) {
+  // CHECK-LABEL: test_mm512_cvt_roundepi16_ph
+  // CHECK: @llvm.x86.avx512.sitofp.round.v32f16.v32i16
+  return _mm512_cvt_roundepi16_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512h test_mm512_mask_cvt_roundepi16_ph(__m512h A, __mmask32 B, __m512i C) {
+  // CHECK-LABEL: test_mm512_mask_cvt_roundepi16_ph
+  // CHECK: @llvm.x86.avx512.sitofp.round.v32f16.v32i16
+  return _mm512_mask_cvt_roundepi16_ph(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512h test_mm512_maskz_cvt_roundepi16_ph(__mmask32 A, __m512i B) {
+  // CHECK-LABEL: test_mm512_maskz_cvt_roundepi16_ph
+  // CHECK: @llvm.x86.avx512.sitofp.round.v32f16.v32i16
+  return _mm512_maskz_cvt_roundepi16_ph(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512h test_mm512_cvtepi16_ph(__m512i A) {
+  // CHECK-LABEL: test_mm512_cvtepi16_ph
+  // CHECK: %{{.*}} = sitofp <32 x i16> %{{.*}} to <32 x half>
+  return _mm512_cvtepi16_ph(A);
+}
+
+__m512h test_mm512_mask_cvtepi16_ph(__m512h A, __mmask32 B, __m512i C) {
+  // CHECK-LABEL: test_mm512_mask_cvtepi16_ph
+  // CHECK: %{{.*}} = sitofp <32 x i16> %{{.*}} to <32 x half>
+  return _mm512_mask_cvtepi16_ph(A, B, C);
+}
+
+__m512h test_mm512_maskz_cvtepi16_ph(__mmask32 A, __m512i B) {
+  // CHECK-LABEL: test_mm512_maskz_cvtepi16_ph
+  // CHECK: %{{.*}} = sitofp <32 x i16> %{{.*}} to <32 x half>
+  return _mm512_maskz_cvtepi16_ph(A, B);
+}
+
+__m512i test_mm512_cvt_roundph_epu16(__m512h A) {
+  // CHECK-LABEL: test_mm512_cvt_roundph_epu16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.512
+  return _mm512_cvt_roundph_epu16(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvt_roundph_epu16(__m512i A, __mmask32 B, __m512h C) {
+  // CHECK-LABEL: test_mm512_mask_cvt_roundph_epu16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.512
+  return _mm512_mask_cvt_roundph_epu16(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvt_roundph_epu16(__mmask32 A, __m512h B) {
+  // CHECK-LABEL: test_mm512_maskz_cvt_roundph_epu16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.512
+  return _mm512_maskz_cvt_roundph_epu16(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvtph_epu16(__m512h A) {
+  // CHECK-LABEL: test_mm512_cvtph_epu16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.512
+  return _mm512_cvtph_epu16(A);
+}
+
+__m512i test_mm512_mask_cvtph_epu16(__m512i A, __mmask32 B, __m512h C) {
+  // CHECK-LABEL: test_mm512_mask_cvtph_epu16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.512
+  return _mm512_mask_cvtph_epu16(A, B, C);
+}
+
+__m512i test_mm512_maskz_cvtph_epu16(__mmask32 A, __m512h B) {
+  // CHECK-LABEL: test_mm512_maskz_cvtph_epu16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.512
+  return _mm512_maskz_cvtph_epu16(A, B);
+}
+
+__m512i test_mm512_cvtt_roundph_epu16(__m512h A) {
+  // CHECK-LABEL: test_mm512_cvtt_roundph_epu16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.512
+  return _mm512_cvtt_roundph_epu16(A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvtt_roundph_epu16(__m512i A, __mmask32 B, __m512h C) {
+  // CHECK-LABEL: test_mm512_mask_cvtt_roundph_epu16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.512
+  return _mm512_mask_cvtt_roundph_epu16(A, B, C, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvtt_roundph_epu16(__mmask32 A, __m512h B) {
+  // CHECK-LABEL: test_mm512_maskz_cvtt_roundph_epu16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.512
+  return _mm512_maskz_cvtt_roundph_epu16(A, B, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvttph_epu16(__m512h A) {
+  // CHECK-LABEL: test_mm512_cvttph_epu16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.512
+  return _mm512_cvttph_epu16(A);
+}
+
+__m512i test_mm512_mask_cvttph_epu16(__m512i A, __mmask32 B, __m512h C) {
+  // CHECK-LABEL: test_mm512_mask_cvttph_epu16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.512
+  return _mm512_mask_cvttph_epu16(A, B, C);
+}
+
+__m512i test_mm512_maskz_cvttph_epu16(__mmask32 A, __m512h B) {
+  // CHECK-LABEL: test_mm512_maskz_cvttph_epu16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.512
+  return _mm512_maskz_cvttph_epu16(A, B);
+}
+
+__m512h test_mm512_cvt_roundepu16_ph(__m512i A) {
+  // CHECK-LABEL: test_mm512_cvt_roundepu16_ph
+  // CHECK: @llvm.x86.avx512.uitofp.round.v32f16.v32i16
+  return _mm512_cvt_roundepu16_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512h test_mm512_mask_cvt_roundepu16_ph(__m512h A, __mmask32 B, __m512i C) {
+  // CHECK-LABEL: test_mm512_mask_cvt_roundepu16_ph
+  // CHECK: @llvm.x86.avx512.uitofp.round.v32f16.v32i16
+  return _mm512_mask_cvt_roundepu16_ph(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512h test_mm512_maskz_cvt_roundepu16_ph(__mmask32 A, __m512i B) {
+  // CHECK-LABEL: test_mm512_maskz_cvt_roundepu16_ph
+  // CHECK: @llvm.x86.avx512.uitofp.round.v32f16.v32i16
+  return _mm512_maskz_cvt_roundepu16_ph(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512h test_mm512_cvtepu16_ph(__m512i A) {
+  // CHECK-LABEL: test_mm512_cvtepu16_ph
+  // CHECK: %{{.*}} = uitofp <32 x i16> %{{.*}} to <32 x half>
+  return _mm512_cvtepu16_ph(A);
+}
+
+__m512h test_mm512_mask_cvtepu16_ph(__m512h A, __mmask32 B, __m512i C) {
+  // CHECK-LABEL: test_mm512_mask_cvtepu16_ph
+  // CHECK: %{{.*}} = uitofp <32 x i16> %{{.*}} to <32 x half>
+  return _mm512_mask_cvtepu16_ph(A, B, C);
+}
+
+__m512h test_mm512_maskz_cvtepu16_ph(__mmask32 A, __m512i B) {
+  // CHECK-LABEL: test_mm512_maskz_cvtepu16_ph
+  // CHECK: %{{.*}} = uitofp <32 x i16> %{{.*}} to <32 x half>
+  return _mm512_maskz_cvtepu16_ph(A, B);
+}
+
+__m512i test_mm512_cvt_roundph_epi32(__m256h A) {
+  // CHECK-LABEL: test_mm512_cvt_roundph_epi32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.512
+  return _mm512_cvt_roundph_epi32(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvt_roundph_epi32(__m512i A, __mmask16 B, __m256h C) {
+  // CHECK-LABEL: test_mm512_mask_cvt_roundph_epi32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.512
+  return _mm512_mask_cvt_roundph_epi32(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvt_roundph_epi32(__mmask16 A, __m256h B) {
+  // CHECK-LABEL: test_mm512_maskz_cvt_roundph_epi32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.512
+  return _mm512_maskz_cvt_roundph_epi32(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvtph_epi32(__m256h A) {
+  // CHECK-LABEL: test_mm512_cvtph_epi32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.512
+  return _mm512_cvtph_epi32(A);
+}
+
+__m512i test_mm512_mask_cvtph_epi32(__m512i A, __mmask16 B, __m256h C) {
+  // CHECK-LABEL: test_mm512_mask_cvtph_epi32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.512
+  return _mm512_mask_cvtph_epi32(A, B, C);
+}
+
+__m512i test_mm512_maskz_cvtph_epi32(__mmask16 A, __m256h B) {
+  // CHECK-LABEL: test_mm512_maskz_cvtph_epi32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.512
+  return _mm512_maskz_cvtph_epi32(A, B);
+}
+
+__m512i test_mm512_cvt_roundph_epu32(__m256h A) {
+  // CHECK-LABEL: test_mm512_cvt_roundph_epu32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.512
+  return _mm512_cvt_roundph_epu32(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvt_roundph_epu32(__m512i A, __mmask16 B, __m256h C) {
+  // CHECK-LABEL: test_mm512_mask_cvt_roundph_epu32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.512
+  return _mm512_mask_cvt_roundph_epu32(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvt_roundph_epu32(__mmask16 A, __m256h B) {
+  // CHECK-LABEL: test_mm512_maskz_cvt_roundph_epu32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.512
+  return _mm512_maskz_cvt_roundph_epu32(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvtph_epu32(__m256h A) {
+  // CHECK-LABEL: test_mm512_cvtph_epu32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.512
+  return _mm512_cvtph_epu32(A);
+}
+
+__m512i test_mm512_mask_cvtph_epu32(__m512i A, __mmask16 B, __m256h C) {
+  // CHECK-LABEL: test_mm512_mask_cvtph_epu32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.512
+  return _mm512_mask_cvtph_epu32(A, B, C);
+}
+
+__m512i test_mm512_maskz_cvtph_epu32(__mmask16 A, __m256h B) {
+  // CHECK-LABEL: test_mm512_maskz_cvtph_epu32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.512
+  return _mm512_maskz_cvtph_epu32(A, B);
+}
+
+__m256h test_mm512_cvt_roundepi32_ph(__m512i A) {
+  // CHECK-LABEL: test_mm512_cvt_roundepi32_ph
+  // CHECK: @llvm.x86.avx512.sitofp.round.v16f16.v16i32
+  return _mm512_cvt_roundepi32_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm512_mask_cvt_roundepi32_ph(__m256h A, __mmask16 B, __m512i C) {
+  // CHECK-LABEL: test_mm512_mask_cvt_roundepi32_ph
+  // CHECK: @llvm.x86.avx512.sitofp.round.v16f16.v16i32
+  return _mm512_mask_cvt_roundepi32_ph(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm512_maskz_cvt_roundepi32_ph(__mmask16 A, __m512i B) {
+  // CHECK-LABEL: test_mm512_maskz_cvt_roundepi32_ph
+  // CHECK: @llvm.x86.avx512.sitofp.round.v16f16.v16i32
+  return _mm512_maskz_cvt_roundepi32_ph(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm512_cvtepi32_ph(__m512i A) {
+  // CHECK-LABEL: test_mm512_cvtepi32_ph
+  // CHECK: %{{.*}} = sitofp <16 x i32> %{{.*}} to <16 x half>
+  return _mm512_cvtepi32_ph(A);
+}
+
+__m256h test_mm512_mask_cvtepi32_ph(__m256h A, __mmask16 B, __m512i C) {
+  // CHECK-LABEL: test_mm512_mask_cvtepi32_ph
+  // CHECK: %{{.*}} = sitofp <16 x i32> %{{.*}} to <16 x half>
+  return _mm512_mask_cvtepi32_ph(A, B, C);
+}
+
+__m256h test_mm512_maskz_cvtepi32_ph(__mmask16 A, __m512i B) {
+  // CHECK-LABEL: test_mm512_maskz_cvtepi32_ph
+  // CHECK: %{{.*}} = sitofp <16 x i32> %{{.*}} to <16 x half>
+  return _mm512_maskz_cvtepi32_ph(A, B);
+}
+
+__m256h test_mm512_cvt_roundepu32_ph(__m512i A) {
+  // CHECK-LABEL: test_mm512_cvt_roundepu32_ph
+  // CHECK: @llvm.x86.avx512.uitofp.round.v16f16.v16i32
+  return _mm512_cvt_roundepu32_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm512_mask_cvt_roundepu32_ph(__m256h A, __mmask16 B, __m512i C) {
+  // CHECK-LABEL: test_mm512_mask_cvt_roundepu32_ph
+  // CHECK: @llvm.x86.avx512.uitofp.round.v16f16.v16i32
+  return _mm512_mask_cvt_roundepu32_ph(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm512_maskz_cvt_roundepu32_ph(__mmask16 A, __m512i B) {
+  // CHECK-LABEL: test_mm512_maskz_cvt_roundepu32_ph
+  // CHECK: @llvm.x86.avx512.uitofp.round.v16f16.v16i32
+  return _mm512_maskz_cvt_roundepu32_ph(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm512_cvtepu32_ph(__m512i A) {
+  // CHECK-LABEL: test_mm512_cvtepu32_ph
+  // CHECK: %{{.*}} = uitofp <16 x i32> %{{.*}} to <16 x half>
+  return _mm512_cvtepu32_ph(A);
+}
+
+__m256h test_mm512_mask_cvtepu32_ph(__m256h A, __mmask16 B, __m512i C) {
+  // CHECK-LABEL: test_mm512_mask_cvtepu32_ph
+  // CHECK: %{{.*}} = uitofp <16 x i32> %{{.*}} to <16 x half>
+  return _mm512_mask_cvtepu32_ph(A, B, C);
+}
+
+__m256h test_mm512_maskz_cvtepu32_ph(__mmask16 A, __m512i B) {
+  // CHECK-LABEL: test_mm512_maskz_cvtepu32_ph
+  // CHECK: %{{.*}} = uitofp <16 x i32> %{{.*}} to <16 x half>
+  return _mm512_maskz_cvtepu32_ph(A, B);
+}
+
+__m512i test_mm512_cvtt_roundph_epi32(__m256h A) {
+  // CHECK-LABEL: test_mm512_cvtt_roundph_epi32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.512
+  return _mm512_cvtt_roundph_epi32(A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvtt_roundph_epi32(__m512i A, __mmask16 B, __m256h C) {
+  // CHECK-LABEL: test_mm512_mask_cvtt_roundph_epi32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.512
+  return _mm512_mask_cvtt_roundph_epi32(A, B, C, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvtt_roundph_epi32(__mmask16 A, __m256h B) {
+  // CHECK-LABEL: test_mm512_maskz_cvtt_roundph_epi32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.512
+  return _mm512_maskz_cvtt_roundph_epi32(A, B, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvttph_epi32(__m256h A) {
+  // CHECK-LABEL: test_mm512_cvttph_epi32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.512
+  return _mm512_cvttph_epi32(A);
+}
+
+__m512i test_mm512_mask_cvttph_epi32(__m512i A, __mmask16 B, __m256h C) {
+  // CHECK-LABEL: test_mm512_mask_cvttph_epi32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.512
+  return _mm512_mask_cvttph_epi32(A, B, C);
+}
+
+__m512i test_mm512_maskz_cvttph_epi32(__mmask16 A, __m256h B) {
+  // CHECK-LABEL: test_mm512_maskz_cvttph_epi32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.512
+  return _mm512_maskz_cvttph_epi32(A, B);
+}
+
+__m512i test_mm512_cvtt_roundph_epu32(__m256h A) {
+  // CHECK-LABEL: test_mm512_cvtt_roundph_epu32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.512
+  return _mm512_cvtt_roundph_epu32(A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvtt_roundph_epu32(__m512i A, __mmask16 B, __m256h C) {
+  // CHECK-LABEL: test_mm512_mask_cvtt_roundph_epu32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.512
+  return _mm512_mask_cvtt_roundph_epu32(A, B, C, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvtt_roundph_epu32(__mmask16 A, __m256h B) {
+  // CHECK-LABEL: test_mm512_maskz_cvtt_roundph_epu32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.512
+  return _mm512_maskz_cvtt_roundph_epu32(A, B, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvttph_epu32(__m256h A) {
+  // CHECK-LABEL: test_mm512_cvttph_epu32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.512
+  return _mm512_cvttph_epu32(A);
+}
+
+__m512i test_mm512_mask_cvttph_epu32(__m512i A, __mmask16 B, __m256h C) {
+  // CHECK-LABEL: test_mm512_mask_cvttph_epu32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.512
+  return _mm512_mask_cvttph_epu32(A, B, C);
+}
+
+__m512i test_mm512_maskz_cvttph_epu32(__mmask16 A, __m256h B) {
+  // CHECK-LABEL: test_mm512_maskz_cvttph_epu32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.512
+  return _mm512_maskz_cvttph_epu32(A, B);
+}
+
+__m128h test_mm512_cvt_roundepi64_ph(__m512i A) {
+  // CHECK-LABEL: test_mm512_cvt_roundepi64_ph
+  // CHECK: @llvm.x86.avx512.sitofp.round.v8f16.v8i64
+  return _mm512_cvt_roundepi64_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm512_mask_cvt_roundepi64_ph(__m128h A, __mmask8 B, __m512i C) {
+  // CHECK-LABEL: test_mm512_mask_cvt_roundepi64_ph
+  // CHECK: @llvm.x86.avx512.sitofp.round.v8f16.v8i64
+  return _mm512_mask_cvt_roundepi64_ph(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm512_maskz_cvt_roundepi64_ph(__mmask8 A, __m512i B) {
+  // CHECK-LABEL: test_mm512_maskz_cvt_roundepi64_ph
+  // CHECK: @llvm.x86.avx512.sitofp.round.v8f16.v8i64
+  return _mm512_maskz_cvt_roundepi64_ph(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm512_cvtepi64_ph(__m512i A) {
+  // CHECK-LABEL: test_mm512_cvtepi64_ph
+  // CHECK: %{{.*}} = sitofp <8 x i64> %{{.*}} to <8 x half>
+  return _mm512_cvtepi64_ph(A);
+}
+
+__m128h test_mm512_mask_cvtepi64_ph(__m128h A, __mmask8 B, __m512i C) {
+  // CHECK-LABEL: test_mm512_mask_cvtepi64_ph
+  // CHECK: %{{.*}} = sitofp <8 x i64> %{{.*}} to <8 x half>
+  return _mm512_mask_cvtepi64_ph(A, B, C);
+}
+
+__m128h test_mm512_maskz_cvtepi64_ph(__mmask8 A, __m512i B) {
+  // CHECK-LABEL: test_mm512_maskz_cvtepi64_ph
+  // CHECK: %{{.*}} = sitofp <8 x i64> %{{.*}} to <8 x half>
+  return _mm512_maskz_cvtepi64_ph(A, B);
+}
+
+__m512i test_mm512_cvt_roundph_epi64(__m128h A) {
+  // CHECK-LABEL: test_mm512_cvt_roundph_epi64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.512
+  return _mm512_cvt_roundph_epi64(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvt_roundph_epi64(__m512i A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm512_mask_cvt_roundph_epi64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.512
+  return _mm512_mask_cvt_roundph_epi64(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvt_roundph_epi64(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm512_maskz_cvt_roundph_epi64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.512
+  return _mm512_maskz_cvt_roundph_epi64(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvtph_epi64(__m128h A) {
+  // CHECK-LABEL: test_mm512_cvtph_epi64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.512
+  return _mm512_cvtph_epi64(A);
+}
+
+__m512i test_mm512_mask_cvtph_epi64(__m512i A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm512_mask_cvtph_epi64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.512
+  return _mm512_mask_cvtph_epi64(A, B, C);
+}
+
+__m512i test_mm512_maskz_cvtph_epi64(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm512_maskz_cvtph_epi64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.512
+  return _mm512_maskz_cvtph_epi64(A, B);
+}
+
+__m128h test_mm512_cvt_roundepu64_ph(__m512i A) {
+  // CHECK-LABEL: test_mm512_cvt_roundepu64_ph
+  // CHECK: @llvm.x86.avx512.uitofp.round.v8f16.v8i64
+  return _mm512_cvt_roundepu64_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm512_mask_cvt_roundepu64_ph(__m128h A, __mmask8 B, __m512i C) {
+  // CHECK-LABEL: test_mm512_mask_cvt_roundepu64_ph
+  // CHECK: @llvm.x86.avx512.uitofp.round.v8f16.v8i64
+  return _mm512_mask_cvt_roundepu64_ph(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm512_maskz_cvt_roundepu64_ph(__mmask8 A, __m512i B) {
+  // CHECK-LABEL: test_mm512_maskz_cvt_roundepu64_ph
+  // CHECK: @llvm.x86.avx512.uitofp.round.v8f16.v8i64
+  return _mm512_maskz_cvt_roundepu64_ph(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm512_cvtepu64_ph(__m512i A) {
+  // CHECK-LABEL: test_mm512_cvtepu64_ph
+  // CHECK: %{{.*}} = uitofp <8 x i64> %{{.*}} to <8 x half>
+  return _mm512_cvtepu64_ph(A);
+}
+
+__m128h test_mm512_mask_cvtepu64_ph(__m128h A, __mmask8 B, __m512i C) {
+  // CHECK-LABEL: test_mm512_mask_cvtepu64_ph
+  // CHECK: %{{.*}} = uitofp <8 x i64> %{{.*}} to <8 x half>
+  return _mm512_mask_cvtepu64_ph(A, B, C);
+}
+
+__m128h test_mm512_maskz_cvtepu64_ph(__mmask8 A, __m512i B) {
+  // CHECK-LABEL: test_mm512_maskz_cvtepu64_ph
+  // CHECK: %{{.*}} = uitofp <8 x i64> %{{.*}} to <8 x half>
+  return _mm512_maskz_cvtepu64_ph(A, B);
+}
+
+__m512i test_mm512_cvt_roundph_epu64(__m128h A) {
+  // CHECK-LABEL: test_mm512_cvt_roundph_epu64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.512
+  return _mm512_cvt_roundph_epu64(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvt_roundph_epu64(__m512i A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm512_mask_cvt_roundph_epu64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.512
+  return _mm512_mask_cvt_roundph_epu64(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvt_roundph_epu64(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm512_maskz_cvt_roundph_epu64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.512
+  return _mm512_maskz_cvt_roundph_epu64(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvtph_epu64(__m128h A) {
+  // CHECK-LABEL: test_mm512_cvtph_epu64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.512
+  return _mm512_cvtph_epu64(A);
+}
+
+__m512i test_mm512_mask_cvtph_epu64(__m512i A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm512_mask_cvtph_epu64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.512
+  return _mm512_mask_cvtph_epu64(A, B, C);
+}
+
+__m512i test_mm512_maskz_cvtph_epu64(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm512_maskz_cvtph_epu64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.512
+  return _mm512_maskz_cvtph_epu64(A, B);
+}
+
+__m512i test_mm512_cvtt_roundph_epi64(__m128h A) {
+  // CHECK-LABEL: test_mm512_cvtt_roundph_epi64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.512
+  return _mm512_cvtt_roundph_epi64(A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvtt_roundph_epi64(__m512i A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm512_mask_cvtt_roundph_epi64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.512
+  return _mm512_mask_cvtt_roundph_epi64(A, B, C, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvtt_roundph_epi64(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm512_maskz_cvtt_roundph_epi64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.512
+  return _mm512_maskz_cvtt_roundph_epi64(A, B, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvttph_epi64(__m128h A) {
+  // CHECK-LABEL: test_mm512_cvttph_epi64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.512
+  return _mm512_cvttph_epi64(A);
+}
+
+__m512i test_mm512_mask_cvttph_epi64(__m512i A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm512_mask_cvttph_epi64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.512
+  return _mm512_mask_cvttph_epi64(A, B, C);
+}
+
+__m512i test_mm512_maskz_cvttph_epi64(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm512_maskz_cvttph_epi64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.512
+  return _mm512_maskz_cvttph_epi64(A, B);
+}
+
+__m512i test_mm512_cvtt_roundph_epu64(__m128h A) {
+  // CHECK-LABEL: test_mm512_cvtt_roundph_epu64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.512
+  return _mm512_cvtt_roundph_epu64(A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvtt_roundph_epu64(__m512i A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm512_mask_cvtt_roundph_epu64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.512
+  return _mm512_mask_cvtt_roundph_epu64(A, B, C, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvtt_roundph_epu64(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm512_maskz_cvtt_roundph_epu64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.512
+  return _mm512_maskz_cvtt_roundph_epu64(A, B, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvttph_epu64(__m128h A) {
+  // CHECK-LABEL: test_mm512_cvttph_epu64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.512
+  return _mm512_cvttph_epu64(A);
+}
+
+__m512i test_mm512_mask_cvttph_epu64(__m512i A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm512_mask_cvttph_epu64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.512
+  return _mm512_mask_cvttph_epu64(A, B, C);
+}
+
+__m512i test_mm512_maskz_cvttph_epu64(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm512_maskz_cvttph_epu64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.512
+  return _mm512_maskz_cvttph_epu64(A, B);
+}
+
+int test_mm_cvt_roundsh_i32(__m128h A) {
+  // CHECK-LABEL: test_mm_cvt_roundsh_i32
+  // CHECK: @llvm.x86.avx512fp16.vcvtsh2si32
+  return _mm_cvt_roundsh_i32(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+int test_mm_cvtsh_i32(__m128h A) {
+  // CHECK-LABEL: test_mm_cvtsh_i32
+  // CHECK: @llvm.x86.avx512fp16.vcvtsh2si32
+  return _mm_cvtsh_i32(A);
+}
+
+unsigned int test_mm_cvt_roundsh_u32(__m128h A) {
+  // CHECK-LABEL: test_mm_cvt_roundsh_u32
+  // CHECK: @llvm.x86.avx512fp16.vcvtsh2usi32
+  return _mm_cvt_roundsh_u32(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+unsigned int test_mm_cvtsh_u32(__m128h A) {
+  // CHECK-LABEL: test_mm_cvtsh_u32
+  // CHECK: @llvm.x86.avx512fp16.vcvtsh2usi32
+  return _mm_cvtsh_u32(A);
+}
+
+#ifdef __x86_64__
+long long test_mm_cvt_roundsh_i64(__m128h A) {
+  // CHECK-LABEL: test_mm_cvt_roundsh_i64
+  // CHECK: @llvm.x86.avx512fp16.vcvtsh2si64
+  return _mm_cvt_roundsh_i64(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+long long test_mm_cvtsh_i64(__m128h A) {
+  // CHECK-LABEL: test_mm_cvtsh_i64
+  // CHECK: @llvm.x86.avx512fp16.vcvtsh2si64
+  return _mm_cvtsh_i64(A);
+}
+
+unsigned long long test_mm_cvt_roundsh_u64(__m128h A) {
+  // CHECK-LABEL: test_mm_cvt_roundsh_u64
+  // CHECK: @llvm.x86.avx512fp16.vcvtsh2usi64
+  return _mm_cvt_roundsh_u64(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+unsigned long long test_mm_cvtsh_u64(__m128h A) {
+  // CHECK-LABEL: test_mm_cvtsh_u64
+  // CHECK: @llvm.x86.avx512fp16.vcvtsh2usi64
+  return _mm_cvtsh_u64(A);
+}
+#endif
+
+__m128h test_mm_cvt_roundu32_sh(__m128h A, unsigned int B) {
+  // CHECK-LABEL: test_mm_cvt_roundu32_sh
+  // CHECK: @llvm.x86.avx512fp16.vcvtusi2sh
+  return _mm_cvt_roundu32_sh(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm_cvtu32_sh(__m128h A, unsigned int B) {
+  // CHECK-LABEL: test_mm_cvtu32_sh
+  // CHECK: %{{.*}} = uitofp i32 %{{.*}} to half
+  return _mm_cvtu32_sh(A, B);
+}
+
+#ifdef __x86_64__
+__m128h test_mm_cvt_roundu64_sh(__m128h A, unsigned long long B) {
+  // CHECK-LABEL: test_mm_cvt_roundu64_sh
+  // CHECK: @llvm.x86.avx512fp16.vcvtusi642sh
+  return _mm_cvt_roundu64_sh(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm_cvtu64_sh(__m128h A, unsigned long long B) {
+  // CHECK-LABEL: test_mm_cvtu64_sh
+  // CHECK: %{{.*}} = uitofp i64 %{{.*}} to half
+  return _mm_cvtu64_sh(A, B);
+}
+#endif
+
+__m128h test_mm_cvt_roundi32_sh(__m128h A, int B) {
+  // CHECK-LABEL: test_mm_cvt_roundi32_sh
+  // CHECK: @llvm.x86.avx512fp16.vcvtsi2sh
+  return _mm_cvt_roundi32_sh(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm_cvti32_sh(__m128h A, int B) {
+  // CHECK-LABEL: test_mm_cvti32_sh
+  // CHECK: %{{.*}} = sitofp i32 %{{.*}} to half
+  return _mm_cvti32_sh(A, B);
+}
+
+#ifdef __x86_64__
+__m128h test_mm_cvt_roundi64_sh(__m128h A, long long B) {
+  // CHECK-LABEL: test_mm_cvt_roundi64_sh
+  // CHECK: @llvm.x86.avx512fp16.vcvtsi642sh
+  return _mm_cvt_roundi64_sh(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m128h test_mm_cvti64_sh(__m128h A, long long B) {
+  // CHECK-LABEL: test_mm_cvti64_sh
+  // CHECK: %{{.*}} = sitofp i64 %{{.*}} to half
+  return _mm_cvti64_sh(A, B);
+}
+#endif
+
+int test_mm_cvtt_roundsh_i32(__m128h A) {
+  // CHECK-LABEL: test_mm_cvtt_roundsh_i32
+  // CHECK: @llvm.x86.avx512fp16.vcvttsh2si32
+  return _mm_cvtt_roundsh_i32(A, _MM_FROUND_NO_EXC);
+}
+
+int test_mm_cvttsh_i32(__m128h A) {
+  // CHECK-LABEL: test_mm_cvttsh_i32
+  // CHECK: @llvm.x86.avx512fp16.vcvttsh2si32
+  return _mm_cvttsh_i32(A);
+}
+
+#ifdef __x86_64__
+long long test_mm_cvtt_roundsh_i64(__m128h A) {
+  // CHECK-LABEL: test_mm_cvtt_roundsh_i64
+  // CHECK: @llvm.x86.avx512fp16.vcvttsh2si64
+  return _mm_cvtt_roundsh_i64(A, _MM_FROUND_NO_EXC);
+}
+
+long long test_mm_cvttsh_i64(__m128h A) {
+  // CHECK-LABEL: test_mm_cvttsh_i64
+  // CHECK: @llvm.x86.avx512fp16.vcvttsh2si64
+  return _mm_cvttsh_i64(A);
+}
+#endif
+
+unsigned int test_mm_cvtt_roundsh_u32(__m128h A) {
+  // CHECK-LABEL: test_mm_cvtt_roundsh_u32
+  // CHECK: @llvm.x86.avx512fp16.vcvttsh2usi32
+  return _mm_cvtt_roundsh_u32(A, _MM_FROUND_NO_EXC);
+}
+
+unsigned int test_mm_cvttsh_u32(__m128h A) {
+  // CHECK-LABEL: test_mm_cvttsh_u32
+  // CHECK: @llvm.x86.avx512fp16.vcvttsh2usi32
+  return _mm_cvttsh_u32(A);
+}
+
+#ifdef __x86_64__
+unsigned long long test_mm_cvtt_roundsh_u64(__m128h A) {
+  // CHECK-LABEL: test_mm_cvtt_roundsh_u64
+  // CHECK: @llvm.x86.avx512fp16.vcvttsh2usi64
+  return _mm_cvtt_roundsh_u64(A, _MM_FROUND_NO_EXC);
+}
+
+unsigned long long test_mm_cvttsh_u64(__m128h A) {
+  // CHECK-LABEL: test_mm_cvttsh_u64
+  // CHECK: @llvm.x86.avx512fp16.vcvttsh2usi64
+  return _mm_cvttsh_u64(A);
+}
+#endif
+
+__m512 test_mm512_cvtx_roundph_ps(__m256h A) {
+  // CHECK-LABEL: test_mm512_cvtx_roundph_ps
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.512
+  return _mm512_cvtx_roundph_ps(A, _MM_FROUND_NO_EXC);
+}
+
+__m512 test_mm512_mask_cvtx_roundph_ps(__m512 A, __mmask16 B, __m256h C) {
+  // CHECK-LABEL: test_mm512_mask_cvtx_roundph_ps
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.512
+  return _mm512_mask_cvtx_roundph_ps(A, B, C, _MM_FROUND_NO_EXC);
+}
+
+__m512 test_mm512_maskz_cvtx_roundph_ps(__mmask16 A, __m256h B) {
+  // CHECK-LABEL: test_mm512_maskz_cvtx_roundph_ps
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.512
+  return _mm512_maskz_cvtx_roundph_ps(A, B, _MM_FROUND_NO_EXC);
+}
+
+__m512 test_mm512_cvtxph_ps(__m256h A) {
+  // CHECK-LABEL: test_mm512_cvtxph_ps
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.512
+  return _mm512_cvtxph_ps(A);
+}
+
+__m512 test_mm512_mask_cvtxph_ps(__m512 A, __mmask16 B, __m256h C) {
+  // CHECK-LABEL: test_mm512_mask_cvtxph_ps
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.512
+  return _mm512_mask_cvtxph_ps(A, B, C);
+}
+
+__m512 test_mm512_maskz_cvtxph_ps(__mmask16 A, __m256h B) {
+  // CHECK-LABEL: test_mm512_maskz_cvtxph_ps
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.512
+  return _mm512_maskz_cvtxph_ps(A, B);
+}
+
+__m256h test_mm512_cvtx_roundps_ph(__m512 A) {
+  // CHECK-LABEL: test_mm512_cvtx_roundps_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.512
+  return _mm512_cvtx_roundps_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm512_mask_cvtx_roundps_ph(__m256h A, __mmask16 B, __m512 C) {
+  // CHECK-LABEL: test_mm512_mask_cvtx_roundps_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.512
+  return _mm512_mask_cvtx_roundps_ph(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm512_maskz_cvtx_roundps_ph(__mmask16 A, __m512 B) {
+  // CHECK-LABEL: test_mm512_maskz_cvtx_roundps_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.512
+  return _mm512_maskz_cvtx_roundps_ph(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
+__m256h test_mm512_cvtxps_ph(__m512 A) {
+  // CHECK-LABEL: test_mm512_cvtxps_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.512
+  return _mm512_cvtxps_ph(A);
+}
+
+__m256h test_mm512_mask_cvtxps_ph(__m256h A, __mmask16 B, __m512 C) {
+  // CHECK-LABEL: test_mm512_mask_cvtxps_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.512
+  return _mm512_mask_cvtxps_ph(A, B, C);
+}
+
+__m256h test_mm512_maskz_cvtxps_ph(__mmask16 A, __m512 B) {
+  // CHECK-LABEL: test_mm512_maskz_cvtxps_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.512
+  return _mm512_maskz_cvtxps_ph(A, B);
+}
+
 _Float16 test_mm512_reduce_add_ph(__m512h __W) {
   // CHECK-LABEL: @test_mm512_reduce_add_ph
   // CHECK: call reassoc half @llvm.vector.reduce.fadd.v32f16(half 0xH8000, <32 x half> %{{.*}})

diff --git a/clang/test/CodeGen/X86/avx512vlfp16-builtins.c b/clang/test/CodeGen/X86/avx512vlfp16-builtins.c
index a4e3b1e2be941..0d020ccd1452f 100644
--- a/clang/test/CodeGen/X86/avx512vlfp16-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlfp16-builtins.c
@@ -1215,6 +1215,798 @@ __mmask8 test_mm_mask_cmp_ph_mask_true_us(__mmask8 m, __m128h a, __m128h b) {
   return _mm_mask_cmp_ph_mask(m, a, b, _CMP_TRUE_US);
 }
 
+__m128h test_mm_cvtpd_ph(__m128d A) {
+  // CHECK-LABEL: test_mm_cvtpd_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.128
+  return _mm_cvtpd_ph(A);
+}
+
+__m128h test_mm_mask_cvtpd_ph(__m128h A, __mmask8 B, __m128d C) {
+  // CHECK-LABEL: test_mm_mask_cvtpd_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.128
+  return _mm_mask_cvtpd_ph(A, B, C);
+}
+
+__m128h test_mm_maskz_cvtpd_ph(__mmask8 A, __m128d B) {
+  // CHECK-LABEL: test_mm_maskz_cvtpd_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.128
+  return _mm_maskz_cvtpd_ph(A, B);
+}
+
+__m128h test_mm256_cvtpd_ph(__m256d A) {
+  // CHECK-LABEL: test_mm256_cvtpd_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.256
+  return _mm256_cvtpd_ph(A);
+}
+
+__m128h test_mm256_mask_cvtpd_ph(__m128h A, __mmask8 B, __m256d C) {
+  // CHECK-LABEL: test_mm256_mask_cvtpd_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.256
+  return _mm256_mask_cvtpd_ph(A, B, C);
+}
+
+__m128h test_mm256_maskz_cvtpd_ph(__mmask8 A, __m256d B) {
+  // CHECK-LABEL: test_mm256_maskz_cvtpd_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtpd2ph.256
+  return _mm256_maskz_cvtpd_ph(A, B);
+}
+
+__m128d test_mm_cvtph_pd(__m128h A) {
+  // CHECK-LABEL: test_mm_cvtph_pd
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.128
+  return _mm_cvtph_pd(A);
+}
+
+__m128d test_mm_mask_cvtph_pd(__m128d A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm_mask_cvtph_pd
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.128
+  return _mm_mask_cvtph_pd(A, B, C);
+}
+
+__m128d test_mm_maskz_cvtph_pd(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm_maskz_cvtph_pd
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.128
+  return _mm_maskz_cvtph_pd(A, B);
+}
+
+__m256d test_mm256_cvtph_pd(__m128h A) {
+  // CHECK-LABEL: test_mm256_cvtph_pd
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.256
+  return _mm256_cvtph_pd(A);
+}
+
+__m256d test_mm256_mask_cvtph_pd(__m256d A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm256_mask_cvtph_pd
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.256
+  return _mm256_mask_cvtph_pd(A, B, C);
+}
+
+__m256d test_mm256_maskz_cvtph_pd(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm256_maskz_cvtph_pd
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2pd.256
+  return _mm256_maskz_cvtph_pd(A, B);
+}
+
+__m128i test_mm_cvtph_epi16(__m128h A) {
+  // CHECK-LABEL: test_mm_cvtph_epi16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.128
+  return _mm_cvtph_epi16(A);
+}
+
+__m128i test_mm_mask_cvtph_epi16(__m128i A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm_mask_cvtph_epi16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.128
+  return _mm_mask_cvtph_epi16(A, B, C);
+}
+
+__m128i test_mm_maskz_cvtph_epi16(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm_maskz_cvtph_epi16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.128
+  return _mm_maskz_cvtph_epi16(A, B);
+}
+
+__m256i test_mm256_cvtph_epi16(__m256h A) {
+  // CHECK-LABEL: test_mm256_cvtph_epi16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.256
+  return _mm256_cvtph_epi16(A);
+}
+
+__m256i test_mm256_mask_cvtph_epi16(__m256i A, __mmask16 B, __m256h C) {
+  // CHECK-LABEL: test_mm256_mask_cvtph_epi16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.256
+  return _mm256_mask_cvtph_epi16(A, B, C);
+}
+
+__m256i test_mm256_maskz_cvtph_epi16(__mmask16 A, __m256h B) {
+  // CHECK-LABEL: test_mm256_maskz_cvtph_epi16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2w.256
+  return _mm256_maskz_cvtph_epi16(A, B);
+}
+
+__m128i test_mm_cvttph_epi16(__m128h A) {
+  // CHECK-LABEL: test_mm_cvttph_epi16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.128
+  return _mm_cvttph_epi16(A);
+}
+
+__m128i test_mm_mask_cvttph_epi16(__m128i A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm_mask_cvttph_epi16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.128
+  return _mm_mask_cvttph_epi16(A, B, C);
+}
+
+__m128i test_mm_maskz_cvttph_epi16(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm_maskz_cvttph_epi16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.128
+  return _mm_maskz_cvttph_epi16(A, B);
+}
+
+__m256i test_mm256_cvttph_epi16(__m256h A) {
+  // CHECK-LABEL: test_mm256_cvttph_epi16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.256
+  return _mm256_cvttph_epi16(A);
+}
+
+__m256i test_mm256_mask_cvttph_epi16(__m256i A, __mmask16 B, __m256h C) {
+  // CHECK-LABEL: test_mm256_mask_cvttph_epi16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.256
+  return _mm256_mask_cvttph_epi16(A, B, C);
+}
+
+__m256i test_mm256_maskz_cvttph_epi16(__mmask16 A, __m256h B) {
+  // CHECK-LABEL: test_mm256_maskz_cvttph_epi16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2w.256
+  return _mm256_maskz_cvttph_epi16(A, B);
+}
+
+__m128h test_mm_cvtepi16_ph(__m128i A) {
+  // CHECK-LABEL: test_mm_cvtepi16_ph
+  // CHECK: %{{.*}} = sitofp <8 x i16> %{{.*}} to <8 x half>
+  return _mm_cvtepi16_ph(A);
+}
+
+__m128h test_mm_mask_cvtepi16_ph(__m128h A, __mmask8 B, __m128i C) {
+  // CHECK-LABEL: test_mm_mask_cvtepi16_ph
+  // CHECK: %{{.*}} = sitofp <8 x i16> %{{.*}} to <8 x half>
+  return _mm_mask_cvtepi16_ph(A, B, C);
+}
+
+__m128h test_mm_maskz_cvtepi16_ph(__mmask8 A, __m128i B) {
+  // CHECK-LABEL: test_mm_maskz_cvtepi16_ph
+  // CHECK: %{{.*}} = sitofp <8 x i16> %{{.*}} to <8 x half>
+  return _mm_maskz_cvtepi16_ph(A, B);
+}
+
+__m256h test_mm256_cvtepi16_ph(__m256i A) {
+  // CHECK-LABEL: test_mm256_cvtepi16_ph
+  // CHECK: %{{.*}} = sitofp <16 x i16> %{{.*}} to <16 x half>
+  return _mm256_cvtepi16_ph(A);
+}
+
+__m256h test_mm256_mask_cvtepi16_ph(__m256h A, __mmask16 B, __m256i C) {
+  // CHECK-LABEL: test_mm256_mask_cvtepi16_ph
+  // CHECK: %{{.*}} = sitofp <16 x i16> %{{.*}} to <16 x half>
+  return _mm256_mask_cvtepi16_ph(A, B, C);
+}
+
+__m256h test_mm256_maskz_cvtepi16_ph(__mmask16 A, __m256i B) {
+  // CHECK-LABEL: test_mm256_maskz_cvtepi16_ph
+  // CHECK: %{{.*}} = sitofp <16 x i16> %{{.*}} to <16 x half>
+  return _mm256_maskz_cvtepi16_ph(A, B);
+}
+
+__m128i test_mm_cvtph_epu16(__m128h A) {
+  // CHECK-LABEL: test_mm_cvtph_epu16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.128
+  return _mm_cvtph_epu16(A);
+}
+
+__m128i test_mm_mask_cvtph_epu16(__m128i A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm_mask_cvtph_epu16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.128
+  return _mm_mask_cvtph_epu16(A, B, C);
+}
+
+__m128i test_mm_maskz_cvtph_epu16(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm_maskz_cvtph_epu16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.128
+  return _mm_maskz_cvtph_epu16(A, B);
+}
+
+__m256i test_mm256_cvtph_epu16(__m256h A) {
+  // CHECK-LABEL: test_mm256_cvtph_epu16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.256
+  return _mm256_cvtph_epu16(A);
+}
+
+__m256i test_mm256_mask_cvtph_epu16(__m256i A, __mmask16 B, __m256h C) {
+  // CHECK-LABEL: test_mm256_mask_cvtph_epu16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.256
+  return _mm256_mask_cvtph_epu16(A, B, C);
+}
+
+__m256i test_mm256_maskz_cvtph_epu16(__mmask16 A, __m256h B) {
+  // CHECK-LABEL: test_mm256_maskz_cvtph_epu16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uw.256
+  return _mm256_maskz_cvtph_epu16(A, B);
+}
+
+__m128i test_mm_cvttph_epu16(__m128h A) {
+  // CHECK-LABEL: test_mm_cvttph_epu16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.128
+  return _mm_cvttph_epu16(A);
+}
+
+__m128i test_mm_mask_cvttph_epu16(__m128i A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm_mask_cvttph_epu16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.128
+  return _mm_mask_cvttph_epu16(A, B, C);
+}
+
+__m128i test_mm_maskz_cvttph_epu16(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm_maskz_cvttph_epu16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.128
+  return _mm_maskz_cvttph_epu16(A, B);
+}
+
+__m256i test_mm256_cvttph_epu16(__m256h A) {
+  // CHECK-LABEL: test_mm256_cvttph_epu16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.256
+  return _mm256_cvttph_epu16(A);
+}
+
+__m256i test_mm256_mask_cvttph_epu16(__m256i A, __mmask16 B, __m256h C) {
+  // CHECK-LABEL: test_mm256_mask_cvttph_epu16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.256
+  return _mm256_mask_cvttph_epu16(A, B, C);
+}
+
+__m256i test_mm256_maskz_cvttph_epu16(__mmask16 A, __m256h B) {
+  // CHECK-LABEL: test_mm256_maskz_cvttph_epu16
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uw.256
+  return _mm256_maskz_cvttph_epu16(A, B);
+}
+
+__m128h test_mm_cvtepu16_ph(__m128i A) {
+  // CHECK-LABEL: test_mm_cvtepu16_ph
+  // CHECK: %{{.*}} = uitofp <8 x i16> %{{.*}} to <8 x half>
+  return _mm_cvtepu16_ph(A);
+}
+
+__m128h test_mm_mask_cvtepu16_ph(__m128h A, __mmask8 B, __m128i C) {
+  // CHECK-LABEL: test_mm_mask_cvtepu16_ph
+  // CHECK: %{{.*}} = uitofp <8 x i16> %{{.*}} to <8 x half>
+  return _mm_mask_cvtepu16_ph(A, B, C);
+}
+
+__m128h test_mm_maskz_cvtepu16_ph(__mmask8 A, __m128i B) {
+  // CHECK-LABEL: test_mm_maskz_cvtepu16_ph
+  // CHECK: %{{.*}} = uitofp <8 x i16> %{{.*}} to <8 x half>
+  return _mm_maskz_cvtepu16_ph(A, B);
+}
+
+__m256h test_mm256_cvtepu16_ph(__m256i A) {
+  // CHECK-LABEL: test_mm256_cvtepu16_ph
+  // CHECK: %{{.*}} = uitofp <16 x i16> %{{.*}} to <16 x half>
+  return _mm256_cvtepu16_ph(A);
+}
+
+__m256h test_mm256_mask_cvtepu16_ph(__m256h A, __mmask16 B, __m256i C) {
+  // CHECK-LABEL: test_mm256_mask_cvtepu16_ph
+  // CHECK: %{{.*}} = uitofp <16 x i16> %{{.*}} to <16 x half>
+  return _mm256_mask_cvtepu16_ph(A, B, C);
+}
+
+__m256h test_mm256_maskz_cvtepu16_ph(__mmask16 A, __m256i B) {
+  // CHECK-LABEL: test_mm256_maskz_cvtepu16_ph
+  // CHECK: %{{.*}} = uitofp <16 x i16> %{{.*}} to <16 x half>
+  return _mm256_maskz_cvtepu16_ph(A, B);
+}
+
+__m128i test_mm_cvtph_epi32(__m128h A) {
+  // CHECK-LABEL: test_mm_cvtph_epi32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.128
+  return _mm_cvtph_epi32(A);
+}
+
+__m128i test_mm_mask_cvtph_epi32(__m128i A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm_mask_cvtph_epi32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.128
+  return _mm_mask_cvtph_epi32(A, B, C);
+}
+
+__m128i test_mm_maskz_cvtph_epi32(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm_maskz_cvtph_epi32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.128
+  return _mm_maskz_cvtph_epi32(A, B);
+}
+
+__m256i test_mm256_cvtph_epi32(__m128h A) {
+  // CHECK-LABEL: test_mm256_cvtph_epi32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.256
+  return _mm256_cvtph_epi32(A);
+}
+
+__m256i test_mm256_mask_cvtph_epi32(__m256i A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm256_mask_cvtph_epi32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.256
+  return _mm256_mask_cvtph_epi32(A, B, C);
+}
+
+__m256i test_mm256_maskz_cvtph_epi32(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm256_maskz_cvtph_epi32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2dq.256
+  return _mm256_maskz_cvtph_epi32(A, B);
+}
+
+__m128i test_mm_cvtph_epu32(__m128h A) {
+  // CHECK-LABEL: test_mm_cvtph_epu32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.128
+  return _mm_cvtph_epu32(A);
+}
+
+__m128i test_mm_mask_cvtph_epu32(__m128i A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm_mask_cvtph_epu32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.128
+  return _mm_mask_cvtph_epu32(A, B, C);
+}
+
+__m128i test_mm_maskz_cvtph_epu32(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm_maskz_cvtph_epu32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.128
+  return _mm_maskz_cvtph_epu32(A, B);
+}
+
+__m256i test_mm256_cvtph_epu32(__m128h A) {
+  // CHECK-LABEL: test_mm256_cvtph_epu32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.256
+  return _mm256_cvtph_epu32(A);
+}
+
+__m256i test_mm256_mask_cvtph_epu32(__m256i A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm256_mask_cvtph_epu32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.256
+  return _mm256_mask_cvtph_epu32(A, B, C);
+}
+
+__m256i test_mm256_maskz_cvtph_epu32(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm256_maskz_cvtph_epu32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2udq.256
+  return _mm256_maskz_cvtph_epu32(A, B);
+}
+
+__m128h test_mm_cvtepi32_ph(__m128i A) {
+  // CHECK-LABEL: test_mm_cvtepi32_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtdq2ph.128
+  return _mm_cvtepi32_ph(A);
+}
+
+__m128h test_mm_mask_cvtepi32_ph(__m128h A, __mmask8 B, __m128i C) {
+  // CHECK-LABEL: test_mm_mask_cvtepi32_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtdq2ph.128
+  return _mm_mask_cvtepi32_ph(A, B, C);
+}
+
+__m128h test_mm_maskz_cvtepi32_ph(__mmask8 A, __m128i B) {
+  // CHECK-LABEL: test_mm_maskz_cvtepi32_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtdq2ph.128
+  return _mm_maskz_cvtepi32_ph(A, B);
+}
+
+__m128h test_mm256_cvtepi32_ph(__m256i A) {
+  // CHECK-LABEL: test_mm256_cvtepi32_ph
+  // CHECK: %{{.*}} = sitofp <8 x i32> %{{.*}} to <8 x half>
+  return _mm256_cvtepi32_ph(A);
+}
+
+__m128h test_mm256_mask_cvtepi32_ph(__m128h A, __mmask8 B, __m256i C) {
+  // CHECK-LABEL: test_mm256_mask_cvtepi32_ph
+  // CHECK: %{{.*}} = sitofp <8 x i32> %{{.*}} to <8 x half>
+  return _mm256_mask_cvtepi32_ph(A, B, C);
+}
+
+__m128h test_mm256_maskz_cvtepi32_ph(__mmask8 A, __m256i B) {
+  // CHECK-LABEL: test_mm256_maskz_cvtepi32_ph
+  // CHECK: %{{.*}} = sitofp <8 x i32> %{{.*}} to <8 x half>
+  return _mm256_maskz_cvtepi32_ph(A, B);
+}
+
+__m128h test_mm_cvtepu32_ph(__m128i A) {
+  // CHECK-LABEL: test_mm_cvtepu32_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtudq2ph.128
+  return _mm_cvtepu32_ph(A);
+}
+
+__m128h test_mm_mask_cvtepu32_ph(__m128h A, __mmask8 B, __m128i C) {
+  // CHECK-LABEL: test_mm_mask_cvtepu32_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtudq2ph.128
+  return _mm_mask_cvtepu32_ph(A, B, C);
+}
+
+__m128h test_mm_maskz_cvtepu32_ph(__mmask8 A, __m128i B) {
+  // CHECK-LABEL: test_mm_maskz_cvtepu32_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtudq2ph.128
+  return _mm_maskz_cvtepu32_ph(A, B);
+}
+
+__m128h test_mm256_cvtepu32_ph(__m256i A) {
+  // CHECK-LABEL: test_mm256_cvtepu32_ph
+  // CHECK: %{{.*}} = uitofp <8 x i32> %{{.*}} to <8 x half>
+  return _mm256_cvtepu32_ph(A);
+}
+
+__m128h test_mm256_mask_cvtepu32_ph(__m128h A, __mmask8 B, __m256i C) {
+  // CHECK-LABEL: test_mm256_mask_cvtepu32_ph
+  // CHECK: %{{.*}} = uitofp <8 x i32> %{{.*}} to <8 x half>
+  return _mm256_mask_cvtepu32_ph(A, B, C);
+}
+
+__m128h test_mm256_maskz_cvtepu32_ph(__mmask8 A, __m256i B) {
+  // CHECK-LABEL: test_mm256_maskz_cvtepu32_ph
+  // CHECK: %{{.*}} = uitofp <8 x i32> %{{.*}} to <8 x half>
+  return _mm256_maskz_cvtepu32_ph(A, B);
+}
+
+__m128i test_mm_cvttph_epi32(__m128h A) {
+  // CHECK-LABEL: test_mm_cvttph_epi32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.128
+  return _mm_cvttph_epi32(A);
+}
+
+__m128i test_mm_mask_cvttph_epi32(__m128i A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm_mask_cvttph_epi32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.128
+  return _mm_mask_cvttph_epi32(A, B, C);
+}
+
+__m128i test_mm_maskz_cvttph_epi32(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm_maskz_cvttph_epi32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.128
+  return _mm_maskz_cvttph_epi32(A, B);
+}
+
+__m256i test_mm256_cvttph_epi32(__m128h A) {
+  // CHECK-LABEL: test_mm256_cvttph_epi32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.256
+  return _mm256_cvttph_epi32(A);
+}
+
+__m256i test_mm256_mask_cvttph_epi32(__m256i A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm256_mask_cvttph_epi32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.256
+  return _mm256_mask_cvttph_epi32(A, B, C);
+}
+
+__m256i test_mm256_maskz_cvttph_epi32(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm256_maskz_cvttph_epi32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2dq.256
+  return _mm256_maskz_cvttph_epi32(A, B);
+}
+
+__m128i test_mm_cvttph_epu32(__m128h A) {
+  // CHECK-LABEL: test_mm_cvttph_epu32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.128
+  return _mm_cvttph_epu32(A);
+}
+
+__m128i test_mm_mask_cvttph_epu32(__m128i A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm_mask_cvttph_epu32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.128
+  return _mm_mask_cvttph_epu32(A, B, C);
+}
+
+__m128i test_mm_maskz_cvttph_epu32(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm_maskz_cvttph_epu32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.128
+  return _mm_maskz_cvttph_epu32(A, B);
+}
+
+__m256i test_mm256_cvttph_epu32(__m128h A) {
+  // CHECK-LABEL: test_mm256_cvttph_epu32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.256
+  return _mm256_cvttph_epu32(A);
+}
+
+__m256i test_mm256_mask_cvttph_epu32(__m256i A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm256_mask_cvttph_epu32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.256
+  return _mm256_mask_cvttph_epu32(A, B, C);
+}
+
+__m256i test_mm256_maskz_cvttph_epu32(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm256_maskz_cvttph_epu32
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2udq.256
+  return _mm256_maskz_cvttph_epu32(A, B);
+}
+
+__m128h test_mm_cvtepi64_ph(__m128i A) {
+  // CHECK-LABEL: test_mm_cvtepi64_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtqq2ph.128
+  return _mm_cvtepi64_ph(A);
+}
+
+__m128h test_mm_mask_cvtepi64_ph(__m128h A, __mmask8 B, __m128i C) {
+  // CHECK-LABEL: test_mm_mask_cvtepi64_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtqq2ph.128
+  return _mm_mask_cvtepi64_ph(A, B, C);
+}
+
+__m128h test_mm_maskz_cvtepi64_ph(__mmask8 A, __m128i B) {
+  // CHECK-LABEL: test_mm_maskz_cvtepi64_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtqq2ph.128
+  return _mm_maskz_cvtepi64_ph(A, B);
+}
+
+__m128h test_mm256_cvtepi64_ph(__m256i A) {
+  // CHECK-LABEL: test_mm256_cvtepi64_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtqq2ph.256
+  return _mm256_cvtepi64_ph(A);
+}
+
+__m128h test_mm256_mask_cvtepi64_ph(__m128h A, __mmask8 B, __m256i C) {
+  // CHECK-LABEL: test_mm256_mask_cvtepi64_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtqq2ph.256
+  return _mm256_mask_cvtepi64_ph(A, B, C);
+}
+
+__m128h test_mm256_maskz_cvtepi64_ph(__mmask8 A, __m256i B) {
+  // CHECK-LABEL: test_mm256_maskz_cvtepi64_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtqq2ph.256
+  return _mm256_maskz_cvtepi64_ph(A, B);
+}
+
+__m128i test_mm_cvtph_epi64(__m128h A) {
+  // CHECK-LABEL: test_mm_cvtph_epi64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.128
+  return _mm_cvtph_epi64(A);
+}
+
+__m128i test_mm_mask_cvtph_epi64(__m128i A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm_mask_cvtph_epi64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.128
+  return _mm_mask_cvtph_epi64(A, B, C);
+}
+
+__m128i test_mm_maskz_cvtph_epi64(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm_maskz_cvtph_epi64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.128
+  return _mm_maskz_cvtph_epi64(A, B);
+}
+
+__m256i test_mm256_cvtph_epi64(__m128h A) {
+  // CHECK-LABEL: test_mm256_cvtph_epi64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.256
+  return _mm256_cvtph_epi64(A);
+}
+
+__m256i test_mm256_mask_cvtph_epi64(__m256i A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm256_mask_cvtph_epi64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.256
+  return _mm256_mask_cvtph_epi64(A, B, C);
+}
+
+__m256i test_mm256_maskz_cvtph_epi64(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm256_maskz_cvtph_epi64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2qq.256
+  return _mm256_maskz_cvtph_epi64(A, B);
+}
+
+__m128h test_mm_cvtepu64_ph(__m128i A) {
+  // CHECK-LABEL: test_mm_cvtepu64_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128
+  return _mm_cvtepu64_ph(A);
+}
+
+__m128h test_mm_mask_cvtepu64_ph(__m128h A, __mmask8 B, __m128i C) {
+  // CHECK-LABEL: test_mm_mask_cvtepu64_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128
+  return _mm_mask_cvtepu64_ph(A, B, C);
+}
+
+__m128h test_mm_maskz_cvtepu64_ph(__mmask8 A, __m128i B) {
+  // CHECK-LABEL: test_mm_maskz_cvtepu64_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128
+  return _mm_maskz_cvtepu64_ph(A, B);
+}
+
+__m128h test_mm256_cvtepu64_ph(__m256i A) {
+  // CHECK-LABEL: test_mm256_cvtepu64_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256
+  return _mm256_cvtepu64_ph(A);
+}
+
+__m128h test_mm256_mask_cvtepu64_ph(__m128h A, __mmask8 B, __m256i C) {
+  // CHECK-LABEL: test_mm256_mask_cvtepu64_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256
+  return _mm256_mask_cvtepu64_ph(A, B, C);
+}
+
+__m128h test_mm256_maskz_cvtepu64_ph(__mmask8 A, __m256i B) {
+  // CHECK-LABEL: test_mm256_maskz_cvtepu64_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256
+  return _mm256_maskz_cvtepu64_ph(A, B);
+}
+
+__m128i test_mm_cvtph_epu64(__m128h A) {
+  // CHECK-LABEL: test_mm_cvtph_epu64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.128
+  return _mm_cvtph_epu64(A);
+}
+
+__m128i test_mm_mask_cvtph_epu64(__m128i A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm_mask_cvtph_epu64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.128
+  return _mm_mask_cvtph_epu64(A, B, C);
+}
+
+__m128i test_mm_maskz_cvtph_epu64(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm_maskz_cvtph_epu64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.128
+  return _mm_maskz_cvtph_epu64(A, B);
+}
+
+__m256i test_mm256_cvtph_epu64(__m128h A) {
+  // CHECK-LABEL: test_mm256_cvtph_epu64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.256
+  return _mm256_cvtph_epu64(A);
+}
+
+__m256i test_mm256_mask_cvtph_epu64(__m256i A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm256_mask_cvtph_epu64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.256
+  return _mm256_mask_cvtph_epu64(A, B, C);
+}
+
+__m256i test_mm256_maskz_cvtph_epu64(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm256_maskz_cvtph_epu64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2uqq.256
+  return _mm256_maskz_cvtph_epu64(A, B);
+}
+
+__m128i test_mm_cvttph_epi64(__m128h A) {
+  // CHECK-LABEL: test_mm_cvttph_epi64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.128
+  return _mm_cvttph_epi64(A);
+}
+
+__m128i test_mm_mask_cvttph_epi64(__m128i A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm_mask_cvttph_epi64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.128
+  return _mm_mask_cvttph_epi64(A, B, C);
+}
+
+__m128i test_mm_maskz_cvttph_epi64(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm_maskz_cvttph_epi64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.128
+  return _mm_maskz_cvttph_epi64(A, B);
+}
+
+__m256i test_mm256_cvttph_epi64(__m128h A) {
+  // CHECK-LABEL: test_mm256_cvttph_epi64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.256
+  return _mm256_cvttph_epi64(A);
+}
+
+__m256i test_mm256_mask_cvttph_epi64(__m256i A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm256_mask_cvttph_epi64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.256
+  return _mm256_mask_cvttph_epi64(A, B, C);
+}
+
+__m256i test_mm256_maskz_cvttph_epi64(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm256_maskz_cvttph_epi64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2qq.256
+  return _mm256_maskz_cvttph_epi64(A, B);
+}
+
+__m128i test_mm_cvttph_epu64(__m128h A) {
+  // CHECK-LABEL: test_mm_cvttph_epu64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.128
+  return _mm_cvttph_epu64(A);
+}
+
+__m128i test_mm_mask_cvttph_epu64(__m128i A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm_mask_cvttph_epu64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.128
+  return _mm_mask_cvttph_epu64(A, B, C);
+}
+
+__m128i test_mm_maskz_cvttph_epu64(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm_maskz_cvttph_epu64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.128
+  return _mm_maskz_cvttph_epu64(A, B);
+}
+
+__m256i test_mm256_cvttph_epu64(__m128h A) {
+  // CHECK-LABEL: test_mm256_cvttph_epu64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.256
+  return _mm256_cvttph_epu64(A);
+}
+
+__m256i test_mm256_mask_cvttph_epu64(__m256i A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm256_mask_cvttph_epu64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.256
+  return _mm256_mask_cvttph_epu64(A, B, C);
+}
+
+__m256i test_mm256_maskz_cvttph_epu64(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm256_maskz_cvttph_epu64
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvttph2uqq.256
+  return _mm256_maskz_cvttph_epu64(A, B);
+}
+
+__m128 test_mm_cvtxph_ps(__m128h A) {
+  // CHECK-LABEL: test_mm_cvtxph_ps
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.128
+  return _mm_cvtxph_ps(A);
+}
+
+__m128 test_mm_mask_cvtxph_ps(__m128 A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm_mask_cvtxph_ps
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.128
+  return _mm_mask_cvtxph_ps(A, B, C);
+}
+
+__m128 test_mm_maskz_cvtxph_ps(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm_maskz_cvtxph_ps
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.128
+  return _mm_maskz_cvtxph_ps(A, B);
+}
+
+__m256 test_mm256_cvtxph_ps(__m128h A) {
+  // CHECK-LABEL: test_mm256_cvtxph_ps
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.256
+  return _mm256_cvtxph_ps(A);
+}
+
+__m256 test_mm256_mask_cvtxph_ps(__m256 A, __mmask8 B, __m128h C) {
+  // CHECK-LABEL: test_mm256_mask_cvtxph_ps
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.256
+  return _mm256_mask_cvtxph_ps(A, B, C);
+}
+
+__m256 test_mm256_maskz_cvtxph_ps(__mmask8 A, __m128h B) {
+  // CHECK-LABEL: test_mm256_maskz_cvtxph_ps
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtph2psx.256
+  return _mm256_maskz_cvtxph_ps(A, B);
+}
+
+__m128h test_mm_cvtxps_ph(__m128 A) {
+  // CHECK-LABEL: test_mm_cvtxps_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.128
+  return _mm_cvtxps_ph(A);
+}
+
+__m128h test_mm_mask_cvtxps_ph(__m128h A, __mmask8 B, __m128 C) {
+  // CHECK-LABEL: test_mm_mask_cvtxps_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.128
+  return _mm_mask_cvtxps_ph(A, B, C);
+}
+
+__m128h test_mm_maskz_cvtxps_ph(__mmask8 A, __m128 B) {
+  // CHECK-LABEL: test_mm_maskz_cvtxps_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.128
+  return _mm_maskz_cvtxps_ph(A, B);
+}
+
+__m128h test_mm256_cvtxps_ph(__m256 A) {
+  // CHECK-LABEL: test_mm256_cvtxps_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.256
+  return _mm256_cvtxps_ph(A);
+}
+
+__m128h test_mm256_mask_cvtxps_ph(__m128h A, __mmask8 B, __m256 C) {
+  // CHECK-LABEL: test_mm256_mask_cvtxps_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.256
+  return _mm256_mask_cvtxps_ph(A, B, C);
+}
+
+__m128h test_mm256_maskz_cvtxps_ph(__mmask8 A, __m256 B) {
+  // CHECK-LABEL: test_mm256_maskz_cvtxps_ph
+  // CHECK: @llvm.x86.avx512fp16.mask.vcvtps2phx.256
+  return _mm256_maskz_cvtxps_ph(A, B);
+}
+
 __m128h test_mm_mask_blend_ph(__mmask8 __U, __m128h __A, __m128h __W) {
   // CHECK-LABEL: @test_mm_mask_blend_ph
   // CHECK:  %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>

diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index eba83493e686d..72e9c3404775d 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -5224,4 +5224,321 @@ let TargetPrefix = "x86" in {
         Intrinsic<[ llvm_i32_ty ],
                   [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ],
                   [ IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>> ]>;
+
+  def int_x86_avx512fp16_mask_vcvtph2psx_128
+      : GCCBuiltin<"__builtin_ia32_vcvtph2psx128_mask">,
+        Intrinsic<[ llvm_v4f32_ty ],
+                  [ llvm_v8f16_ty, llvm_v4f32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvtph2psx_256
+      : GCCBuiltin<"__builtin_ia32_vcvtph2psx256_mask">,
+        Intrinsic<[ llvm_v8f32_ty ],
+                  [ llvm_v8f16_ty, llvm_v8f32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvtph2psx_512
+      : GCCBuiltin<"__builtin_ia32_vcvtph2psx512_mask">,
+        Intrinsic<[ llvm_v16f32_ty ],
+                  [ llvm_v16f16_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+  def int_x86_avx512fp16_mask_vcvtps2phx_128
+      : GCCBuiltin<"__builtin_ia32_vcvtps2phx128_mask">,
+        Intrinsic<[ llvm_v8f16_ty ],
+                  [ llvm_v4f32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvtps2phx_256
+      : GCCBuiltin<"__builtin_ia32_vcvtps2phx256_mask">,
+        Intrinsic<[ llvm_v8f16_ty ],
+                  [ llvm_v8f32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvtps2phx_512
+      : GCCBuiltin<"__builtin_ia32_vcvtps2phx512_mask">,
+        Intrinsic<[ llvm_v16f16_ty ],
+                  [ llvm_v16f32_ty, llvm_v16f16_ty, llvm_i16_ty, llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+  def int_x86_avx512fp16_mask_vcvtpd2ph_128
+      : GCCBuiltin<"__builtin_ia32_vcvtpd2ph128_mask">,
+        Intrinsic<[ llvm_v8f16_ty ],
+                  [ llvm_v2f64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvtpd2ph_256
+      : GCCBuiltin<"__builtin_ia32_vcvtpd2ph256_mask">,
+        Intrinsic<[ llvm_v8f16_ty ],
+                  [ llvm_v4f64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvtpd2ph_512
+      : GCCBuiltin<"__builtin_ia32_vcvtpd2ph512_mask">,
+        Intrinsic<[ llvm_v8f16_ty ],
+                  [ llvm_v8f64_ty, llvm_v8f16_ty, llvm_i8_ty, llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+  def int_x86_avx512fp16_mask_vcvtph2pd_128
+      : GCCBuiltin<"__builtin_ia32_vcvtph2pd128_mask">,
+        Intrinsic<[ llvm_v2f64_ty ],
+                  [ llvm_v8f16_ty, llvm_v2f64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvtph2pd_256
+      : GCCBuiltin<"__builtin_ia32_vcvtph2pd256_mask">,
+        Intrinsic<[ llvm_v4f64_ty ],
+                  [ llvm_v8f16_ty, llvm_v4f64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvtph2pd_512
+      : GCCBuiltin<"__builtin_ia32_vcvtph2pd512_mask">,
+        Intrinsic<[ llvm_v8f64_ty ],
+                  [ llvm_v8f16_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+  def int_x86_avx512fp16_mask_vcvtsh2ss_round
+      : GCCBuiltin<"__builtin_ia32_vcvtsh2ss_round_mask">,
+        Intrinsic<[ llvm_v4f32_ty ],
+                  [ llvm_v4f32_ty, llvm_v8f16_ty, llvm_v4f32_ty, llvm_i8_ty,
+                    llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+  def int_x86_avx512fp16_mask_vcvtss2sh_round
+      : GCCBuiltin<"__builtin_ia32_vcvtss2sh_round_mask">,
+        Intrinsic<[ llvm_v8f16_ty ],
+                  [ llvm_v8f16_ty, llvm_v4f32_ty, llvm_v8f16_ty, llvm_i8_ty,
+                    llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+  def int_x86_avx512fp16_mask_vcvtsd2sh_round
+      : GCCBuiltin<"__builtin_ia32_vcvtsd2sh_round_mask">,
+        Intrinsic<[ llvm_v8f16_ty ],
+                  [ llvm_v8f16_ty, llvm_v2f64_ty, llvm_v8f16_ty, llvm_i8_ty,
+                    llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+  def int_x86_avx512fp16_mask_vcvtsh2sd_round
+      : GCCBuiltin<"__builtin_ia32_vcvtsh2sd_round_mask">,
+        Intrinsic<[ llvm_v2f64_ty ],
+                  [ llvm_v2f64_ty, llvm_v8f16_ty, llvm_v2f64_ty, llvm_i8_ty,
+                    llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+
+  def int_x86_avx512fp16_mask_vcvtph2w_128
+      : GCCBuiltin<"__builtin_ia32_vcvtph2w128_mask">,
+        Intrinsic<[ llvm_v8i16_ty ],
+                  [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvtph2w_256
+      : GCCBuiltin<"__builtin_ia32_vcvtph2w256_mask">,
+        Intrinsic<[ llvm_v16i16_ty ],
+                  [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ],
+                  [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvtph2w_512
+      : GCCBuiltin<"__builtin_ia32_vcvtph2w512_mask">,
+        Intrinsic<[ llvm_v32i16_ty ],
+                  [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+  def int_x86_avx512fp16_mask_vcvttph2w_128
+      : GCCBuiltin<"__builtin_ia32_vcvttph2w128_mask">,
+        Intrinsic<[ llvm_v8i16_ty ],
+                  [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvttph2w_256
+      : GCCBuiltin<"__builtin_ia32_vcvttph2w256_mask">,
+        Intrinsic<[ llvm_v16i16_ty ],
+                  [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ],
+                  [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvttph2w_512
+      : GCCBuiltin<"__builtin_ia32_vcvttph2w512_mask">,
+        Intrinsic<[ llvm_v32i16_ty ],
+                  [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+  def int_x86_avx512fp16_mask_vcvtph2uw_128
+      : GCCBuiltin<"__builtin_ia32_vcvtph2uw128_mask">,
+        Intrinsic<[ llvm_v8i16_ty ],
+                  [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvtph2uw_256
+      : GCCBuiltin<"__builtin_ia32_vcvtph2uw256_mask">,
+        Intrinsic<[ llvm_v16i16_ty ],
+                  [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ],
+                  [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvtph2uw_512
+      : GCCBuiltin<"__builtin_ia32_vcvtph2uw512_mask">,
+        Intrinsic<[ llvm_v32i16_ty ],
+                  [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+  def int_x86_avx512fp16_mask_vcvttph2uw_128
+      : GCCBuiltin<"__builtin_ia32_vcvttph2uw128_mask">,
+        Intrinsic<[ llvm_v8i16_ty ],
+                  [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvttph2uw_256
+      : GCCBuiltin<"__builtin_ia32_vcvttph2uw256_mask">,
+        Intrinsic<[ llvm_v16i16_ty ],
+                  [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ],
+                  [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvttph2uw_512
+      : GCCBuiltin<"__builtin_ia32_vcvttph2uw512_mask">,
+        Intrinsic<[ llvm_v32i16_ty ],
+                  [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+
+  def int_x86_avx512fp16_mask_vcvtph2dq_128
+      : GCCBuiltin<"__builtin_ia32_vcvtph2dq128_mask">,
+        Intrinsic<[ llvm_v4i32_ty ],
+                  [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvtph2dq_256
+      : GCCBuiltin<"__builtin_ia32_vcvtph2dq256_mask">,
+        Intrinsic<[ llvm_v8i32_ty ],
+                  [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvtph2dq_512
+      : GCCBuiltin<"__builtin_ia32_vcvtph2dq512_mask">,
+        Intrinsic<[ llvm_v16i32_ty ],
+                  [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+  def int_x86_avx512fp16_mask_vcvtph2udq_128
+      : GCCBuiltin<"__builtin_ia32_vcvtph2udq128_mask">,
+        Intrinsic<[ llvm_v4i32_ty ],
+                  [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvtph2udq_256
+      : GCCBuiltin<"__builtin_ia32_vcvtph2udq256_mask">,
+        Intrinsic<[ llvm_v8i32_ty ],
+                  [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvtph2udq_512
+      : GCCBuiltin<"__builtin_ia32_vcvtph2udq512_mask">,
+        Intrinsic<[ llvm_v16i32_ty ],
+                  [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+  def int_x86_avx512fp16_mask_vcvtdq2ph_128
+      : GCCBuiltin<"__builtin_ia32_vcvtdq2ph128_mask">,
+        Intrinsic<[ llvm_v8f16_ty ],
+                  [ llvm_v4i32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvtudq2ph_128
+      : GCCBuiltin<"__builtin_ia32_vcvtudq2ph128_mask">,
+        Intrinsic<[ llvm_v8f16_ty ],
+                  [ llvm_v4i32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvttph2dq_128
+      : GCCBuiltin<"__builtin_ia32_vcvttph2dq128_mask">,
+        Intrinsic<[ llvm_v4i32_ty ],
+                  [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvttph2dq_256
+      : GCCBuiltin<"__builtin_ia32_vcvttph2dq256_mask">,
+        Intrinsic<[ llvm_v8i32_ty ],
+                  [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvttph2dq_512
+      : GCCBuiltin<"__builtin_ia32_vcvttph2dq512_mask">,
+        Intrinsic<[ llvm_v16i32_ty ],
+                  [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+  def int_x86_avx512fp16_mask_vcvttph2udq_128
+      : GCCBuiltin<"__builtin_ia32_vcvttph2udq128_mask">,
+        Intrinsic<[ llvm_v4i32_ty ],
+                  [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvttph2udq_256
+      : GCCBuiltin<"__builtin_ia32_vcvttph2udq256_mask">,
+        Intrinsic<[ llvm_v8i32_ty ],
+                  [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvttph2udq_512
+      : GCCBuiltin<"__builtin_ia32_vcvttph2udq512_mask">,
+        Intrinsic<[ llvm_v16i32_ty ],
+                  [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+
+  def int_x86_avx512fp16_mask_vcvtqq2ph_128
+      : GCCBuiltin<"__builtin_ia32_vcvtqq2ph128_mask">,
+        Intrinsic<[ llvm_v8f16_ty ],
+                  [ llvm_v2i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvtqq2ph_256
+      : GCCBuiltin<"__builtin_ia32_vcvtqq2ph256_mask">,
+        Intrinsic<[ llvm_v8f16_ty ],
+                  [ llvm_v4i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvtph2qq_128
+      : GCCBuiltin<"__builtin_ia32_vcvtph2qq128_mask">,
+        Intrinsic<[ llvm_v2i64_ty ],
+                  [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvtph2qq_256
+      : GCCBuiltin<"__builtin_ia32_vcvtph2qq256_mask">,
+        Intrinsic<[ llvm_v4i64_ty ],
+                  [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvtph2qq_512
+      : GCCBuiltin<"__builtin_ia32_vcvtph2qq512_mask">,
+        Intrinsic<[ llvm_v8i64_ty ],
+                  [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+  def int_x86_avx512fp16_mask_vcvtuqq2ph_128
+      : GCCBuiltin<"__builtin_ia32_vcvtuqq2ph128_mask">,
+        Intrinsic<[ llvm_v8f16_ty ],
+                  [ llvm_v2i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvtuqq2ph_256
+      : GCCBuiltin<"__builtin_ia32_vcvtuqq2ph256_mask">,
+        Intrinsic<[ llvm_v8f16_ty ],
+                  [ llvm_v4i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvtph2uqq_128
+      : GCCBuiltin<"__builtin_ia32_vcvtph2uqq128_mask">,
+        Intrinsic<[ llvm_v2i64_ty ],
+                  [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvtph2uqq_256
+      : GCCBuiltin<"__builtin_ia32_vcvtph2uqq256_mask">,
+        Intrinsic<[ llvm_v4i64_ty ],
+                  [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvtph2uqq_512
+      : GCCBuiltin<"__builtin_ia32_vcvtph2uqq512_mask">,
+        Intrinsic<[ llvm_v8i64_ty ],
+                  [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+  def int_x86_avx512fp16_mask_vcvttph2qq_128
+      : GCCBuiltin<"__builtin_ia32_vcvttph2qq128_mask">,
+        Intrinsic<[ llvm_v2i64_ty ],
+                  [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvttph2qq_256
+      : GCCBuiltin<"__builtin_ia32_vcvttph2qq256_mask">,
+        Intrinsic<[ llvm_v4i64_ty ],
+                  [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvttph2qq_512
+      : GCCBuiltin<"__builtin_ia32_vcvttph2qq512_mask">,
+        Intrinsic<[ llvm_v8i64_ty ],
+                  [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+  def int_x86_avx512fp16_mask_vcvttph2uqq_128
+      : GCCBuiltin<"__builtin_ia32_vcvttph2uqq128_mask">,
+        Intrinsic<[ llvm_v2i64_ty ],
+                  [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvttph2uqq_256
+      : GCCBuiltin<"__builtin_ia32_vcvttph2uqq256_mask">,
+        Intrinsic<[ llvm_v4i64_ty ],
+                  [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+  def int_x86_avx512fp16_mask_vcvttph2uqq_512
+      : GCCBuiltin<"__builtin_ia32_vcvttph2uqq512_mask">,
+        Intrinsic<[ llvm_v8i64_ty ],
+                  [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+
+  def int_x86_avx512fp16_vcvtsh2si32
+      : GCCBuiltin<"__builtin_ia32_vcvtsh2si32">,
+        Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+  def int_x86_avx512fp16_vcvtsh2usi32
+      : GCCBuiltin<"__builtin_ia32_vcvtsh2usi32">,
+        Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+  def int_x86_avx512fp16_vcvtsh2si64
+      : GCCBuiltin<"__builtin_ia32_vcvtsh2si64">,
+        Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+  def int_x86_avx512fp16_vcvtsh2usi64
+      : GCCBuiltin<"__builtin_ia32_vcvtsh2usi64">,
+        Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+  def int_x86_avx512fp16_vcvtusi2sh
+      : GCCBuiltin<"__builtin_ia32_vcvtusi2sh">,
+        Intrinsic<[ llvm_v8f16_ty ],
+                  [ llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+  def int_x86_avx512fp16_vcvtusi642sh
+      : GCCBuiltin<"__builtin_ia32_vcvtusi642sh">,
+        Intrinsic<[ llvm_v8f16_ty ],
+                  [ llvm_v8f16_ty, llvm_i64_ty, llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+  def int_x86_avx512fp16_vcvtsi2sh
+      : GCCBuiltin<"__builtin_ia32_vcvtsi2sh">,
+        Intrinsic<[ llvm_v8f16_ty ],
+                  [ llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+  def int_x86_avx512fp16_vcvtsi642sh
+      : GCCBuiltin<"__builtin_ia32_vcvtsi642sh">,
+        Intrinsic<[ llvm_v8f16_ty ],
+                  [ llvm_v8f16_ty, llvm_i64_ty, llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+  def int_x86_avx512fp16_vcvttsh2si32
+      : GCCBuiltin<"__builtin_ia32_vcvttsh2si32">,
+        Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+  def int_x86_avx512fp16_vcvttsh2si64
+      : GCCBuiltin<"__builtin_ia32_vcvttsh2si64">,
+        Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+  def int_x86_avx512fp16_vcvttsh2usi32
+      : GCCBuiltin<"__builtin_ia32_vcvttsh2usi32">,
+        Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+  def int_x86_avx512fp16_vcvttsh2usi64
+      : GCCBuiltin<"__builtin_ia32_vcvttsh2usi64">,
+        Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+                  [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
 }

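[Editor's note: a minimal, hedged sketch of how one of the newly declared masked conversion intrinsics above is referenced from LLVM IR. The function name is invented for illustration, and the trailing i32 4 assumes the usual _MM_FROUND_CUR_DIRECTION encoding for the ImmArg rounding operand; see the avx512fp16-cvt-* tests added by this commit for the authoritative usage.]

  declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half>, <16 x i32>, i16, i32)

  define <16 x i32> @cvtph2dq_512_example(<16 x half> %src, <16 x i32> %passthru, i16 %mask) {
    ; Convert 16 half elements to i32 under a write mask, using the current rounding mode (4).
    %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half> %src, <16 x i32> %passthru, i16 %mask, i32 4)
    ret <16 x i32> %res
  }
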
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index c73172612b1e1..dd28780ac946f 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -287,6 +287,7 @@ HANDLE_LIBCALL(FPEXT_F80_F128, "__extendxftf2")
 HANDLE_LIBCALL(FPEXT_F64_F128, "__extenddftf2")
 HANDLE_LIBCALL(FPEXT_F32_F128, "__extendsftf2")
 HANDLE_LIBCALL(FPEXT_F16_F128, "__extendhftf2")
+HANDLE_LIBCALL(FPEXT_F16_F80, "__extendhfxf2")
 HANDLE_LIBCALL(FPEXT_F32_F64, "__extendsfdf2")
 HANDLE_LIBCALL(FPEXT_F16_F64, "__extendhfdf2")
 HANDLE_LIBCALL(FPEXT_F16_F32, "__gnu_h2f_ieee")

diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 74946c09fad96..201697c37140f 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -237,6 +237,8 @@ RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
       return FPEXT_F16_F32;
     if (RetVT == MVT::f64)
       return FPEXT_F16_F64;
+    if (RetVT == MVT::f80)
+      return FPEXT_F16_F80;
     if (RetVT == MVT::f128)
       return FPEXT_F16_F128;
   } else if (OpVT == MVT::f32) {

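[Editor's note: as a quick illustration (a sketch, not part of the patch), the two hunks above make an f16 -> f80 extension resolve to the new FPEXT_F16_F80 libcall, so IR like the following is expected to lower to a call to __extendhfxf2 when the extend is not otherwise legal. The function name is illustrative.]

  define x86_fp80 @extend_half_to_fp80(half %x) {
    ; fpext f16 -> f80 maps to RTLIB::FPEXT_F16_F80 (__extendhfxf2).
    %res = fpext half %x to x86_fp80
    ret x86_fp80 %res
  }
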
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c191d4d19fa84..9d735c8b3b0af 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1931,6 +1931,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::SETCC,                MVT::f16, Custom);
     setOperationAction(ISD::STRICT_FSETCC,        MVT::f16, Custom);
     setOperationAction(ISD::STRICT_FSETCCS,       MVT::f16, Custom);
+    setOperationAction(ISD::FP_ROUND,             MVT::f16, Custom);
+    setOperationAction(ISD::STRICT_FP_ROUND,      MVT::f16, Custom);
+    setOperationAction(ISD::STRICT_FP_EXTEND,     MVT::f32, Legal);
+    if (isTypeLegal(MVT::f80)) {
+      setOperationAction(ISD::FP_EXTEND,          MVT::f80, Custom);
+      setOperationAction(ISD::STRICT_FP_EXTEND,   MVT::f80, Custom);
+    }
 
     setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand);
     setCondCodeAction(ISD::SETUNE, MVT::f16, Expand);
@@ -1939,8 +1946,31 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setGroup(MVT::v32f16);
       addRegisterClass(MVT::v32f16, &X86::VR512RegClass);
       setOperationAction(ISD::SCALAR_TO_VECTOR,       MVT::v32f16, Custom);
+      setOperationAction(ISD::SINT_TO_FP,             MVT::v32i16, Legal);
+      setOperationAction(ISD::STRICT_SINT_TO_FP,      MVT::v32i16, Legal);
+      setOperationAction(ISD::UINT_TO_FP,             MVT::v32i16, Legal);
+      setOperationAction(ISD::STRICT_UINT_TO_FP,      MVT::v32i16, Legal);
+      setOperationAction(ISD::STRICT_FP_ROUND,        MVT::v16f16, Legal);
+      setOperationAction(ISD::STRICT_FP_EXTEND,       MVT::v16f32, Legal);
       setOperationAction(ISD::INSERT_VECTOR_ELT,      MVT::v32f16, Custom);
 
+      setOperationAction(ISD::FP_TO_SINT,             MVT::v32i16, Custom);
+      setOperationAction(ISD::STRICT_FP_TO_SINT,      MVT::v32i16, Custom);
+      setOperationAction(ISD::FP_TO_UINT,             MVT::v32i16, Custom);
+      setOperationAction(ISD::STRICT_FP_TO_UINT,      MVT::v32i16, Custom);
+      setOperationPromotedToType(ISD::FP_TO_SINT,     MVT::v32i8,  MVT::v32i16);
+      setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i8,
+                                 MVT::v32i16);
+      setOperationPromotedToType(ISD::FP_TO_UINT,     MVT::v32i8,  MVT::v32i16);
+      setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i8,
+                                 MVT::v32i16);
+      setOperationPromotedToType(ISD::FP_TO_SINT,     MVT::v32i1,  MVT::v32i16);
+      setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i1,
+                                 MVT::v32i16);
+      setOperationPromotedToType(ISD::FP_TO_UINT,     MVT::v32i1,  MVT::v32i16);
+      setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i1,
+                                 MVT::v32i16);
+
       setOperationAction(ISD::EXTRACT_SUBVECTOR,      MVT::v16f16, Legal);
       setOperationAction(ISD::INSERT_SUBVECTOR,       MVT::v32f16, Legal);
       setOperationAction(ISD::CONCAT_VECTORS,         MVT::v32f16, Custom);
@@ -1960,6 +1990,21 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
 
       setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8f16,  Legal);
       setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16f16, Custom);
+      setOperationAction(ISD::SINT_TO_FP,         MVT::v16i16, Legal);
+      setOperationAction(ISD::STRICT_SINT_TO_FP,  MVT::v16i16, Legal);
+      setOperationAction(ISD::SINT_TO_FP,         MVT::v8i16,  Legal);
+      setOperationAction(ISD::STRICT_SINT_TO_FP,  MVT::v8i16,  Legal);
+      setOperationAction(ISD::UINT_TO_FP,         MVT::v16i16, Legal);
+      setOperationAction(ISD::STRICT_UINT_TO_FP,  MVT::v16i16, Legal);
+      setOperationAction(ISD::UINT_TO_FP,         MVT::v8i16,  Legal);
+      setOperationAction(ISD::STRICT_UINT_TO_FP,  MVT::v8i16,  Legal);
+
+      setOperationAction(ISD::FP_TO_SINT,         MVT::v8i16, Custom);
+      setOperationAction(ISD::STRICT_FP_TO_SINT,  MVT::v8i16, Custom);
+      setOperationAction(ISD::FP_TO_UINT,         MVT::v8i16, Custom);
+      setOperationAction(ISD::STRICT_FP_TO_UINT,  MVT::v8i16, Custom);
+      setOperationAction(ISD::STRICT_FP_ROUND,    MVT::v8f16, Legal);
+      setOperationAction(ISD::STRICT_FP_EXTEND,   MVT::v8f32, Legal);
 
       // INSERT_VECTOR_ELT v8f16 extended to VECTOR_SHUFFLE
       setOperationAction(ISD::INSERT_VECTOR_ELT,    MVT::v8f16,  Custom);
@@ -2001,6 +2046,37 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setTruncStoreAction(MVT::v8i16,   MVT::v8i8,  Legal);
     }
 
+    if (Subtarget.hasFP16()) {
+      // vcvttph2[u]dq v4f16 -> v4i32/64, v2f16 -> v2i32/64
+      setOperationAction(ISD::FP_TO_SINT,        MVT::v2f16, Custom);
+      setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f16, Custom);
+      setOperationAction(ISD::FP_TO_UINT,        MVT::v2f16, Custom);
+      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f16, Custom);
+      setOperationAction(ISD::FP_TO_SINT,        MVT::v4f16, Custom);
+      setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f16, Custom);
+      setOperationAction(ISD::FP_TO_UINT,        MVT::v4f16, Custom);
+      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f16, Custom);
+      // vcvt[u]dq2ph v4i32/64 -> v4f16, v2i32/64 -> v2f16
+      setOperationAction(ISD::SINT_TO_FP,        MVT::v2f16, Custom);
+      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f16, Custom);
+      setOperationAction(ISD::UINT_TO_FP,        MVT::v2f16, Custom);
+      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f16, Custom);
+      setOperationAction(ISD::SINT_TO_FP,        MVT::v4f16, Custom);
+      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f16, Custom);
+      setOperationAction(ISD::UINT_TO_FP,        MVT::v4f16, Custom);
+      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f16, Custom);
+      // vcvtps2phx v4f32 -> v4f16, v2f32 -> v2f16
+      setOperationAction(ISD::FP_ROUND,          MVT::v2f16, Custom);
+      setOperationAction(ISD::STRICT_FP_ROUND,   MVT::v2f16, Custom);
+      setOperationAction(ISD::FP_ROUND,          MVT::v4f16, Custom);
+      setOperationAction(ISD::STRICT_FP_ROUND,   MVT::v4f16, Custom);
+      // vcvtph2psx v4f16 -> v4f32, v2f16 -> v2f32
+      setOperationAction(ISD::FP_EXTEND,         MVT::v2f16, Custom);
+      setOperationAction(ISD::STRICT_FP_EXTEND,  MVT::v2f16, Custom);
+      setOperationAction(ISD::FP_EXTEND,         MVT::v4f16, Custom);
+      setOperationAction(ISD::STRICT_FP_EXTEND,  MVT::v4f16, Custom);
+    }
+
     setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
     setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
     setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
@@ -19993,6 +20069,43 @@ static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG,
                      DAG.getIntPtrConstant(0, dl));
 }
 
+// Try to use a packed vector operation to handle i64 on 32-bit targets.
+static SDValue LowerI64IntToFP16(SDValue Op, SelectionDAG &DAG,
+                                 const X86Subtarget &Subtarget) {
+  assert((Op.getOpcode() == ISD::SINT_TO_FP ||
+          Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
+          Op.getOpcode() == ISD::STRICT_UINT_TO_FP ||
+          Op.getOpcode() == ISD::UINT_TO_FP) &&
+         "Unexpected opcode!");
+  bool IsStrict = Op->isStrictFPOpcode();
+  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
+  MVT SrcVT = Src.getSimpleValueType();
+  MVT VT = Op.getSimpleValueType();
+
+  if (SrcVT != MVT::i64 || Subtarget.is64Bit() || VT != MVT::f16)
+    return SDValue();
+
+  // Pack the i64 into a vector, do the operation and extract.
+
+  assert(Subtarget.hasFP16() && "Expected FP16");
+
+  SDLoc dl(Op);
+  SDValue InVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Src);
+  if (IsStrict) {
+    SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, {MVT::v2f16, MVT::Other},
+                                 {Op.getOperand(0), InVec});
+    SDValue Chain = CvtVec.getValue(1);
+    SDValue Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
+                                DAG.getIntPtrConstant(0, dl));
+    return DAG.getMergeValues({Value, Chain}, dl);
+  }
+
+  SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, MVT::v2f16, InVec);
+
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
+                     DAG.getIntPtrConstant(0, dl));
+}
+
 static bool useVectorCast(unsigned Opcode, MVT FromVT, MVT ToVT,
                           const X86Subtarget &Subtarget) {
   switch (Opcode) {
@@ -20245,6 +20358,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
 
   if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
     return V;
+  if (SDValue V = LowerI64IntToFP16(Op, DAG, Subtarget))
+    return V;
 
   // SSE doesn't have an i16 conversion so we need to promote.
   if (SrcVT == MVT::i16 && (UseSSEReg || VT == MVT::f128)) {
@@ -20724,6 +20839,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
 
   if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
     return V;
+  if (SDValue V = LowerI64IntToFP16(Op, DAG, Subtarget))
+    return V;
 
   // The transform for i64->f64 isn't correct for 0 when rounding to negative
   // infinity. It produces -0.0, so disable under strictfp.
@@ -21505,9 +21622,11 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
                   Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
   MVT VT = Op->getSimpleValueType(0);
   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
+  SDValue Chain = IsStrict ? Op->getOperand(0) : SDValue();
   MVT SrcVT = Src.getSimpleValueType();
   SDLoc dl(Op);
 
+  SDValue Res;
   if (VT.isVector()) {
     if (VT == MVT::v2i1 && SrcVT == MVT::v2f64) {
       MVT ResVT = MVT::v4i32;
@@ -21532,10 +21651,8 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
         Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8f64, Tmp, Src,
                           DAG.getIntPtrConstant(0, dl));
       }
-      SDValue Res, Chain;
       if (IsStrict) {
-        Res =
-            DAG.getNode(Opc, dl, {ResVT, MVT::Other}, {Op->getOperand(0), Src});
+        Res = DAG.getNode(Opc, dl, {ResVT, MVT::Other}, {Chain, Src});
         Chain = Res.getValue(1);
       } else {
         Res = DAG.getNode(Opc, dl, ResVT, Src);
@@ -21549,6 +21666,67 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
       return Res;
     }
 
+    if (Subtarget.hasFP16() && SrcVT.getVectorElementType() == MVT::f16) {
+      if (VT == MVT::v8i16 || VT == MVT::v16i16 || VT == MVT::v32i16)
+        return Op;
+
+      MVT ResVT = VT;
+      MVT EleVT = VT.getVectorElementType();
+      if (EleVT != MVT::i64)
+        ResVT = EleVT == MVT::i32 ? MVT::v4i32 : MVT::v8i16;
+
+      if (SrcVT != MVT::v8f16) {
+        SDValue Tmp =
+            IsStrict ? DAG.getConstantFP(0.0, dl, SrcVT) : DAG.getUNDEF(SrcVT);
+        SmallVector<SDValue, 4> Ops(SrcVT == MVT::v2f16 ? 4 : 2, Tmp);
+        Ops[0] = Src;
+        Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8f16, Ops);
+      }
+
+      if (IsStrict) {
+        Res = DAG.getNode(IsSigned ? X86ISD::STRICT_CVTTP2SI
+                                   : X86ISD::STRICT_CVTTP2UI,
+                          dl, {ResVT, MVT::Other}, {Chain, Src});
+        Chain = Res.getValue(1);
+      } else {
+        Res = DAG.getNode(IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI, dl,
+                          ResVT, Src);
+      }
+
+      // TODO: Need to add exception check code for strict FP.
+      if (EleVT.getSizeInBits() < 16) {
+        ResVT = MVT::getVectorVT(EleVT, 8);
+        Res = DAG.getNode(ISD::TRUNCATE, dl, ResVT, Res);
+      }
+
+      if (ResVT != VT)
+        Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
+                          DAG.getIntPtrConstant(0, dl));
+
+      if (IsStrict)
+        return DAG.getMergeValues({Res, Chain}, dl);
+      return Res;
+    }
+
+    if (VT == MVT::v8i16 && (SrcVT == MVT::v8f32 || SrcVT == MVT::v8f64)) {
+      if (IsStrict) {
+        Res = DAG.getNode(IsSigned ? ISD::STRICT_FP_TO_SINT
+                                   : ISD::STRICT_FP_TO_UINT,
+                          dl, {MVT::v8i32, MVT::Other}, {Chain, Src});
+        Chain = Res.getValue(1);
+      } else {
+        Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl,
+                          MVT::v8i32, Src);
+      }
+
+      // TODO: Need to add exception check code for strict FP.
+      Res = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i16, Res);
+
+      if (IsStrict)
+        return DAG.getMergeValues({Res, Chain}, dl);
+      return Res;
+    }
+
     // v8f64->v8i32 is legal, but we need v8i32 to be custom for v8f32.
     if (VT == MVT::v8i32 && SrcVT == MVT::v8f64) {
       assert(!IsSigned && "Expected unsigned conversion!");
@@ -21572,10 +21750,9 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
       Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Tmp, Src,
                         DAG.getIntPtrConstant(0, dl));
 
-      SDValue Res, Chain;
       if (IsStrict) {
         Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, dl, {ResVT, MVT::Other},
-                          {Op->getOperand(0), Src});
+                          {Chain, Src});
         Chain = Res.getValue(1);
       } else {
         Res = DAG.getNode(ISD::FP_TO_UINT, dl, ResVT, Src);
@@ -21603,10 +21780,9 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
       Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Tmp, Src,
                         DAG.getIntPtrConstant(0, dl));
 
-      SDValue Res, Chain;
       if (IsStrict) {
         Res = DAG.getNode(Op.getOpcode(), dl, {MVT::v8i64, MVT::Other},
-                          {Op->getOperand(0), Src});
+                          {Chain, Src});
         Chain = Res.getValue(1);
       } else {
         Res = DAG.getNode(Op.getOpcode(), dl, MVT::v8i64, Src);
@@ -21631,7 +21807,7 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
         SDValue Tmp = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8f32,
                                   {Src, Zero, Zero, Zero});
         Tmp = DAG.getNode(Op.getOpcode(), dl, {MVT::v8i64, MVT::Other},
-                          {Op->getOperand(0), Tmp});
+                          {Chain, Tmp});
         SDValue Chain = Tmp.getValue(1);
         Tmp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Tmp,
                           DAG.getIntPtrConstant(0, dl));
@@ -21714,17 +21890,16 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
     // FIXME: This does not generate an invalid exception if the input does not
     // fit in i32. PR44019
     if (Subtarget.is64Bit()) {
-      SDValue Res, Chain;
       if (IsStrict) {
-        Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { MVT::i64, MVT::Other},
-                          { Op.getOperand(0), Src });
+        Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, {MVT::i64, MVT::Other},
+                          {Chain, Src});
         Chain = Res.getValue(1);
       } else
         Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i64, Src);
 
       Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
       if (IsStrict)
-        return DAG.getMergeValues({ Res, Chain }, dl);
+        return DAG.getMergeValues({Res, Chain}, dl);
       return Res;
     }
 
@@ -21739,17 +21914,16 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
   // fit in i16. PR44019
   if (VT == MVT::i16 && (UseSSEReg || SrcVT == MVT::f128)) {
     assert(IsSigned && "Expected i16 FP_TO_UINT to have been promoted!");
-    SDValue Res, Chain;
     if (IsStrict) {
-      Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { MVT::i32, MVT::Other},
-                        { Op.getOperand(0), Src });
+      Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, {MVT::i32, MVT::Other},
+                        {Chain, Src});
       Chain = Res.getValue(1);
     } else
       Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
 
     Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
     if (IsStrict)
-      return DAG.getMergeValues({ Res, Chain }, dl);
+      return DAG.getMergeValues({Res, Chain}, dl);
     return Res;
   }
 
@@ -21765,7 +21939,6 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
     else
       LC = RTLIB::getFPTOUINT(SrcVT, VT);
 
-    SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
     MakeLibCallOptions CallOptions;
     std::pair<SDValue, SDValue> Tmp = makeLibCall(DAG, LC, VT, Src, CallOptions,
                                                   SDLoc(Op), Chain);
@@ -21777,7 +21950,6 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
   }
 
   // Fall back to X87.
-  SDValue Chain;
   if (SDValue V = FP_TO_INTHelper(Op, DAG, IsSigned, Chain)) {
     if (IsStrict)
       return DAG.getMergeValues({V, Chain}, dl);
@@ -22004,6 +22176,35 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
   if (VT == MVT::f128)
     return SDValue();
 
+  if (VT == MVT::f80) {
+    if (SVT == MVT::f16) {
+      assert(Subtarget.hasFP16() && "Unexpected features!");
+      RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, VT);
+      MakeLibCallOptions CallOptions;
+      std::pair<SDValue, SDValue> Tmp =
+          makeLibCall(DAG, LC, VT, In, CallOptions, DL,
+                      IsStrict ? Op.getOperand(0) : SDValue());
+      if (IsStrict)
+        return DAG.getMergeValues({Tmp.first, Tmp.second}, DL);
+      else
+        return Tmp.first;
+    }
+    return Op;
+  }
+
+  if (SVT.getVectorElementType() == MVT::f16) {
+    assert(Subtarget.hasFP16() && Subtarget.hasVLX() && "Unexpected features!");
+    if (SVT == MVT::v2f16)
+      In = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f16, In,
+                       DAG.getUNDEF(MVT::v2f16));
+    SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8f16, In,
+                              DAG.getUNDEF(MVT::v4f16));
+    if (IsStrict)
+      return DAG.getNode(X86ISD::STRICT_VFPEXT, DL, {VT, MVT::Other},
+                         {Op->getOperand(0), Res});
+    return DAG.getNode(X86ISD::VFPEXT, DL, VT, Res);
+  }
+
   assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
 
   SDValue Res =
@@ -22017,8 +22218,11 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
 SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
   bool IsStrict = Op->isStrictFPOpcode();
   SDValue In = Op.getOperand(IsStrict ? 1 : 0);
-  // It's legal except when f128 is involved
-  if (In.getSimpleValueType() != MVT::f128)
+  MVT VT = Op.getSimpleValueType();
+  MVT SVT = In.getSimpleValueType();
+
+  // It's legal except when f128 is involved or we're converting f80->f16.
+  if (SVT != MVT::f128 && !(VT == MVT::f16 && SVT == MVT::f80))
     return Op;
 
   return SDValue();
@@ -31113,6 +31317,51 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     SDValue Src = N->getOperand(IsStrict ? 1 : 0);
     EVT SrcVT = Src.getValueType();
 
+    if (VT.isVector() && Subtarget.hasFP16() &&
+        SrcVT.getVectorElementType() == MVT::f16) {
+      EVT EleVT = VT.getVectorElementType();
+      EVT ResVT = EleVT == MVT::i32 ? MVT::v4i32 : MVT::v8i16;
+
+      if (SrcVT != MVT::v8f16) {
+        SDValue Tmp =
+            IsStrict ? DAG.getConstantFP(0.0, dl, SrcVT) : DAG.getUNDEF(SrcVT);
+        SmallVector<SDValue, 4> Ops(SrcVT == MVT::v2f16 ? 4 : 2, Tmp);
+        Ops[0] = Src;
+        Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8f16, Ops);
+      }
+
+      SDValue Res, Chain;
+      if (IsStrict) {
+        unsigned Opc =
+            IsSigned ? X86ISD::STRICT_CVTTP2SI : X86ISD::STRICT_CVTTP2UI;
+        Res =
+            DAG.getNode(Opc, dl, {ResVT, MVT::Other}, {N->getOperand(0), Src});
+        Chain = Res.getValue(1);
+      } else {
+        unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
+        Res = DAG.getNode(Opc, dl, ResVT, Src);
+      }
+
+      // TODO: Need to add exception check code for strict FP.
+      if (EleVT.getSizeInBits() < 16) {
+        MVT TmpVT = MVT::getVectorVT(EleVT.getSimpleVT(), 8);
+        Res = DAG.getNode(ISD::TRUNCATE, dl, TmpVT, Res);
+
+        // Now widen to 128 bits.
+        unsigned NumConcats = 128 / TmpVT.getSizeInBits();
+        MVT ConcatVT = MVT::getVectorVT(EleVT.getSimpleVT(), 8 * NumConcats);
+        SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(TmpVT));
+        ConcatOps[0] = Res;
+        Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT, ConcatOps);
+      }
+
+      Results.push_back(Res);
+      if (IsStrict)
+        Results.push_back(Chain);
+
+      return;
+    }
+
     if (VT.isVector() && VT.getScalarSizeInBits() < 32) {
       assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
              "Unexpected type action!");
@@ -31287,9 +31536,31 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP ||
                     N->getOpcode() == ISD::STRICT_SINT_TO_FP;
     EVT VT = N->getValueType(0);
+    SDValue Src = N->getOperand(IsStrict ? 1 : 0);
+    if (VT.getVectorElementType() == MVT::f16 && Subtarget.hasFP16() &&
+        Subtarget.hasVLX()) {
+      if (Src.getValueType().getVectorElementType() == MVT::i16)
+        return;
+
+      if (VT == MVT::v2f16 && Src.getValueType() == MVT::v2i32)
+        Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
+                          IsStrict ? DAG.getConstant(0, dl, MVT::v2i32)
+                                   : DAG.getUNDEF(MVT::v2i32));
+      if (IsStrict) {
+        unsigned Opc =
+            IsSigned ? X86ISD::STRICT_CVTSI2P : X86ISD::STRICT_CVTUI2P;
+        SDValue Res = DAG.getNode(Opc, dl, {MVT::v8f16, MVT::Other},
+                                  {N->getOperand(0), Src});
+        Results.push_back(Res);
+        Results.push_back(Res.getValue(1));
+      } else {
+        unsigned Opc = IsSigned ? X86ISD::CVTSI2P : X86ISD::CVTUI2P;
+        Results.push_back(DAG.getNode(Opc, dl, MVT::v8f16, Src));
+      }
+      return;
+    }
     if (VT != MVT::v2f32)
       return;
-    SDValue Src = N->getOperand(IsStrict ? 1 : 0);
     EVT SrcVT = Src.getValueType();
     if (Subtarget.hasDQI() && Subtarget.hasVLX() && SrcVT == MVT::v2i64) {
       if (IsStrict) {
@@ -31390,14 +31661,21 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
   case ISD::FP_ROUND: {
     bool IsStrict = N->isStrictFPOpcode();
     SDValue Src = N->getOperand(IsStrict ? 1 : 0);
+    EVT VT = N->getValueType(0);
+    EVT NewVT = VT.getVectorElementType() == MVT::f16 ? MVT::v8f16 : MVT::v4f32;
+    if (VT == MVT::v2f16 && Src.getValueType() == MVT::v2f32) {
+      SDValue Ext = IsStrict ? DAG.getConstantFP(0.0, dl, MVT::v2f32)
+                             : DAG.getUNDEF(MVT::v2f32);
+      Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, Ext);
+    }
     if (!isTypeLegal(Src.getValueType()))
       return;
     SDValue V;
     if (IsStrict)
-      V = DAG.getNode(X86ISD::STRICT_VFPROUND, dl, {MVT::v4f32, MVT::Other},
-                      {N->getOperand(0), N->getOperand(1)});
+      V = DAG.getNode(X86ISD::STRICT_VFPROUND, dl, {NewVT, MVT::Other},
+                      {N->getOperand(0), Src});
     else
-      V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
+      V = DAG.getNode(X86ISD::VFPROUND, dl, NewVT, Src);
     Results.push_back(V);
     if (IsStrict)
       Results.push_back(V.getValue(1));
@@ -31409,6 +31687,21 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     // No other ValueType for FP_EXTEND should reach this point.
     assert(N->getValueType(0) == MVT::v2f32 &&
            "Do not know how to legalize this Node");
+    if (!Subtarget.hasFP16() || !Subtarget.hasVLX())
+      return;
+    bool IsStrict = N->isStrictFPOpcode();
+    SDValue Src = N->getOperand(IsStrict ? 1 : 0);
+    SDValue Ext = IsStrict ? DAG.getConstantFP(0.0, dl, MVT::v2f16)
+                           : DAG.getUNDEF(MVT::v2f16);
+    SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f16, Src, Ext);
+    if (IsStrict)
+      V = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::v4f32, MVT::Other},
+                      {N->getOperand(0), V});
+    else
+      V = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, V);
+    Results.push_back(V);
+    if (IsStrict)
+      Results.push_back(V.getValue(1));
     return;
   }
   case ISD::INTRINSIC_W_CHAIN: {
@@ -49415,10 +49708,31 @@ static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
   EVT VT = N->getValueType(0);
   EVT InVT = Op0.getValueType();
 
+  // UINT_TO_FP(vXi1~15)  -> UINT_TO_FP(ZEXT(vXi1~15  to vXi16))
+  // UINT_TO_FP(vXi17~31) -> UINT_TO_FP(ZEXT(vXi17~31 to vXi32))
+  // UINT_TO_FP(vXi33~63) -> UINT_TO_FP(ZEXT(vXi33~63 to vXi64))
+  if (InVT.isVector() && VT.getVectorElementType() == MVT::f16) {
+    unsigned ScalarSize = InVT.getScalarSizeInBits();
+    if (ScalarSize == 16 || ScalarSize == 32 || ScalarSize >= 64)
+      return SDValue();
+    SDLoc dl(N);
+    EVT DstVT = EVT::getVectorVT(*DAG.getContext(),
+                                 ScalarSize < 16   ? MVT::i16
+                                 : ScalarSize < 32 ? MVT::i32
+                                                   : MVT::i64,
+                                 InVT.getVectorNumElements());
+    SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0);
+    if (IsStrict)
+      return DAG.getNode(ISD::STRICT_UINT_TO_FP, dl, {VT, MVT::Other},
+                         {N->getOperand(0), P});
+    return DAG.getNode(ISD::UINT_TO_FP, dl, VT, P);
+  }
+
   // UINT_TO_FP(vXi1) -> SINT_TO_FP(ZEXT(vXi1 to vXi32))
   // UINT_TO_FP(vXi8) -> SINT_TO_FP(ZEXT(vXi8 to vXi32))
   // UINT_TO_FP(vXi16) -> SINT_TO_FP(ZEXT(vXi16 to vXi32))
-  if (InVT.isVector() && InVT.getScalarSizeInBits() < 32) {
+  if (InVT.isVector() && InVT.getScalarSizeInBits() < 32 &&
+      VT.getScalarType() != MVT::f16) {
     SDLoc dl(N);
     EVT DstVT = InVT.changeVectorElementType(MVT::i32);
     SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0);
@@ -49457,10 +49771,31 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
   EVT VT = N->getValueType(0);
   EVT InVT = Op0.getValueType();
 
+  // SINT_TO_FP(vXi1~15)  -> SINT_TO_FP(SEXT(vXi1~15  to vXi16))
+  // SINT_TO_FP(vXi17~31) -> SINT_TO_FP(SEXT(vXi17~31 to vXi32))
+  // SINT_TO_FP(vXi33~63) -> SINT_TO_FP(SEXT(vXi33~63 to vXi64))
+  if (InVT.isVector() && VT.getVectorElementType() == MVT::f16) {
+    unsigned ScalarSize = InVT.getScalarSizeInBits();
+    if (ScalarSize == 16 || ScalarSize == 32 || ScalarSize >= 64)
+      return SDValue();
+    SDLoc dl(N);
+    EVT DstVT = EVT::getVectorVT(*DAG.getContext(),
+                                 ScalarSize < 16   ? MVT::i16
+                                 : ScalarSize < 32 ? MVT::i32
+                                                   : MVT::i64,
+                                 InVT.getVectorNumElements());
+    SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
+    if (IsStrict)
+      return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
+                         {N->getOperand(0), P});
+    return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
+  }
+
   // SINT_TO_FP(vXi1) -> SINT_TO_FP(SEXT(vXi1 to vXi32))
   // SINT_TO_FP(vXi8) -> SINT_TO_FP(SEXT(vXi8 to vXi32))
   // SINT_TO_FP(vXi16) -> SINT_TO_FP(SEXT(vXi16 to vXi32))
-  if (InVT.isVector() && InVT.getScalarSizeInBits() < 32) {
+  if (InVT.isVector() && InVT.getScalarSizeInBits() < 32 &&
+      VT.getScalarType() != MVT::f16) {
     SDLoc dl(N);
     EVT DstVT = InVT.changeVectorElementType(MVT::i32);
     SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
@@ -51306,6 +51641,9 @@ static SDValue combineFP_EXTEND(SDNode *N, SelectionDAG &DAG,
   if (!Subtarget.hasF16C() || Subtarget.useSoftFloat())
     return SDValue();
 
+  if (Subtarget.hasFP16())
+    return SDValue();
+
   bool IsStrict = N->isStrictFPOpcode();
   EVT VT = N->getValueType(0);
   SDValue Src = N->getOperand(IsStrict ? 1 : 0);
@@ -51414,6 +51752,9 @@ static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG,
   if (!Subtarget.hasF16C() || Subtarget.useSoftFloat())
     return SDValue();
 
+  if (Subtarget.hasFP16())
+    return SDValue();
+
   EVT VT = N->getValueType(0);
   SDValue Src = N->getOperand(0);
   EVT SrcVT = Src.getValueType();

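[Editor's note: a hedged example of the kind of IR the X86ISelLowering.cpp changes above target; the function name and -mattr flags are illustrative. With avx512fp16 and avx512vl enabled, a narrow v4f16 -> v4i32 conversion is expected to be handled by the new FP16 path in LowerFP_TO_INT, widening the source to v8f16 before selecting the truncating convert (cf. the vec-strict-fptoint-128-fp16.ll test added by this commit).]

  ; Compiled with e.g. llc -mtriple=x86_64 -mattr=+avx512fp16,+avx512vl (illustrative flags).
  define <4 x i32> @fptosi_v4f16_example(<4 x half> %x) {
    %res = fptosi <4 x half> %x to <4 x i32>
    ret <4 x i32> %res
  }
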
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 34621b4e68dbf..7a2b6ade1796c 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7531,8 +7531,8 @@ multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
                                   X86VectorVTInfo DstVT, SDNode OpNode,
                                   SDNode OpNodeRnd,
                                   X86FoldableSchedWrite sched, string asm,
-                                  string aliasStr> {
-  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
+                                  string aliasStr, Predicate prd = HasAVX512> {
+  let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in {
     def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
@@ -7548,7 +7548,7 @@ multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
                 [(set DstVT.RC:$dst, (OpNode
                       (SrcVT.ScalarIntMemFrags addr:$src)))]>,
                 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
-  } // Predicates = [HasAVX512]
+  } // Predicates = [prd]
 
   def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
           (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
@@ -7712,8 +7712,9 @@ def : Pat<(v2f64 (X86Movsd
 multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                             X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
                             SDNode OpNodeInt, SDNode OpNodeSAE,
-                            X86FoldableSchedWrite sched, string aliasStr>{
-let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in {
+                            X86FoldableSchedWrite sched, string aliasStr,
+                            Predicate prd = HasAVX512> {
+let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
   let isCodeGenOnly = 1 in {
   def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
@@ -7740,7 +7741,7 @@ let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in {
               [(set _DstRC.RC:$dst,
                 (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
               EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
-} //HasAVX512
+} // Predicates = [prd]
 
   def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
           (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
@@ -7838,33 +7839,47 @@ multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInf
                         EVEX_4V, VEX_LIG, Sched<[sched]>,
                         EVEX_B, EVEX_RC;
 }
-multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
+multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
                                       SDNode OpNode, SDNode OpNodeRnd,
                                       X86FoldableSchedWrite sched,
-                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
-  let Predicates = [HasAVX512], ExeDomain = SSEPackedSingle in {
+                                      X86VectorVTInfo _src, X86VectorVTInfo _dst,
+                                      Predicate prd = HasAVX512> {
+  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
     defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
              avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
-                               OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
+                               OpNodeRnd, sched>, EVEX_CD8<_src.EltSize, CD8VT1>;
   }
 }
 
-multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
-                                      SDNode OpNode, SDNode OpNodeSAE,
-                                      X86FoldableSchedWrite sched,
-                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
-  let Predicates = [HasAVX512], ExeDomain = SSEPackedSingle in {
+multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
+                                       SDNode OpNode, SDNode OpNodeSAE,
+                                       X86FoldableSchedWrite sched,
+                                       X86VectorVTInfo _src, X86VectorVTInfo _dst,
+                                       Predicate prd = HasAVX512> {
+  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
     defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
              avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
-             EVEX_CD8<32, CD8VT1>, XS;
+             EVEX_CD8<_src.EltSize, CD8VT1>;
   }
 }
-defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86frounds,
+defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
                                          X86froundsRnd, WriteCvtSD2SS, f64x_info,
-                                         f32x_info>;
-defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts,
+                                         f32x_info>, XD, VEX_W;
+defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
                                           X86fpextsSAE, WriteCvtSS2SD, f32x_info,
-                                          f64x_info>;
+                                          f64x_info>, XS;
+defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
+                                          X86froundsRnd, WriteCvtSD2SS, f64x_info,
+                                          f16x_info, HasFP16>, T_MAP5XD, VEX_W;
+defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
+                                          X86fpextsSAE, WriteCvtSS2SD, f16x_info,
+                                          f64x_info, HasFP16>, T_MAP5XS;
+defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds,
+                                          X86froundsRnd, WriteCvtSD2SS, f32x_info,
+                                          f16x_info, HasFP16>, T_MAP5PS;
+defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts,
+                                          X86fpextsSAE, WriteCvtSS2SD, f16x_info,
+                                          f32x_info, HasFP16>, T_MAP6PS;
 
 def : Pat<(f64 (any_fpextend FR32X:$src)),
           (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
@@ -7877,6 +7892,27 @@ def : Pat<(f32 (any_fpround FR64X:$src)),
           (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
            Requires<[HasAVX512]>;
 
+def : Pat<(f32 (any_fpextend FR16X:$src)),
+          (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>,
+          Requires<[HasFP16]>;
+def : Pat<(f32 (any_fpextend (loadf16 addr:$src))),
+          (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
+          Requires<[HasFP16, OptForSize]>;
+
+def : Pat<(f64 (any_fpextend FR16X:$src)),
+          (VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>,
+          Requires<[HasFP16]>;
+def : Pat<(f64 (any_fpextend (loadf16 addr:$src))),
+          (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
+          Requires<[HasFP16, OptForSize]>;
+
+def : Pat<(f16 (any_fpround FR32X:$src)),
+          (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>,
+           Requires<[HasFP16]>;
+def : Pat<(f16 (any_fpround FR64X:$src)),
+          (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>,
+           Requires<[HasFP16]>;
+
 def : Pat<(v4f32 (X86Movss
                    (v4f32 VR128X:$dst),
                    (v4f32 (scalar_to_vector
@@ -7990,39 +8026,82 @@ multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _
                    (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
                    (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
 
-// Extend Float to Double
-multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
-                           X86SchedWriteWidths sched> {
-  let Predicates = [HasAVX512] in {
-    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
+// Extend [Float to Double, Half to Float]
+multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
+                             AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
+                             X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
+  let Predicates = [prd] in {
+    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr,  _dst.info512, _src.info256,
                             any_fpextend, fpextend, sched.ZMM>,
-             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
+             avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
                                 X86vfpextSAE, sched.ZMM>, EVEX_V512;
   }
-  let Predicates = [HasVLX] in {
-    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
-                               X86any_vfpext, X86vfpext, sched.XMM, "{1to2}",
+  let Predicates = [prd, HasVLX] in {
+    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
+                               X86any_vfpext, X86vfpext, sched.XMM,
+                               _dst.info128.BroadcastStr,
                                "", f64mem>, EVEX_V128;
-    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info,
-                                     any_fpextend, fpextend, sched.YMM>, EVEX_V256;
-  }
-}
-
-// Truncate Double to Float
-multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
-  let Predicates = [HasAVX512] in {
-    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info,
+    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
+                               any_fpextend, fpextend, sched.YMM>, EVEX_V256;
+  }
+}
+
+// Truncate [Double to Float, Float to Half]
+multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
+                            AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
+                            X86SchedWriteWidths sched, Predicate prd = HasAVX512,
+                            PatFrag bcast128 = _src.info128.BroadcastLdFrag,
+                            PatFrag bcast256 = _src.info256.BroadcastLdFrag,
+                            PatFrag bcast512 = _src.info512.BroadcastLdFrag,
+                            PatFrag loadVT128 = _src.info128.LdFrag,
+                            PatFrag loadVT256 = _src.info256.LdFrag,
+                            PatFrag loadVT512 = _src.info512.LdFrag,
+                            RegisterClass maskRC128 = _src.info128.KRCWM,
+                            RegisterClass maskRC256 = _src.info256.KRCWM,
+                            RegisterClass maskRC512 = _src.info512.KRCWM> {
+  let Predicates = [prd] in {
+    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
                             X86any_vfpround, X86vfpround, sched.ZMM>,
-             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
+             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
                                X86vfproundRnd, sched.ZMM>, EVEX_V512;
   }
-  let Predicates = [HasVLX] in {
-    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
-                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
-                               f128mem, VK2WM>, EVEX_V128;
-    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info,
+  let Predicates = [prd, HasVLX] in {
+    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128,
+                               null_frag, null_frag, sched.XMM,
+                               _src.info128.BroadcastStr, "{x}",
+                               f128mem, maskRC128>, EVEX_V128;
+    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256,
                                X86any_vfpround, X86vfpround,
-                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
+                               sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256;
+
+    // Special patterns to allow use of X86vmfpround for masking. Instruction
+    // patterns have been disabled with null_frag.
+    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))),
+              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
+    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
+                            maskRC128:$mask),
+              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
+    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
+                            maskRC128:$mask),
+              (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;
+
+    def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))),
+              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
+    def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0),
+                            maskRC128:$mask),
+              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
+    def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV,
+                            maskRC128:$mask),
+              (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;
+
+    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))),
+              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
+    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
+                            (_dst.info128.VT VR128X:$src0), maskRC128:$mask),
+              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
+    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
+                            _dst.info128.ImmAllZerosV, maskRC128:$mask),
+              (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
   }
 
   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
@@ -8066,40 +8145,185 @@ multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sc
                   VK4WM:$mask, f64mem:$src), 0, "att">;
 }
 
-defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
+defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
+                                  avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
                                   VEX_W, PD, EVEX_CD8<64, CD8VF>;
-defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
-                                  PS, EVEX_CD8<32, CD8VH>;
+defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd",
+                                   avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
+                                   PS, EVEX_CD8<32, CD8VH>;
 
-let Predicates = [HasVLX] in {
+// Extend Half to Double
+multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr,
+                            X86SchedWriteWidths sched> {
+  let Predicates = [HasFP16] in {
+    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info,
+                                  any_fpextend, fpextend, sched.ZMM>,
+             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info,
+                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
+    def : Pat<(v8f64 (extloadv8f16 addr:$src)),
+                (!cast<Instruction>(NAME # "Zrm") addr:$src)>;
+  }
+  let Predicates = [HasFP16, HasVLX] in {
+    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info,
+                                     X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "",
+                                     f32mem>, EVEX_V128;
+    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info,
+                                     X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "",
+                                     f64mem>, EVEX_V256;
+  }
+}
+
+// Truncate Double to Half
+multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
+  let Predicates = [HasFP16] in {
+    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info,
+                            X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">,
+             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8f64_info,
+                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
+  }
+  let Predicates = [HasFP16, HasVLX] in {
+    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag,
+                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
+                               VK2WM>, EVEX_V128;
+    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag,
+                               null_frag, sched.YMM, "{1to4}", "{y}", f256mem,
+                               VK4WM>, EVEX_V256;
+  }
+  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
+                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
+                  VR128X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
+                  VK2WM:$mask, VR128X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
+                  VK2WM:$mask, VR128X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
+                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
+                  i64mem:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
+                  "$dst {${mask}}, ${src}{1to2}}",
+                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
+                  VK2WM:$mask, i64mem:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
+                  "$dst {${mask}} {z}, ${src}{1to2}}",
+                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
+                  VK2WM:$mask, i64mem:$src), 0, "att">;
+
+  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
+                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
+                  VR256X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
+                  "$dst {${mask}}, $src}",
+                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
+                  VK4WM:$mask, VR256X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
+                  "$dst {${mask}} {z}, $src}",
+                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
+                  VK4WM:$mask, VR256X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
+                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
+                  i64mem:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
+                  "$dst {${mask}}, ${src}{1to4}}",
+                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
+                  VK4WM:$mask, i64mem:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
+                  "$dst {${mask}} {z}, ${src}{1to4}}",
+                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
+                  VK4WM:$mask, i64mem:$src), 0, "att">;
+
+  def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
+                  (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
+                  VR512:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
+                  "$dst {${mask}}, $src}",
+                  (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
+                  VK8WM:$mask, VR512:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
+                  "$dst {${mask}} {z}, $src}",
+                  (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
+                  VK8WM:$mask, VR512:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
+                  (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
+                  i64mem:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
+                  "$dst {${mask}}, ${src}{1to8}}",
+                  (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
+                  VK8WM:$mask, i64mem:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
+                  "$dst {${mask}} {z}, ${src}{1to8}}",
+                  (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
+                  VK8WM:$mask, i64mem:$src), 0, "att">;
+}
+
+defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info,
+                                   avx512vl_f32_info, SchedWriteCvtPD2PS,
+                                   HasFP16>, T_MAP5PD, EVEX_CD8<32, CD8VF>;
+defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info,
+                                    avx512vl_f16_info, SchedWriteCvtPS2PD,
+                                    HasFP16>, T_MAP6PD, EVEX_CD8<16, CD8VH>;
+defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>,
+                                 VEX_W, T_MAP5PD, EVEX_CD8<64, CD8VF>;
+defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>,
+                                 T_MAP5PS, EVEX_CD8<16, CD8VQ>;
+
+let Predicates = [HasFP16, HasVLX] in {
   // Special patterns to allow use of X86vmfpround for masking. Instruction
   // patterns have been disabled with null_frag.
-  def : Pat<(X86any_vfpround (v2f64 VR128X:$src)),
-            (VCVTPD2PSZ128rr VR128X:$src)>;
-  def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0),
+  def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))),
+            (VCVTPD2PHZ256rr VR256X:$src)>;
+  def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0),
+                          VK4WM:$mask)),
+            (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
+  def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV,
+                          VK4WM:$mask),
+            (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
+
+  def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))),
+            (VCVTPD2PHZ256rm addr:$src)>;
+  def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0),
+                          VK4WM:$mask),
+            (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+  def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV,
+                          VK4WM:$mask),
+            (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>;
+
+  def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))),
+            (VCVTPD2PHZ256rmb addr:$src)>;
+  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
+                          (v8f16 VR128X:$src0), VK4WM:$mask),
+            (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
+                          v8f16x_info.ImmAllZerosV, VK4WM:$mask),
+            (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
+
+  def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))),
+            (VCVTPD2PHZ128rr VR128X:$src)>;
+  def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
-            (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
-  def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV,
+            (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+  def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
-            (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
+            (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
 
-  def : Pat<(X86any_vfpround (loadv2f64 addr:$src)),
-            (VCVTPD2PSZ128rm addr:$src)>;
-  def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0),
+  def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))),
+            (VCVTPD2PHZ128rm addr:$src)>;
+  def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
-            (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
-  def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV,
+            (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+  def : Pat<(X86vmfpround (loadv2f64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
-            (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;
+            (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>;
 
-  def : Pat<(X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src))),
-            (VCVTPD2PSZ128rmb addr:$src)>;
+  def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))),
+            (VCVTPD2PHZ128rmb addr:$src)>;
   def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
-                          (v4f32 VR128X:$src0), VK2WM:$mask),
-            (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+                          (v8f16 VR128X:$src0), VK2WM:$mask),
+            (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
   def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
-                          v4f32x_info.ImmAllZerosV, VK2WM:$mask),
-            (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
+                          v8f16x_info.ImmAllZerosV, VK2WM:$mask),
+            (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
 }
 
 // Convert Signed/Unsigned Doubleword to Double
@@ -8420,26 +8644,60 @@ multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpN
 }
 
 // Convert Signed/Unsigned Quadword to Float
-multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
-                           SDNode MaskOpNode, SDNode OpNodeRnd,
-                           X86SchedWriteWidths sched> {
-  let Predicates = [HasDQI] in {
-    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
+// Also Convert Signed/Unsigned Doubleword to Half
+multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+                                 SDPatternOperator MaskOpNode, SDPatternOperator OpNode128,
+                                 SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd,
+                                 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
+                                 X86SchedWriteWidths sched, Predicate prd = HasDQI> {
+  let Predicates = [prd] in {
+    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode,
                             MaskOpNode, sched.ZMM>,
-             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
+             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
   }
-  let Predicates = [HasDQI, HasVLX] in {
+  let Predicates = [prd, HasVLX] in {
     // we need "x"/"y" suffixes in order to distinguish between 128 and 256
     // memory forms of these instructions in Asm Parser. They have the same
     // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
     // due to the same reason.
-    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag,
-                               null_frag, sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
+    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag,
+                               null_frag, sched.XMM, _src.info128.BroadcastStr,
+                               "{x}", i128mem, _src.info128.KRCWM>,
                                EVEX_V128, NotEVEX2VEXConvertible;
-    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
-                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256,
+    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode,
+                               MaskOpNode, sched.YMM, _src.info256.BroadcastStr,
+                               "{y}">, EVEX_V256,
                                NotEVEX2VEXConvertible;
+
+    // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction
+    // patterns have been disabled with null_frag.
+    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))),
+              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
+    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
+                             _src.info128.KRCWM:$mask),
+              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>;
+    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
+                             _src.info128.KRCWM:$mask),
+              (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>;
+
+    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))),
+              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
+    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0),
+                             _src.info128.KRCWM:$mask),
+              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
+    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV,
+                             _src.info128.KRCWM:$mask),
+              (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>;
+
+    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))),
+              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
+    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
+                             (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask),
+              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
+    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
+                             _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask),
+              (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>;
   }
 
   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
@@ -8581,13 +8839,29 @@ defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
                             uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
                             VEX_W, XS, EVEX_CD8<64, CD8VF>;
 
-defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", any_sint_to_fp,
-                            sint_to_fp, X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
-                            VEX_W, PS, EVEX_CD8<64, CD8VF>;
+defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp,
+                            X86any_VSintToFP, X86VMSintToFP,
+                            X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
+                            SchedWriteCvtDQ2PS, HasFP16>,
+                            T_MAP5PS, EVEX_CD8<32, CD8VF>;
+
+defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp,
+                            X86any_VUintToFP, X86VMUintToFP,
+                            X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
+                            SchedWriteCvtDQ2PS, HasFP16>, T_MAP5XD,
+                            EVEX_CD8<32, CD8VF>;
+
+defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp,
+                            X86any_VSintToFP, X86VMSintToFP,
+                            X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
+                            SchedWriteCvtDQ2PS>, VEX_W, PS,
+                            EVEX_CD8<64, CD8VF>;
 
-defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", any_uint_to_fp,
-                            uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PS>,
-                            VEX_W, XD, EVEX_CD8<64, CD8VF>;
+defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp,
+                            X86any_VUintToFP, X86VMUintToFP,
+                            X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
+                            SchedWriteCvtDQ2PS>, VEX_W, XD,
+                            EVEX_CD8<64, CD8VF>;
 
 let Predicates = [HasVLX] in {
   // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
@@ -8777,66 +9051,6 @@ let Predicates = [HasVLX] in {
             (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
 }
 
-let Predicates = [HasDQI, HasVLX] in {
-  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
-  // patterns have been disabled with null_frag.
-  def : Pat<(v4f32 (X86any_VSintToFP (v2i64 VR128X:$src))),
-            (VCVTQQ2PSZ128rr VR128X:$src)>;
-  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
-                           VK2WM:$mask),
-            (VCVTQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
-  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
-                           VK2WM:$mask),
-            (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
-
-  def : Pat<(v4f32 (X86any_VSintToFP (loadv2i64 addr:$src))),
-            (VCVTQQ2PSZ128rm addr:$src)>;
-  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
-                           VK2WM:$mask),
-            (VCVTQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
-  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
-                           VK2WM:$mask),
-            (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
-
-  def : Pat<(v4f32 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
-            (VCVTQQ2PSZ128rmb addr:$src)>;
-  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
-                           (v4f32 VR128X:$src0), VK2WM:$mask),
-            (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
-  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
-                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
-            (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
-
-  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
-  // patterns have been disabled with null_frag.
-  def : Pat<(v4f32 (X86any_VUintToFP (v2i64 VR128X:$src))),
-            (VCVTUQQ2PSZ128rr VR128X:$src)>;
-  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
-                           VK2WM:$mask),
-            (VCVTUQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
-  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
-                           VK2WM:$mask),
-            (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
-
-  def : Pat<(v4f32 (X86any_VUintToFP (loadv2i64 addr:$src))),
-            (VCVTUQQ2PSZ128rm addr:$src)>;
-  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
-                           VK2WM:$mask),
-            (VCVTUQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
-  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
-                           VK2WM:$mask),
-            (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
-
-  def : Pat<(v4f32 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
-            (VCVTUQQ2PSZ128rmb addr:$src)>;
-  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
-                           (v4f32 VR128X:$src0), VK2WM:$mask),
-            (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
-  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
-                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
-            (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
-}
-
 //===----------------------------------------------------------------------===//
 // Half precision conversion instructions
 //===----------------------------------------------------------------------===//
@@ -12663,3 +12877,510 @@ let hasSideEffects = 0 in {
   def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
                      "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>;
 }
+
+// Convert 16-bit float to i16/u16
+multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+                          SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
+                          AVX512VLVectorVTInfo _Dst,
+                          AVX512VLVectorVTInfo _Src,
+                          X86SchedWriteWidths sched> {
+  let Predicates = [HasFP16] in {
+    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
+                            OpNode, MaskOpNode, sched.ZMM>,
+             avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512,
+                               OpNodeRnd, sched.ZMM>, EVEX_V512;
+  }
+  let Predicates = [HasFP16, HasVLX] in {
+    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
+                               OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
+    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
+                               OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
+  }
+}
+
+// Convert 16-bit float to i16/u16 truncate
+multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
+                           AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src,
+                           X86SchedWriteWidths sched> {
+  let Predicates = [HasFP16] in {
+    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
+                            OpNode, MaskOpNode, sched.ZMM>,
+             avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512,
+                               OpNodeRnd, sched.ZMM>, EVEX_V512;
+  }
+  let Predicates = [HasFP16, HasVLX] in {
+    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
+                               OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
+    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
+                               OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
+  }
+}
+
+defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt,
+                                X86cvtp2UIntRnd, avx512vl_i16_info,
+                                avx512vl_f16_info, SchedWriteCvtPD2DQ>,
+                                T_MAP5PS, EVEX_CD8<16, CD8VF>;
+defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp,
+                                X86VUintToFpRnd, avx512vl_f16_info,
+                                avx512vl_i16_info, SchedWriteCvtPD2DQ>,
+                                T_MAP5XD, EVEX_CD8<16, CD8VF>;
+defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si,
+                                X86cvttp2si, X86cvttp2siSAE,
+                                avx512vl_i16_info, avx512vl_f16_info,
+                                SchedWriteCvtPD2DQ>, T_MAP5PD, EVEX_CD8<16, CD8VF>;
+defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui,
+                                X86cvttp2ui, X86cvttp2uiSAE,
+                                avx512vl_i16_info, avx512vl_f16_info,
+                                SchedWriteCvtPD2DQ>, T_MAP5PS, EVEX_CD8<16, CD8VF>;
+defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int,
+                                X86cvtp2IntRnd, avx512vl_i16_info,
+                                avx512vl_f16_info, SchedWriteCvtPD2DQ>,
+                                T_MAP5PD, EVEX_CD8<16, CD8VF>;
+defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp,
+                                X86VSintToFpRnd, avx512vl_f16_info,
+                                avx512vl_i16_info, SchedWriteCvtPD2DQ>,
+                                T_MAP5XS, EVEX_CD8<16, CD8VF>;
+
+// Convert Half to Signed/Unsigned Doubleword
+multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
+                           X86SchedWriteWidths sched> {
+  let Predicates = [HasFP16] in {
+    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
+                            MaskOpNode, sched.ZMM>,
+             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info,
+                                OpNodeRnd, sched.ZMM>, EVEX_V512;
+  }
+  let Predicates = [HasFP16, HasVLX] in {
+    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
+                               MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
+    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
+                               MaskOpNode, sched.YMM>, EVEX_V256;
+  }
+}
+
+// Convert Half to Signed/Unsigned Doubleword with truncation
+multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
+                            X86SchedWriteWidths sched> {
+  let Predicates = [HasFP16] in {
+    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
+                            MaskOpNode, sched.ZMM>,
+             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info,
+                                OpNodeRnd, sched.ZMM>, EVEX_V512;
+  }
+  let Predicates = [HasFP16, HasVLX] in {
+    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
+                               MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
+    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
+                               MaskOpNode, sched.YMM>, EVEX_V256;
+  }
+}
+
+
+defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int,
+                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
+                                 EVEX_CD8<16, CD8VH>;
+defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt,
+                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PS,
+                                 EVEX_CD8<16, CD8VH>;
+
+defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si,
+                                X86cvttp2si, X86cvttp2siSAE,
+                                SchedWriteCvtPS2DQ>, T_MAP5XS,
+                                EVEX_CD8<16, CD8VH>;
+
+defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui,
+                                 X86cvttp2ui, X86cvttp2uiSAE,
+                                 SchedWriteCvtPS2DQ>, T_MAP5PS,
+                                 EVEX_CD8<16, CD8VH>;
+
+// Convert Half to Signed/Unsigned Quadword
+multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
+                           X86SchedWriteWidths sched> {
+  let Predicates = [HasFP16] in {
+    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
+                            MaskOpNode, sched.ZMM>,
+             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
+                               OpNodeRnd, sched.ZMM>, EVEX_V512;
+  }
+  let Predicates = [HasFP16, HasVLX] in {
+    // Explicitly specified broadcast string, since we take only 2 elements
+    // from v8f16x_info source
+    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
+                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>,
+                               EVEX_V128;
+    // Explicitly specified broadcast string, since we take only 4 elements
+    // from v8f16x_info source
+    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
+                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>,
+                               EVEX_V256;
+  }
+}
+
+// Convert Half to Signed/Unsigned Quadword with truncation
+multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
+                            X86SchedWriteWidths sched> {
+  let Predicates = [HasFP16] in {
+    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
+                            MaskOpNode, sched.ZMM>,
+             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
+                                OpNodeRnd, sched.ZMM>, EVEX_V512;
+  }
+  let Predicates = [HasFP16, HasVLX] in {
+    // Explicitly specified broadcast string, since we take only 2 elements
+    // from v8f16x_info source
+    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
+                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, EVEX_V128;
+    // Explicitly specified broadcast string, since we take only 4 elements
+    // from v8f16x_info source
+    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
+                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, EVEX_V256;
+  }
+}
+
+defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
+                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
+                                 EVEX_CD8<16, CD8VQ>;
+
+defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
+                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
+                                 EVEX_CD8<16, CD8VQ>;
+
+defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si,
+                                 X86cvttp2si, X86cvttp2siSAE,
+                                 SchedWriteCvtPS2DQ>, T_MAP5PD,
+                                 EVEX_CD8<16, CD8VQ>;
+
+defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui,
+                                 X86cvttp2ui, X86cvttp2uiSAE,
+                                 SchedWriteCvtPS2DQ>, T_MAP5PD,
+                                 EVEX_CD8<16, CD8VQ>;
+
+// Convert Signed/Unsigned Quadword to Half
+multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
+                           X86SchedWriteWidths sched> {
+  // we need "x"/"y"/"z" suffixes in order to distinguish between 128, 256 and
+  // 512 memory forms of these instructions in Asm Parser. They have the same
+  // dest type - 'v8f16x_info'. We also specify the broadcast string explicitly
+  // due to the same reason.
+  let Predicates = [HasFP16] in {
+    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode,
+                            MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
+             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
+                               OpNodeRnd, sched.ZMM>, EVEX_V512;
+  }
+  let Predicates = [HasFP16, HasVLX] in {
+    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
+                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
+                               i128mem, VK2WM>,
+                               EVEX_V128, NotEVEX2VEXConvertible;
+    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
+                               null_frag, null_frag, sched.YMM, "{1to4}", "{y}",
+                               i256mem, VK4WM>,
+                               EVEX_V256, NotEVEX2VEXConvertible;
+  }
+
+  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
+                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
+                  VR128X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
+                  VK2WM:$mask, VR128X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
+                  VK2WM:$mask, VR128X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
+                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
+                  i64mem:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
+                  "$dst {${mask}}, ${src}{1to2}}",
+                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
+                  VK2WM:$mask, i64mem:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
+                  "$dst {${mask}} {z}, ${src}{1to2}}",
+                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
+                  VK2WM:$mask, i64mem:$src), 0, "att">;
+
+  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
+                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
+                  VR256X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
+                  "$dst {${mask}}, $src}",
+                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
+                  VK4WM:$mask, VR256X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
+                  "$dst {${mask}} {z}, $src}",
+                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
+                  VK4WM:$mask, VR256X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
+                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
+                  i64mem:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
+                  "$dst {${mask}}, ${src}{1to4}}",
+                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
+                  VK4WM:$mask, i64mem:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
+                  "$dst {${mask}} {z}, ${src}{1to4}}",
+                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
+                  VK4WM:$mask, i64mem:$src), 0, "att">;
+
+  def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
+                  (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
+                  VR512:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
+                  "$dst {${mask}}, $src}",
+                  (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
+                  VK8WM:$mask, VR512:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
+                  "$dst {${mask}} {z}, $src}",
+                  (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
+                  VK8WM:$mask, VR512:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
+                  (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
+                  i64mem:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
+                  "$dst {${mask}}, ${src}{1to8}}",
+                  (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
+                  VK8WM:$mask, i64mem:$src), 0, "att">;
+  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
+                  "$dst {${mask}} {z}, ${src}{1to8}}",
+                  (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
+                  VK8WM:$mask, i64mem:$src), 0, "att">;
+}
+
+defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
+                            X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5PS,
+                            EVEX_CD8<64, CD8VF>;
+
+defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
+                            X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5XD,
+                            EVEX_CD8<64, CD8VF>;
+
+// Convert half to signed/unsigned int 32/64
+defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
+                                   X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>,
+                                   T_MAP5XS, EVEX_CD8<16, CD8VT1>;
+defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
+                                   X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>,
+                                   T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
+defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
+                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>,
+                                   T_MAP5XS, EVEX_CD8<16, CD8VT1>;
+defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
+                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>,
+                                   T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
+
+defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
+                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
+                        "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
+defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
+                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
+                        "{q}", HasFP16>, VEX_W, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
+defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
+                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
+                        "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
+defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
+                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
+                        "{q}", HasFP16>, T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
+
+let Predicates = [HasFP16] in {
+  defm VCVTSI2SHZ  : avx512_vcvtsi_common<0x2A,  X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
+                                   v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
+                                   T_MAP5XS, EVEX_CD8<32, CD8VT1>;
+  defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A,  X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
+                                   v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">,
+                                   T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
+  defm VCVTUSI2SHZ   : avx512_vcvtsi_common<0x7B,  X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
+                                    v8f16x_info, i32mem, loadi32,
+                                    "cvtusi2sh","l">, T_MAP5XS, EVEX_CD8<32, CD8VT1>;
+  defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B,  X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
+                                    v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
+                                    T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
+  def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
+              (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
+
+  def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
+              (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
+
+
+  def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))),
+            (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
+  def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))),
+            (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
+
+  def : Pat<(f16 (any_sint_to_fp GR32:$src)),
+            (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
+  def : Pat<(f16 (any_sint_to_fp GR64:$src)),
+            (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
+
+  def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))),
+            (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
+  def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))),
+            (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
+
+  def : Pat<(f16 (any_uint_to_fp GR32:$src)),
+            (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
+  def : Pat<(f16 (any_uint_to_fp GR64:$src)),
+            (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
+
+  // Patterns used for matching vcvtsi2sh intrinsic sequences from clang
+  // which produce unnecessary vmovsh instructions
+  def : Pat<(v8f16 (X86Movsh
+                     (v8f16 VR128X:$dst),
+                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
+            (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
+
+  def : Pat<(v8f16 (X86Movsh
+                     (v8f16 VR128X:$dst),
+                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
+            (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;
+
+  def : Pat<(v8f16 (X86Movsh
+                     (v8f16 VR128X:$dst),
+                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
+            (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
+
+  def : Pat<(v8f16 (X86Movsh
+                     (v8f16 VR128X:$dst),
+                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
+            (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;
+
+  def : Pat<(v8f16 (X86Movsh
+                     (v8f16 VR128X:$dst),
+                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
+            (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
+
+  def : Pat<(v8f16 (X86Movsh
+                     (v8f16 VR128X:$dst),
+                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
+            (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;
+
+  def : Pat<(v8f16 (X86Movsh
+                     (v8f16 VR128X:$dst),
+                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
+            (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
+
+  def : Pat<(v8f16 (X86Movsh
+                     (v8f16 VR128X:$dst),
+                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
+            (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
+} // Predicates = [HasFP16]
+
+let Predicates = [HasFP16, HasVLX] in {
+  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
+  // patterns have been disabled with null_frag.
+  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
+            (VCVTQQ2PHZ256rr VR256X:$src)>;
+  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
+                           VK4WM:$mask),
+            (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
+  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
+                           VK4WM:$mask),
+            (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
+
+  def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
+            (VCVTQQ2PHZ256rm addr:$src)>;
+  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
+                           VK4WM:$mask),
+            (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
+                           VK4WM:$mask),
+            (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
+
+  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
+            (VCVTQQ2PHZ256rmb addr:$src)>;
+  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
+                           (v8f16 VR128X:$src0), VK4WM:$mask),
+            (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
+                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
+            (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
+
+  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
+            (VCVTQQ2PHZ128rr VR128X:$src)>;
+  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
+                           VK2WM:$mask),
+            (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
+                           VK2WM:$mask),
+            (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+  def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
+            (VCVTQQ2PHZ128rm addr:$src)>;
+  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
+                           VK2WM:$mask),
+            (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
+                           VK2WM:$mask),
+            (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
+
+  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
+            (VCVTQQ2PHZ128rmb addr:$src)>;
+  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
+                           (v8f16 VR128X:$src0), VK2WM:$mask),
+            (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
+                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
+            (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
+
+  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
+  // patterns have been disabled with null_frag.
+  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
+            (VCVTUQQ2PHZ256rr VR256X:$src)>;
+  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
+                           VK4WM:$mask),
+            (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
+  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
+                           VK4WM:$mask),
+            (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
+
+  def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
+            (VCVTUQQ2PHZ256rm addr:$src)>;
+  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
+                           VK4WM:$mask),
+            (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
+                           VK4WM:$mask),
+            (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
+
+  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
+            (VCVTUQQ2PHZ256rmb addr:$src)>;
+  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
+                           (v8f16 VR128X:$src0), VK4WM:$mask),
+            (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
+                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
+            (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
+
+  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))),
+            (VCVTUQQ2PHZ128rr VR128X:$src)>;
+  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
+                           VK2WM:$mask),
+            (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
+                           VK2WM:$mask),
+            (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+  def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))),
+            (VCVTUQQ2PHZ128rm addr:$src)>;
+  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
+                           VK2WM:$mask),
+            (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
+                           VK2WM:$mask),
+            (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
+
+  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
+            (VCVTUQQ2PHZ128rmb addr:$src)>;
+  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
+                           (v8f16 VR128X:$src0), VK2WM:$mask),
+            (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
+                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
+            (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
+}
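
The masked VCVTPD2PH / VCVTQQ2PH patterns above all encode the usual AVX-512 merge- and zero-masking behaviour: a destination lane takes the converted source lane when its mask bit is set, otherwise it keeps the pass-through lane (the rrk/rmk forms) or becomes zero (the rrkz/rmkz forms). A minimal scalar C++ model of that behaviour follows; it is purely illustrative, not compiler code, and uses double -> float as a stand-in for the f64 -> f16 conversion.

// Scalar model of AVX-512 merge-/zero-masked conversion (illustrative only).
#include <array>
#include <cstdint>
#include <cstdio>

// One destination lane: converted source if the mask bit is set, otherwise
// the pass-through lane (merge-masking) or zero (zero-masking).
template <typename Dst, typename Src>
Dst maskedLane(bool maskBit, Src src, Dst passthru, bool zeroing) {
  if (maskBit)
    return static_cast<Dst>(src); // double -> float stands in for f64 -> f16
  return zeroing ? Dst{} : passthru;
}

int main() {
  std::array<double, 4> src{1.5, 2.5, 3.5, 4.5};
  std::array<float, 4> passthru{9, 9, 9, 9};
  std::array<float, 4> dst{};
  std::uint8_t mask = 0b0101; // 4-bit mask, like a VK4WM operand
  for (int i = 0; i < 4; ++i)
    dst[i] = maskedLane<float>(mask & (1 << i), src[i], passthru[i],
                               /*zeroing=*/false);
  for (float v : dst)
    std::printf("%g ", v); // prints: 1.5 9 3.5 9
  return 0;
}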

diff  --git a/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
index 251d66575080e..44007b34fcfe2 100644
--- a/llvm/lib/Target/X86/X86InstrFoldTables.cpp
+++ b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
@@ -4455,8 +4455,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
   { X86::VCVTNE2PS2BF16Z128rrk,     X86::VCVTNE2PS2BF16Z128rmk,     0 },
   { X86::VCVTNE2PS2BF16Z256rrk,     X86::VCVTNE2PS2BF16Z256rmk,     0 },
   { X86::VCVTNE2PS2BF16Zrrk,        X86::VCVTNE2PS2BF16Zrmk,        0 },
+  { X86::VCVTSD2SHZrr_Intk,         X86::VCVTSD2SHZrm_Intk,         TB_NO_REVERSE },
   { X86::VCVTSD2SSZrr_Intk,         X86::VCVTSD2SSZrm_Intk,         TB_NO_REVERSE },
+  { X86::VCVTSH2SDZrr_Intk,         X86::VCVTSH2SDZrm_Intk,         TB_NO_REVERSE },
+  { X86::VCVTSH2SSZrr_Intk,         X86::VCVTSH2SSZrm_Intk,         TB_NO_REVERSE },
   { X86::VCVTSS2SDZrr_Intk,         X86::VCVTSS2SDZrm_Intk,         TB_NO_REVERSE },
+  { X86::VCVTSS2SHZrr_Intk,         X86::VCVTSS2SHZrm_Intk,         TB_NO_REVERSE },
   { X86::VDBPSADBWZ128rrik,         X86::VDBPSADBWZ128rmik,         0 },
   { X86::VDBPSADBWZ256rrik,         X86::VDBPSADBWZ256rmik,         0 },
   { X86::VDBPSADBWZrrik,            X86::VDBPSADBWZrmik,            0 },
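
The entries added here extend one of the memory-fold tables, each of which pairs a register-form opcode with its load-folded form plus flags (TB_NO_REVERSE marks pairs that must not be unfolded back to the register form). The tables are kept sorted so a fold candidate can be found by binary search; the sketch below mirrors that shape with assumed names and types rather than the real LLVM helpers.

// Standalone sketch of a sorted opcode fold table and its lookup (assumed
// structure; not the actual LLVM implementation).
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <optional>

struct FoldEntry {
  unsigned RegOp;      // e.g. a *rr_Intk opcode
  unsigned MemOp;      // the matching *rm_Intk opcode
  std::uint16_t Flags; // e.g. a "no reverse" bit
};

// Table must be sorted by RegOp.
std::optional<FoldEntry> lookupFold(const FoldEntry *Table, std::size_t N,
                                    unsigned RegOp) {
  const FoldEntry *End = Table + N;
  const FoldEntry *I = std::lower_bound(
      Table, End, RegOp,
      [](const FoldEntry &E, unsigned Op) { return E.RegOp < Op; });
  if (I != End && I->RegOp == RegOp)
    return *I;
  return std::nullopt;
}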

diff  --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index fe9fea40029b0..b562c360c359e 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -130,14 +130,12 @@ def X86vmtruncs  : SDNode<"X86ISD::VMTRUNCS",  SDTVmtrunc>;
 def X86vmtruncus : SDNode<"X86ISD::VMTRUNCUS", SDTVmtrunc>;
 
 def X86vfpext  : SDNode<"X86ISD::VFPEXT",
-                        SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>,
-                                             SDTCVecEltisVT<1, f32>,
-                                             SDTCisSameSizeAs<0, 1>]>>;
+                        SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
+                                             SDTCisFP<1>, SDTCisVec<1>]>>;
 
 def X86strict_vfpext  : SDNode<"X86ISD::STRICT_VFPEXT",
-                               SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>,
-                                                    SDTCVecEltisVT<1, f32>,
-                                                    SDTCisSameSizeAs<0, 1>]>,
+                               SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
+                                                    SDTCisFP<1>, SDTCisVec<1>]>,
                                                     [SDNPHasChain]>;
 
 def X86any_vfpext : PatFrags<(ops node:$src),
@@ -145,13 +143,13 @@ def X86any_vfpext : PatFrags<(ops node:$src),
                                (X86vfpext node:$src)]>;
 
 def X86vfpround: SDNode<"X86ISD::VFPROUND",
-                        SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
-                                             SDTCVecEltisVT<1, f64>,
+                        SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
+                                             SDTCisFP<1>, SDTCisVec<1>,
                                              SDTCisOpSmallerThanOp<0, 1>]>>;
 
 def X86strict_vfpround: SDNode<"X86ISD::STRICT_VFPROUND",
-                        SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
-                                             SDTCVecEltisVT<1, f64>,
+                        SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
+                                             SDTCisFP<1>, SDTCisVec<1>,
                                              SDTCisOpSmallerThanOp<0, 1>]>,
                                              [SDNPHasChain]>;
 
@@ -160,33 +158,32 @@ def X86any_vfpround : PatFrags<(ops node:$src),
                                (X86vfpround node:$src)]>;
 
 def X86frounds   : SDNode<"X86ISD::VFPROUNDS",
-                           SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
+                           SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisVec<0>,
                                                 SDTCisSameAs<0, 1>,
-                                                SDTCVecEltisVT<2, f64>,
+                                                SDTCisFP<2>, SDTCisVec<2>,
                                                 SDTCisSameSizeAs<0, 2>]>>;
 
 def X86froundsRnd: SDNode<"X86ISD::VFPROUNDS_RND",
-                        SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>,
+                        SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisVec<0>,
                                              SDTCisSameAs<0, 1>,
-                                             SDTCVecEltisVT<2, f64>,
+                                             SDTCisFP<2>, SDTCisVec<2>,
                                              SDTCisSameSizeAs<0, 2>,
                                              SDTCisVT<3, i32>]>>;
 
 def X86fpexts     : SDNode<"X86ISD::VFPEXTS",
-                        SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>,
+                        SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisVec<0>,
                                              SDTCisSameAs<0, 1>,
-                                             SDTCVecEltisVT<2, f32>,
+                                             SDTCisFP<2>, SDTCisVec<2>,
                                              SDTCisSameSizeAs<0, 2>]>>;
 def X86fpextsSAE  : SDNode<"X86ISD::VFPEXTS_SAE",
-                        SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>,
+                        SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisVec<0>,
                                              SDTCisSameAs<0, 1>,
-                                             SDTCVecEltisVT<2, f32>,
+                                             SDTCisFP<2>, SDTCisVec<2>,
                                              SDTCisSameSizeAs<0, 2>]>>;
 
 def X86vmfpround: SDNode<"X86ISD::VMFPROUND",
-                         SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>,
-                                              SDTCVecEltisVT<1, f64>,
-                                              SDTCisSameSizeAs<0, 1>,
+                         SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisVec<0>,
+                                              SDTCisFP<1>, SDTCisVec<1>,
                                               SDTCisSameAs<0, 2>,
                                               SDTCVecEltisVT<3, i1>,
                                               SDTCisSameNumEltsAs<1, 3>]>>;
@@ -709,7 +706,6 @@ def X86cvtp2UInt     : SDNode<"X86ISD::CVTP2UI",  SDTFloatToInt>;
 // Masked versions of above
 def SDTMVintToFP: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
                                        SDTCisFP<0>, SDTCisInt<1>,
-                                       SDTCisSameSizeAs<0, 1>,
                                        SDTCisSameAs<0, 2>,
                                        SDTCVecEltisVT<3, i1>,
                                        SDTCisSameNumEltsAs<1, 3>]>;
@@ -757,12 +753,12 @@ def X86mcvtps2ph   : SDNode<"X86ISD::MCVTPS2PH",
                                              SDTCVecEltisVT<4, i1>,
                                              SDTCisSameNumEltsAs<1, 4>]> >;
 def X86vfpextSAE  : SDNode<"X86ISD::VFPEXT_SAE",
-                        SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>,
-                                             SDTCVecEltisVT<1, f32>,
+                        SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
+                                             SDTCisFP<1>, SDTCisVec<1>,
                                              SDTCisOpSmallerThanOp<1, 0>]>>;
 def X86vfproundRnd: SDNode<"X86ISD::VFPROUND_RND",
-                        SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
-                                             SDTCVecEltisVT<1, f64>,
+                        SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisVec<0>,
+                                             SDTCisFP<1>, SDTCisVec<1>,
                                              SDTCisOpSmallerThanOp<0, 1>,
                                              SDTCisVT<2, i32>]>>;
 

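The hunks above relax the VFPROUND/VFPEXT family of SelectionDAG node profiles from hard-coded f32/f64 element types to generic FP-vector constraints, so the same nodes can also produce or consume f16 vectors. As a minimal sketch (not part of the diff, and only assuming -mattr=+avx512fp16), plain IR truncation from double to half vectors is the kind of code these relaxed nodes let the backend select into vcvtpd2ph:

  ; Hypothetical illustration; the newly added avx512fp16-cvt.ll test
  ; exercises conversions of this kind.
  define <8 x half> @trunc_pd_ph(<8 x double> %x) {
    %r = fptrunc <8 x double> %x to <8 x half>
    ret <8 x half> %r
  }
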
diff  --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index ab5a34181cc3a..a57a956e8135e 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -5177,6 +5177,26 @@ static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum,
   case X86::VCVTUSI642SDZrr_Int:
   case X86::VCVTUSI642SDZrrb_Int:
   case X86::VCVTUSI642SDZrm_Int:
+  case X86::VCVTSI2SHZrr:
+  case X86::VCVTSI2SHZrm:
+  case X86::VCVTSI2SHZrr_Int:
+  case X86::VCVTSI2SHZrrb_Int:
+  case X86::VCVTSI2SHZrm_Int:
+  case X86::VCVTSI642SHZrr:
+  case X86::VCVTSI642SHZrm:
+  case X86::VCVTSI642SHZrr_Int:
+  case X86::VCVTSI642SHZrrb_Int:
+  case X86::VCVTSI642SHZrm_Int:
+  case X86::VCVTUSI2SHZrr:
+  case X86::VCVTUSI2SHZrm:
+  case X86::VCVTUSI2SHZrr_Int:
+  case X86::VCVTUSI2SHZrrb_Int:
+  case X86::VCVTUSI2SHZrm_Int:
+  case X86::VCVTUSI642SHZrr:
+  case X86::VCVTUSI642SHZrm:
+  case X86::VCVTUSI642SHZrr_Int:
+  case X86::VCVTUSI642SHZrrb_Int:
+  case X86::VCVTUSI642SHZrm_Int:
    // Load folding won't affect the undef register update since the input is
     // a GPR.
     return OpNum == 1 && !ForLoadFold;
@@ -5278,6 +5298,26 @@ static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum,
   case X86::VSQRTSDZrb_Int:
   case X86::VSQRTSDZm:
   case X86::VSQRTSDZm_Int:
+  case X86::VCVTSD2SHZrr:
+  case X86::VCVTSD2SHZrr_Int:
+  case X86::VCVTSD2SHZrrb_Int:
+  case X86::VCVTSD2SHZrm:
+  case X86::VCVTSD2SHZrm_Int:
+  case X86::VCVTSS2SHZrr:
+  case X86::VCVTSS2SHZrr_Int:
+  case X86::VCVTSS2SHZrrb_Int:
+  case X86::VCVTSS2SHZrm:
+  case X86::VCVTSS2SHZrm_Int:
+  case X86::VCVTSH2SDZrr:
+  case X86::VCVTSH2SDZrr_Int:
+  case X86::VCVTSH2SDZrrb_Int:
+  case X86::VCVTSH2SDZrm:
+  case X86::VCVTSH2SDZrm_Int:
+  case X86::VCVTSH2SSZrr:
+  case X86::VCVTSH2SSZrr_Int:
+  case X86::VCVTSH2SSZrrb_Int:
+  case X86::VCVTSH2SSZrm:
+  case X86::VCVTSH2SSZrm_Int:
     return OpNum == 1;
   case X86::VMOVSSZrrk:
   case X86::VMOVSDZrrk:

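These hasUndefRegUpdate additions cover the new scalar FP16 converts (vcvtsi2sh, vcvtusi2sh, vcvtsd2sh, vcvtsh2ss, and friends): they write only the low half element and take the upper elements from a pass-through source, so an undef pass-through creates a false dependence the backend may want to break. A minimal sketch (not from the diff) of IR that is expected to lower to one of the listed opcodes:

  ; Scalar i32 -> half conversion; with AVX512-FP16 enabled this should be
  ; selected to VCVTSI2SH, one of the opcodes added above.
  define half @si32_to_half(i32 %x) {
    %r = sitofp i32 %x to half
    ret half %r
  }
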
diff  --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 70d7cecce31bd..3cf6ad248e9c5 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -1747,20 +1747,20 @@ let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
 // XMM only
 def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtpd2ps\t{$src, $dst|$dst, $src}",
-                       [(set VR128:$dst, (X86any_vfpround (v2f64 VR128:$src)))]>,
+                       [(set VR128:$dst, (v4f32 (X86any_vfpround (v2f64 VR128:$src))))]>,
                        VEX, Sched<[WriteCvtPD2PS]>, VEX_WIG;
 def VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                        "cvtpd2ps{x}\t{$src, $dst|$dst, $src}",
-                       [(set VR128:$dst, (X86any_vfpround (loadv2f64 addr:$src)))]>,
+                       [(set VR128:$dst, (v4f32 (X86any_vfpround (loadv2f64 addr:$src))))]>,
                        VEX, Sched<[WriteCvtPD2PS.Folded]>, VEX_WIG;
 
 def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                         "cvtpd2ps\t{$src, $dst|$dst, $src}",
-                        [(set VR128:$dst, (X86any_vfpround VR256:$src))]>,
+                        [(set VR128:$dst, (v4f32 (X86any_vfpround (v4f64 VR256:$src))))]>,
                         VEX, VEX_L, Sched<[WriteCvtPD2PSY]>, VEX_WIG;
 def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                         "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
-                        [(set VR128:$dst, (X86any_vfpround (loadv4f64 addr:$src)))]>,
+                        [(set VR128:$dst, (v4f32 (X86any_vfpround (loadv4f64 addr:$src))))]>,
                         VEX, VEX_L, Sched<[WriteCvtPD2PSY.Folded]>, VEX_WIG;
 } // Predicates = [HasAVX, NoVLX]
 
@@ -1771,11 +1771,11 @@ def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
 
 def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvtpd2ps\t{$src, $dst|$dst, $src}",
-                     [(set VR128:$dst, (X86any_vfpround (v2f64 VR128:$src)))]>,
+                     [(set VR128:$dst, (v4f32 (X86any_vfpround (v2f64 VR128:$src))))]>,
                      Sched<[WriteCvtPD2PS]>, SIMD_EXC;
 def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                      "cvtpd2ps\t{$src, $dst|$dst, $src}",
-                     [(set VR128:$dst, (X86any_vfpround (memopv2f64 addr:$src)))]>,
+                     [(set VR128:$dst, (v4f32 (X86any_vfpround (memopv2f64 addr:$src))))]>,
                      Sched<[WriteCvtPD2PS.Folded]>, SIMD_EXC;
 
 //===----------------------------------------------------------------------===//

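Because X86any_vfpround no longer implies an f32 result, the existing CVTPD2PS patterns now spell out the v4f32 result type explicitly to keep them unambiguous. A small sketch (not part of the diff) of the IR these patterns continue to cover:

  ; 256-bit double -> 128-bit float truncation, expected to be selected
  ; to vcvtpd2ps on AVX targets.
  define <4 x float> @trunc_pd_ps(<4 x double> %x) {
    %r = fptrunc <4 x double> %x to <4 x float>
    ret <4 x float> %r
  }
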
diff  --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 8c33624e28f0d..db7e42b20cb14 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -1006,6 +1006,117 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
                      X86ISD::FMULS, X86ISD::FMULS_RND),
   X86_INTRINSIC_DATA(avx512fp16_mask_sub_sh_round, INTR_TYPE_SCALAR_MASK,
                      X86ISD::FSUBS, X86ISD::FSUBS_RND),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtdq2ph_128, TRUNCATE_TO_REG,
+                     X86ISD::CVTSI2P, X86ISD::MCVTSI2P),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtpd2ph_128, TRUNCATE_TO_REG,
+                     X86ISD::VFPROUND, X86ISD::VMFPROUND),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtpd2ph_256, TRUNCATE_TO_REG,
+                     X86ISD::VFPROUND, X86ISD::VMFPROUND),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtpd2ph_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::VFPROUND, X86ISD::VFPROUND_RND),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2dq_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTP2SI, 0),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2dq_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTP2SI, 0),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2dq_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2pd_128, INTR_TYPE_1OP_MASK, X86ISD::VFPEXT, 0),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2pd_256, INTR_TYPE_1OP_MASK, X86ISD::VFPEXT, 0),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2pd_512, INTR_TYPE_1OP_MASK_SAE,
+                     ISD::FP_EXTEND, X86ISD::VFPEXT_SAE),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2psx_128, INTR_TYPE_1OP_MASK, X86ISD::VFPEXT, 0),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2psx_256, INTR_TYPE_1OP_MASK, ISD::FP_EXTEND, 0),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2psx_512, INTR_TYPE_1OP_MASK_SAE,
+                     ISD::FP_EXTEND, X86ISD::VFPEXT_SAE),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2qq_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTP2SI, 0),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2qq_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTP2SI, 0),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2qq_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2udq_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTP2UI, 0),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2udq_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTP2UI, 0),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2udq_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uqq_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTP2UI, 0),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uqq_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTP2UI, 0),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uqq_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uw_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTP2UI, 0),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uw_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTP2UI, 0),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uw_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2w_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTP2SI, 0),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2w_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTP2SI, 0),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2w_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtps2phx_128, TRUNCATE_TO_REG,
+                     X86ISD::VFPROUND, X86ISD::VMFPROUND),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtps2phx_256, INTR_TYPE_1OP_MASK, X86ISD::VFPROUND, 0),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtps2phx_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::VFPROUND, X86ISD::VFPROUND_RND),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtqq2ph_128, TRUNCATE_TO_REG,
+                     X86ISD::CVTSI2P, X86ISD::MCVTSI2P),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtqq2ph_256, TRUNCATE_TO_REG,
+                     X86ISD::CVTSI2P, X86ISD::MCVTSI2P),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtsd2sh_round, INTR_TYPE_SCALAR_MASK_RND,
+                     X86ISD::VFPROUNDS, X86ISD::VFPROUNDS_RND),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtsh2sd_round, INTR_TYPE_SCALAR_MASK_SAE,
+                     X86ISD::VFPEXTS, X86ISD::VFPEXTS_SAE),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtsh2ss_round, INTR_TYPE_SCALAR_MASK_SAE,
+                     X86ISD::VFPEXTS, X86ISD::VFPEXTS_SAE),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtss2sh_round, INTR_TYPE_SCALAR_MASK_RND,
+                     X86ISD::VFPROUNDS, X86ISD::VFPROUNDS_RND),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2dq_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTTP2SI, 0),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2dq_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTTP2SI, 0),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2dq_512, INTR_TYPE_1OP_MASK_SAE,
+                     X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2qq_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTTP2SI, 0),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2qq_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTTP2SI, 0),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2qq_512, INTR_TYPE_1OP_MASK_SAE,
+                     X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2udq_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTTP2UI, 0),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2udq_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTTP2UI, 0),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2udq_512, INTR_TYPE_1OP_MASK_SAE,
+                     X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uqq_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTTP2UI, 0),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uqq_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTTP2UI, 0),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uqq_512, INTR_TYPE_1OP_MASK_SAE,
+                     X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uw_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTTP2UI, 0),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uw_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTTP2UI, 0),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uw_512, INTR_TYPE_1OP_MASK_SAE,
+                     X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2w_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTTP2SI, 0),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2w_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::CVTTP2SI, 0),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2w_512, INTR_TYPE_1OP_MASK_SAE,
+                     X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtudq2ph_128, TRUNCATE_TO_REG,
+                     X86ISD::CVTUI2P, X86ISD::MCVTUI2P),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtuqq2ph_128, TRUNCATE_TO_REG,
+                     X86ISD::CVTUI2P, X86ISD::MCVTUI2P),
+  X86_INTRINSIC_DATA(avx512fp16_mask_vcvtuqq2ph_256, TRUNCATE_TO_REG,
+                     X86ISD::CVTUI2P, X86ISD::MCVTUI2P),
   X86_INTRINSIC_DATA(avx512fp16_max_ph_128, INTR_TYPE_2OP, X86ISD::FMAX, 0),
   X86_INTRINSIC_DATA(avx512fp16_max_ph_256, INTR_TYPE_2OP, X86ISD::FMAX, 0),
   X86_INTRINSIC_DATA(avx512fp16_max_ph_512, INTR_TYPE_2OP_SAE, X86ISD::FMAX, X86ISD::FMAX_SAE),
@@ -1015,6 +1126,23 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512fp16_mul_ph_512, INTR_TYPE_2OP, ISD::FMUL, X86ISD::FMUL_RND),
   X86_INTRINSIC_DATA(avx512fp16_sub_ph_512, INTR_TYPE_2OP, ISD::FSUB, X86ISD::FSUB_RND),
   X86_INTRINSIC_DATA(avx512fp16_vcomi_sh, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
+  /* FP16 scalar convert instructions */
+  X86_INTRINSIC_DATA(avx512fp16_vcvtsh2si32, INTR_TYPE_1OP, X86ISD::CVTS2SI, X86ISD::CVTS2SI_RND),
+  X86_INTRINSIC_DATA(avx512fp16_vcvtsh2si64, INTR_TYPE_1OP, X86ISD::CVTS2SI, X86ISD::CVTS2SI_RND),
+  X86_INTRINSIC_DATA(avx512fp16_vcvtsh2usi32, INTR_TYPE_1OP, X86ISD::CVTS2UI, X86ISD::CVTS2UI_RND),
+  X86_INTRINSIC_DATA(avx512fp16_vcvtsh2usi64, INTR_TYPE_1OP, X86ISD::CVTS2UI, X86ISD::CVTS2UI_RND),
+  X86_INTRINSIC_DATA(avx512fp16_vcvtsi2sh, INTR_TYPE_2OP,
+                     X86ISD::SCALAR_SINT_TO_FP, X86ISD::SCALAR_SINT_TO_FP_RND),
+  X86_INTRINSIC_DATA(avx512fp16_vcvtsi642sh, INTR_TYPE_2OP,
+                     X86ISD::SCALAR_SINT_TO_FP, X86ISD::SCALAR_SINT_TO_FP_RND),
+  X86_INTRINSIC_DATA(avx512fp16_vcvttsh2si32, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_SAE),
+  X86_INTRINSIC_DATA(avx512fp16_vcvttsh2si64, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_SAE),
+  X86_INTRINSIC_DATA(avx512fp16_vcvttsh2usi32, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_SAE),
+  X86_INTRINSIC_DATA(avx512fp16_vcvttsh2usi64, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_SAE),
+  X86_INTRINSIC_DATA(avx512fp16_vcvtusi2sh, INTR_TYPE_2OP,
+                     X86ISD::SCALAR_UINT_TO_FP, X86ISD::SCALAR_UINT_TO_FP_RND),
+  X86_INTRINSIC_DATA(avx512fp16_vcvtusi642sh, INTR_TYPE_2OP,
+                     X86ISD::SCALAR_UINT_TO_FP, X86ISD::SCALAR_UINT_TO_FP_RND),
   X86_INTRINSIC_DATA(bmi_bextr_32,         INTR_TYPE_2OP, X86ISD::BEXTR, 0),
   X86_INTRINSIC_DATA(bmi_bextr_64,         INTR_TYPE_2OP, X86ISD::BEXTR, 0),
   X86_INTRINSIC_DATA(bmi_bzhi_32,          INTR_TYPE_2OP, X86ISD::BZHI, 0),

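Each X86_INTRINSIC_DATA entry above maps one of the new avx512fp16 conversion intrinsics onto its ISD/X86ISD node, plus a rounding or SAE variant where the 512-bit form takes one. As a sketch of the shape of such a call (mirroring what the tests below check), the masked ph-to-pd extension looks like this; the table routes it to ISD::FP_EXTEND, or to X86ISD::VFPEXT_SAE when the last operand requests {sae}:

  ; <8 x half> source, <8 x double> pass-through, i8 mask, i32 SAE flag
  ; (4 = current direction, 8 = {sae}).
  declare <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half>, <8 x double>, i8, i32)

  define <8 x double> @cvt_ph2pd_masked(<8 x half> %x, <8 x double> %pt, i8 %m) {
    %r = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> %x, <8 x double> %pt, i8 %m, i32 4)
    ret <8 x double> %r
  }
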
diff  --git a/llvm/test/CodeGen/X86/avx512fp16-arith-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-arith-intrinsics.ll
index 2a0433cd23071..8148585f8d793 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-arith-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-arith-intrinsics.ll
@@ -282,3 +282,364 @@ define <32 x half> @test_int_x86_avx512fp16_maskz_max_ph_512_sae(<32 x half> %x1
   %res1 = select <32 x i1> %mask, <32 x half> %res0, <32 x half> zeroinitializer
   ret <32 x half> %res1
 }
+
+declare <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half>, <8 x double>, i8, i32)
+
+define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd(<8 x half> %x0, <8 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2pd %xmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> %x0, <8 x double> %x1, i8 %x2, i32 4)
+  ret <8 x double> %res
+}
+
+define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_sae(<8 x half> %x0, <8 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_sae:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2pd {sae}, %xmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> %x0, <8 x double> %x1, i8 %x2, i32 8)
+  ret <8 x double> %res
+}
+
+define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_nomask(<8 x half> %x0, <8 x double> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2pd %xmm0, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> %x0, <8 x double> %x1, i8 -1, i32 4)
+  ret <8 x double> %res
+}
+
+define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_load(<8 x half>* %px0, <8 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtph2pd (%rdi), %zmm0 {%k1}
+; CHECK-NEXT:    retq
+  %x0 = load <8 x half>, <8 x half>* %px0, align 16
+  %res = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> %x0, <8 x double> %x1, i8 %x2, i32 4)
+  ret <8 x double> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double>, <8 x half>, i8, i32)
+
+define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph(<8 x double> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtpd2ph %zmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double> %x0, <8 x half> %x1, i8 %x2, i32 4)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_r(<8 x double> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_r:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtpd2ph {rz-sae}, %zmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double> %x0, <8 x half> %x1, i8 %x2, i32 11)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_load(<8 x double>* %px0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtpd2phz (%rdi), %xmm0 {%k1}
+; CHECK-NEXT:    retq
+  %x0 = load <8 x double>, <8 x double>* %px0, align 64
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double> %x0, <8 x half> %x1, i8 %x2, i32 4)
+  ret <8 x half> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half>, <4 x float>, <8 x half>, i8, i32)
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_ss2sh_round:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtss2sh %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT:    vmovaps %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 %x3, i32 4)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round_r(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_ss2sh_round_r:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtss2sh {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT:    vmovaps %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 %x3, i32 11)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round_nomask(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_ss2sh_round_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtss2sh %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 -1, i32 4)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round_z(<8 x half> %x0, <4 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_ss2sh_round_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtss2sh %xmm1, %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> %x0, <4 x float> %x1, <8 x half> zeroinitializer, i8 %x2, i32 4)
+  ret <8 x half> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half>, <2 x double>, <8 x half>, i8, i32)
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sd2sh_round:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtsd2sh %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT:    vmovaps %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 %x3, i32 4)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round_r(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sd2sh_round_r:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtsd2sh {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT:    vmovaps %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 %x3, i32 11)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round_nomask(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sd2sh_round_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtsd2sh %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 -1, i32 4)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round_z(<8 x half> %x0, <2 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sd2sh_round_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtsd2sh %xmm1, %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> %x0, <2 x double> %x1, <8 x half> zeroinitializer, i8 %x2, i32 4)
+  ret <8 x half> %res
+}
+
+declare <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float>, <8 x half>, <4 x float>, i8, i32)
+
+define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2ss_round:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtsh2ss %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT:    vmovaps %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 %x3, i32 4)
+  ret <4 x float> %res
+}
+
+define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round_r(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2ss_round_r:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtsh2ss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT:    vmovaps %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 %x3, i32 8)
+  ret <4 x float> %res
+}
+
+define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round_nomask(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2ss_round_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtsh2ss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 -1, i32 4)
+  ret <4 x float> %res
+}
+
+define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round_z(<4 x float> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2ss_round_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtsh2ss %xmm1, %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> %x0, <8 x half> %x1, <4 x float> zeroinitializer, i8 %x2, i32 4)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double>, <8 x half>, <2 x double>, i8, i32)
+
+define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2sd_round:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtsh2sd %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT:    vmovaps %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 %x3, i32 4)
+  ret <2 x double> %res
+}
+
+define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round_r(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2sd_round_r:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtsh2sd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT:    vmovaps %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 %x3, i32 8)
+  ret <2 x double> %res
+}
+
+define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round_nomask(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2sd_round_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtsh2sd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 -1, i32 4)
+  ret <2 x double> %res
+}
+
+define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round_z(<2 x double> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2sd_round_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtsh2sd %xmm1, %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> %x0, <8 x half> %x1, <2 x double> zeroinitializer, i8 %x2, i32 4)
+  ret <2 x double> %res
+}
+
+declare <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half>, <16 x float>, i16, i32)
+
+define <16 x float> @test_int_x86_avx512_cvt_ph2psx_512(<16 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2psx %ymm0, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> undef, i16 -1, i32 4)
+  ret <16 x float> %res
+}
+
+define <16 x float> @test_int_x86_avx512_mask_cvt_ph2psx_512(<16 x half> %x0, <16 x float> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2psx %ymm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> %x1, i16 %x2, i32 4)
+  ret <16 x float> %res
+}
+
+define <16 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_512(<16 x half> %x0, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2psx %ymm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> zeroinitializer, i16 %x2, i32 4)
+  ret <16 x float> %res
+}
+
+define <16 x float> @test_int_x86_avx512_cvt_ph2psx_512r(<16 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_512r:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2psx {sae}, %ymm0, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> undef, i16 -1, i32 8)
+  ret <16 x float> %res
+}
+
+define <16 x float> @test_int_x86_avx512_mask_cvt_ph2psx_512r(<16 x half> %x0, <16 x float> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_512r:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2psx {sae}, %ymm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> %x1, i16 %x2, i32 8)
+  ret <16 x float> %res
+}
+
+define <16 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_512r(<16 x half> %x0, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_512r:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2psx {sae}, %ymm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> zeroinitializer, i16 %x2, i32 8)
+  ret <16 x float> %res
+}
+
+declare <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float>, <16 x half>, i16, i32)
+
+define <16 x half> @test_int_x86_avx512_cvt_ps2phx_512(<16 x float> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvt_ps2phx_512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtps2phx %zmm0, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> undef, i16 -1, i32 4)
+  ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512_mask_cvt_ps2phx_512(<16 x float> %x0, <16 x half> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtps2phx %zmm0, %ymm1 {%k1}
+; CHECK-NEXT:    vmovaps %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> %x1, i16 %x2, i32 4)
+  ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512_maskz_cvt_ps2phx_512(<16 x float> %x0, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ps2phx_512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtps2phx %zmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> zeroinitializer, i16 %x2, i32 4)
+  ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512_mask_cvt_ps2phx_512r(<16 x float> %x0, <16 x half> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_512r:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtps2phx {rd-sae}, %zmm0, %ymm1 {%k1}
+; CHECK-NEXT:    vcvtps2phx {ru-sae}, %zmm0, %ymm0
+; CHECK-NEXT:    vaddph %ymm0, %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> %x1, i16 %x2, i32 9)
+  %res1 = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> %x1, i16 -1, i32 10)
+  %res2 = fadd <16 x half> %res, %res1
+  ret <16 x half> %res2
+}

diff  --git a/llvm/test/CodeGen/X86/avx512fp16-arith-vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-arith-vl-intrinsics.ll
index da79411006d18..d827206318e76 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-arith-vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-arith-vl-intrinsics.ll
@@ -402,3 +402,403 @@ define <16 x half> @test_min_ph_256_2(<16 x half> %x1, <16 x half> %x2) {
   %res0 = call <16 x half> @llvm.x86.avx512fp16.min.ph.256(<16 x half> %x1, <16 x half> %x2)
   ret  <16 x half> %res0
 }
+
+declare <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half>, <4 x double>, i8)
+
+define <4 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_256(<8 x half> %x0, <4 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2pd %xmm0, %ymm1 {%k1}
+; CHECK-NEXT:    vmovaps %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 %x2)
+  ret <4 x double> %res
+}
+
+define <4 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_256_nomask(<8 x half> %x0, <4 x double> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_256_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2pd %xmm0, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 -1)
+  ret <4 x double> %res
+}
+
+declare <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half>, <2 x double>, i8)
+
+define <2 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_128(<8 x half> %x0, <2 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2pd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 %x2)
+  ret <2 x double> %res
+}
+
+define <2 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_128_nomask(<8 x half> %x0, <2 x double> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_128_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2pd %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 -1)
+  ret <2 x double> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double>, <8 x half>, i8)
+
+define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_256(<4 x double> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtpd2ph %ymm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_256_load(<4 x double>* %px0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_256_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtpd2phy (%rdi), %xmm0 {%k1}
+; CHECK-NEXT:    retq
+  %x0 = load <4 x double>, <4 x double>* %px0, align 32
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
+  ret <8 x half> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double>, <8 x half>, i8)
+
+define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_128(<2 x double> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtpd2ph %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_128_load(<2 x double>* %px0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_128_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtpd2phx (%rdi), %xmm0 {%k1}
+; CHECK-NEXT:    retq
+  %x0 = load <2 x double>, <2 x double>* %px0, align 16
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
+  ret <8 x half> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half>, <4 x i32>, i8)
+
+define <4 x i32> @test_int_x86_avx512_cvt_ph2udq_128(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2udq %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2udq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_128(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2udq %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
+  ret <4 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half>, <8 x i32>, i8)
+
+define <8 x i32> @test_int_x86_avx512_cvt_ph2udq_256(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2udq %xmm0, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2udq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT:    vmovaps %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_256(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2udq %xmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
+  ret <8 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half>, <4 x i32>, i8)
+
+define <4 x i32> @test_int_x86_avx512_cvtt_ph2dq_128(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_128(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
+  ret <4 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half>, <8 x i32>, i8)
+
+define <8 x i32> @test_int_x86_avx512_cvtt_ph2dq_256(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT:    vmovaps %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_256(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
+  ret <8 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half>, <4 x i32>, i8)
+
+define <4 x i32> @test_int_x86_avx512_cvtt_ph2udq_128(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_128(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
+  ret <4 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half>, <8 x i32>, i8)
+
+define <8 x i32> @test_int_x86_avx512_cvtt_ph2udq_256(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2udq %xmm0, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2udq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT:    vmovaps %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_256(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2udq %xmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
+  ret <8 x i32> %res
+}
+
+declare <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half>, <4 x float>, i8)
+
+define <4 x float> @test_int_x86_avx512_cvt_ph2psx_128(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> undef, i8 -1)
+  ret <4 x float> %res
+}
+
+define <4 x float> @test_int_x86_avx512_mask_cvt_ph2psx_128(<8 x half> %x0, <4 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> %x1, i8 %x2)
+  ret <4 x float> %res
+}
+
+define <4 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_128(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> zeroinitializer, i8 %x2)
+  ret <4 x float> %res
+}
+
+declare <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half>, <8 x float>, i8)
+
+define <8 x float> @test_int_x86_avx512_cvt_ph2psx_256(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2psx %xmm0, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> undef, i8 -1)
+  ret <8 x float> %res
+}
+
+define <8 x float> @test_int_x86_avx512_mask_cvt_ph2psx_256(<8 x half> %x0, <8 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2psx %xmm0, %ymm1 {%k1}
+; CHECK-NEXT:    vmovaps %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> %x1, i8 %x2)
+  ret <8 x float> %res
+}
+
+define <8 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_256(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2psx %xmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> zeroinitializer, i8 %x2)
+  ret <8 x float> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float>, <8 x half>, i8)
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_ps2phx_128(<4 x float> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtps2phx %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vcvtps2phx %xmm0, %xmm0
+; CHECK-NEXT:    vaddph %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float> %x0, <8 x half> %x1, i8 %x2)
+  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float> %x0, <8 x half> %x1, i8 -1)
+  %res2 = fadd <8 x half> %res, %res1
+  ret <8 x half> %res2
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float>, <8 x half>, i8)
+
+define <8 x half> @test_int_x86_avx512_cvt_ps2phx_256(<8 x float> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvt_ps2phx_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> undef, i8 -1)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_ps2phx_256(<8 x float> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> %x1, i8 %x2)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_maskz_cvt_ps2phx_256(<8 x float> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ps2phx_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> zeroinitializer, i8 %x2)
+  ret <8 x half> %res
+}

diff  --git a/llvm/test/CodeGen/X86/avx512fp16-arith.ll b/llvm/test/CodeGen/X86/avx512fp16-arith.ll
index 1b234387c07c2..194b1c48c3846 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-arith.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-arith.ll
@@ -274,6 +274,68 @@ entry:
   ret <32 x i1> %0
 }
 
+define <8 x half>  @regression_test1(<8 x half> %x, <8 x half> %y) #0 {
+; CHECK-LABEL: regression_test1:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    vsubph %xmm1, %xmm0, %xmm2
+; CHECK-NEXT:    vaddph %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3],xmm2[4],xmm0[5],xmm2[6],xmm0[7]
+; CHECK-NEXT:    retq
+entry:
+  %a = fsub <8 x half> %x, %y
+  %b = fadd <8 x half> %x, %y
+  %c = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+  ret <8 x half> %c
+}
+
+define <8 x i16>  @regression_test2(<8 x float> %x) #0 {
+; CHECK-LABEL: regression_test2:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    vcvttps2udq %ymm0, %ymm0
+; CHECK-NEXT:    vpmovdw %ymm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+entry:
+  %a = fptoui <8 x float> %x to  <8 x i16>
+  ret <8 x i16> %a
+}
+
+define <8 x i16>  @regression_test3(<8 x float> %x) #0 {
+; CHECK-LABEL: regression_test3:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    vcvttps2dq %ymm0, %ymm0
+; CHECK-NEXT:    vpmovdw %ymm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+entry:
+  %a = fptosi <8 x float> %x to  <8 x i16>
+  ret <8 x i16> %a
+}
+
+define <8 x i16>  @regression_test4(<8 x double> %x) #0 {
+; CHECK-LABEL: regression_test4:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    vcvttpd2udq %zmm0, %ymm0
+; CHECK-NEXT:    vpmovdw %ymm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+entry:
+  %a = fptoui <8 x double> %x to  <8 x i16>
+  ret <8 x i16> %a
+}
+
+define <8 x i16>  @regression_test5(<8 x double> %x) #0 {
+; CHECK-LABEL: regression_test5:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    vcvttpd2dq %zmm0, %ymm0
+; CHECK-NEXT:    vpmovdw %ymm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+entry:
+  %a = fptosi <8 x double> %x to  <8 x i16>
+  ret <8 x i16> %a
+}
+
 define <8 x i1> @fcmp_v8f16(<8 x half> %a, <8 x half> %b)
 ; CHECK-LABEL: fcmp_v8f16:
 ; CHECK:       ## %bb.0: ## %entry

diff  --git a/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-intrinsics.ll
new file mode 100644
index 0000000000000..e1bf6e3ba01ae
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-intrinsics.ll
@@ -0,0 +1,549 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512fp16 | FileCheck %s
+
+declare <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16>, i32)
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtw2ph %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %msk = bitcast i32 %mask to <32 x i1>
+  %res0 = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
+  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
+  ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_2(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtw2ph %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %msk = bitcast i32 %mask to <32 x i1>
+  %res0 = sitofp <32 x i16> %arg0 to <32 x half>
+  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
+  ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_b(i16* %arg0, <32 x half> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtw2ph (%rdi){1to32}, %zmm0 {%k1}
+; CHECK-NEXT:    retq
+  %msk = bitcast i32 %mask to <32 x i1>
+  %scalar = load i16, i16* %arg0
+  %scalar_in_vector = insertelement <32 x i16> undef, i16 %scalar, i32 0
+  %val = shufflevector <32 x i16> %scalar_in_vector, <32 x i16> undef, <32 x i32> zeroinitializer
+  %res0 = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %val, i32 4)
+  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
+  ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_b_2(i16* %arg0, <32 x half> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_b_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtw2ph (%rdi){1to32}, %zmm0 {%k1}
+; CHECK-NEXT:    retq
+  %msk = bitcast i32 %mask to <32 x i1>
+  %scalar = load i16, i16* %arg0
+  %scalar_in_vector = insertelement <32 x i16> undef, i16 %scalar, i32 0
+  %val = shufflevector <32 x i16> %scalar_in_vector, <32 x i16> undef, <32 x i32> zeroinitializer
+  %res0 = sitofp <32 x i16> %val to <32 x half>
+  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
+  ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_r(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_r:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtw2ph {ru-sae}, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %msk = bitcast i32 %mask to <32 x i1>
+  %res0 = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 10)
+  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
+  ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_nomask(<32 x i16> %arg0) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtw2ph %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
+  ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_nomask_2(<32 x i16> %arg0) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_nomask_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtw2ph %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %res = sitofp <32 x i16> %arg0 to <32 x half>
+  ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_z(<32 x i16> %arg0, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtw2ph %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %msk = bitcast i32 %mask to <32 x i1>
+  %res0 = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
+  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> zeroinitializer
+  ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_z_2(<32 x i16> %arg0, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_z_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtw2ph %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %msk = bitcast i32 %mask to <32 x i1>
+  %res0 = sitofp <32 x i16> %arg0 to <32 x half>
+  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> zeroinitializer
+  ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_load(<32 x i16>* %arg0, <32 x half> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtw2ph (%rdi), %zmm0 {%k1}
+; CHECK-NEXT:    retq
+  %msk = bitcast i32 %mask to <32 x i1>
+  %val = load <32 x i16>, <32 x i16>* %arg0
+  %res0 = call <32 x half> @llvm.x86.avx512.sitofp.round.v32f16.v32i16(<32 x i16> %val, i32 4)
+  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
+  ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_512_load_2(<32 x i16>* %arg0, <32 x half> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_512_load_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtw2ph (%rdi), %zmm0 {%k1}
+; CHECK-NEXT:    retq
+  %msk = bitcast i32 %mask to <32 x i1>
+  %val = load <32 x i16>, <32 x i16>* %arg0
+  %res0 = sitofp <32 x i16> %val to <32 x half>
+  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
+  ret <32 x half> %res
+}
+
+declare <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half>, <32 x i16>, i32, i32)
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2w %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 4)
+  ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_b(half* %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtph2w (%rdi){1to32}, %zmm0 {%k1}
+; CHECK-NEXT:    retq
+  %scalar = load half, half* %arg0
+  %scalar_in_vector = insertelement <32 x half> undef, half %scalar, i32 0
+  %val = shufflevector <32 x half> %scalar_in_vector, <32 x half> undef, <32 x i32> zeroinitializer
+  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
+  ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_r(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_r:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2w {rd-sae}, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 9)
+  ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_nomask(<32 x half> %arg0, <32 x i16> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2w %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 -1, i32 4)
+  ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_z(<32 x half> %arg0, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2w %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %arg0, <32 x i16> zeroinitializer, i32 %mask, i32 4)
+  ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_512_load(<32 x half>* %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_512_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtph2w (%rdi), %zmm0 {%k1}
+; CHECK-NEXT:    retq
+  %val = load <32 x half>, <32 x half>* %arg0
+  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
+  ret <32 x i16> %res
+}
+
+
+declare <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16>, i32)
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtuw2ph %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %msk = bitcast i32 %mask to <32 x i1>
+  %res0 = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
+  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
+  ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_2(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtuw2ph %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %msk = bitcast i32 %mask to <32 x i1>
+  %res0 = uitofp <32 x i16> %arg0 to <32 x half>
+  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
+  ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_b(i16* %arg0, <32 x half> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtuw2ph (%rdi){1to32}, %zmm0 {%k1}
+; CHECK-NEXT:    retq
+  %msk = bitcast i32 %mask to <32 x i1>
+  %scalar = load i16, i16* %arg0
+  %scalar_in_vector = insertelement <32 x i16> undef, i16 %scalar, i32 0
+  %val = shufflevector <32 x i16> %scalar_in_vector, <32 x i16> undef, <32 x i32> zeroinitializer
+  %res0 = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %val, i32 4)
+  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
+  ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_b_2(i16* %arg0, <32 x half> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_b_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtuw2ph (%rdi){1to32}, %zmm0 {%k1}
+; CHECK-NEXT:    retq
+  %msk = bitcast i32 %mask to <32 x i1>
+  %scalar = load i16, i16* %arg0
+  %scalar_in_vector = insertelement <32 x i16> undef, i16 %scalar, i32 0
+  %val = shufflevector <32 x i16> %scalar_in_vector, <32 x i16> undef, <32 x i32> zeroinitializer
+  %res0 = uitofp <32 x i16> %val to <32 x half>
+  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
+  ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_r(<32 x i16> %arg0, <32 x half> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_r:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtuw2ph {ru-sae}, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %msk = bitcast i32 %mask to <32 x i1>
+  %res0 = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 10)
+  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
+  ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_nomask(<32 x i16> %arg0) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtuw2ph %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
+  ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_nomask_2(<32 x i16> %arg0) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_nomask_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtuw2ph %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %res = uitofp <32 x i16> %arg0 to <32 x half>
+  ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_z(<32 x i16> %arg0, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtuw2ph %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %msk = bitcast i32 %mask to <32 x i1>
+  %res0 = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %arg0, i32 4)
+  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> zeroinitializer
+  ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_z_2(<32 x i16> %arg0, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_z_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtuw2ph %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %msk = bitcast i32 %mask to <32 x i1>
+  %res0 = uitofp <32 x i16> %arg0 to <32 x half>
+  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> zeroinitializer
+  ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_load(<32 x i16>* %arg0, <32 x half> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtuw2ph (%rdi), %zmm0 {%k1}
+; CHECK-NEXT:    retq
+  %msk = bitcast i32 %mask to <32 x i1>
+  %val = load <32 x i16>, <32 x i16>* %arg0
+  %res0 = call <32 x half> @llvm.x86.avx512.uitofp.round.v32f16.v32i16(<32 x i16> %val, i32 4)
+  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
+  ret <32 x half> %res
+}
+
+define <32 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_512_load_2(<32 x i16>* %arg0, <32 x half> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_512_load_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtuw2ph (%rdi), %zmm0 {%k1}
+; CHECK-NEXT:    retq
+  %msk = bitcast i32 %mask to <32 x i1>
+  %val = load <32 x i16>, <32 x i16>* %arg0
+  %res0 = uitofp <32 x i16> %val to <32 x half>
+  %res = select <32 x i1> %msk, <32 x half> %res0, <32 x half> %arg1
+  ret <32 x half> %res
+}
+
+declare <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half>, <32 x i16>, i32, i32)
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2uw %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 4)
+  ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_b(half* %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtph2uw (%rdi){1to32}, %zmm0 {%k1}
+; CHECK-NEXT:    retq
+  %scalar = load half, half* %arg0
+  %scalar_in_vector = insertelement <32 x half> undef, half %scalar, i32 0
+  %val = shufflevector <32 x half> %scalar_in_vector, <32 x half> undef, <32 x i32> zeroinitializer
+  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
+  ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_r(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_r:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2uw {rd-sae}, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 9)
+  ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_nomask(<32 x half> %arg0, <32 x i16> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2uw %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 -1, i32 4)
+  ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_z(<32 x half> %arg0, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2uw %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %arg0, <32 x i16> zeroinitializer, i32 %mask, i32 4)
+  ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_512_load(<32 x half>* %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_512_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtph2uw (%rdi), %zmm0 {%k1}
+; CHECK-NEXT:    retq
+  %val = load <32 x half>, <32 x half>* %arg0
+  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
+  ret <32 x i16> %res
+}
+
+declare <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half>, <32 x i16>, i32, i32)
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2w %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 4)
+  ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_b(half* %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvttph2w (%rdi){1to32}, %zmm0 {%k1}
+; CHECK-NEXT:    retq
+  %scalar = load half, half* %arg0
+  %scalar_in_vector = insertelement <32 x half> undef, half %scalar, i32 0
+  %val = shufflevector <32 x half> %scalar_in_vector, <32 x half> undef, <32 x i32> zeroinitializer
+  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
+  ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_sae(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_sae:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2w {sae}, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 8)
+  ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_nomask(<32 x half> %arg0, <32 x i16> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2w %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %arg0, <32 x i16> %arg1, i32 -1, i32 4)
+  ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_z(<32 x half> %arg0, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2w %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %arg0, <32 x i16> zeroinitializer, i32 %mask, i32 4)
+  ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_512_load(<32 x half>* %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_512_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvttph2w (%rdi), %zmm0 {%k1}
+; CHECK-NEXT:    retq
+  %val = load <32 x half>, <32 x half>* %arg0
+  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
+  ret <32 x i16> %res
+}
+
+declare <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half>, <32 x i16>, i32, i32)
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2uw %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 4)
+  ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_b(half* %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvttph2uw (%rdi){1to32}, %zmm0 {%k1}
+; CHECK-NEXT:    retq
+  %scalar = load half, half* %arg0
+  %scalar_in_vector = insertelement <32 x half> undef, half %scalar, i32 0
+  %val = shufflevector <32 x half> %scalar_in_vector, <32 x half> undef, <32 x i32> zeroinitializer
+  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
+  ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_sae(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_sae:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2uw {sae}, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 %mask, i32 8)
+  ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_nomask(<32 x half> %arg0, <32 x i16> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2uw %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %arg0, <32 x i16> %arg1, i32 -1, i32 4)
+  ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_z(<32 x half> %arg0, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2uw %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %arg0, <32 x i16> zeroinitializer, i32 %mask, i32 4)
+  ret <32 x i16> %res
+}
+
+define <32 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_512_load(<32 x half>* %arg0, <32 x i16> %arg1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_512_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvttph2uw (%rdi), %zmm0 {%k1}
+; CHECK-NEXT:    retq
+  %val = load <32 x half>, <32 x half>* %arg0
+  %res = call <32 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.512(<32 x half> %val, <32 x i16> %arg1, i32 %mask, i32 4)
+  ret <32 x i16> %res
+}

diff  --git a/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll
new file mode 100644
index 0000000000000..d17b677276b99
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll
@@ -0,0 +1,770 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512vl -mattr=+avx512fp16 | FileCheck %s
+
+define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256(<16 x i16> %arg0, <16 x half> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtw2ph %ymm0, %ymm1 {%k1}
+; CHECK-NEXT:    vmovaps %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %msk = bitcast i16 %mask to <16 x i1>
+  %res0 = sitofp <16 x i16> %arg0 to <16 x half>
+  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
+  ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256_b(i16* %arg0, <16 x half> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtw2ph (%rdi){1to16}, %ymm0 {%k1}
+; CHECK-NEXT:    retq
+  %msk = bitcast i16 %mask to <16 x i1>
+  %scalar = load i16, i16* %arg0
+  %scalar_in_vector = insertelement <16 x i16> undef, i16 %scalar, i32 0
+  %val = shufflevector <16 x i16> %scalar_in_vector, <16 x i16> undef, <16 x i32> zeroinitializer
+  %res0 = sitofp <16 x i16> %val to <16 x half>
+  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
+  ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256_nomask(<16 x i16> %arg0, <16 x half> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtw2ph %ymm0, %ymm0
+; CHECK-NEXT:    retq
+  %res = sitofp <16 x i16> %arg0 to <16 x half>
+  ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256_z(<16 x i16> %arg0, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtw2ph %ymm0, %ymm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %msk = bitcast i16 %mask to <16 x i1>
+  %res0 = sitofp <16 x i16> %arg0 to <16 x half>
+  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
+  ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256_load(<16 x i16>* %arg0, <16 x half> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtw2ph (%rdi), %ymm0 {%k1}
+; CHECK-NEXT:    retq
+  %msk = bitcast i16 %mask to <16 x i1>
+  %val = load <16 x i16>, <16 x i16>* %arg0
+  %res0 = sitofp <16 x i16> %val to <16 x half>
+  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
+  ret <16 x half> %res
+}
+
+declare <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half>, <16 x i16>, i16)
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2w %ymm0, %ymm1 {%k1}
+; CHECK-NEXT:    vmovaps %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256_b(half* %arg0, <16 x i16> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtph2w (%rdi){1to16}, %ymm0 {%k1}
+; CHECK-NEXT:    retq
+  %scalar = load half, half* %arg0
+  %scalar_in_vector = insertelement <16 x half> undef, half %scalar, i32 0
+  %val = shufflevector <16 x half> %scalar_in_vector, <16 x half> undef, <16 x i32> zeroinitializer
+  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256_nomask(<16 x half> %arg0, <16 x i16> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2w %ymm0, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %arg0, <16 x i16> %arg1, i16 -1)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256_z(<16 x half> %arg0, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2w %ymm0, %ymm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %arg0, <16 x i16> zeroinitializer, i16 %mask)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256_load(<16 x half>* %arg0, <16 x i16> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtph2w (%rdi), %ymm0 {%k1}
+; CHECK-NEXT:    retq
+  %val = load <16 x half>, <16 x half>* %arg0
+  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
+  ret <16 x i16> %res
+}
+
+define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256(<16 x i16> %arg0, <16 x half> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtuw2ph %ymm0, %ymm1 {%k1}
+; CHECK-NEXT:    vmovaps %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %msk = bitcast i16 %mask to <16 x i1>
+  %res0 = uitofp <16 x i16> %arg0 to <16 x half>
+  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
+  ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256_b(i16* %arg0, <16 x half> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtuw2ph (%rdi){1to16}, %ymm0 {%k1}
+; CHECK-NEXT:    retq
+  %msk = bitcast i16 %mask to <16 x i1>
+  %scalar = load i16, i16* %arg0
+  %scalar_in_vector = insertelement <16 x i16> undef, i16 %scalar, i32 0
+  %val = shufflevector <16 x i16> %scalar_in_vector, <16 x i16> undef, <16 x i32> zeroinitializer
+  %res0 = uitofp <16 x i16> %val to <16 x half>
+  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
+  ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256_nomask(<16 x i16> %arg0, <16 x half> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtuw2ph %ymm0, %ymm0
+; CHECK-NEXT:    retq
+  %res = uitofp <16 x i16> %arg0 to <16 x half>
+  ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256_z(<16 x i16> %arg0, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtuw2ph %ymm0, %ymm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %msk = bitcast i16 %mask to <16 x i1>
+  %res0 = uitofp <16 x i16> %arg0 to <16 x half>
+  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
+  ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256_load(<16 x i16>* %arg0, <16 x half> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtuw2ph (%rdi), %ymm0 {%k1}
+; CHECK-NEXT:    retq
+  %msk = bitcast i16 %mask to <16 x i1>
+  %val = load <16 x i16>, <16 x i16>* %arg0
+  %res0 = uitofp <16 x i16> %val to <16 x half>
+  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
+  ret <16 x half> %res
+}
+
+declare <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half>, <16 x i16>, i16)
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2uw %ymm0, %ymm1 {%k1}
+; CHECK-NEXT:    vmovaps %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256_b(half* %arg0, <16 x i16> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtph2uw (%rdi){1to16}, %ymm0 {%k1}
+; CHECK-NEXT:    retq
+  %scalar = load half, half* %arg0
+  %scalar_in_vector = insertelement <16 x half> undef, half %scalar, i32 0
+  %val = shufflevector <16 x half> %scalar_in_vector, <16 x half> undef, <16 x i32> zeroinitializer
+  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256_nomask(<16 x half> %arg0, <16 x i16> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2uw %ymm0, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %arg0, <16 x i16> %arg1, i16 -1)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256_z(<16 x half> %arg0, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2uw %ymm0, %ymm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %arg0, <16 x i16> zeroinitializer, i16 %mask)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256_load(<16 x half>* %arg0, <16 x i16> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtph2uw (%rdi), %ymm0 {%k1}
+; CHECK-NEXT:    retq
+  %val = load <16 x half>, <16 x half>* %arg0
+  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
+  ret <16 x i16> %res
+}
+
+declare <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half>, <16 x i16>, i16)
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2w %ymm0, %ymm1 {%k1}
+; CHECK-NEXT:    vmovaps %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256_b(half* %arg0, <16 x i16> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvttph2w (%rdi){1to16}, %ymm0 {%k1}
+; CHECK-NEXT:    retq
+  %scalar = load half, half* %arg0
+  %scalar_in_vector = insertelement <16 x half> undef, half %scalar, i32 0
+  %val = shufflevector <16 x half> %scalar_in_vector, <16 x half> undef, <16 x i32> zeroinitializer
+  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256_nomask(<16 x half> %arg0, <16 x i16> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2w %ymm0, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %arg0, <16 x i16> %arg1, i16 -1)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256_z(<16 x half> %arg0, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2w %ymm0, %ymm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %arg0, <16 x i16> zeroinitializer, i16 %mask)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256_load(<16 x half>* %arg0, <16 x i16> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvttph2w (%rdi), %ymm0 {%k1}
+; CHECK-NEXT:    retq
+  %val = load <16 x half>, <16 x half>* %arg0
+  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
+  ret <16 x i16> %res
+}
+
+declare <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half>, <16 x i16>, i16)
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2uw %ymm0, %ymm1 {%k1}
+; CHECK-NEXT:    vmovaps %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256_b(half* %arg0, <16 x i16> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvttph2uw (%rdi){1to16}, %ymm0 {%k1}
+; CHECK-NEXT:    retq
+  %scalar = load half, half* %arg0
+  %scalar_in_vector = insertelement <16 x half> undef, half %scalar, i32 0
+  %val = shufflevector <16 x half> %scalar_in_vector, <16 x half> undef, <16 x i32> zeroinitializer
+  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256_nomask(<16 x half> %arg0, <16 x i16> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2uw %ymm0, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %arg0, <16 x i16> %arg1, i16 -1)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256_z(<16 x half> %arg0, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2uw %ymm0, %ymm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %arg0, <16 x i16> zeroinitializer, i16 %mask)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256_load(<16 x half>* %arg0, <16 x i16> %arg1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvttph2uw (%rdi), %ymm0 {%k1}
+; CHECK-NEXT:    retq
+  %val = load <16 x half>, <16 x half>* %arg0
+  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
+  ret <16 x i16> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128(<8 x i16> %arg0, <8 x half> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtw2ph %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %msk = bitcast i8 %mask to <8 x i1>
+  %res0 = sitofp <8 x i16> %arg0 to <8 x half>
+  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128_b(i16* %arg0, <8 x half> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtw2ph (%rdi){1to8}, %xmm0 {%k1}
+; CHECK-NEXT:    retq
+  %msk = bitcast i8 %mask to <8 x i1>
+  %scalar = load i16, i16* %arg0
+  %scalar_in_vector = insertelement <8 x i16> undef, i16 %scalar, i32 0
+  %val = shufflevector <8 x i16> %scalar_in_vector, <8 x i16> undef, <8 x i32> zeroinitializer
+  %res0 = sitofp <8 x i16> %val to <8 x half>
+  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128_nomask(<8 x i16> %arg0, <8 x half> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtw2ph %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = sitofp <8 x i16> %arg0 to <8 x half>
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128_z(<8 x i16> %arg0, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtw2ph %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %msk = bitcast i8 %mask to <8 x i1>
+  %res0 = sitofp <8 x i16> %arg0 to <8 x half>
+  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128_load(<8 x i16>* %arg0, <8 x half> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtw2ph (%rdi), %xmm0 {%k1}
+; CHECK-NEXT:    retq
+  %msk = bitcast i8 %mask to <8 x i1>
+  %val = load <8 x i16>, <8 x i16>* %arg0
+  %res0 = sitofp <8 x i16> %val to <8 x half>
+  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
+  ret <8 x half> %res
+}
+
+declare <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half>, <8 x i16>, i8)
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2w %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128_b(half* %arg0, <8 x i16> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtph2w (%rdi){1to8}, %xmm0 {%k1}
+; CHECK-NEXT:    retq
+  %scalar = load half, half* %arg0
+  %scalar_in_vector = insertelement <8 x half> undef, half %scalar, i32 0
+  %val = shufflevector <8 x half> %scalar_in_vector, <8 x half> undef, <8 x i32> zeroinitializer
+  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128_nomask(<8 x half> %arg0, <8 x i16> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2w %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %arg0, <8 x i16> %arg1, i8 -1)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128_z(<8 x half> %arg0, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2w %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %arg0, <8 x i16> zeroinitializer, i8 %mask)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128_load(<8 x half>* %arg0, <8 x i16> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtph2w (%rdi), %xmm0 {%k1}
+; CHECK-NEXT:    retq
+  %val = load <8 x half>, <8 x half>* %arg0
+  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
+  ret <8 x i16> %res
+}
+
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128(<8 x i16> %arg0, <8 x half> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtuw2ph %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %msk = bitcast i8 %mask to <8 x i1>
+  %res0 = uitofp <8 x i16> %arg0 to <8 x half>
+  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128_b(i16* %arg0, <8 x half> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtuw2ph (%rdi){1to8}, %xmm0 {%k1}
+; CHECK-NEXT:    retq
+  %msk = bitcast i8 %mask to <8 x i1>
+  %scalar = load i16, i16* %arg0
+  %scalar_in_vector = insertelement <8 x i16> undef, i16 %scalar, i32 0
+  %val = shufflevector <8 x i16> %scalar_in_vector, <8 x i16> undef, <8 x i32> zeroinitializer
+  %res0 = uitofp <8 x i16> %val to <8 x half>
+  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128_nomask(<8 x i16> %arg0, <8 x half> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtuw2ph %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = uitofp <8 x i16> %arg0 to <8 x half>
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128_z(<8 x i16> %arg0, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtuw2ph %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %msk = bitcast i8 %mask to <8 x i1>
+  %res0 = uitofp <8 x i16> %arg0 to <8 x half>
+  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128_load(<8 x i16>* %arg0, <8 x half> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtuw2ph (%rdi), %xmm0 {%k1}
+; CHECK-NEXT:    retq
+  %msk = bitcast i8 %mask to <8 x i1>
+  %val = load <8 x i16>, <8 x i16>* %arg0
+  %res0 = uitofp <8 x i16> %val to <8 x half>
+  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
+  ret <8 x half> %res
+}
+
+declare <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half>, <8 x i16>, i8)
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2uw %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128_b(half* %arg0, <8 x i16> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtph2uw (%rdi){1to8}, %xmm0 {%k1}
+; CHECK-NEXT:    retq
+  %scalar = load half, half* %arg0
+  %scalar_in_vector = insertelement <8 x half> undef, half %scalar, i32 0
+  %val = shufflevector <8 x half> %scalar_in_vector, <8 x half> undef, <8 x i32> zeroinitializer
+  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128_nomask(<8 x half> %arg0, <8 x i16> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2uw %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %arg0, <8 x i16> %arg1, i8 -1)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128_z(<8 x half> %arg0, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2uw %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %arg0, <8 x i16> zeroinitializer, i8 %mask)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128_load(<8 x half>* %arg0, <8 x i16> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtph2uw (%rdi), %xmm0 {%k1}
+; CHECK-NEXT:    retq
+  %val = load <8 x half>, <8 x half>* %arg0
+  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
+  ret <8 x i16> %res
+}
+
+declare <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half>, <8 x i16>, i8)
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2w %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128_b(half* %arg0, <8 x i16> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvttph2w (%rdi){1to8}, %xmm0 {%k1}
+; CHECK-NEXT:    retq
+  %scalar = load half, half* %arg0
+  %scalar_in_vector = insertelement <8 x half> undef, half %scalar, i32 0
+  %val = shufflevector <8 x half> %scalar_in_vector, <8 x half> undef, <8 x i32> zeroinitializer
+  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128_nomask(<8 x half> %arg0, <8 x i16> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2w %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %arg0, <8 x i16> %arg1, i8 -1)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128_z(<8 x half> %arg0, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2w %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %arg0, <8 x i16> zeroinitializer, i8 %mask)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128_load(<8 x half>* %arg0, <8 x i16> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvttph2w (%rdi), %xmm0 {%k1}
+; CHECK-NEXT:    retq
+  %val = load <8 x half>, <8 x half>* %arg0
+  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
+  ret <8 x i16> %res
+}
+
+declare <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half>, <8 x i16>, i8)
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2uw %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128_b(half* %arg0, <8 x i16> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvttph2uw (%rdi){1to8}, %xmm0 {%k1}
+; CHECK-NEXT:    retq
+  %scalar = load half, half* %arg0
+  %scalar_in_vector = insertelement <8 x half> undef, half %scalar, i32 0
+  %val = shufflevector <8 x half> %scalar_in_vector, <8 x half> undef, <8 x i32> zeroinitializer
+  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128_nomask(<8 x half> %arg0, <8 x i16> %arg1) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2uw %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %arg0, <8 x i16> %arg1, i8 -1)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128_z(<8 x half> %arg0, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2uw %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %arg0, <8 x i16> zeroinitializer, i8 %mask)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128_load(<8 x half>* %arg0, <8 x i16> %arg1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvttph2uw (%rdi), %xmm0 {%k1}
+; CHECK-NEXT:    retq
+  %val = load <8 x half>, <8 x half>* %arg0
+  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
+  ret <8 x i16> %res
+}
+
+define <4 x half> @test_u16tofp4(<4 x i16> %arg0) {
+; CHECK-LABEL: test_u16tofp4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtuw2ph %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = uitofp <4 x i16> %arg0 to <4 x half>
+  ret <4 x half> %res
+}
+
+define <2 x half> @test_s16tofp2(<2 x i16> %arg0) {
+; CHECK-LABEL: test_s16tofp2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtw2ph %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = sitofp <2 x i16> %arg0 to <2 x half>
+  ret <2 x half> %res
+}
+
+define <4 x half> @test_u8tofp4(<4 x i8> %arg0) {
+; CHECK-LABEL: test_u8tofp4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; CHECK-NEXT:    vcvtuw2ph %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = uitofp <4 x i8> %arg0 to <4 x half>
+  ret <4 x half> %res
+}
+
+define <2 x half> @test_s8tofp2(<2 x i8> %arg0) {
+; CHECK-LABEL: test_s8tofp2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmovsxbw %xmm0, %xmm0
+; CHECK-NEXT:    vcvtw2ph %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = sitofp <2 x i8> %arg0 to <2 x half>
+  ret <2 x half> %res
+}
+
+define <2 x half> @test_u1tofp2(<2 x i1> %arg0) {
+; CHECK-LABEL: test_u1tofp2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; CHECK-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; CHECK-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT:    vcvtuw2ph %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = uitofp <2 x i1> %arg0 to <2 x half>
+  ret <2 x half> %res
+}
+
+define <4 x half> @test_s17tofp4(<4 x i17> %arg0) {
+; CHECK-LABEL: test_s17tofp4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpslld $15, %xmm0, %xmm0
+; CHECK-NEXT:    vpsrad $15, %xmm0, %xmm0
+; CHECK-NEXT:    vcvtdq2ph %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = sitofp <4 x i17> %arg0 to <4 x half>
+  ret <4 x half> %res
+}
+
+define <2 x half> @test_u33tofp2(<2 x i33> %arg0) {
+; CHECK-LABEL: test_u33tofp2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT:    vcvtuqq2ph %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = uitofp <2 x i33> %arg0 to <2 x half>
+  ret <2 x half> %res
+}

diff  --git a/llvm/test/CodeGen/X86/avx512fp16-cvt.ll b/llvm/test/CodeGen/X86/avx512fp16-cvt.ll
new file mode 100644
index 0000000000000..e19ea8426e8ad
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512fp16-cvt.ll
@@ -0,0 +1,1029 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
+
+define half @f32tof16(float %b) nounwind {
+; X64-LABEL: f32tof16:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtss2sh %xmm0, %xmm0, %xmm0
+; X64-NEXT:    retq
+;
+; X86-LABEL: f32tof16:
+; X86:       # %bb.0:
+; X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT:    vcvtss2sh %xmm0, %xmm0, %xmm0
+; X86-NEXT:    retl
+  %a = fptrunc float %b to half
+  ret half %a
+}
+
+define half @f64tof16(double %b) nounwind {
+; X64-LABEL: f64tof16:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtsd2sh %xmm0, %xmm0, %xmm0
+; X64-NEXT:    retq
+;
+; X86-LABEL: f64tof16:
+; X86:       # %bb.0:
+; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT:    vcvtsd2sh %xmm0, %xmm0, %xmm0
+; X86-NEXT:    retl
+  %a = fptrunc double %b to half
+  ret half %a
+}
+
+define <16 x half> @f32to16f16(<16 x float> %b) nounwind {
+; CHECK-LABEL: f32to16f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtps2phx %zmm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %a = fptrunc <16 x float> %b to <16 x half>
+  ret <16 x half> %a
+}
+
+define <8 x half> @f32to8f16(<8 x float> %b) {
+; CHECK-LABEL: f32to8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    ret{{[l|q]}}
+  %a = fptrunc <8 x float> %b to <8 x half>
+  ret <8 x half> %a
+}
+
+define <4 x half> @f32to4f16(<4 x float> %b) {
+; CHECK-LABEL: f32to4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtps2phx %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %a = fptrunc <4 x float> %b to <4 x half>
+  ret <4 x half> %a
+}
+
+define <2 x half> @f32to2f16(<2 x float> %b) {
+; CHECK-LABEL: f32to2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtps2phx %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %a = fptrunc <2 x float> %b to <2 x half>
+  ret <2 x half> %a
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float>, <8 x half>, i8)
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float>, <8 x half>, i8)
+
+define <8 x half> @f32to4f16_mask(<4 x float> %a, <8 x half> %b, i8 %mask) {
+; X64-LABEL: f32to4f16_mask:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1
+; X64-NEXT:    vcvtps2phx %xmm0, %xmm1 {%k1}
+; X64-NEXT:    vmovaps %xmm1, %xmm0
+; X64-NEXT:    retq
+;
+; X86-LABEL: f32to4f16_mask:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    vcvtps2phx %xmm0, %xmm1 {%k1}
+; X86-NEXT:    vmovaps %xmm1, %xmm0
+; X86-NEXT:    retl
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float> %a, <8 x half> %b, i8 %mask)
+  ret <8 x half> %res
+}
+
+define <8 x half> @f32to8f16_mask(<8 x float> %a, <8 x half> %b, i8 %mask) {
+; X64-LABEL: f32to8f16_mask:
+; X64:       # %bb.0:
+; X64-NEXT:    kmovd %edi, %k1
+; X64-NEXT:    vcvtps2phx %ymm0, %xmm1 {%k1}
+; X64-NEXT:    vmovaps %xmm1, %xmm0
+; X64-NEXT:    vzeroupper
+; X64-NEXT:    retq
+;
+; X86-LABEL: f32to8f16_mask:
+; X86:       # %bb.0:
+; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    vcvtps2phx %ymm0, %xmm1 {%k1}
+; X86-NEXT:    vmovaps %xmm1, %xmm0
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    retl
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %a, <8 x half> %b, i8 %mask)
+  ret <8 x half> %res
+}
+
+define <8 x half> @f32to8f16_mask2(<8 x float> %b, <8 x i1> %mask) {
+; CHECK-LABEL: f32to8f16_mask2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpsllw $15, %xmm1, %xmm1
+; CHECK-NEXT:    vpmovw2m %xmm1, %k1
+; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    ret{{[l|q]}}
+  %a = fptrunc <8 x float> %b to <8 x half>
+  %c = select <8 x i1>%mask, <8 x half>%a, <8 x half> zeroinitializer
+  ret <8 x half> %c
+}
+
+define <16 x half> @f32to16f16_mask(<16 x float> %b, <16 x i1> %mask) {
+; CHECK-LABEL: f32to16f16_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpsllw $7, %xmm1, %xmm1
+; CHECK-NEXT:    vpmovb2m %xmm1, %k1
+; CHECK-NEXT:    vcvtps2phx %zmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT:    ret{{[l|q]}}
+  %a = fptrunc <16 x float> %b to <16 x half>
+  %c = select <16 x i1>%mask, <16 x half>%a, <16 x half> zeroinitializer
+  ret <16 x half> %c
+}
+
+define float @f16tof32(half %b) nounwind {
+; X64-LABEL: f16tof32:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtsh2ss %xmm0, %xmm0, %xmm0
+; X64-NEXT:    retq
+;
+; X86-LABEL: f16tof32:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT:    vcvtsh2ss %xmm0, %xmm0, %xmm0
+; X86-NEXT:    vmovss %xmm0, (%esp)
+; X86-NEXT:    flds (%esp)
+; X86-NEXT:    popl %eax
+; X86-NEXT:    retl
+  %a = fpext half %b to float
+  ret float %a
+}
+
+define double @f16tof64(half %b) nounwind {
+; X64-LABEL: f16tof64:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtsh2sd %xmm0, %xmm0, %xmm0
+; X64-NEXT:    retq
+;
+; X86-LABEL: f16tof64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $8, %esp
+; X86-NEXT:    vmovsh 8(%ebp), %xmm0
+; X86-NEXT:    vcvtsh2sd %xmm0, %xmm0, %xmm0
+; X86-NEXT:    vmovsd %xmm0, (%esp)
+; X86-NEXT:    fldl (%esp)
+; X86-NEXT:    movl %ebp, %esp
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+  %a = fpext half %b to double
+  ret double %a
+}
+
+define <16 x float> @f16to16f32(<16 x half> %b) nounwind {
+; CHECK-LABEL: f16to16f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2psx %ymm0, %zmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %a = fpext <16 x half> %b to <16 x float>
+  ret <16 x float> %a
+}
+
+define <8 x float> @f16to8f32(<8 x half> %b) nounwind {
+; CHECK-LABEL: f16to8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2psx %xmm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %a = fpext <8 x half> %b to <8 x float>
+  ret <8 x float> %a
+}
+
+define <4 x float> @f16to4f32(<4 x half> %b) nounwind {
+; CHECK-LABEL: f16to4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %a = fpext <4 x half> %b to <4 x float>
+  ret <4 x float> %a
+}
+
+define <2 x float> @f16to2f32(<2 x half> %b) nounwind {
+; CHECK-LABEL: f16to2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %a = fpext <2 x half> %b to <2 x float>
+  ret <2 x float> %a
+}
+
+define <16 x float> @f16to16f32_mask(<16 x half> %b, <16 x float> %b1, <16 x float> %a1) {
+; CHECK-LABEL: f16to16f32_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcmpltps %zmm2, %zmm1, %k1
+; CHECK-NEXT:    vcvtph2psx %ymm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    ret{{[l|q]}}
+  %a = fpext <16 x half> %b to <16 x float>
+  %mask = fcmp ogt <16 x float> %a1, %b1
+  %c = select <16 x i1> %mask, <16 x float> %a, <16 x float> zeroinitializer
+  ret <16 x float> %c
+}
+
+define <8 x float> @f16to8f32_mask(<8 x half> %b, <8 x float> %b1, <8 x float> %a1) {
+; CHECK-LABEL: f16to8f32_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcmpltps %ymm2, %ymm1, %k1
+; CHECK-NEXT:    vcvtph2psx %xmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT:    ret{{[l|q]}}
+  %a = fpext <8 x half> %b to <8 x float>
+  %mask = fcmp ogt <8 x float> %a1, %b1
+  %c = select <8 x i1> %mask, <8 x float> %a, <8 x float> zeroinitializer
+  ret <8 x float> %c
+}
+
+define <4 x float> @f16to4f32_mask(<4 x half> %b, <4 x float> %b1, <4 x float> %a1) {
+; CHECK-LABEL: f16to4f32_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcmpltps %xmm2, %xmm1, %k1
+; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    ret{{[l|q]}}
+  %a = fpext <4 x half> %b to <4 x float>
+  %mask = fcmp ogt <4 x float> %a1, %b1
+  %c = select <4 x i1> %mask, <4 x float> %a, <4 x float> zeroinitializer
+  ret <4 x float> %c
+}
+
+define <2 x float> @f16to2f32_mask(<2 x half> %b, <2 x float> %b1, <2 x float> %a1) {
+; CHECK-LABEL: f16to2f32_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcmpltps %xmm2, %xmm1, %k1
+; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    ret{{[l|q]}}
+  %a = fpext <2 x half> %b to <2 x float>
+  %mask = fcmp ogt <2 x float> %a1, %b1
+  %c = select <2 x i1> %mask, <2 x float> %a, <2 x float> zeroinitializer
+  ret <2 x float> %c
+}
+
+define <2 x double> @f16to2f64(<2 x half> %b) nounwind {
+; CHECK-LABEL: f16to2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2pd %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %a = fpext <2 x half> %b to <2 x double>
+  ret <2 x double> %a
+}
+
+define <2 x double> @f16to2f64_mask(<2 x half> %b, <2 x double> %b1, <2 x double> %a1) {
+; CHECK-LABEL: f16to2f64_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcmpltpd %xmm2, %xmm1, %k1
+; CHECK-NEXT:    vcvtph2pd %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    ret{{[l|q]}}
+  %a = fpext <2 x half> %b to <2 x double>
+  %mask = fcmp ogt <2 x double> %a1, %b1
+  %c = select <2 x i1> %mask, <2 x double> %a, <2 x double> zeroinitializer
+  ret <2 x double> %c
+}
+
+define <4 x double> @f16to4f64(<4 x half> %b) nounwind {
+; CHECK-LABEL: f16to4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2pd %xmm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %a = fpext <4 x half> %b to <4 x double>
+  ret <4 x double> %a
+}
+
+define <4 x double> @f16to4f64_mask(<4 x half> %b, <4 x double> %b1, <4 x double> %a1) {
+; CHECK-LABEL: f16to4f64_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcmpltpd %ymm2, %ymm1, %k1
+; CHECK-NEXT:    vcvtph2pd %xmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT:    ret{{[l|q]}}
+  %a = fpext <4 x half> %b to <4 x double>
+  %mask = fcmp ogt <4 x double> %a1, %b1
+  %c = select <4 x i1> %mask, <4 x double> %a, <4 x double> zeroinitializer
+  ret <4 x double> %c
+}
+
+define <8 x double> @f16to8f64(<8 x half> %b) nounwind {
+; CHECK-LABEL: f16to8f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2pd %xmm0, %zmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %a = fpext <8 x half> %b to <8 x double>
+  ret <8 x double> %a
+}
+
+define <8 x double> @f16to8f64_mask(<8 x half> %b, <8 x double> %b1, <8 x double> %a1) {
+; CHECK-LABEL: f16to8f64_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcmpltpd %zmm2, %zmm1, %k1
+; CHECK-NEXT:    vcvtph2pd %xmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    ret{{[l|q]}}
+  %a = fpext <8 x half> %b to <8 x double>
+  %mask = fcmp ogt <8 x double> %a1, %b1
+  %c = select <8 x i1> %mask, <8 x double> %a, <8 x double> zeroinitializer
+  ret <8 x double> %c
+}
+
+define <2 x half> @f64to2f16(<2 x double> %b) {
+; CHECK-LABEL: f64to2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtpd2ph %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %a = fptrunc <2 x double> %b to <2 x half>
+  ret <2 x half> %a
+}
+
+define <4 x half> @f64to4f16(<4 x double> %b) {
+; CHECK-LABEL: f64to4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtpd2ph %ymm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    ret{{[l|q]}}
+  %a = fptrunc <4 x double> %b to <4 x half>
+  ret <4 x half> %a
+}
+
+define <8 x half> @f64to8f16(<8 x double> %b) {
+; CHECK-LABEL: f64to8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtpd2ph %zmm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    ret{{[l|q]}}
+  %a = fptrunc <8 x double> %b to <8 x half>
+  ret <8 x half> %a
+}
+
+define float @extload_f16_f32(half* %x) {
+; X64-LABEL: extload_f16_f32:
+; X64:       # %bb.0:
+; X64-NEXT:    vmovsh (%rdi), %xmm0
+; X64-NEXT:    vcvtsh2ss %xmm0, %xmm0, %xmm0
+; X64-NEXT:    retq
+;
+; X86-LABEL: extload_f16_f32:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vmovsh (%eax), %xmm0
+; X86-NEXT:    vcvtsh2ss %xmm0, %xmm0, %xmm0
+; X86-NEXT:    vmovss %xmm0, (%esp)
+; X86-NEXT:    flds (%esp)
+; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+  %a = load half, half* %x
+  %b = fpext half %a to float
+  ret float %b
+}
+
+define double @extload_f16_f64(half* %x) {
+; X64-LABEL: extload_f16_f64:
+; X64:       # %bb.0:
+; X64-NEXT:    vmovsh (%rdi), %xmm0
+; X64-NEXT:    vcvtsh2sd %xmm0, %xmm0, %xmm0
+; X64-NEXT:    retq
+;
+; X86-LABEL: extload_f16_f64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $8, %esp
+; X86-NEXT:    movl 8(%ebp), %eax
+; X86-NEXT:    vmovsh (%eax), %xmm0
+; X86-NEXT:    vcvtsh2sd %xmm0, %xmm0, %xmm0
+; X86-NEXT:    vmovsd %xmm0, (%esp)
+; X86-NEXT:    fldl (%esp)
+; X86-NEXT:    movl %ebp, %esp
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa %esp, 4
+; X86-NEXT:    retl
+  %a = load half, half* %x
+  %b = fpext half %a to double
+  ret double %b
+}
+
+define float @extload_f16_f32_optsize(half* %x) optsize {
+; X64-LABEL: extload_f16_f32_optsize:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtsh2ss (%rdi), %xmm0, %xmm0
+; X64-NEXT:    retq
+;
+; X86-LABEL: extload_f16_f32_optsize:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vcvtsh2ss (%eax), %xmm0, %xmm0
+; X86-NEXT:    vmovss %xmm0, (%esp)
+; X86-NEXT:    flds (%esp)
+; X86-NEXT:    popl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+  %a = load half, half* %x
+  %b = fpext half %a to float
+  ret float %b
+}
+
+define double @extload_f16_f64_optsize(half* %x) optsize {
+; X64-LABEL: extload_f16_f64_optsize:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtsh2sd (%rdi), %xmm0, %xmm0
+; X64-NEXT:    retq
+;
+; X86-LABEL: extload_f16_f64_optsize:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $8, %esp
+; X86-NEXT:    movl 8(%ebp), %eax
+; X86-NEXT:    vcvtsh2sd (%eax), %xmm0, %xmm0
+; X86-NEXT:    vmovsd %xmm0, (%esp)
+; X86-NEXT:    fldl (%esp)
+; X86-NEXT:    movl %ebp, %esp
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa %esp, 4
+; X86-NEXT:    retl
+  %a = load half, half* %x
+  %b = fpext half %a to double
+  ret double %b
+}
+
+define <16 x float> @extload_v16f16_v16f32(<16 x half>* %x) {
+; X64-LABEL: extload_v16f16_v16f32:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtph2psx (%rdi), %zmm0
+; X64-NEXT:    retq
+;
+; X86-LABEL: extload_v16f16_v16f32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vcvtph2psx (%eax), %zmm0
+; X86-NEXT:    retl
+  %a = load <16 x half>, <16 x half>* %x
+  %b = fpext <16 x half> %a to <16 x float>
+  ret <16 x float> %b
+}
+
+define <8 x float> @extload_v8f16_v8f32(<8 x half>* %x) {
+; X64-LABEL: extload_v8f16_v8f32:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtph2psx (%rdi), %ymm0
+; X64-NEXT:    retq
+;
+; X86-LABEL: extload_v8f16_v8f32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vcvtph2psx (%eax), %ymm0
+; X86-NEXT:    retl
+  %a = load <8 x half>, <8 x half>* %x
+  %b = fpext <8 x half> %a to <8 x float>
+  ret <8 x float> %b
+}
+
+define <4 x float> @extload_v4f16_v4f32(<4 x half>* %x) {
+; X64-LABEL: extload_v4f16_v4f32:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtph2psx (%rdi), %xmm0
+; X64-NEXT:    retq
+;
+; X86-LABEL: extload_v4f16_v4f32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vcvtph2psx (%eax), %xmm0
+; X86-NEXT:    retl
+  %a = load <4 x half>, <4 x half>* %x
+  %b = fpext <4 x half> %a to <4 x float>
+  ret <4 x float> %b
+}
+
+define <8 x double> @extload_v8f16_v8f64(<8 x half>* %x) {
+; X64-LABEL: extload_v8f16_v8f64:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtph2pd (%rdi), %zmm0
+; X64-NEXT:    retq
+;
+; X86-LABEL: extload_v8f16_v8f64:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vcvtph2pd (%eax), %zmm0
+; X86-NEXT:    retl
+  %a = load <8 x half>, <8 x half>* %x
+  %b = fpext <8 x half> %a to <8 x double>
+  ret <8 x double> %b
+}
+
+define <4 x double> @extload_v4f16_v4f64(<4 x half>* %x) {
+; X64-LABEL: extload_v4f16_v4f64:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtph2pd (%rdi), %ymm0
+; X64-NEXT:    retq
+;
+; X86-LABEL: extload_v4f16_v4f64:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vcvtph2pd (%eax), %ymm0
+; X86-NEXT:    retl
+  %a = load <4 x half>, <4 x half>* %x
+  %b = fpext <4 x half> %a to <4 x double>
+  ret <4 x double> %b
+}
+
+define <2 x double> @extload_v2f16_v2f64(<2 x half>* %x) {
+; X64-LABEL: extload_v2f16_v2f64:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtph2pd (%rdi), %xmm0
+; X64-NEXT:    retq
+;
+; X86-LABEL: extload_v2f16_v2f64:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vcvtph2pd (%eax), %xmm0
+; X86-NEXT:    retl
+  %a = load <2 x half>, <2 x half>* %x
+  %b = fpext <2 x half> %a to <2 x double>
+  ret <2 x double> %b
+}
+
+define half @s8_to_half(i8 %x) {
+; X64-LABEL: s8_to_half:
+; X64:       # %bb.0:
+; X64-NEXT:    movsbl %dil, %eax
+; X64-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
+; X64-NEXT:    retq
+;
+; X86-LABEL: s8_to_half:
+; X86:       # %bb.0:
+; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
+; X86-NEXT:    retl
+  %a = sitofp i8 %x to half
+  ret half %a
+}
+
+define half @s16_to_half(i16 %x) {
+; X64-LABEL: s16_to_half:
+; X64:       # %bb.0:
+; X64-NEXT:    movswl %di, %eax
+; X64-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
+; X64-NEXT:    retq
+;
+; X86-LABEL: s16_to_half:
+; X86:       # %bb.0:
+; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
+; X86-NEXT:    retl
+  %a = sitofp i16 %x to half
+  ret half %a
+}
+
+define half @s32_to_half(i32 %x) {
+; X64-LABEL: s32_to_half:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtsi2sh %edi, %xmm0, %xmm0
+; X64-NEXT:    retq
+;
+; X86-LABEL: s32_to_half:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvtsi2shl {{[0-9]+}}(%esp), %xmm0, %xmm0
+; X86-NEXT:    retl
+  %a = sitofp i32 %x to half
+  ret half %a
+}
+
+define half @s64_to_half(i64 %x) {
+; X64-LABEL: s64_to_half:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtsi2sh %rdi, %xmm0, %xmm0
+; X64-NEXT:    retq
+;
+; X86-LABEL: s64_to_half:
+; X86:       # %bb.0:
+; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT:    vcvtqq2ph %xmm0, %xmm0
+; X86-NEXT:    retl
+  %a = sitofp i64 %x to half
+  ret half %a
+}
+
+define half @s128_to_half(i128 %x) {
+; X64-LABEL: s128_to_half:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    callq __floattihf@PLT
+; X64-NEXT:    popq %rax
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+;
+; X86-LABEL: s128_to_half:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $16, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 20
+; X86-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT:    vmovups %xmm0, (%esp)
+; X86-NEXT:    calll __floattihf
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+  %a = sitofp i128 %x to half
+  ret half %a
+}
+
+define half @u8_to_half(i8 %x) {
+; X64-LABEL: u8_to_half:
+; X64:       # %bb.0:
+; X64-NEXT:    movzbl %dil, %eax
+; X64-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
+; X64-NEXT:    retq
+;
+; X86-LABEL: u8_to_half:
+; X86:       # %bb.0:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
+; X86-NEXT:    retl
+  %a = uitofp i8 %x to half
+  ret half %a
+}
+
+define half @u16_to_half(i16 %x) {
+; X64-LABEL: u16_to_half:
+; X64:       # %bb.0:
+; X64-NEXT:    movzwl %di, %eax
+; X64-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
+; X64-NEXT:    retq
+;
+; X86-LABEL: u16_to_half:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
+; X86-NEXT:    retl
+  %a = uitofp i16 %x to half
+  ret half %a
+}
+
+define half @u32_to_half(i32 %x) {
+; X64-LABEL: u32_to_half:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtusi2sh %edi, %xmm0, %xmm0
+; X64-NEXT:    retq
+;
+; X86-LABEL: u32_to_half:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvtusi2shl {{[0-9]+}}(%esp), %xmm0, %xmm0
+; X86-NEXT:    retl
+  %a = uitofp i32 %x to half
+  ret half %a
+}
+
+define half @u64_to_half(i64 %x) {
+; X64-LABEL: u64_to_half:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtusi2sh %rdi, %xmm0, %xmm0
+; X64-NEXT:    retq
+;
+; X86-LABEL: u64_to_half:
+; X86:       # %bb.0:
+; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT:    vcvtuqq2ph %xmm0, %xmm0
+; X86-NEXT:    retl
+  %a = uitofp i64 %x to half
+  ret half %a
+}
+
+define half @u128_to_half(i128 %x) {
+; X64-LABEL: u128_to_half:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    callq __floatuntihf@PLT
+; X64-NEXT:    popq %rax
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+;
+; X86-LABEL: u128_to_half:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $16, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 20
+; X86-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT:    vmovups %xmm0, (%esp)
+; X86-NEXT:    calll __floatuntihf
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+  %a = uitofp i128 %x to half
+  ret half %a
+}
+
+define i8 @half_to_s8(half %x) {
+; X64-LABEL: half_to_s8:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttsh2si %xmm0, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+;
+; X86-LABEL: half_to_s8:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+  %a = fptosi half %x to i8
+  ret i8 %a
+}
+
+define i16 @half_to_s16(half %x) {
+; X64-LABEL: half_to_s16:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttsh2si %xmm0, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+;
+; X86-LABEL: half_to_s16:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
+  %a = fptosi half %x to i16
+  ret i16 %a
+}
+
+define i32 @half_to_s32(half %x) {
+; X64-LABEL: half_to_s32:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttsh2si %xmm0, %eax
+; X64-NEXT:    retq
+;
+; X86-LABEL: half_to_s32:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+  %a = fptosi half %x to i32
+  ret i32 %a
+}
+
+define i64 @half_to_s64(half %x) {
+; X64-LABEL: half_to_s64:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttsh2si %xmm0, %rax
+; X64-NEXT:    retq
+;
+; X86-LABEL: half_to_s64:
+; X86:       # %bb.0:
+; X86-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT:    vcvttph2qq %xmm0, %xmm0
+; X86-NEXT:    vmovd %xmm0, %eax
+; X86-NEXT:    vpextrd $1, %xmm0, %edx
+; X86-NEXT:    retl
+  %a = fptosi half %x to i64
+  ret i64 %a
+}
+
+define i128 @half_to_s128(half %x) {
+; X64-LABEL: half_to_s128:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    callq __fixhfti@PLT
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+;
+; X86-LABEL: half_to_s128:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $32, %esp
+; X86-NEXT:    .cfi_offset %esi, -12
+; X86-NEXT:    movl 8(%ebp), %esi
+; X86-NEXT:    vmovsh 12(%ebp), %xmm0
+; X86-NEXT:    vmovsh %xmm0, {{[0-9]+}}(%esp)
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, (%esp)
+; X86-NEXT:    calll __fixhfti
+; X86-NEXT:    subl $4, %esp
+; X86-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT:    vmovups %xmm0, (%esi)
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    leal -4(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa %esp, 4
+; X86-NEXT:    retl $4
+  %a = fptosi half %x to i128
+  ret i128 %a
+}
+
+define i8 @half_to_u8(half %x) {
+; X64-LABEL: half_to_u8:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttsh2si %xmm0, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+;
+; X86-LABEL: half_to_u8:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+  %a = fptoui half %x to i8
+  ret i8 %a
+}
+
+define i16 @half_to_u16(half %x) {
+; X64-LABEL: half_to_u16:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttsh2si %xmm0, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+;
+; X86-LABEL: half_to_u16:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
+  %a = fptoui half %x to i16
+  ret i16 %a
+}
+
+define i32 @half_to_u32(half %x) {
+; X64-LABEL: half_to_u32:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttsh2usi %xmm0, %eax
+; X64-NEXT:    retq
+;
+; X86-LABEL: half_to_u32:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttsh2usi {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+  %a = fptoui half %x to i32
+  ret i32 %a
+}
+
+define i64 @half_to_u64(half %x) {
+; X64-LABEL: half_to_u64:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttsh2usi %xmm0, %rax
+; X64-NEXT:    retq
+;
+; X86-LABEL: half_to_u64:
+; X86:       # %bb.0:
+; X86-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT:    vcvttph2uqq %xmm0, %xmm0
+; X86-NEXT:    vmovd %xmm0, %eax
+; X86-NEXT:    vpextrd $1, %xmm0, %edx
+; X86-NEXT:    retl
+  %a = fptoui half %x to i64
+  ret i64 %a
+}
+
+define i128 @half_to_u128(half %x) {
+; X64-LABEL: half_to_u128:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    callq __fixunshfti@PLT
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+;
+; X86-LABEL: half_to_u128:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $32, %esp
+; X86-NEXT:    .cfi_offset %esi, -12
+; X86-NEXT:    movl 8(%ebp), %esi
+; X86-NEXT:    vmovsh 12(%ebp), %xmm0
+; X86-NEXT:    vmovsh %xmm0, {{[0-9]+}}(%esp)
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, (%esp)
+; X86-NEXT:    calll __fixunshfti
+; X86-NEXT:    subl $4, %esp
+; X86-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT:    vmovups %xmm0, (%esi)
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    leal -4(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa %esp, 4
+; X86-NEXT:    retl $4
+  %a = fptoui half %x to i128
+  ret i128 %a
+}
+
+define x86_fp80 @half_to_f80(half %x) nounwind {
+; X64-LABEL: half_to_f80:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    callq __extendhfxf2@PLT
+; X64-NEXT:    popq %rax
+; X64-NEXT:    retq
+;
+; X86-LABEL: half_to_f80:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT:    vmovsh %xmm0, (%esp)
+; X86-NEXT:    calll __extendhfxf2
+; X86-NEXT:    popl %eax
+; X86-NEXT:    retl
+  %a = fpext half %x to x86_fp80
+  ret x86_fp80 %a
+}
+
+define half @f80_to_half(x86_fp80 %x) nounwind {
+; X64-LABEL: f80_to_half:
+; X64:       # %bb.0:
+; X64-NEXT:    subq $24, %rsp
+; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
+; X64-NEXT:    fstpt (%rsp)
+; X64-NEXT:    callq __truncxfhf2@PLT
+; X64-NEXT:    addq $24, %rsp
+; X64-NEXT:    retq
+;
+; X86-LABEL: f80_to_half:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    fldt {{[0-9]+}}(%esp)
+; X86-NEXT:    fstpt (%esp)
+; X86-NEXT:    calll __truncxfhf2
+; X86-NEXT:    addl $12, %esp
+; X86-NEXT:    retl
+  %a = fptrunc x86_fp80 %x to half
+  ret half %a
+}
+
+; FIXME: We're doing a two-step conversion here on 32-bit.
+; First from f16->f32, then f32->f128. This is occurring
+; due to common code in LegalizeFloatTypes that thinks
+; there are no libcalls for f16 to any type but f32.
+; Changing this may break other non-x86 targets. The code
+; generated here should be functional.
+define fp128 @half_to_f128(half %x) nounwind {
+; X64-LABEL: half_to_f128:
+; X64:       # %bb.0:
+; X64-NEXT:    jmp __extendhftf2@PLT # TAILCALL
+;
+; X86-LABEL: half_to_f128:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $32, %esp
+; X86-NEXT:    movl 8(%ebp), %esi
+; X86-NEXT:    vmovsh 12(%ebp), %xmm0
+; X86-NEXT:    vcvtsh2ss %xmm0, %xmm0, %xmm0
+; X86-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp)
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, (%esp)
+; X86-NEXT:    calll __extendsftf2
+; X86-NEXT:    subl $4, %esp
+; X86-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT:    vmovaps %xmm0, (%esi)
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    leal -4(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+  %a = fpext half %x to fp128
+  ret fp128 %a
+}
+
+define half @f128_to_half(fp128 %x) nounwind {
+; X64-LABEL: f128_to_half:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    callq __trunctfhf2@PLT
+; X64-NEXT:    popq %rax
+; X64-NEXT:    retq
+;
+; X86-LABEL: f128_to_half:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $16, %esp
+; X86-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT:    vmovups %xmm0, (%esp)
+; X86-NEXT:    calll __trunctfhf2
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    retl
+  %a = fptrunc fp128 %x to half
+  ret half %a
+}

diff  --git a/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
index eb6511e0edc73..cb31baf9a82ea 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
@@ -187,3 +187,506 @@ define i8 @test_int_x86_avx512_mask_cmp_sh_all(<8 x half> %x0, <8 x half> %x1, i
   %res13 = and i8 %res11, %res12
   ret i8 %res13
 }
+
+declare <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32>, i32)
+
+define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512(<16 x i32> %x0, <16 x half> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtdq2ph %zmm0, %ymm1 {%k1}
+; CHECK-NEXT:    vmovaps %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %mask = bitcast i16 %x2 to <16 x i1>
+  %res0 = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
+  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> %x1
+  ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_r(<16 x i32> %x0, <16 x half> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512_r:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtdq2ph {ru-sae}, %zmm0, %ymm1 {%k1}
+; CHECK-NEXT:    vmovaps %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %mask = bitcast i16 %x2 to <16 x i1>
+  %res0 = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 10)
+  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> %x1
+  ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_nomask(<16 x i32> %x0, <16 x half> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtdq2ph %zmm0, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
+  ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_z(<16 x i32> %x0, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtdq2ph %zmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %mask = bitcast i16 %x2 to <16 x i1>
+  %res0 = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
+  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> zeroinitializer
+  ret <16 x half> %res
+}
+
+define <16 x half> @sint_to_fp_16i32_to_16f16(<16 x i32> %x) {
+; CHECK-LABEL: sint_to_fp_16i32_to_16f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtdq2ph %zmm0, %ymm0
+; CHECK-NEXT:    retq
+  %res = sitofp <16 x i32> %x to <16 x half>
+  ret <16 x half> %res
+}
+
+declare <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32>, i32)
+
+define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_r(<16 x i32> %x0, <16 x half> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_512_r:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtudq2ph {ru-sae}, %zmm0, %ymm1 {%k1}
+; CHECK-NEXT:    vmovaps %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %mask = bitcast i16 %x2 to <16 x i1>
+  %res0 = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 10)
+  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> %x1
+  ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_nomask(<16 x i32> %x0, <16 x half> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_512_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtudq2ph %zmm0, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
+  ret <16 x half> %res
+}
+
+define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_z(<16 x i32> %x0, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_512_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtudq2ph %zmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %mask = bitcast i16 %x2 to <16 x i1>
+  %res0 = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
+  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> zeroinitializer
+  ret <16 x half> %res
+}
+
+define <16 x half> @uint_to_fp_16i32_to_16f16(<16 x i32> %x) {
+; CHECK-LABEL: uint_to_fp_16i32_to_16f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtudq2ph %zmm0, %ymm0
+; CHECK-NEXT:    retq
+  %res = uitofp <16 x i32> %x to <16 x half>
+  ret <16 x half> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half>, <16 x i32>, i16, i32)
+
+define <16 x i32> @test_int_x86_avx512_mask_cvt_ph2dq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2dq_512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2dq {ru-sae}, %ymm0, %zmm1 {%k1}
+; CHECK-NEXT:    vcvtph2dq {rn-sae}, %ymm0, %zmm0
+; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 10)
+  %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
+  %res2 = add <16 x i32> %res, %res1
+  ret <16 x i32> %res2
+}
+
+declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.512(<16 x half>, <16 x i32>, i16, i32)
+
+define <16 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2udq {ru-sae}, %ymm0, %zmm1 {%k1}
+; CHECK-NEXT:    vcvtph2udq {rn-sae}, %ymm0, %zmm0
+; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 10)
+  %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
+  %res2 = add <16 x i32> %res, %res1
+  ret <16 x i32> %res2
+}
+
+declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.512(<16 x half>, <16 x i32>, i16, i32)
+
+define <16 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2dq %ymm0, %zmm1 {%k1}
+; CHECK-NEXT:    vcvttph2dq {sae}, %ymm0, %zmm0
+; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 4)
+  %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
+  %res2 = add <16 x i32> %res, %res1
+  ret <16 x i32> %res2
+}
+
+declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.512(<16 x half>, <16 x i32>, i16, i32)
+
+define <16 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2udq %ymm0, %zmm1 {%k1}
+; CHECK-NEXT:    vcvttph2udq {sae}, %ymm0, %zmm0
+; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 4)
+  %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
+  %res2 = add <16 x i32> %res, %res1
+  ret <16 x i32> %res2
+}
+
+declare <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64>, i32)
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtqq2ph %zmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %mask = bitcast i8 %x2 to <8 x i1>
+  %res0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
+  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_r(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512_r:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtqq2ph {ru-sae}, %zmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %mask = bitcast i8 %x2 to <8 x i1>
+  %res0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 10)
+  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_nomask(<8 x i64> %x0, <8 x half> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtqq2ph %zmm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_z(<8 x i64> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtqq2ph %zmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %mask = bitcast i8 %x2 to <8 x i1>
+  %res0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
+  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
+  ret <8 x half> %res
+}
+
+declare <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64>, i32)
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtuqq2ph %zmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %mask = bitcast i8 %x2 to <8 x i1>
+  %res0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
+  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_r(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512_r:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtuqq2ph {ru-sae}, %zmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %mask = bitcast i8 %x2 to <8 x i1>
+  %res0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 10)
+  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_nomask(<8 x i64> %x0, <8 x half> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtuqq2ph %zmm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_z(<8 x i64> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtuqq2ph %zmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %mask = bitcast i8 %x2 to <8 x i1>
+  %res0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
+  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
+  ret <8 x half> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2qq.512(<8 x half>, <8 x i64>, i8, i32)
+
+define <8 x i64> @test_int_x86_avx512_mask_cvt_ph2qq_512(<8 x half> %x0, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2qq_512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2qq {ru-sae}, %xmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vcvtph2qq {rn-sae}, %xmm0, %zmm0
+; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2qq.512(<8 x half> %x0, <8 x i64> %x1, i8 %x2, i32 10)
+  %res1 = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2qq.512(<8 x half> %x0, <8 x i64> %x1, i8 -1, i32 8)
+  %res2 = add <8 x i64> %res, %res1
+  ret <8 x i64> %res2
+}
+
+declare <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2uqq.512(<8 x half>, <8 x i64>, i8, i32)
+
+define <8 x i64> @test_int_x86_avx512_mask_cvt_ph2uqq_512(<8 x half> %x0, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2uqq_512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2uqq {ru-sae}, %xmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vcvtph2uqq {rn-sae}, %xmm0, %zmm0
+; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 %x2, i32 10)
+  %res1 = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 -1, i32 8)
+  %res2 = add <8 x i64> %res, %res1
+  ret <8 x i64> %res2
+}
+
+declare <8 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.512(<8 x half>, <8 x i64>, i8, i32)
+
+define <8 x i64> @test_int_x86_avx512_mask_cvtt_ph2uqq_512(<8 x half> %x0, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2uqq_512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2uqq {sae}, %xmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vcvttph2uqq %xmm0, %zmm0
+; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 %x2, i32 8)
+  %res1 = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 -1, i32 4)
+  %res2 = add <8 x i64> %res, %res1
+  ret <8 x i64> %res2
+}
+
+declare i32 @llvm.x86.avx512fp16.vcvtsh2si32(<8 x half>, i32)
+
+define i32 @test_x86_avx512fp16_vcvtsh2si32(<8 x half> %arg0) {
+; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2si32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtsh2si %xmm0, %ecx
+; CHECK-NEXT:    vcvtsh2si {rz-sae}, %xmm0, %eax
+; CHECK-NEXT:    addl %ecx, %eax
+; CHECK-NEXT:    retq
+  %res1 = call i32 @llvm.x86.avx512fp16.vcvtsh2si32(<8 x half> %arg0, i32 4)
+  %res2 = call i32 @llvm.x86.avx512fp16.vcvtsh2si32(<8 x half> %arg0, i32 11)
+  %res = add i32 %res1, %res2
+  ret i32 %res
+}
+
+declare i64 @llvm.x86.avx512fp16.vcvtsh2si64(<8 x half>, i32)
+
+define i64 @test_x86_avx512fp16_vcvtsh2si64(<8 x half> %arg0) {
+; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2si64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtsh2si %xmm0, %rcx
+; CHECK-NEXT:    vcvtsh2si {ru-sae}, %xmm0, %rax
+; CHECK-NEXT:    addq %rcx, %rax
+; CHECK-NEXT:    retq
+  %res1 = call i64 @llvm.x86.avx512fp16.vcvtsh2si64(<8 x half> %arg0, i32 4)
+  %res2 = call i64 @llvm.x86.avx512fp16.vcvtsh2si64(<8 x half> %arg0, i32 10)
+  %res = add i64 %res1, %res2
+  ret i64 %res
+}
+
+declare i32 @llvm.x86.avx512fp16.vcvttsh2si32(<8 x half>, i32)
+
+define i32 @test_x86_avx512fp16_vcvttsh2si32(<8 x half> %arg0) {
+; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2si32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttsh2si %xmm0, %ecx
+; CHECK-NEXT:    vcvttsh2si {sae}, %xmm0, %eax
+; CHECK-NEXT:    addl %ecx, %eax
+; CHECK-NEXT:    retq
+  %res1 = call i32 @llvm.x86.avx512fp16.vcvttsh2si32(<8 x half> %arg0, i32 4)
+  %res2 = call i32 @llvm.x86.avx512fp16.vcvttsh2si32(<8 x half> %arg0, i32 8)
+  %res = add i32 %res1, %res2
+  ret i32 %res
+}
+
+declare i64 @llvm.x86.avx512fp16.vcvttsh2si64(<8 x half>, i32)
+
+define i64 @test_x86_avx512fp16_vcvttsh2si64(<8 x half> %arg0) {
+; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2si64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttsh2si %xmm0, %rcx
+; CHECK-NEXT:    vcvttsh2si {sae}, %xmm0, %rax
+; CHECK-NEXT:    addq %rcx, %rax
+; CHECK-NEXT:    retq
+  %res1 = call i64 @llvm.x86.avx512fp16.vcvttsh2si64(<8 x half> %arg0, i32 4)
+  %res2 = call i64 @llvm.x86.avx512fp16.vcvttsh2si64(<8 x half> %arg0, i32 8)
+  %res = add i64 %res1, %res2
+  ret i64 %res
+}
+
+
+declare i32 @llvm.x86.avx512fp16.vcvtsh2usi32(<8 x half>, i32)
+
+define i32 @test_x86_avx512fp16_vcvtsh2usi32(<8 x half> %arg0) {
+; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2usi32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtsh2usi %xmm0, %ecx
+; CHECK-NEXT:    vcvtsh2usi {rd-sae}, %xmm0, %eax
+; CHECK-NEXT:    addl %ecx, %eax
+; CHECK-NEXT:    retq
+  %res1 = call i32 @llvm.x86.avx512fp16.vcvtsh2usi32(<8 x half> %arg0, i32 4)
+  %res2 = call i32 @llvm.x86.avx512fp16.vcvtsh2usi32(<8 x half> %arg0, i32 9)
+  %res = add i32 %res1, %res2
+  ret i32 %res
+}
+
+
+declare i64 @llvm.x86.avx512fp16.vcvtsh2usi64(<8 x half>, i32)
+
+define i64 @test_x86_avx512fp16_vcvtsh2usi64(<8 x half> %arg0) {
+; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2usi64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtsh2usi %xmm0, %rcx
+; CHECK-NEXT:    vcvtsh2usi {ru-sae}, %xmm0, %rax
+; CHECK-NEXT:    addq %rcx, %rax
+; CHECK-NEXT:    retq
+  %res1 = call i64 @llvm.x86.avx512fp16.vcvtsh2usi64(<8 x half> %arg0, i32 4)
+  %res2 = call i64 @llvm.x86.avx512fp16.vcvtsh2usi64(<8 x half> %arg0, i32 10)
+  %res = add i64 %res1, %res2
+  ret i64 %res
+}
+
+declare i32 @llvm.x86.avx512fp16.vcvttsh2usi32(<8 x half>, i32)
+
+define i32 @test_x86_avx512fp16_vcvttsh2usi32(<8 x half> %arg0) {
+; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2usi32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttsh2usi %xmm0, %ecx
+; CHECK-NEXT:    vcvttsh2usi {sae}, %xmm0, %eax
+; CHECK-NEXT:    addl %ecx, %eax
+; CHECK-NEXT:    retq
+  %res1 = call i32 @llvm.x86.avx512fp16.vcvttsh2usi32(<8 x half> %arg0, i32 4)
+  %res2 = call i32 @llvm.x86.avx512fp16.vcvttsh2usi32(<8 x half> %arg0, i32 8)
+  %res = add i32 %res1, %res2
+  ret i32 %res
+}
+
+declare i64 @llvm.x86.avx512fp16.vcvttsh2usi64(<8 x half>, i32)
+
+define i64 @test_x86_avx512fp16_vcvttsh2usi64(<8 x half> %arg0) {
+; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2usi64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttsh2usi %xmm0, %rcx
+; CHECK-NEXT:    vcvttsh2usi {sae}, %xmm0, %rax
+; CHECK-NEXT:    addq %rcx, %rax
+; CHECK-NEXT:    retq
+  %res1 = call i64 @llvm.x86.avx512fp16.vcvttsh2usi64(<8 x half> %arg0, i32 4)
+  %res2 = call i64 @llvm.x86.avx512fp16.vcvttsh2usi64(<8 x half> %arg0, i32 8)
+  %res = add i64 %res1, %res2
+  ret i64 %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.vcvtsi2sh(<8 x half>, i32, i32)
+
+define <8 x half> @test_x86_avx512fp16_vcvtsi2sh(<8 x half> %arg0, i32 %arg1) {
+; CHECK-LABEL: test_x86_avx512fp16_vcvtsi2sh:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtsi2sh %edi, %xmm0, %xmm1
+; CHECK-NEXT:    vcvtsi2sh %edi, {rd-sae}, %xmm0, %xmm0
+; CHECK-NEXT:    vaddph %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi2sh(<8 x half> %arg0, i32 %arg1, i32 4)
+  %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi2sh(<8 x half> %arg0, i32 %arg1, i32 9)
+  %res = fadd <8 x half> %res1, %res2
+  ret <8 x half> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.vcvtsi642sh(<8 x half>, i64, i32)
+
+define <8 x half> @test_x86_avx512fp16_vcvtsi642sh(<8 x half> %arg0, i64 %arg1) {
+; CHECK-LABEL: test_x86_avx512fp16_vcvtsi642sh:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtsi2sh %rdi, %xmm0, %xmm1
+; CHECK-NEXT:    vcvtsi2sh %rdi, {rn-sae}, %xmm0, %xmm0
+; CHECK-NEXT:    vaddph %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi642sh(<8 x half> %arg0, i64 %arg1, i32 4)
+  %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi642sh(<8 x half> %arg0, i64 %arg1, i32 8)
+  %res = fadd <8 x half> %res1, %res2
+  ret <8 x half> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.vcvtusi2sh(<8 x half>, i32, i32)
+
+define <8 x half> @test_x86_avx512fp16_vcvtusi2sh(<8 x half> %arg0, i32 %arg1) {
+; CHECK-LABEL: test_x86_avx512fp16_vcvtusi2sh:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtusi2sh %edi, %xmm0, %xmm1
+; CHECK-NEXT:    vcvtusi2sh %edi, {rd-sae}, %xmm0, %xmm0
+; CHECK-NEXT:    vaddph %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi2sh(<8 x half> %arg0, i32 %arg1, i32 4)
+  %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi2sh(<8 x half> %arg0, i32 %arg1, i32 9)
+  %res = fadd <8 x half> %res1, %res2
+  ret <8 x half> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.vcvtusi642sh(<8 x half>, i64, i32)
+
+define <8 x half> @test_x86_avx512fp16_vcvtusi642sh(<8 x half> %arg0, i64 %arg1) {
+; CHECK-LABEL: test_x86_avx512fp16_vcvtusi642sh:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtusi2sh %rdi, %xmm0, %xmm1
+; CHECK-NEXT:    vcvtusi2sh %rdi, {rd-sae}, %xmm0, %xmm0
+; CHECK-NEXT:    vaddph %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi642sh(<8 x half> %arg0, i64 %arg1, i32 4)
+  %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi642sh(<8 x half> %arg0, i64 %arg1, i32 9)
+  %res = fadd <8 x half> %res1, %res2
+  ret <8 x half> %res
+}

diff  --git a/llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll
index 42a5eecadda67..6bfd69ef5db97 100644
--- a/llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll
@@ -23,3 +23,925 @@ entry:
   %0 = bitcast <8 x i16> %vecinit7.i to <2 x i64>
   ret <2 x i64> %0
 }
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_256(<8 x i32> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtdq2ph %ymm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %mask = bitcast i8 %x2 to <8 x i1>
+  %res0 = sitofp <8 x i32> %x0 to <8 x half>
+  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_256_z(<8 x i32> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_256_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtdq2ph %ymm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %mask = bitcast i8 %x2 to <8 x i1>
+  %res0 = sitofp <8 x i32> %x0 to <8 x half>
+  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
+  ret <8 x half> %res
+}
+
+define <8 x half> @sint_to_fp_8i32_to_8f16(<8 x i32> %x) {
+; CHECK-LABEL: sint_to_fp_8i32_to_8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtdq2ph %ymm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %res = sitofp <8 x i32> %x to <8 x half>
+  ret <8 x half> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32>, <8 x half>, i8)
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_128(<4 x i32> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtdq2ph %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 %x2)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_128_nomask(<4 x i32> %x0, <8 x half> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_128_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtdq2ph %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 -1)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_128_z(<4 x i32> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_128_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtdq2ph %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32> %x0, <8 x half> zeroinitializer, i8 %x2)
+  ret <8 x half> %res
+}
+
+define <4 x half> @sint_to_fp_4i32_to_4f16(<4 x i32> %x) {
+; CHECK-LABEL: sint_to_fp_4i32_to_4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtdq2ph %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = sitofp <4 x i32> %x to <4 x half>
+  ret <4 x half> %res
+}
+
+define <2 x half> @sint_to_fp_2i32_to_2f16(<2 x i32> %x) {
+; CHECK-LABEL: sint_to_fp_2i32_to_2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtdq2ph %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = sitofp <2 x i32> %x to <2 x half>
+  ret <2 x half> %res
+}
+
+define <4 x i32> @fp_to_sint_4f16_to_4i32(<4 x half> %x) {
+; CHECK-LABEL: fp_to_sint_4f16_to_4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = fptosi <4 x half> %x to <4 x i32>
+  ret <4 x i32> %res
+}
+
+define <2 x i32> @fp_to_sint_2f16_to_2i32(<2 x half> %x) {
+; CHECK-LABEL: fp_to_sint_2f16_to_2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = fptosi <2 x half> %x to <2 x i32>
+  ret <2 x i32> %res
+}
+
+define <2 x i16> @fp_to_sint_2f16_to_2i16(<2 x half> %x) {
+; CHECK-LABEL: fp_to_sint_2f16_to_2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2w %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = fptosi <2 x half> %x to <2 x i16>
+  ret <2 x i16> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_256(<8 x i32> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtudq2ph %ymm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %mask = bitcast i8 %x2 to <8 x i1>
+  %res0 = uitofp <8 x i32> %x0 to <8 x half>
+  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_256_z(<8 x i32> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_256_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtudq2ph %ymm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %mask = bitcast i8 %x2 to <8 x i1>
+  %res0 = uitofp <8 x i32> %x0 to <8 x half>
+  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
+  ret <8 x half> %res
+}
+
+define <8 x half> @uint_to_fp_8i32_to_8f16(<8 x i32> %x) {
+; CHECK-LABEL: uint_to_fp_8i32_to_8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtudq2ph %ymm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %res = uitofp <8 x i32> %x to <8 x half>
+  ret <8 x half> %res
+}
+
+define <8 x i32> @fp_to_uint_8f16_to_8i32(<8 x half> %x) {
+; CHECK-LABEL: fp_to_uint_8f16_to_8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2udq %xmm0, %ymm0
+; CHECK-NEXT:    retq
+  %res = fptoui <8 x half> %x to <8 x i32>
+  ret <8 x i32> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32>, <8 x half>, i8)
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_128(<4 x i32> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtudq2ph %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 %x2)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_128_nomask(<4 x i32> %x0, <8 x half> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_128_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtudq2ph %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 -1)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_128_z(<4 x i32> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_128_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtudq2ph %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32> %x0, <8 x half> zeroinitializer, i8 %x2)
+  ret <8 x half> %res
+}
+
+define <4 x half> @uint_to_fp_4i32_to_4f16(<4 x i32> %x) {
+; CHECK-LABEL: uint_to_fp_4i32_to_4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtudq2ph %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = uitofp <4 x i32> %x to <4 x half>
+  ret <4 x half> %res
+}
+
+define <2 x half> @uint_to_fp_2i32_to_2f16(<2 x i32> %x) {
+; CHECK-LABEL: uint_to_fp_2i32_to_2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtudq2ph %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = uitofp <2 x i32> %x to <2 x half>
+  ret <2 x half> %res
+}
+
+define <4 x i32> @fp_to_uint_4f16_to_4i32(<4 x half> %x) {
+; CHECK-LABEL: fp_to_uint_4f16_to_4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = fptoui <4 x half> %x to <4 x i32>
+  ret <4 x i32> %res
+}
+
+define <2 x i32> @fp_to_uint_2f16_to_2i32(<2 x half> %x) {
+; CHECK-LABEL: fp_to_uint_2f16_to_2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = fptoui <2 x half> %x to <2 x i32>
+  ret <2 x i32> %res
+}
+
+define <2 x i16> @fp_to_uint_2f16_to_2i16(<2 x half> %x) {
+; CHECK-LABEL: fp_to_uint_2f16_to_2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2uw %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = fptoui <2 x half> %x to <2 x i16>
+  ret <2 x i16> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half>, <4 x i32>, i8)
+
+define <4 x i32> @test_int_x86_avx512_cvt_ph2dq_128(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvt_ph2dq_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2dq %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_avx512_mask_cvt_ph2dq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2dq_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2dq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_avx512_maskz_cvt_ph2dq_128(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2dq_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2dq %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
+  ret <4 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half>, <8 x i32>, i8)
+
+define <8 x i32> @test_int_x86_avx512_cvt_ph2dq_256(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvt_ph2dq_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2dq %xmm0, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_avx512_mask_cvt_ph2dq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2dq_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2dq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT:    vmovaps %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_avx512_maskz_cvt_ph2dq_256(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2dq_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2dq %xmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
+  ret <8 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half>, <4 x i32>, i8)
+
+define <4 x i32> @test_int_x86_avx512_cvt_ph2udq_128(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2udq %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2udq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_128(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2udq %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
+  ret <4 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half>, <8 x i32>, i8)
+
+define <8 x i32> @test_int_x86_avx512_cvt_ph2udq_256(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2udq %xmm0, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2udq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT:    vmovaps %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_256(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2udq %xmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
+  ret <8 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half>, <4 x i32>, i8)
+
+define <4 x i32> @test_int_x86_avx512_cvtt_ph2dq_128(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_128(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
+  ret <4 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half>, <8 x i32>, i8)
+
+define <8 x i32> @test_int_x86_avx512_cvtt_ph2dq_256(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT:    vmovaps %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_256(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
+  ret <8 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half>, <4 x i32>, i8)
+
+define <4 x i32> @test_int_x86_avx512_cvtt_ph2udq_128(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_128(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
+  ret <4 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half>, <8 x i32>, i8)
+
+define <8 x i32> @test_int_x86_avx512_cvtt_ph2udq_256(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2udq %xmm0, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2udq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT:    vmovaps %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_256(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2udq %xmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
+  ret <8 x i32> %res
+}
+
+declare <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half>, <4 x double>, i8)
+
+define <4 x double> @test_int_x86_avx512_mask_cvt_ph2pd_256(<8 x half> %x0, <4 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2pd %xmm0, %ymm1 {%k1}
+; CHECK-NEXT:    vmovaps %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 %x2)
+  ret <4 x double> %res
+}
+
+define <4 x double> @test_int_x86_avx512_mask_cvt_ph2pd_256_nomask(<8 x half> %x0, <4 x double> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_256_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2pd %xmm0, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 -1)
+  ret <4 x double> %res
+}
+
+declare <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half>, <2 x double>, i8)
+
+define <2 x double> @test_int_x86_avx512_mask_cvt_ph2pd_128(<8 x half> %x0, <2 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtph2pd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 %x2)
+  ret <2 x double> %res
+}
+
+define <2 x double> @test_int_x86_avx512_mask_cvt_ph2pd_128_nomask(<8 x half> %x0, <2 x double> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_128_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2pd %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 -1)
+  ret <2 x double> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double>, <8 x half>, i8)
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_256(<4 x double> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtpd2ph %ymm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_256_load(<4 x double>* %px0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_256_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtpd2phy (%rdi), %xmm0 {%k1}
+; CHECK-NEXT:    retq
+  %x0 = load <4 x double>, <4 x double>* %px0, align 32
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
+  ret <8 x half> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double>, <8 x half>, i8)
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_128(<2 x double> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtpd2ph %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_128_load(<2 x double>* %px0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_128_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vcvtpd2phx (%rdi), %xmm0 {%k1}
+; CHECK-NEXT:    retq
+  %x0 = load <2 x double>, <2 x double>* %px0, align 16
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
+  ret <8 x half> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64>, <8 x half>, i8)
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_256(<4 x i64> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtqq2ph %ymm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 %x2)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_256_nomask(<4 x i64> %x0, <8 x half> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_256_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtqq2ph %ymm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 -1)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_256_z(<4 x i64> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_256_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtqq2ph %ymm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64> %x0, <8 x half> zeroinitializer, i8 %x2)
+  ret <8 x half> %res
+}
+
+define <4 x half> @sint_to_fp_4i64_to_4f16(<4 x i64> %x) {
+; CHECK-LABEL: sint_to_fp_4i64_to_4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtqq2ph %ymm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %res = sitofp <4 x i64> %x to <4 x half>
+  ret <4 x half> %res
+}
+
+define <4 x i64> @fp_to_sint_4f16_to_4i64(<4 x half> %x) {
+; CHECK-LABEL: fp_to_sint_4f16_to_4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2qq %xmm0, %ymm0
+; CHECK-NEXT:    retq
+  %res = fptosi <4 x half> %x to <4 x i64>
+  ret <4 x i64> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64>, <8 x half>, i8)
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_128(<2 x i64> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtqq2ph %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 %x2)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_128_nomask(<2 x i64> %x0, <8 x half> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_128_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtqq2ph %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 -1)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_128_z(<2 x i64> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_128_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtqq2ph %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64> %x0, <8 x half> zeroinitializer, i8 %x2)
+  ret <8 x half> %res
+}
+
+define <2 x half> @sint_to_fp_2i64_to_2f16(<2 x i64> %x) {
+; CHECK-LABEL: sint_to_fp_2i64_to_2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtqq2ph %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = sitofp <2 x i64> %x to <2 x half>
+  ret <2 x half> %res
+}
+
+define <2 x i64> @fp_to_sint_2f16_to_2i64(<2 x half> %x) {
+; CHECK-LABEL: fp_to_sint_2f16_to_2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2qq %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = fptosi <2 x half> %x to <2 x i64>
+  ret <2 x i64> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64>, <8 x half>, i8)
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_256(<4 x i64> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtuqq2ph %ymm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 %x2)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_256_nomask(<4 x i64> %x0, <8 x half> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_256_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtuqq2ph %ymm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 -1)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_256_z(<4 x i64> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_256_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtuqq2ph %ymm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64> %x0, <8 x half> zeroinitializer, i8 %x2)
+  ret <8 x half> %res
+}
+
+define <4 x half> @uint_to_fp_4i64_to_4f16(<4 x i64> %x) {
+; CHECK-LABEL: uint_to_fp_4i64_to_4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtuqq2ph %ymm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %res = uitofp <4 x i64> %x to <4 x half>
+  ret <4 x half> %res
+}
+
+define <4 x i64> @fp_to_uint_4f16_to_4i64(<4 x half> %x) {
+; CHECK-LABEL: fp_to_uint_4f16_to_4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2uqq %xmm0, %ymm0
+; CHECK-NEXT:    retq
+  %res = fptoui <4 x half> %x to <4 x i64>
+  ret <4 x i64> %res
+}
+
+declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64>, <8 x half>, i8)
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_128(<2 x i64> %x0, <8 x half> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtuqq2ph %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 %x2)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_128_nomask(<2 x i64> %x0, <8 x half> %x1) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_128_nomask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtuqq2ph %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 -1)
+  ret <8 x half> %res
+}
+
+define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_128_z(<2 x i64> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_128_z:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvtuqq2ph %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64> %x0, <8 x half> zeroinitializer, i8 %x2)
+  ret <8 x half> %res
+}
+
+define <2 x half> @uint_to_fp_2i64_to_2f16(<2 x i64> %x) {
+; CHECK-LABEL: uint_to_fp_2i64_to_2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtuqq2ph %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = uitofp <2 x i64> %x to <2 x half>
+  ret <2 x half> %res
+}
+
+define <2 x i64> @fp_to_uint_2f16_to_2i64(<2 x half> %x) {
+; CHECK-LABEL: fp_to_uint_2f16_to_2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2uqq %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = fptoui <2 x half> %x to <2 x i64>
+  ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half>, <2 x i64>, i8)
+
+define <2 x i64> @test_int_x86_avx512_cvtt_ph2qq_128(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2qq_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2qq %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half> %x0, <2 x i64> undef, i8 -1)
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @test_int_x86_avx512_mask_cvtt_ph2qq_128(<8 x half> %x0, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2qq_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2qq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half> %x0, <2 x i64> %x1, i8 %x2)
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ph2qq_128(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2qq_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2qq %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half> %x0, <2 x i64> zeroinitializer, i8 %x2)
+  ret <2 x i64> %res
+}
+
+declare <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half>, <4 x i64>, i8)
+
+define <4 x i64> @test_int_x86_avx512_cvtt_ph2qq_256(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2qq_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2qq %xmm0, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half> %x0, <4 x i64> undef, i8 -1)
+  ret <4 x i64> %res
+}
+
+define <4 x i64> @test_int_x86_avx512_mask_cvtt_ph2qq_256(<8 x half> %x0, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2qq_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2qq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT:    vmovaps %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half> %x0, <4 x i64> %x1, i8 %x2)
+  ret <4 x i64> %res
+}
+
+define <4 x i64> @test_int_x86_avx512_maskz_cvtt_ph2qq_256(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2qq_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2qq %xmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half> %x0, <4 x i64> zeroinitializer, i8 %x2)
+  ret <4 x i64> %res
+}
+
+declare <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half>, <2 x i64>, i8)
+
+define <2 x i64> @test_int_x86_avx512_cvtt_ph2uqq_128(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2uqq_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2uqq %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half> %x0, <2 x i64> undef, i8 -1)
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @test_int_x86_avx512_mask_cvtt_ph2uqq_128(<8 x half> %x0, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2uqq_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2uqq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half> %x0, <2 x i64> %x1, i8 %x2)
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ph2uqq_128(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2uqq_128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2uqq %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half> %x0, <2 x i64> zeroinitializer, i8 %x2)
+  ret <2 x i64> %res
+}
+
+declare <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half>, <4 x i64>, i8)
+
+define <4 x i64> @test_int_x86_avx512_cvtt_ph2uqq_256(<8 x half> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2uqq_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2uqq %xmm0, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half> %x0, <4 x i64> undef, i8 -1)
+  ret <4 x i64> %res
+}
+
+define <4 x i64> @test_int_x86_avx512_mask_cvtt_ph2uqq_256(<8 x half> %x0, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2uqq_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2uqq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT:    vmovaps %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half> %x0, <4 x i64> %x1, i8 %x2)
+  ret <4 x i64> %res
+}
+
+define <4 x i64> @test_int_x86_avx512_maskz_cvtt_ph2uqq_256(<8 x half> %x0, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2uqq_256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vcvttph2uqq %xmm0, %ymm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half> %x0, <4 x i64> zeroinitializer, i8 %x2)
+  ret <4 x i64> %res
+}

diff --git a/llvm/test/CodeGen/X86/cvt16-2.ll b/llvm/test/CodeGen/X86/cvt16-2.ll
new file mode 100644
index 0000000000000..67111e838cab8
--- /dev/null
+++ b/llvm/test/CodeGen/X86/cvt16-2.ll
@@ -0,0 +1,157 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-avx512fp16 | FileCheck %s -check-prefix=LIBCALL
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512fp16 | FileCheck %s -check-prefix=FP16
+
+define void @test1(float %src, i16* %dest) {
+; LIBCALL-LABEL: test1:
+; LIBCALL:       # %bb.0:
+; LIBCALL-NEXT:    pushq %rbx
+; LIBCALL-NEXT:    .cfi_def_cfa_offset 16
+; LIBCALL-NEXT:    .cfi_offset %rbx, -16
+; LIBCALL-NEXT:    movq %rdi, %rbx
+; LIBCALL-NEXT:    callq __gnu_f2h_ieee@PLT
+; LIBCALL-NEXT:    movw %ax, (%rbx)
+; LIBCALL-NEXT:    popq %rbx
+; LIBCALL-NEXT:    .cfi_def_cfa_offset 8
+; LIBCALL-NEXT:    retq
+;
+; FP16-LABEL: test1:
+; FP16:       # %bb.0:
+; FP16-NEXT:    vcvtss2sh %xmm0, %xmm0, %xmm0
+; FP16-NEXT:    vmovsh %xmm0, (%rdi)
+; FP16-NEXT:    retq
+  %1 = tail call i16 @llvm.convert.to.fp16.f32(float %src)
+  store i16 %1, i16* %dest, align 2
+  ret void
+}
+
+define float @test2(i16* nocapture %src) {
+; LIBCALL-LABEL: test2:
+; LIBCALL:       # %bb.0:
+; LIBCALL-NEXT:    movzwl (%rdi), %edi
+; LIBCALL-NEXT:    jmp __gnu_h2f_ieee@PLT # TAILCALL
+;
+; FP16-LABEL: test2:
+; FP16:       # %bb.0:
+; FP16-NEXT:    vmovsh (%rdi), %xmm0
+; FP16-NEXT:    vcvtsh2ss %xmm0, %xmm0, %xmm0
+; FP16-NEXT:    retq
+  %1 = load i16, i16* %src, align 2
+  %2 = tail call float @llvm.convert.from.fp16.f32(i16 %1)
+  ret float %2
+}
+
+define float @test3(float %src) nounwind uwtable readnone {
+; LIBCALL-LABEL: test3:
+; LIBCALL:       # %bb.0:
+; LIBCALL-NEXT:    pushq %rax
+; LIBCALL-NEXT:    .cfi_def_cfa_offset 16
+; LIBCALL-NEXT:    callq __gnu_f2h_ieee@PLT
+; LIBCALL-NEXT:    movzwl %ax, %edi
+; LIBCALL-NEXT:    popq %rax
+; LIBCALL-NEXT:    .cfi_def_cfa_offset 8
+; LIBCALL-NEXT:    jmp __gnu_h2f_ieee@PLT # TAILCALL
+;
+; FP16-LABEL: test3:
+; FP16:       # %bb.0:
+; FP16-NEXT:    vcvtss2sh %xmm0, %xmm0, %xmm0
+; FP16-NEXT:    vcvtsh2ss %xmm0, %xmm0, %xmm0
+; FP16-NEXT:    retq
+  %1 = tail call i16 @llvm.convert.to.fp16.f32(float %src)
+  %2 = tail call float @llvm.convert.from.fp16.f32(i16 %1)
+  ret float %2
+}
+
+; FIXME: Should it be __extendhfdf2?
+define double @test4(i16* nocapture %src) {
+; LIBCALL-LABEL: test4:
+; LIBCALL:       # %bb.0:
+; LIBCALL-NEXT:    pushq %rax
+; LIBCALL-NEXT:    .cfi_def_cfa_offset 16
+; LIBCALL-NEXT:    movzwl (%rdi), %edi
+; LIBCALL-NEXT:    callq __gnu_h2f_ieee@PLT
+; LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
+; LIBCALL-NEXT:    popq %rax
+; LIBCALL-NEXT:    .cfi_def_cfa_offset 8
+; LIBCALL-NEXT:    retq
+;
+; FP16-LABEL: test4:
+; FP16:       # %bb.0:
+; FP16-NEXT:    vmovsh (%rdi), %xmm0
+; FP16-NEXT:    vcvtsh2sd %xmm0, %xmm0, %xmm0
+; FP16-NEXT:    retq
+  %1 = load i16, i16* %src, align 2
+  %2 = tail call double @llvm.convert.from.fp16.f64(i16 %1)
+  ret double %2
+}
+
+define i16 @test5(double %src) {
+; LIBCALL-LABEL: test5:
+; LIBCALL:       # %bb.0:
+; LIBCALL-NEXT:    jmp __truncdfhf2@PLT # TAILCALL
+;
+; FP16-LABEL: test5:
+; FP16:       # %bb.0:
+; FP16-NEXT:    vcvtsd2sh %xmm0, %xmm0, %xmm0
+; FP16-NEXT:    vmovw %xmm0, %eax
+; FP16-NEXT:    # kill: def $ax killed $ax killed $eax
+; FP16-NEXT:    retq
+  %val = tail call i16 @llvm.convert.to.fp16.f64(double %src)
+  ret i16 %val
+}
+
+; FIXME: Should it be __extendhfxf2?
+define x86_fp80 @test6(i16* nocapture %src) {
+; LIBCALL-LABEL: test6:
+; LIBCALL:       # %bb.0:
+; LIBCALL-NEXT:    pushq %rax
+; LIBCALL-NEXT:    .cfi_def_cfa_offset 16
+; LIBCALL-NEXT:    movzwl (%rdi), %edi
+; LIBCALL-NEXT:    callq __gnu_h2f_ieee@PLT
+; LIBCALL-NEXT:    movss %xmm0, {{[0-9]+}}(%rsp)
+; LIBCALL-NEXT:    flds {{[0-9]+}}(%rsp)
+; LIBCALL-NEXT:    popq %rax
+; LIBCALL-NEXT:    .cfi_def_cfa_offset 8
+; LIBCALL-NEXT:    retq
+;
+; FP16-LABEL: test6:
+; FP16:       # %bb.0:
+; FP16-NEXT:    pushq %rax
+; FP16-NEXT:    .cfi_def_cfa_offset 16
+; FP16-NEXT:    vmovsh (%rdi), %xmm0
+; FP16-NEXT:    callq __extendhfxf2@PLT
+; FP16-NEXT:    popq %rax
+; FP16-NEXT:    .cfi_def_cfa_offset 8
+; FP16-NEXT:    retq
+  %1 = load i16, i16* %src, align 2
+  %2 = tail call x86_fp80 @llvm.convert.from.fp16.f80(i16 %1)
+  ret x86_fp80 %2
+}
+
+define i16 @test7(x86_fp80 %src) {
+; LIBCALL-LABEL: test7:
+; LIBCALL:       # %bb.0:
+; LIBCALL-NEXT:    jmp __truncxfhf2@PLT # TAILCALL
+;
+; FP16-LABEL: test7:
+; FP16:       # %bb.0:
+; FP16-NEXT:    subq $24, %rsp
+; FP16-NEXT:    .cfi_def_cfa_offset 32
+; FP16-NEXT:    fldt {{[0-9]+}}(%rsp)
+; FP16-NEXT:    fstpt (%rsp)
+; FP16-NEXT:    callq __truncxfhf2@PLT
+; FP16-NEXT:    vmovw %xmm0, %eax
+; FP16-NEXT:    # kill: def $ax killed $ax killed $eax
+; FP16-NEXT:    addq $24, %rsp
+; FP16-NEXT:    .cfi_def_cfa_offset 8
+; FP16-NEXT:    retq
+  %val = tail call i16 @llvm.convert.to.fp16.f80(x86_fp80 %src)
+  ret i16 %val
+}
+
+declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
+declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
+declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone
+declare i16 @llvm.convert.to.fp16.f64(double) nounwind readnone
+declare x86_fp80 @llvm.convert.from.fp16.f80(i16) nounwind readnone
+declare i16 @llvm.convert.to.fp16.f80(x86_fp80) nounwind readnone

diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-fp16.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-fp16.ll
index 9d58a262dcc6c..a2c289f1a26e4 100644
--- a/llvm/test/CodeGen/X86/fp-strict-scalar-fp16.ll
+++ b/llvm/test/CodeGen/X86/fp-strict-scalar-fp16.ll
@@ -6,6 +6,10 @@ declare half @llvm.experimental.constrained.fadd.f16(half, half, metadata, metad
 declare half @llvm.experimental.constrained.fsub.f16(half, half, metadata, metadata)
 declare half @llvm.experimental.constrained.fmul.f16(half, half, metadata, metadata)
 declare half @llvm.experimental.constrained.fdiv.f16(half, half, metadata, metadata)
+declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata)
+declare double @llvm.experimental.constrained.fpext.f64.f16(half, metadata)
+declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata)
+declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata)
 
 define half @fadd_f16(half %a, half %b) nounwind strictfp {
 ; X86-LABEL: fadd_f16:
@@ -75,4 +79,98 @@ define half @fdiv_f16(half %a, half %b) nounwind strictfp {
   ret half %ret
 }
 
+define void @fpext_f16_to_f32(half* %val, float* %ret) nounwind strictfp {
+; X86-LABEL: fpext_f16_to_f32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    vmovsh (%ecx), %xmm0
+; X86-NEXT:    vcvtsh2ss %xmm0, %xmm0, %xmm0
+; X86-NEXT:    vmovss %xmm0, (%eax)
+; X86-NEXT:    retl
+;
+; X64-LABEL: fpext_f16_to_f32:
+; X64:       # %bb.0:
+; X64-NEXT:    vmovsh (%rdi), %xmm0
+; X64-NEXT:    vcvtsh2ss %xmm0, %xmm0, %xmm0
+; X64-NEXT:    vmovss %xmm0, (%rsi)
+; X64-NEXT:    retq
+  %1 = load half, half* %val, align 4
+  %res = call float @llvm.experimental.constrained.fpext.f32.f16(half %1,
+                                                                 metadata !"fpexcept.strict") #0
+  store float %res, float* %ret, align 8
+  ret void
+}
+
+define void @fpext_f16_to_f64(half* %val, double* %ret) nounwind strictfp {
+; X86-LABEL: fpext_f16_to_f64:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    vmovsh (%ecx), %xmm0
+; X86-NEXT:    vcvtsh2sd %xmm0, %xmm0, %xmm0
+; X86-NEXT:    vmovsd %xmm0, (%eax)
+; X86-NEXT:    retl
+;
+; X64-LABEL: fpext_f16_to_f64:
+; X64:       # %bb.0:
+; X64-NEXT:    vmovsh (%rdi), %xmm0
+; X64-NEXT:    vcvtsh2sd %xmm0, %xmm0, %xmm0
+; X64-NEXT:    vmovsd %xmm0, (%rsi)
+; X64-NEXT:    retq
+  %1 = load half, half* %val, align 4
+  %res = call double @llvm.experimental.constrained.fpext.f64.f16(half %1,
+                                                                  metadata !"fpexcept.strict") #0
+  store double %res, double* %ret, align 8
+  ret void
+}
+
+define void @fptrunc_float_to_f16(float* %val, half *%ret) nounwind strictfp {
+; X86-LABEL: fptrunc_float_to_f16:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT:    vcvtss2sh %xmm0, %xmm0, %xmm0
+; X86-NEXT:    vmovsh %xmm0, (%eax)
+; X86-NEXT:    retl
+;
+; X64-LABEL: fptrunc_float_to_f16:
+; X64:       # %bb.0:
+; X64-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    vcvtss2sh %xmm0, %xmm0, %xmm0
+; X64-NEXT:    vmovsh %xmm0, (%rsi)
+; X64-NEXT:    retq
+  %1 = load float, float* %val, align 8
+  %res = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %1,
+                                                                  metadata !"round.dynamic",
+                                                                  metadata !"fpexcept.strict") #0
+  store half %res, half* %ret, align 4
+  ret void
+}
+
+define void @fptrunc_double_to_f16(double* %val, half *%ret) nounwind strictfp {
+; X86-LABEL: fptrunc_double_to_f16:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT:    vcvtsd2sh %xmm0, %xmm0, %xmm0
+; X86-NEXT:    vmovsh %xmm0, (%eax)
+; X86-NEXT:    retl
+;
+; X64-LABEL: fptrunc_double_to_f16:
+; X64:       # %bb.0:
+; X64-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT:    vcvtsd2sh %xmm0, %xmm0, %xmm0
+; X64-NEXT:    vmovsh %xmm0, (%rsi)
+; X64-NEXT:    retq
+  %1 = load double, double* %val, align 8
+  %res = call half @llvm.experimental.constrained.fptrunc.f16.f64(double %1,
+                                                                  metadata !"round.dynamic",
+                                                                  metadata !"fpexcept.strict") #0
+  store half %res, half* %ret, align 4
+  ret void
+}
+
 attributes #0 = { strictfp }

diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint-fp16.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint-fp16.ll
new file mode 100644
index 0000000000000..9ea19ca318816
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint-fp16.ll
@@ -0,0 +1,184 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X64
+
+declare i1  @llvm.experimental.constrained.fptosi.i1.f16(half, metadata)
+declare i8  @llvm.experimental.constrained.fptosi.i8.f16(half, metadata)
+declare i16 @llvm.experimental.constrained.fptosi.i16.f16(half, metadata)
+declare i32 @llvm.experimental.constrained.fptosi.i32.f16(half, metadata)
+declare i64 @llvm.experimental.constrained.fptosi.i64.f16(half, metadata)
+declare i1  @llvm.experimental.constrained.fptoui.i1.f16(half, metadata)
+declare i8  @llvm.experimental.constrained.fptoui.i8.f16(half, metadata)
+declare i16 @llvm.experimental.constrained.fptoui.i16.f16(half, metadata)
+declare i32 @llvm.experimental.constrained.fptoui.i32.f16(half, metadata)
+declare i64 @llvm.experimental.constrained.fptoui.i64.f16(half, metadata)
+
+define i1 @fptosi_f16toi1(half %x) #0 {
+; X86-LABEL: fptosi_f16toi1:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: fptosi_f16toi1:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttsh2si %xmm0, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %result = call i1 @llvm.experimental.constrained.fptosi.i1.f16(half %x,
+                                               metadata !"fpexcept.strict") #0
+  ret i1 %result
+}
+
+define i8 @fptosi_f16toi8(half %x) #0 {
+; X86-LABEL: fptosi_f16toi8:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: fptosi_f16toi8:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttsh2si %xmm0, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %result = call i8 @llvm.experimental.constrained.fptosi.i8.f16(half %x,
+                                               metadata !"fpexcept.strict") #0
+  ret i8 %result
+}
+
+define i16 @fptosi_f16toi16(half %x) #0 {
+; X86-LABEL: fptosi_f16toi16:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: fptosi_f16toi16:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttsh2si %xmm0, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+  %result = call i16 @llvm.experimental.constrained.fptosi.i16.f16(half %x,
+                                               metadata !"fpexcept.strict") #0
+  ret i16 %result
+}
+
+define i32 @fptosi_f16toi32(half %x) #0 {
+; X86-LABEL: fptosi_f16toi32:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: fptosi_f16toi32:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttsh2si %xmm0, %eax
+; X64-NEXT:    retq
+  %result = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %x,
+                                               metadata !"fpexcept.strict") #0
+  ret i32 %result
+}
+
+define i64 @fptosi_f16toi64(half %x) #0 {
+; X86-LABEL: fptosi_f16toi64:
+; X86:       # %bb.0:
+; X86-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT:    vcvttph2qq %xmm0, %xmm0
+; X86-NEXT:    vmovd %xmm0, %eax
+; X86-NEXT:    vpextrd $1, %xmm0, %edx
+; X86-NEXT:    retl
+;
+; X64-LABEL: fptosi_f16toi64:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttsh2si %xmm0, %rax
+; X64-NEXT:    retq
+  %result = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %x,
+                                               metadata !"fpexcept.strict") #0
+  ret i64 %result
+}
+
+define i1 @fptoui_f16toi1(half %x) #0 {
+; X86-LABEL: fptoui_f16toi1:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: fptoui_f16toi1:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttsh2si %xmm0, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %result = call i1 @llvm.experimental.constrained.fptoui.i1.f16(half %x,
+                                               metadata !"fpexcept.strict") #0
+  ret i1 %result
+}
+
+define i8 @fptoui_f16toi8(half %x) #0 {
+; X86-LABEL: fptoui_f16toi8:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: fptoui_f16toi8:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttsh2si %xmm0, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
+  %result = call i8 @llvm.experimental.constrained.fptoui.i8.f16(half %x,
+                                               metadata !"fpexcept.strict") #0
+  ret i8 %result
+}
+
+define i16 @fptoui_f16toi16(half %x) #0 {
+; X86-LABEL: fptoui_f16toi16:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttsh2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: fptoui_f16toi16:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttsh2si %xmm0, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+  %result = call i16 @llvm.experimental.constrained.fptoui.i16.f16(half %x,
+                                               metadata !"fpexcept.strict") #0
+  ret i16 %result
+}
+
+define i32 @fptoui_f16toi32(half %x) #0 {
+; X86-LABEL: fptoui_f16toi32:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvttsh2usi {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: fptoui_f16toi32:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttsh2usi %xmm0, %eax
+; X64-NEXT:    retq
+  %result = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x,
+                                               metadata !"fpexcept.strict") #0
+  ret i32 %result
+}
+
+define i64 @fptoui_f16toi64(half %x) #0 {
+; X86-LABEL: fptoui_f16toi64:
+; X86:       # %bb.0:
+; X86-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT:    vcvttph2uqq %xmm0, %xmm0
+; X86-NEXT:    vmovd %xmm0, %eax
+; X86-NEXT:    vpextrd $1, %xmm0, %edx
+; X86-NEXT:    retl
+;
+; X64-LABEL: fptoui_f16toi64:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvttsh2usi %xmm0, %rax
+; X64-NEXT:    retq
+  %result = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %x,
+                                               metadata !"fpexcept.strict") #0
+  ret i64 %result
+}
+
+attributes #0 = { strictfp }

diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp-fp16.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp-fp16.ll
new file mode 100644
index 0000000000000..58b6068ea53ac
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp-fp16.ll
@@ -0,0 +1,197 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X64
+
+declare half @llvm.experimental.constrained.sitofp.f16.i1(i1, metadata, metadata)
+declare half @llvm.experimental.constrained.sitofp.f16.i8(i8, metadata, metadata)
+declare half @llvm.experimental.constrained.sitofp.f16.i16(i16, metadata, metadata)
+declare half @llvm.experimental.constrained.sitofp.f16.i32(i32, metadata, metadata)
+declare half @llvm.experimental.constrained.sitofp.f16.i64(i64, metadata, metadata)
+declare half @llvm.experimental.constrained.uitofp.f16.i1(i1, metadata, metadata)
+declare half @llvm.experimental.constrained.uitofp.f16.i8(i8, metadata, metadata)
+declare half @llvm.experimental.constrained.uitofp.f16.i16(i16, metadata, metadata)
+declare half @llvm.experimental.constrained.uitofp.f16.i32(i32, metadata, metadata)
+declare half @llvm.experimental.constrained.uitofp.f16.i64(i64, metadata, metadata)
+
+define half @sitofp_i1tof16(i1 %x) #0 {
+; X86-LABEL: sitofp_i1tof16:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    andb $1, %al
+; X86-NEXT:    negb %al
+; X86-NEXT:    movsbl %al, %eax
+; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: sitofp_i1tof16:
+; X64:       # %bb.0:
+; X64-NEXT:    andb $1, %dil
+; X64-NEXT:    negb %dil
+; X64-NEXT:    movsbl %dil, %eax
+; X64-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
+; X64-NEXT:    retq
+  %result = call half @llvm.experimental.constrained.sitofp.f16.i1(i1 %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret half %result
+}
+
+define half @sitofp_i8tof16(i8 %x) #0 {
+; X86-LABEL: sitofp_i8tof16:
+; X86:       # %bb.0:
+; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: sitofp_i8tof16:
+; X64:       # %bb.0:
+; X64-NEXT:    movsbl %dil, %eax
+; X64-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
+; X64-NEXT:    retq
+  %result = call half @llvm.experimental.constrained.sitofp.f16.i8(i8 %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret half %result
+}
+
+define half @sitofp_i16tof16(i16 %x) #0 {
+; X86-LABEL: sitofp_i16tof16:
+; X86:       # %bb.0:
+; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: sitofp_i16tof16:
+; X64:       # %bb.0:
+; X64-NEXT:    movswl %di, %eax
+; X64-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
+; X64-NEXT:    retq
+  %result = call half @llvm.experimental.constrained.sitofp.f16.i16(i16 %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret half %result
+}
+
+define half @sitofp_i32tof16(i32 %x) #0 {
+; X86-LABEL: sitofp_i32tof16:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvtsi2shl {{[0-9]+}}(%esp), %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: sitofp_i32tof16:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtsi2sh %edi, %xmm0, %xmm0
+; X64-NEXT:    retq
+  %result = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret half %result
+}
+
+define half @sitofp_i64tof16(i64 %x) #0 {
+; X86-LABEL: sitofp_i64tof16:
+; X86:       # %bb.0:
+; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT:    vcvtqq2ph %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: sitofp_i64tof16:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtsi2sh %rdi, %xmm0, %xmm0
+; X64-NEXT:    retq
+  %result = call half @llvm.experimental.constrained.sitofp.f16.i64(i64 %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret half %result
+}
+
+define half @uitofp_i1tof16(i1 %x) #0 {
+; X86-LABEL: uitofp_i1tof16:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    andb $1, %al
+; X86-NEXT:    movzbl %al, %eax
+; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: uitofp_i1tof16:
+; X64:       # %bb.0:
+; X64-NEXT:    andl $1, %edi
+; X64-NEXT:    vcvtsi2sh %edi, %xmm0, %xmm0
+; X64-NEXT:    retq
+  %result = call half @llvm.experimental.constrained.uitofp.f16.i1(i1 %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret half %result
+}
+
+define half @uitofp_i8tof16(i8 %x) #0 {
+; X86-LABEL: uitofp_i8tof16:
+; X86:       # %bb.0:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: uitofp_i8tof16:
+; X64:       # %bb.0:
+; X64-NEXT:    movzbl %dil, %eax
+; X64-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
+; X64-NEXT:    retq
+  %result = call half @llvm.experimental.constrained.uitofp.f16.i8(i8 %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret half %result
+}
+
+define half @uitofp_i16tof16(i16 %x) #0 {
+; X86-LABEL: uitofp_i16tof16:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: uitofp_i16tof16:
+; X64:       # %bb.0:
+; X64-NEXT:    movzwl %di, %eax
+; X64-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
+; X64-NEXT:    retq
+  %result = call half @llvm.experimental.constrained.uitofp.f16.i16(i16 %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret half %result
+}
+
+define half @uitofp_i32tof16(i32 %x) #0 {
+; X86-LABEL: uitofp_i32tof16:
+; X86:       # %bb.0:
+; X86-NEXT:    vcvtusi2shl {{[0-9]+}}(%esp), %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: uitofp_i32tof16:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtusi2sh %edi, %xmm0, %xmm0
+; X64-NEXT:    retq
+  %result = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret half %result
+}
+
+define half @uitofp_i64tof16(i64 %x) #0 {
+; X86-LABEL: uitofp_i64tof16:
+; X86:       # %bb.0:
+; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT:    vcvtuqq2ph %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: uitofp_i64tof16:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtusi2sh %rdi, %xmm0, %xmm0
+; X64-NEXT:    retq
+  %result = call half @llvm.experimental.constrained.uitofp.f16.i64(i64 %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret half %result
+}
+
+attributes #0 = { strictfp }

diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16vl.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16vl.ll
index bd9706839943a..fab7059dd959d 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16vl.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16vl.ll
@@ -144,5 +144,21 @@ define <16 x half> @stack_fold_mulph_ymm(<16 x half> %a0, <16 x half> %a1) {
   ret <16 x half> %2
 }
 
+define <8 x half> @stack_fold_subph(<8 x half> %a0, <8 x half> %a1) {
+  ;CHECK-LABEL: stack_fold_subph
+  ;CHECK:       vsubph {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = fsub <8 x half> %a0, %a1
+  ret <8 x half> %2
+}
+
+define <16 x half> @stack_fold_subph_ymm(<16 x half> %a0, <16 x half> %a1) {
+  ;CHECK-LABEL: stack_fold_subph_ymm
+  ;CHECK:       vsubph {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = fsub <16 x half> %a0, %a1
+  ret <16 x half> %2
+}
+
 attributes #0 = { "unsafe-fp-math"="false" }
 attributes #1 = { "unsafe-fp-math"="true" }

diff --git a/llvm/test/CodeGen/X86/vec-strict-128-fp16.ll b/llvm/test/CodeGen/X86/vec-strict-128-fp16.ll
index 7657b769fa1b7..7171ac32336d3 100644
--- a/llvm/test/CodeGen/X86/vec-strict-128-fp16.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-128-fp16.ll
@@ -6,6 +6,16 @@ declare <8 x half> @llvm.experimental.constrained.fadd.v8f16(<8 x half>, <8 x ha
 declare <8 x half> @llvm.experimental.constrained.fsub.v8f16(<8 x half>, <8 x half>, metadata, metadata)
 declare <8 x half> @llvm.experimental.constrained.fmul.v8f16(<8 x half>, <8 x half>, metadata, metadata)
 declare <8 x half> @llvm.experimental.constrained.fdiv.v8f16(<8 x half>, <8 x half>, metadata, metadata)
+declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata)
+declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata)
+declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata)
+declare <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float>, metadata, metadata)
+declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(<2 x double>, metadata, metadata)
+declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata)
+declare double @llvm.experimental.constrained.fpext.f64.f16(half, metadata)
+declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata)
+declare <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half>, metadata)
+declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f16(<2 x half>, metadata)
 
 define <8 x half> @f2(<8 x half> %a, <8 x half> %b) #0 {
 ; CHECK-LABEL: f2:
@@ -51,4 +61,130 @@ define <8 x half> @f8(<8 x half> %a, <8 x half> %b) #0 {
   ret <8 x half> %ret
 }
 
+define <8 x half> @f11(<2 x double> %a0, <8 x half> %a1) #0 {
+; CHECK-LABEL: f11:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtsd2sh %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovsh %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ext = extractelement <2 x double> %a0, i32 0
+  %cvt = call half @llvm.experimental.constrained.fptrunc.f16.f64(double %ext,
+                                                                  metadata !"round.dynamic",
+                                                                  metadata !"fpexcept.strict") #0
+  %res = insertelement <8 x half> %a1, half %cvt, i32 0
+  ret <8 x half> %res
+}
+
+define <2 x double> @f12(<2 x double> %a0, <8 x half> %a1) #0 {
+; CHECK-LABEL: f12:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtsh2sd %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ext = extractelement <8 x half> %a1, i32 0
+  %cvt = call double @llvm.experimental.constrained.fpext.f64.f16(half %ext,
+                                                                  metadata !"fpexcept.strict") #0
+  %res = insertelement <2 x double> %a0, double %cvt, i32 0
+  ret <2 x double> %res
+}
+
+define <2 x double> @f15(<2 x half> %a) #0 {
+; CHECK-LABEL: f15:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2pd %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f16(
+                                <2 x half> %a,
+                                metadata !"fpexcept.strict") #0
+  ret <2 x double> %ret
+}
+
+define <2 x half> @f16(<2 x double> %a) #0 {
+; CHECK-LABEL: f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtpd2ph %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(
+                                <2 x double> %a,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict") #0
+  ret <2 x half> %ret
+}
+
+define <8 x half> @f17(<4 x float> %a0, <8 x half> %a1) #0 {
+; CHECK-LABEL: f17:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtss2sh %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovsh %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ext = extractelement <4 x float> %a0, i32 0
+  %cvt = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %ext,
+                                                                  metadata !"round.dynamic",
+                                                                  metadata !"fpexcept.strict") #0
+  %res = insertelement <8 x half> %a1, half %cvt, i32 0
+  ret <8 x half> %res
+}
+
+define <4 x float> @f18(<4 x float> %a0, <8 x half> %a1) #0 {
+; CHECK-LABEL: f18:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtsh2ss %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ext = extractelement <8 x half> %a1, i32 0
+  %cvt = call float @llvm.experimental.constrained.fpext.f32.f16(half %ext,
+                                                                  metadata !"fpexcept.strict") #0
+  %res = insertelement <4 x float> %a0, float %cvt, i32 0
+  ret <4 x float> %res
+}
+
+define <2 x float> @f19(<2 x half> %a) #0 {
+; CHECK-LABEL: f19:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
+; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(
+                                <2 x half> %a,
+                                metadata !"fpexcept.strict") #0
+  ret <2 x float> %ret
+}
+
+define <4 x float> @f20(<4 x half> %a) #0 {
+; CHECK-LABEL: f20:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(
+                                <4 x half> %a,
+                                metadata !"fpexcept.strict") #0
+  ret <4 x float> %ret
+}
+
+define <2 x half> @f21(<2 x float> %a) #0 {
+; CHECK-LABEL: f21:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-NEXT:    vcvtps2phx %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(
+                                <2 x float> %a,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict") #0
+  ret <2 x half> %ret
+}
+
+define <4 x half> @f22(<4 x float> %a) #0 {
+; CHECK-LABEL: f22:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtps2phx %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(
+                                <4 x float> %a,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict") #0
+  ret <4 x half> %ret
+}
+
 attributes #0 = { strictfp }

diff --git a/llvm/test/CodeGen/X86/vec-strict-256-fp16.ll b/llvm/test/CodeGen/X86/vec-strict-256-fp16.ll
index d94003aab9daa..8b78a5b5c492c 100644
--- a/llvm/test/CodeGen/X86/vec-strict-256-fp16.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-256-fp16.ll
@@ -6,6 +6,10 @@ declare <16 x half> @llvm.experimental.constrained.fadd.v16f16(<16 x half>, <16
 declare <16 x half> @llvm.experimental.constrained.fsub.v16f16(<16 x half>, <16 x half>, metadata, metadata)
 declare <16 x half> @llvm.experimental.constrained.fmul.v16f16(<16 x half>, <16 x half>, metadata, metadata)
 declare <16 x half> @llvm.experimental.constrained.fdiv.v16f16(<16 x half>, <16 x half>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f16(<4 x half>, metadata)
+declare <8 x float> @llvm.experimental.constrained.fpext.v8f32.v8f16(<8 x half>, metadata)
+declare <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f64(<4 x double>, metadata, metadata)
+declare <8 x half> @llvm.experimental.constrained.fptrunc.v8f16.v8f32(<8 x float>, metadata, metadata)
 
 define <16 x half> @f2(<16 x half> %a, <16 x half> %b) #0 {
 ; CHECK-LABEL: f2:
@@ -51,4 +55,52 @@ define <16 x half> @f8(<16 x half> %a, <16 x half> %b) #0 {
   ret <16 x half> %ret
 }
 
+define <4 x double> @f11(<4 x half> %a) #0 {
+; CHECK-LABEL: f11:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2pd %xmm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f16(
+                                <4 x half> %a,
+                                metadata !"fpexcept.strict") #0
+  ret <4 x double> %ret
+}
+
+define <4 x half> @f12(<4 x double> %a) #0 {
+; CHECK-LABEL: f12:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtpd2ph %ymm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f64(
+                                <4 x double> %a,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict") #0
+  ret <4 x half> %ret
+}
+
+define <8 x float> @f14(<8 x half> %a) #0 {
+; CHECK-LABEL: f14:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2psx %xmm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <8 x float> @llvm.experimental.constrained.fpext.v8f32.v8f16(
+                                <8 x half> %a,
+                                metadata !"fpexcept.strict") #0
+  ret <8 x float> %ret
+}
+
+define <8 x half> @f15(<8 x float> %a) #0 {
+; CHECK-LABEL: f15:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <8 x half> @llvm.experimental.constrained.fptrunc.v8f16.v8f32(
+                                <8 x float> %a,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict") #0
+  ret <8 x half> %ret
+}
+
 attributes #0 = { strictfp }

diff --git a/llvm/test/CodeGen/X86/vec-strict-512-fp16.ll b/llvm/test/CodeGen/X86/vec-strict-512-fp16.ll
index 4a5c8ca00b5f7..0a25d1c9d3d01 100644
--- a/llvm/test/CodeGen/X86/vec-strict-512-fp16.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-512-fp16.ll
@@ -6,6 +6,10 @@ declare <32 x half> @llvm.experimental.constrained.fadd.v32f16(<32 x half>, <32
 declare <32 x half> @llvm.experimental.constrained.fsub.v32f16(<32 x half>, <32 x half>, metadata, metadata)
 declare <32 x half> @llvm.experimental.constrained.fmul.v32f16(<32 x half>, <32 x half>, metadata, metadata)
 declare <32 x half> @llvm.experimental.constrained.fdiv.v32f16(<32 x half>, <32 x half>, metadata, metadata)
+declare <8 x double> @llvm.experimental.constrained.fpext.v8f64.v8f16(<8 x half>, metadata)
+declare <16 x float> @llvm.experimental.constrained.fpext.v16f32.v16f16(<16 x half>, metadata)
+declare <8 x half> @llvm.experimental.constrained.fptrunc.v8f16.v8f64(<8 x double>, metadata, metadata)
+declare <16 x half> @llvm.experimental.constrained.fptrunc.v16f16.v16f32(<16 x float>, metadata, metadata)
 
 define <32 x half> @f2(<32 x half> %a, <32 x half> %b) #0 {
 ; CHECK-LABEL: f2:
@@ -51,4 +55,51 @@ define <32 x half> @f8(<32 x half> %a, <32 x half> %b) #0 {
   ret <32 x half> %ret
 }
 
+define <8 x double> @f11(<8 x half> %a) #0 {
+; CHECK-LABEL: f11:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2pd %xmm0, %zmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <8 x double> @llvm.experimental.constrained.fpext.v8f64.v8f16(
+                                <8 x half> %a,
+                                metadata !"fpexcept.strict") #0
+  ret <8 x double> %ret
+}
+
+define <8 x half> @f12(<8 x double> %a) #0 {
+; CHECK-LABEL: f12:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtpd2ph %zmm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <8 x half> @llvm.experimental.constrained.fptrunc.v8f16.v8f64(
+                                <8 x double> %a,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict") #0
+  ret <8 x half> %ret
+}
+
+define <16 x float> @f14(<16 x half> %a) #0 {
+; CHECK-LABEL: f14:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtph2psx %ymm0, %zmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <16 x float> @llvm.experimental.constrained.fpext.v16f32.v16f16(
+                                <16 x half> %a,
+                                metadata !"fpexcept.strict") #0
+  ret <16 x float> %ret
+}
+
+define <16 x half> @f15(<16 x float> %a) #0 {
+; CHECK-LABEL: f15:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtps2phx %zmm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <16 x half> @llvm.experimental.constrained.fptrunc.v16f16.v16f32(
+                                <16 x float> %a,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict") #0
+  ret <16 x half> %ret
+}
+
 attributes #0 = { strictfp }

diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-128-fp16.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-128-fp16.ll
new file mode 100644
index 0000000000000..441fd8926acd0
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-128-fp16.ll
@@ -0,0 +1,323 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512fp16,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK
+
+declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f16(<2 x half>, metadata)
+declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f16(<2 x half>, metadata)
+declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f16(<2 x half>, metadata)
+declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f16(<2 x half>, metadata)
+declare <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f16(<2 x half>, metadata)
+declare <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f16(<2 x half>, metadata)
+declare <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f16(<2 x half>, metadata)
+declare <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f16(<2 x half>, metadata)
+declare <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f16(<2 x half>, metadata)
+declare <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f16(<2 x half>, metadata)
+declare <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f16(<4 x half>, metadata)
+declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f16(<4 x half>, metadata)
+declare <4 x i16> @llvm.experimental.constrained.fptosi.v4i16.v4f16(<4 x half>, metadata)
+declare <4 x i16> @llvm.experimental.constrained.fptoui.v4i16.v4f16(<4 x half>, metadata)
+declare <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f16(<4 x half>, metadata)
+declare <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f16(<4 x half>, metadata)
+declare <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f16(<4 x half>, metadata)
+declare <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f16(<4 x half>, metadata)
+declare <8 x i16> @llvm.experimental.constrained.fptosi.v8i16.v8f16(<8 x half>, metadata)
+declare <8 x i16> @llvm.experimental.constrained.fptoui.v8i16.v8f16(<8 x half>, metadata)
+declare <8 x i8> @llvm.experimental.constrained.fptosi.v8i8.v8f16(<8 x half>, metadata)
+declare <8 x i8> @llvm.experimental.constrained.fptoui.v8i8.v8f16(<8 x half>, metadata)
+declare <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f16(<8 x half>, metadata)
+declare <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f16(<8 x half>, metadata)
+
+define <2 x i64> @strict_vector_fptosi_v2f16_to_v2i64(<2 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v2f16_to_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; CHECK-NEXT:    vcvttph2qq %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f16(<2 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <2 x i64> %ret
+}
+
+define <2 x i64> @strict_vector_fptoui_v2f16_to_v2i64(<2 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v2f16_to_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; CHECK-NEXT:    vcvttph2uqq %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f16(<2 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <2 x i64> %ret
+}
+
+define <2 x i32> @strict_vector_fptosi_v2f16_to_v2i32(<2 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v2f16_to_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f16(<2 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <2 x i32> %ret
+}
+
+define <2 x i32> @strict_vector_fptoui_v2f16_to_v2i32(<2 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v2f16_to_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f16(<2 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <2 x i32> %ret
+}
+
+define <2 x i16> @strict_vector_fptosi_v2f16_to_v2i16(<2 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v2f16_to_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; CHECK-NEXT:    vcvttph2w %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f16(<2 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <2 x i16> %ret
+}
+
+define <2 x i16> @strict_vector_fptoui_v2f16_to_v2i16(<2 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v2f16_to_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; CHECK-NEXT:    vcvttph2uw %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f16(<2 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <2 x i16> %ret
+}
+
+define <2 x i8> @strict_vector_fptosi_v2f16_to_v2i8(<2 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v2f16_to_v2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; CHECK-NEXT:    vcvttph2w %xmm0, %xmm0
+; CHECK-NEXT:    vpmovwb %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f16(<2 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <2 x i8> %ret
+}
+
+define <2 x i8> @strict_vector_fptoui_v2f16_to_v2i8(<2 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v2f16_to_v2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; CHECK-NEXT:    vcvttph2uw %xmm0, %xmm0
+; CHECK-NEXT:    vpmovwb %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f16(<2 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <2 x i8> %ret
+}
+
+define <2 x i1> @strict_vector_fptosi_v2f16_to_v2i1(<2 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v2f16_to_v2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; CHECK-NEXT:    vcvttph2w %xmm0, %xmm0
+; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
+; CHECK-NEXT:    vpmovw2m %xmm0, %k0
+; CHECK-NEXT:    vpmovm2q %k0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f16(<2 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <2 x i1> %ret
+}
+
+define <2 x i1> @strict_vector_fptoui_v2f16_to_v2i1(<2 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v2f16_to_v2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; CHECK-NEXT:    vcvttph2uw %xmm0, %xmm0
+; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
+; CHECK-NEXT:    vpmovw2m %xmm0, %k0
+; CHECK-NEXT:    vpmovm2q %k0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f16(<2 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <2 x i1> %ret
+}
+
+define <4 x i32> @strict_vector_fptosi_v4f16_to_v4i32(<4 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v4f16_to_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f16(<4 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <4 x i32> %ret
+}
+
+define <4 x i32> @strict_vector_fptoui_v4f16_to_v4i32(<4 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v4f16_to_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f16(<4 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <4 x i32> %ret
+}
+
+define <4 x i16> @strict_vector_fptosi_v4f16_to_v4i16(<4 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v4f16_to_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-NEXT:    vcvttph2w %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <4 x i16> @llvm.experimental.constrained.fptosi.v4i16.v4f16(<4 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <4 x i16> %ret
+}
+
+define <4 x i16> @strict_vector_fptoui_v4f16_to_v4i16(<4 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v4f16_to_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-NEXT:    vcvttph2uw %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <4 x i16> @llvm.experimental.constrained.fptoui.v4i16.v4f16(<4 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <4 x i16> %ret
+}
+
+define <4 x i8> @strict_vector_fptosi_v4f16_to_v4i8(<4 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v4f16_to_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-NEXT:    vcvttph2w %xmm0, %xmm0
+; CHECK-NEXT:    vpmovwb %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f16(<4 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <4 x i8> %ret
+}
+
+define <4 x i8> @strict_vector_fptoui_v4f16_to_v4i8(<4 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v4f16_to_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-NEXT:    vcvttph2uw %xmm0, %xmm0
+; CHECK-NEXT:    vpmovwb %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f16(<4 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <4 x i8> %ret
+}
+
+define <4 x i1> @strict_vector_fptosi_v4f16_to_v4i1(<4 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v4f16_to_v4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-NEXT:    vcvttph2w %xmm0, %xmm0
+; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
+; CHECK-NEXT:    vpmovw2m %xmm0, %k0
+; CHECK-NEXT:    vpmovm2d %k0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f16(<4 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <4 x i1> %ret
+}
+
+define <4 x i1> @strict_vector_fptoui_v4f16_to_v4i1(<4 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v4f16_to_v4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-NEXT:    vcvttph2uw %xmm0, %xmm0
+; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
+; CHECK-NEXT:    vpmovw2m %xmm0, %k0
+; CHECK-NEXT:    vpmovm2d %k0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f16(<4 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <4 x i1> %ret
+}
+
+define <8 x i16> @strict_vector_fptosi_v8f16_to_v8i16(<8 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v8f16_to_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2w %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <8 x i16> @llvm.experimental.constrained.fptosi.v8i16.v8f16(<8 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <8 x i16> %ret
+}
+
+define <8 x i16> @strict_vector_fptoui_v8f16_to_v8i16(<8 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v8f16_to_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2uw %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <8 x i16> @llvm.experimental.constrained.fptoui.v8i16.v8f16(<8 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <8 x i16> %ret
+}
+
+define <8 x i8> @strict_vector_fptosi_v8f16_to_v8i8(<8 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v8f16_to_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2w %xmm0, %xmm0
+; CHECK-NEXT:    vpmovwb %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <8 x i8> @llvm.experimental.constrained.fptosi.v8i8.v8f16(<8 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <8 x i8> %ret
+}
+
+define <8 x i8> @strict_vector_fptoui_v8f16_to_v8i8(<8 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v8f16_to_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2uw %xmm0, %xmm0
+; CHECK-NEXT:    vpmovwb %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <8 x i8> @llvm.experimental.constrained.fptoui.v8i8.v8f16(<8 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <8 x i8> %ret
+}
+
+define <8 x i1> @strict_vector_fptosi_v8f16_to_v8i1(<8 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v8f16_to_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm0
+; CHECK-NEXT:    vpmovd2m %ymm0, %k0
+; CHECK-NEXT:    vpmovm2w %k0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f16(<8 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <8 x i1> %ret
+}
+
+define <8 x i1> @strict_vector_fptoui_v8f16_to_v8i1(<8 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v8f16_to_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm0
+; CHECK-NEXT:    vpslld $31, %ymm0, %ymm0
+; CHECK-NEXT:    vpmovd2m %ymm0, %k0
+; CHECK-NEXT:    vpmovm2w %k0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f16(<8 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <8 x i1> %ret
+}
+
+attributes #0 = { strictfp }

diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-256-fp16.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-256-fp16.ll
new file mode 100644
index 0000000000000..bc0dd022bfae4
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-256-fp16.ll
@@ -0,0 +1,132 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512fp16,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK
+
+
+declare <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f16(<4 x half>, metadata)
+declare <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f16(<4 x half>, metadata)
+declare <8 x i32> @llvm.experimental.constrained.fptosi.v8i32.v8f16(<8 x half>, metadata)
+declare <8 x i32> @llvm.experimental.constrained.fptoui.v8i32.v8f16(<8 x half>, metadata)
+declare <16 x i16> @llvm.experimental.constrained.fptosi.v16i16.v16f16(<16 x half>, metadata)
+declare <16 x i16> @llvm.experimental.constrained.fptoui.v16i16.v16f16(<16 x half>, metadata)
+declare <16 x i8> @llvm.experimental.constrained.fptosi.v16i8.v16f16(<16 x half>, metadata)
+declare <16 x i8> @llvm.experimental.constrained.fptoui.v16i8.v16f16(<16 x half>, metadata)
+declare <16 x i1> @llvm.experimental.constrained.fptosi.v16i1.v16f16(<16 x half>, metadata)
+declare <16 x i1> @llvm.experimental.constrained.fptoui.v16i1.v16f16(<16 x half>, metadata)
+
+define <4 x i64> @strict_vector_fptosi_v4f16_to_v4i64(<4 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v4f16_to_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-NEXT:    vcvttph2qq %xmm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f16(<4 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <4 x i64> %ret
+}
+
+define <4 x i64> @strict_vector_fptoui_v4f16_to_v4i64(<4 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v4f16_to_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-NEXT:    vcvttph2uqq %xmm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f16(<4 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <4 x i64> %ret
+}
+
+define <8 x i32> @strict_vector_fptosi_v8f16_to_v8i32(<8 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v8f16_to_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <8 x i32> @llvm.experimental.constrained.fptosi.v8i32.v8f16(<8 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <8 x i32> %ret
+}
+
+define <8 x i32> @strict_vector_fptoui_v8f16_to_v8i32(<8 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v8f16_to_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2udq %xmm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <8 x i32> @llvm.experimental.constrained.fptoui.v8i32.v8f16(<8 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <8 x i32> %ret
+}
+
+define <16 x i16> @strict_vector_fptosi_v16f16_to_v16i16(<16 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v16f16_to_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2dq %ymm0, %zmm0
+; CHECK-NEXT:    vpmovdw %zmm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <16 x i16> @llvm.experimental.constrained.fptosi.v16i16.v16f16(<16 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <16 x i16> %ret
+}
+
+define <16 x i16> @strict_vector_fptoui_v16f16_to_v16i16(<16 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v16f16_to_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2dq %ymm0, %zmm0
+; CHECK-NEXT:    vpmovdw %zmm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <16 x i16> @llvm.experimental.constrained.fptoui.v16i16.v16f16(<16 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <16 x i16> %ret
+}
+
+define <16 x i8> @strict_vector_fptosi_v16f16_to_v16i8(<16 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v16f16_to_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2dq %ymm0, %zmm0
+; CHECK-NEXT:    vpmovdb %zmm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <16 x i8> @llvm.experimental.constrained.fptosi.v16i8.v16f16(<16 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <16 x i8> %ret
+}
+
+define <16 x i8> @strict_vector_fptoui_v16f16_to_v16i8(<16 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v16f16_to_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2dq %ymm0, %zmm0
+; CHECK-NEXT:    vpmovdb %zmm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <16 x i8> @llvm.experimental.constrained.fptoui.v16i8.v16f16(<16 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <16 x i8> %ret
+}
+
+define <16 x i1> @strict_vector_fptosi_v16f16_to_v16i1(<16 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v16f16_to_v16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2dq %ymm0, %zmm0
+; CHECK-NEXT:    vpmovd2m %zmm0, %k0
+; CHECK-NEXT:    vpmovm2b %k0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <16 x i1> @llvm.experimental.constrained.fptosi.v16i1.v16f16(<16 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <16 x i1> %ret
+}
+
+define <16 x i1> @strict_vector_fptoui_v16f16_to_v16i1(<16 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v16f16_to_v16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2dq %ymm0, %zmm0
+; CHECK-NEXT:    vpslld $31, %zmm0, %zmm0
+; CHECK-NEXT:    vpmovd2m %zmm0, %k0
+; CHECK-NEXT:    vpmovm2b %k0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <16 x i1> @llvm.experimental.constrained.fptoui.v16i1.v16f16(<16 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <16 x i1> %ret
+}
+
+attributes #0 = { strictfp }

diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-512-fp16.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-512-fp16.ll
new file mode 100644
index 0000000000000..dc8823710291e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-512-fp16.ll
@@ -0,0 +1,124 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512fp16 -O3 | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16 -O3 | FileCheck %s --check-prefixes=CHECK
+
+
+declare <8 x i64> @llvm.experimental.constrained.fptosi.v8i64.v8f16(<8 x half>, metadata)
+declare <8 x i64> @llvm.experimental.constrained.fptoui.v8i64.v8f16(<8 x half>, metadata)
+declare <16 x i32> @llvm.experimental.constrained.fptosi.v16i32.v16f16(<16 x half>, metadata)
+declare <16 x i32> @llvm.experimental.constrained.fptoui.v16i32.v16f16(<16 x half>, metadata)
+declare <32 x i16> @llvm.experimental.constrained.fptosi.v32i16.v32f16(<32 x half>, metadata)
+declare <32 x i16> @llvm.experimental.constrained.fptoui.v32i16.v32f16(<32 x half>, metadata)
+declare <32 x i8> @llvm.experimental.constrained.fptosi.v32i8.v32f16(<32 x half>, metadata)
+declare <32 x i8> @llvm.experimental.constrained.fptoui.v32i8.v32f16(<32 x half>, metadata)
+declare <32 x i1> @llvm.experimental.constrained.fptosi.v32i1.v32f16(<32 x half>, metadata)
+declare <32 x i1> @llvm.experimental.constrained.fptoui.v32i1.v32f16(<32 x half>, metadata)
+
+define <8 x i64> @strict_vector_fptosi_v8f16_to_v8i64(<8 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v8f16_to_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2qq %xmm0, %zmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <8 x i64> @llvm.experimental.constrained.fptosi.v8i64.v8f16(<8 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <8 x i64> %ret
+}
+
+define <8 x i64> @strict_vector_fptoui_v8f16_to_v8i64(<8 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v8f16_to_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2uqq %xmm0, %zmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <8 x i64> @llvm.experimental.constrained.fptoui.v8i64.v8f16(<8 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <8 x i64> %ret
+}
+
+define <16 x i32> @strict_vector_fptosi_v16f16_to_v16i32(<16 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v16f16_to_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2dq %ymm0, %zmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <16 x i32> @llvm.experimental.constrained.fptosi.v16i32.v16f16(<16 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <16 x i32> %ret
+}
+
+define <16 x i32> @strict_vector_fptoui_v16f16_to_v16i32(<16 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v16f16_to_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2udq %ymm0, %zmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <16 x i32> @llvm.experimental.constrained.fptoui.v16i32.v16f16(<16 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <16 x i32> %ret
+}
+
+define <32 x i16> @strict_vector_fptosi_v32f16_to_v32i16(<32 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v32f16_to_v32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2w %zmm0, %zmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <32 x i16> @llvm.experimental.constrained.fptosi.v32i16.v32f16(<32 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <32 x i16> %ret
+}
+
+define <32 x i16> @strict_vector_fptoui_v32f16_to_v32i16(<32 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v32f16_to_v32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2uw %zmm0, %zmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <32 x i16> @llvm.experimental.constrained.fptoui.v32i16.v32f16(<32 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <32 x i16> %ret
+}
+
+define <32 x i8> @strict_vector_fptosi_v32f16_to_v32i8(<32 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v32f16_to_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2w %zmm0, %zmm0
+; CHECK-NEXT:    vpmovwb %zmm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <32 x i8> @llvm.experimental.constrained.fptosi.v32i8.v32f16(<32 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <32 x i8> %ret
+}
+
+define <32 x i8> @strict_vector_fptoui_v32f16_to_v32i8(<32 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v32f16_to_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2w %zmm0, %zmm0
+; CHECK-NEXT:    vpmovwb %zmm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <32 x i8> @llvm.experimental.constrained.fptoui.v32i8.v32f16(<32 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <32 x i8> %ret
+}
+
+define <32 x i1> @strict_vector_fptosi_v32f16_to_v32i1(<32 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptosi_v32f16_to_v32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2w %zmm0, %zmm0
+; CHECK-NEXT:    vpmovw2m %zmm0, %k0
+; CHECK-NEXT:    vpmovm2b %k0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <32 x i1> @llvm.experimental.constrained.fptosi.v32i1.v32f16(<32 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <32 x i1> %ret
+}
+
+define <32 x i1> @strict_vector_fptoui_v32f16_to_v32i1(<32 x half> %a) #0 {
+; CHECK-LABEL: strict_vector_fptoui_v32f16_to_v32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvttph2w %zmm0, %zmm0
+; CHECK-NEXT:    vpsllw $15, %zmm0, %zmm0
+; CHECK-NEXT:    vpmovw2m %zmm0, %k0
+; CHECK-NEXT:    vpmovm2b %k0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %ret = call <32 x i1> @llvm.experimental.constrained.fptoui.v32i1.v32f16(<32 x half> %a,
+                                              metadata !"fpexcept.strict") #0
+  ret <32 x i1> %ret
+}
+
+attributes #0 = { strictfp }

diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-128-fp16.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-128-fp16.ll
new file mode 100644
index 0000000000000..7e10ab56faae1
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-128-fp16.ll
@@ -0,0 +1,137 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512fp16,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,X64
+
+declare <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i1(<8 x i1>, metadata, metadata)
+declare <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i1(<8 x i1>, metadata, metadata)
+declare <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i8(<8 x i8>, metadata, metadata)
+declare <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i8(<8 x i8>, metadata, metadata)
+declare <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i16(<8 x i16>, metadata, metadata)
+declare <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i16(<8 x i16>, metadata, metadata)
+declare <4 x half> @llvm.experimental.constrained.sitofp.v4f16.v4i32(<4 x i32>, metadata, metadata)
+declare <4 x half> @llvm.experimental.constrained.uitofp.v4f16.v4i32(<4 x i32>, metadata, metadata)
+declare <2 x half> @llvm.experimental.constrained.sitofp.v2f16.v2i64(<2 x i64>, metadata, metadata)
+declare <2 x half> @llvm.experimental.constrained.uitofp.v2f16.v2i64(<2 x i64>, metadata, metadata)
+
+define <4 x half> @sitofp_v4i32_v4f16(<4 x i32> %x) #0 {
+; CHECK-LABEL: sitofp_v4i32_v4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtdq2ph %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+ %result = call <4 x half> @llvm.experimental.constrained.sitofp.v4f16.v4i32(<4 x i32> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <4 x half> %result
+}
+
+define <4 x half> @uitofp_v4i32_v4f16(<4 x i32> %x) #0 {
+; CHECK-LABEL: uitofp_v4i32_v4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtudq2ph %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+ %result = call <4 x half> @llvm.experimental.constrained.uitofp.v4f16.v4i32(<4 x i32> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <4 x half> %result
+}
+
+define <2 x half> @sitofp_v2i64_v2f16(<2 x i64> %x) #0 {
+; CHECK-LABEL: sitofp_v2i64_v2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtqq2ph %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+ %result = call <2 x half> @llvm.experimental.constrained.sitofp.v2f16.v2i64(<2 x i64> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <2 x half> %result
+}
+
+define <2 x half> @uitofp_v2i64_v2f16(<2 x i64> %x) #0 {
+; CHECK-LABEL: uitofp_v2i64_v2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtuqq2ph %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+ %result = call <2 x half> @llvm.experimental.constrained.uitofp.v2f16.v2i64(<2 x i64> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <2 x half> %result
+}
+
+define <8 x half> @sitofp_v8i1_v8f16(<8 x i1> %x) #0 {
+; CHECK-LABEL: sitofp_v8i1_v8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
+; CHECK-NEXT:    vpsraw $15, %xmm0, %xmm0
+; CHECK-NEXT:    vcvtw2ph %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+ %result = call <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i1(<8 x i1> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <8 x half> %result
+}
+
+define <8 x half> @uitofp_v8i1_v8f16(<8 x i1> %x) #0 {
+; X86-LABEL: uitofp_v8i1_v8f16:
+; X86:       # %bb.0:
+; X86-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-NEXT:    vcvtuw2ph %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: uitofp_v8i1_v8f16:
+; X64:       # %bb.0:
+; X64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT:    vcvtuw2ph %xmm0, %xmm0
+; X64-NEXT:    retq
+ %result = call <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i1(<8 x i1> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <8 x half> %result
+}
+
+define <8 x half> @sitofp_v8i8_v8f16(<8 x i8> %x) #0 {
+; CHECK-LABEL: sitofp_v8i8_v8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmovsxbw %xmm0, %xmm0
+; CHECK-NEXT:    vcvtw2ph %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+ %result = call <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i8(<8 x i8> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <8 x half> %result
+}
+
+define <8 x half> @uitofp_v8i8_v8f16(<8 x i8> %x) #0 {
+; CHECK-LABEL: uitofp_v8i8_v8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; CHECK-NEXT:    vcvtuw2ph %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+ %result = call <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i8(<8 x i8> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <8 x half> %result
+}
+
+define <8 x half> @sitofp_v8i16_v8f16(<8 x i16> %x) #0 {
+; CHECK-LABEL: sitofp_v8i16_v8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtw2ph %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+ %result = call <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i16(<8 x i16> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <8 x half> %result
+}
+
+define <8 x half> @uitofp_v8i16_v8f16(<8 x i16> %x) #0 {
+; CHECK-LABEL: uitofp_v8i16_v8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtuw2ph %xmm0, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+ %result = call <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i16(<8 x i16> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <8 x half> %result
+}
+
+attributes #0 = { strictfp }

diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-256-fp16.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-256-fp16.ll
new file mode 100644
index 0000000000000..9eaef5a772fd7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-256-fp16.ll
@@ -0,0 +1,144 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512fp16,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,X64
+
+declare <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i1(<16 x i1>, metadata, metadata)
+declare <16 x half> @llvm.experimental.constrained.uitofp.v16f16.v16i1(<16 x i1>, metadata, metadata)
+declare <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i8(<16 x i8>, metadata, metadata)
+declare <16 x half> @llvm.experimental.constrained.uitofp.v16f16.v16i8(<16 x i8>, metadata, metadata)
+declare <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i16(<16 x i16>, metadata, metadata)
+declare <16 x half> @llvm.experimental.constrained.uitofp.v16f16.v16i16(<16 x i16>, metadata, metadata)
+declare <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i32(<8 x i32>, metadata, metadata)
+declare <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i32(<8 x i32>, metadata, metadata)
+declare <4 x half> @llvm.experimental.constrained.sitofp.v4f16.v4i64(<4 x i64>, metadata, metadata)
+declare <4 x half> @llvm.experimental.constrained.uitofp.v4f16.v4i64(<4 x i64>, metadata, metadata)
+
+define <16 x half> @sitofp_v16i1_v16f16(<16 x i1> %x) #0 {
+; CHECK-LABEL: sitofp_v16i1_v16f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; CHECK-NEXT:    vpsllw $15, %ymm0, %ymm0
+; CHECK-NEXT:    vpsraw $15, %ymm0, %ymm0
+; CHECK-NEXT:    vcvtw2ph %ymm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
+ %result = call <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i1(<16 x i1> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <16 x half> %result
+}
+
+define <16 x half> @uitofp_v16i1_v16f16(<16 x i1> %x) #0 {
+; X86-LABEL: uitofp_v16i1_v16f16:
+; X86:       # %bb.0:
+; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; X86-NEXT:    vcvtuw2ph %ymm0, %ymm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: uitofp_v16i1_v16f16:
+; X64:       # %bb.0:
+; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; X64-NEXT:    vcvtuw2ph %ymm0, %ymm0
+; X64-NEXT:    retq
+ %result = call <16 x half> @llvm.experimental.constrained.uitofp.v16f16.v16i1(<16 x i1> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <16 x half> %result
+}
+
+define <16 x half> @sitofp_v16i8_v16f16(<16 x i8> %x) #0 {
+; CHECK-LABEL: sitofp_v16i8_v16f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm0
+; CHECK-NEXT:    vcvtw2ph %ymm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
+ %result = call <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i8(<16 x i8> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <16 x half> %result
+}
+
+define <16 x half> @uitofp_v16i8_v16f16(<16 x i8> %x) #0 {
+; CHECK-LABEL: uitofp_v16i8_v16f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; CHECK-NEXT:    vcvtuw2ph %ymm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
+ %result = call <16 x half> @llvm.experimental.constrained.uitofp.v16f16.v16i8(<16 x i8> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <16 x half> %result
+}
+
+define <16 x half> @sitofp_v16i16_v16f16(<16 x i16> %x) #0 {
+; CHECK-LABEL: sitofp_v16i16_v16f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtw2ph %ymm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
+ %result = call <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i16(<16 x i16> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <16 x half> %result
+}
+
+define <16 x half> @uitofp_v16i16_v16f16(<16 x i16> %x) #0 {
+; CHECK-LABEL: uitofp_v16i16_v16f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtuw2ph %ymm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
+ %result = call <16 x half> @llvm.experimental.constrained.uitofp.v16f16.v16i16(<16 x i16> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <16 x half> %result
+}
+
+define <8 x half> @sitofp_v8i32_v8f16(<8 x i32> %x) #0 {
+; CHECK-LABEL: sitofp_v8i32_v8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtdq2ph %ymm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    ret{{[l|q]}}
+ %result = call <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i32(<8 x i32> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <8 x half> %result
+}
+
+define <8 x half> @uitofp_v8i32_v8f16(<8 x i32> %x) #0 {
+; CHECK-LABEL: uitofp_v8i32_v8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtudq2ph %ymm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    ret{{[l|q]}}
+ %result = call <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i32(<8 x i32> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <8 x half> %result
+}
+
+define <4 x half> @sitofp_v4i64_v4f16(<4 x i64> %x) #0 {
+; CHECK-LABEL: sitofp_v4i64_v4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtqq2ph %ymm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    ret{{[l|q]}}
+ %result = call <4 x half> @llvm.experimental.constrained.sitofp.v4f16.v4i64(<4 x i64> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <4 x half> %result
+}
+
+define <4 x half> @uitofp_v4i64_v4f16(<4 x i64> %x) #0 {
+; CHECK-LABEL: uitofp_v4i64_v4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtuqq2ph %ymm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    ret{{[l|q]}}
+ %result = call <4 x half> @llvm.experimental.constrained.uitofp.v4f16.v4i64(<4 x i64> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <4 x half> %result
+}
+
+attributes #0 = { strictfp }

diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-512-fp16.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-512-fp16.ll
new file mode 100644
index 0000000000000..c807af0932b56
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-512-fp16.ll
@@ -0,0 +1,142 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512fp16 -O3 | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16 -O3 | FileCheck %s --check-prefixes=CHECK,X64
+
+declare <32 x half> @llvm.experimental.constrained.sitofp.v32f16.v32i1(<32 x i1>, metadata, metadata)
+declare <32 x half> @llvm.experimental.constrained.uitofp.v32f16.v32i1(<32 x i1>, metadata, metadata)
+declare <32 x half> @llvm.experimental.constrained.sitofp.v32f16.v32i8(<32 x i8>, metadata, metadata)
+declare <32 x half> @llvm.experimental.constrained.uitofp.v32f16.v32i8(<32 x i8>, metadata, metadata)
+declare <32 x half> @llvm.experimental.constrained.sitofp.v32f16.v32i16(<32 x i16>, metadata, metadata)
+declare <32 x half> @llvm.experimental.constrained.uitofp.v32f16.v32i16(<32 x i16>, metadata, metadata)
+declare <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i32(<16 x i32>, metadata, metadata)
+declare <16 x half> @llvm.experimental.constrained.uitofp.v16f16.v16i32(<16 x i32>, metadata, metadata)
+declare <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i64(<8 x i64>, metadata, metadata)
+declare <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i64(<8 x i64>, metadata, metadata)
+
+define <32 x half> @sitofp_v32i1_v32f16(<32 x i1> %x) #0 {
+; CHECK-LABEL: sitofp_v32i1_v32f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; CHECK-NEXT:    vpsllw $15, %zmm0, %zmm0
+; CHECK-NEXT:    vpsraw $15, %zmm0, %zmm0
+; CHECK-NEXT:    vcvtw2ph %zmm0, %zmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+ %result = call <32 x half> @llvm.experimental.constrained.sitofp.v32f16.v32i1(<32 x i1> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <32 x half> %result
+}
+
+define <32 x half> @uitofp_v32i1_v32f16(<32 x i1> %x) #0 {
+; X86-LABEL: uitofp_v32i1_v32f16:
+; X86:       # %bb.0:
+; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; X86-NEXT:    vcvtuw2ph %zmm0, %zmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: uitofp_v32i1_v32f16:
+; X64:       # %bb.0:
+; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; X64-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; X64-NEXT:    vcvtuw2ph %zmm0, %zmm0
+; X64-NEXT:    retq
+ %result = call <32 x half> @llvm.experimental.constrained.uitofp.v32f16.v32i1(<32 x i1> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <32 x half> %result
+}
+
+define <32 x half> @sitofp_v32i8_v32f16(<32 x i8> %x) #0 {
+; CHECK-LABEL: sitofp_v32i8_v32f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmovsxbw %ymm0, %zmm0
+; CHECK-NEXT:    vcvtw2ph %zmm0, %zmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+ %result = call <32 x half> @llvm.experimental.constrained.sitofp.v32f16.v32i8(<32 x i8> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <32 x half> %result
+}
+
+define <32 x half> @uitofp_v32i8_v32f16(<32 x i8> %x) #0 {
+; CHECK-LABEL: uitofp_v32i8_v32f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
+; CHECK-NEXT:    vcvtuw2ph %zmm0, %zmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+ %result = call <32 x half> @llvm.experimental.constrained.uitofp.v32f16.v32i8(<32 x i8> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <32 x half> %result
+}
+
+define <32 x half> @sitofp_v32i16_v32f16(<32 x i16> %x) #0 {
+; CHECK-LABEL: sitofp_v32i16_v32f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtw2ph %zmm0, %zmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+ %result = call <32 x half> @llvm.experimental.constrained.sitofp.v32f16.v32i16(<32 x i16> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <32 x half> %result
+}
+
+define <32 x half> @uitofp_v32i16_v32f16(<32 x i16> %x) #0 {
+; CHECK-LABEL: uitofp_v32i16_v32f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtuw2ph %zmm0, %zmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+ %result = call <32 x half> @llvm.experimental.constrained.uitofp.v32f16.v32i16(<32 x i16> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <32 x half> %result
+}
+
+define <16 x half> @sitofp_v16i32_v16f16(<16 x i32> %x) #0 {
+; CHECK-LABEL: sitofp_v16i32_v16f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtdq2ph %zmm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
+ %result = call <16 x half> @llvm.experimental.constrained.sitofp.v16f16.v16i32(<16 x i32> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <16 x half> %result
+}
+
+define <16 x half> @uitofp_v16i32_v16f16(<16 x i32> %x) #0 {
+; CHECK-LABEL: uitofp_v16i32_v16f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtudq2ph %zmm0, %ymm0
+; CHECK-NEXT:    ret{{[l|q]}}
+ %result = call <16 x half> @llvm.experimental.constrained.uitofp.v16f16.v16i32(<16 x i32> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <16 x half> %result
+}
+
+define <8 x half> @sitofp_v8i64_v8f16(<8 x i64> %x) #0 {
+; CHECK-LABEL: sitofp_v8i64_v8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtqq2ph %zmm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    ret{{[l|q]}}
+ %result = call <8 x half> @llvm.experimental.constrained.sitofp.v8f16.v8i64(<8 x i64> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <8 x half> %result
+}
+
+define <8 x half> @uitofp_v8i64_v8f16(<8 x i64> %x) #0 {
+; CHECK-LABEL: uitofp_v8i64_v8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcvtuqq2ph %zmm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    ret{{[l|q]}}
+ %result = call <8 x half> @llvm.experimental.constrained.uitofp.v8f16.v8i64(<8 x i64> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <8 x half> %result
+}
+
+attributes #0 = { strictfp }

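For readers cross-checking the strict-FP CodeGen tests above against the C-level interface, here is a minimal sketch of how the same conversions are reached from source. It is not part of this patch; it assumes the `_mm512_cvtepi16_ph`/`_mm512_cvtepi32_ph` names from the Intel AVX512-FP16 intrinsics specification and clang's `-mavx512fp16` option, under which clang should select the vcvtw2ph/vcvtdq2ph forms exercised by the tests.

```c
/* Sketch only, assuming the Intel AVX512-FP16 intrinsic names and the
 * -mavx512fp16 clang flag. Build with: clang -O2 -mavx512fp16 -c cvt.c */
#include <immintrin.h>

/* 32 x i16 -> 32 x half: expected to lower to vcvtw2ph. */
__m512h cvt_words_to_half(__m512i w) {
  return _mm512_cvtepi16_ph(w);
}

/* 16 x i32 -> 16 x half: expected to lower to vcvtdq2ph. */
__m256h cvt_dwords_to_half(__m512i d) {
  return _mm512_cvtepi32_ph(d);
}
```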
diff  --git a/llvm/test/MC/Disassembler/X86/avx512fp16.txt b/llvm/test/MC/Disassembler/X86/avx512fp16.txt
index 6ba043ecd1be2..8115431808335 100644
--- a/llvm/test/MC/Disassembler/X86/avx512fp16.txt
+++ b/llvm/test/MC/Disassembler/X86/avx512fp16.txt
@@ -460,3 +460,899 @@
 # ATT:   vucomish  -256(%rdx), %xmm30
 # INTEL: vucomish xmm30, word ptr [rdx - 256]
 0x62,0x65,0x7c,0x08,0x2e,0x72,0x80
+
+# ATT:   vcvtdq2ph %zmm29, %ymm30
+# INTEL: vcvtdq2ph ymm30, zmm29
+0x62,0x05,0x7c,0x48,0x5b,0xf5
+
+# ATT:   vcvtdq2ph {rn-sae}, %zmm29, %ymm30
+# INTEL: vcvtdq2ph ymm30, zmm29, {rn-sae}
+0x62,0x05,0x7c,0x18,0x5b,0xf5
+
+# ATT:   vcvtdq2ph  268435456(%rbp,%r14,8), %ymm30 {%k7}
+# INTEL: vcvtdq2ph ymm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7c,0x4f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtdq2ph  (%r9){1to16}, %ymm30
+# INTEL: vcvtdq2ph ymm30, dword ptr [r9]{1to16}
+0x62,0x45,0x7c,0x58,0x5b,0x31
+
+# ATT:   vcvtdq2ph  8128(%rcx), %ymm30
+# INTEL: vcvtdq2ph ymm30, zmmword ptr [rcx + 8128]
+0x62,0x65,0x7c,0x48,0x5b,0x71,0x7f
+
+# ATT:   vcvtdq2ph  -512(%rdx){1to16}, %ymm30 {%k7} {z}
+# INTEL: vcvtdq2ph ymm30 {k7} {z}, dword ptr [rdx - 512]{1to16}
+0x62,0x65,0x7c,0xdf,0x5b,0x72,0x80
+
+# ATT:   vcvtpd2ph %zmm29, %xmm30
+# INTEL: vcvtpd2ph xmm30, zmm29
+0x62,0x05,0xfd,0x48,0x5a,0xf5
+
+# ATT:   vcvtpd2ph {rn-sae}, %zmm29, %xmm30
+# INTEL: vcvtpd2ph xmm30, zmm29, {rn-sae}
+0x62,0x05,0xfd,0x18,0x5a,0xf5
+
+# ATT:   vcvtpd2phz  268435456(%rbp,%r14,8), %xmm30 {%k7}
+# INTEL: vcvtpd2ph xmm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0xfd,0x4f,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtpd2ph  (%r9){1to8}, %xmm30
+# INTEL: vcvtpd2ph xmm30, qword ptr [r9]{1to8}
+0x62,0x45,0xfd,0x58,0x5a,0x31
+
+# ATT:   vcvtpd2phz  8128(%rcx), %xmm30
+# INTEL: vcvtpd2ph xmm30, zmmword ptr [rcx + 8128]
+0x62,0x65,0xfd,0x48,0x5a,0x71,0x7f
+
+# ATT:   vcvtpd2ph  -1024(%rdx){1to8}, %xmm30 {%k7} {z}
+# INTEL: vcvtpd2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to8}
+0x62,0x65,0xfd,0xdf,0x5a,0x72,0x80
+
+# ATT:   vcvtph2dq %ymm29, %zmm30
+# INTEL: vcvtph2dq zmm30, ymm29
+0x62,0x05,0x7d,0x48,0x5b,0xf5
+
+# ATT:   vcvtph2dq {rn-sae}, %ymm29, %zmm30
+# INTEL: vcvtph2dq zmm30, ymm29, {rn-sae}
+0x62,0x05,0x7d,0x18,0x5b,0xf5
+
+# ATT:   vcvtph2dq  268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvtph2dq zmm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7d,0x4f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtph2dq  (%r9){1to16}, %zmm30
+# INTEL: vcvtph2dq zmm30, word ptr [r9]{1to16}
+0x62,0x45,0x7d,0x58,0x5b,0x31
+
+# ATT:   vcvtph2dq  4064(%rcx), %zmm30
+# INTEL: vcvtph2dq zmm30, ymmword ptr [rcx + 4064]
+0x62,0x65,0x7d,0x48,0x5b,0x71,0x7f
+
+# ATT:   vcvtph2dq  -256(%rdx){1to16}, %zmm30 {%k7} {z}
+# INTEL: vcvtph2dq zmm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+0x62,0x65,0x7d,0xdf,0x5b,0x72,0x80
+
+# ATT:   vcvtph2pd %xmm29, %zmm30
+# INTEL: vcvtph2pd zmm30, xmm29
+0x62,0x05,0x7c,0x48,0x5a,0xf5
+
+# ATT:   vcvtph2pd {sae}, %xmm29, %zmm30
+# INTEL: vcvtph2pd zmm30, xmm29, {sae}
+0x62,0x05,0x7c,0x18,0x5a,0xf5
+
+# ATT:   vcvtph2pd  268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvtph2pd zmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7c,0x4f,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtph2pd  (%r9){1to8}, %zmm30
+# INTEL: vcvtph2pd zmm30, word ptr [r9]{1to8}
+0x62,0x45,0x7c,0x58,0x5a,0x31
+
+# ATT:   vcvtph2pd  2032(%rcx), %zmm30
+# INTEL: vcvtph2pd zmm30, xmmword ptr [rcx + 2032]
+0x62,0x65,0x7c,0x48,0x5a,0x71,0x7f
+
+# ATT:   vcvtph2pd  -256(%rdx){1to8}, %zmm30 {%k7} {z}
+# INTEL: vcvtph2pd zmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+0x62,0x65,0x7c,0xdf,0x5a,0x72,0x80
+
+# ATT:   vcvtph2psx %ymm29, %zmm30
+# INTEL: vcvtph2psx zmm30, ymm29
+0x62,0x06,0x7d,0x48,0x13,0xf5
+
+# ATT:   vcvtph2psx {sae}, %ymm29, %zmm30
+# INTEL: vcvtph2psx zmm30, ymm29, {sae}
+0x62,0x06,0x7d,0x18,0x13,0xf5
+
+# ATT:   vcvtph2psx  268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvtph2psx zmm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x26,0x7d,0x4f,0x13,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtph2psx  (%r9){1to16}, %zmm30
+# INTEL: vcvtph2psx zmm30, word ptr [r9]{1to16}
+0x62,0x46,0x7d,0x58,0x13,0x31
+
+# ATT:   vcvtph2psx  4064(%rcx), %zmm30
+# INTEL: vcvtph2psx zmm30, ymmword ptr [rcx + 4064]
+0x62,0x66,0x7d,0x48,0x13,0x71,0x7f
+
+# ATT:   vcvtph2psx  -256(%rdx){1to16}, %zmm30 {%k7} {z}
+# INTEL: vcvtph2psx zmm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+0x62,0x66,0x7d,0xdf,0x13,0x72,0x80
+
+# ATT:   vcvtph2qq %xmm29, %zmm30
+# INTEL: vcvtph2qq zmm30, xmm29
+0x62,0x05,0x7d,0x48,0x7b,0xf5
+
+# ATT:   vcvtph2qq {rn-sae}, %xmm29, %zmm30
+# INTEL: vcvtph2qq zmm30, xmm29, {rn-sae}
+0x62,0x05,0x7d,0x18,0x7b,0xf5
+
+# ATT:   vcvtph2qq  268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvtph2qq zmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7d,0x4f,0x7b,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtph2qq  (%r9){1to8}, %zmm30
+# INTEL: vcvtph2qq zmm30, word ptr [r9]{1to8}
+0x62,0x45,0x7d,0x58,0x7b,0x31
+
+# ATT:   vcvtph2qq  2032(%rcx), %zmm30
+# INTEL: vcvtph2qq zmm30, xmmword ptr [rcx + 2032]
+0x62,0x65,0x7d,0x48,0x7b,0x71,0x7f
+
+# ATT:   vcvtph2qq  -256(%rdx){1to8}, %zmm30 {%k7} {z}
+# INTEL: vcvtph2qq zmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+0x62,0x65,0x7d,0xdf,0x7b,0x72,0x80
+
+# ATT:   vcvtph2udq %ymm29, %zmm30
+# INTEL: vcvtph2udq zmm30, ymm29
+0x62,0x05,0x7c,0x48,0x79,0xf5
+
+# ATT:   vcvtph2udq {rn-sae}, %ymm29, %zmm30
+# INTEL: vcvtph2udq zmm30, ymm29, {rn-sae}
+0x62,0x05,0x7c,0x18,0x79,0xf5
+
+# ATT:   vcvtph2udq  268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvtph2udq zmm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7c,0x4f,0x79,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtph2udq  (%r9){1to16}, %zmm30
+# INTEL: vcvtph2udq zmm30, word ptr [r9]{1to16}
+0x62,0x45,0x7c,0x58,0x79,0x31
+
+# ATT:   vcvtph2udq  4064(%rcx), %zmm30
+# INTEL: vcvtph2udq zmm30, ymmword ptr [rcx + 4064]
+0x62,0x65,0x7c,0x48,0x79,0x71,0x7f
+
+# ATT:   vcvtph2udq  -256(%rdx){1to16}, %zmm30 {%k7} {z}
+# INTEL: vcvtph2udq zmm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+0x62,0x65,0x7c,0xdf,0x79,0x72,0x80
+
+# ATT:   vcvtph2uqq %xmm29, %zmm30
+# INTEL: vcvtph2uqq zmm30, xmm29
+0x62,0x05,0x7d,0x48,0x79,0xf5
+
+# ATT:   vcvtph2uqq {rn-sae}, %xmm29, %zmm30
+# INTEL: vcvtph2uqq zmm30, xmm29, {rn-sae}
+0x62,0x05,0x7d,0x18,0x79,0xf5
+
+# ATT:   vcvtph2uqq  268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvtph2uqq zmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7d,0x4f,0x79,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtph2uqq  (%r9){1to8}, %zmm30
+# INTEL: vcvtph2uqq zmm30, word ptr [r9]{1to8}
+0x62,0x45,0x7d,0x58,0x79,0x31
+
+# ATT:   vcvtph2uqq  2032(%rcx), %zmm30
+# INTEL: vcvtph2uqq zmm30, xmmword ptr [rcx + 2032]
+0x62,0x65,0x7d,0x48,0x79,0x71,0x7f
+
+# ATT:   vcvtph2uqq  -256(%rdx){1to8}, %zmm30 {%k7} {z}
+# INTEL: vcvtph2uqq zmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+0x62,0x65,0x7d,0xdf,0x79,0x72,0x80
+
+# ATT:   vcvtph2uw %zmm29, %zmm30
+# INTEL: vcvtph2uw zmm30, zmm29
+0x62,0x05,0x7c,0x48,0x7d,0xf5
+
+# ATT:   vcvtph2uw {rn-sae}, %zmm29, %zmm30
+# INTEL: vcvtph2uw zmm30, zmm29, {rn-sae}
+0x62,0x05,0x7c,0x18,0x7d,0xf5
+
+# ATT:   vcvtph2uw  268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvtph2uw zmm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7c,0x4f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtph2uw  (%r9){1to32}, %zmm30
+# INTEL: vcvtph2uw zmm30, word ptr [r9]{1to32}
+0x62,0x45,0x7c,0x58,0x7d,0x31
+
+# ATT:   vcvtph2uw  8128(%rcx), %zmm30
+# INTEL: vcvtph2uw zmm30, zmmword ptr [rcx + 8128]
+0x62,0x65,0x7c,0x48,0x7d,0x71,0x7f
+
+# ATT:   vcvtph2uw  -256(%rdx){1to32}, %zmm30 {%k7} {z}
+# INTEL: vcvtph2uw zmm30 {k7} {z}, word ptr [rdx - 256]{1to32}
+0x62,0x65,0x7c,0xdf,0x7d,0x72,0x80
+
+# ATT:   vcvtph2w %zmm29, %zmm30
+# INTEL: vcvtph2w zmm30, zmm29
+0x62,0x05,0x7d,0x48,0x7d,0xf5
+
+# ATT:   vcvtph2w {rn-sae}, %zmm29, %zmm30
+# INTEL: vcvtph2w zmm30, zmm29, {rn-sae}
+0x62,0x05,0x7d,0x18,0x7d,0xf5
+
+# ATT:   vcvtph2w  268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvtph2w zmm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7d,0x4f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtph2w  (%r9){1to32}, %zmm30
+# INTEL: vcvtph2w zmm30, word ptr [r9]{1to32}
+0x62,0x45,0x7d,0x58,0x7d,0x31
+
+# ATT:   vcvtph2w  8128(%rcx), %zmm30
+# INTEL: vcvtph2w zmm30, zmmword ptr [rcx + 8128]
+0x62,0x65,0x7d,0x48,0x7d,0x71,0x7f
+
+# ATT:   vcvtph2w  -256(%rdx){1to32}, %zmm30 {%k7} {z}
+# INTEL: vcvtph2w zmm30 {k7} {z}, word ptr [rdx - 256]{1to32}
+0x62,0x65,0x7d,0xdf,0x7d,0x72,0x80
+
+# ATT:   vcvtps2phx %zmm29, %ymm30
+# INTEL: vcvtps2phx ymm30, zmm29
+0x62,0x05,0x7d,0x48,0x1d,0xf5
+
+# ATT:   vcvtps2phx {rn-sae}, %zmm29, %ymm30
+# INTEL: vcvtps2phx ymm30, zmm29, {rn-sae}
+0x62,0x05,0x7d,0x18,0x1d,0xf5
+
+# ATT:   vcvtps2phx  268435456(%rbp,%r14,8), %ymm30 {%k7}
+# INTEL: vcvtps2phx ymm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7d,0x4f,0x1d,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtps2phx  (%r9){1to16}, %ymm30
+# INTEL: vcvtps2phx ymm30, dword ptr [r9]{1to16}
+0x62,0x45,0x7d,0x58,0x1d,0x31
+
+# ATT:   vcvtps2phx  8128(%rcx), %ymm30
+# INTEL: vcvtps2phx ymm30, zmmword ptr [rcx + 8128]
+0x62,0x65,0x7d,0x48,0x1d,0x71,0x7f
+
+# ATT:   vcvtps2phx  -512(%rdx){1to16}, %ymm30 {%k7} {z}
+# INTEL: vcvtps2phx ymm30 {k7} {z}, dword ptr [rdx - 512]{1to16}
+0x62,0x65,0x7d,0xdf,0x1d,0x72,0x80
+
+# ATT:   vcvtqq2ph %zmm29, %xmm30
+# INTEL: vcvtqq2ph xmm30, zmm29
+0x62,0x05,0xfc,0x48,0x5b,0xf5
+
+# ATT:   vcvtqq2ph {rn-sae}, %zmm29, %xmm30
+# INTEL: vcvtqq2ph xmm30, zmm29, {rn-sae}
+0x62,0x05,0xfc,0x18,0x5b,0xf5
+
+# ATT:   vcvtqq2phz  268435456(%rbp,%r14,8), %xmm30 {%k7}
+# INTEL: vcvtqq2ph xmm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0xfc,0x4f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtqq2ph  (%r9){1to8}, %xmm30
+# INTEL: vcvtqq2ph xmm30, qword ptr [r9]{1to8}
+0x62,0x45,0xfc,0x58,0x5b,0x31
+
+# ATT:   vcvtqq2phz  8128(%rcx), %xmm30
+# INTEL: vcvtqq2ph xmm30, zmmword ptr [rcx + 8128]
+0x62,0x65,0xfc,0x48,0x5b,0x71,0x7f
+
+# ATT:   vcvtqq2ph  -1024(%rdx){1to8}, %xmm30 {%k7} {z}
+# INTEL: vcvtqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to8}
+0x62,0x65,0xfc,0xdf,0x5b,0x72,0x80
+
+# ATT:   vcvtsd2sh %xmm28, %xmm29, %xmm30
+# INTEL: vcvtsd2sh xmm30, xmm29, xmm28
+0x62,0x05,0x97,0x00,0x5a,0xf4
+
+# ATT:   vcvtsd2sh {rn-sae}, %xmm28, %xmm29, %xmm30
+# INTEL: vcvtsd2sh xmm30, xmm29, xmm28, {rn-sae}
+0x62,0x05,0x97,0x10,0x5a,0xf4
+
+# ATT:   vcvtsd2sh  268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
+# INTEL: vcvtsd2sh xmm30 {k7}, xmm29, qword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x97,0x07,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtsd2sh  (%r9), %xmm29, %xmm30
+# INTEL: vcvtsd2sh xmm30, xmm29, qword ptr [r9]
+0x62,0x45,0x97,0x00,0x5a,0x31
+
+# ATT:   vcvtsd2sh  1016(%rcx), %xmm29, %xmm30
+# INTEL: vcvtsd2sh xmm30, xmm29, qword ptr [rcx + 1016]
+0x62,0x65,0x97,0x00,0x5a,0x71,0x7f
+
+# ATT:   vcvtsd2sh  -1024(%rdx), %xmm29, %xmm30 {%k7} {z}
+# INTEL: vcvtsd2sh xmm30 {k7} {z}, xmm29, qword ptr [rdx - 1024]
+0x62,0x65,0x97,0x87,0x5a,0x72,0x80
+
+# ATT:   vcvtsh2sd %xmm28, %xmm29, %xmm30
+# INTEL: vcvtsh2sd xmm30, xmm29, xmm28
+0x62,0x05,0x16,0x00,0x5a,0xf4
+
+# ATT:   vcvtsh2sd {sae}, %xmm28, %xmm29, %xmm30
+# INTEL: vcvtsh2sd xmm30, xmm29, xmm28, {sae}
+0x62,0x05,0x16,0x10,0x5a,0xf4
+
+# ATT:   vcvtsh2sd  268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
+# INTEL: vcvtsh2sd xmm30 {k7}, xmm29, word ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x16,0x07,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtsh2sd  (%r9), %xmm29, %xmm30
+# INTEL: vcvtsh2sd xmm30, xmm29, word ptr [r9]
+0x62,0x45,0x16,0x00,0x5a,0x31
+
+# ATT:   vcvtsh2sd  254(%rcx), %xmm29, %xmm30
+# INTEL: vcvtsh2sd xmm30, xmm29, word ptr [rcx + 254]
+0x62,0x65,0x16,0x00,0x5a,0x71,0x7f
+
+# ATT:   vcvtsh2sd  -256(%rdx), %xmm29, %xmm30 {%k7} {z}
+# INTEL: vcvtsh2sd xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]
+0x62,0x65,0x16,0x87,0x5a,0x72,0x80
+
+# ATT:   vcvtsh2si %xmm30, %edx
+# INTEL: vcvtsh2si edx, xmm30
+0x62,0x95,0x7e,0x08,0x2d,0xd6
+
+# ATT:   vcvtsh2si {rn-sae}, %xmm30, %edx
+# INTEL: vcvtsh2si edx, xmm30, {rn-sae}
+0x62,0x95,0x7e,0x18,0x2d,0xd6
+
+# ATT:   vcvtsh2si %xmm30, %r12
+# INTEL: vcvtsh2si r12, xmm30
+0x62,0x15,0xfe,0x08,0x2d,0xe6
+
+# ATT:   vcvtsh2si {rn-sae}, %xmm30, %r12
+# INTEL: vcvtsh2si r12, xmm30, {rn-sae}
+0x62,0x15,0xfe,0x18,0x2d,0xe6
+
+# ATT:   vcvtsh2si  268435456(%rbp,%r14,8), %edx
+# INTEL: vcvtsh2si edx, word ptr [rbp + 8*r14 + 268435456]
+0x62,0xb5,0x7e,0x08,0x2d,0x94,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtsh2si  (%r9), %edx
+# INTEL: vcvtsh2si edx, word ptr [r9]
+0x62,0xd5,0x7e,0x08,0x2d,0x11
+
+# ATT:   vcvtsh2si  254(%rcx), %edx
+# INTEL: vcvtsh2si edx, word ptr [rcx + 254]
+0x62,0xf5,0x7e,0x08,0x2d,0x51,0x7f
+
+# ATT:   vcvtsh2si  -256(%rdx), %edx
+# INTEL: vcvtsh2si edx, word ptr [rdx - 256]
+0x62,0xf5,0x7e,0x08,0x2d,0x52,0x80
+
+# ATT:   vcvtsh2si  268435456(%rbp,%r14,8), %r12
+# INTEL: vcvtsh2si r12, word ptr [rbp + 8*r14 + 268435456]
+0x62,0x35,0xfe,0x08,0x2d,0xa4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtsh2si  (%r9), %r12
+# INTEL: vcvtsh2si r12, word ptr [r9]
+0x62,0x55,0xfe,0x08,0x2d,0x21
+
+# ATT:   vcvtsh2si  254(%rcx), %r12
+# INTEL: vcvtsh2si r12, word ptr [rcx + 254]
+0x62,0x75,0xfe,0x08,0x2d,0x61,0x7f
+
+# ATT:   vcvtsh2si  -256(%rdx), %r12
+# INTEL: vcvtsh2si r12, word ptr [rdx - 256]
+0x62,0x75,0xfe,0x08,0x2d,0x62,0x80
+
+# ATT:   vcvtsh2ss %xmm28, %xmm29, %xmm30
+# INTEL: vcvtsh2ss xmm30, xmm29, xmm28
+0x62,0x06,0x14,0x00,0x13,0xf4
+
+# ATT:   vcvtsh2ss {sae}, %xmm28, %xmm29, %xmm30
+# INTEL: vcvtsh2ss xmm30, xmm29, xmm28, {sae}
+0x62,0x06,0x14,0x10,0x13,0xf4
+
+# ATT:   vcvtsh2ss  268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
+# INTEL: vcvtsh2ss xmm30 {k7}, xmm29, word ptr [rbp + 8*r14 + 268435456]
+0x62,0x26,0x14,0x07,0x13,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtsh2ss  (%r9), %xmm29, %xmm30
+# INTEL: vcvtsh2ss xmm30, xmm29, word ptr [r9]
+0x62,0x46,0x14,0x00,0x13,0x31
+
+# ATT:   vcvtsh2ss  254(%rcx), %xmm29, %xmm30
+# INTEL: vcvtsh2ss xmm30, xmm29, word ptr [rcx + 254]
+0x62,0x66,0x14,0x00,0x13,0x71,0x7f
+
+# ATT:   vcvtsh2ss  -256(%rdx), %xmm29, %xmm30 {%k7} {z}
+# INTEL: vcvtsh2ss xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]
+0x62,0x66,0x14,0x87,0x13,0x72,0x80
+
+# ATT:   vcvtsh2usi %xmm30, %edx
+# INTEL: vcvtsh2usi edx, xmm30
+0x62,0x95,0x7e,0x08,0x79,0xd6
+
+# ATT:   vcvtsh2usi {rn-sae}, %xmm30, %edx
+# INTEL: vcvtsh2usi edx, xmm30, {rn-sae}
+0x62,0x95,0x7e,0x18,0x79,0xd6
+
+# ATT:   vcvtsh2usi %xmm30, %r12
+# INTEL: vcvtsh2usi r12, xmm30
+0x62,0x15,0xfe,0x08,0x79,0xe6
+
+# ATT:   vcvtsh2usi {rn-sae}, %xmm30, %r12
+# INTEL: vcvtsh2usi r12, xmm30, {rn-sae}
+0x62,0x15,0xfe,0x18,0x79,0xe6
+
+# ATT:   vcvtsh2usi  268435456(%rbp,%r14,8), %edx
+# INTEL: vcvtsh2usi edx, word ptr [rbp + 8*r14 + 268435456]
+0x62,0xb5,0x7e,0x08,0x79,0x94,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtsh2usi  (%r9), %edx
+# INTEL: vcvtsh2usi edx, word ptr [r9]
+0x62,0xd5,0x7e,0x08,0x79,0x11
+
+# ATT:   vcvtsh2usi  254(%rcx), %edx
+# INTEL: vcvtsh2usi edx, word ptr [rcx + 254]
+0x62,0xf5,0x7e,0x08,0x79,0x51,0x7f
+
+# ATT:   vcvtsh2usi  -256(%rdx), %edx
+# INTEL: vcvtsh2usi edx, word ptr [rdx - 256]
+0x62,0xf5,0x7e,0x08,0x79,0x52,0x80
+
+# ATT:   vcvtsh2usi  268435456(%rbp,%r14,8), %r12
+# INTEL: vcvtsh2usi r12, word ptr [rbp + 8*r14 + 268435456]
+0x62,0x35,0xfe,0x08,0x79,0xa4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtsh2usi  (%r9), %r12
+# INTEL: vcvtsh2usi r12, word ptr [r9]
+0x62,0x55,0xfe,0x08,0x79,0x21
+
+# ATT:   vcvtsh2usi  254(%rcx), %r12
+# INTEL: vcvtsh2usi r12, word ptr [rcx + 254]
+0x62,0x75,0xfe,0x08,0x79,0x61,0x7f
+
+# ATT:   vcvtsh2usi  -256(%rdx), %r12
+# INTEL: vcvtsh2usi r12, word ptr [rdx - 256]
+0x62,0x75,0xfe,0x08,0x79,0x62,0x80
+
+# ATT:   vcvtsi2sh %r12, %xmm29, %xmm30
+# INTEL: vcvtsi2sh xmm30, xmm29, r12
+0x62,0x45,0x96,0x00,0x2a,0xf4
+
+# ATT:   vcvtsi2sh %r12, {rn-sae}, %xmm29, %xmm30
+# INTEL: vcvtsi2sh xmm30, xmm29, {rn-sae}, r12
+0x62,0x45,0x96,0x10,0x2a,0xf4
+
+# ATT:   vcvtsi2sh %edx, %xmm29, %xmm30
+# INTEL: vcvtsi2sh xmm30, xmm29, edx
+0x62,0x65,0x16,0x00,0x2a,0xf2
+
+# ATT:   vcvtsi2sh %edx, {rn-sae}, %xmm29, %xmm30
+# INTEL: vcvtsi2sh xmm30, xmm29, {rn-sae}, edx
+0x62,0x65,0x16,0x10,0x2a,0xf2
+
+# ATT:   vcvtsi2shl  268435456(%rbp,%r14,8), %xmm29, %xmm30
+# INTEL: vcvtsi2sh xmm30, xmm29, dword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x16,0x00,0x2a,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtsi2shl  (%r9), %xmm29, %xmm30
+# INTEL: vcvtsi2sh xmm30, xmm29, dword ptr [r9]
+0x62,0x45,0x16,0x00,0x2a,0x31
+
+# ATT:   vcvtsi2shl  508(%rcx), %xmm29, %xmm30
+# INTEL: vcvtsi2sh xmm30, xmm29, dword ptr [rcx + 508]
+0x62,0x65,0x16,0x00,0x2a,0x71,0x7f
+
+# ATT:   vcvtsi2shl  -512(%rdx), %xmm29, %xmm30
+# INTEL: vcvtsi2sh xmm30, xmm29, dword ptr [rdx - 512]
+0x62,0x65,0x16,0x00,0x2a,0x72,0x80
+
+# ATT:   vcvtsi2shq  1016(%rcx), %xmm29, %xmm30
+# INTEL: vcvtsi2sh xmm30, xmm29, qword ptr [rcx + 1016]
+0x62,0x65,0x96,0x00,0x2a,0x71,0x7f
+
+# ATT:   vcvtsi2shq  -1024(%rdx), %xmm29, %xmm30
+# INTEL: vcvtsi2sh xmm30, xmm29, qword ptr [rdx - 1024]
+0x62,0x65,0x96,0x00,0x2a,0x72,0x80
+
+# ATT:   vcvtss2sh %xmm28, %xmm29, %xmm30
+# INTEL: vcvtss2sh xmm30, xmm29, xmm28
+0x62,0x05,0x14,0x00,0x1d,0xf4
+
+# ATT:   vcvtss2sh {rn-sae}, %xmm28, %xmm29, %xmm30
+# INTEL: vcvtss2sh xmm30, xmm29, xmm28, {rn-sae}
+0x62,0x05,0x14,0x10,0x1d,0xf4
+
+# ATT:   vcvtss2sh  268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
+# INTEL: vcvtss2sh xmm30 {k7}, xmm29, dword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x14,0x07,0x1d,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtss2sh  (%r9), %xmm29, %xmm30
+# INTEL: vcvtss2sh xmm30, xmm29, dword ptr [r9]
+0x62,0x45,0x14,0x00,0x1d,0x31
+
+# ATT:   vcvtss2sh  508(%rcx), %xmm29, %xmm30
+# INTEL: vcvtss2sh xmm30, xmm29, dword ptr [rcx + 508]
+0x62,0x65,0x14,0x00,0x1d,0x71,0x7f
+
+# ATT:   vcvtss2sh  -512(%rdx), %xmm29, %xmm30 {%k7} {z}
+# INTEL: vcvtss2sh xmm30 {k7} {z}, xmm29, dword ptr [rdx - 512]
+0x62,0x65,0x14,0x87,0x1d,0x72,0x80
+
+# ATT:   vcvttph2dq %ymm29, %zmm30
+# INTEL: vcvttph2dq zmm30, ymm29
+0x62,0x05,0x7e,0x48,0x5b,0xf5
+
+# ATT:   vcvttph2dq {sae}, %ymm29, %zmm30
+# INTEL: vcvttph2dq zmm30, ymm29, {sae}
+0x62,0x05,0x7e,0x18,0x5b,0xf5
+
+# ATT:   vcvttph2dq  268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvttph2dq zmm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7e,0x4f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvttph2dq  (%r9){1to16}, %zmm30
+# INTEL: vcvttph2dq zmm30, word ptr [r9]{1to16}
+0x62,0x45,0x7e,0x58,0x5b,0x31
+
+# ATT:   vcvttph2dq  4064(%rcx), %zmm30
+# INTEL: vcvttph2dq zmm30, ymmword ptr [rcx + 4064]
+0x62,0x65,0x7e,0x48,0x5b,0x71,0x7f
+
+# ATT:   vcvttph2dq  -256(%rdx){1to16}, %zmm30 {%k7} {z}
+# INTEL: vcvttph2dq zmm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+0x62,0x65,0x7e,0xdf,0x5b,0x72,0x80
+
+# ATT:   vcvttph2qq %xmm29, %zmm30
+# INTEL: vcvttph2qq zmm30, xmm29
+0x62,0x05,0x7d,0x48,0x7a,0xf5
+
+# ATT:   vcvttph2qq {sae}, %xmm29, %zmm30
+# INTEL: vcvttph2qq zmm30, xmm29, {sae}
+0x62,0x05,0x7d,0x18,0x7a,0xf5
+
+# ATT:   vcvttph2qq  268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvttph2qq zmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7d,0x4f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvttph2qq  (%r9){1to8}, %zmm30
+# INTEL: vcvttph2qq zmm30, word ptr [r9]{1to8}
+0x62,0x45,0x7d,0x58,0x7a,0x31
+
+# ATT:   vcvttph2qq  2032(%rcx), %zmm30
+# INTEL: vcvttph2qq zmm30, xmmword ptr [rcx + 2032]
+0x62,0x65,0x7d,0x48,0x7a,0x71,0x7f
+
+# ATT:   vcvttph2qq  -256(%rdx){1to8}, %zmm30 {%k7} {z}
+# INTEL: vcvttph2qq zmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+0x62,0x65,0x7d,0xdf,0x7a,0x72,0x80
+
+# ATT:   vcvttph2udq %ymm29, %zmm30
+# INTEL: vcvttph2udq zmm30, ymm29
+0x62,0x05,0x7c,0x48,0x78,0xf5
+
+# ATT:   vcvttph2udq {sae}, %ymm29, %zmm30
+# INTEL: vcvttph2udq zmm30, ymm29, {sae}
+0x62,0x05,0x7c,0x18,0x78,0xf5
+
+# ATT:   vcvttph2udq  268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvttph2udq zmm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7c,0x4f,0x78,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvttph2udq  (%r9){1to16}, %zmm30
+# INTEL: vcvttph2udq zmm30, word ptr [r9]{1to16}
+0x62,0x45,0x7c,0x58,0x78,0x31
+
+# ATT:   vcvttph2udq  4064(%rcx), %zmm30
+# INTEL: vcvttph2udq zmm30, ymmword ptr [rcx + 4064]
+0x62,0x65,0x7c,0x48,0x78,0x71,0x7f
+
+# ATT:   vcvttph2udq  -256(%rdx){1to16}, %zmm30 {%k7} {z}
+# INTEL: vcvttph2udq zmm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+0x62,0x65,0x7c,0xdf,0x78,0x72,0x80
+
+# ATT:   vcvttph2uqq %xmm29, %zmm30
+# INTEL: vcvttph2uqq zmm30, xmm29
+0x62,0x05,0x7d,0x48,0x78,0xf5
+
+# ATT:   vcvttph2uqq {sae}, %xmm29, %zmm30
+# INTEL: vcvttph2uqq zmm30, xmm29, {sae}
+0x62,0x05,0x7d,0x18,0x78,0xf5
+
+# ATT:   vcvttph2uqq  268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvttph2uqq zmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7d,0x4f,0x78,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvttph2uqq  (%r9){1to8}, %zmm30
+# INTEL: vcvttph2uqq zmm30, word ptr [r9]{1to8}
+0x62,0x45,0x7d,0x58,0x78,0x31
+
+# ATT:   vcvttph2uqq  2032(%rcx), %zmm30
+# INTEL: vcvttph2uqq zmm30, xmmword ptr [rcx + 2032]
+0x62,0x65,0x7d,0x48,0x78,0x71,0x7f
+
+# ATT:   vcvttph2uqq  -256(%rdx){1to8}, %zmm30 {%k7} {z}
+# INTEL: vcvttph2uqq zmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+0x62,0x65,0x7d,0xdf,0x78,0x72,0x80
+
+# ATT:   vcvttph2uw %zmm29, %zmm30
+# INTEL: vcvttph2uw zmm30, zmm29
+0x62,0x05,0x7c,0x48,0x7c,0xf5
+
+# ATT:   vcvttph2uw {sae}, %zmm29, %zmm30
+# INTEL: vcvttph2uw zmm30, zmm29, {sae}
+0x62,0x05,0x7c,0x18,0x7c,0xf5
+
+# ATT:   vcvttph2uw  268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvttph2uw zmm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7c,0x4f,0x7c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvttph2uw  (%r9){1to32}, %zmm30
+# INTEL: vcvttph2uw zmm30, word ptr [r9]{1to32}
+0x62,0x45,0x7c,0x58,0x7c,0x31
+
+# ATT:   vcvttph2uw  8128(%rcx), %zmm30
+# INTEL: vcvttph2uw zmm30, zmmword ptr [rcx + 8128]
+0x62,0x65,0x7c,0x48,0x7c,0x71,0x7f
+
+# ATT:   vcvttph2uw  -256(%rdx){1to32}, %zmm30 {%k7} {z}
+# INTEL: vcvttph2uw zmm30 {k7} {z}, word ptr [rdx - 256]{1to32}
+0x62,0x65,0x7c,0xdf,0x7c,0x72,0x80
+
+# ATT:   vcvttph2w %zmm29, %zmm30
+# INTEL: vcvttph2w zmm30, zmm29
+0x62,0x05,0x7d,0x48,0x7c,0xf5
+
+# ATT:   vcvttph2w {sae}, %zmm29, %zmm30
+# INTEL: vcvttph2w zmm30, zmm29, {sae}
+0x62,0x05,0x7d,0x18,0x7c,0xf5
+
+# ATT:   vcvttph2w  268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvttph2w zmm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7d,0x4f,0x7c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvttph2w  (%r9){1to32}, %zmm30
+# INTEL: vcvttph2w zmm30, word ptr [r9]{1to32}
+0x62,0x45,0x7d,0x58,0x7c,0x31
+
+# ATT:   vcvttph2w  8128(%rcx), %zmm30
+# INTEL: vcvttph2w zmm30, zmmword ptr [rcx + 8128]
+0x62,0x65,0x7d,0x48,0x7c,0x71,0x7f
+
+# ATT:   vcvttph2w  -256(%rdx){1to32}, %zmm30 {%k7} {z}
+# INTEL: vcvttph2w zmm30 {k7} {z}, word ptr [rdx - 256]{1to32}
+0x62,0x65,0x7d,0xdf,0x7c,0x72,0x80
+
+# ATT:   vcvttsh2si %xmm30, %edx
+# INTEL: vcvttsh2si edx, xmm30
+0x62,0x95,0x7e,0x08,0x2c,0xd6
+
+# ATT:   vcvttsh2si {sae}, %xmm30, %edx
+# INTEL: vcvttsh2si edx, xmm30, {sae}
+0x62,0x95,0x7e,0x18,0x2c,0xd6
+
+# ATT:   vcvttsh2si %xmm30, %r12
+# INTEL: vcvttsh2si r12, xmm30
+0x62,0x15,0xfe,0x08,0x2c,0xe6
+
+# ATT:   vcvttsh2si {sae}, %xmm30, %r12
+# INTEL: vcvttsh2si r12, xmm30, {sae}
+0x62,0x15,0xfe,0x18,0x2c,0xe6
+
+# ATT:   vcvttsh2si  268435456(%rbp,%r14,8), %edx
+# INTEL: vcvttsh2si edx, word ptr [rbp + 8*r14 + 268435456]
+0x62,0xb5,0x7e,0x08,0x2c,0x94,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvttsh2si  (%r9), %edx
+# INTEL: vcvttsh2si edx, word ptr [r9]
+0x62,0xd5,0x7e,0x08,0x2c,0x11
+
+# ATT:   vcvttsh2si  254(%rcx), %edx
+# INTEL: vcvttsh2si edx, word ptr [rcx + 254]
+0x62,0xf5,0x7e,0x08,0x2c,0x51,0x7f
+
+# ATT:   vcvttsh2si  -256(%rdx), %edx
+# INTEL: vcvttsh2si edx, word ptr [rdx - 256]
+0x62,0xf5,0x7e,0x08,0x2c,0x52,0x80
+
+# ATT:   vcvttsh2si  268435456(%rbp,%r14,8), %r12
+# INTEL: vcvttsh2si r12, word ptr [rbp + 8*r14 + 268435456]
+0x62,0x35,0xfe,0x08,0x2c,0xa4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvttsh2si  (%r9), %r12
+# INTEL: vcvttsh2si r12, word ptr [r9]
+0x62,0x55,0xfe,0x08,0x2c,0x21
+
+# ATT:   vcvttsh2si  254(%rcx), %r12
+# INTEL: vcvttsh2si r12, word ptr [rcx + 254]
+0x62,0x75,0xfe,0x08,0x2c,0x61,0x7f
+
+# ATT:   vcvttsh2si  -256(%rdx), %r12
+# INTEL: vcvttsh2si r12, word ptr [rdx - 256]
+0x62,0x75,0xfe,0x08,0x2c,0x62,0x80
+
+# ATT:   vcvttsh2usi %xmm30, %edx
+# INTEL: vcvttsh2usi edx, xmm30
+0x62,0x95,0x7e,0x08,0x78,0xd6
+
+# ATT:   vcvttsh2usi {sae}, %xmm30, %edx
+# INTEL: vcvttsh2usi edx, xmm30, {sae}
+0x62,0x95,0x7e,0x18,0x78,0xd6
+
+# ATT:   vcvttsh2usi %xmm30, %r12
+# INTEL: vcvttsh2usi r12, xmm30
+0x62,0x15,0xfe,0x08,0x78,0xe6
+
+# ATT:   vcvttsh2usi {sae}, %xmm30, %r12
+# INTEL: vcvttsh2usi r12, xmm30, {sae}
+0x62,0x15,0xfe,0x18,0x78,0xe6
+
+# ATT:   vcvttsh2usi  268435456(%rbp,%r14,8), %edx
+# INTEL: vcvttsh2usi edx, word ptr [rbp + 8*r14 + 268435456]
+0x62,0xb5,0x7e,0x08,0x78,0x94,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvttsh2usi  (%r9), %edx
+# INTEL: vcvttsh2usi edx, word ptr [r9]
+0x62,0xd5,0x7e,0x08,0x78,0x11
+
+# ATT:   vcvttsh2usi  254(%rcx), %edx
+# INTEL: vcvttsh2usi edx, word ptr [rcx + 254]
+0x62,0xf5,0x7e,0x08,0x78,0x51,0x7f
+
+# ATT:   vcvttsh2usi  -256(%rdx), %edx
+# INTEL: vcvttsh2usi edx, word ptr [rdx - 256]
+0x62,0xf5,0x7e,0x08,0x78,0x52,0x80
+
+# ATT:   vcvttsh2usi  268435456(%rbp,%r14,8), %r12
+# INTEL: vcvttsh2usi r12, word ptr [rbp + 8*r14 + 268435456]
+0x62,0x35,0xfe,0x08,0x78,0xa4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvttsh2usi  (%r9), %r12
+# INTEL: vcvttsh2usi r12, word ptr [r9]
+0x62,0x55,0xfe,0x08,0x78,0x21
+
+# ATT:   vcvttsh2usi  254(%rcx), %r12
+# INTEL: vcvttsh2usi r12, word ptr [rcx + 254]
+0x62,0x75,0xfe,0x08,0x78,0x61,0x7f
+
+# ATT:   vcvttsh2usi  -256(%rdx), %r12
+# INTEL: vcvttsh2usi r12, word ptr [rdx - 256]
+0x62,0x75,0xfe,0x08,0x78,0x62,0x80
+
+# ATT:   vcvtudq2ph %zmm29, %ymm30
+# INTEL: vcvtudq2ph ymm30, zmm29
+0x62,0x05,0x7f,0x48,0x7a,0xf5
+
+# ATT:   vcvtudq2ph {rn-sae}, %zmm29, %ymm30
+# INTEL: vcvtudq2ph ymm30, zmm29, {rn-sae}
+0x62,0x05,0x7f,0x18,0x7a,0xf5
+
+# ATT:   vcvtudq2ph  268435456(%rbp,%r14,8), %ymm30 {%k7}
+# INTEL: vcvtudq2ph ymm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7f,0x4f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtudq2ph  (%r9){1to16}, %ymm30
+# INTEL: vcvtudq2ph ymm30, dword ptr [r9]{1to16}
+0x62,0x45,0x7f,0x58,0x7a,0x31
+
+# ATT:   vcvtudq2ph  8128(%rcx), %ymm30
+# INTEL: vcvtudq2ph ymm30, zmmword ptr [rcx + 8128]
+0x62,0x65,0x7f,0x48,0x7a,0x71,0x7f
+
+# ATT:   vcvtudq2ph  -512(%rdx){1to16}, %ymm30 {%k7} {z}
+# INTEL: vcvtudq2ph ymm30 {k7} {z}, dword ptr [rdx - 512]{1to16}
+0x62,0x65,0x7f,0xdf,0x7a,0x72,0x80
+
+# ATT:   vcvtuqq2ph %zmm29, %xmm30
+# INTEL: vcvtuqq2ph xmm30, zmm29
+0x62,0x05,0xff,0x48,0x7a,0xf5
+
+# ATT:   vcvtuqq2ph {rn-sae}, %zmm29, %xmm30
+# INTEL: vcvtuqq2ph xmm30, zmm29, {rn-sae}
+0x62,0x05,0xff,0x18,0x7a,0xf5
+
+# ATT:   vcvtuqq2phz  268435456(%rbp,%r14,8), %xmm30 {%k7}
+# INTEL: vcvtuqq2ph xmm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0xff,0x4f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtuqq2ph  (%r9){1to8}, %xmm30
+# INTEL: vcvtuqq2ph xmm30, qword ptr [r9]{1to8}
+0x62,0x45,0xff,0x58,0x7a,0x31
+
+# ATT:   vcvtuqq2phz  8128(%rcx), %xmm30
+# INTEL: vcvtuqq2ph xmm30, zmmword ptr [rcx + 8128]
+0x62,0x65,0xff,0x48,0x7a,0x71,0x7f
+
+# ATT:   vcvtuqq2ph  -1024(%rdx){1to8}, %xmm30 {%k7} {z}
+# INTEL: vcvtuqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to8}
+0x62,0x65,0xff,0xdf,0x7a,0x72,0x80
+
+# ATT:   vcvtusi2sh %r12, %xmm29, %xmm30
+# INTEL: vcvtusi2sh xmm30, xmm29, r12
+0x62,0x45,0x96,0x00,0x7b,0xf4
+
+# ATT:   vcvtusi2sh %r12, {rn-sae}, %xmm29, %xmm30
+# INTEL: vcvtusi2sh xmm30, xmm29, {rn-sae}, r12
+0x62,0x45,0x96,0x10,0x7b,0xf4
+
+# ATT:   vcvtusi2sh %edx, %xmm29, %xmm30
+# INTEL: vcvtusi2sh xmm30, xmm29, edx
+0x62,0x65,0x16,0x00,0x7b,0xf2
+
+# ATT:   vcvtusi2sh %edx, {rn-sae}, %xmm29, %xmm30
+# INTEL: vcvtusi2sh xmm30, xmm29, {rn-sae}, edx
+0x62,0x65,0x16,0x10,0x7b,0xf2
+
+# ATT:   vcvtusi2shl  268435456(%rbp,%r14,8), %xmm29, %xmm30
+# INTEL: vcvtusi2sh xmm30, xmm29, dword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x16,0x00,0x7b,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtusi2shl  (%r9), %xmm29, %xmm30
+# INTEL: vcvtusi2sh xmm30, xmm29, dword ptr [r9]
+0x62,0x45,0x16,0x00,0x7b,0x31
+
+# ATT:   vcvtusi2shl  508(%rcx), %xmm29, %xmm30
+# INTEL: vcvtusi2sh xmm30, xmm29, dword ptr [rcx + 508]
+0x62,0x65,0x16,0x00,0x7b,0x71,0x7f
+
+# ATT:   vcvtusi2shl  -512(%rdx), %xmm29, %xmm30
+# INTEL: vcvtusi2sh xmm30, xmm29, dword ptr [rdx - 512]
+0x62,0x65,0x16,0x00,0x7b,0x72,0x80
+
+# ATT:   vcvtusi2shq  1016(%rcx), %xmm29, %xmm30
+# INTEL: vcvtusi2sh xmm30, xmm29, qword ptr [rcx + 1016]
+0x62,0x65,0x96,0x00,0x7b,0x71,0x7f
+
+# ATT:   vcvtusi2shq  -1024(%rdx), %xmm29, %xmm30
+# INTEL: vcvtusi2sh xmm30, xmm29, qword ptr [rdx - 1024]
+0x62,0x65,0x96,0x00,0x7b,0x72,0x80
+
+# ATT:   vcvtuw2ph %zmm29, %zmm30
+# INTEL: vcvtuw2ph zmm30, zmm29
+0x62,0x05,0x7f,0x48,0x7d,0xf5
+
+# ATT:   vcvtuw2ph {rn-sae}, %zmm29, %zmm30
+# INTEL: vcvtuw2ph zmm30, zmm29, {rn-sae}
+0x62,0x05,0x7f,0x18,0x7d,0xf5
+
+# ATT:   vcvtuw2ph  268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvtuw2ph zmm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7f,0x4f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtuw2ph  (%r9){1to32}, %zmm30
+# INTEL: vcvtuw2ph zmm30, word ptr [r9]{1to32}
+0x62,0x45,0x7f,0x58,0x7d,0x31
+
+# ATT:   vcvtuw2ph  8128(%rcx), %zmm30
+# INTEL: vcvtuw2ph zmm30, zmmword ptr [rcx + 8128]
+0x62,0x65,0x7f,0x48,0x7d,0x71,0x7f
+
+# ATT:   vcvtuw2ph  -256(%rdx){1to32}, %zmm30 {%k7} {z}
+# INTEL: vcvtuw2ph zmm30 {k7} {z}, word ptr [rdx - 256]{1to32}
+0x62,0x65,0x7f,0xdf,0x7d,0x72,0x80
+
+# ATT:   vcvtw2ph %zmm29, %zmm30
+# INTEL: vcvtw2ph zmm30, zmm29
+0x62,0x05,0x7e,0x48,0x7d,0xf5
+
+# ATT:   vcvtw2ph {rn-sae}, %zmm29, %zmm30
+# INTEL: vcvtw2ph zmm30, zmm29, {rn-sae}
+0x62,0x05,0x7e,0x18,0x7d,0xf5
+
+# ATT:   vcvtw2ph  268435456(%rbp,%r14,8), %zmm30 {%k7}
+# INTEL: vcvtw2ph zmm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0x25,0x7e,0x4f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtw2ph  (%r9){1to32}, %zmm30
+# INTEL: vcvtw2ph zmm30, word ptr [r9]{1to32}
+0x62,0x45,0x7e,0x58,0x7d,0x31
+
+# ATT:   vcvtw2ph  8128(%rcx), %zmm30
+# INTEL: vcvtw2ph zmm30, zmmword ptr [rcx + 8128]
+0x62,0x65,0x7e,0x48,0x7d,0x71,0x7f
+
+# ATT:   vcvtw2ph  -256(%rdx){1to32}, %zmm30 {%k7} {z}
+# INTEL: vcvtw2ph zmm30 {k7} {z}, word ptr [rdx - 256]{1to32}
+0x62,0x65,0x7e,0xdf,0x7d,0x72,0x80

diff  --git a/llvm/test/MC/Disassembler/X86/avx512fp16vl.txt b/llvm/test/MC/Disassembler/X86/avx512fp16vl.txt
index 362215492e1b3..63acd5be1946f 100644
--- a/llvm/test/MC/Disassembler/X86/avx512fp16vl.txt
+++ b/llvm/test/MC/Disassembler/X86/avx512fp16vl.txt
@@ -280,3 +280,859 @@
 # ATT:   vsubph  -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
 # INTEL: vsubph xmm6 {k7} {z}, xmm5, word ptr [edx - 256]{1to8}
 0x62,0xf5,0x54,0x9f,0x5c,0x72,0x80
+
+# ATT:   vcvtdq2ph %xmm5, %xmm6
+# INTEL: vcvtdq2ph xmm6, xmm5
+0x62,0xf5,0x7c,0x08,0x5b,0xf5
+
+# ATT:   vcvtdq2ph %ymm5, %xmm6
+# INTEL: vcvtdq2ph xmm6, ymm5
+0x62,0xf5,0x7c,0x28,0x5b,0xf5
+
+# ATT:   vcvtdq2phx  268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtdq2ph xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7c,0x0f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtdq2ph  (%ecx){1to4}, %xmm6
+# INTEL: vcvtdq2ph xmm6, dword ptr [ecx]{1to4}
+0x62,0xf5,0x7c,0x18,0x5b,0x31
+
+# ATT:   vcvtdq2phx  2032(%ecx), %xmm6
+# INTEL: vcvtdq2ph xmm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7c,0x08,0x5b,0x71,0x7f
+
+# ATT:   vcvtdq2ph  -512(%edx){1to4}, %xmm6 {%k7} {z}
+# INTEL: vcvtdq2ph xmm6 {k7} {z}, dword ptr [edx - 512]{1to4}
+0x62,0xf5,0x7c,0x9f,0x5b,0x72,0x80
+
+# ATT:   vcvtdq2ph  (%ecx){1to8}, %xmm6
+# INTEL: vcvtdq2ph xmm6, dword ptr [ecx]{1to8}
+0x62,0xf5,0x7c,0x38,0x5b,0x31
+
+# ATT:   vcvtdq2phy  4064(%ecx), %xmm6
+# INTEL: vcvtdq2ph xmm6, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x7c,0x28,0x5b,0x71,0x7f
+
+# ATT:   vcvtdq2ph  -512(%edx){1to8}, %xmm6 {%k7} {z}
+# INTEL: vcvtdq2ph xmm6 {k7} {z}, dword ptr [edx - 512]{1to8}
+0x62,0xf5,0x7c,0xbf,0x5b,0x72,0x80
+
+# ATT:   vcvtpd2ph %xmm5, %xmm6
+# INTEL: vcvtpd2ph xmm6, xmm5
+0x62,0xf5,0xfd,0x08,0x5a,0xf5
+
+# ATT:   vcvtpd2ph %ymm5, %xmm6
+# INTEL: vcvtpd2ph xmm6, ymm5
+0x62,0xf5,0xfd,0x28,0x5a,0xf5
+
+# ATT:   vcvtpd2phx  268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtpd2ph xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0xfd,0x0f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtpd2ph  (%ecx){1to2}, %xmm6
+# INTEL: vcvtpd2ph xmm6, qword ptr [ecx]{1to2}
+0x62,0xf5,0xfd,0x18,0x5a,0x31
+
+# ATT:   vcvtpd2phx  2032(%ecx), %xmm6
+# INTEL: vcvtpd2ph xmm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0xfd,0x08,0x5a,0x71,0x7f
+
+# ATT:   vcvtpd2ph  -1024(%edx){1to2}, %xmm6 {%k7} {z}
+# INTEL: vcvtpd2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to2}
+0x62,0xf5,0xfd,0x9f,0x5a,0x72,0x80
+
+# ATT:   vcvtpd2ph  (%ecx){1to4}, %xmm6
+# INTEL: vcvtpd2ph xmm6, qword ptr [ecx]{1to4}
+0x62,0xf5,0xfd,0x38,0x5a,0x31
+
+# ATT:   vcvtpd2phy  4064(%ecx), %xmm6
+# INTEL: vcvtpd2ph xmm6, ymmword ptr [ecx + 4064]
+0x62,0xf5,0xfd,0x28,0x5a,0x71,0x7f
+
+# ATT:   vcvtpd2ph  -1024(%edx){1to4}, %xmm6 {%k7} {z}
+# INTEL: vcvtpd2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to4}
+0x62,0xf5,0xfd,0xbf,0x5a,0x72,0x80
+
+# ATT:   vcvtph2dq %xmm5, %xmm6
+# INTEL: vcvtph2dq xmm6, xmm5
+0x62,0xf5,0x7d,0x08,0x5b,0xf5
+
+# ATT:   vcvtph2dq %xmm5, %ymm6
+# INTEL: vcvtph2dq ymm6, xmm5
+0x62,0xf5,0x7d,0x28,0x5b,0xf5
+
+# ATT:   vcvtph2dq  268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtph2dq xmm6 {k7}, qword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x0f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtph2dq  (%ecx){1to4}, %xmm6
+# INTEL: vcvtph2dq xmm6, word ptr [ecx]{1to4}
+0x62,0xf5,0x7d,0x18,0x5b,0x31
+
+# ATT:   vcvtph2dq  1016(%ecx), %xmm6
+# INTEL: vcvtph2dq xmm6, qword ptr [ecx + 1016]
+0x62,0xf5,0x7d,0x08,0x5b,0x71,0x7f
+
+# ATT:   vcvtph2dq  -256(%edx){1to4}, %xmm6 {%k7} {z}
+# INTEL: vcvtph2dq xmm6 {k7} {z}, word ptr [edx - 256]{1to4}
+0x62,0xf5,0x7d,0x9f,0x5b,0x72,0x80
+
+# ATT:   vcvtph2dq  268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvtph2dq ymm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x2f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtph2dq  (%ecx){1to8}, %ymm6
+# INTEL: vcvtph2dq ymm6, word ptr [ecx]{1to8}
+0x62,0xf5,0x7d,0x38,0x5b,0x31
+
+# ATT:   vcvtph2dq  2032(%ecx), %ymm6
+# INTEL: vcvtph2dq ymm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7d,0x28,0x5b,0x71,0x7f
+
+# ATT:   vcvtph2dq  -256(%edx){1to8}, %ymm6 {%k7} {z}
+# INTEL: vcvtph2dq ymm6 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x7d,0xbf,0x5b,0x72,0x80
+
+# ATT:   vcvtph2pd %xmm5, %xmm6
+# INTEL: vcvtph2pd xmm6, xmm5
+0x62,0xf5,0x7c,0x08,0x5a,0xf5
+
+# ATT:   vcvtph2pd %xmm5, %ymm6
+# INTEL: vcvtph2pd ymm6, xmm5
+0x62,0xf5,0x7c,0x28,0x5a,0xf5
+
+# ATT:   vcvtph2pd  268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtph2pd xmm6 {k7}, dword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7c,0x0f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtph2pd  (%ecx){1to2}, %xmm6
+# INTEL: vcvtph2pd xmm6, word ptr [ecx]{1to2}
+0x62,0xf5,0x7c,0x18,0x5a,0x31
+
+# ATT:   vcvtph2pd  508(%ecx), %xmm6
+# INTEL: vcvtph2pd xmm6, dword ptr [ecx + 508]
+0x62,0xf5,0x7c,0x08,0x5a,0x71,0x7f
+
+# ATT:   vcvtph2pd  -256(%edx){1to2}, %xmm6 {%k7} {z}
+# INTEL: vcvtph2pd xmm6 {k7} {z}, word ptr [edx - 256]{1to2}
+0x62,0xf5,0x7c,0x9f,0x5a,0x72,0x80
+
+# ATT:   vcvtph2pd  268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvtph2pd ymm6 {k7}, qword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7c,0x2f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtph2pd  (%ecx){1to4}, %ymm6
+# INTEL: vcvtph2pd ymm6, word ptr [ecx]{1to4}
+0x62,0xf5,0x7c,0x38,0x5a,0x31
+
+# ATT:   vcvtph2pd  1016(%ecx), %ymm6
+# INTEL: vcvtph2pd ymm6, qword ptr [ecx + 1016]
+0x62,0xf5,0x7c,0x28,0x5a,0x71,0x7f
+
+# ATT:   vcvtph2pd  -256(%edx){1to4}, %ymm6 {%k7} {z}
+# INTEL: vcvtph2pd ymm6 {k7} {z}, word ptr [edx - 256]{1to4}
+0x62,0xf5,0x7c,0xbf,0x5a,0x72,0x80
+
+# ATT:   vcvtph2psx %xmm5, %xmm6
+# INTEL: vcvtph2psx xmm6, xmm5
+0x62,0xf6,0x7d,0x08,0x13,0xf5
+
+# ATT:   vcvtph2psx %xmm5, %ymm6
+# INTEL: vcvtph2psx ymm6, xmm5
+0x62,0xf6,0x7d,0x28,0x13,0xf5
+
+# ATT:   vcvtph2psx  268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtph2psx xmm6 {k7}, qword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x7d,0x0f,0x13,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtph2psx  (%ecx){1to4}, %xmm6
+# INTEL: vcvtph2psx xmm6, word ptr [ecx]{1to4}
+0x62,0xf6,0x7d,0x18,0x13,0x31
+
+# ATT:   vcvtph2psx  1016(%ecx), %xmm6
+# INTEL: vcvtph2psx xmm6, qword ptr [ecx + 1016]
+0x62,0xf6,0x7d,0x08,0x13,0x71,0x7f
+
+# ATT:   vcvtph2psx  -256(%edx){1to4}, %xmm6 {%k7} {z}
+# INTEL: vcvtph2psx xmm6 {k7} {z}, word ptr [edx - 256]{1to4}
+0x62,0xf6,0x7d,0x9f,0x13,0x72,0x80
+
+# ATT:   vcvtph2psx  268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvtph2psx ymm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf6,0x7d,0x2f,0x13,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtph2psx  (%ecx){1to8}, %ymm6
+# INTEL: vcvtph2psx ymm6, word ptr [ecx]{1to8}
+0x62,0xf6,0x7d,0x38,0x13,0x31
+
+# ATT:   vcvtph2psx  2032(%ecx), %ymm6
+# INTEL: vcvtph2psx ymm6, xmmword ptr [ecx + 2032]
+0x62,0xf6,0x7d,0x28,0x13,0x71,0x7f
+
+# ATT:   vcvtph2psx  -256(%edx){1to8}, %ymm6 {%k7} {z}
+# INTEL: vcvtph2psx ymm6 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf6,0x7d,0xbf,0x13,0x72,0x80
+
+# ATT:   vcvtph2qq %xmm5, %xmm6
+# INTEL: vcvtph2qq xmm6, xmm5
+0x62,0xf5,0x7d,0x08,0x7b,0xf5
+
+# ATT:   vcvtph2qq %xmm5, %ymm6
+# INTEL: vcvtph2qq ymm6, xmm5
+0x62,0xf5,0x7d,0x28,0x7b,0xf5
+
+# ATT:   vcvtph2qq  268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtph2qq xmm6 {k7}, dword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x0f,0x7b,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtph2qq  (%ecx){1to2}, %xmm6
+# INTEL: vcvtph2qq xmm6, word ptr [ecx]{1to2}
+0x62,0xf5,0x7d,0x18,0x7b,0x31
+
+# ATT:   vcvtph2qq  508(%ecx), %xmm6
+# INTEL: vcvtph2qq xmm6, dword ptr [ecx + 508]
+0x62,0xf5,0x7d,0x08,0x7b,0x71,0x7f
+
+# ATT:   vcvtph2qq  -256(%edx){1to2}, %xmm6 {%k7} {z}
+# INTEL: vcvtph2qq xmm6 {k7} {z}, word ptr [edx - 256]{1to2}
+0x62,0xf5,0x7d,0x9f,0x7b,0x72,0x80
+
+# ATT:   vcvtph2qq  268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvtph2qq ymm6 {k7}, qword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x2f,0x7b,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtph2qq  (%ecx){1to4}, %ymm6
+# INTEL: vcvtph2qq ymm6, word ptr [ecx]{1to4}
+0x62,0xf5,0x7d,0x38,0x7b,0x31
+
+# ATT:   vcvtph2qq  1016(%ecx), %ymm6
+# INTEL: vcvtph2qq ymm6, qword ptr [ecx + 1016]
+0x62,0xf5,0x7d,0x28,0x7b,0x71,0x7f
+
+# ATT:   vcvtph2qq  -256(%edx){1to4}, %ymm6 {%k7} {z}
+# INTEL: vcvtph2qq ymm6 {k7} {z}, word ptr [edx - 256]{1to4}
+0x62,0xf5,0x7d,0xbf,0x7b,0x72,0x80
+
+# ATT:   vcvtph2udq %xmm5, %xmm6
+# INTEL: vcvtph2udq xmm6, xmm5
+0x62,0xf5,0x7c,0x08,0x79,0xf5
+
+# ATT:   vcvtph2udq %xmm5, %ymm6
+# INTEL: vcvtph2udq ymm6, xmm5
+0x62,0xf5,0x7c,0x28,0x79,0xf5
+
+# ATT:   vcvtph2udq  268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtph2udq xmm6 {k7}, qword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7c,0x0f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtph2udq  (%ecx){1to4}, %xmm6
+# INTEL: vcvtph2udq xmm6, word ptr [ecx]{1to4}
+0x62,0xf5,0x7c,0x18,0x79,0x31
+
+# ATT:   vcvtph2udq  1016(%ecx), %xmm6
+# INTEL: vcvtph2udq xmm6, qword ptr [ecx + 1016]
+0x62,0xf5,0x7c,0x08,0x79,0x71,0x7f
+
+# ATT:   vcvtph2udq  -256(%edx){1to4}, %xmm6 {%k7} {z}
+# INTEL: vcvtph2udq xmm6 {k7} {z}, word ptr [edx - 256]{1to4}
+0x62,0xf5,0x7c,0x9f,0x79,0x72,0x80
+
+# ATT:   vcvtph2udq  268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvtph2udq ymm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7c,0x2f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtph2udq  (%ecx){1to8}, %ymm6
+# INTEL: vcvtph2udq ymm6, word ptr [ecx]{1to8}
+0x62,0xf5,0x7c,0x38,0x79,0x31
+
+# ATT:   vcvtph2udq  2032(%ecx), %ymm6
+# INTEL: vcvtph2udq ymm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7c,0x28,0x79,0x71,0x7f
+
+# ATT:   vcvtph2udq  -256(%edx){1to8}, %ymm6 {%k7} {z}
+# INTEL: vcvtph2udq ymm6 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x7c,0xbf,0x79,0x72,0x80
+
+# ATT:   vcvtph2uqq %xmm5, %xmm6
+# INTEL: vcvtph2uqq xmm6, xmm5
+0x62,0xf5,0x7d,0x08,0x79,0xf5
+
+# ATT:   vcvtph2uqq %xmm5, %ymm6
+# INTEL: vcvtph2uqq ymm6, xmm5
+0x62,0xf5,0x7d,0x28,0x79,0xf5
+
+# ATT:   vcvtph2uqq  268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtph2uqq xmm6 {k7}, dword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x0f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtph2uqq  (%ecx){1to2}, %xmm6
+# INTEL: vcvtph2uqq xmm6, word ptr [ecx]{1to2}
+0x62,0xf5,0x7d,0x18,0x79,0x31
+
+# ATT:   vcvtph2uqq  508(%ecx), %xmm6
+# INTEL: vcvtph2uqq xmm6, dword ptr [ecx + 508]
+0x62,0xf5,0x7d,0x08,0x79,0x71,0x7f
+
+# ATT:   vcvtph2uqq  -256(%edx){1to2}, %xmm6 {%k7} {z}
+# INTEL: vcvtph2uqq xmm6 {k7} {z}, word ptr [edx - 256]{1to2}
+0x62,0xf5,0x7d,0x9f,0x79,0x72,0x80
+
+# ATT:   vcvtph2uqq  268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvtph2uqq ymm6 {k7}, qword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x2f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtph2uqq  (%ecx){1to4}, %ymm6
+# INTEL: vcvtph2uqq ymm6, word ptr [ecx]{1to4}
+0x62,0xf5,0x7d,0x38,0x79,0x31
+
+# ATT:   vcvtph2uqq  1016(%ecx), %ymm6
+# INTEL: vcvtph2uqq ymm6, qword ptr [ecx + 1016]
+0x62,0xf5,0x7d,0x28,0x79,0x71,0x7f
+
+# ATT:   vcvtph2uqq  -256(%edx){1to4}, %ymm6 {%k7} {z}
+# INTEL: vcvtph2uqq ymm6 {k7} {z}, word ptr [edx - 256]{1to4}
+0x62,0xf5,0x7d,0xbf,0x79,0x72,0x80
+
+# ATT:   vcvtph2uw %xmm5, %xmm6
+# INTEL: vcvtph2uw xmm6, xmm5
+0x62,0xf5,0x7c,0x08,0x7d,0xf5
+
+# ATT:   vcvtph2uw %ymm5, %ymm6
+# INTEL: vcvtph2uw ymm6, ymm5
+0x62,0xf5,0x7c,0x28,0x7d,0xf5
+
+# ATT:   vcvtph2uw  268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtph2uw xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7c,0x0f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtph2uw  (%ecx){1to8}, %xmm6
+# INTEL: vcvtph2uw xmm6, word ptr [ecx]{1to8}
+0x62,0xf5,0x7c,0x18,0x7d,0x31
+
+# ATT:   vcvtph2uw  2032(%ecx), %xmm6
+# INTEL: vcvtph2uw xmm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7c,0x08,0x7d,0x71,0x7f
+
+# ATT:   vcvtph2uw  -256(%edx){1to8}, %xmm6 {%k7} {z}
+# INTEL: vcvtph2uw xmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x7c,0x9f,0x7d,0x72,0x80
+
+# ATT:   vcvtph2uw  268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvtph2uw ymm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7c,0x2f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtph2uw  (%ecx){1to16}, %ymm6
+# INTEL: vcvtph2uw ymm6, word ptr [ecx]{1to16}
+0x62,0xf5,0x7c,0x38,0x7d,0x31
+
+# ATT:   vcvtph2uw  4064(%ecx), %ymm6
+# INTEL: vcvtph2uw ymm6, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x7c,0x28,0x7d,0x71,0x7f
+
+# ATT:   vcvtph2uw  -256(%edx){1to16}, %ymm6 {%k7} {z}
+# INTEL: vcvtph2uw ymm6 {k7} {z}, word ptr [edx - 256]{1to16}
+0x62,0xf5,0x7c,0xbf,0x7d,0x72,0x80
+
+# ATT:   vcvtph2w %xmm5, %xmm6
+# INTEL: vcvtph2w xmm6, xmm5
+0x62,0xf5,0x7d,0x08,0x7d,0xf5
+
+# ATT:   vcvtph2w %ymm5, %ymm6
+# INTEL: vcvtph2w ymm6, ymm5
+0x62,0xf5,0x7d,0x28,0x7d,0xf5
+
+# ATT:   vcvtph2w  268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtph2w xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x0f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtph2w  (%ecx){1to8}, %xmm6
+# INTEL: vcvtph2w xmm6, word ptr [ecx]{1to8}
+0x62,0xf5,0x7d,0x18,0x7d,0x31
+
+# ATT:   vcvtph2w  2032(%ecx), %xmm6
+# INTEL: vcvtph2w xmm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7d,0x08,0x7d,0x71,0x7f
+
+# ATT:   vcvtph2w  -256(%edx){1to8}, %xmm6 {%k7} {z}
+# INTEL: vcvtph2w xmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x7d,0x9f,0x7d,0x72,0x80
+
+# ATT:   vcvtph2w  268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvtph2w ymm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x2f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtph2w  (%ecx){1to16}, %ymm6
+# INTEL: vcvtph2w ymm6, word ptr [ecx]{1to16}
+0x62,0xf5,0x7d,0x38,0x7d,0x31
+
+# ATT:   vcvtph2w  4064(%ecx), %ymm6
+# INTEL: vcvtph2w ymm6, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x7d,0x28,0x7d,0x71,0x7f
+
+# ATT:   vcvtph2w  -256(%edx){1to16}, %ymm6 {%k7} {z}
+# INTEL: vcvtph2w ymm6 {k7} {z}, word ptr [edx - 256]{1to16}
+0x62,0xf5,0x7d,0xbf,0x7d,0x72,0x80
+
+# ATT:   vcvtps2phx %xmm5, %xmm6
+# INTEL: vcvtps2phx xmm6, xmm5
+0x62,0xf5,0x7d,0x08,0x1d,0xf5
+
+# ATT:   vcvtps2phx %ymm5, %xmm6
+# INTEL: vcvtps2phx xmm6, ymm5
+0x62,0xf5,0x7d,0x28,0x1d,0xf5
+
+# ATT:   vcvtps2phxx  268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtps2phx xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x0f,0x1d,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtps2phx  (%ecx){1to4}, %xmm6
+# INTEL: vcvtps2phx xmm6, dword ptr [ecx]{1to4}
+0x62,0xf5,0x7d,0x18,0x1d,0x31
+
+# ATT:   vcvtps2phxx  2032(%ecx), %xmm6
+# INTEL: vcvtps2phx xmm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7d,0x08,0x1d,0x71,0x7f
+
+# ATT:   vcvtps2phx  -512(%edx){1to4}, %xmm6 {%k7} {z}
+# INTEL: vcvtps2phx xmm6 {k7} {z}, dword ptr [edx - 512]{1to4}
+0x62,0xf5,0x7d,0x9f,0x1d,0x72,0x80
+
+# ATT:   vcvtps2phx  (%ecx){1to8}, %xmm6
+# INTEL: vcvtps2phx xmm6, dword ptr [ecx]{1to8}
+0x62,0xf5,0x7d,0x38,0x1d,0x31
+
+# ATT:   vcvtps2phxy  4064(%ecx), %xmm6
+# INTEL: vcvtps2phx xmm6, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x7d,0x28,0x1d,0x71,0x7f
+
+# ATT:   vcvtps2phx  -512(%edx){1to8}, %xmm6 {%k7} {z}
+# INTEL: vcvtps2phx xmm6 {k7} {z}, dword ptr [edx - 512]{1to8}
+0x62,0xf5,0x7d,0xbf,0x1d,0x72,0x80
+
+# ATT:   vcvtqq2ph %xmm5, %xmm6
+# INTEL: vcvtqq2ph xmm6, xmm5
+0x62,0xf5,0xfc,0x08,0x5b,0xf5
+
+# ATT:   vcvtqq2ph %ymm5, %xmm6
+# INTEL: vcvtqq2ph xmm6, ymm5
+0x62,0xf5,0xfc,0x28,0x5b,0xf5
+
+# ATT:   vcvtqq2phx  268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtqq2ph xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0xfc,0x0f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtqq2ph  (%ecx){1to2}, %xmm6
+# INTEL: vcvtqq2ph xmm6, qword ptr [ecx]{1to2}
+0x62,0xf5,0xfc,0x18,0x5b,0x31
+
+# ATT:   vcvtqq2phx  2032(%ecx), %xmm6
+# INTEL: vcvtqq2ph xmm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0xfc,0x08,0x5b,0x71,0x7f
+
+# ATT:   vcvtqq2ph  -1024(%edx){1to2}, %xmm6 {%k7} {z}
+# INTEL: vcvtqq2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to2}
+0x62,0xf5,0xfc,0x9f,0x5b,0x72,0x80
+
+# ATT:   vcvtqq2ph  (%ecx){1to4}, %xmm6
+# INTEL: vcvtqq2ph xmm6, qword ptr [ecx]{1to4}
+0x62,0xf5,0xfc,0x38,0x5b,0x31
+
+# ATT:   vcvtqq2phy  4064(%ecx), %xmm6
+# INTEL: vcvtqq2ph xmm6, ymmword ptr [ecx + 4064]
+0x62,0xf5,0xfc,0x28,0x5b,0x71,0x7f
+
+# ATT:   vcvtqq2ph  -1024(%edx){1to4}, %xmm6 {%k7} {z}
+# INTEL: vcvtqq2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to4}
+0x62,0xf5,0xfc,0xbf,0x5b,0x72,0x80
+
+# ATT:   vcvttph2dq %xmm5, %xmm6
+# INTEL: vcvttph2dq xmm6, xmm5
+0x62,0xf5,0x7e,0x08,0x5b,0xf5
+
+# ATT:   vcvttph2dq %xmm5, %ymm6
+# INTEL: vcvttph2dq ymm6, xmm5
+0x62,0xf5,0x7e,0x28,0x5b,0xf5
+
+# ATT:   vcvttph2dq  268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvttph2dq xmm6 {k7}, qword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7e,0x0f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvttph2dq  (%ecx){1to4}, %xmm6
+# INTEL: vcvttph2dq xmm6, word ptr [ecx]{1to4}
+0x62,0xf5,0x7e,0x18,0x5b,0x31
+
+# ATT:   vcvttph2dq  1016(%ecx), %xmm6
+# INTEL: vcvttph2dq xmm6, qword ptr [ecx + 1016]
+0x62,0xf5,0x7e,0x08,0x5b,0x71,0x7f
+
+# ATT:   vcvttph2dq  -256(%edx){1to4}, %xmm6 {%k7} {z}
+# INTEL: vcvttph2dq xmm6 {k7} {z}, word ptr [edx - 256]{1to4}
+0x62,0xf5,0x7e,0x9f,0x5b,0x72,0x80
+
+# ATT:   vcvttph2dq  268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvttph2dq ymm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7e,0x2f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvttph2dq  (%ecx){1to8}, %ymm6
+# INTEL: vcvttph2dq ymm6, word ptr [ecx]{1to8}
+0x62,0xf5,0x7e,0x38,0x5b,0x31
+
+# ATT:   vcvttph2dq  2032(%ecx), %ymm6
+# INTEL: vcvttph2dq ymm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7e,0x28,0x5b,0x71,0x7f
+
+# ATT:   vcvttph2dq  -256(%edx){1to8}, %ymm6 {%k7} {z}
+# INTEL: vcvttph2dq ymm6 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x7e,0xbf,0x5b,0x72,0x80
+
+# ATT:   vcvttph2qq %xmm5, %xmm6
+# INTEL: vcvttph2qq xmm6, xmm5
+0x62,0xf5,0x7d,0x08,0x7a,0xf5
+
+# ATT:   vcvttph2qq %xmm5, %ymm6
+# INTEL: vcvttph2qq ymm6, xmm5
+0x62,0xf5,0x7d,0x28,0x7a,0xf5
+
+# ATT:   vcvttph2qq  268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvttph2qq xmm6 {k7}, dword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x0f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvttph2qq  (%ecx){1to2}, %xmm6
+# INTEL: vcvttph2qq xmm6, word ptr [ecx]{1to2}
+0x62,0xf5,0x7d,0x18,0x7a,0x31
+
+# ATT:   vcvttph2qq  508(%ecx), %xmm6
+# INTEL: vcvttph2qq xmm6, dword ptr [ecx + 508]
+0x62,0xf5,0x7d,0x08,0x7a,0x71,0x7f
+
+# ATT:   vcvttph2qq  -256(%edx){1to2}, %xmm6 {%k7} {z}
+# INTEL: vcvttph2qq xmm6 {k7} {z}, word ptr [edx - 256]{1to2}
+0x62,0xf5,0x7d,0x9f,0x7a,0x72,0x80
+
+# ATT:   vcvttph2qq  268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvttph2qq ymm6 {k7}, qword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x2f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvttph2qq  (%ecx){1to4}, %ymm6
+# INTEL: vcvttph2qq ymm6, word ptr [ecx]{1to4}
+0x62,0xf5,0x7d,0x38,0x7a,0x31
+
+# ATT:   vcvttph2qq  1016(%ecx), %ymm6
+# INTEL: vcvttph2qq ymm6, qword ptr [ecx + 1016]
+0x62,0xf5,0x7d,0x28,0x7a,0x71,0x7f
+
+# ATT:   vcvttph2qq  -256(%edx){1to4}, %ymm6 {%k7} {z}
+# INTEL: vcvttph2qq ymm6 {k7} {z}, word ptr [edx - 256]{1to4}
+0x62,0xf5,0x7d,0xbf,0x7a,0x72,0x80
+
+# ATT:   vcvttph2udq %xmm5, %xmm6
+# INTEL: vcvttph2udq xmm6, xmm5
+0x62,0xf5,0x7c,0x08,0x78,0xf5
+
+# ATT:   vcvttph2udq %xmm5, %ymm6
+# INTEL: vcvttph2udq ymm6, xmm5
+0x62,0xf5,0x7c,0x28,0x78,0xf5
+
+# ATT:   vcvttph2udq  268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvttph2udq xmm6 {k7}, qword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7c,0x0f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvttph2udq  (%ecx){1to4}, %xmm6
+# INTEL: vcvttph2udq xmm6, word ptr [ecx]{1to4}
+0x62,0xf5,0x7c,0x18,0x78,0x31
+
+# ATT:   vcvttph2udq  1016(%ecx), %xmm6
+# INTEL: vcvttph2udq xmm6, qword ptr [ecx + 1016]
+0x62,0xf5,0x7c,0x08,0x78,0x71,0x7f
+
+# ATT:   vcvttph2udq  -256(%edx){1to4}, %xmm6 {%k7} {z}
+# INTEL: vcvttph2udq xmm6 {k7} {z}, word ptr [edx - 256]{1to4}
+0x62,0xf5,0x7c,0x9f,0x78,0x72,0x80
+
+# ATT:   vcvttph2udq  268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvttph2udq ymm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7c,0x2f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvttph2udq  (%ecx){1to8}, %ymm6
+# INTEL: vcvttph2udq ymm6, word ptr [ecx]{1to8}
+0x62,0xf5,0x7c,0x38,0x78,0x31
+
+# ATT:   vcvttph2udq  2032(%ecx), %ymm6
+# INTEL: vcvttph2udq ymm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7c,0x28,0x78,0x71,0x7f
+
+# ATT:   vcvttph2udq  -256(%edx){1to8}, %ymm6 {%k7} {z}
+# INTEL: vcvttph2udq ymm6 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x7c,0xbf,0x78,0x72,0x80
+
+# ATT:   vcvttph2uqq %xmm5, %xmm6
+# INTEL: vcvttph2uqq xmm6, xmm5
+0x62,0xf5,0x7d,0x08,0x78,0xf5
+
+# ATT:   vcvttph2uqq %xmm5, %ymm6
+# INTEL: vcvttph2uqq ymm6, xmm5
+0x62,0xf5,0x7d,0x28,0x78,0xf5
+
+# ATT:   vcvttph2uqq  268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvttph2uqq xmm6 {k7}, dword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x0f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvttph2uqq  (%ecx){1to2}, %xmm6
+# INTEL: vcvttph2uqq xmm6, word ptr [ecx]{1to2}
+0x62,0xf5,0x7d,0x18,0x78,0x31
+
+# ATT:   vcvttph2uqq  508(%ecx), %xmm6
+# INTEL: vcvttph2uqq xmm6, dword ptr [ecx + 508]
+0x62,0xf5,0x7d,0x08,0x78,0x71,0x7f
+
+# ATT:   vcvttph2uqq  -256(%edx){1to2}, %xmm6 {%k7} {z}
+# INTEL: vcvttph2uqq xmm6 {k7} {z}, word ptr [edx - 256]{1to2}
+0x62,0xf5,0x7d,0x9f,0x78,0x72,0x80
+
+# ATT:   vcvttph2uqq  268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvttph2uqq ymm6 {k7}, qword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x2f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvttph2uqq  (%ecx){1to4}, %ymm6
+# INTEL: vcvttph2uqq ymm6, word ptr [ecx]{1to4}
+0x62,0xf5,0x7d,0x38,0x78,0x31
+
+# ATT:   vcvttph2uqq  1016(%ecx), %ymm6
+# INTEL: vcvttph2uqq ymm6, qword ptr [ecx + 1016]
+0x62,0xf5,0x7d,0x28,0x78,0x71,0x7f
+
+# ATT:   vcvttph2uqq  -256(%edx){1to4}, %ymm6 {%k7} {z}
+# INTEL: vcvttph2uqq ymm6 {k7} {z}, word ptr [edx - 256]{1to4}
+0x62,0xf5,0x7d,0xbf,0x78,0x72,0x80
+
+# ATT:   vcvttph2uw %xmm5, %xmm6
+# INTEL: vcvttph2uw xmm6, xmm5
+0x62,0xf5,0x7c,0x08,0x7c,0xf5
+
+# ATT:   vcvttph2uw %ymm5, %ymm6
+# INTEL: vcvttph2uw ymm6, ymm5
+0x62,0xf5,0x7c,0x28,0x7c,0xf5
+
+# ATT:   vcvttph2uw  268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvttph2uw xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7c,0x0f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvttph2uw  (%ecx){1to8}, %xmm6
+# INTEL: vcvttph2uw xmm6, word ptr [ecx]{1to8}
+0x62,0xf5,0x7c,0x18,0x7c,0x31
+
+# ATT:   vcvttph2uw  2032(%ecx), %xmm6
+# INTEL: vcvttph2uw xmm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7c,0x08,0x7c,0x71,0x7f
+
+# ATT:   vcvttph2uw  -256(%edx){1to8}, %xmm6 {%k7} {z}
+# INTEL: vcvttph2uw xmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x7c,0x9f,0x7c,0x72,0x80
+
+# ATT:   vcvttph2uw  268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvttph2uw ymm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7c,0x2f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvttph2uw  (%ecx){1to16}, %ymm6
+# INTEL: vcvttph2uw ymm6, word ptr [ecx]{1to16}
+0x62,0xf5,0x7c,0x38,0x7c,0x31
+
+# ATT:   vcvttph2uw  4064(%ecx), %ymm6
+# INTEL: vcvttph2uw ymm6, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x7c,0x28,0x7c,0x71,0x7f
+
+# ATT:   vcvttph2uw  -256(%edx){1to16}, %ymm6 {%k7} {z}
+# INTEL: vcvttph2uw ymm6 {k7} {z}, word ptr [edx - 256]{1to16}
+0x62,0xf5,0x7c,0xbf,0x7c,0x72,0x80
+
+# ATT:   vcvttph2w %xmm5, %xmm6
+# INTEL: vcvttph2w xmm6, xmm5
+0x62,0xf5,0x7d,0x08,0x7c,0xf5
+
+# ATT:   vcvttph2w %ymm5, %ymm6
+# INTEL: vcvttph2w ymm6, ymm5
+0x62,0xf5,0x7d,0x28,0x7c,0xf5
+
+# ATT:   vcvttph2w  268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvttph2w xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x0f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvttph2w  (%ecx){1to8}, %xmm6
+# INTEL: vcvttph2w xmm6, word ptr [ecx]{1to8}
+0x62,0xf5,0x7d,0x18,0x7c,0x31
+
+# ATT:   vcvttph2w  2032(%ecx), %xmm6
+# INTEL: vcvttph2w xmm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7d,0x08,0x7c,0x71,0x7f
+
+# ATT:   vcvttph2w  -256(%edx){1to8}, %xmm6 {%k7} {z}
+# INTEL: vcvttph2w xmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x7d,0x9f,0x7c,0x72,0x80
+
+# ATT:   vcvttph2w  268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvttph2w ymm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x2f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvttph2w  (%ecx){1to16}, %ymm6
+# INTEL: vcvttph2w ymm6, word ptr [ecx]{1to16}
+0x62,0xf5,0x7d,0x38,0x7c,0x31
+
+# ATT:   vcvttph2w  4064(%ecx), %ymm6
+# INTEL: vcvttph2w ymm6, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x7d,0x28,0x7c,0x71,0x7f
+
+# ATT:   vcvttph2w  -256(%edx){1to16}, %ymm6 {%k7} {z}
+# INTEL: vcvttph2w ymm6 {k7} {z}, word ptr [edx - 256]{1to16}
+0x62,0xf5,0x7d,0xbf,0x7c,0x72,0x80
+
+# ATT:   vcvtudq2ph %xmm5, %xmm6
+# INTEL: vcvtudq2ph xmm6, xmm5
+0x62,0xf5,0x7f,0x08,0x7a,0xf5
+
+# ATT:   vcvtudq2ph %ymm5, %xmm6
+# INTEL: vcvtudq2ph xmm6, ymm5
+0x62,0xf5,0x7f,0x28,0x7a,0xf5
+
+# ATT:   vcvtudq2phx  268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtudq2ph xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7f,0x0f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtudq2ph  (%ecx){1to4}, %xmm6
+# INTEL: vcvtudq2ph xmm6, dword ptr [ecx]{1to4}
+0x62,0xf5,0x7f,0x18,0x7a,0x31
+
+# ATT:   vcvtudq2phx  2032(%ecx), %xmm6
+# INTEL: vcvtudq2ph xmm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7f,0x08,0x7a,0x71,0x7f
+
+# ATT:   vcvtudq2ph  -512(%edx){1to4}, %xmm6 {%k7} {z}
+# INTEL: vcvtudq2ph xmm6 {k7} {z}, dword ptr [edx - 512]{1to4}
+0x62,0xf5,0x7f,0x9f,0x7a,0x72,0x80
+
+# ATT:   vcvtudq2ph  (%ecx){1to8}, %xmm6
+# INTEL: vcvtudq2ph xmm6, dword ptr [ecx]{1to8}
+0x62,0xf5,0x7f,0x38,0x7a,0x31
+
+# ATT:   vcvtudq2phy  4064(%ecx), %xmm6
+# INTEL: vcvtudq2ph xmm6, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x7f,0x28,0x7a,0x71,0x7f
+
+# ATT:   vcvtudq2ph  -512(%edx){1to8}, %xmm6 {%k7} {z}
+# INTEL: vcvtudq2ph xmm6 {k7} {z}, dword ptr [edx - 512]{1to8}
+0x62,0xf5,0x7f,0xbf,0x7a,0x72,0x80
+
+# ATT:   vcvtuqq2ph %xmm5, %xmm6
+# INTEL: vcvtuqq2ph xmm6, xmm5
+0x62,0xf5,0xff,0x08,0x7a,0xf5
+
+# ATT:   vcvtuqq2ph %ymm5, %xmm6
+# INTEL: vcvtuqq2ph xmm6, ymm5
+0x62,0xf5,0xff,0x28,0x7a,0xf5
+
+# ATT:   vcvtuqq2phx  268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtuqq2ph xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0xff,0x0f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtuqq2ph  (%ecx){1to2}, %xmm6
+# INTEL: vcvtuqq2ph xmm6, qword ptr [ecx]{1to2}
+0x62,0xf5,0xff,0x18,0x7a,0x31
+
+# ATT:   vcvtuqq2phx  2032(%ecx), %xmm6
+# INTEL: vcvtuqq2ph xmm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0xff,0x08,0x7a,0x71,0x7f
+
+# ATT:   vcvtuqq2ph  -1024(%edx){1to2}, %xmm6 {%k7} {z}
+# INTEL: vcvtuqq2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to2}
+0x62,0xf5,0xff,0x9f,0x7a,0x72,0x80
+
+# ATT:   vcvtuqq2ph  (%ecx){1to4}, %xmm6
+# INTEL: vcvtuqq2ph xmm6, qword ptr [ecx]{1to4}
+0x62,0xf5,0xff,0x38,0x7a,0x31
+
+# ATT:   vcvtuqq2phy  4064(%ecx), %xmm6
+# INTEL: vcvtuqq2ph xmm6, ymmword ptr [ecx + 4064]
+0x62,0xf5,0xff,0x28,0x7a,0x71,0x7f
+
+# ATT:   vcvtuqq2ph  -1024(%edx){1to4}, %xmm6 {%k7} {z}
+# INTEL: vcvtuqq2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to4}
+0x62,0xf5,0xff,0xbf,0x7a,0x72,0x80
+
+# ATT:   vcvtuw2ph %xmm5, %xmm6
+# INTEL: vcvtuw2ph xmm6, xmm5
+0x62,0xf5,0x7f,0x08,0x7d,0xf5
+
+# ATT:   vcvtuw2ph %ymm5, %ymm6
+# INTEL: vcvtuw2ph ymm6, ymm5
+0x62,0xf5,0x7f,0x28,0x7d,0xf5
+
+# ATT:   vcvtuw2ph  268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtuw2ph xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7f,0x0f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtuw2ph  (%ecx){1to8}, %xmm6
+# INTEL: vcvtuw2ph xmm6, word ptr [ecx]{1to8}
+0x62,0xf5,0x7f,0x18,0x7d,0x31
+
+# ATT:   vcvtuw2ph  2032(%ecx), %xmm6
+# INTEL: vcvtuw2ph xmm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7f,0x08,0x7d,0x71,0x7f
+
+# ATT:   vcvtuw2ph  -256(%edx){1to8}, %xmm6 {%k7} {z}
+# INTEL: vcvtuw2ph xmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x7f,0x9f,0x7d,0x72,0x80
+
+# ATT:   vcvtuw2ph  268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvtuw2ph ymm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7f,0x2f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtuw2ph  (%ecx){1to16}, %ymm6
+# INTEL: vcvtuw2ph ymm6, word ptr [ecx]{1to16}
+0x62,0xf5,0x7f,0x38,0x7d,0x31
+
+# ATT:   vcvtuw2ph  4064(%ecx), %ymm6
+# INTEL: vcvtuw2ph ymm6, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x7f,0x28,0x7d,0x71,0x7f
+
+# ATT:   vcvtuw2ph  -256(%edx){1to16}, %ymm6 {%k7} {z}
+# INTEL: vcvtuw2ph ymm6 {k7} {z}, word ptr [edx - 256]{1to16}
+0x62,0xf5,0x7f,0xbf,0x7d,0x72,0x80
+
+# ATT:   vcvtw2ph %xmm5, %xmm6
+# INTEL: vcvtw2ph xmm6, xmm5
+0x62,0xf5,0x7e,0x08,0x7d,0xf5
+
+# ATT:   vcvtw2ph %ymm5, %ymm6
+# INTEL: vcvtw2ph ymm6, ymm5
+0x62,0xf5,0x7e,0x28,0x7d,0xf5
+
+# ATT:   vcvtw2ph  268435456(%esp,%esi,8), %xmm6 {%k7}
+# INTEL: vcvtw2ph xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7e,0x0f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtw2ph  (%ecx){1to8}, %xmm6
+# INTEL: vcvtw2ph xmm6, word ptr [ecx]{1to8}
+0x62,0xf5,0x7e,0x18,0x7d,0x31
+
+# ATT:   vcvtw2ph  2032(%ecx), %xmm6
+# INTEL: vcvtw2ph xmm6, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7e,0x08,0x7d,0x71,0x7f
+
+# ATT:   vcvtw2ph  -256(%edx){1to8}, %xmm6 {%k7} {z}
+# INTEL: vcvtw2ph xmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+0x62,0xf5,0x7e,0x9f,0x7d,0x72,0x80
+
+# ATT:   vcvtw2ph  268435456(%esp,%esi,8), %ymm6 {%k7}
+# INTEL: vcvtw2ph ymm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7e,0x2f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   vcvtw2ph  (%ecx){1to16}, %ymm6
+# INTEL: vcvtw2ph ymm6, word ptr [ecx]{1to16}
+0x62,0xf5,0x7e,0x38,0x7d,0x31
+
+# ATT:   vcvtw2ph  4064(%ecx), %ymm6
+# INTEL: vcvtw2ph ymm6, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x7e,0x28,0x7d,0x71,0x7f
+
+# ATT:   vcvtw2ph  -256(%edx){1to16}, %ymm6 {%k7} {z}
+# INTEL: vcvtw2ph ymm6 {k7} {z}, word ptr [edx - 256]{1to16}
+0x62,0xf5,0x7e,0xbf,0x7d,0x72,0x80

diff  --git a/llvm/test/MC/X86/avx512fp16.s b/llvm/test/MC/X86/avx512fp16.s
index c45d0956faa1c..1ca659f29acea 100644
--- a/llvm/test/MC/X86/avx512fp16.s
+++ b/llvm/test/MC/X86/avx512fp16.s
@@ -459,3 +459,899 @@
 // CHECK: vucomish  -256(%rdx), %xmm30
 // CHECK: encoding: [0x62,0x65,0x7c,0x08,0x2e,0x72,0x80]
           vucomish  -256(%rdx), %xmm30
+
+// CHECK: vcvtdq2ph %zmm29, %ymm30
+// CHECK: encoding: [0x62,0x05,0x7c,0x48,0x5b,0xf5]
+          vcvtdq2ph %zmm29, %ymm30
+
+// CHECK: vcvtdq2ph {rn-sae}, %zmm29, %ymm30
+// CHECK: encoding: [0x62,0x05,0x7c,0x18,0x5b,0xf5]
+          vcvtdq2ph {rn-sae}, %zmm29, %ymm30
+
+// CHECK: vcvtdq2ph  268435456(%rbp,%r14,8), %ymm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7c,0x4f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtdq2ph  268435456(%rbp,%r14,8), %ymm30 {%k7}
+
+// CHECK: vcvtdq2ph  (%r9){1to16}, %ymm30
+// CHECK: encoding: [0x62,0x45,0x7c,0x58,0x5b,0x31]
+          vcvtdq2ph  (%r9){1to16}, %ymm30
+
+// CHECK: vcvtdq2ph  8128(%rcx), %ymm30
+// CHECK: encoding: [0x62,0x65,0x7c,0x48,0x5b,0x71,0x7f]
+          vcvtdq2ph  8128(%rcx), %ymm30
+
+// CHECK: vcvtdq2ph  -512(%rdx){1to16}, %ymm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7c,0xdf,0x5b,0x72,0x80]
+          vcvtdq2ph  -512(%rdx){1to16}, %ymm30 {%k7} {z}
+
+// CHECK: vcvtpd2ph %zmm29, %xmm30
+// CHECK: encoding: [0x62,0x05,0xfd,0x48,0x5a,0xf5]
+          vcvtpd2ph %zmm29, %xmm30
+
+// CHECK: vcvtpd2ph {rn-sae}, %zmm29, %xmm30
+// CHECK: encoding: [0x62,0x05,0xfd,0x18,0x5a,0xf5]
+          vcvtpd2ph {rn-sae}, %zmm29, %xmm30
+
+// CHECK: vcvtpd2phz  268435456(%rbp,%r14,8), %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0xfd,0x4f,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtpd2phz  268435456(%rbp,%r14,8), %xmm30 {%k7}
+
+// CHECK: vcvtpd2ph  (%r9){1to8}, %xmm30
+// CHECK: encoding: [0x62,0x45,0xfd,0x58,0x5a,0x31]
+          vcvtpd2ph  (%r9){1to8}, %xmm30
+
+// CHECK: vcvtpd2phz  8128(%rcx), %xmm30
+// CHECK: encoding: [0x62,0x65,0xfd,0x48,0x5a,0x71,0x7f]
+          vcvtpd2phz  8128(%rcx), %xmm30
+
+// CHECK: vcvtpd2ph  -1024(%rdx){1to8}, %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0xfd,0xdf,0x5a,0x72,0x80]
+          vcvtpd2ph  -1024(%rdx){1to8}, %xmm30 {%k7} {z}
+
+// CHECK: vcvtph2dq %ymm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x48,0x5b,0xf5]
+          vcvtph2dq %ymm29, %zmm30
+
+// CHECK: vcvtph2dq {rn-sae}, %ymm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x18,0x5b,0xf5]
+          vcvtph2dq {rn-sae}, %ymm29, %zmm30
+
+// CHECK: vcvtph2dq  268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7d,0x4f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtph2dq  268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvtph2dq  (%r9){1to16}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7d,0x58,0x5b,0x31]
+          vcvtph2dq  (%r9){1to16}, %zmm30
+
+// CHECK: vcvtph2dq  4064(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7d,0x48,0x5b,0x71,0x7f]
+          vcvtph2dq  4064(%rcx), %zmm30
+
+// CHECK: vcvtph2dq  -256(%rdx){1to16}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7d,0xdf,0x5b,0x72,0x80]
+          vcvtph2dq  -256(%rdx){1to16}, %zmm30 {%k7} {z}
+
+// CHECK: vcvtph2pd %xmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7c,0x48,0x5a,0xf5]
+          vcvtph2pd %xmm29, %zmm30
+
+// CHECK: vcvtph2pd {sae}, %xmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7c,0x18,0x5a,0xf5]
+          vcvtph2pd {sae}, %xmm29, %zmm30
+
+// CHECK: vcvtph2pd  268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7c,0x4f,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtph2pd  268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvtph2pd  (%r9){1to8}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7c,0x58,0x5a,0x31]
+          vcvtph2pd  (%r9){1to8}, %zmm30
+
+// CHECK: vcvtph2pd  2032(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7c,0x48,0x5a,0x71,0x7f]
+          vcvtph2pd  2032(%rcx), %zmm30
+
+// CHECK: vcvtph2pd  -256(%rdx){1to8}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7c,0xdf,0x5a,0x72,0x80]
+          vcvtph2pd  -256(%rdx){1to8}, %zmm30 {%k7} {z}
+
+// CHECK: vcvtph2psx %ymm29, %zmm30
+// CHECK: encoding: [0x62,0x06,0x7d,0x48,0x13,0xf5]
+          vcvtph2psx %ymm29, %zmm30
+
+// CHECK: vcvtph2psx {sae}, %ymm29, %zmm30
+// CHECK: encoding: [0x62,0x06,0x7d,0x18,0x13,0xf5]
+          vcvtph2psx {sae}, %ymm29, %zmm30
+
+// CHECK: vcvtph2psx  268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x26,0x7d,0x4f,0x13,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtph2psx  268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvtph2psx  (%r9){1to16}, %zmm30
+// CHECK: encoding: [0x62,0x46,0x7d,0x58,0x13,0x31]
+          vcvtph2psx  (%r9){1to16}, %zmm30
+
+// CHECK: vcvtph2psx  4064(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x66,0x7d,0x48,0x13,0x71,0x7f]
+          vcvtph2psx  4064(%rcx), %zmm30
+
+// CHECK: vcvtph2psx  -256(%rdx){1to16}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x66,0x7d,0xdf,0x13,0x72,0x80]
+          vcvtph2psx  -256(%rdx){1to16}, %zmm30 {%k7} {z}
+
+// CHECK: vcvtph2qq %xmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x48,0x7b,0xf5]
+          vcvtph2qq %xmm29, %zmm30
+
+// CHECK: vcvtph2qq {rn-sae}, %xmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x18,0x7b,0xf5]
+          vcvtph2qq {rn-sae}, %xmm29, %zmm30
+
+// CHECK: vcvtph2qq  268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7d,0x4f,0x7b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtph2qq  268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvtph2qq  (%r9){1to8}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7d,0x58,0x7b,0x31]
+          vcvtph2qq  (%r9){1to8}, %zmm30
+
+// CHECK: vcvtph2qq  2032(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7d,0x48,0x7b,0x71,0x7f]
+          vcvtph2qq  2032(%rcx), %zmm30
+
+// CHECK: vcvtph2qq  -256(%rdx){1to8}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7d,0xdf,0x7b,0x72,0x80]
+          vcvtph2qq  -256(%rdx){1to8}, %zmm30 {%k7} {z}
+
+// CHECK: vcvtph2udq %ymm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7c,0x48,0x79,0xf5]
+          vcvtph2udq %ymm29, %zmm30
+
+// CHECK: vcvtph2udq {rn-sae}, %ymm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7c,0x18,0x79,0xf5]
+          vcvtph2udq {rn-sae}, %ymm29, %zmm30
+
+// CHECK: vcvtph2udq  268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7c,0x4f,0x79,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtph2udq  268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvtph2udq  (%r9){1to16}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7c,0x58,0x79,0x31]
+          vcvtph2udq  (%r9){1to16}, %zmm30
+
+// CHECK: vcvtph2udq  4064(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7c,0x48,0x79,0x71,0x7f]
+          vcvtph2udq  4064(%rcx), %zmm30
+
+// CHECK: vcvtph2udq  -256(%rdx){1to16}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7c,0xdf,0x79,0x72,0x80]
+          vcvtph2udq  -256(%rdx){1to16}, %zmm30 {%k7} {z}
+
+// CHECK: vcvtph2uqq %xmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x48,0x79,0xf5]
+          vcvtph2uqq %xmm29, %zmm30
+
+// CHECK: vcvtph2uqq {rn-sae}, %xmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x18,0x79,0xf5]
+          vcvtph2uqq {rn-sae}, %xmm29, %zmm30
+
+// CHECK: vcvtph2uqq  268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7d,0x4f,0x79,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtph2uqq  268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvtph2uqq  (%r9){1to8}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7d,0x58,0x79,0x31]
+          vcvtph2uqq  (%r9){1to8}, %zmm30
+
+// CHECK: vcvtph2uqq  2032(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7d,0x48,0x79,0x71,0x7f]
+          vcvtph2uqq  2032(%rcx), %zmm30
+
+// CHECK: vcvtph2uqq  -256(%rdx){1to8}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7d,0xdf,0x79,0x72,0x80]
+          vcvtph2uqq  -256(%rdx){1to8}, %zmm30 {%k7} {z}
+
+// CHECK: vcvtph2uw %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7c,0x48,0x7d,0xf5]
+          vcvtph2uw %zmm29, %zmm30
+
+// CHECK: vcvtph2uw {rn-sae}, %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7c,0x18,0x7d,0xf5]
+          vcvtph2uw {rn-sae}, %zmm29, %zmm30
+
+// CHECK: vcvtph2uw  268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7c,0x4f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtph2uw  268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvtph2uw  (%r9){1to32}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7c,0x58,0x7d,0x31]
+          vcvtph2uw  (%r9){1to32}, %zmm30
+
+// CHECK: vcvtph2uw  8128(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7c,0x48,0x7d,0x71,0x7f]
+          vcvtph2uw  8128(%rcx), %zmm30
+
+// CHECK: vcvtph2uw  -256(%rdx){1to32}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7c,0xdf,0x7d,0x72,0x80]
+          vcvtph2uw  -256(%rdx){1to32}, %zmm30 {%k7} {z}
+
+// CHECK: vcvtph2w %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x48,0x7d,0xf5]
+          vcvtph2w %zmm29, %zmm30
+
+// CHECK: vcvtph2w {rn-sae}, %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x18,0x7d,0xf5]
+          vcvtph2w {rn-sae}, %zmm29, %zmm30
+
+// CHECK: vcvtph2w  268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7d,0x4f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtph2w  268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvtph2w  (%r9){1to32}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7d,0x58,0x7d,0x31]
+          vcvtph2w  (%r9){1to32}, %zmm30
+
+// CHECK: vcvtph2w  8128(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7d,0x48,0x7d,0x71,0x7f]
+          vcvtph2w  8128(%rcx), %zmm30
+
+// CHECK: vcvtph2w  -256(%rdx){1to32}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7d,0xdf,0x7d,0x72,0x80]
+          vcvtph2w  -256(%rdx){1to32}, %zmm30 {%k7} {z}
+
+// CHECK: vcvtps2phx %zmm29, %ymm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x48,0x1d,0xf5]
+          vcvtps2phx %zmm29, %ymm30
+
+// CHECK: vcvtps2phx {rn-sae}, %zmm29, %ymm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x18,0x1d,0xf5]
+          vcvtps2phx {rn-sae}, %zmm29, %ymm30
+
+// CHECK: vcvtps2phx  268435456(%rbp,%r14,8), %ymm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7d,0x4f,0x1d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtps2phx  268435456(%rbp,%r14,8), %ymm30 {%k7}
+
+// CHECK: vcvtps2phx  (%r9){1to16}, %ymm30
+// CHECK: encoding: [0x62,0x45,0x7d,0x58,0x1d,0x31]
+          vcvtps2phx  (%r9){1to16}, %ymm30
+
+// CHECK: vcvtps2phx  8128(%rcx), %ymm30
+// CHECK: encoding: [0x62,0x65,0x7d,0x48,0x1d,0x71,0x7f]
+          vcvtps2phx  8128(%rcx), %ymm30
+
+// CHECK: vcvtps2phx  -512(%rdx){1to16}, %ymm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7d,0xdf,0x1d,0x72,0x80]
+          vcvtps2phx  -512(%rdx){1to16}, %ymm30 {%k7} {z}
+
+// CHECK: vcvtqq2ph %zmm29, %xmm30
+// CHECK: encoding: [0x62,0x05,0xfc,0x48,0x5b,0xf5]
+          vcvtqq2ph %zmm29, %xmm30
+
+// CHECK: vcvtqq2ph {rn-sae}, %zmm29, %xmm30
+// CHECK: encoding: [0x62,0x05,0xfc,0x18,0x5b,0xf5]
+          vcvtqq2ph {rn-sae}, %zmm29, %xmm30
+
+// CHECK: vcvtqq2phz  268435456(%rbp,%r14,8), %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0xfc,0x4f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtqq2phz  268435456(%rbp,%r14,8), %xmm30 {%k7}
+
+// CHECK: vcvtqq2ph  (%r9){1to8}, %xmm30
+// CHECK: encoding: [0x62,0x45,0xfc,0x58,0x5b,0x31]
+          vcvtqq2ph  (%r9){1to8}, %xmm30
+
+// CHECK: vcvtqq2phz  8128(%rcx), %xmm30
+// CHECK: encoding: [0x62,0x65,0xfc,0x48,0x5b,0x71,0x7f]
+          vcvtqq2phz  8128(%rcx), %xmm30
+
+// CHECK: vcvtqq2ph  -1024(%rdx){1to8}, %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0xfc,0xdf,0x5b,0x72,0x80]
+          vcvtqq2ph  -1024(%rdx){1to8}, %xmm30 {%k7} {z}
+
+// CHECK: vcvtsd2sh %xmm28, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x05,0x97,0x00,0x5a,0xf4]
+          vcvtsd2sh %xmm28, %xmm29, %xmm30
+
+// CHECK: vcvtsd2sh {rn-sae}, %xmm28, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x05,0x97,0x10,0x5a,0xf4]
+          vcvtsd2sh {rn-sae}, %xmm28, %xmm29, %xmm30
+
+// CHECK: vcvtsd2sh  268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x97,0x07,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtsd2sh  268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
+
+// CHECK: vcvtsd2sh  (%r9), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x45,0x97,0x00,0x5a,0x31]
+          vcvtsd2sh  (%r9), %xmm29, %xmm30
+
+// CHECK: vcvtsd2sh  1016(%rcx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x97,0x00,0x5a,0x71,0x7f]
+          vcvtsd2sh  1016(%rcx), %xmm29, %xmm30
+
+// CHECK: vcvtsd2sh  -1024(%rdx), %xmm29, %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x97,0x87,0x5a,0x72,0x80]
+          vcvtsd2sh  -1024(%rdx), %xmm29, %xmm30 {%k7} {z}
+
+// CHECK: vcvtsh2sd %xmm28, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x05,0x16,0x00,0x5a,0xf4]
+          vcvtsh2sd %xmm28, %xmm29, %xmm30
+
+// CHECK: vcvtsh2sd {sae}, %xmm28, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x05,0x16,0x10,0x5a,0xf4]
+          vcvtsh2sd {sae}, %xmm28, %xmm29, %xmm30
+
+// CHECK: vcvtsh2sd  268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x16,0x07,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtsh2sd  268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
+
+// CHECK: vcvtsh2sd  (%r9), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x45,0x16,0x00,0x5a,0x31]
+          vcvtsh2sd  (%r9), %xmm29, %xmm30
+
+// CHECK: vcvtsh2sd  254(%rcx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x16,0x00,0x5a,0x71,0x7f]
+          vcvtsh2sd  254(%rcx), %xmm29, %xmm30
+
+// CHECK: vcvtsh2sd  -256(%rdx), %xmm29, %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x16,0x87,0x5a,0x72,0x80]
+          vcvtsh2sd  -256(%rdx), %xmm29, %xmm30 {%k7} {z}
+
+// CHECK: vcvtsh2si %xmm30, %edx
+// CHECK: encoding: [0x62,0x95,0x7e,0x08,0x2d,0xd6]
+          vcvtsh2si %xmm30, %edx
+
+// CHECK: vcvtsh2si {rn-sae}, %xmm30, %edx
+// CHECK: encoding: [0x62,0x95,0x7e,0x18,0x2d,0xd6]
+          vcvtsh2si {rn-sae}, %xmm30, %edx
+
+// CHECK: vcvtsh2si %xmm30, %r12
+// CHECK: encoding: [0x62,0x15,0xfe,0x08,0x2d,0xe6]
+          vcvtsh2si %xmm30, %r12
+
+// CHECK: vcvtsh2si {rn-sae}, %xmm30, %r12
+// CHECK: encoding: [0x62,0x15,0xfe,0x18,0x2d,0xe6]
+          vcvtsh2si {rn-sae}, %xmm30, %r12
+
+// CHECK: vcvtsh2si  268435456(%rbp,%r14,8), %edx
+// CHECK: encoding: [0x62,0xb5,0x7e,0x08,0x2d,0x94,0xf5,0x00,0x00,0x00,0x10]
+          vcvtsh2si  268435456(%rbp,%r14,8), %edx
+
+// CHECK: vcvtsh2si  (%r9), %edx
+// CHECK: encoding: [0x62,0xd5,0x7e,0x08,0x2d,0x11]
+          vcvtsh2si  (%r9), %edx
+
+// CHECK: vcvtsh2si  254(%rcx), %edx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2d,0x51,0x7f]
+          vcvtsh2si  254(%rcx), %edx
+
+// CHECK: vcvtsh2si  -256(%rdx), %edx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2d,0x52,0x80]
+          vcvtsh2si  -256(%rdx), %edx
+
+// CHECK: vcvtsh2si  268435456(%rbp,%r14,8), %r12
+// CHECK: encoding: [0x62,0x35,0xfe,0x08,0x2d,0xa4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtsh2si  268435456(%rbp,%r14,8), %r12
+
+// CHECK: vcvtsh2si  (%r9), %r12
+// CHECK: encoding: [0x62,0x55,0xfe,0x08,0x2d,0x21]
+          vcvtsh2si  (%r9), %r12
+
+// CHECK: vcvtsh2si  254(%rcx), %r12
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x2d,0x61,0x7f]
+          vcvtsh2si  254(%rcx), %r12
+
+// CHECK: vcvtsh2si  -256(%rdx), %r12
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x2d,0x62,0x80]
+          vcvtsh2si  -256(%rdx), %r12
+
+// CHECK: vcvtsh2ss %xmm28, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x06,0x14,0x00,0x13,0xf4]
+          vcvtsh2ss %xmm28, %xmm29, %xmm30
+
+// CHECK: vcvtsh2ss {sae}, %xmm28, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x06,0x14,0x10,0x13,0xf4]
+          vcvtsh2ss {sae}, %xmm28, %xmm29, %xmm30
+
+// CHECK: vcvtsh2ss  268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x26,0x14,0x07,0x13,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtsh2ss  268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
+
+// CHECK: vcvtsh2ss  (%r9), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x46,0x14,0x00,0x13,0x31]
+          vcvtsh2ss  (%r9), %xmm29, %xmm30
+
+// CHECK: vcvtsh2ss  254(%rcx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x66,0x14,0x00,0x13,0x71,0x7f]
+          vcvtsh2ss  254(%rcx), %xmm29, %xmm30
+
+// CHECK: vcvtsh2ss  -256(%rdx), %xmm29, %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x66,0x14,0x87,0x13,0x72,0x80]
+          vcvtsh2ss  -256(%rdx), %xmm29, %xmm30 {%k7} {z}
+
+// CHECK: vcvtsh2usi %xmm30, %edx
+// CHECK: encoding: [0x62,0x95,0x7e,0x08,0x79,0xd6]
+          vcvtsh2usi %xmm30, %edx
+
+// CHECK: vcvtsh2usi {rn-sae}, %xmm30, %edx
+// CHECK: encoding: [0x62,0x95,0x7e,0x18,0x79,0xd6]
+          vcvtsh2usi {rn-sae}, %xmm30, %edx
+
+// CHECK: vcvtsh2usi %xmm30, %r12
+// CHECK: encoding: [0x62,0x15,0xfe,0x08,0x79,0xe6]
+          vcvtsh2usi %xmm30, %r12
+
+// CHECK: vcvtsh2usi {rn-sae}, %xmm30, %r12
+// CHECK: encoding: [0x62,0x15,0xfe,0x18,0x79,0xe6]
+          vcvtsh2usi {rn-sae}, %xmm30, %r12
+
+// CHECK: vcvtsh2usi  268435456(%rbp,%r14,8), %edx
+// CHECK: encoding: [0x62,0xb5,0x7e,0x08,0x79,0x94,0xf5,0x00,0x00,0x00,0x10]
+          vcvtsh2usi  268435456(%rbp,%r14,8), %edx
+
+// CHECK: vcvtsh2usi  (%r9), %edx
+// CHECK: encoding: [0x62,0xd5,0x7e,0x08,0x79,0x11]
+          vcvtsh2usi  (%r9), %edx
+
+// CHECK: vcvtsh2usi  254(%rcx), %edx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x79,0x51,0x7f]
+          vcvtsh2usi  254(%rcx), %edx
+
+// CHECK: vcvtsh2usi  -256(%rdx), %edx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x79,0x52,0x80]
+          vcvtsh2usi  -256(%rdx), %edx
+
+// CHECK: vcvtsh2usi  268435456(%rbp,%r14,8), %r12
+// CHECK: encoding: [0x62,0x35,0xfe,0x08,0x79,0xa4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtsh2usi  268435456(%rbp,%r14,8), %r12
+
+// CHECK: vcvtsh2usi  (%r9), %r12
+// CHECK: encoding: [0x62,0x55,0xfe,0x08,0x79,0x21]
+          vcvtsh2usi  (%r9), %r12
+
+// CHECK: vcvtsh2usi  254(%rcx), %r12
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x79,0x61,0x7f]
+          vcvtsh2usi  254(%rcx), %r12
+
+// CHECK: vcvtsh2usi  -256(%rdx), %r12
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x79,0x62,0x80]
+          vcvtsh2usi  -256(%rdx), %r12
+
+// CHECK: vcvtsi2sh %r12, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x45,0x96,0x00,0x2a,0xf4]
+          vcvtsi2sh %r12, %xmm29, %xmm30
+
+// CHECK: vcvtsi2sh %r12, {rn-sae}, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x45,0x96,0x10,0x2a,0xf4]
+          vcvtsi2sh %r12, {rn-sae}, %xmm29, %xmm30
+
+// CHECK: vcvtsi2sh %edx, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x16,0x00,0x2a,0xf2]
+          vcvtsi2sh %edx, %xmm29, %xmm30
+
+// CHECK: vcvtsi2sh %edx, {rn-sae}, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x16,0x10,0x2a,0xf2]
+          vcvtsi2sh %edx, {rn-sae}, %xmm29, %xmm30
+
+// CHECK: vcvtsi2shl  268435456(%rbp,%r14,8), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x25,0x16,0x00,0x2a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtsi2shl  268435456(%rbp,%r14,8), %xmm29, %xmm30
+
+// CHECK: vcvtsi2shl  (%r9), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x45,0x16,0x00,0x2a,0x31]
+          vcvtsi2shl  (%r9), %xmm29, %xmm30
+
+// CHECK: vcvtsi2shl  508(%rcx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x16,0x00,0x2a,0x71,0x7f]
+          vcvtsi2shl  508(%rcx), %xmm29, %xmm30
+
+// CHECK: vcvtsi2shl  -512(%rdx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x16,0x00,0x2a,0x72,0x80]
+          vcvtsi2shl  -512(%rdx), %xmm29, %xmm30
+
+// CHECK: vcvtsi2shq  1016(%rcx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x96,0x00,0x2a,0x71,0x7f]
+          vcvtsi2shq  1016(%rcx), %xmm29, %xmm30
+
+// CHECK: vcvtsi2shq  -1024(%rdx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x96,0x00,0x2a,0x72,0x80]
+          vcvtsi2shq  -1024(%rdx), %xmm29, %xmm30
+
+// CHECK: vcvtss2sh %xmm28, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x05,0x14,0x00,0x1d,0xf4]
+          vcvtss2sh %xmm28, %xmm29, %xmm30
+
+// CHECK: vcvtss2sh {rn-sae}, %xmm28, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x05,0x14,0x10,0x1d,0xf4]
+          vcvtss2sh {rn-sae}, %xmm28, %xmm29, %xmm30
+
+// CHECK: vcvtss2sh  268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x14,0x07,0x1d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtss2sh  268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
+
+// CHECK: vcvtss2sh  (%r9), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x45,0x14,0x00,0x1d,0x31]
+          vcvtss2sh  (%r9), %xmm29, %xmm30
+
+// CHECK: vcvtss2sh  508(%rcx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x14,0x00,0x1d,0x71,0x7f]
+          vcvtss2sh  508(%rcx), %xmm29, %xmm30
+
+// CHECK: vcvtss2sh  -512(%rdx), %xmm29, %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x14,0x87,0x1d,0x72,0x80]
+          vcvtss2sh  -512(%rdx), %xmm29, %xmm30 {%k7} {z}
+
+// CHECK: vcvttph2dq %ymm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7e,0x48,0x5b,0xf5]
+          vcvttph2dq %ymm29, %zmm30
+
+// CHECK: vcvttph2dq {sae}, %ymm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7e,0x18,0x5b,0xf5]
+          vcvttph2dq {sae}, %ymm29, %zmm30
+
+// CHECK: vcvttph2dq  268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7e,0x4f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvttph2dq  268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvttph2dq  (%r9){1to16}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7e,0x58,0x5b,0x31]
+          vcvttph2dq  (%r9){1to16}, %zmm30
+
+// CHECK: vcvttph2dq  4064(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7e,0x48,0x5b,0x71,0x7f]
+          vcvttph2dq  4064(%rcx), %zmm30
+
+// CHECK: vcvttph2dq  -256(%rdx){1to16}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7e,0xdf,0x5b,0x72,0x80]
+          vcvttph2dq  -256(%rdx){1to16}, %zmm30 {%k7} {z}
+
+// CHECK: vcvttph2qq %xmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x48,0x7a,0xf5]
+          vcvttph2qq %xmm29, %zmm30
+
+// CHECK: vcvttph2qq {sae}, %xmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x18,0x7a,0xf5]
+          vcvttph2qq {sae}, %xmm29, %zmm30
+
+// CHECK: vcvttph2qq  268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7d,0x4f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvttph2qq  268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvttph2qq  (%r9){1to8}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7d,0x58,0x7a,0x31]
+          vcvttph2qq  (%r9){1to8}, %zmm30
+
+// CHECK: vcvttph2qq  2032(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7d,0x48,0x7a,0x71,0x7f]
+          vcvttph2qq  2032(%rcx), %zmm30
+
+// CHECK: vcvttph2qq  -256(%rdx){1to8}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7d,0xdf,0x7a,0x72,0x80]
+          vcvttph2qq  -256(%rdx){1to8}, %zmm30 {%k7} {z}
+
+// CHECK: vcvttph2udq %ymm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7c,0x48,0x78,0xf5]
+          vcvttph2udq %ymm29, %zmm30
+
+// CHECK: vcvttph2udq {sae}, %ymm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7c,0x18,0x78,0xf5]
+          vcvttph2udq {sae}, %ymm29, %zmm30
+
+// CHECK: vcvttph2udq  268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7c,0x4f,0x78,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvttph2udq  268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvttph2udq  (%r9){1to16}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7c,0x58,0x78,0x31]
+          vcvttph2udq  (%r9){1to16}, %zmm30
+
+// CHECK: vcvttph2udq  4064(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7c,0x48,0x78,0x71,0x7f]
+          vcvttph2udq  4064(%rcx), %zmm30
+
+// CHECK: vcvttph2udq  -256(%rdx){1to16}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7c,0xdf,0x78,0x72,0x80]
+          vcvttph2udq  -256(%rdx){1to16}, %zmm30 {%k7} {z}
+
+// CHECK: vcvttph2uqq %xmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x48,0x78,0xf5]
+          vcvttph2uqq %xmm29, %zmm30
+
+// CHECK: vcvttph2uqq {sae}, %xmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x18,0x78,0xf5]
+          vcvttph2uqq {sae}, %xmm29, %zmm30
+
+// CHECK: vcvttph2uqq  268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7d,0x4f,0x78,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvttph2uqq  268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvttph2uqq  (%r9){1to8}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7d,0x58,0x78,0x31]
+          vcvttph2uqq  (%r9){1to8}, %zmm30
+
+// CHECK: vcvttph2uqq  2032(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7d,0x48,0x78,0x71,0x7f]
+          vcvttph2uqq  2032(%rcx), %zmm30
+
+// CHECK: vcvttph2uqq  -256(%rdx){1to8}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7d,0xdf,0x78,0x72,0x80]
+          vcvttph2uqq  -256(%rdx){1to8}, %zmm30 {%k7} {z}
+
+// CHECK: vcvttph2uw %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7c,0x48,0x7c,0xf5]
+          vcvttph2uw %zmm29, %zmm30
+
+// CHECK: vcvttph2uw {sae}, %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7c,0x18,0x7c,0xf5]
+          vcvttph2uw {sae}, %zmm29, %zmm30
+
+// CHECK: vcvttph2uw  268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7c,0x4f,0x7c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvttph2uw  268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvttph2uw  (%r9){1to32}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7c,0x58,0x7c,0x31]
+          vcvttph2uw  (%r9){1to32}, %zmm30
+
+// CHECK: vcvttph2uw  8128(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7c,0x48,0x7c,0x71,0x7f]
+          vcvttph2uw  8128(%rcx), %zmm30
+
+// CHECK: vcvttph2uw  -256(%rdx){1to32}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7c,0xdf,0x7c,0x72,0x80]
+          vcvttph2uw  -256(%rdx){1to32}, %zmm30 {%k7} {z}
+
+// CHECK: vcvttph2w %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x48,0x7c,0xf5]
+          vcvttph2w %zmm29, %zmm30
+
+// CHECK: vcvttph2w {sae}, %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7d,0x18,0x7c,0xf5]
+          vcvttph2w {sae}, %zmm29, %zmm30
+
+// CHECK: vcvttph2w  268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7d,0x4f,0x7c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvttph2w  268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvttph2w  (%r9){1to32}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7d,0x58,0x7c,0x31]
+          vcvttph2w  (%r9){1to32}, %zmm30
+
+// CHECK: vcvttph2w  8128(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7d,0x48,0x7c,0x71,0x7f]
+          vcvttph2w  8128(%rcx), %zmm30
+
+// CHECK: vcvttph2w  -256(%rdx){1to32}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7d,0xdf,0x7c,0x72,0x80]
+          vcvttph2w  -256(%rdx){1to32}, %zmm30 {%k7} {z}
+
+// CHECK: vcvttsh2si %xmm30, %edx
+// CHECK: encoding: [0x62,0x95,0x7e,0x08,0x2c,0xd6]
+          vcvttsh2si %xmm30, %edx
+
+// CHECK: vcvttsh2si {sae}, %xmm30, %edx
+// CHECK: encoding: [0x62,0x95,0x7e,0x18,0x2c,0xd6]
+          vcvttsh2si {sae}, %xmm30, %edx
+
+// CHECK: vcvttsh2si %xmm30, %r12
+// CHECK: encoding: [0x62,0x15,0xfe,0x08,0x2c,0xe6]
+          vcvttsh2si %xmm30, %r12
+
+// CHECK: vcvttsh2si {sae}, %xmm30, %r12
+// CHECK: encoding: [0x62,0x15,0xfe,0x18,0x2c,0xe6]
+          vcvttsh2si {sae}, %xmm30, %r12
+
+// CHECK: vcvttsh2si  268435456(%rbp,%r14,8), %edx
+// CHECK: encoding: [0x62,0xb5,0x7e,0x08,0x2c,0x94,0xf5,0x00,0x00,0x00,0x10]
+          vcvttsh2si  268435456(%rbp,%r14,8), %edx
+
+// CHECK: vcvttsh2si  (%r9), %edx
+// CHECK: encoding: [0x62,0xd5,0x7e,0x08,0x2c,0x11]
+          vcvttsh2si  (%r9), %edx
+
+// CHECK: vcvttsh2si  254(%rcx), %edx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2c,0x51,0x7f]
+          vcvttsh2si  254(%rcx), %edx
+
+// CHECK: vcvttsh2si  -256(%rdx), %edx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2c,0x52,0x80]
+          vcvttsh2si  -256(%rdx), %edx
+
+// CHECK: vcvttsh2si  268435456(%rbp,%r14,8), %r12
+// CHECK: encoding: [0x62,0x35,0xfe,0x08,0x2c,0xa4,0xf5,0x00,0x00,0x00,0x10]
+          vcvttsh2si  268435456(%rbp,%r14,8), %r12
+
+// CHECK: vcvttsh2si  (%r9), %r12
+// CHECK: encoding: [0x62,0x55,0xfe,0x08,0x2c,0x21]
+          vcvttsh2si  (%r9), %r12
+
+// CHECK: vcvttsh2si  254(%rcx), %r12
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x2c,0x61,0x7f]
+          vcvttsh2si  254(%rcx), %r12
+
+// CHECK: vcvttsh2si  -256(%rdx), %r12
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x2c,0x62,0x80]
+          vcvttsh2si  -256(%rdx), %r12
+
+// CHECK: vcvttsh2usi %xmm30, %edx
+// CHECK: encoding: [0x62,0x95,0x7e,0x08,0x78,0xd6]
+          vcvttsh2usi %xmm30, %edx
+
+// CHECK: vcvttsh2usi {sae}, %xmm30, %edx
+// CHECK: encoding: [0x62,0x95,0x7e,0x18,0x78,0xd6]
+          vcvttsh2usi {sae}, %xmm30, %edx
+
+// CHECK: vcvttsh2usi %xmm30, %r12
+// CHECK: encoding: [0x62,0x15,0xfe,0x08,0x78,0xe6]
+          vcvttsh2usi %xmm30, %r12
+
+// CHECK: vcvttsh2usi {sae}, %xmm30, %r12
+// CHECK: encoding: [0x62,0x15,0xfe,0x18,0x78,0xe6]
+          vcvttsh2usi {sae}, %xmm30, %r12
+
+// CHECK: vcvttsh2usi  268435456(%rbp,%r14,8), %edx
+// CHECK: encoding: [0x62,0xb5,0x7e,0x08,0x78,0x94,0xf5,0x00,0x00,0x00,0x10]
+          vcvttsh2usi  268435456(%rbp,%r14,8), %edx
+
+// CHECK: vcvttsh2usi  (%r9), %edx
+// CHECK: encoding: [0x62,0xd5,0x7e,0x08,0x78,0x11]
+          vcvttsh2usi  (%r9), %edx
+
+// CHECK: vcvttsh2usi  254(%rcx), %edx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x78,0x51,0x7f]
+          vcvttsh2usi  254(%rcx), %edx
+
+// CHECK: vcvttsh2usi  -256(%rdx), %edx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x78,0x52,0x80]
+          vcvttsh2usi  -256(%rdx), %edx
+
+// CHECK: vcvttsh2usi  268435456(%rbp,%r14,8), %r12
+// CHECK: encoding: [0x62,0x35,0xfe,0x08,0x78,0xa4,0xf5,0x00,0x00,0x00,0x10]
+          vcvttsh2usi  268435456(%rbp,%r14,8), %r12
+
+// CHECK: vcvttsh2usi  (%r9), %r12
+// CHECK: encoding: [0x62,0x55,0xfe,0x08,0x78,0x21]
+          vcvttsh2usi  (%r9), %r12
+
+// CHECK: vcvttsh2usi  254(%rcx), %r12
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x78,0x61,0x7f]
+          vcvttsh2usi  254(%rcx), %r12
+
+// CHECK: vcvttsh2usi  -256(%rdx), %r12
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x78,0x62,0x80]
+          vcvttsh2usi  -256(%rdx), %r12
+
+// CHECK: vcvtudq2ph %zmm29, %ymm30
+// CHECK: encoding: [0x62,0x05,0x7f,0x48,0x7a,0xf5]
+          vcvtudq2ph %zmm29, %ymm30
+
+// CHECK: vcvtudq2ph {rn-sae}, %zmm29, %ymm30
+// CHECK: encoding: [0x62,0x05,0x7f,0x18,0x7a,0xf5]
+          vcvtudq2ph {rn-sae}, %zmm29, %ymm30
+
+// CHECK: vcvtudq2ph  268435456(%rbp,%r14,8), %ymm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7f,0x4f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtudq2ph  268435456(%rbp,%r14,8), %ymm30 {%k7}
+
+// CHECK: vcvtudq2ph  (%r9){1to16}, %ymm30
+// CHECK: encoding: [0x62,0x45,0x7f,0x58,0x7a,0x31]
+          vcvtudq2ph  (%r9){1to16}, %ymm30
+
+// CHECK: vcvtudq2ph  8128(%rcx), %ymm30
+// CHECK: encoding: [0x62,0x65,0x7f,0x48,0x7a,0x71,0x7f]
+          vcvtudq2ph  8128(%rcx), %ymm30
+
+// CHECK: vcvtudq2ph  -512(%rdx){1to16}, %ymm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7f,0xdf,0x7a,0x72,0x80]
+          vcvtudq2ph  -512(%rdx){1to16}, %ymm30 {%k7} {z}
+
+// CHECK: vcvtuqq2ph %zmm29, %xmm30
+// CHECK: encoding: [0x62,0x05,0xff,0x48,0x7a,0xf5]
+          vcvtuqq2ph %zmm29, %xmm30
+
+// CHECK: vcvtuqq2ph {rn-sae}, %zmm29, %xmm30
+// CHECK: encoding: [0x62,0x05,0xff,0x18,0x7a,0xf5]
+          vcvtuqq2ph {rn-sae}, %zmm29, %xmm30
+
+// CHECK: vcvtuqq2phz  268435456(%rbp,%r14,8), %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0xff,0x4f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtuqq2phz  268435456(%rbp,%r14,8), %xmm30 {%k7}
+
+// CHECK: vcvtuqq2ph  (%r9){1to8}, %xmm30
+// CHECK: encoding: [0x62,0x45,0xff,0x58,0x7a,0x31]
+          vcvtuqq2ph  (%r9){1to8}, %xmm30
+
+// CHECK: vcvtuqq2phz  8128(%rcx), %xmm30
+// CHECK: encoding: [0x62,0x65,0xff,0x48,0x7a,0x71,0x7f]
+          vcvtuqq2phz  8128(%rcx), %xmm30
+
+// CHECK: vcvtuqq2ph  -1024(%rdx){1to8}, %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0xff,0xdf,0x7a,0x72,0x80]
+          vcvtuqq2ph  -1024(%rdx){1to8}, %xmm30 {%k7} {z}
+
+// CHECK: vcvtusi2sh %r12, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x45,0x96,0x00,0x7b,0xf4]
+          vcvtusi2sh %r12, %xmm29, %xmm30
+
+// CHECK: vcvtusi2sh %r12, {rn-sae}, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x45,0x96,0x10,0x7b,0xf4]
+          vcvtusi2sh %r12, {rn-sae}, %xmm29, %xmm30
+
+// CHECK: vcvtusi2sh %edx, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x16,0x00,0x7b,0xf2]
+          vcvtusi2sh %edx, %xmm29, %xmm30
+
+// CHECK: vcvtusi2sh %edx, {rn-sae}, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x16,0x10,0x7b,0xf2]
+          vcvtusi2sh %edx, {rn-sae}, %xmm29, %xmm30
+
+// CHECK: vcvtusi2shl  268435456(%rbp,%r14,8), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x25,0x16,0x00,0x7b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtusi2shl  268435456(%rbp,%r14,8), %xmm29, %xmm30
+
+// CHECK: vcvtusi2shl  (%r9), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x45,0x16,0x00,0x7b,0x31]
+          vcvtusi2shl  (%r9), %xmm29, %xmm30
+
+// CHECK: vcvtusi2shl  508(%rcx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x16,0x00,0x7b,0x71,0x7f]
+          vcvtusi2shl  508(%rcx), %xmm29, %xmm30
+
+// CHECK: vcvtusi2shl  -512(%rdx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x16,0x00,0x7b,0x72,0x80]
+          vcvtusi2shl  -512(%rdx), %xmm29, %xmm30
+
+// CHECK: vcvtusi2shq  1016(%rcx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x96,0x00,0x7b,0x71,0x7f]
+          vcvtusi2shq  1016(%rcx), %xmm29, %xmm30
+
+// CHECK: vcvtusi2shq  -1024(%rdx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x65,0x96,0x00,0x7b,0x72,0x80]
+          vcvtusi2shq  -1024(%rdx), %xmm29, %xmm30
+
+// CHECK: vcvtuw2ph %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7f,0x48,0x7d,0xf5]
+          vcvtuw2ph %zmm29, %zmm30
+
+// CHECK: vcvtuw2ph {rn-sae}, %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7f,0x18,0x7d,0xf5]
+          vcvtuw2ph {rn-sae}, %zmm29, %zmm30
+
+// CHECK: vcvtuw2ph  268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7f,0x4f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtuw2ph  268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvtuw2ph  (%r9){1to32}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7f,0x58,0x7d,0x31]
+          vcvtuw2ph  (%r9){1to32}, %zmm30
+
+// CHECK: vcvtuw2ph  8128(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7f,0x48,0x7d,0x71,0x7f]
+          vcvtuw2ph  8128(%rcx), %zmm30
+
+// CHECK: vcvtuw2ph  -256(%rdx){1to32}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7f,0xdf,0x7d,0x72,0x80]
+          vcvtuw2ph  -256(%rdx){1to32}, %zmm30 {%k7} {z}
+
+// CHECK: vcvtw2ph %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7e,0x48,0x7d,0xf5]
+          vcvtw2ph %zmm29, %zmm30
+
+// CHECK: vcvtw2ph {rn-sae}, %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x05,0x7e,0x18,0x7d,0xf5]
+          vcvtw2ph {rn-sae}, %zmm29, %zmm30
+
+// CHECK: vcvtw2ph  268435456(%rbp,%r14,8), %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x25,0x7e,0x4f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtw2ph  268435456(%rbp,%r14,8), %zmm30 {%k7}
+
+// CHECK: vcvtw2ph  (%r9){1to32}, %zmm30
+// CHECK: encoding: [0x62,0x45,0x7e,0x58,0x7d,0x31]
+          vcvtw2ph  (%r9){1to32}, %zmm30
+
+// CHECK: vcvtw2ph  8128(%rcx), %zmm30
+// CHECK: encoding: [0x62,0x65,0x7e,0x48,0x7d,0x71,0x7f]
+          vcvtw2ph  8128(%rcx), %zmm30
+
+// CHECK: vcvtw2ph  -256(%rdx){1to32}, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x65,0x7e,0xdf,0x7d,0x72,0x80]
+          vcvtw2ph  -256(%rdx){1to32}, %zmm30 {%k7} {z}

diff  --git a/llvm/test/MC/X86/avx512fp16vl.s b/llvm/test/MC/X86/avx512fp16vl.s
index e0ce1b996e906..466af9663d21a 100644
--- a/llvm/test/MC/X86/avx512fp16vl.s
+++ b/llvm/test/MC/X86/avx512fp16vl.s
@@ -279,3 +279,859 @@
 // CHECK: vsubph  -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
 // CHECK: encoding: [0x62,0xf5,0x54,0x9f,0x5c,0x72,0x80]
           vsubph  -256(%edx){1to8}, %xmm5, %xmm6 {%k7} {z}
+
+// CHECK: vcvtdq2ph %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x5b,0xf5]
+          vcvtdq2ph %xmm5, %xmm6
+
+// CHECK: vcvtdq2ph %ymm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x5b,0xf5]
+          vcvtdq2ph %ymm5, %xmm6
+
+// CHECK: vcvtdq2phx  268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x0f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtdq2phx  268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtdq2ph  (%ecx){1to4}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x5b,0x31]
+          vcvtdq2ph  (%ecx){1to4}, %xmm6
+
+// CHECK: vcvtdq2phx  2032(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x5b,0x71,0x7f]
+          vcvtdq2phx  2032(%ecx), %xmm6
+
+// CHECK: vcvtdq2ph  -512(%edx){1to4}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x9f,0x5b,0x72,0x80]
+          vcvtdq2ph  -512(%edx){1to4}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtdq2ph  (%ecx){1to8}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x38,0x5b,0x31]
+          vcvtdq2ph  (%ecx){1to8}, %xmm6
+
+// CHECK: vcvtdq2phy  4064(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x5b,0x71,0x7f]
+          vcvtdq2phy  4064(%ecx), %xmm6
+
+// CHECK: vcvtdq2ph  -512(%edx){1to8}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xbf,0x5b,0x72,0x80]
+          vcvtdq2ph  -512(%edx){1to8}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtpd2ph %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0xfd,0x08,0x5a,0xf5]
+          vcvtpd2ph %xmm5, %xmm6
+
+// CHECK: vcvtpd2ph %ymm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0xfd,0x28,0x5a,0xf5]
+          vcvtpd2ph %ymm5, %xmm6
+
+// CHECK: vcvtpd2phx  268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x0f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtpd2phx  268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtpd2ph  (%ecx){1to2}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0xfd,0x18,0x5a,0x31]
+          vcvtpd2ph  (%ecx){1to2}, %xmm6
+
+// CHECK: vcvtpd2phx  2032(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0xfd,0x08,0x5a,0x71,0x7f]
+          vcvtpd2phx  2032(%ecx), %xmm6
+
+// CHECK: vcvtpd2ph  -1024(%edx){1to2}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x9f,0x5a,0x72,0x80]
+          vcvtpd2ph  -1024(%edx){1to2}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtpd2ph  (%ecx){1to4}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0xfd,0x38,0x5a,0x31]
+          vcvtpd2ph  (%ecx){1to4}, %xmm6
+
+// CHECK: vcvtpd2phy  4064(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0xfd,0x28,0x5a,0x71,0x7f]
+          vcvtpd2phy  4064(%ecx), %xmm6
+
+// CHECK: vcvtpd2ph  -1024(%edx){1to4}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfd,0xbf,0x5a,0x72,0x80]
+          vcvtpd2ph  -1024(%edx){1to4}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtph2dq %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x5b,0xf5]
+          vcvtph2dq %xmm5, %xmm6
+
+// CHECK: vcvtph2dq %xmm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x5b,0xf5]
+          vcvtph2dq %xmm5, %ymm6
+
+// CHECK: vcvtph2dq  268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtph2dq  268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtph2dq  (%ecx){1to4}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x5b,0x31]
+          vcvtph2dq  (%ecx){1to4}, %xmm6
+
+// CHECK: vcvtph2dq  1016(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x5b,0x71,0x7f]
+          vcvtph2dq  1016(%ecx), %xmm6
+
+// CHECK: vcvtph2dq  -256(%edx){1to4}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x5b,0x72,0x80]
+          vcvtph2dq  -256(%edx){1to4}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtph2dq  268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtph2dq  268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvtph2dq  (%ecx){1to8}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x5b,0x31]
+          vcvtph2dq  (%ecx){1to8}, %ymm6
+
+// CHECK: vcvtph2dq  2032(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x5b,0x71,0x7f]
+          vcvtph2dq  2032(%ecx), %ymm6
+
+// CHECK: vcvtph2dq  -256(%edx){1to8}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x5b,0x72,0x80]
+          vcvtph2dq  -256(%edx){1to8}, %ymm6 {%k7} {z}
+
+// CHECK: vcvtph2pd %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x5a,0xf5]
+          vcvtph2pd %xmm5, %xmm6
+
+// CHECK: vcvtph2pd %xmm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x5a,0xf5]
+          vcvtph2pd %xmm5, %ymm6
+
+// CHECK: vcvtph2pd  268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x0f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtph2pd  268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtph2pd  (%ecx){1to2}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x5a,0x31]
+          vcvtph2pd  (%ecx){1to2}, %xmm6
+
+// CHECK: vcvtph2pd  508(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x5a,0x71,0x7f]
+          vcvtph2pd  508(%ecx), %xmm6
+
+// CHECK: vcvtph2pd  -256(%edx){1to2}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x9f,0x5a,0x72,0x80]
+          vcvtph2pd  -256(%edx){1to2}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtph2pd  268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x2f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtph2pd  268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvtph2pd  (%ecx){1to4}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x38,0x5a,0x31]
+          vcvtph2pd  (%ecx){1to4}, %ymm6
+
+// CHECK: vcvtph2pd  1016(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x5a,0x71,0x7f]
+          vcvtph2pd  1016(%ecx), %ymm6
+
+// CHECK: vcvtph2pd  -256(%edx){1to4}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xbf,0x5a,0x72,0x80]
+          vcvtph2pd  -256(%edx){1to4}, %ymm6 {%k7} {z}
+
+// CHECK: vcvtph2psx %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf6,0x7d,0x08,0x13,0xf5]
+          vcvtph2psx %xmm5, %xmm6
+
+// CHECK: vcvtph2psx %xmm5, %ymm6
+// CHECK: encoding: [0x62,0xf6,0x7d,0x28,0x13,0xf5]
+          vcvtph2psx %xmm5, %ymm6
+
+// CHECK: vcvtph2psx  268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x7d,0x0f,0x13,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtph2psx  268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtph2psx  (%ecx){1to4}, %xmm6
+// CHECK: encoding: [0x62,0xf6,0x7d,0x18,0x13,0x31]
+          vcvtph2psx  (%ecx){1to4}, %xmm6
+
+// CHECK: vcvtph2psx  1016(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf6,0x7d,0x08,0x13,0x71,0x7f]
+          vcvtph2psx  1016(%ecx), %xmm6
+
+// CHECK: vcvtph2psx  -256(%edx){1to4}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7d,0x9f,0x13,0x72,0x80]
+          vcvtph2psx  -256(%edx){1to4}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtph2psx  268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf6,0x7d,0x2f,0x13,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtph2psx  268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvtph2psx  (%ecx){1to8}, %ymm6
+// CHECK: encoding: [0x62,0xf6,0x7d,0x38,0x13,0x31]
+          vcvtph2psx  (%ecx){1to8}, %ymm6
+
+// CHECK: vcvtph2psx  2032(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf6,0x7d,0x28,0x13,0x71,0x7f]
+          vcvtph2psx  2032(%ecx), %ymm6
+
+// CHECK: vcvtph2psx  -256(%edx){1to8}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf6,0x7d,0xbf,0x13,0x72,0x80]
+          vcvtph2psx  -256(%edx){1to8}, %ymm6 {%k7} {z}
+
+// CHECK: vcvtph2qq %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7b,0xf5]
+          vcvtph2qq %xmm5, %xmm6
+
+// CHECK: vcvtph2qq %xmm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x7b,0xf5]
+          vcvtph2qq %xmm5, %ymm6
+
+// CHECK: vcvtph2qq  268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x7b,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtph2qq  268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtph2qq  (%ecx){1to2}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x7b,0x31]
+          vcvtph2qq  (%ecx){1to2}, %xmm6
+
+// CHECK: vcvtph2qq  508(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7b,0x71,0x7f]
+          vcvtph2qq  508(%ecx), %xmm6
+
+// CHECK: vcvtph2qq  -256(%edx){1to2}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x7b,0x72,0x80]
+          vcvtph2qq  -256(%edx){1to2}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtph2qq  268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x7b,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtph2qq  268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvtph2qq  (%ecx){1to4}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x7b,0x31]
+          vcvtph2qq  (%ecx){1to4}, %ymm6
+
+// CHECK: vcvtph2qq  1016(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x7b,0x71,0x7f]
+          vcvtph2qq  1016(%ecx), %ymm6
+
+// CHECK: vcvtph2qq  -256(%edx){1to4}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x7b,0x72,0x80]
+          vcvtph2qq  -256(%edx){1to4}, %ymm6 {%k7} {z}
+
+// CHECK: vcvtph2udq %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x79,0xf5]
+          vcvtph2udq %xmm5, %xmm6
+
+// CHECK: vcvtph2udq %xmm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x79,0xf5]
+          vcvtph2udq %xmm5, %ymm6
+
+// CHECK: vcvtph2udq  268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x0f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtph2udq  268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtph2udq  (%ecx){1to4}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x79,0x31]
+          vcvtph2udq  (%ecx){1to4}, %xmm6
+
+// CHECK: vcvtph2udq  1016(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x79,0x71,0x7f]
+          vcvtph2udq  1016(%ecx), %xmm6
+
+// CHECK: vcvtph2udq  -256(%edx){1to4}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x9f,0x79,0x72,0x80]
+          vcvtph2udq  -256(%edx){1to4}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtph2udq  268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x2f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtph2udq  268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvtph2udq  (%ecx){1to8}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x38,0x79,0x31]
+          vcvtph2udq  (%ecx){1to8}, %ymm6
+
+// CHECK: vcvtph2udq  2032(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x79,0x71,0x7f]
+          vcvtph2udq  2032(%ecx), %ymm6
+
+// CHECK: vcvtph2udq  -256(%edx){1to8}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xbf,0x79,0x72,0x80]
+          vcvtph2udq  -256(%edx){1to8}, %ymm6 {%k7} {z}
+
+// CHECK: vcvtph2uqq %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x79,0xf5]
+          vcvtph2uqq %xmm5, %xmm6
+
+// CHECK: vcvtph2uqq %xmm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x79,0xf5]
+          vcvtph2uqq %xmm5, %ymm6
+
+// CHECK: vcvtph2uqq  268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtph2uqq  268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtph2uqq  (%ecx){1to2}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x79,0x31]
+          vcvtph2uqq  (%ecx){1to2}, %xmm6
+
+// CHECK: vcvtph2uqq  508(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x79,0x71,0x7f]
+          vcvtph2uqq  508(%ecx), %xmm6
+
+// CHECK: vcvtph2uqq  -256(%edx){1to2}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x79,0x72,0x80]
+          vcvtph2uqq  -256(%edx){1to2}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtph2uqq  268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtph2uqq  268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvtph2uqq  (%ecx){1to4}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x79,0x31]
+          vcvtph2uqq  (%ecx){1to4}, %ymm6
+
+// CHECK: vcvtph2uqq  1016(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x79,0x71,0x7f]
+          vcvtph2uqq  1016(%ecx), %ymm6
+
+// CHECK: vcvtph2uqq  -256(%edx){1to4}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x79,0x72,0x80]
+          vcvtph2uqq  -256(%edx){1to4}, %ymm6 {%k7} {z}
+
+// CHECK: vcvtph2uw %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x7d,0xf5]
+          vcvtph2uw %xmm5, %xmm6
+
+// CHECK: vcvtph2uw %ymm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x7d,0xf5]
+          vcvtph2uw %ymm5, %ymm6
+
+// CHECK: vcvtph2uw  268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x0f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtph2uw  268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtph2uw  (%ecx){1to8}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x7d,0x31]
+          vcvtph2uw  (%ecx){1to8}, %xmm6
+
+// CHECK: vcvtph2uw  2032(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x7d,0x71,0x7f]
+          vcvtph2uw  2032(%ecx), %xmm6
+
+// CHECK: vcvtph2uw  -256(%edx){1to8}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x9f,0x7d,0x72,0x80]
+          vcvtph2uw  -256(%edx){1to8}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtph2uw  268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x2f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtph2uw  268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvtph2uw  (%ecx){1to16}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x38,0x7d,0x31]
+          vcvtph2uw  (%ecx){1to16}, %ymm6
+
+// CHECK: vcvtph2uw  4064(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x7d,0x71,0x7f]
+          vcvtph2uw  4064(%ecx), %ymm6
+
+// CHECK: vcvtph2uw  -256(%edx){1to16}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xbf,0x7d,0x72,0x80]
+          vcvtph2uw  -256(%edx){1to16}, %ymm6 {%k7} {z}
+
+// CHECK: vcvtph2w %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7d,0xf5]
+          vcvtph2w %xmm5, %xmm6
+
+// CHECK: vcvtph2w %ymm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x7d,0xf5]
+          vcvtph2w %ymm5, %ymm6
+
+// CHECK: vcvtph2w  268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtph2w  268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtph2w  (%ecx){1to8}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x7d,0x31]
+          vcvtph2w  (%ecx){1to8}, %xmm6
+
+// CHECK: vcvtph2w  2032(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7d,0x71,0x7f]
+          vcvtph2w  2032(%ecx), %xmm6
+
+// CHECK: vcvtph2w  -256(%edx){1to8}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x7d,0x72,0x80]
+          vcvtph2w  -256(%edx){1to8}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtph2w  268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtph2w  268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvtph2w  (%ecx){1to16}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x7d,0x31]
+          vcvtph2w  (%ecx){1to16}, %ymm6
+
+// CHECK: vcvtph2w  4064(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x7d,0x71,0x7f]
+          vcvtph2w  4064(%ecx), %ymm6
+
+// CHECK: vcvtph2w  -256(%edx){1to16}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x7d,0x72,0x80]
+          vcvtph2w  -256(%edx){1to16}, %ymm6 {%k7} {z}
+
+// CHECK: vcvtps2phx %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x1d,0xf5]
+          vcvtps2phx %xmm5, %xmm6
+
+// CHECK: vcvtps2phx %ymm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x1d,0xf5]
+          vcvtps2phx %ymm5, %xmm6
+
+// CHECK: vcvtps2phxx  268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x1d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtps2phxx  268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtps2phx  (%ecx){1to4}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x1d,0x31]
+          vcvtps2phx  (%ecx){1to4}, %xmm6
+
+// CHECK: vcvtps2phxx  2032(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x1d,0x71,0x7f]
+          vcvtps2phxx  2032(%ecx), %xmm6
+
+// CHECK: vcvtps2phx  -512(%edx){1to4}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x1d,0x72,0x80]
+          vcvtps2phx  -512(%edx){1to4}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtps2phx  (%ecx){1to8}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x1d,0x31]
+          vcvtps2phx  (%ecx){1to8}, %xmm6
+
+// CHECK: vcvtps2phxy  4064(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x1d,0x71,0x7f]
+          vcvtps2phxy  4064(%ecx), %xmm6
+
+// CHECK: vcvtps2phx  -512(%edx){1to8}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x1d,0x72,0x80]
+          vcvtps2phx  -512(%edx){1to8}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtqq2ph %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0xfc,0x08,0x5b,0xf5]
+          vcvtqq2ph %xmm5, %xmm6
+
+// CHECK: vcvtqq2ph %ymm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0xfc,0x28,0x5b,0xf5]
+          vcvtqq2ph %ymm5, %xmm6
+
+// CHECK: vcvtqq2phx  268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x0f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtqq2phx  268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtqq2ph  (%ecx){1to2}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0xfc,0x18,0x5b,0x31]
+          vcvtqq2ph  (%ecx){1to2}, %xmm6
+
+// CHECK: vcvtqq2phx  2032(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0xfc,0x08,0x5b,0x71,0x7f]
+          vcvtqq2phx  2032(%ecx), %xmm6
+
+// CHECK: vcvtqq2ph  -1024(%edx){1to2}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x9f,0x5b,0x72,0x80]
+          vcvtqq2ph  -1024(%edx){1to2}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtqq2ph  (%ecx){1to4}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0xfc,0x38,0x5b,0x31]
+          vcvtqq2ph  (%ecx){1to4}, %xmm6
+
+// CHECK: vcvtqq2phy  4064(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0xfc,0x28,0x5b,0x71,0x7f]
+          vcvtqq2phy  4064(%ecx), %xmm6
+
+// CHECK: vcvtqq2ph  -1024(%edx){1to4}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfc,0xbf,0x5b,0x72,0x80]
+          vcvtqq2ph  -1024(%edx){1to4}, %xmm6 {%k7} {z}
+
+// CHECK: vcvttph2dq %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x5b,0xf5]
+          vcvttph2dq %xmm5, %xmm6
+
+// CHECK: vcvttph2dq %xmm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x28,0x5b,0xf5]
+          vcvttph2dq %xmm5, %ymm6
+
+// CHECK: vcvttph2dq  268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x0f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvttph2dq  268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvttph2dq  (%ecx){1to4}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x5b,0x31]
+          vcvttph2dq  (%ecx){1to4}, %xmm6
+
+// CHECK: vcvttph2dq  1016(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x5b,0x71,0x7f]
+          vcvttph2dq  1016(%ecx), %xmm6
+
+// CHECK: vcvttph2dq  -256(%edx){1to4}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x9f,0x5b,0x72,0x80]
+          vcvttph2dq  -256(%edx){1to4}, %xmm6 {%k7} {z}
+
+// CHECK: vcvttph2dq  268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x2f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvttph2dq  268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvttph2dq  (%ecx){1to8}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x38,0x5b,0x31]
+          vcvttph2dq  (%ecx){1to8}, %ymm6
+
+// CHECK: vcvttph2dq  2032(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x28,0x5b,0x71,0x7f]
+          vcvttph2dq  2032(%ecx), %ymm6
+
+// CHECK: vcvttph2dq  -256(%edx){1to8}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7e,0xbf,0x5b,0x72,0x80]
+          vcvttph2dq  -256(%edx){1to8}, %ymm6 {%k7} {z}
+
+// CHECK: vcvttph2qq %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7a,0xf5]
+          vcvttph2qq %xmm5, %xmm6
+
+// CHECK: vcvttph2qq %xmm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x7a,0xf5]
+          vcvttph2qq %xmm5, %ymm6
+
+// CHECK: vcvttph2qq  268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvttph2qq  268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvttph2qq  (%ecx){1to2}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x7a,0x31]
+          vcvttph2qq  (%ecx){1to2}, %xmm6
+
+// CHECK: vcvttph2qq  508(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7a,0x71,0x7f]
+          vcvttph2qq  508(%ecx), %xmm6
+
+// CHECK: vcvttph2qq  -256(%edx){1to2}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x7a,0x72,0x80]
+          vcvttph2qq  -256(%edx){1to2}, %xmm6 {%k7} {z}
+
+// CHECK: vcvttph2qq  268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvttph2qq  268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvttph2qq  (%ecx){1to4}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x7a,0x31]
+          vcvttph2qq  (%ecx){1to4}, %ymm6
+
+// CHECK: vcvttph2qq  1016(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x7a,0x71,0x7f]
+          vcvttph2qq  1016(%ecx), %ymm6
+
+// CHECK: vcvttph2qq  -256(%edx){1to4}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x7a,0x72,0x80]
+          vcvttph2qq  -256(%edx){1to4}, %ymm6 {%k7} {z}
+
+// CHECK: vcvttph2udq %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x78,0xf5]
+          vcvttph2udq %xmm5, %xmm6
+
+// CHECK: vcvttph2udq %xmm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x78,0xf5]
+          vcvttph2udq %xmm5, %ymm6
+
+// CHECK: vcvttph2udq  268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x0f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvttph2udq  268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvttph2udq  (%ecx){1to4}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x78,0x31]
+          vcvttph2udq  (%ecx){1to4}, %xmm6
+
+// CHECK: vcvttph2udq  1016(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x78,0x71,0x7f]
+          vcvttph2udq  1016(%ecx), %xmm6
+
+// CHECK: vcvttph2udq  -256(%edx){1to4}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x9f,0x78,0x72,0x80]
+          vcvttph2udq  -256(%edx){1to4}, %xmm6 {%k7} {z}
+
+// CHECK: vcvttph2udq  268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x2f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvttph2udq  268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvttph2udq  (%ecx){1to8}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x38,0x78,0x31]
+          vcvttph2udq  (%ecx){1to8}, %ymm6
+
+// CHECK: vcvttph2udq  2032(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x78,0x71,0x7f]
+          vcvttph2udq  2032(%ecx), %ymm6
+
+// CHECK: vcvttph2udq  -256(%edx){1to8}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xbf,0x78,0x72,0x80]
+          vcvttph2udq  -256(%edx){1to8}, %ymm6 {%k7} {z}
+
+// CHECK: vcvttph2uqq %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x78,0xf5]
+          vcvttph2uqq %xmm5, %xmm6
+
+// CHECK: vcvttph2uqq %xmm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x78,0xf5]
+          vcvttph2uqq %xmm5, %ymm6
+
+// CHECK: vcvttph2uqq  268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvttph2uqq  268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvttph2uqq  (%ecx){1to2}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x78,0x31]
+          vcvttph2uqq  (%ecx){1to2}, %xmm6
+
+// CHECK: vcvttph2uqq  508(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x78,0x71,0x7f]
+          vcvttph2uqq  508(%ecx), %xmm6
+
+// CHECK: vcvttph2uqq  -256(%edx){1to2}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x78,0x72,0x80]
+          vcvttph2uqq  -256(%edx){1to2}, %xmm6 {%k7} {z}
+
+// CHECK: vcvttph2uqq  268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvttph2uqq  268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvttph2uqq  (%ecx){1to4}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x78,0x31]
+          vcvttph2uqq  (%ecx){1to4}, %ymm6
+
+// CHECK: vcvttph2uqq  1016(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x78,0x71,0x7f]
+          vcvttph2uqq  1016(%ecx), %ymm6
+
+// CHECK: vcvttph2uqq  -256(%edx){1to4}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x78,0x72,0x80]
+          vcvttph2uqq  -256(%edx){1to4}, %ymm6 {%k7} {z}
+
+// CHECK: vcvttph2uw %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x7c,0xf5]
+          vcvttph2uw %xmm5, %xmm6
+
+// CHECK: vcvttph2uw %ymm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x7c,0xf5]
+          vcvttph2uw %ymm5, %ymm6
+
+// CHECK: vcvttph2uw  268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x0f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvttph2uw  268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvttph2uw  (%ecx){1to8}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x7c,0x31]
+          vcvttph2uw  (%ecx){1to8}, %xmm6
+
+// CHECK: vcvttph2uw  2032(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x7c,0x71,0x7f]
+          vcvttph2uw  2032(%ecx), %xmm6
+
+// CHECK: vcvttph2uw  -256(%edx){1to8}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x9f,0x7c,0x72,0x80]
+          vcvttph2uw  -256(%edx){1to8}, %xmm6 {%k7} {z}
+
+// CHECK: vcvttph2uw  268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x2f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvttph2uw  268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvttph2uw  (%ecx){1to16}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x38,0x7c,0x31]
+          vcvttph2uw  (%ecx){1to16}, %ymm6
+
+// CHECK: vcvttph2uw  4064(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x7c,0x71,0x7f]
+          vcvttph2uw  4064(%ecx), %ymm6
+
+// CHECK: vcvttph2uw  -256(%edx){1to16}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xbf,0x7c,0x72,0x80]
+          vcvttph2uw  -256(%edx){1to16}, %ymm6 {%k7} {z}
+
+// CHECK: vcvttph2w %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7c,0xf5]
+          vcvttph2w %xmm5, %xmm6
+
+// CHECK: vcvttph2w %ymm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x7c,0xf5]
+          vcvttph2w %ymm5, %ymm6
+
+// CHECK: vcvttph2w  268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvttph2w  268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvttph2w  (%ecx){1to8}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x7c,0x31]
+          vcvttph2w  (%ecx){1to8}, %xmm6
+
+// CHECK: vcvttph2w  2032(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7c,0x71,0x7f]
+          vcvttph2w  2032(%ecx), %xmm6
+
+// CHECK: vcvttph2w  -256(%edx){1to8}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x7c,0x72,0x80]
+          vcvttph2w  -256(%edx){1to8}, %xmm6 {%k7} {z}
+
+// CHECK: vcvttph2w  268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvttph2w  268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvttph2w  (%ecx){1to16}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x7c,0x31]
+          vcvttph2w  (%ecx){1to16}, %ymm6
+
+// CHECK: vcvttph2w  4064(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x7c,0x71,0x7f]
+          vcvttph2w  4064(%ecx), %ymm6
+
+// CHECK: vcvttph2w  -256(%edx){1to16}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x7c,0x72,0x80]
+          vcvttph2w  -256(%edx){1to16}, %ymm6 {%k7} {z}
+
+// CHECK: vcvtudq2ph %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x7a,0xf5]
+          vcvtudq2ph %xmm5, %xmm6
+
+// CHECK: vcvtudq2ph %ymm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7f,0x28,0x7a,0xf5]
+          vcvtudq2ph %ymm5, %xmm6
+
+// CHECK: vcvtudq2phx  268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7f,0x0f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtudq2phx  268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtudq2ph  (%ecx){1to4}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7f,0x18,0x7a,0x31]
+          vcvtudq2ph  (%ecx){1to4}, %xmm6
+
+// CHECK: vcvtudq2phx  2032(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x7a,0x71,0x7f]
+          vcvtudq2phx  2032(%ecx), %xmm6
+
+// CHECK: vcvtudq2ph  -512(%edx){1to4}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7f,0x9f,0x7a,0x72,0x80]
+          vcvtudq2ph  -512(%edx){1to4}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtudq2ph  (%ecx){1to8}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7f,0x38,0x7a,0x31]
+          vcvtudq2ph  (%ecx){1to8}, %xmm6
+
+// CHECK: vcvtudq2phy  4064(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7f,0x28,0x7a,0x71,0x7f]
+          vcvtudq2phy  4064(%ecx), %xmm6
+
+// CHECK: vcvtudq2ph  -512(%edx){1to8}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7f,0xbf,0x7a,0x72,0x80]
+          vcvtudq2ph  -512(%edx){1to8}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtuqq2ph %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0xff,0x08,0x7a,0xf5]
+          vcvtuqq2ph %xmm5, %xmm6
+
+// CHECK: vcvtuqq2ph %ymm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0xff,0x28,0x7a,0xf5]
+          vcvtuqq2ph %ymm5, %xmm6
+
+// CHECK: vcvtuqq2phx  268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0xff,0x0f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtuqq2phx  268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtuqq2ph  (%ecx){1to2}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0xff,0x18,0x7a,0x31]
+          vcvtuqq2ph  (%ecx){1to2}, %xmm6
+
+// CHECK: vcvtuqq2phx  2032(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0xff,0x08,0x7a,0x71,0x7f]
+          vcvtuqq2phx  2032(%ecx), %xmm6
+
+// CHECK: vcvtuqq2ph  -1024(%edx){1to2}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xff,0x9f,0x7a,0x72,0x80]
+          vcvtuqq2ph  -1024(%edx){1to2}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtuqq2ph  (%ecx){1to4}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0xff,0x38,0x7a,0x31]
+          vcvtuqq2ph  (%ecx){1to4}, %xmm6
+
+// CHECK: vcvtuqq2phy  4064(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0xff,0x28,0x7a,0x71,0x7f]
+          vcvtuqq2phy  4064(%ecx), %xmm6
+
+// CHECK: vcvtuqq2ph  -1024(%edx){1to4}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xff,0xbf,0x7a,0x72,0x80]
+          vcvtuqq2ph  -1024(%edx){1to4}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtuw2ph %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x7d,0xf5]
+          vcvtuw2ph %xmm5, %xmm6
+
+// CHECK: vcvtuw2ph %ymm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7f,0x28,0x7d,0xf5]
+          vcvtuw2ph %ymm5, %ymm6
+
+// CHECK: vcvtuw2ph  268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7f,0x0f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtuw2ph  268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtuw2ph  (%ecx){1to8}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7f,0x18,0x7d,0x31]
+          vcvtuw2ph  (%ecx){1to8}, %xmm6
+
+// CHECK: vcvtuw2ph  2032(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x7d,0x71,0x7f]
+          vcvtuw2ph  2032(%ecx), %xmm6
+
+// CHECK: vcvtuw2ph  -256(%edx){1to8}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7f,0x9f,0x7d,0x72,0x80]
+          vcvtuw2ph  -256(%edx){1to8}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtuw2ph  268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7f,0x2f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtuw2ph  268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvtuw2ph  (%ecx){1to16}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7f,0x38,0x7d,0x31]
+          vcvtuw2ph  (%ecx){1to16}, %ymm6
+
+// CHECK: vcvtuw2ph  4064(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7f,0x28,0x7d,0x71,0x7f]
+          vcvtuw2ph  4064(%ecx), %ymm6
+
+// CHECK: vcvtuw2ph  -256(%edx){1to16}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7f,0xbf,0x7d,0x72,0x80]
+          vcvtuw2ph  -256(%edx){1to16}, %ymm6 {%k7} {z}
+
+// CHECK: vcvtw2ph %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x7d,0xf5]
+          vcvtw2ph %xmm5, %xmm6
+
+// CHECK: vcvtw2ph %ymm5, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x28,0x7d,0xf5]
+          vcvtw2ph %ymm5, %ymm6
+
+// CHECK: vcvtw2ph  268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x0f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtw2ph  268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtw2ph  (%ecx){1to8}, %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x7d,0x31]
+          vcvtw2ph  (%ecx){1to8}, %xmm6
+
+// CHECK: vcvtw2ph  2032(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x7d,0x71,0x7f]
+          vcvtw2ph  2032(%ecx), %xmm6
+
+// CHECK: vcvtw2ph  -256(%edx){1to8}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x9f,0x7d,0x72,0x80]
+          vcvtw2ph  -256(%edx){1to8}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtw2ph  268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x2f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtw2ph  268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvtw2ph  (%ecx){1to16}, %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x38,0x7d,0x31]
+          vcvtw2ph  (%ecx){1to16}, %ymm6
+
+// CHECK: vcvtw2ph  4064(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x28,0x7d,0x71,0x7f]
+          vcvtw2ph  4064(%ecx), %ymm6
+
+// CHECK: vcvtw2ph  -256(%edx){1to16}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7e,0xbf,0x7d,0x72,0x80]
+          vcvtw2ph  -256(%edx){1to16}, %ymm6 {%k7} {z}

diff --git a/llvm/test/MC/X86/intel-syntax-avx512fp16.s b/llvm/test/MC/X86/intel-syntax-avx512fp16.s
index 5d95bc82375a0..4b842f9bc622c 100644
--- a/llvm/test/MC/X86/intel-syntax-avx512fp16.s
+++ b/llvm/test/MC/X86/intel-syntax-avx512fp16.s
@@ -459,3 +459,771 @@
 // CHECK: vucomish xmm6, word ptr [edx - 256]
 // CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x2e,0x72,0x80]
           vucomish xmm6, word ptr [edx - 256]
+
+// CHECK: vcvtdq2ph ymm6, zmm5
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x5b,0xf5]
+          vcvtdq2ph ymm6, zmm5
+
+// CHECK: vcvtdq2ph ymm6, zmm5, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x5b,0xf5]
+          vcvtdq2ph ymm6, zmm5, {rn-sae}
+
+// CHECK: vcvtdq2ph ymm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x4f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtdq2ph ymm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtdq2ph ymm6, dword ptr [ecx]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x58,0x5b,0x31]
+          vcvtdq2ph ymm6, dword ptr [ecx]{1to16}
+
+// CHECK: vcvtdq2ph ymm6, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x5b,0x71,0x7f]
+          vcvtdq2ph ymm6, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvtdq2ph ymm6 {k7} {z}, dword ptr [edx - 512]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xdf,0x5b,0x72,0x80]
+          vcvtdq2ph ymm6 {k7} {z}, dword ptr [edx - 512]{1to16}
+
+// CHECK: vcvtpd2ph xmm6, zmm5
+// CHECK: encoding: [0x62,0xf5,0xfd,0x48,0x5a,0xf5]
+          vcvtpd2ph xmm6, zmm5
+
+// CHECK: vcvtpd2ph xmm6, zmm5, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x18,0x5a,0xf5]
+          vcvtpd2ph xmm6, zmm5, {rn-sae}
+
+// CHECK: vcvtpd2ph xmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0xfd,0x4f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtpd2ph xmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtpd2ph xmm6, qword ptr [ecx]{1to8}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x58,0x5a,0x31]
+          vcvtpd2ph xmm6, qword ptr [ecx]{1to8}
+
+// CHECK: vcvtpd2ph xmm6, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0xfd,0x48,0x5a,0x71,0x7f]
+          vcvtpd2ph xmm6, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvtpd2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to8}
+// CHECK: encoding: [0x62,0xf5,0xfd,0xdf,0x5a,0x72,0x80]
+          vcvtpd2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to8}
+
+// CHECK: vcvtph2dq zmm6, ymm5
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x5b,0xf5]
+          vcvtph2dq zmm6, ymm5
+
+// CHECK: vcvtph2dq zmm6, ymm5, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x5b,0xf5]
+          vcvtph2dq zmm6, ymm5, {rn-sae}
+
+// CHECK: vcvtph2dq zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtph2dq zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtph2dq zmm6, word ptr [ecx]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x5b,0x31]
+          vcvtph2dq zmm6, word ptr [ecx]{1to16}
+
+// CHECK: vcvtph2dq zmm6, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x5b,0x71,0x7f]
+          vcvtph2dq zmm6, ymmword ptr [ecx + 4064]
+
+// CHECK: vcvtph2dq zmm6 {k7} {z}, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x5b,0x72,0x80]
+          vcvtph2dq zmm6 {k7} {z}, word ptr [edx - 256]{1to16}
+
+// CHECK: vcvtph2pd zmm6, xmm5
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x5a,0xf5]
+          vcvtph2pd zmm6, xmm5
+
+// CHECK: vcvtph2pd zmm6, xmm5, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x5a,0xf5]
+          vcvtph2pd zmm6, xmm5, {sae}
+
+// CHECK: vcvtph2pd zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x4f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtph2pd zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtph2pd zmm6, word ptr [ecx]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x58,0x5a,0x31]
+          vcvtph2pd zmm6, word ptr [ecx]{1to8}
+
+// CHECK: vcvtph2pd zmm6, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x5a,0x71,0x7f]
+          vcvtph2pd zmm6, xmmword ptr [ecx + 2032]
+
+// CHECK: vcvtph2pd zmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xdf,0x5a,0x72,0x80]
+          vcvtph2pd zmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+
+// CHECK: vcvtph2psx zmm6, ymm5
+// CHECK: encoding: [0x62,0xf6,0x7d,0x48,0x13,0xf5]
+          vcvtph2psx zmm6, ymm5
+
+// CHECK: vcvtph2psx zmm6, ymm5, {sae}
+// CHECK: encoding: [0x62,0xf6,0x7d,0x18,0x13,0xf5]
+          vcvtph2psx zmm6, ymm5, {sae}
+
+// CHECK: vcvtph2psx zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x7d,0x4f,0x13,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtph2psx zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtph2psx zmm6, word ptr [ecx]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x7d,0x58,0x13,0x31]
+          vcvtph2psx zmm6, word ptr [ecx]{1to16}
+
+// CHECK: vcvtph2psx zmm6, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf6,0x7d,0x48,0x13,0x71,0x7f]
+          vcvtph2psx zmm6, ymmword ptr [ecx + 4064]
+
+// CHECK: vcvtph2psx zmm6 {k7} {z}, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf6,0x7d,0xdf,0x13,0x72,0x80]
+          vcvtph2psx zmm6 {k7} {z}, word ptr [edx - 256]{1to16}
+
+// CHECK: vcvtph2qq zmm6, xmm5
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x7b,0xf5]
+          vcvtph2qq zmm6, xmm5
+
+// CHECK: vcvtph2qq zmm6, xmm5, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x7b,0xf5]
+          vcvtph2qq zmm6, xmm5, {rn-sae}
+
+// CHECK: vcvtph2qq zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x7b,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtph2qq zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtph2qq zmm6, word ptr [ecx]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x7b,0x31]
+          vcvtph2qq zmm6, word ptr [ecx]{1to8}
+
+// CHECK: vcvtph2qq zmm6, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x7b,0x71,0x7f]
+          vcvtph2qq zmm6, xmmword ptr [ecx + 2032]
+
+// CHECK: vcvtph2qq zmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x7b,0x72,0x80]
+          vcvtph2qq zmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+
+// CHECK: vcvtph2udq zmm6, ymm5
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x79,0xf5]
+          vcvtph2udq zmm6, ymm5
+
+// CHECK: vcvtph2udq zmm6, ymm5, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x79,0xf5]
+          vcvtph2udq zmm6, ymm5, {rn-sae}
+
+// CHECK: vcvtph2udq zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x4f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtph2udq zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtph2udq zmm6, word ptr [ecx]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x58,0x79,0x31]
+          vcvtph2udq zmm6, word ptr [ecx]{1to16}
+
+// CHECK: vcvtph2udq zmm6, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x79,0x71,0x7f]
+          vcvtph2udq zmm6, ymmword ptr [ecx + 4064]
+
+// CHECK: vcvtph2udq zmm6 {k7} {z}, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xdf,0x79,0x72,0x80]
+          vcvtph2udq zmm6 {k7} {z}, word ptr [edx - 256]{1to16}
+
+// CHECK: vcvtph2uqq zmm6, xmm5
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x79,0xf5]
+          vcvtph2uqq zmm6, xmm5
+
+// CHECK: vcvtph2uqq zmm6, xmm5, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x79,0xf5]
+          vcvtph2uqq zmm6, xmm5, {rn-sae}
+
+// CHECK: vcvtph2uqq zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x79,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtph2uqq zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtph2uqq zmm6, word ptr [ecx]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x79,0x31]
+          vcvtph2uqq zmm6, word ptr [ecx]{1to8}
+
+// CHECK: vcvtph2uqq zmm6, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x79,0x71,0x7f]
+          vcvtph2uqq zmm6, xmmword ptr [ecx + 2032]
+
+// CHECK: vcvtph2uqq zmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x79,0x72,0x80]
+          vcvtph2uqq zmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+
+// CHECK: vcvtph2uw zmm6, zmm5
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x7d,0xf5]
+          vcvtph2uw zmm6, zmm5
+
+// CHECK: vcvtph2uw zmm6, zmm5, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x7d,0xf5]
+          vcvtph2uw zmm6, zmm5, {rn-sae}
+
+// CHECK: vcvtph2uw zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x4f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtph2uw zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtph2uw zmm6, word ptr [ecx]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x58,0x7d,0x31]
+          vcvtph2uw zmm6, word ptr [ecx]{1to32}
+
+// CHECK: vcvtph2uw zmm6, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x7d,0x71,0x7f]
+          vcvtph2uw zmm6, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvtph2uw zmm6 {k7} {z}, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xdf,0x7d,0x72,0x80]
+          vcvtph2uw zmm6 {k7} {z}, word ptr [edx - 256]{1to32}
+
+// CHECK: vcvtph2w zmm6, zmm5
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x7d,0xf5]
+          vcvtph2w zmm6, zmm5
+
+// CHECK: vcvtph2w zmm6, zmm5, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x7d,0xf5]
+          vcvtph2w zmm6, zmm5, {rn-sae}
+
+// CHECK: vcvtph2w zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtph2w zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtph2w zmm6, word ptr [ecx]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x7d,0x31]
+          vcvtph2w zmm6, word ptr [ecx]{1to32}
+
+// CHECK: vcvtph2w zmm6, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x7d,0x71,0x7f]
+          vcvtph2w zmm6, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvtph2w zmm6 {k7} {z}, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x7d,0x72,0x80]
+          vcvtph2w zmm6 {k7} {z}, word ptr [edx - 256]{1to32}
+
+// CHECK: vcvtps2phx ymm6, zmm5
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x1d,0xf5]
+          vcvtps2phx ymm6, zmm5
+
+// CHECK: vcvtps2phx ymm6, zmm5, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x1d,0xf5]
+          vcvtps2phx ymm6, zmm5, {rn-sae}
+
+// CHECK: vcvtps2phx ymm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x1d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtps2phx ymm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtps2phx ymm6, dword ptr [ecx]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x1d,0x31]
+          vcvtps2phx ymm6, dword ptr [ecx]{1to16}
+
+// CHECK: vcvtps2phx ymm6, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x1d,0x71,0x7f]
+          vcvtps2phx ymm6, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvtps2phx ymm6 {k7} {z}, dword ptr [edx - 512]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x1d,0x72,0x80]
+          vcvtps2phx ymm6 {k7} {z}, dword ptr [edx - 512]{1to16}
+
+// CHECK: vcvtqq2ph xmm6, zmm5
+// CHECK: encoding: [0x62,0xf5,0xfc,0x48,0x5b,0xf5]
+          vcvtqq2ph xmm6, zmm5
+
+// CHECK: vcvtqq2ph xmm6, zmm5, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x18,0x5b,0xf5]
+          vcvtqq2ph xmm6, zmm5, {rn-sae}
+
+// CHECK: vcvtqq2ph xmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0xfc,0x4f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtqq2ph xmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtqq2ph xmm6, qword ptr [ecx]{1to8}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x58,0x5b,0x31]
+          vcvtqq2ph xmm6, qword ptr [ecx]{1to8}
+
+// CHECK: vcvtqq2ph xmm6, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0xfc,0x48,0x5b,0x71,0x7f]
+          vcvtqq2ph xmm6, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvtqq2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to8}
+// CHECK: encoding: [0x62,0xf5,0xfc,0xdf,0x5b,0x72,0x80]
+          vcvtqq2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to8}
+
+// CHECK: vcvtsd2sh xmm6, xmm5, xmm4
+// CHECK: encoding: [0x62,0xf5,0xd7,0x08,0x5a,0xf4]
+          vcvtsd2sh xmm6, xmm5, xmm4
+
+// CHECK: vcvtsd2sh xmm6, xmm5, xmm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0xd7,0x18,0x5a,0xf4]
+          vcvtsd2sh xmm6, xmm5, xmm4, {rn-sae}
+
+// CHECK: vcvtsd2sh xmm6 {k7}, xmm5, qword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0xd7,0x0f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtsd2sh xmm6 {k7}, xmm5, qword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtsd2sh xmm6, xmm5, qword ptr [ecx]
+// CHECK: encoding: [0x62,0xf5,0xd7,0x08,0x5a,0x31]
+          vcvtsd2sh xmm6, xmm5, qword ptr [ecx]
+
+// CHECK: vcvtsd2sh xmm6, xmm5, qword ptr [ecx + 1016]
+// CHECK: encoding: [0x62,0xf5,0xd7,0x08,0x5a,0x71,0x7f]
+          vcvtsd2sh xmm6, xmm5, qword ptr [ecx + 1016]
+
+// CHECK: vcvtsd2sh xmm6 {k7} {z}, xmm5, qword ptr [edx - 1024]
+// CHECK: encoding: [0x62,0xf5,0xd7,0x8f,0x5a,0x72,0x80]
+          vcvtsd2sh xmm6 {k7} {z}, xmm5, qword ptr [edx - 1024]
+
+// CHECK: vcvtsh2sd xmm6, xmm5, xmm4
+// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x5a,0xf4]
+          vcvtsh2sd xmm6, xmm5, xmm4
+
+// CHECK: vcvtsh2sd xmm6, xmm5, xmm4, {sae}
+// CHECK: encoding: [0x62,0xf5,0x56,0x18,0x5a,0xf4]
+          vcvtsh2sd xmm6, xmm5, xmm4, {sae}
+
+// CHECK: vcvtsh2sd xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x56,0x0f,0x5a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtsh2sd xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtsh2sd xmm6, xmm5, word ptr [ecx]
+// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x5a,0x31]
+          vcvtsh2sd xmm6, xmm5, word ptr [ecx]
+
+// CHECK: vcvtsh2sd xmm6, xmm5, word ptr [ecx + 254]
+// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x5a,0x71,0x7f]
+          vcvtsh2sd xmm6, xmm5, word ptr [ecx + 254]
+
+// CHECK: vcvtsh2sd xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
+// CHECK: encoding: [0x62,0xf5,0x56,0x8f,0x5a,0x72,0x80]
+          vcvtsh2sd xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
+
+// CHECK: vcvtsh2si edx, xmm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2d,0xd6]
+          vcvtsh2si edx, xmm6
+
+// CHECK: vcvtsh2si edx, xmm6, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x2d,0xd6]
+          vcvtsh2si edx, xmm6, {rn-sae}
+
+// CHECK: vcvtsh2si edx, word ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2d,0x94,0xf4,0x00,0x00,0x00,0x10]
+          vcvtsh2si edx, word ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtsh2si edx, word ptr [ecx]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2d,0x11]
+          vcvtsh2si edx, word ptr [ecx]
+
+// CHECK: vcvtsh2si edx, word ptr [ecx + 254]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2d,0x51,0x7f]
+          vcvtsh2si edx, word ptr [ecx + 254]
+
+// CHECK: vcvtsh2si edx, word ptr [edx - 256]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2d,0x52,0x80]
+          vcvtsh2si edx, word ptr [edx - 256]
+
+// CHECK: vcvtsh2ss xmm6, xmm5, xmm4
+// CHECK: encoding: [0x62,0xf6,0x54,0x08,0x13,0xf4]
+          vcvtsh2ss xmm6, xmm5, xmm4
+
+// CHECK: vcvtsh2ss xmm6, xmm5, xmm4, {sae}
+// CHECK: encoding: [0x62,0xf6,0x54,0x18,0x13,0xf4]
+          vcvtsh2ss xmm6, xmm5, xmm4, {sae}
+
+// CHECK: vcvtsh2ss xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf6,0x54,0x0f,0x13,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtsh2ss xmm6 {k7}, xmm5, word ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtsh2ss xmm6, xmm5, word ptr [ecx]
+// CHECK: encoding: [0x62,0xf6,0x54,0x08,0x13,0x31]
+          vcvtsh2ss xmm6, xmm5, word ptr [ecx]
+
+// CHECK: vcvtsh2ss xmm6, xmm5, word ptr [ecx + 254]
+// CHECK: encoding: [0x62,0xf6,0x54,0x08,0x13,0x71,0x7f]
+          vcvtsh2ss xmm6, xmm5, word ptr [ecx + 254]
+
+// CHECK: vcvtsh2ss xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
+// CHECK: encoding: [0x62,0xf6,0x54,0x8f,0x13,0x72,0x80]
+          vcvtsh2ss xmm6 {k7} {z}, xmm5, word ptr [edx - 256]
+
+// CHECK: vcvtsh2usi edx, xmm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x79,0xd6]
+          vcvtsh2usi edx, xmm6
+
+// CHECK: vcvtsh2usi edx, xmm6, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x79,0xd6]
+          vcvtsh2usi edx, xmm6, {rn-sae}
+
+// CHECK: vcvtsh2usi edx, word ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x79,0x94,0xf4,0x00,0x00,0x00,0x10]
+          vcvtsh2usi edx, word ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtsh2usi edx, word ptr [ecx]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x79,0x11]
+          vcvtsh2usi edx, word ptr [ecx]
+
+// CHECK: vcvtsh2usi edx, word ptr [ecx + 254]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x79,0x51,0x7f]
+          vcvtsh2usi edx, word ptr [ecx + 254]
+
+// CHECK: vcvtsh2usi edx, word ptr [edx - 256]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x79,0x52,0x80]
+          vcvtsh2usi edx, word ptr [edx - 256]
+
+// CHECK: vcvtsi2sh xmm6, xmm5, edx
+// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x2a,0xf2]
+          vcvtsi2sh xmm6, xmm5, edx
+
+// CHECK: vcvtsi2sh xmm6, xmm5, {rn-sae}, edx
+// CHECK: encoding: [0x62,0xf5,0x56,0x18,0x2a,0xf2]
+          vcvtsi2sh xmm6, xmm5, {rn-sae}, edx
+
+// CHECK: vcvtsi2sh xmm6, xmm5, dword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x2a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtsi2sh xmm6, xmm5, dword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtsi2sh xmm6, xmm5, dword ptr [ecx]
+// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x2a,0x31]
+          vcvtsi2sh xmm6, xmm5, dword ptr [ecx]
+
+// CHECK: vcvtsi2sh xmm6, xmm5, dword ptr [ecx + 508]
+// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x2a,0x71,0x7f]
+          vcvtsi2sh xmm6, xmm5, dword ptr [ecx + 508]
+
+// CHECK: vcvtsi2sh xmm6, xmm5, dword ptr [edx - 512]
+// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x2a,0x72,0x80]
+          vcvtsi2sh xmm6, xmm5, dword ptr [edx - 512]
+
+// CHECK: vcvtss2sh xmm6, xmm5, xmm4
+// CHECK: encoding: [0x62,0xf5,0x54,0x08,0x1d,0xf4]
+          vcvtss2sh xmm6, xmm5, xmm4
+
+// CHECK: vcvtss2sh xmm6, xmm5, xmm4, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x54,0x18,0x1d,0xf4]
+          vcvtss2sh xmm6, xmm5, xmm4, {rn-sae}
+
+// CHECK: vcvtss2sh xmm6 {k7}, xmm5, dword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x54,0x0f,0x1d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtss2sh xmm6 {k7}, xmm5, dword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtss2sh xmm6, xmm5, dword ptr [ecx]
+// CHECK: encoding: [0x62,0xf5,0x54,0x08,0x1d,0x31]
+          vcvtss2sh xmm6, xmm5, dword ptr [ecx]
+
+// CHECK: vcvtss2sh xmm6, xmm5, dword ptr [ecx + 508]
+// CHECK: encoding: [0x62,0xf5,0x54,0x08,0x1d,0x71,0x7f]
+          vcvtss2sh xmm6, xmm5, dword ptr [ecx + 508]
+
+// CHECK: vcvtss2sh xmm6 {k7} {z}, xmm5, dword ptr [edx - 512]
+// CHECK: encoding: [0x62,0xf5,0x54,0x8f,0x1d,0x72,0x80]
+          vcvtss2sh xmm6 {k7} {z}, xmm5, dword ptr [edx - 512]
+
+// CHECK: vcvttph2dq zmm6, ymm5
+// CHECK: encoding: [0x62,0xf5,0x7e,0x48,0x5b,0xf5]
+          vcvttph2dq zmm6, ymm5
+
+// CHECK: vcvttph2dq zmm6, ymm5, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x5b,0xf5]
+          vcvttph2dq zmm6, ymm5, {sae}
+
+// CHECK: vcvttph2dq zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x4f,0x5b,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvttph2dq zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttph2dq zmm6, word ptr [ecx]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x58,0x5b,0x31]
+          vcvttph2dq zmm6, word ptr [ecx]{1to16}
+
+// CHECK: vcvttph2dq zmm6, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x48,0x5b,0x71,0x7f]
+          vcvttph2dq zmm6, ymmword ptr [ecx + 4064]
+
+// CHECK: vcvttph2dq zmm6 {k7} {z}, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7e,0xdf,0x5b,0x72,0x80]
+          vcvttph2dq zmm6 {k7} {z}, word ptr [edx - 256]{1to16}
+
+// CHECK: vcvttph2qq zmm6, xmm5
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x7a,0xf5]
+          vcvttph2qq zmm6, xmm5
+
+// CHECK: vcvttph2qq zmm6, xmm5, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x7a,0xf5]
+          vcvttph2qq zmm6, xmm5, {sae}
+
+// CHECK: vcvttph2qq zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvttph2qq zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttph2qq zmm6, word ptr [ecx]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x7a,0x31]
+          vcvttph2qq zmm6, word ptr [ecx]{1to8}
+
+// CHECK: vcvttph2qq zmm6, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x7a,0x71,0x7f]
+          vcvttph2qq zmm6, xmmword ptr [ecx + 2032]
+
+// CHECK: vcvttph2qq zmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x7a,0x72,0x80]
+          vcvttph2qq zmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+
+// CHECK: vcvttph2udq zmm6, ymm5
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x78,0xf5]
+          vcvttph2udq zmm6, ymm5
+
+// CHECK: vcvttph2udq zmm6, ymm5, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x78,0xf5]
+          vcvttph2udq zmm6, ymm5, {sae}
+
+// CHECK: vcvttph2udq zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x4f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvttph2udq zmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttph2udq zmm6, word ptr [ecx]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x58,0x78,0x31]
+          vcvttph2udq zmm6, word ptr [ecx]{1to16}
+
+// CHECK: vcvttph2udq zmm6, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x78,0x71,0x7f]
+          vcvttph2udq zmm6, ymmword ptr [ecx + 4064]
+
+// CHECK: vcvttph2udq zmm6 {k7} {z}, word ptr [edx - 256]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xdf,0x78,0x72,0x80]
+          vcvttph2udq zmm6 {k7} {z}, word ptr [edx - 256]{1to16}
+
+// CHECK: vcvttph2uqq zmm6, xmm5
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x78,0xf5]
+          vcvttph2uqq zmm6, xmm5
+
+// CHECK: vcvttph2uqq zmm6, xmm5, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x78,0xf5]
+          vcvttph2uqq zmm6, xmm5, {sae}
+
+// CHECK: vcvttph2uqq zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x78,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvttph2uqq zmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttph2uqq zmm6, word ptr [ecx]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x78,0x31]
+          vcvttph2uqq zmm6, word ptr [ecx]{1to8}
+
+// CHECK: vcvttph2uqq zmm6, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x78,0x71,0x7f]
+          vcvttph2uqq zmm6, xmmword ptr [ecx + 2032]
+
+// CHECK: vcvttph2uqq zmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x78,0x72,0x80]
+          vcvttph2uqq zmm6 {k7} {z}, word ptr [edx - 256]{1to8}
+
+// CHECK: vcvttph2uw zmm6, zmm5
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x7c,0xf5]
+          vcvttph2uw zmm6, zmm5
+
+// CHECK: vcvttph2uw zmm6, zmm5, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x7c,0xf5]
+          vcvttph2uw zmm6, zmm5, {sae}
+
+// CHECK: vcvttph2uw zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x4f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvttph2uw zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttph2uw zmm6, word ptr [ecx]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x58,0x7c,0x31]
+          vcvttph2uw zmm6, word ptr [ecx]{1to32}
+
+// CHECK: vcvttph2uw zmm6, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x7c,0x71,0x7f]
+          vcvttph2uw zmm6, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvttph2uw zmm6 {k7} {z}, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xdf,0x7c,0x72,0x80]
+          vcvttph2uw zmm6 {k7} {z}, word ptr [edx - 256]{1to32}
+
+// CHECK: vcvttph2w zmm6, zmm5
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x7c,0xf5]
+          vcvttph2w zmm6, zmm5
+
+// CHECK: vcvttph2w zmm6, zmm5, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x7c,0xf5]
+          vcvttph2w zmm6, zmm5, {sae}
+
+// CHECK: vcvttph2w zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x7c,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvttph2w zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttph2w zmm6, word ptr [ecx]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x7c,0x31]
+          vcvttph2w zmm6, word ptr [ecx]{1to32}
+
+// CHECK: vcvttph2w zmm6, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x7c,0x71,0x7f]
+          vcvttph2w zmm6, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvttph2w zmm6 {k7} {z}, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x7c,0x72,0x80]
+          vcvttph2w zmm6 {k7} {z}, word ptr [edx - 256]{1to32}
+
+// CHECK: vcvttsh2si edx, xmm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2c,0xd6]
+          vcvttsh2si edx, xmm6
+
+// CHECK: vcvttsh2si edx, xmm6, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x2c,0xd6]
+          vcvttsh2si edx, xmm6, {sae}
+
+// CHECK: vcvttsh2si edx, word ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2c,0x94,0xf4,0x00,0x00,0x00,0x10]
+          vcvttsh2si edx, word ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttsh2si edx, word ptr [ecx]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2c,0x11]
+          vcvttsh2si edx, word ptr [ecx]
+
+// CHECK: vcvttsh2si edx, word ptr [ecx + 254]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2c,0x51,0x7f]
+          vcvttsh2si edx, word ptr [ecx + 254]
+
+// CHECK: vcvttsh2si edx, word ptr [edx - 256]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x2c,0x52,0x80]
+          vcvttsh2si edx, word ptr [edx - 256]
+
+// CHECK: vcvttsh2usi edx, xmm6
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x78,0xd6]
+          vcvttsh2usi edx, xmm6
+
+// CHECK: vcvttsh2usi edx, xmm6, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x78,0xd6]
+          vcvttsh2usi edx, xmm6, {sae}
+
+// CHECK: vcvttsh2usi edx, word ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x78,0x94,0xf4,0x00,0x00,0x00,0x10]
+          vcvttsh2usi edx, word ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttsh2usi edx, word ptr [ecx]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x78,0x11]
+          vcvttsh2usi edx, word ptr [ecx]
+
+// CHECK: vcvttsh2usi edx, word ptr [ecx + 254]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x78,0x51,0x7f]
+          vcvttsh2usi edx, word ptr [ecx + 254]
+
+// CHECK: vcvttsh2usi edx, word ptr [edx - 256]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x78,0x52,0x80]
+          vcvttsh2usi edx, word ptr [edx - 256]
+
+// CHECK: vcvtudq2ph ymm6, zmm5
+// CHECK: encoding: [0x62,0xf5,0x7f,0x48,0x7a,0xf5]
+          vcvtudq2ph ymm6, zmm5
+
+// CHECK: vcvtudq2ph ymm6, zmm5, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x7f,0x18,0x7a,0xf5]
+          vcvtudq2ph ymm6, zmm5, {rn-sae}
+
+// CHECK: vcvtudq2ph ymm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x4f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtudq2ph ymm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtudq2ph ymm6, dword ptr [ecx]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7f,0x58,0x7a,0x31]
+          vcvtudq2ph ymm6, dword ptr [ecx]{1to16}
+
+// CHECK: vcvtudq2ph ymm6, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x48,0x7a,0x71,0x7f]
+          vcvtudq2ph ymm6, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvtudq2ph ymm6 {k7} {z}, dword ptr [edx - 512]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7f,0xdf,0x7a,0x72,0x80]
+          vcvtudq2ph ymm6 {k7} {z}, dword ptr [edx - 512]{1to16}
+
+// CHECK: vcvtuqq2ph xmm6, zmm5
+// CHECK: encoding: [0x62,0xf5,0xff,0x48,0x7a,0xf5]
+          vcvtuqq2ph xmm6, zmm5
+
+// CHECK: vcvtuqq2ph xmm6, zmm5, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0xff,0x18,0x7a,0xf5]
+          vcvtuqq2ph xmm6, zmm5, {rn-sae}
+
+// CHECK: vcvtuqq2ph xmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0xff,0x4f,0x7a,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtuqq2ph xmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtuqq2ph xmm6, qword ptr [ecx]{1to8}
+// CHECK: encoding: [0x62,0xf5,0xff,0x58,0x7a,0x31]
+          vcvtuqq2ph xmm6, qword ptr [ecx]{1to8}
+
+// CHECK: vcvtuqq2ph xmm6, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0xff,0x48,0x7a,0x71,0x7f]
+          vcvtuqq2ph xmm6, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvtuqq2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to8}
+// CHECK: encoding: [0x62,0xf5,0xff,0xdf,0x7a,0x72,0x80]
+          vcvtuqq2ph xmm6 {k7} {z}, qword ptr [edx - 1024]{1to8}
+
+// CHECK: vcvtusi2sh xmm6, xmm5, edx
+// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x7b,0xf2]
+          vcvtusi2sh xmm6, xmm5, edx
+
+// CHECK: vcvtusi2sh xmm6, xmm5, {rn-sae}, edx
+// CHECK: encoding: [0x62,0xf5,0x56,0x18,0x7b,0xf2]
+          vcvtusi2sh xmm6, xmm5, {rn-sae}, edx
+
+// CHECK: vcvtusi2sh xmm6, xmm5, dword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x7b,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtusi2sh xmm6, xmm5, dword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtusi2sh xmm6, xmm5, dword ptr [ecx]
+// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x7b,0x31]
+          vcvtusi2sh xmm6, xmm5, dword ptr [ecx]
+
+// CHECK: vcvtusi2sh xmm6, xmm5, dword ptr [ecx + 508]
+// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x7b,0x71,0x7f]
+          vcvtusi2sh xmm6, xmm5, dword ptr [ecx + 508]
+
+// CHECK: vcvtusi2sh xmm6, xmm5, dword ptr [edx - 512]
+// CHECK: encoding: [0x62,0xf5,0x56,0x08,0x7b,0x72,0x80]
+          vcvtusi2sh xmm6, xmm5, dword ptr [edx - 512]
+
+// CHECK: vcvtuw2ph zmm6, zmm5
+// CHECK: encoding: [0x62,0xf5,0x7f,0x48,0x7d,0xf5]
+          vcvtuw2ph zmm6, zmm5
+
+// CHECK: vcvtuw2ph zmm6, zmm5, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x7f,0x18,0x7d,0xf5]
+          vcvtuw2ph zmm6, zmm5, {rn-sae}
+
+// CHECK: vcvtuw2ph zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x4f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtuw2ph zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtuw2ph zmm6, word ptr [ecx]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7f,0x58,0x7d,0x31]
+          vcvtuw2ph zmm6, word ptr [ecx]{1to32}
+
+// CHECK: vcvtuw2ph zmm6, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x48,0x7d,0x71,0x7f]
+          vcvtuw2ph zmm6, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvtuw2ph zmm6 {k7} {z}, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7f,0xdf,0x7d,0x72,0x80]
+          vcvtuw2ph zmm6 {k7} {z}, word ptr [edx - 256]{1to32}
+
+// CHECK: vcvtw2ph zmm6, zmm5
+// CHECK: encoding: [0x62,0xf5,0x7e,0x48,0x7d,0xf5]
+          vcvtw2ph zmm6, zmm5
+
+// CHECK: vcvtw2ph zmm6, zmm5, {rn-sae}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x7d,0xf5]
+          vcvtw2ph zmm6, zmm5, {rn-sae}
+
+// CHECK: vcvtw2ph zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x4f,0x7d,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vcvtw2ph zmm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtw2ph zmm6, word ptr [ecx]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x58,0x7d,0x31]
+          vcvtw2ph zmm6, word ptr [ecx]{1to32}
+
+// CHECK: vcvtw2ph zmm6, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x48,0x7d,0x71,0x7f]
+          vcvtw2ph zmm6, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvtw2ph zmm6 {k7} {z}, word ptr [edx - 256]{1to32}
+// CHECK: encoding: [0x62,0xf5,0x7e,0xdf,0x7d,0x72,0x80]
+          vcvtw2ph zmm6 {k7} {z}, word ptr [edx - 256]{1to32}

diff --git a/llvm/test/MC/X86/intel-syntax-avx512fp16vl.s b/llvm/test/MC/X86/intel-syntax-avx512fp16vl.s
index d6ccd32bbfc16..5c53fc376e1cc 100644
--- a/llvm/test/MC/X86/intel-syntax-avx512fp16vl.s
+++ b/llvm/test/MC/X86/intel-syntax-avx512fp16vl.s
@@ -279,3 +279,859 @@
 // CHECK: vsubph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
 // CHECK: encoding: [0x62,0x65,0x14,0x97,0x5c,0x72,0x80]
           vsubph xmm30 {k7} {z}, xmm29, word ptr [rdx - 256]{1to8}
+
+// CHECK: vcvtdq2ph xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7c,0x08,0x5b,0xf5]
+          vcvtdq2ph xmm30, xmm29
+
+// CHECK: vcvtdq2ph xmm30, ymm29
+// CHECK: encoding: [0x62,0x05,0x7c,0x28,0x5b,0xf5]
+          vcvtdq2ph xmm30, ymm29
+
+// CHECK: vcvtdq2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7c,0x0f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtdq2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtdq2ph xmm30, dword ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0x7c,0x18,0x5b,0x31]
+          vcvtdq2ph xmm30, dword ptr [r9]{1to4}
+
+// CHECK: vcvtdq2ph xmm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0x7c,0x08,0x5b,0x71,0x7f]
+          vcvtdq2ph xmm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvtdq2ph xmm30 {k7} {z}, dword ptr [rdx - 512]{1to4}
+// CHECK: encoding: [0x62,0x65,0x7c,0x9f,0x5b,0x72,0x80]
+          vcvtdq2ph xmm30 {k7} {z}, dword ptr [rdx - 512]{1to4}
+
+// CHECK: vcvtdq2ph xmm30, dword ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x45,0x7c,0x38,0x5b,0x31]
+          vcvtdq2ph xmm30, dword ptr [r9]{1to8}
+
+// CHECK: vcvtdq2ph xmm30, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0x65,0x7c,0x28,0x5b,0x71,0x7f]
+          vcvtdq2ph xmm30, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvtdq2ph xmm30 {k7} {z}, dword ptr [rdx - 512]{1to8}
+// CHECK: encoding: [0x62,0x65,0x7c,0xbf,0x5b,0x72,0x80]
+          vcvtdq2ph xmm30 {k7} {z}, dword ptr [rdx - 512]{1to8}
+
+// CHECK: vcvtpd2ph xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0xfd,0x08,0x5a,0xf5]
+          vcvtpd2ph xmm30, xmm29
+
+// CHECK: vcvtpd2ph xmm30, ymm29
+// CHECK: encoding: [0x62,0x05,0xfd,0x28,0x5a,0xf5]
+          vcvtpd2ph xmm30, ymm29
+
+// CHECK: vcvtpd2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0xfd,0x0f,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtpd2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtpd2ph xmm30, qword ptr [r9]{1to2}
+// CHECK: encoding: [0x62,0x45,0xfd,0x18,0x5a,0x31]
+          vcvtpd2ph xmm30, qword ptr [r9]{1to2}
+
+// CHECK: vcvtpd2ph xmm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0xfd,0x08,0x5a,0x71,0x7f]
+          vcvtpd2ph xmm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvtpd2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to2}
+// CHECK: encoding: [0x62,0x65,0xfd,0x9f,0x5a,0x72,0x80]
+          vcvtpd2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to2}
+
+// CHECK: vcvtpd2ph xmm30, qword ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0xfd,0x38,0x5a,0x31]
+          vcvtpd2ph xmm30, qword ptr [r9]{1to4}
+
+// CHECK: vcvtpd2ph xmm30, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0x65,0xfd,0x28,0x5a,0x71,0x7f]
+          vcvtpd2ph xmm30, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvtpd2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to4}
+// CHECK: encoding: [0x62,0x65,0xfd,0xbf,0x5a,0x72,0x80]
+          vcvtpd2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to4}
+
+// CHECK: vcvtph2dq xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x08,0x5b,0xf5]
+          vcvtph2dq xmm30, xmm29
+
+// CHECK: vcvtph2dq ymm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x28,0x5b,0xf5]
+          vcvtph2dq ymm30, xmm29
+
+// CHECK: vcvtph2dq xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x0f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtph2dq xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2dq xmm30, word ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0x7d,0x18,0x5b,0x31]
+          vcvtph2dq xmm30, word ptr [r9]{1to4}
+
+// CHECK: vcvtph2dq xmm30, qword ptr [rcx + 1016]
+// CHECK: encoding: [0x62,0x65,0x7d,0x08,0x5b,0x71,0x7f]
+          vcvtph2dq xmm30, qword ptr [rcx + 1016]
+
+// CHECK: vcvtph2dq xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+// CHECK: encoding: [0x62,0x65,0x7d,0x9f,0x5b,0x72,0x80]
+          vcvtph2dq xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+
+// CHECK: vcvtph2dq ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x2f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtph2dq ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2dq ymm30, word ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x45,0x7d,0x38,0x5b,0x31]
+          vcvtph2dq ymm30, word ptr [r9]{1to8}
+
+// CHECK: vcvtph2dq ymm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0x7d,0x28,0x5b,0x71,0x7f]
+          vcvtph2dq ymm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvtph2dq ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0x65,0x7d,0xbf,0x5b,0x72,0x80]
+          vcvtph2dq ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vcvtph2pd xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7c,0x08,0x5a,0xf5]
+          vcvtph2pd xmm30, xmm29
+
+// CHECK: vcvtph2pd ymm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7c,0x28,0x5a,0xf5]
+          vcvtph2pd ymm30, xmm29
+
+// CHECK: vcvtph2pd xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7c,0x0f,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtph2pd xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2pd xmm30, word ptr [r9]{1to2}
+// CHECK: encoding: [0x62,0x45,0x7c,0x18,0x5a,0x31]
+          vcvtph2pd xmm30, word ptr [r9]{1to2}
+
+// CHECK: vcvtph2pd xmm30, dword ptr [rcx + 508]
+// CHECK: encoding: [0x62,0x65,0x7c,0x08,0x5a,0x71,0x7f]
+          vcvtph2pd xmm30, dword ptr [rcx + 508]
+
+// CHECK: vcvtph2pd xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}
+// CHECK: encoding: [0x62,0x65,0x7c,0x9f,0x5a,0x72,0x80]
+          vcvtph2pd xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}
+
+// CHECK: vcvtph2pd ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7c,0x2f,0x5a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtph2pd ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2pd ymm30, word ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0x7c,0x38,0x5a,0x31]
+          vcvtph2pd ymm30, word ptr [r9]{1to4}
+
+// CHECK: vcvtph2pd ymm30, qword ptr [rcx + 1016]
+// CHECK: encoding: [0x62,0x65,0x7c,0x28,0x5a,0x71,0x7f]
+          vcvtph2pd ymm30, qword ptr [rcx + 1016]
+
+// CHECK: vcvtph2pd ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+// CHECK: encoding: [0x62,0x65,0x7c,0xbf,0x5a,0x72,0x80]
+          vcvtph2pd ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+
+// CHECK: vcvtph2psx xmm30, xmm29
+// CHECK: encoding: [0x62,0x06,0x7d,0x08,0x13,0xf5]
+          vcvtph2psx xmm30, xmm29
+
+// CHECK: vcvtph2psx ymm30, xmm29
+// CHECK: encoding: [0x62,0x06,0x7d,0x28,0x13,0xf5]
+          vcvtph2psx ymm30, xmm29
+
+// CHECK: vcvtph2psx xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x26,0x7d,0x0f,0x13,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtph2psx xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2psx xmm30, word ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x46,0x7d,0x18,0x13,0x31]
+          vcvtph2psx xmm30, word ptr [r9]{1to4}
+
+// CHECK: vcvtph2psx xmm30, qword ptr [rcx + 1016]
+// CHECK: encoding: [0x62,0x66,0x7d,0x08,0x13,0x71,0x7f]
+          vcvtph2psx xmm30, qword ptr [rcx + 1016]
+
+// CHECK: vcvtph2psx xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+// CHECK: encoding: [0x62,0x66,0x7d,0x9f,0x13,0x72,0x80]
+          vcvtph2psx xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+
+// CHECK: vcvtph2psx ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x26,0x7d,0x2f,0x13,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtph2psx ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2psx ymm30, word ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x46,0x7d,0x38,0x13,0x31]
+          vcvtph2psx ymm30, word ptr [r9]{1to8}
+
+// CHECK: vcvtph2psx ymm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x66,0x7d,0x28,0x13,0x71,0x7f]
+          vcvtph2psx ymm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvtph2psx ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0x66,0x7d,0xbf,0x13,0x72,0x80]
+          vcvtph2psx ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vcvtph2qq xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x08,0x7b,0xf5]
+          vcvtph2qq xmm30, xmm29
+
+// CHECK: vcvtph2qq ymm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x28,0x7b,0xf5]
+          vcvtph2qq ymm30, xmm29
+
+// CHECK: vcvtph2qq xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x0f,0x7b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtph2qq xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2qq xmm30, word ptr [r9]{1to2}
+// CHECK: encoding: [0x62,0x45,0x7d,0x18,0x7b,0x31]
+          vcvtph2qq xmm30, word ptr [r9]{1to2}
+
+// CHECK: vcvtph2qq xmm30, dword ptr [rcx + 508]
+// CHECK: encoding: [0x62,0x65,0x7d,0x08,0x7b,0x71,0x7f]
+          vcvtph2qq xmm30, dword ptr [rcx + 508]
+
+// CHECK: vcvtph2qq xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}
+// CHECK: encoding: [0x62,0x65,0x7d,0x9f,0x7b,0x72,0x80]
+          vcvtph2qq xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}
+
+// CHECK: vcvtph2qq ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x2f,0x7b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtph2qq ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2qq ymm30, word ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0x7d,0x38,0x7b,0x31]
+          vcvtph2qq ymm30, word ptr [r9]{1to4}
+
+// CHECK: vcvtph2qq ymm30, qword ptr [rcx + 1016]
+// CHECK: encoding: [0x62,0x65,0x7d,0x28,0x7b,0x71,0x7f]
+          vcvtph2qq ymm30, qword ptr [rcx + 1016]
+
+// CHECK: vcvtph2qq ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+// CHECK: encoding: [0x62,0x65,0x7d,0xbf,0x7b,0x72,0x80]
+          vcvtph2qq ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+
+// CHECK: vcvtph2udq xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7c,0x08,0x79,0xf5]
+          vcvtph2udq xmm30, xmm29
+
+// CHECK: vcvtph2udq ymm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7c,0x28,0x79,0xf5]
+          vcvtph2udq ymm30, xmm29
+
+// CHECK: vcvtph2udq xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7c,0x0f,0x79,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtph2udq xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2udq xmm30, word ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0x7c,0x18,0x79,0x31]
+          vcvtph2udq xmm30, word ptr [r9]{1to4}
+
+// CHECK: vcvtph2udq xmm30, qword ptr [rcx + 1016]
+// CHECK: encoding: [0x62,0x65,0x7c,0x08,0x79,0x71,0x7f]
+          vcvtph2udq xmm30, qword ptr [rcx + 1016]
+
+// CHECK: vcvtph2udq xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+// CHECK: encoding: [0x62,0x65,0x7c,0x9f,0x79,0x72,0x80]
+          vcvtph2udq xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+
+// CHECK: vcvtph2udq ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7c,0x2f,0x79,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtph2udq ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2udq ymm30, word ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x45,0x7c,0x38,0x79,0x31]
+          vcvtph2udq ymm30, word ptr [r9]{1to8}
+
+// CHECK: vcvtph2udq ymm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0x7c,0x28,0x79,0x71,0x7f]
+          vcvtph2udq ymm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvtph2udq ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0x65,0x7c,0xbf,0x79,0x72,0x80]
+          vcvtph2udq ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vcvtph2uqq xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x08,0x79,0xf5]
+          vcvtph2uqq xmm30, xmm29
+
+// CHECK: vcvtph2uqq ymm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x28,0x79,0xf5]
+          vcvtph2uqq ymm30, xmm29
+
+// CHECK: vcvtph2uqq xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x0f,0x79,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtph2uqq xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2uqq xmm30, word ptr [r9]{1to2}
+// CHECK: encoding: [0x62,0x45,0x7d,0x18,0x79,0x31]
+          vcvtph2uqq xmm30, word ptr [r9]{1to2}
+
+// CHECK: vcvtph2uqq xmm30, dword ptr [rcx + 508]
+// CHECK: encoding: [0x62,0x65,0x7d,0x08,0x79,0x71,0x7f]
+          vcvtph2uqq xmm30, dword ptr [rcx + 508]
+
+// CHECK: vcvtph2uqq xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}
+// CHECK: encoding: [0x62,0x65,0x7d,0x9f,0x79,0x72,0x80]
+          vcvtph2uqq xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}
+
+// CHECK: vcvtph2uqq ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x2f,0x79,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtph2uqq ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2uqq ymm30, word ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0x7d,0x38,0x79,0x31]
+          vcvtph2uqq ymm30, word ptr [r9]{1to4}
+
+// CHECK: vcvtph2uqq ymm30, qword ptr [rcx + 1016]
+// CHECK: encoding: [0x62,0x65,0x7d,0x28,0x79,0x71,0x7f]
+          vcvtph2uqq ymm30, qword ptr [rcx + 1016]
+
+// CHECK: vcvtph2uqq ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+// CHECK: encoding: [0x62,0x65,0x7d,0xbf,0x79,0x72,0x80]
+          vcvtph2uqq ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+
+// CHECK: vcvtph2uw xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7c,0x08,0x7d,0xf5]
+          vcvtph2uw xmm30, xmm29
+
+// CHECK: vcvtph2uw ymm30, ymm29
+// CHECK: encoding: [0x62,0x05,0x7c,0x28,0x7d,0xf5]
+          vcvtph2uw ymm30, ymm29
+
+// CHECK: vcvtph2uw xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7c,0x0f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtph2uw xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2uw xmm30, word ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x45,0x7c,0x18,0x7d,0x31]
+          vcvtph2uw xmm30, word ptr [r9]{1to8}
+
+// CHECK: vcvtph2uw xmm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0x7c,0x08,0x7d,0x71,0x7f]
+          vcvtph2uw xmm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvtph2uw xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0x65,0x7c,0x9f,0x7d,0x72,0x80]
+          vcvtph2uw xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vcvtph2uw ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7c,0x2f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtph2uw ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2uw ymm30, word ptr [r9]{1to16}
+// CHECK: encoding: [0x62,0x45,0x7c,0x38,0x7d,0x31]
+          vcvtph2uw ymm30, word ptr [r9]{1to16}
+
+// CHECK: vcvtph2uw ymm30, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0x65,0x7c,0x28,0x7d,0x71,0x7f]
+          vcvtph2uw ymm30, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvtph2uw ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0x65,0x7c,0xbf,0x7d,0x72,0x80]
+          vcvtph2uw ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+
+// CHECK: vcvtph2w xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x08,0x7d,0xf5]
+          vcvtph2w xmm30, xmm29
+
+// CHECK: vcvtph2w ymm30, ymm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x28,0x7d,0xf5]
+          vcvtph2w ymm30, ymm29
+
+// CHECK: vcvtph2w xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x0f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtph2w xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2w xmm30, word ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x45,0x7d,0x18,0x7d,0x31]
+          vcvtph2w xmm30, word ptr [r9]{1to8}
+
+// CHECK: vcvtph2w xmm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0x7d,0x08,0x7d,0x71,0x7f]
+          vcvtph2w xmm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvtph2w xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0x65,0x7d,0x9f,0x7d,0x72,0x80]
+          vcvtph2w xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vcvtph2w ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x2f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtph2w ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtph2w ymm30, word ptr [r9]{1to16}
+// CHECK: encoding: [0x62,0x45,0x7d,0x38,0x7d,0x31]
+          vcvtph2w ymm30, word ptr [r9]{1to16}
+
+// CHECK: vcvtph2w ymm30, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0x65,0x7d,0x28,0x7d,0x71,0x7f]
+          vcvtph2w ymm30, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvtph2w ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0x65,0x7d,0xbf,0x7d,0x72,0x80]
+          vcvtph2w ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+
+// CHECK: vcvtps2phx xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x08,0x1d,0xf5]
+          vcvtps2phx xmm30, xmm29
+
+// CHECK: vcvtps2phx xmm30, ymm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x28,0x1d,0xf5]
+          vcvtps2phx xmm30, ymm29
+
+// CHECK: vcvtps2phx xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x0f,0x1d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtps2phx xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtps2phx xmm30, dword ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0x7d,0x18,0x1d,0x31]
+          vcvtps2phx xmm30, dword ptr [r9]{1to4}
+
+// CHECK: vcvtps2phx xmm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0x7d,0x08,0x1d,0x71,0x7f]
+          vcvtps2phx xmm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvtps2phx xmm30 {k7} {z}, dword ptr [rdx - 512]{1to4}
+// CHECK: encoding: [0x62,0x65,0x7d,0x9f,0x1d,0x72,0x80]
+          vcvtps2phx xmm30 {k7} {z}, dword ptr [rdx - 512]{1to4}
+
+// CHECK: vcvtps2phx xmm30, dword ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x45,0x7d,0x38,0x1d,0x31]
+          vcvtps2phx xmm30, dword ptr [r9]{1to8}
+
+// CHECK: vcvtps2phx xmm30, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0x65,0x7d,0x28,0x1d,0x71,0x7f]
+          vcvtps2phx xmm30, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvtps2phx xmm30 {k7} {z}, dword ptr [rdx - 512]{1to8}
+// CHECK: encoding: [0x62,0x65,0x7d,0xbf,0x1d,0x72,0x80]
+          vcvtps2phx xmm30 {k7} {z}, dword ptr [rdx - 512]{1to8}
+
+// CHECK: vcvtqq2ph xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0xfc,0x08,0x5b,0xf5]
+          vcvtqq2ph xmm30, xmm29
+
+// CHECK: vcvtqq2ph xmm30, ymm29
+// CHECK: encoding: [0x62,0x05,0xfc,0x28,0x5b,0xf5]
+          vcvtqq2ph xmm30, ymm29
+
+// CHECK: vcvtqq2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0xfc,0x0f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtqq2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtqq2ph xmm30, qword ptr [r9]{1to2}
+// CHECK: encoding: [0x62,0x45,0xfc,0x18,0x5b,0x31]
+          vcvtqq2ph xmm30, qword ptr [r9]{1to2}
+
+// CHECK: vcvtqq2ph xmm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0xfc,0x08,0x5b,0x71,0x7f]
+          vcvtqq2ph xmm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvtqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to2}
+// CHECK: encoding: [0x62,0x65,0xfc,0x9f,0x5b,0x72,0x80]
+          vcvtqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to2}
+
+// CHECK: vcvtqq2ph xmm30, qword ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0xfc,0x38,0x5b,0x31]
+          vcvtqq2ph xmm30, qword ptr [r9]{1to4}
+
+// CHECK: vcvtqq2ph xmm30, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0x65,0xfc,0x28,0x5b,0x71,0x7f]
+          vcvtqq2ph xmm30, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvtqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to4}
+// CHECK: encoding: [0x62,0x65,0xfc,0xbf,0x5b,0x72,0x80]
+          vcvtqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to4}
+
+// CHECK: vcvttph2dq xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7e,0x08,0x5b,0xf5]
+          vcvttph2dq xmm30, xmm29
+
+// CHECK: vcvttph2dq ymm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7e,0x28,0x5b,0xf5]
+          vcvttph2dq ymm30, xmm29
+
+// CHECK: vcvttph2dq xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7e,0x0f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvttph2dq xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttph2dq xmm30, word ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0x7e,0x18,0x5b,0x31]
+          vcvttph2dq xmm30, word ptr [r9]{1to4}
+
+// CHECK: vcvttph2dq xmm30, qword ptr [rcx + 1016]
+// CHECK: encoding: [0x62,0x65,0x7e,0x08,0x5b,0x71,0x7f]
+          vcvttph2dq xmm30, qword ptr [rcx + 1016]
+
+// CHECK: vcvttph2dq xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+// CHECK: encoding: [0x62,0x65,0x7e,0x9f,0x5b,0x72,0x80]
+          vcvttph2dq xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+
+// CHECK: vcvttph2dq ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7e,0x2f,0x5b,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvttph2dq ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttph2dq ymm30, word ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x45,0x7e,0x38,0x5b,0x31]
+          vcvttph2dq ymm30, word ptr [r9]{1to8}
+
+// CHECK: vcvttph2dq ymm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0x7e,0x28,0x5b,0x71,0x7f]
+          vcvttph2dq ymm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvttph2dq ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0x65,0x7e,0xbf,0x5b,0x72,0x80]
+          vcvttph2dq ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vcvttph2qq xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x08,0x7a,0xf5]
+          vcvttph2qq xmm30, xmm29
+
+// CHECK: vcvttph2qq ymm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x28,0x7a,0xf5]
+          vcvttph2qq ymm30, xmm29
+
+// CHECK: vcvttph2qq xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x0f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvttph2qq xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttph2qq xmm30, word ptr [r9]{1to2}
+// CHECK: encoding: [0x62,0x45,0x7d,0x18,0x7a,0x31]
+          vcvttph2qq xmm30, word ptr [r9]{1to2}
+
+// CHECK: vcvttph2qq xmm30, dword ptr [rcx + 508]
+// CHECK: encoding: [0x62,0x65,0x7d,0x08,0x7a,0x71,0x7f]
+          vcvttph2qq xmm30, dword ptr [rcx + 508]
+
+// CHECK: vcvttph2qq xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}
+// CHECK: encoding: [0x62,0x65,0x7d,0x9f,0x7a,0x72,0x80]
+          vcvttph2qq xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}
+
+// CHECK: vcvttph2qq ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x2f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvttph2qq ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttph2qq ymm30, word ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0x7d,0x38,0x7a,0x31]
+          vcvttph2qq ymm30, word ptr [r9]{1to4}
+
+// CHECK: vcvttph2qq ymm30, qword ptr [rcx + 1016]
+// CHECK: encoding: [0x62,0x65,0x7d,0x28,0x7a,0x71,0x7f]
+          vcvttph2qq ymm30, qword ptr [rcx + 1016]
+
+// CHECK: vcvttph2qq ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+// CHECK: encoding: [0x62,0x65,0x7d,0xbf,0x7a,0x72,0x80]
+          vcvttph2qq ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+
+// CHECK: vcvttph2udq xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7c,0x08,0x78,0xf5]
+          vcvttph2udq xmm30, xmm29
+
+// CHECK: vcvttph2udq ymm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7c,0x28,0x78,0xf5]
+          vcvttph2udq ymm30, xmm29
+
+// CHECK: vcvttph2udq xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7c,0x0f,0x78,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvttph2udq xmm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttph2udq xmm30, word ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0x7c,0x18,0x78,0x31]
+          vcvttph2udq xmm30, word ptr [r9]{1to4}
+
+// CHECK: vcvttph2udq xmm30, qword ptr [rcx + 1016]
+// CHECK: encoding: [0x62,0x65,0x7c,0x08,0x78,0x71,0x7f]
+          vcvttph2udq xmm30, qword ptr [rcx + 1016]
+
+// CHECK: vcvttph2udq xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+// CHECK: encoding: [0x62,0x65,0x7c,0x9f,0x78,0x72,0x80]
+          vcvttph2udq xmm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+
+// CHECK: vcvttph2udq ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7c,0x2f,0x78,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvttph2udq ymm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttph2udq ymm30, word ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x45,0x7c,0x38,0x78,0x31]
+          vcvttph2udq ymm30, word ptr [r9]{1to8}
+
+// CHECK: vcvttph2udq ymm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0x7c,0x28,0x78,0x71,0x7f]
+          vcvttph2udq ymm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvttph2udq ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0x65,0x7c,0xbf,0x78,0x72,0x80]
+          vcvttph2udq ymm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vcvttph2uqq xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x08,0x78,0xf5]
+          vcvttph2uqq xmm30, xmm29
+
+// CHECK: vcvttph2uqq ymm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x28,0x78,0xf5]
+          vcvttph2uqq ymm30, xmm29
+
+// CHECK: vcvttph2uqq xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x0f,0x78,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvttph2uqq xmm30 {k7}, dword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttph2uqq xmm30, word ptr [r9]{1to2}
+// CHECK: encoding: [0x62,0x45,0x7d,0x18,0x78,0x31]
+          vcvttph2uqq xmm30, word ptr [r9]{1to2}
+
+// CHECK: vcvttph2uqq xmm30, dword ptr [rcx + 508]
+// CHECK: encoding: [0x62,0x65,0x7d,0x08,0x78,0x71,0x7f]
+          vcvttph2uqq xmm30, dword ptr [rcx + 508]
+
+// CHECK: vcvttph2uqq xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}
+// CHECK: encoding: [0x62,0x65,0x7d,0x9f,0x78,0x72,0x80]
+          vcvttph2uqq xmm30 {k7} {z}, word ptr [rdx - 256]{1to2}
+
+// CHECK: vcvttph2uqq ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x2f,0x78,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvttph2uqq ymm30 {k7}, qword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttph2uqq ymm30, word ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0x7d,0x38,0x78,0x31]
+          vcvttph2uqq ymm30, word ptr [r9]{1to4}
+
+// CHECK: vcvttph2uqq ymm30, qword ptr [rcx + 1016]
+// CHECK: encoding: [0x62,0x65,0x7d,0x28,0x78,0x71,0x7f]
+          vcvttph2uqq ymm30, qword ptr [rcx + 1016]
+
+// CHECK: vcvttph2uqq ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+// CHECK: encoding: [0x62,0x65,0x7d,0xbf,0x78,0x72,0x80]
+          vcvttph2uqq ymm30 {k7} {z}, word ptr [rdx - 256]{1to4}
+
+// CHECK: vcvttph2uw xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7c,0x08,0x7c,0xf5]
+          vcvttph2uw xmm30, xmm29
+
+// CHECK: vcvttph2uw ymm30, ymm29
+// CHECK: encoding: [0x62,0x05,0x7c,0x28,0x7c,0xf5]
+          vcvttph2uw ymm30, ymm29
+
+// CHECK: vcvttph2uw xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7c,0x0f,0x7c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvttph2uw xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttph2uw xmm30, word ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x45,0x7c,0x18,0x7c,0x31]
+          vcvttph2uw xmm30, word ptr [r9]{1to8}
+
+// CHECK: vcvttph2uw xmm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0x7c,0x08,0x7c,0x71,0x7f]
+          vcvttph2uw xmm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvttph2uw xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0x65,0x7c,0x9f,0x7c,0x72,0x80]
+          vcvttph2uw xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vcvttph2uw ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7c,0x2f,0x7c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvttph2uw ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttph2uw ymm30, word ptr [r9]{1to16}
+// CHECK: encoding: [0x62,0x45,0x7c,0x38,0x7c,0x31]
+          vcvttph2uw ymm30, word ptr [r9]{1to16}
+
+// CHECK: vcvttph2uw ymm30, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0x65,0x7c,0x28,0x7c,0x71,0x7f]
+          vcvttph2uw ymm30, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvttph2uw ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0x65,0x7c,0xbf,0x7c,0x72,0x80]
+          vcvttph2uw ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+
+// CHECK: vcvttph2w xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x08,0x7c,0xf5]
+          vcvttph2w xmm30, xmm29
+
+// CHECK: vcvttph2w ymm30, ymm29
+// CHECK: encoding: [0x62,0x05,0x7d,0x28,0x7c,0xf5]
+          vcvttph2w ymm30, ymm29
+
+// CHECK: vcvttph2w xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x0f,0x7c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvttph2w xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttph2w xmm30, word ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x45,0x7d,0x18,0x7c,0x31]
+          vcvttph2w xmm30, word ptr [r9]{1to8}
+
+// CHECK: vcvttph2w xmm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0x7d,0x08,0x7c,0x71,0x7f]
+          vcvttph2w xmm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvttph2w xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0x65,0x7d,0x9f,0x7c,0x72,0x80]
+          vcvttph2w xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vcvttph2w ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7d,0x2f,0x7c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvttph2w ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttph2w ymm30, word ptr [r9]{1to16}
+// CHECK: encoding: [0x62,0x45,0x7d,0x38,0x7c,0x31]
+          vcvttph2w ymm30, word ptr [r9]{1to16}
+
+// CHECK: vcvttph2w ymm30, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0x65,0x7d,0x28,0x7c,0x71,0x7f]
+          vcvttph2w ymm30, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvttph2w ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0x65,0x7d,0xbf,0x7c,0x72,0x80]
+          vcvttph2w ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+
+// CHECK: vcvtudq2ph xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7f,0x08,0x7a,0xf5]
+          vcvtudq2ph xmm30, xmm29
+
+// CHECK: vcvtudq2ph xmm30, ymm29
+// CHECK: encoding: [0x62,0x05,0x7f,0x28,0x7a,0xf5]
+          vcvtudq2ph xmm30, ymm29
+
+// CHECK: vcvtudq2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7f,0x0f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtudq2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtudq2ph xmm30, dword ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0x7f,0x18,0x7a,0x31]
+          vcvtudq2ph xmm30, dword ptr [r9]{1to4}
+
+// CHECK: vcvtudq2ph xmm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0x7f,0x08,0x7a,0x71,0x7f]
+          vcvtudq2ph xmm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvtudq2ph xmm30 {k7} {z}, dword ptr [rdx - 512]{1to4}
+// CHECK: encoding: [0x62,0x65,0x7f,0x9f,0x7a,0x72,0x80]
+          vcvtudq2ph xmm30 {k7} {z}, dword ptr [rdx - 512]{1to4}
+
+// CHECK: vcvtudq2ph xmm30, dword ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x45,0x7f,0x38,0x7a,0x31]
+          vcvtudq2ph xmm30, dword ptr [r9]{1to8}
+
+// CHECK: vcvtudq2ph xmm30, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0x65,0x7f,0x28,0x7a,0x71,0x7f]
+          vcvtudq2ph xmm30, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvtudq2ph xmm30 {k7} {z}, dword ptr [rdx - 512]{1to8}
+// CHECK: encoding: [0x62,0x65,0x7f,0xbf,0x7a,0x72,0x80]
+          vcvtudq2ph xmm30 {k7} {z}, dword ptr [rdx - 512]{1to8}
+
+// CHECK: vcvtuqq2ph xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0xff,0x08,0x7a,0xf5]
+          vcvtuqq2ph xmm30, xmm29
+
+// CHECK: vcvtuqq2ph xmm30, ymm29
+// CHECK: encoding: [0x62,0x05,0xff,0x28,0x7a,0xf5]
+          vcvtuqq2ph xmm30, ymm29
+
+// CHECK: vcvtuqq2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0xff,0x0f,0x7a,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtuqq2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtuqq2ph xmm30, qword ptr [r9]{1to2}
+// CHECK: encoding: [0x62,0x45,0xff,0x18,0x7a,0x31]
+          vcvtuqq2ph xmm30, qword ptr [r9]{1to2}
+
+// CHECK: vcvtuqq2ph xmm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0xff,0x08,0x7a,0x71,0x7f]
+          vcvtuqq2ph xmm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvtuqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to2}
+// CHECK: encoding: [0x62,0x65,0xff,0x9f,0x7a,0x72,0x80]
+          vcvtuqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to2}
+
+// CHECK: vcvtuqq2ph xmm30, qword ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x45,0xff,0x38,0x7a,0x31]
+          vcvtuqq2ph xmm30, qword ptr [r9]{1to4}
+
+// CHECK: vcvtuqq2ph xmm30, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0x65,0xff,0x28,0x7a,0x71,0x7f]
+          vcvtuqq2ph xmm30, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvtuqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to4}
+// CHECK: encoding: [0x62,0x65,0xff,0xbf,0x7a,0x72,0x80]
+          vcvtuqq2ph xmm30 {k7} {z}, qword ptr [rdx - 1024]{1to4}
+
+// CHECK: vcvtuw2ph xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7f,0x08,0x7d,0xf5]
+          vcvtuw2ph xmm30, xmm29
+
+// CHECK: vcvtuw2ph ymm30, ymm29
+// CHECK: encoding: [0x62,0x05,0x7f,0x28,0x7d,0xf5]
+          vcvtuw2ph ymm30, ymm29
+
+// CHECK: vcvtuw2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7f,0x0f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtuw2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtuw2ph xmm30, word ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x45,0x7f,0x18,0x7d,0x31]
+          vcvtuw2ph xmm30, word ptr [r9]{1to8}
+
+// CHECK: vcvtuw2ph xmm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0x7f,0x08,0x7d,0x71,0x7f]
+          vcvtuw2ph xmm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvtuw2ph xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0x65,0x7f,0x9f,0x7d,0x72,0x80]
+          vcvtuw2ph xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vcvtuw2ph ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7f,0x2f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtuw2ph ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtuw2ph ymm30, word ptr [r9]{1to16}
+// CHECK: encoding: [0x62,0x45,0x7f,0x38,0x7d,0x31]
+          vcvtuw2ph ymm30, word ptr [r9]{1to16}
+
+// CHECK: vcvtuw2ph ymm30, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0x65,0x7f,0x28,0x7d,0x71,0x7f]
+          vcvtuw2ph ymm30, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvtuw2ph ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0x65,0x7f,0xbf,0x7d,0x72,0x80]
+          vcvtuw2ph ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+
+// CHECK: vcvtw2ph xmm30, xmm29
+// CHECK: encoding: [0x62,0x05,0x7e,0x08,0x7d,0xf5]
+          vcvtw2ph xmm30, xmm29
+
+// CHECK: vcvtw2ph ymm30, ymm29
+// CHECK: encoding: [0x62,0x05,0x7e,0x28,0x7d,0xf5]
+          vcvtw2ph ymm30, ymm29
+
+// CHECK: vcvtw2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7e,0x0f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtw2ph xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtw2ph xmm30, word ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x45,0x7e,0x18,0x7d,0x31]
+          vcvtw2ph xmm30, word ptr [r9]{1to8}
+
+// CHECK: vcvtw2ph xmm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x65,0x7e,0x08,0x7d,0x71,0x7f]
+          vcvtw2ph xmm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvtw2ph xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+// CHECK: encoding: [0x62,0x65,0x7e,0x9f,0x7d,0x72,0x80]
+          vcvtw2ph xmm30 {k7} {z}, word ptr [rdx - 256]{1to8}
+
+// CHECK: vcvtw2ph ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x25,0x7e,0x2f,0x7d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vcvtw2ph ymm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtw2ph ymm30, word ptr [r9]{1to16}
+// CHECK: encoding: [0x62,0x45,0x7e,0x38,0x7d,0x31]
+          vcvtw2ph ymm30, word ptr [r9]{1to16}
+
+// CHECK: vcvtw2ph ymm30, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0x65,0x7e,0x28,0x7d,0x71,0x7f]
+          vcvtw2ph ymm30, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvtw2ph ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
+// CHECK: encoding: [0x62,0x65,0x7e,0xbf,0x7d,0x72,0x80]
+          vcvtw2ph ymm30 {k7} {z}, word ptr [rdx - 256]{1to16}
