[clang] [llvm] [X86][AVX10.2] Support new AVX10.2-SATCVT-DS instructions. (PR #102592)
Malay Sanghi via cfe-commits
cfe-commits at lists.llvm.org
Tue Sep 10 01:34:09 PDT 2024
https://github.com/MalaySanghi updated https://github.com/llvm/llvm-project/pull/102592
>From e6c898daa4fc81438bed6060df933ee37ed84342 Mon Sep 17 00:00:00 2001
From: Malay Sanghi <malay.sanghi at intel.com>
Date: Mon, 5 Aug 2024 02:07:24 -0700
Subject: [PATCH 1/8] [X86][AVX10.2] Support saturating converts
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965
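
Background for reviewers: unlike the existing vcvtt* instructions, which
return the integer indefinite value on out-of-range inputs, the SATCVT-DS
forms saturate the result to the destination type's range. A minimal usage
sketch (illustrative only, not part of the patch; names match the
intrinsics added in the headers below):

  #include <immintrin.h>

  // Truncating double -> int32 conversion that clamps out-of-range
  // values to INT_MIN/INT_MAX instead of producing 0x80000000.
  __m256i convert_sat(__m512d v) { return _mm512_cvttspd_epi32(v); }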
---
clang/include/clang/Basic/BuiltinsX86.def | 30 +
clang/include/clang/Basic/BuiltinsX86_64.def | 6 +
clang/lib/Headers/CMakeLists.txt | 2 +
clang/lib/Headers/avx10_2_512satcvtdsintrin.h | 302 +++++
clang/lib/Headers/avx10_2satcvtdsintrin.h | 453 +++++++
clang/lib/Headers/immintrin.h | 8 +
clang/lib/Sema/SemaX86.cpp | 26 +
.../X86/avx10_2_512satcvtds-builtins-errors.c | 52 +
.../avx10_2_512satcvtds-builtins-x64-error.c | 76 ++
.../X86/avx10_2_512satcvtds-builtins-x64.c | 184 +++
.../X86/avx10_2_512satcvtds-builtins.c | 183 +++
.../X86/avx10_2satcvtds-builtins-errors.c | 57 +
.../X86/avx10_2satcvtds-builtins-x64.c | 223 ++++
.../CodeGen/X86/avx10_2satcvtds-builtins.c | 220 ++++
llvm/include/llvm/IR/IntrinsicsX86.td | 100 ++
llvm/lib/Target/X86/X86ISelLowering.cpp | 23 +-
llvm/lib/Target/X86/X86ISelLowering.h | 18 +
llvm/lib/Target/X86/X86InstrAVX10.td | 311 +++++
llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 12 +
llvm/lib/Target/X86/X86IntrinsicsInfo.h | 66 +-
.../X86/avx10_2_512satcvtds-intrinsics.ll | 548 ++++++++
.../CodeGen/X86/avx10_2fptosi_satcvtds.ll | 115 ++
.../CodeGen/X86/avx10_2satcvtds-intrinsics.ll | 1098 ++++++++++++++++
.../X86/avx10_2satcvtds-x64-intrinsics.ll | 58 +
.../Disassembler/X86/avx10.2-satcvtds-32.txt | 1043 +++++++++++++++
.../Disassembler/X86/avx10.2-satcvtds-64.txt | 1171 +++++++++++++++++
llvm/test/MC/X86/avx10_2satcvtds-32-att.s | 1042 +++++++++++++++
llvm/test/MC/X86/avx10_2satcvtds-32-intel.s | 1042 +++++++++++++++
llvm/test/MC/X86/avx10_2satcvtds-64-att.s | 1170 ++++++++++++++++
llvm/test/MC/X86/avx10_2satcvtds-64-intel.s | 1170 ++++++++++++++++
llvm/test/TableGen/x86-fold-tables.inc | 160 +++
31 files changed, 10966 insertions(+), 3 deletions(-)
create mode 100644 clang/lib/Headers/avx10_2_512satcvtdsintrin.h
create mode 100644 clang/lib/Headers/avx10_2satcvtdsintrin.h
create mode 100644 clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-errors.c
create mode 100755 clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64-error.c
create mode 100644 clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c
create mode 100644 clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c
create mode 100644 clang/test/CodeGen/X86/avx10_2satcvtds-builtins-errors.c
create mode 100644 clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c
create mode 100644 clang/test/CodeGen/X86/avx10_2satcvtds-builtins.c
create mode 100644 llvm/test/CodeGen/X86/avx10_2_512satcvtds-intrinsics.ll
create mode 100644 llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll
create mode 100644 llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll
create mode 100644 llvm/test/CodeGen/X86/avx10_2satcvtds-x64-intrinsics.ll
create mode 100644 llvm/test/MC/Disassembler/X86/avx10.2-satcvtds-32.txt
create mode 100644 llvm/test/MC/Disassembler/X86/avx10.2-satcvtds-64.txt
create mode 100644 llvm/test/MC/X86/avx10_2satcvtds-32-att.s
create mode 100644 llvm/test/MC/X86/avx10_2satcvtds-32-intel.s
create mode 100644 llvm/test/MC/X86/avx10_2satcvtds-64-att.s
create mode 100644 llvm/test/MC/X86/avx10_2satcvtds-64-intel.s
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index a696cf117908e2..a8639c341d0a43 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -2122,6 +2122,36 @@ TARGET_BUILTIN(__builtin_ia32_vpdpwuud256, "V8iV8iV8iV8i", "nV:256:", "avxvnniin
TARGET_BUILTIN(__builtin_ia32_vpdpwuuds128, "V4iV4iV4iV4i", "nV:128:", "avxvnniint16|avx10.2-256")
TARGET_BUILTIN(__builtin_ia32_vpdpwuuds256, "V8iV8iV8iV8i", "nV:256:", "avxvnniint16|avx10.2-256")
+// AVX10.2 SATCVT-DS
+TARGET_BUILTIN(__builtin_ia32_vcvttssd2si32, "iV2dIi", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttssd2usi32, "UiV2dIi", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttsss2si32, "iV4fIi", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttsss2usi32, "UiV4fIi", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2dqs128_mask, "V4iV2dV4iUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2dqs256_round_mask, "V4iV4dV4iUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2dqs512_round_mask, "V8iV8dV8iUcIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2udqs128_mask, "V4iV2dV4iUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2udqs256_round_mask, "V4iV4dV4iUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2udqs512_round_mask, "V8iV8dV8iUcIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2qqs128_mask, "V2OiV2dV2OiUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2qqs256_round_mask, "V4OiV4dV4OiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2qqs512_round_mask, "V8OiV8dV8OiUcIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2uqqs128_mask, "V2OiV2dV2OiUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2uqqs256_round_mask, "V4OiV4dV4OiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2uqqs512_round_mask, "V8OiV8dV8OiUcIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2dqs128_mask, "V4iV4fV4iUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2dqs256_round_mask, "V8iV8fV8iUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2dqs512_round_mask, "V16iV16fV16iUsIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2udqs128_mask, "V4iV4fV4iUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2udqs256_round_mask, "V8iV8fV8iUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2udqs512_round_mask, "V16iV16fV16iUsIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2qqs128_mask, "V2OiV4fV2OiUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2qqs256_round_mask, "V4OiV4fV4OiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2qqs512_round_mask, "V8OiV8fV8OiUcIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2uqqs128_mask, "V2OiV4fV2OiUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2uqqs256_round_mask, "V4OiV4fV4OiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2uqqs512_round_mask, "V8OiV8fV8OiUcIi", "nV:512:", "avx10.2-512")
+
// AVX-NE-CONVERT
TARGET_BUILTIN(__builtin_ia32_vbcstnebf162ps128, "V4fyC*", "nV:128:", "avxneconvert")
TARGET_BUILTIN(__builtin_ia32_vbcstnebf162ps256, "V8fyC*", "nV:256:", "avxneconvert")
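
A note on the prototype strings above: the first type is the return type
and the rest are parameter types, so "V8iV8dV8iUcIi" is a v8i32 result
computed from (v8f64 source, v8i32 passthrough, unsigned char mask, int
immediate), and "Oi" denotes long long. The header wrappers added below
expand to direct builtin calls of exactly this shape, for example:

  __m256i demo(__m512d src, __m256i passthru, __mmask8 m) {
    return (__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
        (__v8df)src, (__v8si)passthru, m, _MM_FROUND_CUR_DIRECTION);
  }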
diff --git a/clang/include/clang/Basic/BuiltinsX86_64.def b/clang/include/clang/Basic/BuiltinsX86_64.def
index 5e00916d4b25ae..ed9b17b8bd7b8e 100644
--- a/clang/include/clang/Basic/BuiltinsX86_64.def
+++ b/clang/include/clang/Basic/BuiltinsX86_64.def
@@ -99,6 +99,12 @@ TARGET_BUILTIN(__builtin_ia32_vcvttsh2si64, "OiV8xIi", "ncV:128:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vcvttsh2usi64, "UOiV8xIi", "ncV:128:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_directstore_u64, "vULi*ULi", "n", "movdiri")
+// AVX10.2 SATCVT-DS
+TARGET_BUILTIN(__builtin_ia32_vcvttssd2si64, "OiV2dIi", "ncV:128:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttssd2usi64, "UOiV2dIi", "ncV:128:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttsss2si64, "OiV4fIi", "ncV:128:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttsss2usi64, "UOiV4fIi", "ncV:128:", "avx10.2-512")
+
// UINTR
TARGET_BUILTIN(__builtin_ia32_clui, "v", "n", "uintr")
TARGET_BUILTIN(__builtin_ia32_stui, "v", "n", "uintr")
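
The scalar 64-bit variants live in BuiltinsX86_64.def because they are only
available in 64-bit mode; the headers below guard the matching intrinsics
with #ifdef __x86_64__. Illustrative use (not part of the patch):

  #ifdef __x86_64__
  // Scalar double -> signed 64-bit with saturation; the second argument
  // is the rounding/SAE immediate.
  long long demo64(__m128d v) {
    return _mm_cvtt_roundssd_si64(v, _MM_FROUND_NO_EXC);
  }
  #endif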
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index b61aeca6bbc910..9981290628697c 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -150,9 +150,11 @@ set(x86_files
avx10_2_512minmaxintrin.h
avx10_2_512niintrin.h
avx10_2_512satcvtintrin.h
+ avx10_2_512satcvtdsintrin.h
avx10_2minmaxintrin.h
avx10_2niintrin.h
avx10_2satcvtintrin.h
+ avx10_2satcvtdsintrin.h
avx2intrin.h
avx512bf16intrin.h
avx512bitalgintrin.h
diff --git a/clang/lib/Headers/avx10_2_512satcvtdsintrin.h b/clang/lib/Headers/avx10_2_512satcvtdsintrin.h
new file mode 100644
index 00000000000000..e8b815653c3d6e
--- /dev/null
+++ b/clang/lib/Headers/avx10_2_512satcvtdsintrin.h
@@ -0,0 +1,302 @@
+/*===----- avx10_2_512satcvtdsintrin.h - AVX10_2_512SATCVTDS intrinsics ----===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error \
+ "Never use <avx10_2_512satcvtdsintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX10_2_512SATCVTDSINTRIN_H
+#define __AVX10_2_512SATCVTDSINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS \
+ __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"), \
+ __min_vector_width__(512)))
+
+// 512 bit : Double -> Int
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi32(__m512d A) {
+ return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
+ (__v8df)A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttspd_epi32(__m256i W, __mmask8 U, __m512d A) {
+ return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
+ (__v8df)A, (__v8si)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttspd_epi32(__mmask8 U, __m512d A) {
+ return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
+ (__v8df)A, (__v8si)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm512_cvtts_roundpd_epi32(A, R) \
+ ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( \
+ (__v8df)(__m512d)(A), (__v8si)_mm256_undefined_si256(), (__mmask8) - 1, \
+ (const int)(R)))
+
+#define _mm512_mask_cvtts_roundpd_epi32(W, U, A, R) \
+ ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( \
+ (__v8df)(__m512d)(A), (__v8si)(__m256i)(W), (__mmask8)(U), \
+ (const int)(R)))
+
+#define _mm512_maskz_cvtts_roundpd_epi32(U, A, R) \
+ ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( \
+ (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), \
+ (const int)(R)))
+
+// 512 bit : Double -> UInt
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu32(__m512d A) {
+ return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
+ (__v8df)A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttspd_epu32(__m256i W, __mmask8 U, __m512d A) {
+ return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
+ (__v8df)A, (__v8si)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttspd_epu32(__mmask8 U, __m512d A) {
+ return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
+ (__v8df)A, (__v8si)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm512_cvtts_roundpd_epu32(A, R) \
+ ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( \
+ (__v8df)(__m512d)(A), (__v8si)_mm256_undefined_si256(), (__mmask8) - 1, \
+ (const int)(R)))
+
+#define _mm512_mask_cvtts_roundpd_epu32(W, U, A, R) \
+ ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( \
+ (__v8df)(__m512d)(A), (__v8si)(__m256i)(W), (__mmask8)(U), \
+ (const int)(R)))
+
+#define _mm512_maskz_cvtts_roundpd_epu32(U, A, R) \
+ ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( \
+ (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), \
+ (const int)(R)))
+
+#ifdef __x86_64__
+// 512 bit : Double -> Long
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi64(__m512d A) {
+ return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
+ (__v8df)A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION));
+}
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttspd_epi64(__m512i W, __mmask8 U, __m512d A) {
+ return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
+ (__v8df)A, (__v8di)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttspd_epi64(__mmask8 U, __m512d A) {
+ return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
+ (__v8df)A, (__v8di)_mm512_setzero_si512(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm512_cvtts_roundpd_epi64(A, R) \
+ ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( \
+ (__v8df)(__m512d)(A), (__v8di)_mm512_undefined_epi32(), (__mmask8) - 1, \
+ (const int)(R)))
+
+#define _mm512_mask_cvtts_roundpd_epi64(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( \
+ (__v8df)(__m512d)(A), (__v8di)(__m512i)(W), (__mmask8)(U), \
+ (const int)(R)))
+
+#define _mm512_maskz_cvtts_roundpd_epi64(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( \
+ (__v8df)(__m512d)(A), (__v8di)_mm512_setzero_si512(), (__mmask8)(U), \
+ (const int)(R)))
+
+// 512 bit : Double -> ULong
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu64(__m512d A) {
+ return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
+ (__v8df)A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttspd_epu64(__m512i W, __mmask8 U, __m512d A) {
+ return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
+ (__v8df)A, (__v8di)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttspd_epu64(__mmask8 U, __m512d A) {
+ return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
+ (__v8df)A, (__v8di)_mm512_setzero_si512(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm512_cvtts_roundpd_epu64(A, R) \
+ ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( \
+ (__v8df)(__m512d)(A), (__v8di)_mm512_undefined_epi32(), (__mmask8) - 1, \
+ (const int)(R)))
+
+#define _mm512_mask_cvtts_roundpd_epu64(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( \
+ (__v8df)(__m512d)(A), (__v8di)(__m512i)(W), (__mmask8)(U), \
+ (const int)(R)))
+
+#define _mm512_maskz_cvtts_roundpd_epu64(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( \
+ (__v8df)(__m512d)(A), (__v8di)_mm512_setzero_si512(), (__mmask8)(U), \
+ (const int)(R)))
+
+#endif
+
+// 512 bit : Float -> Int
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi32(__m512 A) {
+ return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
+ (__v16sf)(A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1,
+ _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttsps_epi32(__m512i W, __mmask16 U, __m512 A) {
+ return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
+ (__v16sf)(A), (__v16si)(W), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttsps_epi32(__mmask16 U, __m512 A) {
+ return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
+ (__v16sf)(A), (__v16si)_mm512_setzero_si512(), U,
+ _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm512_cvtts_roundps_epi32(A, R) \
+ ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( \
+ (__v16sf)(__m512)(A), (__v16si)_mm512_undefined_epi32(), \
+ (__mmask16) - 1, (const int)(R)))
+
+#define _mm512_mask_cvtts_roundps_epi32(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( \
+ (__v16sf)(__m512)(A), (__v16si)(__m512i)(W), (__mmask16)(U), \
+ (const int)(R)))
+
+#define _mm512_maskz_cvtts_roundps_epi32(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( \
+ (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), (__mmask16)(U), \
+ (const int)(R)))
+
+// 512 bit : Float -> UInt
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu32(__m512 A) {
+ return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
+ (__v16sf)(A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1,
+ _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttsps_epu32(__m512i W, __mmask16 U, __m512 A) {
+ return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
+ (__v16sf)(A), (__v16si)(W), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttsps_epu32(__mmask16 U, __m512 A) {
+ return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
+ (__v16sf)(A), (__v16si)_mm512_setzero_si512(), U,
+ _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm512_cvtts_roundps_epu32(A, R) \
+ ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( \
+ (__v16sf)(__m512)(A), (__v16si)_mm512_undefined_epi32(), \
+ (__mmask16) - 1, (const int)(R)))
+
+#define _mm512_mask_cvtts_roundps_epu32(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( \
+ (__v16sf)(__m512)(A), (__v16si)(__m512i)(W), (__mmask16)(U), \
+ (const int)(R)))
+
+#define _mm512_maskz_cvtts_roundps_epu32(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( \
+ (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), (__mmask16)(U), \
+ (const int)(R)))
+
+#ifdef __x86_64__
+// 512 bit : Float -> Long
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi64(__m256 A) {
+ return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
+ (__v8sf)A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttsps_epi64(__m512i W, __mmask8 U, __m256 A) {
+ return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
+ (__v8sf)A, (__v8di)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttsps_epi64(__mmask8 U, __m256 A) {
+ return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
+ (__v8sf)A, (__v8di)_mm512_setzero_si512(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm512_cvtts_roundps_epi64(A, R) \
+ ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( \
+ (__v8sf)(__m256)(A), (__v8di)_mm512_undefined_epi32(), (__mmask8) - 1, \
+ (const int)(R)))
+
+#define _mm512_mask_cvtts_roundps_epi64(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( \
+ (__v8sf)(__m256)(A), (__v8di)(__m512i)(W), (__mmask8)(U), \
+ (const int)(R)))
+
+#define _mm512_maskz_cvtts_roundps_epi64(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( \
+ (__v8sf)(__m256)(A), (__v8di)_mm512_setzero_si512(), (__mmask8)(U), \
+ (const int)(R)))
+
+// 512 bit : Float -> ULong
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu64(__m256 A) {
+ return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
+ (__v8sf)A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttsps_epu64(__m512i W, __mmask8 U, __m256 A) {
+ return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
+ (__v8sf)A, (__v8di)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttsps_epu64(__mmask8 U, __m256 A) {
+ return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
+ (__v8sf)A, (__v8di)_mm512_setzero_si512(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm512_cvtts_roundps_epu64(A, R) \
+ ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( \
+ (__v8sf)(__m256)(A), (__v8di)_mm512_undefined_epi32(), (__mmask8) - 1, \
+ (const int)(R)))
+
+#define _mm512_mask_cvtts_roundps_epu64(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( \
+ (__v8sf)(__m256)(A), (__v8di)(__m512i)(W), (__mmask8)(U), \
+ (const int)(R)))
+
+#define _mm512_maskz_cvtts_roundps_epu64(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( \
+ (__v8sf)(__m256)(A), (__v8di)_mm512_setzero_si512(), (__mmask8)(U), \
+ (const int)(R)))
+#endif
+
+#undef __DEFAULT_FN_ATTRS
+#endif // __AVX10_2_512SATCVTDSINTRIN_H
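
As elsewhere in the AVX-512-style headers, every conversion above comes as
a triple: an unmasked form, a merge-masking form (inactive lanes taken from
W), and a zero-masking form (inactive lanes zeroed). A short sketch, not
part of the patch:

  // Lanes selected by m are converted; the rest are copied from fallback.
  __m256i merge_convert(__m256i fallback, __mmask8 m, __m512d v) {
    return _mm512_mask_cvttspd_epi32(fallback, m, v);
  }

  // Lanes not selected by m are zeroed.
  __m256i zero_convert(__mmask8 m, __m512d v) {
    return _mm512_maskz_cvttspd_epi32(m, v);
  }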
diff --git a/clang/lib/Headers/avx10_2satcvtdsintrin.h b/clang/lib/Headers/avx10_2satcvtdsintrin.h
new file mode 100644
index 00000000000000..5588c9ccfa4319
--- /dev/null
+++ b/clang/lib/Headers/avx10_2satcvtdsintrin.h
@@ -0,0 +1,453 @@
+/*===---------- avx10_2satcvtdsintrin.h - AVX10_2SATCVTDS intrinsics -------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error \
+ "Never use <avx10_2satcvtdsintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __AVX10_2SATCVTDSINTRIN_H
+#define __AVX10_2SATCVTDSINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS \
+ __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \
+ __min_vector_width__(256)))
+
+#define _mm_cvtt_roundssd_i32(A, R) \
+ ((int)__builtin_ia32_vcvttssd2si32((__v2df)(__m128d)(A), (const int)(R)))
+
+#define _mm_cvtt_roundssd_si32(A, R) \
+ ((int)__builtin_ia32_vcvttssd2si32((__v2df)(__m128d)(A), (const int)(R)))
+
+#ifdef __x86_64__
+#define _mm_cvtt_roundssd_si64(A, R) \
+ ((long long)__builtin_ia32_vcvttssd2si64((__v2df)(__m128d)(A), \
+ (const int)(R)))
+
+#define _mm_cvtt_roundssd_i64(A, R) \
+ ((long long)__builtin_ia32_vcvttssd2si64((__v2df)(__m128d)(A), \
+ (const int)(R)))
+#endif
+
+#define _mm_cvtt_roundssd_u32(A, R) \
+ ((unsigned int)__builtin_ia32_vcvttssd2usi32((__v2df)(__m128d)(A), \
+ (const int)(R)))
+
+#ifdef __x86_64__
+#define _mm_cvtt_roundssd_u64(A, R) \
+ ((unsigned long long)__builtin_ia32_vcvttssd2usi64((__v2df)(__m128d)(A), \
+ (const int)(R)))
+#endif
+
+#define _mm_cvtt_roundsss_i32(A, R) \
+ ((int)__builtin_ia32_vcvttsss2si32((__v4sf)(__m128)(A), (const int)(R)))
+
+#define _mm_cvtt_roundsss_si32(A, R) \
+ ((int)__builtin_ia32_vcvttsss2si32((__v4sf)(__m128)(A), (const int)(R)))
+
+#ifdef __x86_64__
+#define _mm_cvtt_roundsss_i64(A, R) \
+ ((long long)__builtin_ia32_vcvttsss2si64((__v4sf)(__m128)(A), (const int)(R)))
+
+#define _mm_cvtt_roundsss_si64(A, R) \
+ ((long long)__builtin_ia32_vcvttsss2si64((__v4sf)(__m128)(A), (const int)(R)))
+#endif
+
+#define _mm_cvtt_roundsss_u32(A, R) \
+ ((unsigned int)__builtin_ia32_vcvttsss2usi32((__v4sf)(__m128)(A), \
+ (const int)(R)))
+
+#ifdef __x86_64__
+#define _mm_cvtt_roundsss_u64(A, R) \
+ ((unsigned long long)__builtin_ia32_vcvttsss2usi64((__v4sf)(__m128)(A), \
+ (const int)(R)))
+#endif
+
+// 128 bit : Double -> Int
+#define _mm_cvttspd_epi32(A) \
+ ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask( \
+ (__v2df)(__m128d)(A), (__v4si)(__m128i)_mm_undefined_si128(), \
+ (__mmask8)(-1)))
+
+#define _mm_mask_cvttspd_epi32(W, U, A) \
+ ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask( \
+ (__v2df)(__m128d)(A), (__v4si)(__m128i)(W), (__mmask8)(U)))
+
+#define _mm_maskz_cvttspd_epi32(U, A) \
+ ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask( \
+ (__v2df)(__m128d)(A), (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)(U)))
+
+// 256 Bit : Double -> int
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvttspd_epi32(__m256d A) {
+ return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
+ (__v4df)(__m256d)A, (__v4si)_mm_undefined_si128(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_mask_cvttspd_epi32(__m128i W, __mmask8 U, __m256d A) {
+ return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
+ (__v4df)A, (__v4si)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvttspd_epi32(__mmask8 U, __m256d A) {
+ return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
+ (__v4df)A, (__v4si)_mm_setzero_si128(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundpd_epi32(A, R) \
+ ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( \
+ (__v4df)(__m256d)(A), (__v4si)(__m128i)_mm_undefined_si128(), \
+ (__mmask8) - 1, (int)(R)))
+
+#define _mm256_mask_cvtts_roundpd_epi32(W, U, A, R) \
+ ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( \
+ (__v4df)(__m256d)(A), (__v4si)(__m128i)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvtts_roundpd_epi32(U, A, R) \
+ ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( \
+ (__v4df)(__m256d)(A), (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)(U), \
+ (int)(R)))
+
+// 128 bit : Double -> UInt
+#define _mm_cvttspd_epu32(A) \
+ ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask( \
+ (__v2df)(__m128d)(A), (__v4si)(__m128i)_mm_undefined_si128(), \
+ (__mmask8)(-1)))
+
+#define _mm_mask_cvttspd_epu32(W, U, A) \
+ ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask( \
+ (__v2df)(__m128d)(A), (__v4si)(__m128i)(W), (__mmask8)(U)))
+
+#define _mm_maskz_cvttspd_epu32(U, A) \
+ ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask( \
+ (__v2df)(__m128d)(A), (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)(U)))
+
+// 256 Bit : Double -> uint
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvttspd_epu32(__m256d A) {
+ return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
+ (__v4df)A, (__v4si)_mm_undefined_si128(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_mask_cvttspd_epu32(__m128i W, __mmask8 U, __m256d A) {
+ return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
+ (__v4df)A, (__v4si)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvttspd_epu32(__mmask8 U, __m256d A) {
+ return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
+ (__v4df)A, (__v4si)_mm_setzero_si128(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundpd_epu32(A, R) \
+ ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( \
+ (__v4df)(__m256d)(A), (__v4si)(__m128i)_mm_undefined_si128(), \
+ (__mmask8) - 1, (int)(R)))
+
+#define _mm256_mask_cvtts_roundpd_epu32(W, U, A, R) \
+ ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( \
+ (__v4df)(__m256d)(A), (__v4si)(__m128i)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvtts_roundpd_epu32(U, A, R) \
+ ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( \
+ (__v4df)(__m256d)(A), (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)(U), \
+ (int)(R)))
+
+// 128 bit : Double -> Long
+#ifdef __x86_64__
+
+#define _mm_cvttspd_epi64(A) \
+ ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask( \
+ (__v2df)(__m128d)(A), (__v2di)_mm_undefined_si128(), (__mmask8) - 1))
+
+#define _mm_mask_cvttspd_epi64(W, U, A) \
+ ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask( \
+ (__v2df)(__m128d)(A), (__v2di)(__m128i)(W), (__mmask8)(U)))
+
+#define _mm_maskz_cvttspd_epi64(U, A) \
+ ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask( \
+ (__v2df)(__m128d)(A), (__v2di)_mm_setzero_si128(), (__mmask8)(U)))
+
+// 256 bit : Double -> Long
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttspd_epi64(__m256d A) {
+ return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
+ (__v4df)A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvttspd_epi64(__m256i W, __mmask8 U, __m256d A) {
+ return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
+ (__v4df)A, (__v4di)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvttspd_epi64(__mmask8 U, __m256d A) {
+ return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
+ (__v4df)A, (__v4di)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundpd_epi64(A, R) \
+ ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask( \
+ (__v4df)(__m256d)(A), (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, \
+ (int)(R)))
+
+#define _mm256_mask_cvtts_roundpd_epi64(W, U, A, R) \
+ ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask( \
+ (__v4df)(__m256d)(A), (__v4di)(__m256i)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvtts_roundpd_epi64(U, A, R) \
+ ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask( \
+ (__v4df)(__m256d)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)(U), \
+ (int)(R)))
+
+// 128 bit : Double -> ULong
+#define _mm_cvttspd_epu64(A) \
+ ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask( \
+ (__v2df)(__m128d)(A), (__v2di)_mm_undefined_si128(), (__mmask8) - 1))
+
+#define _mm_mask_cvttspd_epu64(W, U, A) \
+ ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask( \
+ (__v2df)(__m128d)(A), (__v2di)(__m128i)(W), (__mmask8)(U)))
+
+#define _mm_maskz_cvttspd_epu64(U, A) \
+ ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask( \
+ (__v2df)(__m128d)(A), (__v2di)_mm_setzero_si128(), (__mmask8)(U)))
+
+// 256 bit : Double -> ULong
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttspd_epu64(__m256d A) {
+ return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
+ (__v4df)A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvttspd_epu64(__m256i W, __mmask8 U, __m256d A) {
+ return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
+ (__v4df)A, (__v4di)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvttspd_epu64(__mmask8 U, __m256d A) {
+ return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
+ (__v4df)A, (__v4di)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundpd_epu64(A, R) \
+ ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( \
+ (__v4df)(__m256d)(A), (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, \
+ (int)(R)))
+
+#define _mm256_mask_cvtts_roundpd_epu64(W, U, A, R) \
+ ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( \
+ (__v4df)(__m256d)(A), (__v4di)(__m256i)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvtts_roundpd_epu64(U, A, R) \
+ ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( \
+ (__v4df)(__m256d)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)(U), \
+ (int)(R)))
+#endif
+
+// 128 bit : Float -> Int
+#define _mm_cvttsps_epi32(A) \
+ ((__m128i)__builtin_ia32_vcvttps2dqs128_mask( \
+ (__v4sf)(__m128)(A), (__v4si)(__m128i)_mm_undefined_si128(), \
+ (__mmask8)(-1)))
+
+#define _mm_mask_cvttsps_epi32(W, U, A) \
+ ((__m128i)__builtin_ia32_vcvttps2dqs128_mask( \
+ (__v4sf)(__m128)(A), (__v4si)(__m128i)(W), (__mmask8)(U)))
+
+#define _mm_maskz_cvttsps_epi32(U, A) \
+ ((__m128i)__builtin_ia32_vcvttps2dqs128_mask( \
+ (__v4sf)(__m128)(A), (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)(U)))
+
+// 256 bit : Float -> Int
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttsps_epi32(__m256 A) {
+ return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
+ (__v8sf)A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvttsps_epi32(__m256i W, __mmask8 U, __m256 A) {
+ return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
+ (__v8sf)(__m256)A, (__v8si)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvttsps_epi32(__mmask8 U, __m256 A) {
+ return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
+ (__v8sf)(__m256)A, (__v8si)_mm256_setzero_si256(), U,
+ _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundps_epi32(A, R) \
+ ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( \
+ (__v8sf)(__m256)(A), (__v8si)(__m256i)_mm256_undefined_si256(), \
+ (__mmask8) - 1, (int)(R)))
+
+#define _mm256_mask_cvtts_roundps_epi32(W, U, A, R) \
+ ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( \
+ (__v8sf)(__m256)(A), (__v8si)(__m256i)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvtts_roundps_epi32(U, A, R) \
+ ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( \
+ (__v8sf)(__m256)(A), (__v8si)(__m256i)_mm256_setzero_si256(), (__mmask8)(U), \
+ (int)(R)))
+
+// 128 bit : Float -> UInt
+#define _mm_cvttsps_epu32(A) \
+ ((__m128i)__builtin_ia32_vcvttps2udqs128_mask( \
+ (__v4sf)(__m128)(A), (__v4si)(__m128i)_mm_undefined_si128(), \
+ (__mmask8)(-1)))
+
+#define _mm_mask_cvttsps_epu32(W, U, A) \
+ ((__m128i)__builtin_ia32_vcvttps2udqs128_mask( \
+ (__v4sf)(__m128)(A), (__v4si)(__m128i)(W), (__mmask8)(U)))
+
+#define _mm_maskz_cvttsps_epu32(U, A) \
+ ((__m128i)__builtin_ia32_vcvttps2udqs128_mask( \
+ (__v4sf)(__m128)(A), (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)(U)))
+
+// 256 bit : Float -> UInt
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttsps_epu32(__m256 A) {
+ return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
+ (__v8sf)A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvttsps_epu32(__m256i W, __mmask8 U, __m256 A) {
+ return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
+ (__v8sf)A, (__v8si)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvttsps_epu32(__mmask8 U, __m256 A) {
+ return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
+ (__v8sf)A, (__v8si)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundps_epu32(A, R) \
+ ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( \
+ (__v8sf)(__m256)(A), (__v8si)(__m256i)_mm256_undefined_si256(), \
+ (__mmask8) - 1, (int)(R)))
+
+#define _mm256_mask_cvtts_roundps_epu32(W, U, A, R) \
+ ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( \
+ (__v8sf)(__m256)(A), (__v8si)(__m256i)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvtts_roundps_epu32(U, A, R) \
+ ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( \
+ (__v8sf)(__m256)(A), (__v8si)(__m256i)_mm256_setzero_si256(), (__mmask8)(U), \
+ (int)(R)))
+
+// 128 bit : Float -> Long
+#ifdef __x86_64__
+
+#define _mm_cvttsps_epi64(A) \
+ ((__m128i)__builtin_ia32_vcvttps2qqs128_mask( \
+ (__v4sf)(__m128)(A), (__v2di)_mm_undefined_si128(), (__mmask8) - 1))
+
+#define _mm_mask_cvttsps_epi64(W, U, A) \
+ ((__m128i)__builtin_ia32_vcvttps2qqs128_mask( \
+ (__v4sf)(__m128)(A), (__v2di)(__m128i)(W), (__mmask8)(U)))
+
+#define _mm_maskz_cvttsps_epi64(U, A) \
+ ((__m128i)__builtin_ia32_vcvttps2qqs128_mask( \
+ (__v4sf)(__m128)(A), (__v2di)_mm_setzero_si128(), (__mmask8)(U)))
+// 256 bit : Float -> Long
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttsps_epi64(__m128 A) {
+ return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
+ (__v4sf)A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION));
+}
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvttsps_epi64(__m256i W, __mmask8 U, __m128 A) {
+ return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
+ (__v4sf)A, (__v4di)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvttsps_epi64(__mmask8 U, __m128 A) {
+ return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
+ (__v4sf)A, (__v4di)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundps_epi64(A, R) \
+ ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( \
+ (__v4sf)(__m128)(A), (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, \
+ (int)(R)))
+
+#define _mm256_mask_cvtts_roundps_epi64(W, U, A, R) \
+ ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( \
+ (__v4sf)(__m128)(A), (__v4di)(__m256i)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvtts_roundps_epi64(U, A, R) \
+ ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( \
+ (__v4sf)(__m128)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)(U), (int)(R)))
+
+// 128 bit : Float -> ULong
+#define _mm_cvttsps_epu64(A) \
+ ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask( \
+ (__v4sf)(__m128)(A), (__v2di)_mm_undefined_si128(), (__mmask8) - 1))
+
+#define _mm_mask_cvttsps_epu64(W, U, A) \
+ ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask( \
+ (__v4sf)(__m128)(A), (__v2di)(__m128i)(W), (__mmask8)(U)))
+
+#define _mm_maskz_cvttsps_epu64(U, A) \
+ ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask( \
+ (__v4sf)(__m128)(A), (__v2di)_mm_setzero_si128(), (__mmask8)(U)))
+// 256 bit : Float -> ULong
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttsps_epu64(__m128 A) {
+ return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
+ (__v4sf)A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvttsps_epu64(__m256i W, __mmask8 U, __m128 A) {
+ return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
+ (__v4sf)A, (__v4di)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvttsps_epu64(__mmask8 U, __m128 A) {
+ return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
+ (__v4sf)A, (__v4di)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundps_epu64(A, R) \
+ ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( \
+ (__v4sf)(__m128)(A), (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, \
+ (int)(R)))
+
+#define _mm256_mask_cvtts_roundps_epu64(W, U, A, R) \
+ ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( \
+ (__v4sf)(__m128)(A), (__v4di)(__m256i)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm256_maskz_cvtts_roundps_epu64(U, A, R) \
+ ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( \
+ (__v4sf)(__m128)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)(U), (int)(R)))
+#endif
+
+#undef __DEFAULT_FN_ATTRS
+#endif /*__AVX10_2SATCVTDSINTRIN_H*/
\ No newline at end of file
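
As in the 512-bit header, the function forms above use
_MM_FROUND_CUR_DIRECTION, while the *_round macro forms take a compile-time
rounding/SAE immediate. Illustrative only, not part of the patch:

  // 256-bit double -> int32 conversion with exceptions suppressed.
  __m128i convert_no_exc(__m256d v) {
    return _mm256_cvtts_roundpd_epi32(v, _MM_FROUND_NO_EXC);
  }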
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index f570c5752db4b9..ec9ad9201a0d24 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -200,6 +200,14 @@
#include <avx512vlbf16intrin.h>
#endif
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2__)
+#include <avx10_2satcvtdsintrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2_512__)
+#include <avx10_2_512satcvtdsintrin.h>
+#endif
+
#if !defined(__SCE__) || __has_feature(modules) || defined(__PKU__)
#include <pkuintrin.h>
#endif
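
With these includes in place, users only ever include <immintrin.h>; the
new headers are pulled in when the corresponding feature macro is defined.
A hedged sketch, assuming the driver flags follow the existing AVX10
pattern (-mavx10.2-256 / -mavx10.2-512; the flag spelling is not part of
this patch):

  /* demo.c, built e.g. with: clang -mavx10.2-512 -c demo.c */
  #include <immintrin.h>
  #ifndef __AVX10_2__
  #error "this TU expects an AVX10.2-enabled target"
  #endif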
diff --git a/clang/lib/Sema/SemaX86.cpp b/clang/lib/Sema/SemaX86.cpp
index a0756f167deae6..2518e2881476ba 100644
--- a/clang/lib/Sema/SemaX86.cpp
+++ b/clang/lib/Sema/SemaX86.cpp
@@ -45,6 +45,14 @@ bool SemaX86::CheckBuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_vcvttsh2si64:
case X86::BI__builtin_ia32_vcvttsh2usi32:
case X86::BI__builtin_ia32_vcvttsh2usi64:
+ case X86::BI__builtin_ia32_vcvttssd2si32:
+ case X86::BI__builtin_ia32_vcvttssd2usi32:
+ case X86::BI__builtin_ia32_vcvttsss2si32:
+ case X86::BI__builtin_ia32_vcvttsss2usi32:
+ case X86::BI__builtin_ia32_vcvttssd2si64:
+ case X86::BI__builtin_ia32_vcvttssd2usi64:
+ case X86::BI__builtin_ia32_vcvttsss2si64:
+ case X86::BI__builtin_ia32_vcvttsss2usi64:
ArgNum = 1;
break;
case X86::BI__builtin_ia32_maxpd512:
@@ -432,6 +440,24 @@ bool SemaX86::CheckBuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
ArgNum = 4;
HasRC = true;
break;
+ case X86::BI__builtin_ia32_vcvttpd2dqs256_round_mask:
+ case X86::BI__builtin_ia32_vcvttpd2dqs512_round_mask:
+ case X86::BI__builtin_ia32_vcvttpd2udqs256_round_mask:
+ case X86::BI__builtin_ia32_vcvttpd2udqs512_round_mask:
+ case X86::BI__builtin_ia32_vcvttpd2qqs256_round_mask:
+ case X86::BI__builtin_ia32_vcvttpd2qqs512_round_mask:
+ case X86::BI__builtin_ia32_vcvttpd2uqqs256_round_mask:
+ case X86::BI__builtin_ia32_vcvttpd2uqqs512_round_mask:
+ case X86::BI__builtin_ia32_vcvttps2dqs256_round_mask:
+ case X86::BI__builtin_ia32_vcvttps2dqs512_round_mask:
+ case X86::BI__builtin_ia32_vcvttps2udqs256_round_mask:
+ case X86::BI__builtin_ia32_vcvttps2udqs512_round_mask:
+ case X86::BI__builtin_ia32_vcvttps2qqs256_round_mask:
+ case X86::BI__builtin_ia32_vcvttps2qqs512_round_mask:
+ case X86::BI__builtin_ia32_vcvttps2uqqs256_round_mask:
+ case X86::BI__builtin_ia32_vcvttps2uqqs512_round_mask:
+ ArgNum = 3;
+ break;
}
llvm::APSInt Result;
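
These cases route the new builtins through the existing rounding/SAE
validation, so the immediate is checked at compile time. The headers pass
_MM_FROUND_CUR_DIRECTION internally, the tests below use _MM_FROUND_NO_EXC
and _MM_FROUND_NEARBYINT as accepted values, and an arbitrary immediate
such as 22 is diagnosed:

  __m256i ok(__m512d v) {
    return _mm512_cvtts_roundpd_epi32(v, _MM_FROUND_NO_EXC); // accepted
  }
  // _mm512_cvtts_roundpd_epi32(v, 22) -> error: invalid rounding argument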
diff --git a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-errors.c b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-errors.c
new file mode 100644
index 00000000000000..c2e891217fbbcf
--- /dev/null
+++ b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-errors.c
@@ -0,0 +1,52 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-unknown-unknown -target-feature +avx10.2-512 -emit-llvm -Wall -Werror -verify
+
+#include <immintrin.h>
+#include <stddef.h>
+
+__m256i test_mm512_cvtts_roundpd_epi32(__m512d A) {
+ return _mm512_cvtts_roundpd_epi32(A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m256i test_mm512_mask_cvtts_roundpd_epi32(__m256i W, __mmask8 U, __m512d A) {
+ return _mm512_mask_cvtts_roundpd_epi32(W, U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m256i test_mm512_maskz_cvtts_roundpd_epi32(__mmask8 U, __m512d A) {
+ return _mm512_maskz_cvtts_roundpd_epi32(U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m256i test_mm512_cvtts_roundpd_epu32(__m512d A) {
+ return _mm512_cvtts_roundpd_epu32(A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m256i test_mm512_mask_cvtts_roundpd_epu32(__m256i W, __mmask8 U, __m512d A) {
+ return _mm512_mask_cvtts_roundpd_epu32(W, U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m256i test_mm512_maskz_cvtts_roundpd_epu32(__mmask8 U, __m512d A) {
+ return _mm512_maskz_cvtts_roundpd_epu32(U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_cvtts_roundps_epi32(__m512 A) {
+ return _mm512_cvtts_roundps_epi32(A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_mask_cvtts_roundps_epi32(__m512i W, __mmask16 U, __m512 A) {
+ return _mm512_mask_cvtts_roundps_epi32(W, U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_maskz_cvtts_roundps_epi32(__mmask16 U, __m512 A) {
+ return _mm512_maskz_cvtts_roundps_epi32(U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_cvtts_roundps_epu32(__m512 A) {
+ return _mm512_cvtts_roundps_epu32(A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_mask_cvtts_roundps_epu32(__m512i W, __mmask16 U, __m512 A) {
+ return _mm512_mask_cvtts_roundps_epu32(W, U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_maskz_cvtts_roundps_epu32(__mmask16 U, __m512 A) {
+ return _mm512_maskz_cvtts_roundps_epu32(U, A, 22); // expected-error {{invalid rounding argument}}
+}
\ No newline at end of file
diff --git a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64-error.c b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64-error.c
new file mode 100755
index 00000000000000..7c7c94fbf8c89b
--- /dev/null
+++ b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64-error.c
@@ -0,0 +1,76 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx10.2-512 -emit-llvm -Wall -Werror -verify
+
+#include <immintrin.h>
+#include <stddef.h>
+
+long long test_mm_cvttssd_si64(__m128d __A) {
+ return _mm_cvtt_roundssd_si64(__A, 22); // expected-error {{invalid rounding argument}}
+}
+
+long long test_mm_cvttssd_i64(__m128d __A) {
+ return _mm_cvtt_roundssd_i64(__A, 22); // expected-error {{invalid rounding argument}}
+}
+
+unsigned long long test_mm_cvttssd_u64(__m128d __A) {
+ return _mm_cvtt_roundssd_u64(__A, 22); // expected-error {{invalid rounding argument}}
+}
+
+long long test_mm_cvttsss_i64(__m128 __A) {
+ return _mm_cvtt_roundsss_i64(__A, 22); // expected-error {{invalid rounding argument}}
+}
+
+long long test_mm_cvttsss_si64(__m128 __A) {
+ return _mm_cvtt_roundsss_si64(__A, 22); // expected-error {{invalid rounding argument}}
+}
+
+unsigned long long test_mm_cvttsss_u64(__m128 __A) {
+ return _mm_cvtt_roundsss_u64(__A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_cvtts_roundpd_epi64(__m512d A) {
+ return _mm512_cvtts_roundpd_epi64(A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_mask_cvtts_roundpd_epi64(__m512i W, __mmask8 U, __m512d A) {
+ return _mm512_mask_cvtts_roundpd_epi64(W, U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_maskz_cvtts_roundpd_epi64(__mmask8 U, __m512d A) {
+ return _mm512_maskz_cvtts_roundpd_epi64(U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_cvtts_roundpd_epu64(__m512d A) {
+ return _mm512_cvtts_roundpd_epu64(A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_mask_cvtts_roundpd_epu64(__m512i W, __mmask8 U, __m512d A) {
+ return _mm512_mask_cvtts_roundpd_epu64(W, U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_maskz_cvtts_roundpd_epu64(__mmask8 U, __m512d A) {
+ return _mm512_maskz_cvtts_roundpd_epu64(U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_cvtts_roundps_epi64(__m256 A) {
+ return _mm512_cvtts_roundps_epi64(A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_mask_cvtts_roundps_epi64(__m512i W, __mmask8 U, __m256 A) {
+ return _mm512_mask_cvtts_roundps_epi64(W, U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_maskz_cvtts_roundps_epi64(__mmask8 U, __m256 A) {
+ return _mm512_maskz_cvtts_roundps_epi64(U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_cvtts_roundps_epu64(__m256 A) {
+ return _mm512_cvtts_roundps_epu64(A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_mask_cvtts_roundps_epu64(__m512i W, __mmask8 U, __m256 A) {
+ return _mm512_mask_cvtts_roundps_epu64(W, U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m512i test_mm512_maskz_cvtts_roundps_epu64(__mmask8 U, __m256 A) {
+ return _mm512_maskz_cvtts_roundps_epu64(U, A, 22); // expected-error {{invalid rounding argument}}
+}
diff --git a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c
new file mode 100644
index 00000000000000..9e8b7f01c4c816
--- /dev/null
+++ b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c
@@ -0,0 +1,184 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx10.2-512 -emit-llvm -o - | FileCheck %s
+
+#include <immintrin.h>
+#include <stddef.h>
+
+long long test_mm_cvttssd_si64(__m128d __A) {
+ // CHECK-LABEL: @test_mm_cvttssd_si64(
+ // CHECK: @llvm.x86.avx512.vcvttssd2si64(<2 x double>
+ return _mm_cvtt_roundssd_si64(__A, _MM_FROUND_NO_EXC);
+}
+
+long long test_mm_cvttssd_i64(__m128d __A) {
+ // CHECK-LABEL: @test_mm_cvttssd_i64(
+ // CHECK: @llvm.x86.avx512.vcvttssd2si64(<2 x double>
+ return _mm_cvtt_roundssd_i64(__A, _MM_FROUND_NO_EXC);
+}
+
+unsigned long long test_mm_cvttssd_u64(__m128d __A) {
+ // CHECK-LABEL: @test_mm_cvttssd_u64(
+ // CHECK: @llvm.x86.avx512.vcvttssd2usi64(<2 x double>
+ return _mm_cvtt_roundssd_u64(__A, _MM_FROUND_NO_EXC);
+}
+
+long long test_mm_cvttsss_i64(__m128 __A) {
+ // CHECK-LABEL: @test_mm_cvttsss_i64(
+ // CHECK: @llvm.x86.avx512.vcvttsss2si64(<4 x float>
+ return _mm_cvtt_roundsss_i64(__A, _MM_FROUND_NO_EXC);
+}
+
+long long test_mm_cvttsss_si64(__m128 __A) {
+ // CHECK-LABEL: @test_mm_cvttsss_si64(
+ // CHECK: @llvm.x86.avx512.vcvttsss2si64(<4 x float>
+ return _mm_cvtt_roundsss_si64(__A, _MM_FROUND_NO_EXC);
+}
+
+unsigned long long test_mm_cvttsss_u64(__m128 __A) {
+ // CHECK-LABEL: @test_mm_cvttsss_u64(
+ // CHECK: @llvm.x86.avx512.vcvttsss2usi64(<4 x float>
+ return _mm_cvtt_roundsss_u64(__A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvttspd_epi64(__m512d A) {
+ // CHECK-LABEL: test_mm512_cvttspd_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.round.512(<8 x double>
+ return _mm512_cvttspd_epi64(A);
+}
+
+__m512i test_mm512_mask_cvttspd_epi64(__m512i W, __mmask8 U, __m512d A) {
+ // CHECK-LABEL: test_mm512_mask_cvttspd_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.round.512(<8 x double>
+ return _mm512_mask_cvttspd_epi64(W, U, A);
+}
+
+__m512i test_mm512_maskz_cvttspd_epi64(__mmask8 U, __m512d A) {
+ // CHECK-LABEL: test_mm512_maskz_cvttspd_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.round.512(<8 x double>
+ return _mm512_maskz_cvttspd_epi64(U, A);
+}
+
+__m512i test_mm512_cvtts_roundpd_epi64(__m512d A) {
+ // CHECK-LABEL: test_mm512_cvtts_roundpd_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.round.512(<8 x double>
+ return _mm512_cvtts_roundpd_epi64(A, _MM_FROUND_NEARBYINT);
+}
+
+__m512i test_mm512_mask_cvtts_roundpd_epi64(__m512i W, __mmask8 U, __m512d A) {
+ // CHECK-LABEL: test_mm512_mask_cvtts_roundpd_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.round.512(<8 x double>
+ return _mm512_mask_cvtts_roundpd_epi64(W, U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m512i test_mm512_maskz_cvtts_roundpd_epi64(__mmask8 U, __m512d A) {
+ // CHECK-LABEL: test_mm512_maskz_cvtts_roundpd_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.round.512(<8 x double>
+ return _mm512_maskz_cvtts_roundpd_epi64(U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m512i test_mm512_cvttspd_epu64(__m512d A) {
+ // CHECK-LABEL: test_mm512_cvttspd_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.round.512(<8 x double>
+ return _mm512_cvttspd_epu64(A);
+}
+
+__m512i test_mm512_mask_cvttspd_epu64(__m512i W, __mmask8 U, __m512d A) {
+ // CHECK-LABEL: test_mm512_mask_cvttspd_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.round.512(<8 x double>
+ return _mm512_mask_cvttspd_epu64(W, U, A);
+}
+
+__m512i test_mm512_maskz_cvttspd_epu64(__mmask8 U, __m512d A) {
+ // CHECK-LABEL: test_mm512_maskz_cvttspd_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.round.512(<8 x double>
+ return _mm512_maskz_cvttspd_epu64(U, A);
+}
+
+__m512i test_mm512_cvtts_roundpd_epu64(__m512d A) {
+ // CHECK-LABEL: test_mm512_cvtts_roundpd_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.round.512(<8 x double>
+ return _mm512_cvtts_roundpd_epu64(A, _MM_FROUND_NEARBYINT);
+}
+
+__m512i test_mm512_mask_cvtts_roundpd_epu64(__m512i W, __mmask8 U, __m512d A) {
+ // CHECK-LABEL: test_mm512_mask_cvtts_roundpd_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.round.512(<8 x double>
+ return _mm512_mask_cvtts_roundpd_epu64(W, U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m512i test_mm512_maskz_cvtts_roundpd_epu64(__mmask8 U, __m512d A) {
+ // CHECK-LABEL: test_mm512_maskz_cvtts_roundpd_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.round.512(<8 x double>
+ return _mm512_maskz_cvtts_roundpd_epu64(U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m512i test_mm512_cvttsps_epi64(__m256 A) {
+ // CHECK-LABEL: test_mm512_cvttsps_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.round.512(<8 x float>
+ return _mm512_cvttsps_epi64(A);
+}
+
+__m512i test_mm512_mask_cvttsps_epi64(__m512i W, __mmask8 U, __m256 A) {
+ // CHECK-LABEL: test_mm512_mask_cvttsps_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.round.512(<8 x float>
+ return _mm512_mask_cvttsps_epi64(W, U, A);
+}
+
+__m512i test_mm512_maskz_cvttsps_epi64(__mmask8 U, __m256 A) {
+ // CHECK-LABEL: test_mm512_maskz_cvttsps_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.round.512(<8 x float>
+ return _mm512_maskz_cvttsps_epi64(U, A);
+}
+
+__m512i test_mm512_cvtts_roundps_epi64(__m256 A) {
+ // CHECK-LABEL: test_mm512_cvtts_roundps_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.round.512(<8 x float>
+ return _mm512_cvtts_roundps_epi64(A, _MM_FROUND_NEARBYINT);
+}
+
+__m512i test_mm512_mask_cvtts_roundps_epi64(__m512i W, __mmask8 U, __m256 A) {
+ // CHECK-LABEL: test_mm512_mask_cvtts_roundps_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.round.512(<8 x float>
+ return _mm512_mask_cvtts_roundps_epi64(W, U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m512i test_mm512_maskz_cvtts_roundps_epi64(__mmask8 U, __m256 A) {
+ // CHECK-LABEL: test_mm512_maskz_cvtts_roundps_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.round.512(<8 x float>
+ return _mm512_maskz_cvtts_roundps_epi64(U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m512i test_mm512_cvttsps_epu64(__m256 A) {
+ // CHECK-LABEL: test_mm512_cvttsps_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.round.512(<8 x float>
+ return _mm512_cvttsps_epu64(A);
+}
+
+__m512i test_mm512_mask_cvttsps_epu64(__m512i W, __mmask8 U, __m256 A) {
+ // CHECK-LABEL: test_mm512_mask_cvttsps_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.round.512(<8 x float>
+ return _mm512_mask_cvttsps_epu64(W, U, A);
+}
+
+__m512i test_mm512_maskz_cvttsps_epu64(__mmask8 U, __m256 A) {
+ // CHECK-LABEL: test_mm512_maskz_cvttsps_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.round.512(<8 x float>
+ return _mm512_maskz_cvttsps_epu64(U, A);
+}
+
+__m512i test_mm512_cvtts_roundps_epu64(__m256 A) {
+ // CHECK-LABEL: test_mm512_cvtts_roundps_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.round.512(<8 x float>
+ return _mm512_cvtts_roundps_epu64(A, _MM_FROUND_NEARBYINT);
+}
+
+__m512i test_mm512_mask_cvtts_roundps_epu64(__m512i W, __mmask8 U, __m256 A) {
+ // CHECK-LABEL: test_mm512_mask_cvtts_roundps_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.round.512(<8 x float>
+ return _mm512_mask_cvtts_roundps_epu64(W, U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m512i test_mm512_maskz_cvtts_roundps_epu64(__mmask8 U, __m256 A) {
+ // CHECK-LABEL: test_mm512_maskz_cvtts_roundps_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.round.512(<8 x float>
+ return _mm512_maskz_cvtts_roundps_epu64(U, A, _MM_FROUND_NEARBYINT);
+}
diff --git a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c
new file mode 100644
index 00000000000000..c518d0c5d77884
--- /dev/null
+++ b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c
@@ -0,0 +1,183 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-unknown-unknown -target-feature +avx10.2-512 -emit-llvm -o - | FileCheck %s
+
+#include <immintrin.h>
+#include <stddef.h>
+
+int test_mm_cvttssd_i32(__m128d __A) {
+ // CHECK-LABEL: @test_mm_cvttssd_i32
+ // CHECK: @llvm.x86.avx512.vcvttssd2si
+ return _mm_cvtt_roundssd_i32(__A, _MM_FROUND_NO_EXC);
+}
+
+int test_mm_cvttssd_si32(__m128d __A) {
+ // CHECK-LABEL: @test_mm_cvttssd_si32(
+ // CHECK: @llvm.x86.avx512.vcvttssd2si(<2 x double>
+ return _mm_cvtt_roundssd_si32(__A, _MM_FROUND_NO_EXC);
+}
+
+unsigned test_mm_cvttssd_u32(__m128d __A) {
+ // CHECK-LABEL: @test_mm_cvttssd_u32(
+ // CHECK: @llvm.x86.avx512.vcvttssd2usi(<2 x double>
+ return _mm_cvtt_roundssd_u32(__A, _MM_FROUND_NO_EXC);
+}
+
+int test_mm_cvttsss_i32(__m128 __A) {
+ // CHECK-LABEL: @test_mm_cvttsss_i32(
+ // CHECK: @llvm.x86.avx512.vcvttsss2si(<4 x float>
+ return _mm_cvtt_roundsss_i32(__A, _MM_FROUND_NO_EXC);
+}
+
+int test_mm_cvttsss_si32(__m128 __A) {
+ // CHECK-LABEL: @test_mm_cvttsss_si32(
+ // CHECK: @llvm.x86.avx512.vcvttsss2si(<4 x float>
+ return _mm_cvtt_roundsss_si32(__A, _MM_FROUND_NO_EXC);
+}
+
+unsigned test_mm_cvttsss_u32(__m128 __A) {
+ // CHECK-LABEL: @test_mm_cvttsss_u32(
+ // CHECK: @llvm.x86.avx512.vcvttsss2usi(<4 x float>
+ return _mm_cvtt_roundsss_u32(__A, _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm512_cvttspd_epi32(__m512d A) {
+ // CHECK-LABEL: test_mm512_cvttspd_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.round.512(<8 x double>
+ return _mm512_cvttspd_epi32(A);
+}
+
+__m256i test_mm512_mask_cvttspd_epi32(__m256i W, __mmask8 U, __m512d A) {
+ // CHECK-LABEL: test_mm512_mask_cvttspd_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.round.512(<8 x double>
+ return _mm512_mask_cvttspd_epi32(W, U, A);
+}
+
+__m256i test_mm512_maskz_cvttspd_epi32(__mmask8 U, __m512d A) {
+ // CHECK-LABEL: test_mm512_maskz_cvttspd_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.round.512(<8 x double>
+ return _mm512_maskz_cvttspd_epi32(U, A);
+}
+
+__m256i test_mm512_cvtts_roundpd_epi32(__m512d A) {
+ // CHECK-LABEL: test_mm512_cvtts_roundpd_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.round.512(<8 x double>
+ return _mm512_cvtts_roundpd_epi32(A, _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm512_mask_cvtts_roundpd_epi32(__m256i W, __mmask8 U, __m512d A) {
+ // CHECK-LABEL: test_mm512_mask_cvtts_roundpd_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.round.512(<8 x double>
+ return _mm512_mask_cvtts_roundpd_epi32(W, U, A, _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm512_maskz_cvtts_roundpd_epi32(__mmask8 U, __m512d A) {
+ // CHECK-LABEL: test_mm512_maskz_cvtts_roundpd_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.round.512(<8 x double>
+ return _mm512_maskz_cvtts_roundpd_epi32(U, A, _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm512_cvttspd_epu32(__m512d A) {
+ // CHECK-LABEL: test_mm512_cvttspd_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.round.512(<8 x double>
+ return _mm512_cvttspd_epu32(A);
+}
+
+__m256i test_mm512_mask_cvttspd_epu32(__m256i W, __mmask8 U, __m512d A) {
+ // CHECK-LABEL: test_mm512_mask_cvttspd_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.round.512(<8 x double>
+ return _mm512_mask_cvttspd_epu32(W, U, A);
+}
+
+__m256i test_mm512_maskz_cvttspd_epu32(__mmask8 U, __m512d A) {
+ // CHECK-LABEL: test_mm512_maskz_cvttspd_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.round.512(<8 x double>
+ return _mm512_maskz_cvttspd_epu32(U, A);
+}
+
+__m256i test_mm512_cvtts_roundpd_epu32(__m512d A) {
+ // CHECK-LABEL: test_mm512_cvtts_roundpd_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.round.512(<8 x double>
+ return _mm512_cvtts_roundpd_epu32(A, _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm512_mask_cvtts_roundpd_epu32(__m256i W, __mmask8 U, __m512d A) {
+ // CHECK-LABEL: test_mm512_mask_cvtts_roundpd_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.round.512(<8 x double>
+ return _mm512_mask_cvtts_roundpd_epu32(W, U, A, _MM_FROUND_NO_EXC);
+}
+
+__m256i test_mm512_maskz_cvtts_roundpd_epu32(__mmask8 U, __m512d A) {
+ // CHECK-LABEL: test_mm512_maskz_cvtts_roundpd_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.round.512(<8 x double>
+ return _mm512_maskz_cvtts_roundpd_epu32(U, A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvttsps_epi32(__m512 A) {
+ // CHECK-LABEL: test_mm512_cvttsps_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.round.512(<16 x float>
+ return _mm512_cvttsps_epi32(A);
+}
+
+__m512i test_mm512_mask_cvttsps_epi32(__m512i W, __mmask16 U, __m512 A) {
+ // CHECK-LABEL: test_mm512_mask_cvttsps_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.round.512(<16 x float>
+ return _mm512_mask_cvttsps_epi32(W, U, A);
+}
+
+__m512i test_mm512_maskz_cvttsps_epi32(__mmask16 U, __m512 A) {
+ // CHECK-LABEL: test_mm512_maskz_cvttsps_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.round.512(<16 x float>
+ return _mm512_maskz_cvttsps_epi32(U, A);
+}
+
+__m512i test_mm512_cvtts_roundps_epi32(__m512 A) {
+ // CHECK-LABEL: test_mm512_cvtts_roundps_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.round.512(<16 x float>
+ return _mm512_cvtts_roundps_epi32(A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvtts_roundps_epi32(__m512i W, __mmask16 U, __m512 A) {
+ // CHECK-LABEL: test_mm512_mask_cvtts_roundps_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.round.512(<16 x float>
+ return _mm512_mask_cvtts_roundps_epi32(W, U, A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvtts_roundps_epi32(__mmask16 U, __m512 A) {
+ // CHECK-LABEL: test_mm512_maskz_cvtts_roundps_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.round.512(<16 x float>
+ return _mm512_maskz_cvtts_roundps_epi32(U, A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_cvttsps_epu32(__m512 A) {
+ // CHECK-LABEL: test_mm512_cvttsps_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.round.512(<16 x float>
+ return _mm512_cvttsps_epu32(A);
+}
+
+__m512i test_mm512_mask_cvttsps_epu32(__m512i W, __mmask16 U, __m512 A) {
+ // CHECK-LABEL: test_mm512_mask_cvttsps_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.round.512(<16 x float>
+ return _mm512_mask_cvttsps_epu32(W, U, A);
+}
+
+__m512i test_mm512_maskz_cvttsps_epu32(__mmask16 U, __m512 A) {
+ // CHECK-LABEL: test_mm512_maskz_cvttsps_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.round.512(<16 x float>
+ return _mm512_maskz_cvttsps_epu32(U, A);
+}
+
+__m512i test_mm512_cvtts_roundps_epu32(__m512 A) {
+ // CHECK-LABEL: test_mm512_cvtts_roundps_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.round.512(<16 x float>
+ return _mm512_cvtts_roundps_epu32(A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_mask_cvtts_roundps_epu32(__m512i W, __mmask16 U, __m512 A) {
+ // CHECK-LABEL: test_mm512_mask_cvtts_roundps_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.round.512(<16 x float>
+ return _mm512_mask_cvtts_roundps_epu32(W, U, A, _MM_FROUND_NO_EXC);
+}
+
+__m512i test_mm512_maskz_cvtts_roundps_epu32(__mmask16 U, __m512 A) {
+ // CHECK-LABEL: test_mm512_maskz_cvtts_roundps_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.round.512(<16 x float>
+ return _mm512_maskz_cvtts_roundps_epu32(U, A, _MM_FROUND_NO_EXC);
+}
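
The mask/maskz triples in this file differ only in how inactive lanes are handled; the sketch below (illustrative only, not part of the patch) spells out the convention the tests rely on:

  #include <immintrin.h>

  __m256i demo(__m512d x, __m256i w) {
    __m256i r0 = _mm512_cvttspd_epi32(x);               /* all 8 lanes written */
    __m256i r1 = _mm512_mask_cvttspd_epi32(w, 0x0F, x); /* lanes 4-7 keep w */
    __m256i r2 = _mm512_maskz_cvttspd_epi32(0x0F, x);   /* lanes 4-7 zeroed */
    return _mm256_add_epi32(r0, _mm256_add_epi32(r1, r2));
  }
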
diff --git a/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-errors.c b/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-errors.c
new file mode 100644
index 00000000000000..72d2769dc21067
--- /dev/null
+++ b/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-errors.c
@@ -0,0 +1,57 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-unknown-unknown -target-feature +avx10.2-256 -emit-llvm -Wall -Werror -verify
+
+unsigned long long test_mm_cvttssd(unsigned long long __A) {
+ return _mm_cvttssd(__A); // expected-error {{call to undeclared function '_mm_cvttssd'}}
+}
+
+unsigned long long test_mm_cvttsss(unsigned long long __A) {
+ return _mm_cvttsss(__A); // expected-error {{call to undeclared function '_mm_cvttsss'}}
+}
+
+#include <immintrin.h>
+#include <stddef.h>
+
+__m128i test_mm256_cvtts_roundpd_epi32(__m256d A) {
+ return _mm256_cvtts_roundpd_epi32(A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m128i test_mm256_mask_cvtts_roundpd_epi32(__m128i W, __mmask8 U, __m256d A) {
+ return _mm256_mask_cvtts_roundpd_epi32(W, U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m128i test_mm256_maskz_cvtts_roundpd_epi32(__mmask8 U, __m256d A) {
+ return _mm256_maskz_cvtts_roundpd_epi32(U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m128i test_mm256_cvtts_roundpd_epu32(__m256d A) {
+ return _mm256_cvtts_roundpd_epu32(A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m128i test_mm256_mask_cvtts_roundpd_epu32(__m128i W, __mmask8 U, __m256d A) {
+ return _mm256_mask_cvtts_roundpd_epu32(W, U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m128i test_mm256_maskz_cvtts_roundpd_epu32(__mmask8 U, __m256d A) {
+ return _mm256_maskz_cvtts_roundpd_epu32(U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m256i test_mm256_cvtts_roundps_epi32(__m256 A) {
+ return _mm256_cvtts_roundps_epi32(A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m256i test_mm256_mask_cvtts_roundps_epi32(__m256i W, __mmask8 U, __m256 A) {
+ return _mm256_mask_cvtts_roundps_epi32(W, U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m256i test_mm256_maskz_cvtts_roundps_epi32(__mmask8 U, __m256 A) {
+ return _mm256_maskz_cvtts_roundps_epi32(U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m256i test_mm256_cvtts_roundps_epu32(__m256 A) {
+ return _mm256_cvtts_roundps_epu32(A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m256i test_mm256_mask_cvtts_roundps_epu32(__m256i W, __mmask8 U, __m256 A) {
+ return _mm256_mask_cvtts_roundps_epu32(W, U, A, 22); // expected-error {{invalid rounding argument}}
+}
+
+__m256i test_mm256_maskz_cvtts_roundps_epu32(__mmask8 U, __m256 A) {
+ return _mm256_maskz_cvtts_roundps_epu32(U, A, 22); // expected-error {{invalid rounding argument}}
+}
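
The literal 22 above deliberately fails Sema's SAE check. For these intrinsics the accepted immediates are _MM_FROUND_CUR_DIRECTION, _MM_FROUND_NO_EXC, or their combination (_MM_FROUND_NEARBYINT, which the codegen tests use). A sketch of a call that passes the check, assuming the header names from this patch:

  #include <immintrin.h>

  __m128i ok(__m256d a) {
    /* 4, 8 and 12 are the only immediates accepted for SAE-only ops. */
    return _mm256_cvtts_roundpd_epi32(a, _MM_FROUND_NO_EXC);
  }
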
diff --git a/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c b/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c
new file mode 100644
index 00000000000000..c72283c449ea9e
--- /dev/null
+++ b/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c
@@ -0,0 +1,223 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx10.2-256 -emit-llvm -o - | FileCheck %s
+
+#include <immintrin.h>
+#include <stddef.h>
+
+// 128 bit
+__m128i test_mm_cvttspd_epi64(__m128d A) {
+ // CHECK-LABEL: @test_mm_cvttspd_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.128(<2 x double>
+ return _mm_cvttspd_epi64(A);
+}
+
+__m128i test_mm_mask_cvttspd_epi64(__m128i W, __mmask8 U, __m128d A) {
+ // CHECK-LABEL: @test_mm_mask_cvttspd_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.128(<2 x double>
+ return _mm_mask_cvttspd_epi64(W, U, A);
+}
+
+__m128i test_mm_maskz_cvttspd_epi64(__mmask8 U, __m128d A) {
+ // CHECK-LABEL: @test_mm_maskz_cvttspd_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.128(<2 x double>
+ return _mm_maskz_cvttspd_epi64(U, A);
+}
+
+__m128i test_mm_cvttspd_epu64(__m128d A) {
+ // CHECK-LABEL: @test_mm_cvttspd_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.128(<2 x double>
+ return _mm_cvttspd_epu64(A);
+}
+
+__m128i test_mm_mask_cvttspd_epu64(__m128i W, __mmask8 U, __m128d A) {
+ // CHECK-LABEL: @test_mm_mask_cvttspd_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.128(<2 x double>
+ return _mm_mask_cvttspd_epu64(W, U, A);
+}
+
+__m128i test_mm_maskz_cvttspd_epu64(__mmask8 U, __m128d A) {
+ // CHECK-LABEL: @test_mm_maskz_cvttspd_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.128(<2 x double>
+ return _mm_maskz_cvttspd_epu64(U, A);
+}
+
+// 256 bit
+__m256i test_mm256_cvttspd_epi64(__m256d A) {
+ // CHECK-LABEL: @test_mm256_cvttspd_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.round.256(<4 x double>
+ return _mm256_cvttspd_epi64(A);
+}
+
+__m256i test_mm256_mask_cvttspd_epi64(__m256i W, __mmask8 U, __m256d A) {
+ // CHECK-LABEL: @test_mm256_mask_cvttspd_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.round.256(<4 x double>
+ return _mm256_mask_cvttspd_epi64(W, U, A);
+}
+
+__m256i test_mm256_maskz_cvttspd_epi64(__mmask8 U, __m256d A) {
+ // CHECK-LABEL: @test_mm256_maskz_cvttspd_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.round.256(<4 x double>
+ return _mm256_maskz_cvttspd_epi64(U, A);
+}
+
+__m256i test_mm256_cvtts_roundpd_epi64(__m256d A) {
+ // CHECK-LABEL: @test_mm256_cvtts_roundpd_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.round.256(<4 x double>
+ return _mm256_cvtts_roundpd_epi64(A, _MM_FROUND_NEARBYINT);
+}
+
+__m256i test_mm256_mask_cvtts_roundpd_epi64(__m256i W, __mmask8 U, __m256d A) {
+ // CHECK-LABEL: @test_mm256_mask_cvtts_roundpd_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.round.256(<4 x double>
+ return _mm256_mask_cvtts_roundpd_epi64(W, U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m256i test_mm256_maskz_cvtts_roundpd_epi64(__mmask8 U, __m256d A) {
+ // CHECK-LABEL: @test_mm256_maskz_cvtts_roundpd_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.round.256(<4 x double>
+ return _mm256_maskz_cvtts_roundpd_epi64(U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m256i test_mm256_cvttspd_epu64(__m256d A) {
+ // CHECK-LABEL: @test_mm256_cvttspd_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.round.256(<4 x double>
+ return _mm256_cvttspd_epu64(A);
+}
+
+__m256i test_mm256_mask_cvttspd_epu64(__m256i W, __mmask8 U, __m256d A) {
+ // CHECK-LABEL: @test_mm256_mask_cvttspd_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.round.256(<4 x double>
+ return _mm256_mask_cvttspd_epu64(W, U, A);
+}
+
+__m256i test_mm256_maskz_cvttspd_epu64(__mmask8 U, __m256d A) {
+ // CHECK-LABEL: @test_mm256_maskz_cvttspd_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.round.256(<4 x double>
+ return _mm256_maskz_cvttspd_epu64(U, A);
+}
+
+__m256i test_mm256_cvtts_roundpd_epu64(__m256d A) {
+ // CHECK-LABEL: @test_mm256_cvtts_roundpd_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.round.256(<4 x double>
+ return _mm256_cvtts_roundpd_epu64(A, _MM_FROUND_NEARBYINT);
+}
+
+__m256i test_mm256_mask_cvtts_roundpd_epu64(__m256i W, __mmask8 U, __m256d A) {
+ // CHECK-LABEL: @test_mm256_mask_cvtts_roundpd_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.round.256(<4 x double>
+ return _mm256_mask_cvtts_roundpd_epu64(W, U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m256i test_mm256_maskz_cvtts_roundpd_epu64(__mmask8 U, __m256d A) {
+ // CHECK-LABEL: @test_mm256_maskz_cvtts_roundpd_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.round.256(<4 x double>
+ return _mm256_maskz_cvtts_roundpd_epu64(U, A, _MM_FROUND_NEARBYINT);
+}
+
+// 128 bit
+__m128i test_mm_cvttsps_epi64(__m128 A) {
+ // CHECK-LABEL: @test_mm_cvttsps_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.128(<4 x float>
+ return _mm_cvttsps_epi64(A);
+}
+
+__m128i test_mm_mask_cvttsps_epi64(__m128i W, __mmask8 U, __m128 A) {
+ // CHECK-LABEL: @test_mm_mask_cvttsps_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.128(<4 x float>
+ return _mm_mask_cvttsps_epi64(W, U, A);
+}
+
+__m128i test_mm_maskz_cvttsps_epi64(__mmask8 U, __m128 A) {
+ // CHECK-LABEL: @test_mm_maskz_cvttsps_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.128(<4 x float>
+ return _mm_maskz_cvttsps_epi64(U, A);
+}
+
+__m128i test_mm_cvttsps_epu64(__m128 A) {
+ // CHECK-LABEL: @test_mm_cvttsps_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.128(<4 x float>
+ return _mm_cvttsps_epu64(A);
+}
+
+__m128i test_mm_mask_cvttsps_epu64(__m128i W, __mmask8 U, __m128 A) {
+ // CHECK-LABEL: @test_mm_mask_cvttsps_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.128(<4 x float>
+ return _mm_mask_cvttsps_epu64(W, U, A);
+}
+
+__m128i test_mm_maskz_cvttsps_epu64(__mmask8 U, __m128 A) {
+ // CHECK-LABEL: @test_mm_maskz_cvttsps_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.128(<4 x float>
+ return _mm_maskz_cvttsps_epu64(U, A);
+}
+
+__m256i test_mm256_cvttsps_epi64(__m128 A) {
+ // CHECK-LABEL: @test_mm256_cvttsps_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.round.256(<4 x float>
+ return _mm256_cvttsps_epi64(A);
+}
+
+__m256i test_mm256_mask_cvttsps_epi64(__m256i W, __mmask8 U, __m128 A) {
+ // CHECK-LABEL: @test_mm256_mask_cvttsps_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.round.256(<4 x float>
+ return _mm256_mask_cvttsps_epi64(W, U, A);
+}
+
+__m256i test_mm256_maskz_cvttsps_epi64(__mmask8 U, __m128 A) {
+ // CHECK-LABEL: @test_mm256_maskz_cvttsps_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.round.256(<4 x float>
+ return _mm256_maskz_cvttsps_epi64(U, A);
+}
+
+__m256i test_mm256_cvtts_roundps_epi64(__m128 A) {
+ // CHECK-LABEL: @test_mm256_cvtts_roundps_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.round.256(<4 x float>
+ return _mm256_cvtts_roundps_epi64(A, _MM_FROUND_NEARBYINT);
+}
+
+__m256i test_mm256_mask_cvtts_roundps_epi64(__m256i W, __mmask8 U, __m128 A) {
+ // CHECK-LABEL: @test_mm256_mask_cvtts_roundps_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.round.256(<4 x float>
+ return _mm256_mask_cvtts_roundps_epi64(W, U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m256i test_mm256_maskz_cvtts_roundps_epi64(__mmask8 U, __m128 A) {
+ // CHECK-LABEL: @test_mm256_maskz_cvtts_roundps_epi64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.round.256(<4 x float>
+ return _mm256_maskz_cvtts_roundps_epi64(U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m256i test_mm256_cvttsps_epu64(__m128 A) {
+ // CHECK-LABEL: @test_mm256_cvttsps_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.round.256(<4 x float>
+ return _mm256_cvttsps_epu64(A);
+}
+
+__m256i test_mm256_mask_cvttsps_epu64(__m256i W, __mmask8 U, __m128 A) {
+ // CHECK-LABEL: @test_mm256_mask_cvttsps_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.round.256(<4 x float>
+ return _mm256_mask_cvttsps_epu64(W, U, A);
+}
+
+__m256i test_mm256_maskz_cvttsps_epu64(__mmask8 U, __m128 A) {
+ // CHECK-LABEL: @test_mm256_maskz_cvttsps_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.round.256(<4 x float>
+ return _mm256_maskz_cvttsps_epu64(U, A);
+}
+
+__m256i test_mm256_cvtts_roundps_epu64(__m128 A) {
+ // CHECK-LABEL: @test_mm256_cvtts_roundps_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.round.256(<4 x float>
+ return _mm256_cvtts_roundps_epu64(A, _MM_FROUND_NEARBYINT);
+}
+
+__m256i test_mm256_mask_cvtts_roundps_epu64(__m256i W, __mmask8 U, __m128 A) {
+ // CHECK-LABEL: @test_mm256_mask_cvtts_roundps_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.round.256(<4 x float>
+ return _mm256_mask_cvtts_roundps_epu64(W, U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m256i test_mm256_maskz_cvtts_roundps_epu64(__mmask8 U, __m128 A) {
+ // CHECK-LABEL: @test_mm256_maskz_cvtts_roundps_epu64
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.round.256(<4 x float>
+ return _mm256_maskz_cvtts_roundps_epu64(U, A, _MM_FROUND_NEARBYINT);
+}
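
Note the asymmetry visible in the ps-to-qq tests above: the 256-bit converts take a __m128 source because four floats widen to four quadwords. A usage sketch (illustrative, assuming the header names from this patch):

  #include <immintrin.h>

  /* Four floats in, four saturated signed 64-bit lanes out. */
  __m256i widen4(__m128 x) {
    return _mm256_cvtts_roundps_epi64(x, _MM_FROUND_NO_EXC);
  }
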
diff --git a/clang/test/CodeGen/X86/avx10_2satcvtds-builtins.c b/clang/test/CodeGen/X86/avx10_2satcvtds-builtins.c
new file mode 100644
index 00000000000000..5eee57ddc6a837
--- /dev/null
+++ b/clang/test/CodeGen/X86/avx10_2satcvtds-builtins.c
@@ -0,0 +1,220 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-unknown-unknown -target-feature +avx10.2-256 -emit-llvm -o - | FileCheck %s
+
+#include <immintrin.h>
+#include <stddef.h>
+
+__m128i test_mm_cvttspd_epi32(__m128d A) {
+ // CHECK-LABEL: @test_mm_cvttspd_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.128(<2 x double>
+ return _mm_cvttspd_epi32(A);
+}
+
+__m128i test_mm_mask_cvttspd_epi32(__m128i W, __mmask8 U, __m128d A) {
+ // CHECK-LABEL: @test_mm_mask_cvttspd_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.128(<2 x double>
+ return _mm_mask_cvttspd_epi32(W, U, A);
+}
+
+__m128i test_mm_maskz_cvttspd_epi32(__mmask8 U, __m128d A) {
+ // CHECK-LABEL: @test_mm_maskz_cvttspd_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.128(<2 x double>
+ return _mm_maskz_cvttspd_epi32(U, A);
+}
+
+__m128i test_mm256_cvttspd_epi32(__m256d A) {
+ // CHECK-LABEL: @test_mm256_cvttspd_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.round.256(<4 x double>
+ return _mm256_cvttspd_epi32(A);
+}
+
+__m128i test_mm256_mask_cvttspd_epi32(__m128i W, __mmask8 U, __m256d A) {
+ // CHECK-LABEL: @test_mm256_mask_cvttspd_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.round.256(<4 x double>
+ return _mm256_mask_cvttspd_epi32(W, U, A);
+}
+
+__m128i test_mm256_maskz_cvttspd_epi32(__mmask8 U, __m256d A) {
+ // CHECK-LABEL: @test_mm256_maskz_cvttspd_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.round.256(<4 x double>
+ return _mm256_maskz_cvttspd_epi32(U, A);
+}
+
+__m128i test_mm256_cvtts_roundpd_epi32(__m256d A) {
+ // CHECK-LABEL: @test_mm256_cvtts_roundpd_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.round.256(<4 x double>
+ return _mm256_cvtts_roundpd_epi32(A, _MM_FROUND_NEARBYINT);
+}
+
+__m128i test_mm256_mask_cvtts_roundpd_epi32(__m128i W, __mmask8 U, __m256d A) {
+ // CHECK-LABEL: @test_mm256_mask_cvtts_roundpd_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.round.256(<4 x double>
+ return _mm256_mask_cvtts_roundpd_epi32(W, U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m128i test_mm256_maskz_cvtts_roundpd_epi32(__mmask8 U, __m256d A) {
+ // CHECK-LABEL: @test_mm256_maskz_cvtts_roundpd_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.round.256(<4 x double>
+ return _mm256_maskz_cvtts_roundpd_epi32(U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m128i test_mm_cvttspd_epu32(__m128d A) {
+ // CHECK-LABEL: @test_mm_cvttspd_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.128(<2 x double>
+ return _mm_cvttspd_epu32(A);
+}
+
+__m128i test_mm_mask_cvttspd_epu32(__m128i W, __mmask8 U, __m128d A) {
+ // CHECK-LABEL: @test_mm_mask_cvttspd_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.128(<2 x double>
+ return _mm_mask_cvttspd_epu32(W, U, A);
+}
+
+__m128i test_mm_maskz_cvttspd_epu32(__mmask8 U, __m128d A) {
+ // CHECK-LABEL: @test_mm_maskz_cvttspd_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.128(<2 x double>
+ return _mm_maskz_cvttspd_epu32(U, A);
+}
+
+__m128i test_mm256_cvttspd_epu32(__m256d A) {
+ // CHECK-LABEL: @test_mm256_cvttspd_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.round.256(<4 x double>
+ return _mm256_cvttspd_epu32(A);
+}
+
+__m128i test_mm256_mask_cvttspd_epu32(__m128i W, __mmask8 U, __m256d A) {
+ // CHECK-LABEL: @test_mm256_mask_cvttspd_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.round.256(<4 x double>
+ return _mm256_mask_cvttspd_epu32(W, U, A);
+}
+
+__m128i test_mm256_maskz_cvttspd_epu32(__mmask8 U, __m256d A) {
+ // CHECK-LABEL: @test_mm256_maskz_cvttspd_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.round.256(<4 x double>
+ return _mm256_maskz_cvttspd_epu32(U, A);
+}
+
+__m128i test_mm256_cvtts_roundpd_epu32(__m256d A) {
+ // CHECK-LABEL: @test_mm256_cvtts_roundpd_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.round.256(<4 x double>
+ return _mm256_cvtts_roundpd_epu32(A, _MM_FROUND_NEARBYINT);
+}
+
+__m128i test_mm256_mask_cvtts_roundpd_epu32(__m128i W, __mmask8 U, __m256d A) {
+ // CHECK-LABEL: @test_mm256_mask_cvtts_roundpd_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.round.256(<4 x double>
+ return _mm256_mask_cvtts_roundpd_epu32(W, U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m128i test_mm256_maskz_cvtts_roundpd_epu32(__mmask8 U, __m256d A) {
+ // CHECK-LABEL: @test_mm256_maskz_cvtts_roundpd_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.round.256(<4 x double>
+ return _mm256_maskz_cvtts_roundpd_epu32(U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m128i test_mm_cvttsps_epi32(__m128 A) {
+ // CHECK-LABEL: @test_mm_cvttsps_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.128(<4 x float>
+ return _mm_cvttsps_epi32(A);
+}
+
+__m128i test_mm_mask_cvttsps_epi32(__m128i W, __mmask8 U, __m128 A) {
+ // CHECK-LABEL: @test_mm_mask_cvttsps_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.128(<4 x float>
+ return _mm_mask_cvttsps_epi32(W, U, A);
+}
+
+__m128i test_mm_maskz_cvttsps_epi32(__mmask8 U, __m128 A) {
+ // CHECK-LABEL: @test_mm_maskz_cvttsps_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.128(<4 x float>
+ return _mm_maskz_cvttsps_epi32(U, A);
+}
+
+__m256i test_mm256_cvttsps_epi32(__m256 A) {
+ // CHECK-LABEL: @test_mm256_cvttsps_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.round.256(<8 x float>
+ return _mm256_cvttsps_epi32(A);
+}
+
+__m256i test_mm256_mask_cvttsps_epi32(__m256i W, __mmask8 U, __m256 A) {
+ // CHECK-LABEL: @test_mm256_mask_cvttsps_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.round.256(<8 x float>
+ return _mm256_mask_cvttsps_epi32(W, U, A);
+}
+
+__m256i test_mm256_maskz_cvttsps_epi32(__mmask8 U, __m256 A) {
+ // CHECK-LABEL: @test_mm256_maskz_cvttsps_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.round.256(<8 x float>
+ return _mm256_maskz_cvttsps_epi32(U, A);
+}
+
+__m256i test_mm256_cvtts_roundps_epi32(__m256 A) {
+ // CHECK-LABEL: @test_mm256_cvtts_roundps_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.round.256(<8 x float>
+ return _mm256_cvtts_roundps_epi32(A, _MM_FROUND_NEARBYINT);
+}
+
+__m256i test_mm256_mask_cvtts_roundps_epi32(__m256i W, __mmask8 U, __m256 A) {
+ // CHECK-LABEL: @test_mm256_mask_cvtts_roundps_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.round.256(<8 x float>
+ return _mm256_mask_cvtts_roundps_epi32(W, U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m256i test_mm256_maskz_cvtts_roundps_epi32(__mmask8 U, __m256 A) {
+ // CHECK-LABEL: @test_mm256_maskz_cvtts_roundps_epi32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.round.256(<8 x float>
+ return _mm256_maskz_cvtts_roundps_epi32(U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m128i test_mm_cvttsps_epu32(__m128 A) {
+ // CHECK-LABEL: @test_mm_cvttsps_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.128(<4 x float>
+ return _mm_cvttsps_epu32(A);
+}
+
+__m128i test_mm_mask_cvttsps_epu32(__m128i W, __mmask8 U, __m128 A) {
+ // CHECK-LABEL: @test_mm_mask_cvttsps_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.128(<4 x float>
+ return _mm_mask_cvttsps_epu32(W, U, A);
+}
+
+__m128i test_mm_maskz_cvttsps_epu32(__mmask8 U, __m128 A) {
+ // CHECK-LABEL: @test_mm_maskz_cvttsps_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.128(<4 x float>
+ return _mm_maskz_cvttsps_epu32(U, A);
+}
+
+__m256i test_mm256_cvttsps_epu32(__m256 A) {
+ // CHECK-LABEL: @test_mm256_cvttsps_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.round.256(<8 x float>
+ return _mm256_cvttsps_epu32(A);
+}
+
+__m256i test_mm256_mask_cvttsps_epu32(__m256i W, __mmask8 U, __m256 A) {
+ // CHECK-LABEL: @test_mm256_mask_cvttsps_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.round.256(<8 x float>
+ return _mm256_mask_cvttsps_epu32(W, U, A);
+}
+
+__m256i test_mm256_maskz_cvttsps_epu32(__mmask8 U, __m256 A) {
+ // CHECK-LABEL: @test_mm256_maskz_cvttsps_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.round.256(<8 x float>
+ return _mm256_maskz_cvttsps_epu32(U, A);
+}
+
+__m256i test_mm256_cvtts_roundps_epu32(__m256 A) {
+ // CHECK-LABEL: @test_mm256_cvtts_roundps_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.round.256(<8 x float>
+ return _mm256_cvtts_roundps_epu32(A, _MM_FROUND_NEARBYINT);
+}
+
+__m256i test_mm256_mask_cvtts_roundps_epu32(__m256i W, __mmask8 U, __m256 A) {
+ // CHECK-LABEL: @test_mm256_mask_cvtts_roundps_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.round.256(<8 x float>
+ return _mm256_mask_cvtts_roundps_epu32(W, U, A, _MM_FROUND_NEARBYINT);
+}
+
+__m256i test_mm256_maskz_cvtts_roundps_epu32(__mmask8 U, __m256 A) {
+ // CHECK-LABEL: @test_mm256_maskz_cvtts_roundps_epu32
+ // CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.round.256(<8 x float>
+ return _mm256_maskz_cvtts_roundps_epu32(U, A, _MM_FROUND_NEARBYINT);
+}
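
As the two files above show, only the 256-bit (and 512-bit) forms gain a *_round* spelling; the 128-bit instructions have no SAE encoding, so their intrinsics take no rounding immediate. A side-by-side sketch (illustrative only):

  #include <immintrin.h>

  __m128i f128(__m128d a) { return _mm_cvttspd_epi32(a); } /* no immediate */
  __m128i f256(__m256d a) {
    return _mm256_cvtts_roundpd_epi32(a, _MM_FROUND_NO_EXC); /* SAE immediate */
  }
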
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 1ab2002f7f6960..98fbd165ee9340 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -5520,6 +5520,106 @@ let TargetPrefix = "x86" in {
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
}
+// Truncating conversions with saturation
+let TargetPrefix = "x86" in {
+ def int_x86_avx512_vcvttsss2si : ClangBuiltin<"__builtin_ia32_vcvttsss2si32">,
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+ def int_x86_avx512_vcvttsss2si64 : ClangBuiltin<"__builtin_ia32_vcvttsss2si64">,
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+ def int_x86_avx512_vcvttsss2usi : ClangBuiltin<"__builtin_ia32_vcvttsss2usi32">,
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+ def int_x86_avx512_vcvttsss2usi64 : ClangBuiltin<"__builtin_ia32_vcvttsss2usi64">,
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+ def int_x86_avx512_vcvttssd2si : ClangBuiltin<"__builtin_ia32_vcvttssd2si32">,
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+ def int_x86_avx512_vcvttssd2si64 : ClangBuiltin<"__builtin_ia32_vcvttssd2si64">,
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+ def int_x86_avx512_vcvttssd2usi : ClangBuiltin<"__builtin_ia32_vcvttssd2usi32">,
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+ def int_x86_avx512_vcvttssd2usi64 : ClangBuiltin<"__builtin_ia32_vcvttssd2usi64">,
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+ def int_x86_avx512_mask_vcvttpd2dqs_128 : ClangBuiltin<"__builtin_ia32_vcvttpd2dqs128_mask">,
+ DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_vcvttpd2dqs_round_256 : ClangBuiltin<"__builtin_ia32_vcvttpd2dqs256_round_mask">,
+ DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+ def int_x86_avx512_mask_vcvttpd2dqs_round_512 : ClangBuiltin<"__builtin_ia32_vcvttpd2dqs512_round_mask">,
+ DefaultAttrsIntrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+ def int_x86_avx512_mask_vcvttpd2udqs_128 : ClangBuiltin<"__builtin_ia32_vcvttpd2udqs128_mask">,
+ DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_vcvttpd2udqs_round_256 : ClangBuiltin<"__builtin_ia32_vcvttpd2udqs256_round_mask">,
+ DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+ def int_x86_avx512_mask_vcvttpd2udqs_round_512 : ClangBuiltin<"__builtin_ia32_vcvttpd2udqs512_round_mask">,
+ DefaultAttrsIntrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+ def int_x86_avx512_mask_vcvttpd2qqs_128 : ClangBuiltin<"__builtin_ia32_vcvttpd2qqs128_mask">,
+ DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_vcvttpd2qqs_round_256 : ClangBuiltin<"__builtin_ia32_vcvttpd2qqs256_round_mask">,
+ DefaultAttrsIntrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+ def int_x86_avx512_mask_vcvttpd2qqs_round_512 : ClangBuiltin<"__builtin_ia32_vcvttpd2qqs512_round_mask">,
+ DefaultAttrsIntrinsic<[llvm_v8i64_ty], [llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+ def int_x86_avx512_mask_vcvttpd2uqqs_128 : ClangBuiltin<"__builtin_ia32_vcvttpd2uqqs128_mask">,
+ DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_vcvttpd2uqqs_round_256 : ClangBuiltin<"__builtin_ia32_vcvttpd2uqqs256_round_mask">,
+ DefaultAttrsIntrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+ def int_x86_avx512_mask_vcvttpd2uqqs_round_512 : ClangBuiltin<"__builtin_ia32_vcvttpd2uqqs512_round_mask">,
+ DefaultAttrsIntrinsic<[llvm_v8i64_ty], [llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+ def int_x86_avx512_mask_vcvttps2dqs_128 : ClangBuiltin<"__builtin_ia32_vcvttps2dqs128_mask">,
+ DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_vcvttps2dqs_round_256 : ClangBuiltin<"__builtin_ia32_vcvttps2dqs256_round_mask">,
+ DefaultAttrsIntrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+ def int_x86_avx512_mask_vcvttps2dqs_round_512 : ClangBuiltin<"__builtin_ia32_vcvttps2dqs512_round_mask">,
+ DefaultAttrsIntrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+ def int_x86_avx512_mask_vcvttps2udqs_128 : ClangBuiltin<"__builtin_ia32_vcvttps2udqs128_mask">,
+ DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_vcvttps2udqs_round_256 : ClangBuiltin<"__builtin_ia32_vcvttps2udqs256_round_mask">,
+ DefaultAttrsIntrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+ def int_x86_avx512_mask_vcvttps2udqs_round_512 : ClangBuiltin<"__builtin_ia32_vcvttps2udqs512_round_mask">,
+ DefaultAttrsIntrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+ def int_x86_avx512_mask_vcvttps2qqs_128 : ClangBuiltin<"__builtin_ia32_vcvttps2qqs128_mask">,
+ DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_vcvttps2qqs_round_256 : ClangBuiltin<"__builtin_ia32_vcvttps2qqs256_round_mask">,
+ DefaultAttrsIntrinsic<[llvm_v4i64_ty], [llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+ def int_x86_avx512_mask_vcvttps2qqs_round_512 : ClangBuiltin<"__builtin_ia32_vcvttps2qqs512_round_mask">,
+ DefaultAttrsIntrinsic<[llvm_v8i64_ty], [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+ def int_x86_avx512_mask_vcvttps2uqqs_128 : ClangBuiltin<"__builtin_ia32_vcvttps2uqqs128_mask">,
+ DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_vcvttps2uqqs_round_256 : ClangBuiltin<"__builtin_ia32_vcvttps2uqqs256_round_mask">,
+ DefaultAttrsIntrinsic<[llvm_v4i64_ty], [llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+ def int_x86_avx512_mask_vcvttps2uqqs_round_512 : ClangBuiltin<"__builtin_ia32_vcvttps2uqqs512_round_mask">,
+ DefaultAttrsIntrinsic<[llvm_v8i64_ty], [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+}
+
//===----------------------------------------------------------------------===//
// SHA intrinsics
let TargetPrefix = "x86" in {
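
Each ClangBuiltin<> string above is what the new header wrappers expand to, so calling one directly makes the builtin-to-intrinsic mapping visible. A sketch (the __v4sf cast is only there to satisfy -flax-vector-conversions=none, as in the tests):

  #include <immintrin.h>

  int scalar_sat(__m128 x) {
    /* Lowers to @llvm.x86.avx512.vcvttsss2si per the definitions above. */
    return __builtin_ia32_vcvttsss2si32((__v4sf)x, _MM_FROUND_NO_EXC);
  }
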
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 63690487632f88..f4fd8607ba0dc5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -324,7 +324,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}
- if (Subtarget.hasSSE2()) {
+ if (Subtarget.hasAVX10_2() || Subtarget.hasAVX10_2_512()) {
+ setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Legal);
+ setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Legal);
+ if (Subtarget.is64Bit()) {
+ setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Legal);
+ }
+ } else if (Subtarget.hasSSE2()) {
// Custom lowering for saturating float to int conversions.
// We handle promotion to larger result types manually.
for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {
@@ -34090,6 +34097,16 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(CTEST)
NODE_NAME_CASE(CLOAD)
NODE_NAME_CASE(CSTORE)
+ NODE_NAME_CASE(CVTTS2SIS)
+ NODE_NAME_CASE(CVTTS2UIS)
+ NODE_NAME_CASE(CVTTS2SIS_SAE)
+ NODE_NAME_CASE(CVTTS2UIS_SAE)
+ NODE_NAME_CASE(CVTTP2SIS)
+ NODE_NAME_CASE(MCVTTP2SIS)
+ NODE_NAME_CASE(CVTTP2UIS_SAE)
+ NODE_NAME_CASE(CVTTP2SIS_SAE)
+ NODE_NAME_CASE(CVTTP2UIS)
+ NODE_NAME_CASE(MCVTTP2UIS)
}
return nullptr;
#undef NODE_NAME_CASE
@@ -37502,7 +37519,9 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
case X86ISD::VFPROUND:
case X86ISD::VMFPROUND:
case X86ISD::CVTPS2PH:
- case X86ISD::MCVTPS2PH: {
+ case X86ISD::MCVTPS2PH:
+ case X86ISD::MCVTTP2SIS:
+ case X86ISD::MCVTTP2UIS: {
// Truncations/Conversions - upper elements are known zero.
EVT SrcVT = Op.getOperand(0).getValueType();
if (SrcVT.isVector()) {
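
Marking FP_TO_SINT_SAT/FP_TO_UINT_SAT as Legal lets llvm.fptosi.sat and llvm.fptoui.sat select directly to the new instructions instead of being expanded. In C that is typically reached through a clamping idiom the middle end can fold to the sat intrinsic; whether a given front-end pattern is recognized end-to-end is an assumption, not something this patch tests:

  #include <limits.h>

  /* Mirrors the semantics of llvm.fptosi.sat.i32.f64: NaN -> 0, otherwise
     clamp to [INT_MIN, INT_MAX]; may now select to a single vcvttsd2sis. */
  int sat_cast(double x) {
    if (x != x)
      return 0;
    if (x <= (double)INT_MIN)
      return INT_MIN;
    if (x >= (double)INT_MAX)
      return INT_MAX;
    return (int)x;
  }
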
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 2e7538cb3c1183..a7d65caa914c8c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -649,6 +649,18 @@ namespace llvm {
CVTTP2UI,
CVTTP2SI_SAE,
CVTTP2UI_SAE,
+
+ // Saturation-enabled vector float/double to signed/unsigned
+ // integer with truncation.
+ CVTTP2SIS,
+ CVTTP2UIS,
+ CVTTP2SIS_SAE,
+ CVTTP2UIS_SAE,
+ // Masked versions of above. Used for v2f64 to v4i32.
+ // SRC, PASSTHRU, MASK
+ MCVTTP2SIS,
+ MCVTTP2UIS,
+
// Scalar float/double to signed/unsigned integer with truncation.
CVTTS2SI,
CVTTS2UI,
@@ -659,6 +671,12 @@ namespace llvm {
CVTSI2P,
CVTUI2P,
+ // Scalar float/double to signed/unsigned integer with saturation.
+ CVTTS2SIS,
+ CVTTS2UIS,
+ CVTTS2SIS_SAE,
+ CVTTS2UIS_SAE,
+
// Masked versions of above. Used for v2f64->v4f32.
// SRC, PASSTHRU, MASK
MCVTP2SI,
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index fe381b37782629..391bccf9dab5d7 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -624,3 +624,314 @@ defm VCVTTPS2IUBS : avx10_sat_cvt_base<0x6a, "vcvttps2iubs", SchedWriteVecIMul,
avx512vl_i32_info, avx512vl_f32_info,
X86vcvttp2iubsSAE>,
AVX512PDIi8Base, T_MAP5, EVEX_CD8<32, CD8VF>;
+
+//-------------------------------------------------
+// AVX10 SATCVT-DS instructions
+//-------------------------------------------------
+
+// Convert Double to Signed/Unsigned Doubleword with truncation.
+multiclass avx512_cvttpd2dqs<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDNode MaskOpNode, SDNode OpNodeSAE,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasAVX10_2, HasAVX10_2_512] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
+ MaskOpNode, sched.ZMM>,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
+ OpNodeSAE, sched.ZMM>, EVEX_V512;
+ }
+ let Predicates = [HasAVX10_2, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
+ null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
+ f128mem, VK2WM>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
+ MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
+ }
+
+ let Predicates = [HasAVX10_2, HasVLX], hasEVEX_U=1 in {
+ defm Z256 : avx512_vcvt_fp_sae<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNodeSAE,
+ sched.YMM>, EVEX_V256;
+ }
+
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
+ VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
+ VK2WM:$mask, VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+ (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
+ VK2WM:$mask, VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
+ f64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
+ "$dst {${mask}}, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
+ VK2WM:$mask, f64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
+ VK2WM:$mask, f64mem:$src), 0, "att">;
+
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
+ VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{{sae} $src, $dst|$dst, $src {sae}}",
+ (!cast<Instruction>(NAME # "Z256rrb") VR128X:$dst,
+ VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
+ VK4WM:$mask, VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{{sae} $src, $dst {${mask}}|$dst {${mask}}, $src {sae}}",
+ (!cast<Instruction>(NAME # "Z256rrbk") VR128X:$dst,
+ VK4WM:$mask, VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+ (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
+ VK4WM:$mask, VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{{sae} $src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src {sae}}",
+ (!cast<Instruction>(NAME # "Z256rrbkz") VR128X:$dst,
+ VK4WM:$mask, VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
+ f64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
+ "$dst {${mask}}, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
+ VK4WM:$mask, f64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
+ VK4WM:$mask, f64mem:$src), 0, "att">;
+}
+
+// Convert Double to Signed/Unsigned Quadword with truncation, saturation enabled.
+multiclass avx512_cvttpd2qqs<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDNode MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasDQI, HasAVX10_2, HasAVX10_2_512] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
+ MaskOpNode, sched.ZMM>,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
+ OpNodeRnd, sched.ZMM>, EVEX_V512;
+ }
+ let Predicates = [HasDQI, HasAVX10_2, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
+ MaskOpNode, sched.XMM>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
+ MaskOpNode, sched.YMM>, EVEX_V256;
+ }
+ let Predicates = [HasDQI, HasAVX10_2, HasVLX], hasEVEX_U=1 in {
+ defm Z256 : avx512_vcvt_fp_sae<opc, OpcodeStr, v4i64x_info, v4f64x_info,
+ OpNodeRnd, sched.YMM>, EVEX_V256;
+ }
+}
+
+// Convert Float to Signed/Unsigned Quadword with truncation
+multiclass avx512_cvttps2qqs<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDNode MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasDQI, HasAVX10_2, HasAVX10_2_512] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
+ MaskOpNode, sched.ZMM>,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
+ OpNodeRnd, sched.ZMM>, EVEX_V512;
+ }
+ let Predicates = [HasDQI, HasAVX10_2, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
+ MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
+ (v2i64 (OpNode (bc_v4f32 (v2f64
+ (scalar_to_vector (loadf64 addr:$src)))))),
+ (v2i64 (MaskOpNode (bc_v4f32 (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))>,
+ EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
+ MaskOpNode, sched.YMM>, EVEX_V256;
+ }
+
+ let Predicates = [HasDQI, HasAVX10_2, HasVLX], hasEVEX_U=1 in {
+ defm Z256 : avx512_vcvt_fp_sae<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNodeRnd,
+ sched.YMM>, EVEX_V256;
+ }
+}
+
+// Convert Float to Signed/Unsigned Doubleword with truncation
+multiclass avx512_cvttps2dqs<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDNode MaskOpNode,
+ SDNode OpNodeSAE, X86SchedWriteWidths sched> {
+ let Predicates = [HasAVX10_2, HasAVX10_2_512] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
+ MaskOpNode, sched.ZMM>,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
+ OpNodeSAE, sched.ZMM>, EVEX_V512;
+ }
+
+ let Predicates = [HasAVX10_2, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
+ MaskOpNode, sched.XMM>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
+ MaskOpNode, sched.YMM>, EVEX_V256;
+ }
+
+ let Predicates = [HasAVX10_2, HasVLX], hasEVEX_U=1 in {
+ defm Z256 : avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f32x_info,
+ OpNodeSAE, sched.YMM>, EVEX_V256;
+ }
+}
+
+defm VCVTTPD2DQS : avx512_cvttpd2dqs<0x6D, "vcvttpd2dqs", X86cvttp2sis,
+ X86cvttp2sis, X86cvttp2sisSAE,
+ SchedWriteCvtPD2DQ>,
+ REX_W, T_MAP5,PS, EVEX_CD8<64, CD8VF>;
+defm VCVTTPD2UDQS : avx512_cvttpd2dqs<0x6C, "vcvttpd2udqs", X86cvttp2uis,
+ X86cvttp2uis, X86cvttp2uisSAE,
+ SchedWriteCvtPD2DQ>,
+ REX_W, T_MAP5,PS, EVEX_CD8<64, CD8VF>;
+defm VCVTTPS2DQS : avx512_cvttps2dqs<0x6D, "vcvttps2dqs", X86cvttp2sis,
+ X86cvttp2sis, X86cvttp2sisSAE,
+ SchedWriteCvtPS2DQ>, T_MAP5,PS,
+ EVEX_CD8<32, CD8VF>;
+defm VCVTTPS2UDQS : avx512_cvttps2dqs<0x6C, "vcvttps2udqs", X86cvttp2uis,
+ X86cvttp2uis, X86cvttp2uisSAE,
+ SchedWriteCvtPS2DQ>, T_MAP5,PS, EVEX_CD8<32, CD8VF>;
+defm VCVTTPD2QQS : avx512_cvttpd2qqs<0x6D, "vcvttpd2qqs", X86cvttp2sis,
+ X86cvttp2sis, X86cvttp2sisSAE,
+ SchedWriteCvtPD2DQ>, REX_W, T_MAP5,PD,
+ EVEX_CD8<64, CD8VF>;
+defm VCVTTPS2QQS : avx512_cvttps2qqs<0x6D, "vcvttps2qqs", X86cvttp2sis,
+ X86cvttp2sis, X86cvttp2sisSAE,
+ SchedWriteCvtPS2DQ>, T_MAP5,PD,
+ EVEX_CD8<32, CD8VH>;
+defm VCVTTPD2UQQS : avx512_cvttpd2qqs<0x6C, "vcvttpd2uqqs", X86cvttp2uis,
+ X86cvttp2uis, X86cvttp2uisSAE,
+ SchedWriteCvtPD2DQ>, REX_W, T_MAP5,PD,
+ EVEX_CD8<64, CD8VF>;
+defm VCVTTPS2UQQS : avx512_cvttps2qqs<0x6C, "vcvttps2uqqs", X86cvttp2uis,
+ X86cvttp2uis, X86cvttp2uisSAE,
+ SchedWriteCvtPS2DQ>, T_MAP5,PD,
+ EVEX_CD8<32, CD8VH>;
+
+let Predicates = [HasAVX10_2] in {
+// Special patterns to allow use of X86mcvttp2sis for masking. Instruction
+// patterns have been disabled with null_frag.
+// Patterns VCVTTPD2DQSZ128
+
+def : Pat<(v4i32 (X86cvttp2sis (v2f64 VR128X:$src))),
+ (VCVTTPD2DQSZ128rr VR128X:$src)>;
+def : Pat<(v4i32 (X86cvttp2sis (loadv2f64 addr:$src))),
+ (VCVTTPD2DQSZ128rm addr:$src)>;
+def : Pat<(v4i32 (X86cvttp2sis (v2f64 (X86VBroadcastld64 addr:$src)))),
+ (VCVTTPD2DQSZ128rmb addr:$src)>;
+def : Pat<(X86mcvttp2sis (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTTPD2DQSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+def : Pat<(X86mcvttp2sis (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTTPD2DQSZ128rrkz VK2WM:$mask, VR128X:$src)>;
+def : Pat<(X86mcvttp2sis (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTTPD2DQSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+def : Pat<(X86mcvttp2sis (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTTPD2DQSZ128rmkz VK2WM:$mask, addr:$src)>;
+def : Pat<(X86mcvttp2sis (v2f64 (X86VBroadcastld64 addr:$src)),
+ (v4i32 VR128X:$src0), VK2WM:$mask),
+ (VCVTTPD2DQSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+def : Pat<(X86mcvttp2sis (v2f64 (X86VBroadcastld64 addr:$src)),
+ v4i32x_info.ImmAllZerosV, VK2WM:$mask),
+ (VCVTTPD2DQSZ128rmbkz VK2WM:$mask, addr:$src)>;
+
+// Patterns VCVTTPD2UDQSZ128
+def : Pat<(v4i32 (X86cvttp2uis (v2f64 VR128X:$src))),
+ (VCVTTPD2UDQSZ128rr VR128X:$src)>;
+def : Pat<(v4i32 (X86cvttp2uis (loadv2f64 addr:$src))),
+ (VCVTTPD2UDQSZ128rm addr:$src)>;
+def : Pat<(v4i32 (X86cvttp2uis (v2f64 (X86VBroadcastld64 addr:$src)))),
+ (VCVTTPD2UDQSZ128rmb addr:$src)>;
+def : Pat<(X86mcvttp2uis (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTTPD2UDQSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+def : Pat<(X86mcvttp2uis (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTTPD2UDQSZ128rrkz VK2WM:$mask, VR128X:$src)>;
+def : Pat<(X86mcvttp2uis (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTTPD2UDQSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+def : Pat<(X86mcvttp2uis (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTTPD2UDQSZ128rmkz VK2WM:$mask, addr:$src)>;
+def : Pat<(X86mcvttp2uis (v2f64 (X86VBroadcastld64 addr:$src)),
+ (v4i32 VR128X:$src0), VK2WM:$mask),
+ (VCVTTPD2UDQSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+def : Pat<(X86mcvttp2uis (v2f64 (X86VBroadcastld64 addr:$src)),
+ v4i32x_info.ImmAllZerosV, VK2WM:$mask),
+ (VCVTTPD2UDQSZ128rmbkz VK2WM:$mask, addr:$src)>;
+}
+
+// Convert scalar float/double to signed/unsigned int 32/64 with truncation and saturation.
+multiclass avx512_cvt_s_ds<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
+ X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
+ SDNode OpNodeInt, SDNode OpNodeSAE,
+ X86FoldableSchedWrite sched> {
+ let Predicates = [HasAVX10_2], ExeDomain = _SrcRC.ExeDomain in {
+ let isCodeGenOnly = 1 in {
+ def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+ [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src, _DstRC.EltVT))]>,
+ EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
+ def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+ [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src), _DstRC.EltVT))]>,
+ EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
+ }
+ def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+ [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
+ EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
+ let Uses = [MXCSR] in
+ def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
+ !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
+ [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
+ EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
+ def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
+ (ins _SrcRC.IntScalarMemOp:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+ [(set _DstRC.RC:$dst,
+ (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
+ EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>,
+ SIMD_EXC;
+ }
+}
+
+defm VCVTTSS2SIS: avx512_cvt_s_ds<0x6D, "vcvttss2sis", f32x_info, i32x_info,
+ fp_to_sint_sat, X86cvttss2Int,
+ X86cvttss2IntSAE, WriteCvtSS2I>,
+ T_MAP5,XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSS2SI64S: avx512_cvt_s_ds<0x6D, "vcvttss2sis", f32x_info, i64x_info,
+ fp_to_sint_sat, X86cvttss2Int,
+ X86cvttss2IntSAE, WriteCvtSS2I>,
+ REX_W, T_MAP5,XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSD2SIS: avx512_cvt_s_ds<0x6D, "vcvttsd2sis", f64x_info, i32x_info,
+ fp_to_sint_sat, X86cvttss2Int,
+ X86cvttss2IntSAE, WriteCvtSD2I>,
+ T_MAP5,XD, EVEX_CD8<64, CD8VT1>;
+defm VCVTTSD2SI64S: avx512_cvt_s_ds<0x6D, "vcvttsd2sis", f64x_info, i64x_info,
+ fp_to_sint_sat, X86cvttss2Int,
+ X86cvttss2IntSAE, WriteCvtSD2I>,
+ REX_W, T_MAP5,XD, EVEX_CD8<64, CD8VT1>;
+defm VCVTTSS2USIS: avx512_cvt_s_ds<0x6C, "vcvttss2usis", f32x_info, i32x_info,
+ fp_to_uint_sat, X86cvttss2UInt,
+ X86cvttss2UIntSAE, WriteCvtSS2I>,
+ T_MAP5,XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSS2USI64S: avx512_cvt_s_ds<0x6C, "vcvttss2usis", f32x_info, i64x_info,
+ fp_to_uint_sat, X86cvttss2UInt,
+ X86cvttss2UIntSAE, WriteCvtSS2I>,
+ T_MAP5,XS,REX_W, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSD2USIS: avx512_cvt_s_ds<0x6C, "vcvttsd2usis", f64x_info, i32x_info,
+ fp_to_uint_sat, X86cvttss2UInt,
+ X86cvttss2UIntSAE, WriteCvtSD2I>,
+ T_MAP5,XD, EVEX_CD8<64, CD8VT1>;
+defm VCVTTSD2USI64S: avx512_cvt_s_ds<0x6C, "vcvttsd2usis", f64x_info, i64x_info,
+ fp_to_uint_sat, X86cvttss2UInt,
+ X86cvttss2UIntSAE, WriteCvtSD2I>,
+ T_MAP5,XD, REX_W, EVEX_CD8<64, CD8VT1>;
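
The rrb_Int forms above provide the {sae} assembly variant; from C it is reached by passing _MM_FROUND_NO_EXC to the scalar wrappers tested earlier. A sketch, assuming the intrinsic names from this patch's headers:

  #include <immintrin.h>

  /* Suppress FP exceptions on the conversion; selects the EVEX.b ({sae})
     encoding of vcvttsd2sis rather than the plain register form. */
  int cvt_sae(__m128d x) {
    return _mm_cvtt_roundssd_i32(x, _MM_FROUND_NO_EXC);
  }
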
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 6db1cf7c9ee1fd..2ecf8348ed2874 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -665,6 +665,11 @@ def X86cvts2usi : SDNode<"X86ISD::CVTS2UI", SDTSFloatToInt>;
def X86cvts2siRnd : SDNode<"X86ISD::CVTS2SI_RND", SDTSFloatToIntRnd>;
def X86cvts2usiRnd : SDNode<"X86ISD::CVTS2UI_RND", SDTSFloatToIntRnd>;
+def X86cvttss2Int : SDNode<"X86ISD::CVTTS2SIS", SDTSFloatToInt>;
+def X86cvttss2UInt : SDNode<"X86ISD::CVTTS2UIS", SDTSFloatToInt>;
+def X86cvttss2IntSAE : SDNode<"X86ISD::CVTTS2SIS_SAE", SDTSFloatToInt>;
+def X86cvttss2UIntSAE : SDNode<"X86ISD::CVTTS2UIS_SAE", SDTSFloatToInt>;
+
// Vector with rounding mode
// cvtt fp-to-int staff
@@ -674,6 +679,11 @@ def X86cvttp2uiSAE : SDNode<"X86ISD::CVTTP2UI_SAE", SDTFloatToInt>;
def X86VSintToFpRnd : SDNode<"X86ISD::SINT_TO_FP_RND", SDTVintToFPRound>;
def X86VUintToFpRnd : SDNode<"X86ISD::UINT_TO_FP_RND", SDTVintToFPRound>;
+def X86cvttp2sisSAE : SDNode<"X86ISD::CVTTP2SIS_SAE", SDTFloatToInt>;
+def X86cvttp2uisSAE : SDNode<"X86ISD::CVTTP2UIS_SAE", SDTFloatToInt>;
+def X86cvttp2sis : SDNode<"X86ISD::CVTTP2SIS", SDTFloatToInt>;
+def X86cvttp2uis : SDNode<"X86ISD::CVTTP2UIS", SDTFloatToInt>;
+
// cvt fp-to-int staff
def X86cvtp2IntRnd : SDNode<"X86ISD::CVTP2SI_RND", SDTFloatToIntRnd>;
def X86cvtp2UIntRnd : SDNode<"X86ISD::CVTP2UI_RND", SDTFloatToIntRnd>;
@@ -729,6 +739,8 @@ def X86mcvtp2Int : SDNode<"X86ISD::MCVTP2SI", SDTMFloatToInt>;
def X86mcvtp2UInt : SDNode<"X86ISD::MCVTP2UI", SDTMFloatToInt>;
def X86mcvttp2si : SDNode<"X86ISD::MCVTTP2SI", SDTMFloatToInt>;
def X86mcvttp2ui : SDNode<"X86ISD::MCVTTP2UI", SDTMFloatToInt>;
+def X86mcvttp2sis : SDNode<"X86ISD::MCVTTP2SIS", SDTMFloatToInt>;
+def X86mcvttp2uis : SDNode<"X86ISD::MCVTTP2UIS", SDTMFloatToInt>;
def SDTcvtph2ps : SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
SDTCVecEltisVT<1, i16>]>;
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 47be08c8af3efe..c1e58ef3703bf6 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -1212,6 +1212,54 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::CVTPS2PH, X86ISD::MCVTPS2PH),
X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_512, CVTPS2PH_MASK,
X86ISD::CVTPS2PH, X86ISD::MCVTPS2PH),
+ X86_INTRINSIC_DATA(avx512_mask_vcvttpd2dqs_128, CVTPD2DQ_MASK,
+ X86ISD::CVTTP2SIS, X86ISD::MCVTTP2SIS),
+ X86_INTRINSIC_DATA(avx512_mask_vcvttpd2dqs_round_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SIS, X86ISD::CVTTP2SIS_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_vcvttpd2dqs_round_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SIS, X86ISD::CVTTP2SIS_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_vcvttpd2qqs_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SIS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvttpd2qqs_round_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SIS, X86ISD::CVTTP2SIS_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_vcvttpd2qqs_round_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SIS, X86ISD::CVTTP2SIS_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_vcvttpd2udqs_128, CVTPD2DQ_MASK,
+ X86ISD::CVTTP2UIS, X86ISD::MCVTTP2UIS),
+ X86_INTRINSIC_DATA(avx512_mask_vcvttpd2udqs_round_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UIS, X86ISD::CVTTP2UIS_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_vcvttpd2udqs_round_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UIS, X86ISD::CVTTP2UIS_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_vcvttpd2uqqs_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UIS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvttpd2uqqs_round_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UIS, X86ISD::CVTTP2UIS_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_vcvttpd2uqqs_round_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UIS, X86ISD::CVTTP2UIS_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_vcvttps2dqs_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SIS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvttps2dqs_round_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SIS, X86ISD::CVTTP2SIS_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_vcvttps2dqs_round_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SIS, X86ISD::CVTTP2SIS_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_vcvttps2qqs_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SIS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvttps2qqs_round_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SIS, X86ISD::CVTTP2SIS_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_vcvttps2qqs_round_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SIS, X86ISD::CVTTP2SIS_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_vcvttps2udqs_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UIS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvttps2udqs_round_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UIS, X86ISD::CVTTP2UIS_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_vcvttps2udqs_round_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UIS, X86ISD::CVTTP2UIS_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_vcvttps2uqqs_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UIS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvttps2uqqs_round_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UIS, X86ISD::CVTTP2UIS_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_vcvttps2uqqs_round_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UIS, X86ISD::CVTTP2UIS_SAE),
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_128, FIXUPIMM_MASKZ,
X86ISD::VFIXUPIMM, 0),
@@ -1387,6 +1435,23 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::CVTS2UI_RND),
X86_INTRINSIC_DATA(avx512_vcvtss2usi64, INTR_TYPE_1OP, X86ISD::CVTS2UI,
X86ISD::CVTS2UI_RND),
+ X86_INTRINSIC_DATA(avx512_vcvttssd2si, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SIS,
+ X86ISD::CVTTS2SIS_SAE),
+ X86_INTRINSIC_DATA(avx512_vcvttssd2si64, INTR_TYPE_1OP_SAE,
+ X86ISD::CVTTS2SIS, X86ISD::CVTTS2SIS_SAE),
+ X86_INTRINSIC_DATA(avx512_vcvttssd2usi, INTR_TYPE_1OP_SAE,
+ X86ISD::CVTTS2UIS, X86ISD::CVTTS2UIS_SAE),
+ X86_INTRINSIC_DATA(avx512_vcvttssd2usi64, INTR_TYPE_1OP_SAE,
+ X86ISD::CVTTS2UIS, X86ISD::CVTTS2UIS_SAE),
+ X86_INTRINSIC_DATA(avx512_vcvttsss2si, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SIS,
+ X86ISD::CVTTS2SIS_SAE),
+ X86_INTRINSIC_DATA(avx512_vcvttsss2si64, INTR_TYPE_1OP_SAE,
+ X86ISD::CVTTS2SIS, X86ISD::CVTTS2SIS_SAE),
+ X86_INTRINSIC_DATA(avx512_vcvttsss2usi, INTR_TYPE_1OP_SAE,
+ X86ISD::CVTTS2UIS, X86ISD::CVTTS2UIS_SAE),
+ X86_INTRINSIC_DATA(avx512_vcvttsss2usi64, INTR_TYPE_1OP_SAE,
+ X86ISD::CVTTS2UIS, X86ISD::CVTTS2UIS_SAE),
+
X86_INTRINSIC_DATA(avx512_vfmadd_f32, INTR_TYPE_3OP, ISD::FMA,
X86ISD::FMADD_RND),
X86_INTRINSIC_DATA(avx512_vfmadd_f64, INTR_TYPE_3OP, ISD::FMA,
@@ -1399,7 +1464,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::FMADDSUB_RND),
X86_INTRINSIC_DATA(avx512_vfmaddsub_ps_512, INTR_TYPE_3OP, X86ISD::FMADDSUB,
X86ISD::FMADDSUB_RND),
-
X86_INTRINSIC_DATA(avx512_vpdpbusd_128, INTR_TYPE_3OP, X86ISD::VPDPBUSD, 0),
X86_INTRINSIC_DATA(avx512_vpdpbusd_256, INTR_TYPE_3OP, X86ISD::VPDPBUSD, 0),
X86_INTRINSIC_DATA(avx512_vpdpbusd_512, INTR_TYPE_3OP, X86ISD::VPDPBUSD, 0),
diff --git a/llvm/test/CodeGen/X86/avx10_2_512satcvtds-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2_512satcvtds-intrinsics.ll
new file mode 100644
index 00000000000000..5d3bb704984fb3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx10_2_512satcvtds-intrinsics.ll
@@ -0,0 +1,548 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X86
+
+define <8 x i32> @test_int_x86_mask_vcvtt_pd2dqs_512(<8 x double> %x0, <8 x i32> %src, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2dqs_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2dqs %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0xfc,0x49,0x6d,0xc8]
+; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2dqs_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2dqs %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0xfc,0x49,0x6d,0xc8]
+; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.round.512( <8 x double> %x0, <8 x i32> %src, i8 %mask, i32 4)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_maskz_vcvtt_pd2dqs_512_z(<8 x double> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_maskz_vcvtt_pd2dqs_512_z:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2dqs %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0xc9,0x6d,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_maskz_vcvtt_pd2dqs_512_z:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2dqs %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0xc9,0x6d,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.round.512( <8 x double> %x0, <8 x i32> zeroinitializer, i8 %mask, i32 4)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_mask_vcvtt_pd2dqs_512_undef(<8 x double> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2dqs_512_undef:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2dqs %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0xc9,0x6d,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2dqs_512_undef:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2dqs %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0xc9,0x6d,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.round.512( <8 x double> %x0, <8 x i32> undef, i8 %mask, i32 4)
+ ret <8 x i32> %res
+}
+
+
+define <8 x i32> @test_int_x86_mask_vcvtt_pd2dqs_512_default(<8 x double>* %x0) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2dqs_512_default:
+; X64: # %bb.0:
+; X64-NEXT: vcvttpd2dqs (%rdi), %ymm0 # encoding: [0x62,0xf5,0xfc,0x48,0x6d,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2dqs_512_default:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: vcvttpd2dqs (%eax), %ymm0 # encoding: [0x62,0xf5,0xfc,0x48,0x6d,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+ %x10 = load <8 x double>, <8 x double>* %x0
+ %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.round.512( <8 x double> %x10, <8 x i32> undef, i8 -1, i32 4)
+ ret <8 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.round.512(<8 x double>, <8 x i32>, i8, i32)
+
+define <8 x i32> @test_int_x86_mask_vcvtt_pd2udqs_512(<8 x double> %x0, <8 x i32> %src, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2udqs_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2udqs %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0xfc,0x49,0x6c,0xc8]
+; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2udqs_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2udqs %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0xfc,0x49,0x6c,0xc8]
+; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.round.512( <8 x double> %x0, <8 x i32> %src, i8 %mask, i32 4)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_maskz_vcvtt_pd2udqs_512_z(<8 x double> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_maskz_vcvtt_pd2udqs_512_z:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2udqs %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0xc9,0x6c,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_maskz_vcvtt_pd2udqs_512_z:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2udqs %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0xc9,0x6c,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.round.512( <8 x double> %x0, <8 x i32> zeroinitializer, i8 %mask, i32 4)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_mask_vcvtt_pd2udqs_512_undef(<8 x double> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2udqs_512_undef:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2udqs %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0xc9,0x6c,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2udqs_512_undef:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2udqs %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0xc9,0x6c,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.round.512( <8 x double> %x0, <8 x i32> undef, i8 %mask, i32 4)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_mask_vcvtt_pd2udqs_512_default(<8 x double>* %x0) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2udqs_512_default:
+; X64: # %bb.0:
+; X64-NEXT: vcvttpd2udqs (%rdi), %ymm0 # encoding: [0x62,0xf5,0xfc,0x48,0x6c,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2udqs_512_default:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: vcvttpd2udqs (%eax), %ymm0 # encoding: [0x62,0xf5,0xfc,0x48,0x6c,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+ %x10 = load <8 x double>, <8 x double>* %x0
+ %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.round.512( <8 x double> %x10, <8 x i32> undef, i8 -1, i32 4)
+ ret <8 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.round.512(<8 x double>, <8 x i32>, i8, i32)
+
+
+define <8 x i64> @test_int_x86_mask_vcvtt_pd2qqs_512(<8 x double> %x0, <8 x i64> %src, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2qqs_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2qqs %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf5,0xfd,0x49,0x6d,0xc8]
+; X64-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2qqs_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2qqs %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf5,0xfd,0x49,0x6d,0xc8]
+; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.round.512( <8 x double> %x0, <8 x i64> %src, i8 %mask, i32 4)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_int_x86_maskz_vcvtt_pd2qqs_512_z(<8 x double> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_maskz_vcvtt_pd2qqs_512_z:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2qqs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0xc9,0x6d,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_maskz_vcvtt_pd2qqs_512_z:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2qqs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0xc9,0x6d,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.round.512( <8 x double> %x0, <8 x i64> zeroinitializer, i8 %mask, i32 4)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_int_x86_mask_vcvtt_pd2qqs_512_undef(<8 x double> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2qqs_512_undef:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2qqs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0xc9,0x6d,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2qqs_512_undef:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2qqs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0xc9,0x6d,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.round.512( <8 x double> %x0, <8 x i64> undef, i8 %mask, i32 4)
+ ret <8 x i64> %res
+}
+
+
+define <8 x i64> @test_int_x86_mask_vcvtt_pd2qqs_512_default(<8 x double>* %x0) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2qqs_512_default:
+; X64: # %bb.0:
+; X64-NEXT: vcvttpd2qqs (%rdi), %zmm0 # encoding: [0x62,0xf5,0xfd,0x48,0x6d,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2qqs_512_default:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: vcvttpd2qqs (%eax), %zmm0 # encoding: [0x62,0xf5,0xfd,0x48,0x6d,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+ %x10 = load <8 x double>, <8 x double>* %x0
+ %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.round.512( <8 x double> %x10, <8 x i64> undef, i8 -1, i32 4)
+ ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.round.512(<8 x double>, <8 x i64>, i8, i32)
+
+define <8 x i64> @test_int_x86_mask_vcvtt_pd2uqqs_512(<8 x double> %x0, <8 x i64> %src, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2uqqs_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2uqqs %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf5,0xfd,0x49,0x6c,0xc8]
+; X64-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2uqqs_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2uqqs %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf5,0xfd,0x49,0x6c,0xc8]
+; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.round.512( <8 x double> %x0, <8 x i64> %src, i8 %mask, i32 4)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_int_x86_maskz_vcvtt_pd2uqqs_512_z(<8 x double> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_maskz_vcvtt_pd2uqqs_512_z:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2uqqs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0xc9,0x6c,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_maskz_vcvtt_pd2uqqs_512_z:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2uqqs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0xc9,0x6c,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.round.512( <8 x double> %x0, <8 x i64> zeroinitializer, i8 %mask, i32 4)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_int_x86_mask_vcvtt_pd2uqqs_512_undef(<8 x double> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2uqqs_512_undef:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2uqqs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0xc9,0x6c,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2uqqs_512_undef:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2uqqs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0xc9,0x6c,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.round.512( <8 x double> %x0, <8 x i64> undef, i8 %mask, i32 4)
+ ret <8 x i64> %res
+}
+
+
+define <8 x i64> @test_int_x86_mask_vcvtt_pd2uqqs_512_default(<8 x double>* %x0) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2uqqs_512_default:
+; X64: # %bb.0:
+; X64-NEXT: vcvttpd2uqqs (%rdi), %zmm0 # encoding: [0x62,0xf5,0xfd,0x48,0x6c,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2uqqs_512_default:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: vcvttpd2uqqs (%eax), %zmm0 # encoding: [0x62,0xf5,0xfd,0x48,0x6c,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+ %x10 = load <8 x double>, <8 x double>* %x0
+ %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.round.512( <8 x double> %x10, <8 x i64> undef, i8 -1, i32 4)
+ ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.round.512(<8 x double>, <8 x i64>, i8, i32)
+
+
+
+
+
+define <16 x i32> @test_int_x86_mask_vcvtt_ps2dqs_512(<16 x float> %x0, <16 x i32> %src, i16 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_ps2dqs_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2dqs %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf5,0x7c,0x49,0x6d,0xc8]
+; X64-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_ps2dqs_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2dqs %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf5,0x7c,0x49,0x6d,0xc8]
+; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.round.512( <16 x float> %x0, <16 x i32> %src, i16 %mask, i32 4)
+ ret <16 x i32> %res
+}
+
+define <16 x i32> @test_int_x86_maskz_vcvtt_ps2dqs_512_z(<16 x float> %x0, i16 %mask) {
+; X64-LABEL: test_int_x86_maskz_vcvtt_ps2dqs_512_z:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2dqs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xc9,0x6d,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_maskz_vcvtt_ps2dqs_512_z:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2dqs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xc9,0x6d,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.round.512( <16 x float> %x0, <16 x i32> zeroinitializer, i16 %mask, i32 4)
+ ret <16 x i32> %res
+}
+
+define <16 x i32> @test_int_x86_mask_vcvtt_ps2dqs_512_undef(<16 x float> %x0, i16 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_ps2dqs_512_undef:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2dqs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xc9,0x6d,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_ps2dqs_512_undef:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2dqs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xc9,0x6d,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.round.512( <16 x float> %x0, <16 x i32> undef, i16 %mask, i32 4)
+ ret <16 x i32> %res
+}
+
+
+define <16 x i32> @test_int_x86_mask_vcvtt_ps2dqs_512_default(<16 x float>* %x0) {
+; X64-LABEL: test_int_x86_mask_vcvtt_ps2dqs_512_default:
+; X64: # %bb.0:
+; X64-NEXT: vcvttps2dqs (%rdi), %zmm0 # encoding: [0x62,0xf5,0x7c,0x48,0x6d,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_ps2dqs_512_default:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: vcvttps2dqs (%eax), %zmm0 # encoding: [0x62,0xf5,0x7c,0x48,0x6d,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+ %x10 = load <16 x float>, <16 x float>* %x0
+ %res = call <16 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.round.512( <16 x float> %x10, <16 x i32> undef, i16 -1, i32 4)
+ ret <16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.round.512(<16 x float>, <16 x i32>, i16, i32)
+
+define <16 x i32> @test_int_x86_mask_vcvtt_ps2udqs_512(<16 x float> %x0, <16 x i32> %src, i16 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_ps2udqs_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2udqs %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf5,0x7c,0x49,0x6c,0xc8]
+; X64-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_ps2udqs_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2udqs %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf5,0x7c,0x49,0x6c,0xc8]
+; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.round.512( <16 x float> %x0, <16 x i32> %src, i16 %mask, i32 4)
+ ret <16 x i32> %res
+}
+
+define <16 x i32> @test_int_x86_maskz_vcvtt_ps2udqs_512_z(<16 x float> %x0, i16 %mask) {
+; X64-LABEL: test_int_x86_maskz_vcvtt_ps2udqs_512_z:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2udqs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xc9,0x6c,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_maskz_vcvtt_ps2udqs_512_z:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2udqs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xc9,0x6c,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.round.512( <16 x float> %x0, <16 x i32> zeroinitializer, i16 %mask, i32 4)
+ ret <16 x i32> %res
+}
+
+define <16 x i32> @test_int_x86_mask_vcvtt_ps2udqs_512_undef(<16 x float> %x0, i16 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_ps2udqs_512_undef:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2udqs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xc9,0x6c,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_ps2udqs_512_undef:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2udqs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xc9,0x6c,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.round.512( <16 x float> %x0, <16 x i32> undef, i16 %mask, i32 4)
+ ret <16 x i32> %res
+}
+
+
+define <16 x i32> @test_int_x86_mask_vcvtt_ps2udqs_512_default(<16 x float>* %x0) {
+; X64-LABEL: test_int_x86_mask_vcvtt_ps2udqs_512_default:
+; X64: # %bb.0:
+; X64-NEXT: vcvttps2udqs (%rdi), %zmm0 # encoding: [0x62,0xf5,0x7c,0x48,0x6c,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_ps2udqs_512_default:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: vcvttps2udqs (%eax), %zmm0 # encoding: [0x62,0xf5,0x7c,0x48,0x6c,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+ %x10 = load <16 x float>, <16 x float>* %x0
+ %res = call <16 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.round.512( <16 x float> %x10, <16 x i32> undef, i16 -1, i32 4)
+ ret <16 x i32> %res
+}
+
+
+declare <16 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.round.512(<16 x float>, <16 x i32>, i16, i32)
+
+define <8 x i64> @test_int_x86_mask_vcvtt_ps2qqs_512(<8 x float> %x0, <8 x i64> %src, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_ps2qqs_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2qqs %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x49,0x6d,0xc8]
+; X64-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_ps2qqs_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2qqs %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x49,0x6d,0xc8]
+; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.round.512( <8 x float> %x0, <8 x i64> %src, i8 %mask, i32 4)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_int_x86_maskz_vcvtt_ps2qqs_512_z(<8 x float> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_maskz_vcvtt_ps2qqs_512_z:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2qqs %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x6d,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_maskz_vcvtt_ps2qqs_512_z:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2qqs %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x6d,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.round.512( <8 x float> %x0, <8 x i64> zeroinitializer, i8 %mask, i32 4)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_int_x86_mask_vcvtt_ps2qqs_512_undef(<8 x float> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_ps2qqs_512_undef:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2qqs %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x6d,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_ps2qqs_512_undef:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2qqs %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x6d,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.round.512( <8 x float> %x0, <8 x i64> undef, i8 %mask, i32 4)
+ ret <8 x i64> %res
+}
+
+
+define <8 x i64> @test_int_x86_mask_vcvtt_ps2qqs_512_default(<8 x float> %x0) {
+; CHECK-LABEL: test_int_x86_mask_vcvtt_ps2qqs_512_default:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttps2qqs %ymm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x6d,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.round.512( <8 x float> %x0, <8 x i64> undef, i8 -1, i32 4)
+ ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.round.512(<8 x float>, <8 x i64>, i8, i32)
+
+define <8 x i64> @test_int_x86_mask_vcvtt_ps2uqqs_512(<8 x float> %x0, <8 x i64> %src, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_ps2uqqs_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2uqqs %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x49,0x6c,0xc8]
+; X64-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_ps2uqqs_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2uqqs %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x49,0x6c,0xc8]
+; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.round.512( <8 x float> %x0, <8 x i64> %src, i8 %mask, i32 4)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_int_x86_maskz_vcvtt_ps2uqqs_512_z(<8 x float> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_maskz_vcvtt_ps2uqqs_512_z:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2uqqs %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x6c,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_maskz_vcvtt_ps2uqqs_512_z:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2uqqs %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x6c,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.round.512( <8 x float> %x0, <8 x i64> zeroinitializer, i8 %mask, i32 4)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_int_x86_mask_vcvtt_ps2uqqs_512_undef(<8 x float> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_ps2uqqs_512_undef:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2uqqs %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x6c,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_ps2uqqs_512_undef:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2uqqs %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x6c,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.round.512( <8 x float> %x0, <8 x i64> undef, i8 %mask, i32 4)
+ ret <8 x i64> %res
+}
+
+
+define <8 x i64> @test_int_x86_mask_vcvtt_ps2uqqs_512_default(<8 x float> %x0) {
+; CHECK-LABEL: test_int_x86_mask_vcvtt_ps2uqqs_512_default:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttps2uqqs %ymm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x6c,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.round.512( <8 x float> %x0, <8 x i64> undef, i8 -1, i32 4)
+ ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.round.512(<8 x float>, <8 x i64>, i8, i32)
+
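
All packed tests above pass 4 for the rounding operand, so only the non-SAE encodings are pinned, and both undef and zeroinitializer passthroughs lower to the zero-masking {z} form. A hedged sketch of a combination the file leaves uncovered, SAE together with zero-masking on the unsigned 512-bit conversion (declaration as above):

define <8 x i64> @sketch_pd2uqqs_sae_z(<8 x double> %v, i8 %m) {
  ; 8 requests suppress-all-exceptions; the zero passthrough should pick {z}
  %r = call <8 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.round.512(<8 x double> %v, <8 x i64> zeroinitializer, i8 %m, i32 8)
  ret <8 x i64> %r
}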
diff --git a/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll b/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll
new file mode 100644
index 00000000000000..4a6556bdc4a919
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll
@@ -0,0 +1,115 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-linux -mattr=+avx10.2-256 | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx10.2-256 | FileCheck %s --check-prefix=X64
+
+;
+; 32-bit float to signed integer
+;
+
+declare i32 @llvm.fptosi.sat.i32.f32(float)
+declare i64 @llvm.fptosi.sat.i64.f32(float)
+
+define i32 @test_signed_i32_f32(float %f) nounwind {
+; X86-LABEL: test_signed_i32_f32:
+; X86: # %bb.0:
+; X86-NEXT: vcvttss2sis {{[0-9]+}}(%esp), %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: test_signed_i32_f32:
+; X64: # %bb.0:
+; X64-NEXT: vcvttss2sis %xmm0, %eax
+; X64-NEXT: retq
+ %x = call i32 @llvm.fptosi.sat.i32.f32(float %f)
+ ret i32 %x
+}
+
+define i64 @test_signed_i64_f32(float %f) nounwind {
+; X86-LABEL: test_signed_i64_f32:
+; X86: # %bb.0:
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-NEXT: vcvttps2qq %xmm1, %xmm1
+; X86-NEXT: vmovd %xmm1, %esi
+; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: vucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT: cmovbl %ecx, %esi
+; X86-NEXT: vpextrd $1, %xmm1, %eax
+; X86-NEXT: movl $-2147483648, %edi # imm = 0x80000000
+; X86-NEXT: cmovael %eax, %edi
+; X86-NEXT: vucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF
+; X86-NEXT: cmovbel %edi, %edx
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: cmovbel %esi, %eax
+; X86-NEXT: vucomiss %xmm0, %xmm0
+; X86-NEXT: cmovpl %ecx, %eax
+; X86-NEXT: cmovpl %ecx, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl
+;
+; X64-LABEL: test_signed_i64_f32:
+; X64: # %bb.0:
+; X64-NEXT: vcvttss2sis %xmm0, %rax
+; X64-NEXT: retq
+ %x = call i64 @llvm.fptosi.sat.i64.f32(float %f)
+ ret i64 %x
+}
+
+;
+; 64-bit float to signed integer
+;
+
+declare i32 @llvm.fptosi.sat.i32.f64(double)
+declare i64 @llvm.fptosi.sat.i64.f64(double)
+
+define i32 @test_signed_i32_f64(double %f) nounwind {
+; X86-LABEL: test_signed_i32_f64:
+; X86: # %bb.0:
+; X86-NEXT: vcvttsd2sis {{[0-9]+}}(%esp), %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: test_signed_i32_f64:
+; X64: # %bb.0:
+; X64-NEXT: vcvttsd2sis %xmm0, %eax
+; X64-NEXT: retq
+ %x = call i32 @llvm.fptosi.sat.i32.f64(double %f)
+ ret i32 %x
+}
+
+define i64 @test_signed_i64_f64(double %f) nounwind {
+; X86-LABEL: test_signed_i64_f64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; X86-NEXT: vcvttpd2qq %xmm1, %xmm1
+; X86-NEXT: vmovd %xmm1, %esi
+; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: vucomisd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT: cmovbl %ecx, %esi
+; X86-NEXT: vpextrd $1, %xmm1, %eax
+; X86-NEXT: movl $-2147483648, %edi # imm = 0x80000000
+; X86-NEXT: cmovael %eax, %edi
+; X86-NEXT: vucomisd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF
+; X86-NEXT: cmovbel %edi, %edx
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: cmovbel %esi, %eax
+; X86-NEXT: vucomisd %xmm0, %xmm0
+; X86-NEXT: cmovpl %ecx, %eax
+; X86-NEXT: cmovpl %ecx, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl
+;
+; X64-LABEL: test_signed_i64_f64:
+; X64: # %bb.0:
+; X64-NEXT: vcvttsd2sis %xmm0, %rax
+; X64-NEXT: retq
+ %x = call i64 @llvm.fptosi.sat.i64.f64(double %f)
+ ret i64 %x
+}
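
The direct lowerings above are sound because the generic llvm.fptosi.sat semantics (clamp to the destination range, NaN maps to 0) match what the saturating converts produce, so a lone vcvttss2sis/vcvttsd2sis needs no fixup code; the i686 i64 cases still expand through vcvttps2qq/vcvttpd2qq plus compares because no 64-bit GPR destination exists there. A small sketch of the NaN corner case, using only the generic intrinsic already declared above:

define i32 @sketch_sat_nan() {
  ; fptosi.sat of NaN is defined to return 0
  %x = call i32 @llvm.fptosi.sat.i32.f32(float 0x7FF8000000000000)
  ret i32 %x
}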
diff --git a/llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll
new file mode 100644
index 00000000000000..28457c4e4d6329
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll
@@ -0,0 +1,1098 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X86
+
+define i32 @test_x86_avx512_vcvttssd2usi(<2 x double> %a0) {
+; CHECK-LABEL: test_x86_avx512_vcvttssd2usi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttsd2usis %xmm0, %ecx # encoding: [0x62,0xf5,0x7f,0x08,0x6c,0xc8]
+; CHECK-NEXT: vcvttsd2usis {sae}, %xmm0, %eax # encoding: [0x62,0xf5,0x7f,0x18,0x6c,0xc0]
+; CHECK-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res0 = call i32 @llvm.x86.avx512.vcvttssd2usi(<2 x double> %a0, i32 4) ;
+ %res1 = call i32 @llvm.x86.avx512.vcvttssd2usi(<2 x double> %a0, i32 8) ;
+ %res2 = add i32 %res0, %res1
+ ret i32 %res2
+}
+declare i32 @llvm.x86.avx512.vcvttssd2usi(<2 x double>, i32) nounwind readnone
+
+define i32 @test_x86_avx512_vcvttssd2si(<2 x double> %a0) {
+; CHECK-LABEL: test_x86_avx512_vcvttssd2si:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttsd2sis %xmm0, %ecx # encoding: [0x62,0xf5,0x7f,0x08,0x6d,0xc8]
+; CHECK-NEXT: vcvttsd2sis {sae}, %xmm0, %eax # encoding: [0x62,0xf5,0x7f,0x18,0x6d,0xc0]
+; CHECK-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res0 = call i32 @llvm.x86.avx512.vcvttssd2si(<2 x double> %a0, i32 4) ;
+ %res1 = call i32 @llvm.x86.avx512.vcvttssd2si(<2 x double> %a0, i32 8) ;
+ %res2 = add i32 %res0, %res1
+ ret i32 %res2
+}
+declare i32 @llvm.x86.avx512.vcvttssd2si(<2 x double>, i32) nounwind readnone
+
+define i32 @test_x86_avx512_vcvttsss2si(<4 x float> %a0) {
+; CHECK-LABEL: test_x86_avx512_vcvttsss2si:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttss2sis {sae}, %xmm0, %ecx # encoding: [0x62,0xf5,0x7e,0x18,0x6d,0xc8]
+; CHECK-NEXT: vcvttss2sis %xmm0, %eax # encoding: [0x62,0xf5,0x7e,0x08,0x6d,0xc0]
+; CHECK-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res0 = call i32 @llvm.x86.avx512.vcvttsss2si(<4 x float> %a0, i32 8) ;
+ %res1 = call i32 @llvm.x86.avx512.vcvttsss2si(<4 x float> %a0, i32 4) ;
+ %res2 = add i32 %res0, %res1
+ ret i32 %res2
+}
+declare i32 @llvm.x86.avx512.vcvttsss2si(<4 x float>, i32) nounwind readnone
+
+define i32 @test_x86_avx512_vcvttsss2si_load(ptr %a0) {
+; X64-LABEL: test_x86_avx512_vcvttsss2si_load:
+; X64: # %bb.0:
+; X64-NEXT: vcvttss2sis (%rdi), %eax # encoding: [0x62,0xf5,0x7e,0x08,0x6d,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_x86_avx512_vcvttsss2si_load:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: vcvttss2sis (%eax), %eax # encoding: [0x62,0xf5,0x7e,0x08,0x6d,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+ %a1 = load <4 x float>, ptr %a0
+ %res = call i32 @llvm.x86.avx512.vcvttsss2si(<4 x float> %a1, i32 4) ;
+ ret i32 %res
+}
+
+define i32 @test_x86_avx512_vcvttsss2usi(<4 x float> %a0) {
+; CHECK-LABEL: test_x86_avx512_vcvttsss2usi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttss2usis {sae}, %xmm0, %ecx # encoding: [0x62,0xf5,0x7e,0x18,0x6c,0xc8]
+; CHECK-NEXT: vcvttss2usis %xmm0, %eax # encoding: [0x62,0xf5,0x7e,0x08,0x6c,0xc0]
+; CHECK-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res0 = call i32 @llvm.x86.avx512.vcvttsss2usi(<4 x float> %a0, i32 8) ;
+ %res1 = call i32 @llvm.x86.avx512.vcvttsss2usi(<4 x float> %a0, i32 4) ;
+ %res2 = add i32 %res0, %res1
+ ret i32 %res2
+}
+declare i32 @llvm.x86.avx512.vcvttsss2usi(<4 x float>, i32) nounwind readnone
+
+define <4 x i32> @test_int_x86_mask_vcvtt_pd2dqs_256(<4 x double> %x0, <4 x i32> %src, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2dqs_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2dqs %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0xfc,0x29,0x6d,0xc8]
+; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
+; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2dqs_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2dqs %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0xfc,0x29,0x6d,0xc8]
+; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
+; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.round.256( <4 x double> %x0, <4 x i32> %src, i8 %mask, i32 4)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_maskz_vcvtt_pd2dqs_256_z(<4 x double> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_maskz_vcvtt_pd2dqs_256_z:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2dqs %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0xa9,0x6d,0xc0]
+; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_maskz_vcvtt_pd2dqs_256_z:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2dqs %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0xa9,0x6d,0xc0]
+; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.round.256( <4 x double> %x0, <4 x i32> zeroinitializer, i8 %mask, i32 4)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_mask_vcvtt_pd2dqs_256_undef(<4 x double> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2dqs_256_undef:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2dqs %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0xa9,0x6d,0xc0]
+; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2dqs_256_undef:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2dqs %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0xa9,0x6d,0xc0]
+; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.round.256( <4 x double> %x0, <4 x i32> undef, i8 %mask, i32 4)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_mask_vcvtt_pd2dqs_256_default(<4 x double>* %xptr) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2dqs_256_default:
+; X64: # %bb.0:
+; X64-NEXT: vcvttpd2dqsy (%rdi), %xmm0 # encoding: [0x62,0xf5,0xfc,0x28,0x6d,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2dqs_256_default:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: vcvttpd2dqsy (%eax), %xmm0 # encoding: [0x62,0xf5,0xfc,0x28,0x6d,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+ %x0 = load <4 x double>, <4 x double>* %xptr
+ %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.round.256( <4 x double> %x0, <4 x i32> undef, i8 -1, i32 4)
+ ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.round.256(<4 x double>, <4 x i32>, i8, i32)
+
+define <4 x i32> @test_int_x86_mask_vcvtt_pd2udqs_256(<4 x double> %x0, <4 x i32> %src, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2udqs_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2udqs %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0xfc,0x29,0x6c,0xc8]
+; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
+; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2udqs_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2udqs %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0xfc,0x29,0x6c,0xc8]
+; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
+; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.round.256( <4 x double> %x0, <4 x i32> %src, i8 %mask, i32 4)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_maskz_vcvtt_pd2udqs_256_z(<4 x double> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_maskz_vcvtt_pd2udqs_256_z:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2udqs %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0xa9,0x6c,0xc0]
+; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_maskz_vcvtt_pd2udqs_256_z:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2udqs %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0xa9,0x6c,0xc0]
+; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.round.256( <4 x double> %x0, <4 x i32> zeroinitializer, i8 %mask, i32 4)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_mask_vcvtt_pd2udqs_256_undef(<4 x double> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2udqs_256_undef:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2udqs %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0xa9,0x6c,0xc0]
+; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2udqs_256_undef:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2udqs %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0xa9,0x6c,0xc0]
+; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.round.256( <4 x double> %x0, <4 x i32> undef, i8 %mask, i32 4)
+ ret <4 x i32> %res
+}
+
+
+define <4 x i32> @test_int_x86_mask_vcvtt_pd2udqs_256_default(<4 x double>* %x0) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2udqs_256_default:
+; X64: # %bb.0:
+; X64-NEXT: vcvttpd2udqsy (%rdi), %xmm0 # encoding: [0x62,0xf5,0xfc,0x28,0x6c,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2udqs_256_default:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: vcvttpd2udqsy (%eax), %xmm0 # encoding: [0x62,0xf5,0xfc,0x28,0x6c,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+ %x10 = load <4 x double>, <4 x double>* %x0
+ %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.round.256( <4 x double> %x10, <4 x i32> undef, i8 -1, i32 4)
+ ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.round.256(<4 x double>, <4 x i32>, i8, i32)
+
+
+define <4 x i64> @test_int_x86_mask_vcvtt_pd2qqs_256(<4 x double> %x0, <4 x i64> %src, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2qqs_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2qqs %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0xfd,0x29,0x6d,0xc8]
+; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2qqs_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2qqs %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0xfd,0x29,0x6d,0xc8]
+; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.round.256( <4 x double> %x0, <4 x i64> %src, i8 %mask, i32 4)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_int_x86_maskz_vcvtt_pd2qqs_256_z(<4 x double> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_maskz_vcvtt_pd2qqs_256_z:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2qqs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0xa9,0x6d,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_maskz_vcvtt_pd2qqs_256_z:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2qqs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0xa9,0x6d,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.round.256( <4 x double> %x0, <4 x i64> zeroinitializer, i8 %mask, i32 4)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_int_x86_mask_vcvtt_pd2qqs_256_undef(<4 x double> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2qqs_256_undef:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2qqs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0xa9,0x6d,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2qqs_256_undef:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2qqs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0xa9,0x6d,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.round.256( <4 x double> %x0, <4 x i64> undef, i8 %mask, i32 4)
+ ret <4 x i64> %res
+}
+
+
+define <4 x i64> @test_int_x86_mask_vcvtt_pd2qqs_256_default(<4 x double>* %x0) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2qqs_256_default:
+; X64: # %bb.0:
+; X64-NEXT: vcvttpd2qqs (%rdi), %ymm0 # encoding: [0x62,0xf5,0xfd,0x28,0x6d,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2qqs_256_default:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: vcvttpd2qqs (%eax), %ymm0 # encoding: [0x62,0xf5,0xfd,0x28,0x6d,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+ %x10 = load <4 x double>, <4 x double>* %x0
+ %res = call <4 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.round.256( <4 x double> %x10, <4 x i64> undef, i8 -1, i32 4)
+ ret <4 x i64> %res
+}
+
+
+declare <4 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.round.256(<4 x double>, <4 x i64>, i8, i32)
+
+
+
+define <4 x i64> @test_int_x86_mask_vcvtt_pd2uqqs_256(<4 x double> %x0, <4 x i64> %src, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2uqqs_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2uqqs %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0xfd,0x29,0x6c,0xc8]
+; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2uqqs_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2uqqs %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0xfd,0x29,0x6c,0xc8]
+; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.round.256( <4 x double> %x0, <4 x i64> %src, i8 %mask, i32 4)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_int_x86_maskz_vcvtt_pd2uqqs_256_z(<4 x double> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_maskz_vcvtt_pd2uqqs_256_z:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2uqqs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0xa9,0x6c,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_maskz_vcvtt_pd2uqqs_256_z:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2uqqs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0xa9,0x6c,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.round.256( <4 x double> %x0, <4 x i64> zeroinitializer, i8 %mask, i32 4)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_int_x86_mask_vcvtt_pd2uqqs_256_undef(<4 x double> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2uqqs_256_undef:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2uqqs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0xa9,0x6c,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2uqqs_256_undef:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2uqqs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0xa9,0x6c,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.round.256( <4 x double> %x0, <4 x i64> undef, i8 %mask, i32 4)
+ ret <4 x i64> %res
+}
+
+
+define <4 x i64> @test_int_x86_mask_vcvtt_pd2uqqs_256_default(<4 x double>* %x0) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2uqqs_256_default:
+; X64: # %bb.0:
+; X64-NEXT: vcvttpd2uqqs (%rdi), %ymm0 # encoding: [0x62,0xf5,0xfd,0x28,0x6c,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2uqqs_256_default:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: vcvttpd2uqqs (%eax), %ymm0 # encoding: [0x62,0xf5,0xfd,0x28,0x6c,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+ %x10 = load <4 x double>, <4 x double>* %x0
+ %res = call <4 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.round.256( <4 x double> %x10, <4 x i64> undef, i8 -1, i32 4)
+ ret <4 x i64> %res
+}
+
+
+declare <4 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.round.256(<4 x double>, <4 x i64>, i8, i32)
+
+
+
+define <8 x i32> @test_int_x86_mask_vcvtt_ps2dqs_256(<8 x float> %x0, <8 x i32> %src, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_ps2dqs_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2dqs %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0x7c,0x29,0x6d,0xc8]
+; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_ps2dqs_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2dqs %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0x7c,0x29,0x6d,0xc8]
+; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.round.256( <8 x float> %x0, <8 x i32> %src, i8 %mask, i32 4)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_maskz_vcvtt_ps2dqs_256_z(<8 x float> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_maskz_vcvtt_ps2dqs_256_z:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2dqs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xa9,0x6d,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_maskz_vcvtt_ps2dqs_256_z:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2dqs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xa9,0x6d,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.round.256( <8 x float> %x0, <8 x i32> zeroinitializer, i8 %mask, i32 4)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_mask_vcvtt_ps2dqs_256_undef(<8 x float> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_ps2dqs_256_undef:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2dqs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xa9,0x6d,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_ps2dqs_256_undef:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2dqs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xa9,0x6d,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.round.256( <8 x float> %x0, <8 x i32> undef, i8 %mask, i32 4)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_mask_vcvtt_ps2dqs_256_default(<8 x float>* %x0) {
+; X64-LABEL: test_int_x86_mask_vcvtt_ps2dqs_256_default:
+; X64: # %bb.0:
+; X64-NEXT: vcvttps2dqs (%rdi), %ymm0 # encoding: [0x62,0xf5,0x7c,0x28,0x6d,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_ps2dqs_256_default:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: vcvttps2dqs (%eax), %ymm0 # encoding: [0x62,0xf5,0x7c,0x28,0x6d,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+ %x10 = load <8 x float>, <8 x float>* %x0
+ %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.round.256( <8 x float> %x10, <8 x i32> undef, i8 -1, i32 4)
+ ret <8 x i32> %res
+}
+
+
+declare <8 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.round.256(<8 x float>, <8 x i32>, i8, i32)
+
+
+define <8 x i32> @test_int_x86_mask_vcvtt_ps2udqs_256(<8 x float> %x0, <8 x i32> %src, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_ps2udqs_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2udqs %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0x7c,0x29,0x6c,0xc8]
+; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_ps2udqs_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2udqs %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0x7c,0x29,0x6c,0xc8]
+; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.round.256( <8 x float> %x0, <8 x i32> %src, i8 %mask, i32 4)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_maskz_vcvtt_ps2udqs_256_z(<8 x float> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_maskz_vcvtt_ps2udqs_256_z:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2udqs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xa9,0x6c,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_maskz_vcvtt_ps2udqs_256_z:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2udqs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xa9,0x6c,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.round.256( <8 x float> %x0, <8 x i32> zeroinitializer, i8 %mask, i32 4)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_int_x86_mask_vcvtt_ps2udqs_256_undef(<8 x float> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_ps2udqs_256_undef:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2udqs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xa9,0x6c,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_ps2udqs_256_undef:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2udqs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xa9,0x6c,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.round.256( <8 x float> %x0, <8 x i32> undef, i8 %mask, i32 4)
+ ret <8 x i32> %res
+}
+
+
+define <8 x i32> @test_int_x86_mask_vcvtt_ps2udqs_256_default(<8 x float>* %x0) {
+; X64-LABEL: test_int_x86_mask_vcvtt_ps2udqs_256_default:
+; X64: # %bb.0:
+; X64-NEXT: vcvttps2udqs (%rdi), %ymm0 # encoding: [0x62,0xf5,0x7c,0x28,0x6c,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_ps2udqs_256_default:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: vcvttps2udqs (%eax), %ymm0 # encoding: [0x62,0xf5,0x7c,0x28,0x6c,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+ %x10 = load <8 x float>, <8 x float>* %x0
+ %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.round.256( <8 x float> %x10, <8 x i32> undef, i8 -1, i32 4)
+ ret <8 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.round.256(<8 x float>, <8 x i32>, i8, i32)
+
+
+define <4 x i64> @test_int_x86_maskz_vcvtt_ps2qqs_256_z(<4 x float> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_maskz_vcvtt_ps2qqs_256_z:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2qqs %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x6d,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_maskz_vcvtt_ps2qqs_256_z:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2qqs %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x6d,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.round.256( <4 x float> %x0, <4 x i64> zeroinitializer, i8 %mask, i32 4)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_int_x86_mask_vcvtt_ps2qqs_256_undef(<4 x float> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_ps2qqs_256_undef:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2qqs %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x6d,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_ps2qqs_256_undef:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2qqs %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x6d,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.round.256( <4 x float> %x0, <4 x i64> undef, i8 %mask, i32 4)
+ ret <4 x i64> %res
+}
+
+
+
+
+
+declare <4 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.round.256(<4 x float>, <4 x i64>, i8, i32)
+
+
+
+define <4 x i64> @test_int_x86_mask_vcvtt_ps2uqqs_256(<4 x float> %x0, <4 x i64> %src, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_ps2uqqs_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2uqqs %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x6c,0xc8]
+; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_ps2uqqs_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2uqqs %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x6c,0xc8]
+; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.round.256( <4 x float> %x0, <4 x i64> %src, i8 %mask, i32 4)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_int_x86_maskz_vcvtt_ps2uqqs_256_z(<4 x float> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_maskz_vcvtt_ps2uqqs_256_z:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2uqqs %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x6c,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_maskz_vcvtt_ps2uqqs_256_z:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2uqqs %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x6c,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.round.256( <4 x float> %x0, <4 x i64> zeroinitializer, i8 %mask, i32 4)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_int_x86_mask_vcvtt_ps2uqqs_256_undef(<4 x float> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_ps2uqqs_256_undef:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2uqqs %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x6c,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_ps2uqqs_256_undef:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2uqqs %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x6c,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.round.256( <4 x float> %x0, <4 x i64> undef, i8 %mask, i32 4)
+ ret <4 x i64> %res
+}
+
+
+define <4 x i64> @test_int_x86_mask_vcvtt_ps2uqqs_256_default(<4 x float> %x0) {
+; CHECK-LABEL: test_int_x86_mask_vcvtt_ps2uqqs_256_default:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttps2uqqs %xmm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x6c,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.round.256( <4 x float> %x0, <4 x i64> undef, i8 -1, i32 4)
+ ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.round.256(<4 x float>, <4 x i64>, i8, i32)
+
+define <4 x i32> @test_int_x86_mask_vcvtt_pd2dqs_128(<2 x double> %x0, <4 x i32> %src, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2dqs_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2dqs %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0xfc,0x09,0x6d,0xc8]
+; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2dqs_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2dqs %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0xfc,0x09,0x6d,0xc8]
+; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.128( <2 x double> %x0, <4 x i32> %src, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_maskz_vcvtt_pd2dqs_128_z(<2 x double> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_maskz_vcvtt_pd2dqs_128_z:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2dqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0x89,0x6d,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_maskz_vcvtt_pd2dqs_128_z:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2dqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0x89,0x6d,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.128( <2 x double> %x0, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_mask_vcvtt_pd2dqs_128_undef(<2 x double> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2dqs_128_undef:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2dqs %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf5,0xfc,0x09,0x6d,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2dqs_128_undef:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2dqs %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf5,0xfc,0x09,0x6d,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.128( <2 x double> %x0, <4 x i32> undef, i8 %mask)
+ ret <4 x i32> %res
+}
+
+
+define <4 x i32> @test_int_x86_mask_vcvtt_pd2dqs_128_default(<2 x double> %x0) {
+; CHECK-LABEL: test_int_x86_mask_vcvtt_pd2dqs_128_default:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttpd2dqs %xmm0, %xmm0 # encoding: [0x62,0xf5,0xfc,0x08,0x6d,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.128( <2 x double> %x0, <4 x i32> undef, i8 -1)
+ ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.128(<2 x double>, <4 x i32>, i8)
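+; The 128-bit intrinsics below have no i32 rounding-control operand; their
+; signature is simply (source, passthrough, mask).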
+
+define <4 x i32> @test_int_x86_mask_vcvtt_pd2udqs_128(<2 x double> %x0, <4 x i32> %src, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2udqs_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2udqs %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0xfc,0x09,0x6c,0xc8]
+; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2udqs_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2udqs %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0xfc,0x09,0x6c,0xc8]
+; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.128( <2 x double> %x0, <4 x i32> %src, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_maskz_vcvtt_pd2udqs_128_z(<2 x double> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_maskz_vcvtt_pd2udqs_128_z:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2udqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0x89,0x6c,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_maskz_vcvtt_pd2udqs_128_z:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2udqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0x89,0x6c,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.128( <2 x double> %x0, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_int_x86_mask_vcvtt_pd2udqs_128_undef(<2 x double> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2udqs_128_undef:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2udqs %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf5,0xfc,0x09,0x6c,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2udqs_128_undef:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2udqs %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf5,0xfc,0x09,0x6c,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.128( <2 x double> %x0, <4 x i32> undef, i8 %mask)
+ ret <4 x i32> %res
+}
+
+
+define <4 x i32> @test_int_x86_mask_vcvtt_pd2udqs_128_default(<2 x double> %x0) {
+; CHECK-LABEL: test_int_x86_mask_vcvtt_pd2udqs_128_default:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttpd2udqs %xmm0, %xmm0 # encoding: [0x62,0xf5,0xfc,0x08,0x6c,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.128( <2 x double> %x0, <4 x i32> undef, i8 -1)
+ ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.128(<2 x double>, <4 x i32>, i8)
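+; In the *_undef tests the passthrough operand is undef, so llc may pick
+; either in-place merge-masking ({%k1} over the source register) or
+; zero-masking ({z}); the autogenerated checks record whichever form is
+; currently selected.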
+
+define <2 x i64> @test_int_x86_mask_vcvtt_pd2qqs_128(<2 x double> %x0, <2 x i64> %src, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2qqs_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2qqs %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0xfd,0x09,0x6d,0xc8]
+; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2qqs_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2qqs %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0xfd,0x09,0x6d,0xc8]
+; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.128( <2 x double> %x0, <2 x i64> %src, i8 %mask)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_int_x86_maskz_vcvtt_pd2qqs_128_z(<2 x double> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_maskz_vcvtt_pd2qqs_128_z:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2qqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0x89,0x6d,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_maskz_vcvtt_pd2qqs_128_z:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2qqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0x89,0x6d,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.128( <2 x double> %x0, <2 x i64> zeroinitializer, i8 %mask)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_int_x86_mask_vcvtt_pd2qqs_128_undef(<2 x double> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2qqs_128_undef:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2qqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0x89,0x6d,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2qqs_128_undef:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2qqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0x89,0x6d,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.128( <2 x double> %x0, <2 x i64> undef, i8 %mask)
+ ret <2 x i64> %res
+}
+
+
+define <2 x i64> @test_int_x86_mask_vcvtt_pd2qqs_128_default(<2 x double> %x0) {
+; CHECK-LABEL: test_int_x86_mask_vcvtt_pd2qqs_128_default:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttpd2qqs %xmm0, %xmm0 # encoding: [0x62,0xf5,0xfd,0x08,0x6d,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.128( <2 x double> %x0, <2 x i64> undef, i8 -1)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.128(<2 x double>, <2 x i64>, i8)
+
+define <2 x i64> @test_int_x86_mask_vcvtt_pd2uqqs_128(<2 x double> %x0, <2 x i64> %src, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2uqqs_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2uqqs %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0xfd,0x09,0x6c,0xc8]
+; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2uqqs_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2uqqs %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0xfd,0x09,0x6c,0xc8]
+; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.128( <2 x double> %x0, <2 x i64> %src, i8 %mask)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_int_x86_maskz_vcvtt_pd2uqqs_128_z(<2 x double> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_maskz_vcvtt_pd2uqqs_128_z:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2uqqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0x89,0x6c,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_maskz_vcvtt_pd2uqqs_128_z:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2uqqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0x89,0x6c,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.128( <2 x double> %x0, <2 x i64> zeroinitializer, i8 %mask)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_int_x86_mask_vcvtt_pd2uqqs_128_undef(<2 x double> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_pd2uqqs_128_undef:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttpd2uqqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0x89,0x6c,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_pd2uqqs_128_undef:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttpd2uqqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0x89,0x6c,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.128( <2 x double> %x0, <2 x i64> undef, i8 %mask)
+ ret <2 x i64> %res
+}
+
+
+define <2 x i64> @test_int_x86_mask_vcvtt_pd2uqqs_128_default(<2 x double> %x0) {
+; CHECK-LABEL: test_int_x86_mask_vcvtt_pd2uqqs_128_default:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttpd2uqqs %xmm0, %xmm0 # encoding: [0x62,0xf5,0xfd,0x08,0x6c,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.128( <2 x double> %x0, <2 x i64> undef, i8 -1)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.128(<2 x double>, <2 x i64>, i8)
+
+define <2 x i64> @test_int_x86_mask_vcvtt_ps2qqs_128_default(<4 x float> %x0) {
+; CHECK-LABEL: test_int_x86_mask_vcvtt_ps2qqs_128_default:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttps2qqs %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x6d,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.128( <4 x float> %x0, <2 x i64> undef, i8 -1)
+ ret <2 x i64> %res
+}
+
+define <4 x i32> @test_int_x86_mask_vcvtt_ps2dqs_128(<4 x float> %x0, <4 x i32> %src, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_ps2dqs_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2dqs %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0x7c,0x09,0x6d,0xc8]
+; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_ps2dqs_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2dqs %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0x7c,0x09,0x6d,0xc8]
+; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.128( <4 x float> %x0, <4 x i32> %src, i8 %mask)
+ ret <4 x i32> %res
+}
+define <4 x i32> @test_int_x86_maskz_vcvtt_ps2dqs_128_z(<4 x float> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_maskz_vcvtt_ps2dqs_128_z:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2dqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0x89,0x6d,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_maskz_vcvtt_ps2dqs_128_z:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2dqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0x89,0x6d,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.128( <4 x float> %x0, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %res
+}
+define <4 x i32> @test_int_x86_mask_vcvtt_ps2dqs_128_undef(<4 x float> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_ps2dqs_128_undef:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2dqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0x89,0x6d,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_ps2dqs_128_undef:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2dqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0x89,0x6d,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.128( <4 x float> %x0, <4 x i32> undef, i8 %mask)
+ ret <4 x i32> %res
+}
+define <4 x i32> @test_int_x86_mask_vcvtt_ps2dqs_128_default(<4 x float> %x0) {
+; CHECK-LABEL: test_int_x86_mask_vcvtt_ps2dqs_128_default:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttps2dqs %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7c,0x08,0x6d,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.128( <4 x float> %x0, <4 x i32> undef, i8 -1)
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.128(<4 x float>, <4 x i32>, i8)
+
+
+define <4 x i32> @test_int_x86_mask_vcvtt_ps2udqs_128(<4 x float> %x0, <4 x i32> %src, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_ps2udqs_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2udqs %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0x7c,0x09,0x6c,0xc8]
+; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_ps2udqs_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2udqs %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0x7c,0x09,0x6c,0xc8]
+; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.128( <4 x float> %x0, <4 x i32> %src, i8 %mask)
+ ret <4 x i32> %res
+}
+define <4 x i32> @test_int_x86_maskz_vcvtt_ps2udqs_128_z(<4 x float> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_maskz_vcvtt_ps2udqs_128_z:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2udqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0x89,0x6c,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_maskz_vcvtt_ps2udqs_128_z:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2udqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0x89,0x6c,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.128( <4 x float> %x0, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %res
+}
+define <4 x i32> @test_int_x86_mask_vcvtt_ps2udqs_128_undef(<4 x float> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_ps2udqs_128_undef:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2udqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0x89,0x6c,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_ps2udqs_128_undef:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2udqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0x89,0x6c,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.128( <4 x float> %x0, <4 x i32> undef, i8 %mask)
+ ret <4 x i32> %res
+}
+define <4 x i32> @test_int_x86_mask_vcvtt_ps2udqs_128_default(<4 x float> %x0) {
+; CHECK-LABEL: test_int_x86_mask_vcvtt_ps2udqs_128_default:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttps2udqs %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7c,0x08,0x6c,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.128( <4 x float> %x0, <4 x i32> undef, i8 -1)
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.128(<4 x float>, <4 x i32>, i8)
+
+
+define <2 x i64> @test_int_x86_mask_vcvtt_ps2qqs_128_undef(<4 x float> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_ps2qqs_128_undef:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2qqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x6d,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_ps2qqs_128_undef:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2qqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x6d,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.128( <4 x float> %x0, <2 x i64> undef, i8 %mask)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_int_x86_maskz_vcvtt_ps2qqs_128_z(<4 x float> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_maskz_vcvtt_ps2qqs_128_z:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2qqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x6d,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_maskz_vcvtt_ps2qqs_128_z:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2qqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x6d,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.128( <4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_int_x86_mask_vcvtt_ps2qqs_128(<4 x float> %x0, <2 x i64> %src, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_ps2qqs_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2qqs %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x6d,0xc8]
+; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_ps2qqs_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2qqs %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x6d,0xc8]
+; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.128( <4 x float> %x0, <2 x i64> %src, i8 %mask)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.128(<4 x float>, <2 x i64>, i8)
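+; At 128 bits, ps2qqs/ps2uqqs widen the element type: only the low two floats
+; of the <4 x float> source are converted, producing <2 x i64>.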
+
+define <2 x i64> @test_int_x86_mask_vcvtt_ps2uqqs_128(<4 x float> %x0, <2 x i64> %src, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_ps2uqqs_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2uqqs %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x6c,0xc8]
+; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_ps2uqqs_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2uqqs %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x6c,0xc8]
+; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.128( <4 x float> %x0, <2 x i64> %src, i8 %mask)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_int_x86_mask_vcvtt_ps2uqqs_128_undef(<4 x float> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_mask_vcvtt_ps2uqqs_128_undef:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2uqqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x6c,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_mask_vcvtt_ps2uqqs_128_undef:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2uqqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x6c,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.128( <4 x float> %x0, <2 x i64> undef, i8 %mask)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_int_x86_mask_vcvtt_ps2uqqs_128_default(<4 x float> %x0) {
+; CHECK-LABEL: test_int_x86_mask_vcvtt_ps2uqqs_128_default:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttps2uqqs %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x6c,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.128( <4 x float> %x0, <2 x i64> undef, i8 -1)
+ ret <2 x i64> %res
+}
+define <2 x i64> @test_int_x86_maskz_vcvtt_ps2uqqs_128_z(<4 x float> %x0, i8 %mask) {
+; X64-LABEL: test_int_x86_maskz_vcvtt_ps2uqqs_128_z:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2uqqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x6c,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_maskz_vcvtt_ps2uqqs_128_z:
+; X86: # %bb.0:
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2uqqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x6c,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.128( <4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask)
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.128(<4 x float>, <2 x i64>, i8)
+
diff --git a/llvm/test/CodeGen/X86/avx10_2satcvtds-x64-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2satcvtds-x64-intrinsics.ll
new file mode 100644
index 00000000000000..5cf613e89ba502
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx10_2satcvtds-x64-intrinsics.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s
+
+define i64 @test_x86_avx512_vcvttsd2si64(<2 x double> %a0) {
+; CHECK-LABEL: test_x86_avx512_vcvttsd2si64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttsd2sis %xmm0, %rcx # encoding: [0x62,0xf5,0xff,0x08,0x6d,0xc8]
+; CHECK-NEXT: vcvttsd2sis {sae}, %xmm0, %rax # encoding: [0x62,0xf5,0xff,0x18,0x6d,0xc0]
+; CHECK-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
+; CHECK-NEXT: retq # encoding: [0xc3]
+ %res0 = call i64 @llvm.x86.avx512.vcvttssd2si64(<2 x double> %a0, i32 4)
+ %res1 = call i64 @llvm.x86.avx512.vcvttssd2si64(<2 x double> %a0, i32 8)
+ %res2 = add i64 %res0, %res1
+ ret i64 %res2
+}
+declare i64 @llvm.x86.avx512.vcvttssd2si64(<2 x double>, i32) nounwind readnone
+
+define i64 @test_x86_avx512_vcvttsd2usi64(<2 x double> %a0) {
+; CHECK-LABEL: test_x86_avx512_vcvttsd2usi64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttsd2usis %xmm0, %rcx # encoding: [0x62,0xf5,0xff,0x08,0x6c,0xc8]
+; CHECK-NEXT: vcvttsd2usis {sae}, %xmm0, %rax # encoding: [0x62,0xf5,0xff,0x18,0x6c,0xc0]
+; CHECK-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
+; CHECK-NEXT: retq # encoding: [0xc3]
+ %res0 = call i64 @llvm.x86.avx512.vcvttssd2usi64(<2 x double> %a0, i32 4)
+ %res1 = call i64 @llvm.x86.avx512.vcvttssd2usi64(<2 x double> %a0, i32 8)
+ %res2 = add i64 %res0, %res1
+ ret i64 %res2
+}
+declare i64 @llvm.x86.avx512.vcvttssd2usi64(<2 x double>, i32) nounwind readnone
+
+define i64 @test_x86_avx512_vcvttsss2si64(<4 x float> %a0) {
+; CHECK-LABEL: test_x86_avx512_vcvttsss2si64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttss2sis %xmm0, %rcx # encoding: [0x62,0xf5,0xfe,0x08,0x6d,0xc8]
+; CHECK-NEXT: vcvttss2sis {sae}, %xmm0, %rax # encoding: [0x62,0xf5,0xfe,0x18,0x6d,0xc0]
+; CHECK-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
+; CHECK-NEXT: retq # encoding: [0xc3]
+ %res0 = call i64 @llvm.x86.avx512.vcvttsss2si64(<4 x float> %a0, i32 4)
+ %res1 = call i64 @llvm.x86.avx512.vcvttsss2si64(<4 x float> %a0, i32 8)
+ %res2 = add i64 %res0, %res1
+ ret i64 %res2
+}
+declare i64 @llvm.x86.avx512.vcvttsss2si64(<4 x float>, i32) nounwind readnone
+
+define i64 @test_x86_avx512_vcvttsss2usi64(<4 x float> %a0) {
+; CHECK-LABEL: test_x86_avx512_vcvttsss2usi64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcvttss2usis %xmm0, %rcx # encoding: [0x62,0xf5,0xfe,0x08,0x6c,0xc8]
+; CHECK-NEXT: vcvttss2usis {sae}, %xmm0, %rax # encoding: [0x62,0xf5,0xfe,0x18,0x6c,0xc0]
+; CHECK-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
+; CHECK-NEXT: retq # encoding: [0xc3]
+ %res0 = call i64 @llvm.x86.avx512.vcvttsss2usi64(<4 x float> %a0, i32 4)
+ %res1 = call i64 @llvm.x86.avx512.vcvttsss2usi64(<4 x float> %a0, i32 8)
+ %res2 = add i64 %res0, %res1
+ ret i64 %res2
+}
+declare i64 @llvm.x86.avx512.vcvttsss2usi64(<4 x float>, i32) nounwind readnone
diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-satcvtds-32.txt b/llvm/test/MC/Disassembler/X86/avx10.2-satcvtds-32.txt
new file mode 100644
index 00000000000000..b2b8267618c183
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx10.2-satcvtds-32.txt
@@ -0,0 +1,1043 @@
+# RUN: llvm-mc --disassemble %s -triple=i386 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i386 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: vcvttpd2dqs %xmm3, %xmm2
+# INTEL: vcvttpd2dqs xmm2, xmm3
+0x62,0xf5,0xfc,0x08,0x6d,0xd3
+
+# ATT: vcvttpd2dqs %xmm3, %xmm2 {%k7}
+# INTEL: vcvttpd2dqs xmm2 {k7}, xmm3
+0x62,0xf5,0xfc,0x0f,0x6d,0xd3
+
+# ATT: vcvttpd2dqs %xmm3, %xmm2 {%k7} {z}
+# INTEL: vcvttpd2dqs xmm2 {k7} {z}, xmm3
+0x62,0xf5,0xfc,0x8f,0x6d,0xd3
+
+# ATT: vcvttpd2dqs %ymm3, %xmm2
+# INTEL: vcvttpd2dqs xmm2, ymm3
+0x62,0xf5,0xfc,0x28,0x6d,0xd3
+
+# ATT: vcvttpd2dqs {sae}, %ymm3, %xmm2
+# INTEL: vcvttpd2dqs xmm2, ymm3, {sae}
+0x62,0xf5,0xf8,0x18,0x6d,0xd3
+
+# ATT: vcvttpd2dqs %ymm3, %xmm2 {%k7}
+# INTEL: vcvttpd2dqs xmm2 {k7}, ymm3
+0x62,0xf5,0xfc,0x2f,0x6d,0xd3
+
+# ATT: vcvttpd2dqs {sae}, %ymm3, %xmm2 {%k7} {z}
+# INTEL: vcvttpd2dqs xmm2 {k7} {z}, ymm3, {sae}
+0x62,0xf5,0xf8,0x9f,0x6d,0xd3
+
+# ATT: vcvttpd2dqs %zmm3, %ymm2
+# INTEL: vcvttpd2dqs ymm2, zmm3
+0x62,0xf5,0xfc,0x48,0x6d,0xd3
+
+# ATT: vcvttpd2dqs {sae}, %zmm3, %ymm2
+# INTEL: vcvttpd2dqs ymm2, zmm3, {sae}
+0x62,0xf5,0xfc,0x18,0x6d,0xd3
+
+# ATT: vcvttpd2dqs %zmm3, %ymm2 {%k7}
+# INTEL: vcvttpd2dqs ymm2 {k7}, zmm3
+0x62,0xf5,0xfc,0x4f,0x6d,0xd3
+
+# ATT: vcvttpd2dqs {sae}, %zmm3, %ymm2 {%k7} {z}
+# INTEL: vcvttpd2dqs ymm2 {k7} {z}, zmm3, {sae}
+0x62,0xf5,0xfc,0x9f,0x6d,0xd3
+
+# ATT: vcvttpd2dqsx 268435456(%esp,%esi,8), %xmm2
+# INTEL: vcvttpd2dqs xmm2, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0xfc,0x08,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttpd2dqsx 291(%edi,%eax,4), %xmm2 {%k7}
+# INTEL: vcvttpd2dqs xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0xfc,0x0f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcvttpd2dqs (%eax){1to2}, %xmm2
+# INTEL: vcvttpd2dqs xmm2, qword ptr [eax]{1to2}
+0x62,0xf5,0xfc,0x18,0x6d,0x10
+
+# ATT: vcvttpd2dqsx -512(,%ebp,2), %xmm2
+# INTEL: vcvttpd2dqs xmm2, xmmword ptr [2*ebp - 512]
+0x62,0xf5,0xfc,0x08,0x6d,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vcvttpd2dqsx 2032(%ecx), %xmm2 {%k7} {z}
+# INTEL: vcvttpd2dqs xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+0x62,0xf5,0xfc,0x8f,0x6d,0x51,0x7f
+
+# ATT: vcvttpd2dqs -1024(%edx){1to2}, %xmm2 {%k7} {z}
+# INTEL: vcvttpd2dqs xmm2 {k7} {z}, qword ptr [edx - 1024]{1to2}
+0x62,0xf5,0xfc,0x9f,0x6d,0x52,0x80
+
+# ATT: vcvttpd2dqs (%eax){1to4}, %xmm2
+# INTEL: vcvttpd2dqs xmm2, qword ptr [eax]{1to4}
+0x62,0xf5,0xfc,0x38,0x6d,0x10
+
+# ATT: vcvttpd2dqsy -1024(,%ebp,2), %xmm2
+# INTEL: vcvttpd2dqs xmm2, ymmword ptr [2*ebp - 1024]
+0x62,0xf5,0xfc,0x28,0x6d,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vcvttpd2dqsy 4064(%ecx), %xmm2 {%k7} {z}
+# INTEL: vcvttpd2dqs xmm2 {k7} {z}, ymmword ptr [ecx + 4064]
+0x62,0xf5,0xfc,0xaf,0x6d,0x51,0x7f
+
+# ATT: vcvttpd2dqs -1024(%edx){1to4}, %xmm2 {%k7} {z}
+# INTEL: vcvttpd2dqs xmm2 {k7} {z}, qword ptr [edx - 1024]{1to4}
+0x62,0xf5,0xfc,0xbf,0x6d,0x52,0x80
+
+# ATT: vcvttpd2dqs 268435456(%esp,%esi,8), %ymm2
+# INTEL: vcvttpd2dqs ymm2, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0xfc,0x48,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttpd2dqs 291(%edi,%eax,4), %ymm2 {%k7}
+# INTEL: vcvttpd2dqs ymm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0xfc,0x4f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcvttpd2dqs (%eax){1to8}, %ymm2
+# INTEL: vcvttpd2dqs ymm2, qword ptr [eax]{1to8}
+0x62,0xf5,0xfc,0x58,0x6d,0x10
+
+# ATT: vcvttpd2dqs -2048(,%ebp,2), %ymm2
+# INTEL: vcvttpd2dqs ymm2, zmmword ptr [2*ebp - 2048]
+0x62,0xf5,0xfc,0x48,0x6d,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vcvttpd2dqs 8128(%ecx), %ymm2 {%k7} {z}
+# INTEL: vcvttpd2dqs ymm2 {k7} {z}, zmmword ptr [ecx + 8128]
+0x62,0xf5,0xfc,0xcf,0x6d,0x51,0x7f
+
+# ATT: vcvttpd2dqs -1024(%edx){1to8}, %ymm2 {%k7} {z}
+# INTEL: vcvttpd2dqs ymm2 {k7} {z}, qword ptr [edx - 1024]{1to8}
+0x62,0xf5,0xfc,0xdf,0x6d,0x52,0x80
+
+# ATT: vcvttpd2qqs %xmm3, %xmm2
+# INTEL: vcvttpd2qqs xmm2, xmm3
+0x62,0xf5,0xfd,0x08,0x6d,0xd3
+
+# ATT: vcvttpd2qqs %xmm3, %xmm2 {%k7}
+# INTEL: vcvttpd2qqs xmm2 {k7}, xmm3
+0x62,0xf5,0xfd,0x0f,0x6d,0xd3
+
+# ATT: vcvttpd2qqs %xmm3, %xmm2 {%k7} {z}
+# INTEL: vcvttpd2qqs xmm2 {k7} {z}, xmm3
+0x62,0xf5,0xfd,0x8f,0x6d,0xd3
+
+# ATT: vcvttpd2qqs %ymm3, %ymm2
+# INTEL: vcvttpd2qqs ymm2, ymm3
+0x62,0xf5,0xfd,0x28,0x6d,0xd3
+
+# ATT: vcvttpd2qqs {sae}, %ymm3, %ymm2
+# INTEL: vcvttpd2qqs ymm2, ymm3, {sae}
+0x62,0xf5,0xf9,0x18,0x6d,0xd3
+
+# ATT: vcvttpd2qqs %ymm3, %ymm2 {%k7}
+# INTEL: vcvttpd2qqs ymm2 {k7}, ymm3
+0x62,0xf5,0xfd,0x2f,0x6d,0xd3
+
+# ATT: vcvttpd2qqs {sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vcvttpd2qqs ymm2 {k7} {z}, ymm3, {sae}
+0x62,0xf5,0xf9,0x9f,0x6d,0xd3
+
+# ATT: vcvttpd2qqs %zmm3, %zmm2
+# INTEL: vcvttpd2qqs zmm2, zmm3
+0x62,0xf5,0xfd,0x48,0x6d,0xd3
+
+# ATT: vcvttpd2qqs {sae}, %zmm3, %zmm2
+# INTEL: vcvttpd2qqs zmm2, zmm3, {sae}
+0x62,0xf5,0xfd,0x18,0x6d,0xd3
+
+# ATT: vcvttpd2qqs %zmm3, %zmm2 {%k7}
+# INTEL: vcvttpd2qqs zmm2 {k7}, zmm3
+0x62,0xf5,0xfd,0x4f,0x6d,0xd3
+
+# ATT: vcvttpd2qqs {sae}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vcvttpd2qqs zmm2 {k7} {z}, zmm3, {sae}
+0x62,0xf5,0xfd,0x9f,0x6d,0xd3
+
+# ATT: vcvttpd2qqs 268435456(%esp,%esi,8), %xmm2
+# INTEL: vcvttpd2qqs xmm2, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0xfd,0x08,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttpd2qqs 291(%edi,%eax,4), %xmm2 {%k7}
+# INTEL: vcvttpd2qqs xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0xfd,0x0f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcvttpd2qqs (%eax){1to2}, %xmm2
+# INTEL: vcvttpd2qqs xmm2, qword ptr [eax]{1to2}
+0x62,0xf5,0xfd,0x18,0x6d,0x10
+
+# ATT: vcvttpd2qqs -512(,%ebp,2), %xmm2
+# INTEL: vcvttpd2qqs xmm2, xmmword ptr [2*ebp - 512]
+0x62,0xf5,0xfd,0x08,0x6d,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vcvttpd2qqs 2032(%ecx), %xmm2 {%k7} {z}
+# INTEL: vcvttpd2qqs xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+0x62,0xf5,0xfd,0x8f,0x6d,0x51,0x7f
+
+# ATT: vcvttpd2qqs -1024(%edx){1to2}, %xmm2 {%k7} {z}
+# INTEL: vcvttpd2qqs xmm2 {k7} {z}, qword ptr [edx - 1024]{1to2}
+0x62,0xf5,0xfd,0x9f,0x6d,0x52,0x80
+
+# ATT: vcvttpd2qqs 268435456(%esp,%esi,8), %ymm2
+# INTEL: vcvttpd2qqs ymm2, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0xfd,0x28,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttpd2qqs 291(%edi,%eax,4), %ymm2 {%k7}
+# INTEL: vcvttpd2qqs ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0xfd,0x2f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcvttpd2qqs (%eax){1to4}, %ymm2
+# INTEL: vcvttpd2qqs ymm2, qword ptr [eax]{1to4}
+0x62,0xf5,0xfd,0x38,0x6d,0x10
+
+# ATT: vcvttpd2qqs -1024(,%ebp,2), %ymm2
+# INTEL: vcvttpd2qqs ymm2, ymmword ptr [2*ebp - 1024]
+0x62,0xf5,0xfd,0x28,0x6d,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vcvttpd2qqs 4064(%ecx), %ymm2 {%k7} {z}
+# INTEL: vcvttpd2qqs ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+0x62,0xf5,0xfd,0xaf,0x6d,0x51,0x7f
+
+# ATT: vcvttpd2qqs -1024(%edx){1to4}, %ymm2 {%k7} {z}
+# INTEL: vcvttpd2qqs ymm2 {k7} {z}, qword ptr [edx - 1024]{1to4}
+0x62,0xf5,0xfd,0xbf,0x6d,0x52,0x80
+
+# ATT: vcvttpd2qqs 268435456(%esp,%esi,8), %zmm2
+# INTEL: vcvttpd2qqs zmm2, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0xfd,0x48,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttpd2qqs 291(%edi,%eax,4), %zmm2 {%k7}
+# INTEL: vcvttpd2qqs zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0xfd,0x4f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcvttpd2qqs (%eax){1to8}, %zmm2
+# INTEL: vcvttpd2qqs zmm2, qword ptr [eax]{1to8}
+0x62,0xf5,0xfd,0x58,0x6d,0x10
+
+# ATT: vcvttpd2qqs -2048(,%ebp,2), %zmm2
+# INTEL: vcvttpd2qqs zmm2, zmmword ptr [2*ebp - 2048]
+0x62,0xf5,0xfd,0x48,0x6d,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vcvttpd2qqs 8128(%ecx), %zmm2 {%k7} {z}
+# INTEL: vcvttpd2qqs zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+0x62,0xf5,0xfd,0xcf,0x6d,0x51,0x7f
+
+# ATT: vcvttpd2qqs -1024(%edx){1to8}, %zmm2 {%k7} {z}
+# INTEL: vcvttpd2qqs zmm2 {k7} {z}, qword ptr [edx - 1024]{1to8}
+0x62,0xf5,0xfd,0xdf,0x6d,0x52,0x80
+
+# ATT: vcvttpd2udqs %xmm3, %xmm2
+# INTEL: vcvttpd2udqs xmm2, xmm3
+0x62,0xf5,0xfc,0x08,0x6c,0xd3
+
+# ATT: vcvttpd2udqs %xmm3, %xmm2 {%k7}
+# INTEL: vcvttpd2udqs xmm2 {k7}, xmm3
+0x62,0xf5,0xfc,0x0f,0x6c,0xd3
+
+# ATT: vcvttpd2udqs %xmm3, %xmm2 {%k7} {z}
+# INTEL: vcvttpd2udqs xmm2 {k7} {z}, xmm3
+0x62,0xf5,0xfc,0x8f,0x6c,0xd3
+
+# ATT: vcvttpd2udqs %ymm3, %xmm2
+# INTEL: vcvttpd2udqs xmm2, ymm3
+0x62,0xf5,0xfc,0x28,0x6c,0xd3
+
+# ATT: vcvttpd2udqs {sae}, %ymm3, %xmm2
+# INTEL: vcvttpd2udqs xmm2, ymm3, {sae}
+0x62,0xf5,0xf8,0x18,0x6c,0xd3
+
+# ATT: vcvttpd2udqs %ymm3, %xmm2 {%k7}
+# INTEL: vcvttpd2udqs xmm2 {k7}, ymm3
+0x62,0xf5,0xfc,0x2f,0x6c,0xd3
+
+# ATT: vcvttpd2udqs {sae}, %ymm3, %xmm2 {%k7} {z}
+# INTEL: vcvttpd2udqs xmm2 {k7} {z}, ymm3, {sae}
+0x62,0xf5,0xf8,0x9f,0x6c,0xd3
+
+# ATT: vcvttpd2udqs %zmm3, %ymm2
+# INTEL: vcvttpd2udqs ymm2, zmm3
+0x62,0xf5,0xfc,0x48,0x6c,0xd3
+
+# ATT: vcvttpd2udqs {sae}, %zmm3, %ymm2
+# INTEL: vcvttpd2udqs ymm2, zmm3, {sae}
+0x62,0xf5,0xfc,0x18,0x6c,0xd3
+
+# ATT: vcvttpd2udqs %zmm3, %ymm2 {%k7}
+# INTEL: vcvttpd2udqs ymm2 {k7}, zmm3
+0x62,0xf5,0xfc,0x4f,0x6c,0xd3
+
+# ATT: vcvttpd2udqs {sae}, %zmm3, %ymm2 {%k7} {z}
+# INTEL: vcvttpd2udqs ymm2 {k7} {z}, zmm3, {sae}
+0x62,0xf5,0xfc,0x9f,0x6c,0xd3
+
+# ATT: vcvttpd2udqsx 268435456(%esp,%esi,8), %xmm2
+# INTEL: vcvttpd2udqs xmm2, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0xfc,0x08,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttpd2udqsx 291(%edi,%eax,4), %xmm2 {%k7}
+# INTEL: vcvttpd2udqs xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0xfc,0x0f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcvttpd2udqs (%eax){1to2}, %xmm2
+# INTEL: vcvttpd2udqs xmm2, qword ptr [eax]{1to2}
+0x62,0xf5,0xfc,0x18,0x6c,0x10
+
+# ATT: vcvttpd2udqsx -512(,%ebp,2), %xmm2
+# INTEL: vcvttpd2udqs xmm2, xmmword ptr [2*ebp - 512]
+0x62,0xf5,0xfc,0x08,0x6c,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vcvttpd2udqsx 2032(%ecx), %xmm2 {%k7} {z}
+# INTEL: vcvttpd2udqs xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+0x62,0xf5,0xfc,0x8f,0x6c,0x51,0x7f
+
+# ATT: vcvttpd2udqs -1024(%edx){1to2}, %xmm2 {%k7} {z}
+# INTEL: vcvttpd2udqs xmm2 {k7} {z}, qword ptr [edx - 1024]{1to2}
+0x62,0xf5,0xfc,0x9f,0x6c,0x52,0x80
+
+# ATT: vcvttpd2udqs (%eax){1to4}, %xmm2
+# INTEL: vcvttpd2udqs xmm2, qword ptr [eax]{1to4}
+0x62,0xf5,0xfc,0x38,0x6c,0x10
+
+# ATT: vcvttpd2udqsy -1024(,%ebp,2), %xmm2
+# INTEL: vcvttpd2udqs xmm2, ymmword ptr [2*ebp - 1024]
+0x62,0xf5,0xfc,0x28,0x6c,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vcvttpd2udqsy 4064(%ecx), %xmm2 {%k7} {z}
+# INTEL: vcvttpd2udqs xmm2 {k7} {z}, ymmword ptr [ecx + 4064]
+0x62,0xf5,0xfc,0xaf,0x6c,0x51,0x7f
+
+# ATT: vcvttpd2udqs -1024(%edx){1to4}, %xmm2 {%k7} {z}
+# INTEL: vcvttpd2udqs xmm2 {k7} {z}, qword ptr [edx - 1024]{1to4}
+0x62,0xf5,0xfc,0xbf,0x6c,0x52,0x80
+
+# ATT: vcvttpd2udqs 268435456(%esp,%esi,8), %ymm2
+# INTEL: vcvttpd2udqs ymm2, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0xfc,0x48,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttpd2udqs 291(%edi,%eax,4), %ymm2 {%k7}
+# INTEL: vcvttpd2udqs ymm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0xfc,0x4f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcvttpd2udqs (%eax){1to8}, %ymm2
+# INTEL: vcvttpd2udqs ymm2, qword ptr [eax]{1to8}
+0x62,0xf5,0xfc,0x58,0x6c,0x10
+
+# ATT: vcvttpd2udqs -2048(,%ebp,2), %ymm2
+# INTEL: vcvttpd2udqs ymm2, zmmword ptr [2*ebp - 2048]
+0x62,0xf5,0xfc,0x48,0x6c,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vcvttpd2udqs 8128(%ecx), %ymm2 {%k7} {z}
+# INTEL: vcvttpd2udqs ymm2 {k7} {z}, zmmword ptr [ecx + 8128]
+0x62,0xf5,0xfc,0xcf,0x6c,0x51,0x7f
+
+# ATT: vcvttpd2udqs -1024(%edx){1to8}, %ymm2 {%k7} {z}
+# INTEL: vcvttpd2udqs ymm2 {k7} {z}, qword ptr [edx - 1024]{1to8}
+0x62,0xf5,0xfc,0xdf,0x6c,0x52,0x80
+
+# ATT: vcvttpd2uqqs %xmm3, %xmm2
+# INTEL: vcvttpd2uqqs xmm2, xmm3
+0x62,0xf5,0xfd,0x08,0x6c,0xd3
+
+# ATT: vcvttpd2uqqs %xmm3, %xmm2 {%k7}
+# INTEL: vcvttpd2uqqs xmm2 {k7}, xmm3
+0x62,0xf5,0xfd,0x0f,0x6c,0xd3
+
+# ATT: vcvttpd2uqqs %xmm3, %xmm2 {%k7} {z}
+# INTEL: vcvttpd2uqqs xmm2 {k7} {z}, xmm3
+0x62,0xf5,0xfd,0x8f,0x6c,0xd3
+
+# ATT: vcvttpd2uqqs %ymm3, %ymm2
+# INTEL: vcvttpd2uqqs ymm2, ymm3
+0x62,0xf5,0xfd,0x28,0x6c,0xd3
+
+# ATT: vcvttpd2uqqs {sae}, %ymm3, %ymm2
+# INTEL: vcvttpd2uqqs ymm2, ymm3, {sae}
+0x62,0xf5,0xf9,0x18,0x6c,0xd3
+
+# ATT: vcvttpd2uqqs %ymm3, %ymm2 {%k7}
+# INTEL: vcvttpd2uqqs ymm2 {k7}, ymm3
+0x62,0xf5,0xfd,0x2f,0x6c,0xd3
+
+# ATT: vcvttpd2uqqs {sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vcvttpd2uqqs ymm2 {k7} {z}, ymm3, {sae}
+0x62,0xf5,0xf9,0x9f,0x6c,0xd3
+
+# ATT: vcvttpd2uqqs %zmm3, %zmm2
+# INTEL: vcvttpd2uqqs zmm2, zmm3
+0x62,0xf5,0xfd,0x48,0x6c,0xd3
+
+# ATT: vcvttpd2uqqs {sae}, %zmm3, %zmm2
+# INTEL: vcvttpd2uqqs zmm2, zmm3, {sae}
+0x62,0xf5,0xfd,0x18,0x6c,0xd3
+
+# ATT: vcvttpd2uqqs %zmm3, %zmm2 {%k7}
+# INTEL: vcvttpd2uqqs zmm2 {k7}, zmm3
+0x62,0xf5,0xfd,0x4f,0x6c,0xd3
+
+# ATT: vcvttpd2uqqs {sae}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vcvttpd2uqqs zmm2 {k7} {z}, zmm3, {sae}
+0x62,0xf5,0xfd,0x9f,0x6c,0xd3
+
+# ATT: vcvttpd2uqqs 268435456(%esp,%esi,8), %xmm2
+# INTEL: vcvttpd2uqqs xmm2, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0xfd,0x08,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttpd2uqqs 291(%edi,%eax,4), %xmm2 {%k7}
+# INTEL: vcvttpd2uqqs xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0xfd,0x0f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcvttpd2uqqs (%eax){1to2}, %xmm2
+# INTEL: vcvttpd2uqqs xmm2, qword ptr [eax]{1to2}
+0x62,0xf5,0xfd,0x18,0x6c,0x10
+
+# ATT: vcvttpd2uqqs -512(,%ebp,2), %xmm2
+# INTEL: vcvttpd2uqqs xmm2, xmmword ptr [2*ebp - 512]
+0x62,0xf5,0xfd,0x08,0x6c,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vcvttpd2uqqs 2032(%ecx), %xmm2 {%k7} {z}
+# INTEL: vcvttpd2uqqs xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+0x62,0xf5,0xfd,0x8f,0x6c,0x51,0x7f
+
+# ATT: vcvttpd2uqqs -1024(%edx){1to2}, %xmm2 {%k7} {z}
+# INTEL: vcvttpd2uqqs xmm2 {k7} {z}, qword ptr [edx - 1024]{1to2}
+0x62,0xf5,0xfd,0x9f,0x6c,0x52,0x80
+
+# ATT: vcvttpd2uqqs 268435456(%esp,%esi,8), %ymm2
+# INTEL: vcvttpd2uqqs ymm2, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0xfd,0x28,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttpd2uqqs 291(%edi,%eax,4), %ymm2 {%k7}
+# INTEL: vcvttpd2uqqs ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0xfd,0x2f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcvttpd2uqqs (%eax){1to4}, %ymm2
+# INTEL: vcvttpd2uqqs ymm2, qword ptr [eax]{1to4}
+0x62,0xf5,0xfd,0x38,0x6c,0x10
+
+# ATT: vcvttpd2uqqs -1024(,%ebp,2), %ymm2
+# INTEL: vcvttpd2uqqs ymm2, ymmword ptr [2*ebp - 1024]
+0x62,0xf5,0xfd,0x28,0x6c,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vcvttpd2uqqs 4064(%ecx), %ymm2 {%k7} {z}
+# INTEL: vcvttpd2uqqs ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+0x62,0xf5,0xfd,0xaf,0x6c,0x51,0x7f
+
+# ATT: vcvttpd2uqqs -1024(%edx){1to4}, %ymm2 {%k7} {z}
+# INTEL: vcvttpd2uqqs ymm2 {k7} {z}, qword ptr [edx - 1024]{1to4}
+0x62,0xf5,0xfd,0xbf,0x6c,0x52,0x80
+
+# ATT: vcvttpd2uqqs 268435456(%esp,%esi,8), %zmm2
+# INTEL: vcvttpd2uqqs zmm2, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0xfd,0x48,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttpd2uqqs 291(%edi,%eax,4), %zmm2 {%k7}
+# INTEL: vcvttpd2uqqs zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0xfd,0x4f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcvttpd2uqqs (%eax){1to8}, %zmm2
+# INTEL: vcvttpd2uqqs zmm2, qword ptr [eax]{1to8}
+0x62,0xf5,0xfd,0x58,0x6c,0x10
+
+# ATT: vcvttpd2uqqs -2048(,%ebp,2), %zmm2
+# INTEL: vcvttpd2uqqs zmm2, zmmword ptr [2*ebp - 2048]
+0x62,0xf5,0xfd,0x48,0x6c,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vcvttpd2uqqs 8128(%ecx), %zmm2 {%k7} {z}
+# INTEL: vcvttpd2uqqs zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+0x62,0xf5,0xfd,0xcf,0x6c,0x51,0x7f
+
+# ATT: vcvttpd2uqqs -1024(%edx){1to8}, %zmm2 {%k7} {z}
+# INTEL: vcvttpd2uqqs zmm2 {k7} {z}, qword ptr [edx - 1024]{1to8}
+0x62,0xf5,0xfd,0xdf,0x6c,0x52,0x80
+
+# ATT: vcvttps2dqs %xmm3, %xmm2
+# INTEL: vcvttps2dqs xmm2, xmm3
+0x62,0xf5,0x7c,0x08,0x6d,0xd3
+
+# ATT: vcvttps2dqs %xmm3, %xmm2 {%k7}
+# INTEL: vcvttps2dqs xmm2 {k7}, xmm3
+0x62,0xf5,0x7c,0x0f,0x6d,0xd3
+
+# ATT: vcvttps2dqs %xmm3, %xmm2 {%k7} {z}
+# INTEL: vcvttps2dqs xmm2 {k7} {z}, xmm3
+0x62,0xf5,0x7c,0x8f,0x6d,0xd3
+
+# ATT: vcvttps2dqs %ymm3, %ymm2
+# INTEL: vcvttps2dqs ymm2, ymm3
+0x62,0xf5,0x7c,0x28,0x6d,0xd3
+
+# ATT: vcvttps2dqs {sae}, %ymm3, %ymm2
+# INTEL: vcvttps2dqs ymm2, ymm3, {sae}
+0x62,0xf5,0x78,0x18,0x6d,0xd3
+
+# ATT: vcvttps2dqs %ymm3, %ymm2 {%k7}
+# INTEL: vcvttps2dqs ymm2 {k7}, ymm3
+0x62,0xf5,0x7c,0x2f,0x6d,0xd3
+
+# ATT: vcvttps2dqs {sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vcvttps2dqs ymm2 {k7} {z}, ymm3, {sae}
+0x62,0xf5,0x78,0x9f,0x6d,0xd3
+
+# ATT: vcvttps2dqs %zmm3, %zmm2
+# INTEL: vcvttps2dqs zmm2, zmm3
+0x62,0xf5,0x7c,0x48,0x6d,0xd3
+
+# ATT: vcvttps2dqs {sae}, %zmm3, %zmm2
+# INTEL: vcvttps2dqs zmm2, zmm3, {sae}
+0x62,0xf5,0x7c,0x18,0x6d,0xd3
+
+# ATT: vcvttps2dqs %zmm3, %zmm2 {%k7}
+# INTEL: vcvttps2dqs zmm2 {k7}, zmm3
+0x62,0xf5,0x7c,0x4f,0x6d,0xd3
+
+# ATT: vcvttps2dqs {sae}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vcvttps2dqs zmm2 {k7} {z}, zmm3, {sae}
+0x62,0xf5,0x7c,0x9f,0x6d,0xd3
+
+# ATT: vcvttps2dqs 268435456(%esp,%esi,8), %xmm2
+# INTEL: vcvttps2dqs xmm2, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7c,0x08,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttps2dqs 291(%edi,%eax,4), %xmm2 {%k7}
+# INTEL: vcvttps2dqs xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7c,0x0f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcvttps2dqs (%eax){1to4}, %xmm2
+# INTEL: vcvttps2dqs xmm2, dword ptr [eax]{1to4}
+0x62,0xf5,0x7c,0x18,0x6d,0x10
+
+# ATT: vcvttps2dqs -512(,%ebp,2), %xmm2
+# INTEL: vcvttps2dqs xmm2, xmmword ptr [2*ebp - 512]
+0x62,0xf5,0x7c,0x08,0x6d,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vcvttps2dqs 2032(%ecx), %xmm2 {%k7} {z}
+# INTEL: vcvttps2dqs xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7c,0x8f,0x6d,0x51,0x7f
+
+# ATT: vcvttps2dqs -512(%edx){1to4}, %xmm2 {%k7} {z}
+# INTEL: vcvttps2dqs xmm2 {k7} {z}, dword ptr [edx - 512]{1to4}
+0x62,0xf5,0x7c,0x9f,0x6d,0x52,0x80
+
+# ATT: vcvttps2dqs 268435456(%esp,%esi,8), %ymm2
+# INTEL: vcvttps2dqs ymm2, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7c,0x28,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttps2dqs 291(%edi,%eax,4), %ymm2 {%k7}
+# INTEL: vcvttps2dqs ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7c,0x2f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcvttps2dqs (%eax){1to8}, %ymm2
+# INTEL: vcvttps2dqs ymm2, dword ptr [eax]{1to8}
+0x62,0xf5,0x7c,0x38,0x6d,0x10
+
+# ATT: vcvttps2dqs -1024(,%ebp,2), %ymm2
+# INTEL: vcvttps2dqs ymm2, ymmword ptr [2*ebp - 1024]
+0x62,0xf5,0x7c,0x28,0x6d,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vcvttps2dqs 4064(%ecx), %ymm2 {%k7} {z}
+# INTEL: vcvttps2dqs ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x7c,0xaf,0x6d,0x51,0x7f
+
+# ATT: vcvttps2dqs -512(%edx){1to8}, %ymm2 {%k7} {z}
+# INTEL: vcvttps2dqs ymm2 {k7} {z}, dword ptr [edx - 512]{1to8}
+0x62,0xf5,0x7c,0xbf,0x6d,0x52,0x80
+
+# ATT: vcvttps2dqs 268435456(%esp,%esi,8), %zmm2
+# INTEL: vcvttps2dqs zmm2, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7c,0x48,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttps2dqs 291(%edi,%eax,4), %zmm2 {%k7}
+# INTEL: vcvttps2dqs zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7c,0x4f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcvttps2dqs (%eax){1to16}, %zmm2
+# INTEL: vcvttps2dqs zmm2, dword ptr [eax]{1to16}
+0x62,0xf5,0x7c,0x58,0x6d,0x10
+
+# ATT: vcvttps2dqs -2048(,%ebp,2), %zmm2
+# INTEL: vcvttps2dqs zmm2, zmmword ptr [2*ebp - 2048]
+0x62,0xf5,0x7c,0x48,0x6d,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vcvttps2dqs 8128(%ecx), %zmm2 {%k7} {z}
+# INTEL: vcvttps2dqs zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+0x62,0xf5,0x7c,0xcf,0x6d,0x51,0x7f
+
+# ATT: vcvttps2dqs -512(%edx){1to16}, %zmm2 {%k7} {z}
+# INTEL: vcvttps2dqs zmm2 {k7} {z}, dword ptr [edx - 512]{1to16}
+0x62,0xf5,0x7c,0xdf,0x6d,0x52,0x80
+
+# ATT: vcvttps2qqs %xmm3, %xmm2
+# INTEL: vcvttps2qqs xmm2, xmm3
+0x62,0xf5,0x7d,0x08,0x6d,0xd3
+
+# ATT: vcvttps2qqs %xmm3, %xmm2 {%k7}
+# INTEL: vcvttps2qqs xmm2 {k7}, xmm3
+0x62,0xf5,0x7d,0x0f,0x6d,0xd3
+
+# ATT: vcvttps2qqs %xmm3, %xmm2 {%k7} {z}
+# INTEL: vcvttps2qqs xmm2 {k7} {z}, xmm3
+0x62,0xf5,0x7d,0x8f,0x6d,0xd3
+
+# ATT: vcvttps2qqs %xmm3, %ymm2
+# INTEL: vcvttps2qqs ymm2, xmm3
+0x62,0xf5,0x7d,0x28,0x6d,0xd3
+
+# ATT: vcvttps2qqs {sae}, %xmm3, %ymm2
+# INTEL: vcvttps2qqs ymm2, xmm3, {sae}
+0x62,0xf5,0x79,0x18,0x6d,0xd3
+
+# ATT: vcvttps2qqs %xmm3, %ymm2 {%k7}
+# INTEL: vcvttps2qqs ymm2 {k7}, xmm3
+0x62,0xf5,0x7d,0x2f,0x6d,0xd3
+
+# ATT: vcvttps2qqs {sae}, %xmm3, %ymm2 {%k7} {z}
+# INTEL: vcvttps2qqs ymm2 {k7} {z}, xmm3, {sae}
+0x62,0xf5,0x79,0x9f,0x6d,0xd3
+
+# ATT: vcvttps2qqs %ymm3, %zmm2
+# INTEL: vcvttps2qqs zmm2, ymm3
+0x62,0xf5,0x7d,0x48,0x6d,0xd3
+
+# ATT: vcvttps2qqs {sae}, %ymm3, %zmm2
+# INTEL: vcvttps2qqs zmm2, ymm3, {sae}
+0x62,0xf5,0x7d,0x18,0x6d,0xd3
+
+# ATT: vcvttps2qqs %ymm3, %zmm2 {%k7}
+# INTEL: vcvttps2qqs zmm2 {k7}, ymm3
+0x62,0xf5,0x7d,0x4f,0x6d,0xd3
+
+# ATT: vcvttps2qqs {sae}, %ymm3, %zmm2 {%k7} {z}
+# INTEL: vcvttps2qqs zmm2 {k7} {z}, ymm3, {sae}
+0x62,0xf5,0x7d,0x9f,0x6d,0xd3
+
+# ATT: vcvttps2qqs 268435456(%esp,%esi,8), %xmm2
+# INTEL: vcvttps2qqs xmm2, qword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x08,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttps2qqs 291(%edi,%eax,4), %xmm2 {%k7}
+# INTEL: vcvttps2qqs xmm2 {k7}, qword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7d,0x0f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcvttps2qqs (%eax){1to2}, %xmm2
+# INTEL: vcvttps2qqs xmm2, dword ptr [eax]{1to2}
+0x62,0xf5,0x7d,0x18,0x6d,0x10
+
+# ATT: vcvttps2qqs -256(,%ebp,2), %xmm2
+# INTEL: vcvttps2qqs xmm2, qword ptr [2*ebp - 256]
+0x62,0xf5,0x7d,0x08,0x6d,0x14,0x6d,0x00,0xff,0xff,0xff
+
+# ATT: vcvttps2qqs 1016(%ecx), %xmm2 {%k7} {z}
+# INTEL: vcvttps2qqs xmm2 {k7} {z}, qword ptr [ecx + 1016]
+0x62,0xf5,0x7d,0x8f,0x6d,0x51,0x7f
+
+# ATT: vcvttps2qqs -512(%edx){1to2}, %xmm2 {%k7} {z}
+# INTEL: vcvttps2qqs xmm2 {k7} {z}, dword ptr [edx - 512]{1to2}
+0x62,0xf5,0x7d,0x9f,0x6d,0x52,0x80
+
+# ATT: vcvttps2qqs 268435456(%esp,%esi,8), %ymm2
+# INTEL: vcvttps2qqs ymm2, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x28,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttps2qqs 291(%edi,%eax,4), %ymm2 {%k7}
+# INTEL: vcvttps2qqs ymm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7d,0x2f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcvttps2qqs (%eax){1to4}, %ymm2
+# INTEL: vcvttps2qqs ymm2, dword ptr [eax]{1to4}
+0x62,0xf5,0x7d,0x38,0x6d,0x10
+
+# ATT: vcvttps2qqs -512(,%ebp,2), %ymm2
+# INTEL: vcvttps2qqs ymm2, xmmword ptr [2*ebp - 512]
+0x62,0xf5,0x7d,0x28,0x6d,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vcvttps2qqs 2032(%ecx), %ymm2 {%k7} {z}
+# INTEL: vcvttps2qqs ymm2 {k7} {z}, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7d,0xaf,0x6d,0x51,0x7f
+
+# ATT: vcvttps2qqs -512(%edx){1to4}, %ymm2 {%k7} {z}
+# INTEL: vcvttps2qqs ymm2 {k7} {z}, dword ptr [edx - 512]{1to4}
+0x62,0xf5,0x7d,0xbf,0x6d,0x52,0x80
+
+# ATT: vcvttps2qqs 268435456(%esp,%esi,8), %zmm2
+# INTEL: vcvttps2qqs zmm2, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x48,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttps2qqs 291(%edi,%eax,4), %zmm2 {%k7}
+# INTEL: vcvttps2qqs zmm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7d,0x4f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcvttps2qqs (%eax){1to8}, %zmm2
+# INTEL: vcvttps2qqs zmm2, dword ptr [eax]{1to8}
+0x62,0xf5,0x7d,0x58,0x6d,0x10
+
+# ATT: vcvttps2qqs -1024(,%ebp,2), %zmm2
+# INTEL: vcvttps2qqs zmm2, ymmword ptr [2*ebp - 1024]
+0x62,0xf5,0x7d,0x48,0x6d,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vcvttps2qqs 4064(%ecx), %zmm2 {%k7} {z}
+# INTEL: vcvttps2qqs zmm2 {k7} {z}, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x7d,0xcf,0x6d,0x51,0x7f
+
+# ATT: vcvttps2qqs -512(%edx){1to8}, %zmm2 {%k7} {z}
+# INTEL: vcvttps2qqs zmm2 {k7} {z}, dword ptr [edx - 512]{1to8}
+0x62,0xf5,0x7d,0xdf,0x6d,0x52,0x80
+
+# ATT: vcvttps2udqs %xmm3, %xmm2
+# INTEL: vcvttps2udqs xmm2, xmm3
+0x62,0xf5,0x7c,0x08,0x6c,0xd3
+
+# ATT: vcvttps2udqs %xmm3, %xmm2 {%k7}
+# INTEL: vcvttps2udqs xmm2 {k7}, xmm3
+0x62,0xf5,0x7c,0x0f,0x6c,0xd3
+
+# ATT: vcvttps2udqs %xmm3, %xmm2 {%k7} {z}
+# INTEL: vcvttps2udqs xmm2 {k7} {z}, xmm3
+0x62,0xf5,0x7c,0x8f,0x6c,0xd3
+
+# ATT: vcvttps2udqs %ymm3, %ymm2
+# INTEL: vcvttps2udqs ymm2, ymm3
+0x62,0xf5,0x7c,0x28,0x6c,0xd3
+
+# ATT: vcvttps2udqs {sae}, %ymm3, %ymm2
+# INTEL: vcvttps2udqs ymm2, ymm3, {sae}
+0x62,0xf5,0x78,0x18,0x6c,0xd3
+
+# ATT: vcvttps2udqs %ymm3, %ymm2 {%k7}
+# INTEL: vcvttps2udqs ymm2 {k7}, ymm3
+0x62,0xf5,0x7c,0x2f,0x6c,0xd3
+
+# ATT: vcvttps2udqs {sae}, %ymm3, %ymm2 {%k7} {z}
+# INTEL: vcvttps2udqs ymm2 {k7} {z}, ymm3, {sae}
+0x62,0xf5,0x78,0x9f,0x6c,0xd3
+
+# ATT: vcvttps2udqs %zmm3, %zmm2
+# INTEL: vcvttps2udqs zmm2, zmm3
+0x62,0xf5,0x7c,0x48,0x6c,0xd3
+
+# ATT: vcvttps2udqs {sae}, %zmm3, %zmm2
+# INTEL: vcvttps2udqs zmm2, zmm3, {sae}
+0x62,0xf5,0x7c,0x18,0x6c,0xd3
+
+# ATT: vcvttps2udqs %zmm3, %zmm2 {%k7}
+# INTEL: vcvttps2udqs zmm2 {k7}, zmm3
+0x62,0xf5,0x7c,0x4f,0x6c,0xd3
+
+# ATT: vcvttps2udqs {sae}, %zmm3, %zmm2 {%k7} {z}
+# INTEL: vcvttps2udqs zmm2 {k7} {z}, zmm3, {sae}
+0x62,0xf5,0x7c,0x9f,0x6c,0xd3
+
+# ATT: vcvttps2udqs 268435456(%esp,%esi,8), %xmm2
+# INTEL: vcvttps2udqs xmm2, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7c,0x08,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttps2udqs 291(%edi,%eax,4), %xmm2 {%k7}
+# INTEL: vcvttps2udqs xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7c,0x0f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcvttps2udqs (%eax){1to4}, %xmm2
+# INTEL: vcvttps2udqs xmm2, dword ptr [eax]{1to4}
+0x62,0xf5,0x7c,0x18,0x6c,0x10
+
+# ATT: vcvttps2udqs -512(,%ebp,2), %xmm2
+# INTEL: vcvttps2udqs xmm2, xmmword ptr [2*ebp - 512]
+0x62,0xf5,0x7c,0x08,0x6c,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vcvttps2udqs 2032(%ecx), %xmm2 {%k7} {z}
+# INTEL: vcvttps2udqs xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7c,0x8f,0x6c,0x51,0x7f
+
+# ATT: vcvttps2udqs -512(%edx){1to4}, %xmm2 {%k7} {z}
+# INTEL: vcvttps2udqs xmm2 {k7} {z}, dword ptr [edx - 512]{1to4}
+0x62,0xf5,0x7c,0x9f,0x6c,0x52,0x80
+
+# ATT: vcvttps2udqs 268435456(%esp,%esi,8), %ymm2
+# INTEL: vcvttps2udqs ymm2, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7c,0x28,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttps2udqs 291(%edi,%eax,4), %ymm2 {%k7}
+# INTEL: vcvttps2udqs ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7c,0x2f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcvttps2udqs (%eax){1to8}, %ymm2
+# INTEL: vcvttps2udqs ymm2, dword ptr [eax]{1to8}
+0x62,0xf5,0x7c,0x38,0x6c,0x10
+
+# ATT: vcvttps2udqs -1024(,%ebp,2), %ymm2
+# INTEL: vcvttps2udqs ymm2, ymmword ptr [2*ebp - 1024]
+0x62,0xf5,0x7c,0x28,0x6c,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vcvttps2udqs 4064(%ecx), %ymm2 {%k7} {z}
+# INTEL: vcvttps2udqs ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x7c,0xaf,0x6c,0x51,0x7f
+
+# ATT: vcvttps2udqs -512(%edx){1to8}, %ymm2 {%k7} {z}
+# INTEL: vcvttps2udqs ymm2 {k7} {z}, dword ptr [edx - 512]{1to8}
+0x62,0xf5,0x7c,0xbf,0x6c,0x52,0x80
+
+# ATT: vcvttps2udqs 268435456(%esp,%esi,8), %zmm2
+# INTEL: vcvttps2udqs zmm2, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7c,0x48,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttps2udqs 291(%edi,%eax,4), %zmm2 {%k7}
+# INTEL: vcvttps2udqs zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7c,0x4f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcvttps2udqs (%eax){1to16}, %zmm2
+# INTEL: vcvttps2udqs zmm2, dword ptr [eax]{1to16}
+0x62,0xf5,0x7c,0x58,0x6c,0x10
+
+# ATT: vcvttps2udqs -2048(,%ebp,2), %zmm2
+# INTEL: vcvttps2udqs zmm2, zmmword ptr [2*ebp - 2048]
+0x62,0xf5,0x7c,0x48,0x6c,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vcvttps2udqs 8128(%ecx), %zmm2 {%k7} {z}
+# INTEL: vcvttps2udqs zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+0x62,0xf5,0x7c,0xcf,0x6c,0x51,0x7f
+
+# ATT: vcvttps2udqs -512(%edx){1to16}, %zmm2 {%k7} {z}
+# INTEL: vcvttps2udqs zmm2 {k7} {z}, dword ptr [edx - 512]{1to16}
+0x62,0xf5,0x7c,0xdf,0x6c,0x52,0x80
+
+# ATT: vcvttps2uqqs %xmm3, %xmm2
+# INTEL: vcvttps2uqqs xmm2, xmm3
+0x62,0xf5,0x7d,0x08,0x6c,0xd3
+
+# ATT: vcvttps2uqqs %xmm3, %xmm2 {%k7}
+# INTEL: vcvttps2uqqs xmm2 {k7}, xmm3
+0x62,0xf5,0x7d,0x0f,0x6c,0xd3
+
+# ATT: vcvttps2uqqs %xmm3, %xmm2 {%k7} {z}
+# INTEL: vcvttps2uqqs xmm2 {k7} {z}, xmm3
+0x62,0xf5,0x7d,0x8f,0x6c,0xd3
+
+# ATT: vcvttps2uqqs %xmm3, %ymm2
+# INTEL: vcvttps2uqqs ymm2, xmm3
+0x62,0xf5,0x7d,0x28,0x6c,0xd3
+
+# ATT: vcvttps2uqqs {sae}, %xmm3, %ymm2
+# INTEL: vcvttps2uqqs ymm2, xmm3, {sae}
+0x62,0xf5,0x79,0x18,0x6c,0xd3
+
+# ATT: vcvttps2uqqs %xmm3, %ymm2 {%k7}
+# INTEL: vcvttps2uqqs ymm2 {k7}, xmm3
+0x62,0xf5,0x7d,0x2f,0x6c,0xd3
+
+# ATT: vcvttps2uqqs {sae}, %xmm3, %ymm2 {%k7} {z}
+# INTEL: vcvttps2uqqs ymm2 {k7} {z}, xmm3, {sae}
+0x62,0xf5,0x79,0x9f,0x6c,0xd3
+
+# ATT: vcvttps2uqqs %ymm3, %zmm2
+# INTEL: vcvttps2uqqs zmm2, ymm3
+0x62,0xf5,0x7d,0x48,0x6c,0xd3
+
+# ATT: vcvttps2uqqs {sae}, %ymm3, %zmm2
+# INTEL: vcvttps2uqqs zmm2, ymm3, {sae}
+0x62,0xf5,0x7d,0x18,0x6c,0xd3
+
+# ATT: vcvttps2uqqs %ymm3, %zmm2 {%k7}
+# INTEL: vcvttps2uqqs zmm2 {k7}, ymm3
+0x62,0xf5,0x7d,0x4f,0x6c,0xd3
+
+# ATT: vcvttps2uqqs {sae}, %ymm3, %zmm2 {%k7} {z}
+# INTEL: vcvttps2uqqs zmm2 {k7} {z}, ymm3, {sae}
+0x62,0xf5,0x7d,0x9f,0x6c,0xd3
+
+# ATT: vcvttps2uqqs 268435456(%esp,%esi,8), %xmm2
+# INTEL: vcvttps2uqqs xmm2, qword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x08,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttps2uqqs 291(%edi,%eax,4), %xmm2 {%k7}
+# INTEL: vcvttps2uqqs xmm2 {k7}, qword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7d,0x0f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcvttps2uqqs (%eax){1to2}, %xmm2
+# INTEL: vcvttps2uqqs xmm2, dword ptr [eax]{1to2}
+0x62,0xf5,0x7d,0x18,0x6c,0x10
+
+# ATT: vcvttps2uqqs -256(,%ebp,2), %xmm2
+# INTEL: vcvttps2uqqs xmm2, qword ptr [2*ebp - 256]
+0x62,0xf5,0x7d,0x08,0x6c,0x14,0x6d,0x00,0xff,0xff,0xff
+
+# ATT: vcvttps2uqqs 1016(%ecx), %xmm2 {%k7} {z}
+# INTEL: vcvttps2uqqs xmm2 {k7} {z}, qword ptr [ecx + 1016]
+0x62,0xf5,0x7d,0x8f,0x6c,0x51,0x7f
+
+# ATT: vcvttps2uqqs -512(%edx){1to2}, %xmm2 {%k7} {z}
+# INTEL: vcvttps2uqqs xmm2 {k7} {z}, dword ptr [edx - 512]{1to2}
+0x62,0xf5,0x7d,0x9f,0x6c,0x52,0x80
+
+# ATT: vcvttps2uqqs 268435456(%esp,%esi,8), %ymm2
+# INTEL: vcvttps2uqqs ymm2, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x28,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttps2uqqs 291(%edi,%eax,4), %ymm2 {%k7}
+# INTEL: vcvttps2uqqs ymm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7d,0x2f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcvttps2uqqs (%eax){1to4}, %ymm2
+# INTEL: vcvttps2uqqs ymm2, dword ptr [eax]{1to4}
+0x62,0xf5,0x7d,0x38,0x6c,0x10
+
+# ATT: vcvttps2uqqs -512(,%ebp,2), %ymm2
+# INTEL: vcvttps2uqqs ymm2, xmmword ptr [2*ebp - 512]
+0x62,0xf5,0x7d,0x28,0x6c,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vcvttps2uqqs 2032(%ecx), %ymm2 {%k7} {z}
+# INTEL: vcvttps2uqqs ymm2 {k7} {z}, xmmword ptr [ecx + 2032]
+0x62,0xf5,0x7d,0xaf,0x6c,0x51,0x7f
+
+# ATT: vcvttps2uqqs -512(%edx){1to4}, %ymm2 {%k7} {z}
+# INTEL: vcvttps2uqqs ymm2 {k7} {z}, dword ptr [edx - 512]{1to4}
+0x62,0xf5,0x7d,0xbf,0x6c,0x52,0x80
+
+# ATT: vcvttps2uqqs 268435456(%esp,%esi,8), %zmm2
+# INTEL: vcvttps2uqqs zmm2, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7d,0x48,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttps2uqqs 291(%edi,%eax,4), %zmm2 {%k7}
+# INTEL: vcvttps2uqqs zmm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7d,0x4f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcvttps2uqqs (%eax){1to8}, %zmm2
+# INTEL: vcvttps2uqqs zmm2, dword ptr [eax]{1to8}
+0x62,0xf5,0x7d,0x58,0x6c,0x10
+
+# ATT: vcvttps2uqqs -1024(,%ebp,2), %zmm2
+# INTEL: vcvttps2uqqs zmm2, ymmword ptr [2*ebp - 1024]
+0x62,0xf5,0x7d,0x48,0x6c,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vcvttps2uqqs 4064(%ecx), %zmm2 {%k7} {z}
+# INTEL: vcvttps2uqqs zmm2 {k7} {z}, ymmword ptr [ecx + 4064]
+0x62,0xf5,0x7d,0xcf,0x6c,0x51,0x7f
+
+# ATT: vcvttps2uqqs -512(%edx){1to8}, %zmm2 {%k7} {z}
+# INTEL: vcvttps2uqqs zmm2 {k7} {z}, dword ptr [edx - 512]{1to8}
+0x62,0xf5,0x7d,0xdf,0x6c,0x52,0x80
+
+# ATT: vcvttsd2sis %xmm2, %ecx
+# INTEL: vcvttsd2sis ecx, xmm2
+0x62,0xf5,0x7f,0x08,0x6d,0xca
+
+# ATT: vcvttsd2sis {sae}, %xmm2, %ecx
+# INTEL: vcvttsd2sis ecx, xmm2, {sae}
+0x62,0xf5,0x7f,0x18,0x6d,0xca
+
+# ATT: vcvttsd2sis 268435456(%esp,%esi,8), %ecx
+# INTEL: vcvttsd2sis ecx, qword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7f,0x08,0x6d,0x8c,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttsd2sis 291(%edi,%eax,4), %ecx
+# INTEL: vcvttsd2sis ecx, qword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7f,0x08,0x6d,0x8c,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcvttsd2sis (%eax), %ecx
+# INTEL: vcvttsd2sis ecx, qword ptr [eax]
+0x62,0xf5,0x7f,0x08,0x6d,0x08
+
+# ATT: vcvttsd2sis -256(,%ebp,2), %ecx
+# INTEL: vcvttsd2sis ecx, qword ptr [2*ebp - 256]
+0x62,0xf5,0x7f,0x08,0x6d,0x0c,0x6d,0x00,0xff,0xff,0xff
+
+# ATT: vcvttsd2sis 1016(%ecx), %ecx
+# INTEL: vcvttsd2sis ecx, qword ptr [ecx + 1016]
+0x62,0xf5,0x7f,0x08,0x6d,0x49,0x7f
+
+# ATT: vcvttsd2sis -1024(%edx), %ecx
+# INTEL: vcvttsd2sis ecx, qword ptr [edx - 1024]
+0x62,0xf5,0x7f,0x08,0x6d,0x4a,0x80
+
+# ATT: vcvttsd2usis %xmm2, %ecx
+# INTEL: vcvttsd2usis ecx, xmm2
+0x62,0xf5,0x7f,0x08,0x6c,0xca
+
+# ATT: vcvttsd2usis {sae}, %xmm2, %ecx
+# INTEL: vcvttsd2usis ecx, xmm2, {sae}
+0x62,0xf5,0x7f,0x18,0x6c,0xca
+
+# ATT: vcvttsd2usis 268435456(%esp,%esi,8), %ecx
+# INTEL: vcvttsd2usis ecx, qword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7f,0x08,0x6c,0x8c,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttsd2usis 291(%edi,%eax,4), %ecx
+# INTEL: vcvttsd2usis ecx, qword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7f,0x08,0x6c,0x8c,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcvttsd2usis (%eax), %ecx
+# INTEL: vcvttsd2usis ecx, qword ptr [eax]
+0x62,0xf5,0x7f,0x08,0x6c,0x08
+
+# ATT: vcvttsd2usis -256(,%ebp,2), %ecx
+# INTEL: vcvttsd2usis ecx, qword ptr [2*ebp - 256]
+0x62,0xf5,0x7f,0x08,0x6c,0x0c,0x6d,0x00,0xff,0xff,0xff
+
+# ATT: vcvttsd2usis 1016(%ecx), %ecx
+# INTEL: vcvttsd2usis ecx, qword ptr [ecx + 1016]
+0x62,0xf5,0x7f,0x08,0x6c,0x49,0x7f
+
+# ATT: vcvttsd2usis -1024(%edx), %ecx
+# INTEL: vcvttsd2usis ecx, qword ptr [edx - 1024]
+0x62,0xf5,0x7f,0x08,0x6c,0x4a,0x80
+
+# ATT: vcvttss2sis %xmm2, %ecx
+# INTEL: vcvttss2sis ecx, xmm2
+0x62,0xf5,0x7e,0x08,0x6d,0xca
+
+# ATT: vcvttss2sis {sae}, %xmm2, %ecx
+# INTEL: vcvttss2sis ecx, xmm2, {sae}
+0x62,0xf5,0x7e,0x18,0x6d,0xca
+
+# ATT: vcvttss2sis 268435456(%esp,%esi,8), %ecx
+# INTEL: vcvttss2sis ecx, dword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7e,0x08,0x6d,0x8c,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttss2sis 291(%edi,%eax,4), %ecx
+# INTEL: vcvttss2sis ecx, dword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7e,0x08,0x6d,0x8c,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcvttss2sis (%eax), %ecx
+# INTEL: vcvttss2sis ecx, dword ptr [eax]
+0x62,0xf5,0x7e,0x08,0x6d,0x08
+
+# ATT: vcvttss2sis -128(,%ebp,2), %ecx
+# INTEL: vcvttss2sis ecx, dword ptr [2*ebp - 128]
+0x62,0xf5,0x7e,0x08,0x6d,0x0c,0x6d,0x80,0xff,0xff,0xff
+
+# ATT: vcvttss2sis 508(%ecx), %ecx
+# INTEL: vcvttss2sis ecx, dword ptr [ecx + 508]
+0x62,0xf5,0x7e,0x08,0x6d,0x49,0x7f
+
+# ATT: vcvttss2sis -512(%edx), %ecx
+# INTEL: vcvttss2sis ecx, dword ptr [edx - 512]
+0x62,0xf5,0x7e,0x08,0x6d,0x4a,0x80
+
+# ATT: vcvttss2usis %xmm2, %ecx
+# INTEL: vcvttss2usis ecx, xmm2
+0x62,0xf5,0x7e,0x08,0x6c,0xca
+
+# ATT: vcvttss2usis {sae}, %xmm2, %ecx
+# INTEL: vcvttss2usis ecx, xmm2, {sae}
+0x62,0xf5,0x7e,0x18,0x6c,0xca
+
+# ATT: vcvttss2usis 268435456(%esp,%esi,8), %ecx
+# INTEL: vcvttss2usis ecx, dword ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7e,0x08,0x6c,0x8c,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcvttss2usis 291(%edi,%eax,4), %ecx
+# INTEL: vcvttss2usis ecx, dword ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7e,0x08,0x6c,0x8c,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcvttss2usis (%eax), %ecx
+# INTEL: vcvttss2usis ecx, dword ptr [eax]
+0x62,0xf5,0x7e,0x08,0x6c,0x08
+
+# ATT: vcvttss2usis -128(,%ebp,2), %ecx
+# INTEL: vcvttss2usis ecx, dword ptr [2*ebp - 128]
+0x62,0xf5,0x7e,0x08,0x6c,0x0c,0x6d,0x80,0xff,0xff,0xff
+
+# ATT: vcvttss2usis 508(%ecx), %ecx
+# INTEL: vcvttss2usis ecx, dword ptr [ecx + 508]
+0x62,0xf5,0x7e,0x08,0x6c,0x49,0x7f
+
+# ATT: vcvttss2usis -512(%edx), %ecx
+# INTEL: vcvttss2usis ecx, dword ptr [edx - 512]
+0x62,0xf5,0x7e,0x08,0x6c,0x4a,0x80
+
diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-satcvtds-64.txt b/llvm/test/MC/Disassembler/X86/avx10.2-satcvtds-64.txt
new file mode 100644
index 00000000000000..c0c3340dcc4350
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx10.2-satcvtds-64.txt
@@ -0,0 +1,1171 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
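+# Each case below pairs the expected AT&T and Intel renderings with the raw
+# EVEX encoding bytes fed to the disassembler. For illustration only (plain
+# comment lines are ignored by FileCheck), a single case can be reproduced by
+# piping its bytes through a locally built llvm-mc, e.g.:
+#   echo "0x62,0xa5,0xfc,0x08,0x6d,0xf7" | llvm-mc --disassemble -triple=x86_64
+# which is expected to print: vcvttpd2dqs %xmm23, %xmm22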
+# ATT: vcvttpd2dqs %xmm23, %xmm22
+# INTEL: vcvttpd2dqs xmm22, xmm23
+0x62,0xa5,0xfc,0x08,0x6d,0xf7
+
+# ATT: vcvttpd2dqs %xmm23, %xmm22 {%k7}
+# INTEL: vcvttpd2dqs xmm22 {k7}, xmm23
+0x62,0xa5,0xfc,0x0f,0x6d,0xf7
+
+# ATT: vcvttpd2dqs %xmm23, %xmm22 {%k7} {z}
+# INTEL: vcvttpd2dqs xmm22 {k7} {z}, xmm23
+0x62,0xa5,0xfc,0x8f,0x6d,0xf7
+
+# ATT: vcvttpd2dqs %ymm23, %xmm22
+# INTEL: vcvttpd2dqs xmm22, ymm23
+0x62,0xa5,0xfc,0x28,0x6d,0xf7
+
+# ATT: vcvttpd2dqs {sae}, %ymm23, %xmm22
+# INTEL: vcvttpd2dqs xmm22, ymm23, {sae}
+0x62,0xa5,0xf8,0x18,0x6d,0xf7
+
+# ATT: vcvttpd2dqs %ymm23, %xmm22 {%k7}
+# INTEL: vcvttpd2dqs xmm22 {k7}, ymm23
+0x62,0xa5,0xfc,0x2f,0x6d,0xf7
+
+# ATT: vcvttpd2dqs {sae}, %ymm23, %xmm22 {%k7} {z}
+# INTEL: vcvttpd2dqs xmm22 {k7} {z}, ymm23, {sae}
+0x62,0xa5,0xf8,0x9f,0x6d,0xf7
+
+# ATT: vcvttpd2dqs %zmm23, %ymm22
+# INTEL: vcvttpd2dqs ymm22, zmm23
+0x62,0xa5,0xfc,0x48,0x6d,0xf7
+
+# ATT: vcvttpd2dqs {sae}, %zmm23, %ymm22
+# INTEL: vcvttpd2dqs ymm22, zmm23, {sae}
+0x62,0xa5,0xfc,0x18,0x6d,0xf7
+
+# ATT: vcvttpd2dqs %zmm23, %ymm22 {%k7}
+# INTEL: vcvttpd2dqs ymm22 {k7}, zmm23
+0x62,0xa5,0xfc,0x4f,0x6d,0xf7
+
+# ATT: vcvttpd2dqs {sae}, %zmm23, %ymm22 {%k7} {z}
+# INTEL: vcvttpd2dqs ymm22 {k7} {z}, zmm23, {sae}
+0x62,0xa5,0xfc,0x9f,0x6d,0xf7
+
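+# Note: the AT&T "x"/"y" mnemonic suffixes below (vcvttpd2dqsx/vcvttpd2dqsy)
+# disambiguate the memory source width (xmmword vs. ymmword) when both source
+# sizes narrow to an xmm destination; Intel syntax spells the width on the
+# memory operand instead, so it needs no suffix.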
+# ATT: vcvttpd2dqsx 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vcvttpd2dqs xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0xfc,0x08,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttpd2dqsx 291(%r8,%rax,4), %xmm22 {%k7}
+# INTEL: vcvttpd2dqs xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0xfc,0x0f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttpd2dqs (%rip){1to2}, %xmm22
+# INTEL: vcvttpd2dqs xmm22, qword ptr [rip]{1to2}
+0x62,0xe5,0xfc,0x18,0x6d,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vcvttpd2dqsx -512(,%rbp,2), %xmm22
+# INTEL: vcvttpd2dqs xmm22, xmmword ptr [2*rbp - 512]
+0x62,0xe5,0xfc,0x08,0x6d,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vcvttpd2dqsx 2032(%rcx), %xmm22 {%k7} {z}
+# INTEL: vcvttpd2dqs xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+0x62,0xe5,0xfc,0x8f,0x6d,0x71,0x7f
+
+# ATT: vcvttpd2dqs -1024(%rdx){1to2}, %xmm22 {%k7} {z}
+# INTEL: vcvttpd2dqs xmm22 {k7} {z}, qword ptr [rdx - 1024]{1to2}
+0x62,0xe5,0xfc,0x9f,0x6d,0x72,0x80
+
+# ATT: vcvttpd2dqs (%rip){1to4}, %xmm22
+# INTEL: vcvttpd2dqs xmm22, qword ptr [rip]{1to4}
+0x62,0xe5,0xfc,0x38,0x6d,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vcvttpd2dqsy -1024(,%rbp,2), %xmm22
+# INTEL: vcvttpd2dqs xmm22, ymmword ptr [2*rbp - 1024]
+0x62,0xe5,0xfc,0x28,0x6d,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vcvttpd2dqsy 4064(%rcx), %xmm22 {%k7} {z}
+# INTEL: vcvttpd2dqs xmm22 {k7} {z}, ymmword ptr [rcx + 4064]
+0x62,0xe5,0xfc,0xaf,0x6d,0x71,0x7f
+
+# ATT: vcvttpd2dqs -1024(%rdx){1to4}, %xmm22 {%k7} {z}
+# INTEL: vcvttpd2dqs xmm22 {k7} {z}, qword ptr [rdx - 1024]{1to4}
+0x62,0xe5,0xfc,0xbf,0x6d,0x72,0x80
+
+# ATT: vcvttpd2dqs 268435456(%rbp,%r14,8), %ymm22
+# INTEL: vcvttpd2dqs ymm22, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0xfc,0x48,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttpd2dqs 291(%r8,%rax,4), %ymm22 {%k7}
+# INTEL: vcvttpd2dqs ymm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0xfc,0x4f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttpd2dqs (%rip){1to8}, %ymm22
+# INTEL: vcvttpd2dqs ymm22, qword ptr [rip]{1to8}
+0x62,0xe5,0xfc,0x58,0x6d,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vcvttpd2dqs -2048(,%rbp,2), %ymm22
+# INTEL: vcvttpd2dqs ymm22, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0xfc,0x48,0x6d,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vcvttpd2dqs 8128(%rcx), %ymm22 {%k7} {z}
+# INTEL: vcvttpd2dqs ymm22 {k7} {z}, zmmword ptr [rcx + 8128]
+0x62,0xe5,0xfc,0xcf,0x6d,0x71,0x7f
+
+# ATT: vcvttpd2dqs -1024(%rdx){1to8}, %ymm22 {%k7} {z}
+# INTEL: vcvttpd2dqs ymm22 {k7} {z}, qword ptr [rdx - 1024]{1to8}
+0x62,0xe5,0xfc,0xdf,0x6d,0x72,0x80
+
+# ATT: vcvttpd2qqs %xmm23, %xmm22
+# INTEL: vcvttpd2qqs xmm22, xmm23
+0x62,0xa5,0xfd,0x08,0x6d,0xf7
+
+# ATT: vcvttpd2qqs %xmm23, %xmm22 {%k7}
+# INTEL: vcvttpd2qqs xmm22 {k7}, xmm23
+0x62,0xa5,0xfd,0x0f,0x6d,0xf7
+
+# ATT: vcvttpd2qqs %xmm23, %xmm22 {%k7} {z}
+# INTEL: vcvttpd2qqs xmm22 {k7} {z}, xmm23
+0x62,0xa5,0xfd,0x8f,0x6d,0xf7
+
+# ATT: vcvttpd2qqs %ymm23, %ymm22
+# INTEL: vcvttpd2qqs ymm22, ymm23
+0x62,0xa5,0xfd,0x28,0x6d,0xf7
+
+# ATT: vcvttpd2qqs {sae}, %ymm23, %ymm22
+# INTEL: vcvttpd2qqs ymm22, ymm23, {sae}
+0x62,0xa5,0xf9,0x18,0x6d,0xf7
+
+# ATT: vcvttpd2qqs %ymm23, %ymm22 {%k7}
+# INTEL: vcvttpd2qqs ymm22 {k7}, ymm23
+0x62,0xa5,0xfd,0x2f,0x6d,0xf7
+
+# ATT: vcvttpd2qqs {sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vcvttpd2qqs ymm22 {k7} {z}, ymm23, {sae}
+0x62,0xa5,0xf9,0x9f,0x6d,0xf7
+
+# ATT: vcvttpd2qqs %zmm23, %zmm22
+# INTEL: vcvttpd2qqs zmm22, zmm23
+0x62,0xa5,0xfd,0x48,0x6d,0xf7
+
+# ATT: vcvttpd2qqs {sae}, %zmm23, %zmm22
+# INTEL: vcvttpd2qqs zmm22, zmm23, {sae}
+0x62,0xa5,0xfd,0x18,0x6d,0xf7
+
+# ATT: vcvttpd2qqs %zmm23, %zmm22 {%k7}
+# INTEL: vcvttpd2qqs zmm22 {k7}, zmm23
+0x62,0xa5,0xfd,0x4f,0x6d,0xf7
+
+# ATT: vcvttpd2qqs {sae}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vcvttpd2qqs zmm22 {k7} {z}, zmm23, {sae}
+0x62,0xa5,0xfd,0x9f,0x6d,0xf7
+
+# ATT: vcvttpd2qqs 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vcvttpd2qqs xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0xfd,0x08,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttpd2qqs 291(%r8,%rax,4), %xmm22 {%k7}
+# INTEL: vcvttpd2qqs xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0xfd,0x0f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttpd2qqs (%rip){1to2}, %xmm22
+# INTEL: vcvttpd2qqs xmm22, qword ptr [rip]{1to2}
+0x62,0xe5,0xfd,0x18,0x6d,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vcvttpd2qqs -512(,%rbp,2), %xmm22
+# INTEL: vcvttpd2qqs xmm22, xmmword ptr [2*rbp - 512]
+0x62,0xe5,0xfd,0x08,0x6d,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vcvttpd2qqs 2032(%rcx), %xmm22 {%k7} {z}
+# INTEL: vcvttpd2qqs xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+0x62,0xe5,0xfd,0x8f,0x6d,0x71,0x7f
+
+# ATT: vcvttpd2qqs -1024(%rdx){1to2}, %xmm22 {%k7} {z}
+# INTEL: vcvttpd2qqs xmm22 {k7} {z}, qword ptr [rdx - 1024]{1to2}
+0x62,0xe5,0xfd,0x9f,0x6d,0x72,0x80
+
+# ATT: vcvttpd2qqs 268435456(%rbp,%r14,8), %ymm22
+# INTEL: vcvttpd2qqs ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0xfd,0x28,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttpd2qqs 291(%r8,%rax,4), %ymm22 {%k7}
+# INTEL: vcvttpd2qqs ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0xfd,0x2f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttpd2qqs (%rip){1to4}, %ymm22
+# INTEL: vcvttpd2qqs ymm22, qword ptr [rip]{1to4}
+0x62,0xe5,0xfd,0x38,0x6d,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vcvttpd2qqs -1024(,%rbp,2), %ymm22
+# INTEL: vcvttpd2qqs ymm22, ymmword ptr [2*rbp - 1024]
+0x62,0xe5,0xfd,0x28,0x6d,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vcvttpd2qqs 4064(%rcx), %ymm22 {%k7} {z}
+# INTEL: vcvttpd2qqs ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+0x62,0xe5,0xfd,0xaf,0x6d,0x71,0x7f
+
+# ATT: vcvttpd2qqs -1024(%rdx){1to4}, %ymm22 {%k7} {z}
+# INTEL: vcvttpd2qqs ymm22 {k7} {z}, qword ptr [rdx - 1024]{1to4}
+0x62,0xe5,0xfd,0xbf,0x6d,0x72,0x80
+
+# ATT: vcvttpd2qqs 268435456(%rbp,%r14,8), %zmm22
+# INTEL: vcvttpd2qqs zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0xfd,0x48,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttpd2qqs 291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vcvttpd2qqs zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0xfd,0x4f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttpd2qqs (%rip){1to8}, %zmm22
+# INTEL: vcvttpd2qqs zmm22, qword ptr [rip]{1to8}
+0x62,0xe5,0xfd,0x58,0x6d,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vcvttpd2qqs -2048(,%rbp,2), %zmm22
+# INTEL: vcvttpd2qqs zmm22, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0xfd,0x48,0x6d,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vcvttpd2qqs 8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vcvttpd2qqs zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+0x62,0xe5,0xfd,0xcf,0x6d,0x71,0x7f
+
+# ATT: vcvttpd2qqs -1024(%rdx){1to8}, %zmm22 {%k7} {z}
+# INTEL: vcvttpd2qqs zmm22 {k7} {z}, qword ptr [rdx - 1024]{1to8}
+0x62,0xe5,0xfd,0xdf,0x6d,0x72,0x80
+
+# ATT: vcvttpd2udqs %xmm23, %xmm22
+# INTEL: vcvttpd2udqs xmm22, xmm23
+0x62,0xa5,0xfc,0x08,0x6c,0xf7
+
+# ATT: vcvttpd2udqs %xmm23, %xmm22 {%k7}
+# INTEL: vcvttpd2udqs xmm22 {k7}, xmm23
+0x62,0xa5,0xfc,0x0f,0x6c,0xf7
+
+# ATT: vcvttpd2udqs %xmm23, %xmm22 {%k7} {z}
+# INTEL: vcvttpd2udqs xmm22 {k7} {z}, xmm23
+0x62,0xa5,0xfc,0x8f,0x6c,0xf7
+
+# ATT: vcvttpd2udqs %ymm23, %xmm22
+# INTEL: vcvttpd2udqs xmm22, ymm23
+0x62,0xa5,0xfc,0x28,0x6c,0xf7
+
+# ATT: vcvttpd2udqs {sae}, %ymm23, %xmm22
+# INTEL: vcvttpd2udqs xmm22, ymm23, {sae}
+0x62,0xa5,0xf8,0x18,0x6c,0xf7
+
+# ATT: vcvttpd2udqs %ymm23, %xmm22 {%k7}
+# INTEL: vcvttpd2udqs xmm22 {k7}, ymm23
+0x62,0xa5,0xfc,0x2f,0x6c,0xf7
+
+# ATT: vcvttpd2udqs {sae}, %ymm23, %xmm22 {%k7} {z}
+# INTEL: vcvttpd2udqs xmm22 {k7} {z}, ymm23, {sae}
+0x62,0xa5,0xf8,0x9f,0x6c,0xf7
+
+# ATT: vcvttpd2udqs %zmm23, %ymm22
+# INTEL: vcvttpd2udqs ymm22, zmm23
+0x62,0xa5,0xfc,0x48,0x6c,0xf7
+
+# ATT: vcvttpd2udqs {sae}, %zmm23, %ymm22
+# INTEL: vcvttpd2udqs ymm22, zmm23, {sae}
+0x62,0xa5,0xfc,0x18,0x6c,0xf7
+
+# ATT: vcvttpd2udqs %zmm23, %ymm22 {%k7}
+# INTEL: vcvttpd2udqs ymm22 {k7}, zmm23
+0x62,0xa5,0xfc,0x4f,0x6c,0xf7
+
+# ATT: vcvttpd2udqs {sae}, %zmm23, %ymm22 {%k7} {z}
+# INTEL: vcvttpd2udqs ymm22 {k7} {z}, zmm23, {sae}
+0x62,0xa5,0xfc,0x9f,0x6c,0xf7
+
+# ATT: vcvttpd2udqsx 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vcvttpd2udqs xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0xfc,0x08,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttpd2udqsx 291(%r8,%rax,4), %xmm22 {%k7}
+# INTEL: vcvttpd2udqs xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0xfc,0x0f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttpd2udqs (%rip){1to2}, %xmm22
+# INTEL: vcvttpd2udqs xmm22, qword ptr [rip]{1to2}
+0x62,0xe5,0xfc,0x18,0x6c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vcvttpd2udqsx -512(,%rbp,2), %xmm22
+# INTEL: vcvttpd2udqs xmm22, xmmword ptr [2*rbp - 512]
+0x62,0xe5,0xfc,0x08,0x6c,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vcvttpd2udqsx 2032(%rcx), %xmm22 {%k7} {z}
+# INTEL: vcvttpd2udqs xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+0x62,0xe5,0xfc,0x8f,0x6c,0x71,0x7f
+
+# ATT: vcvttpd2udqs -1024(%rdx){1to2}, %xmm22 {%k7} {z}
+# INTEL: vcvttpd2udqs xmm22 {k7} {z}, qword ptr [rdx - 1024]{1to2}
+0x62,0xe5,0xfc,0x9f,0x6c,0x72,0x80
+
+# ATT: vcvttpd2udqs (%rip){1to4}, %xmm22
+# INTEL: vcvttpd2udqs xmm22, qword ptr [rip]{1to4}
+0x62,0xe5,0xfc,0x38,0x6c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vcvttpd2udqsy -1024(,%rbp,2), %xmm22
+# INTEL: vcvttpd2udqs xmm22, ymmword ptr [2*rbp - 1024]
+0x62,0xe5,0xfc,0x28,0x6c,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vcvttpd2udqsy 4064(%rcx), %xmm22 {%k7} {z}
+# INTEL: vcvttpd2udqs xmm22 {k7} {z}, ymmword ptr [rcx + 4064]
+0x62,0xe5,0xfc,0xaf,0x6c,0x71,0x7f
+
+# ATT: vcvttpd2udqs -1024(%rdx){1to4}, %xmm22 {%k7} {z}
+# INTEL: vcvttpd2udqs xmm22 {k7} {z}, qword ptr [rdx - 1024]{1to4}
+0x62,0xe5,0xfc,0xbf,0x6c,0x72,0x80
+
+# ATT: vcvttpd2udqs 268435456(%rbp,%r14,8), %ymm22
+# INTEL: vcvttpd2udqs ymm22, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0xfc,0x48,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttpd2udqs 291(%r8,%rax,4), %ymm22 {%k7}
+# INTEL: vcvttpd2udqs ymm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0xfc,0x4f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttpd2udqs (%rip){1to8}, %ymm22
+# INTEL: vcvttpd2udqs ymm22, qword ptr [rip]{1to8}
+0x62,0xe5,0xfc,0x58,0x6c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vcvttpd2udqs -2048(,%rbp,2), %ymm22
+# INTEL: vcvttpd2udqs ymm22, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0xfc,0x48,0x6c,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vcvttpd2udqs 8128(%rcx), %ymm22 {%k7} {z}
+# INTEL: vcvttpd2udqs ymm22 {k7} {z}, zmmword ptr [rcx + 8128]
+0x62,0xe5,0xfc,0xcf,0x6c,0x71,0x7f
+
+# ATT: vcvttpd2udqs -1024(%rdx){1to8}, %ymm22 {%k7} {z}
+# INTEL: vcvttpd2udqs ymm22 {k7} {z}, qword ptr [rdx - 1024]{1to8}
+0x62,0xe5,0xfc,0xdf,0x6c,0x72,0x80
+
+# ATT: vcvttpd2uqqs %xmm23, %xmm22
+# INTEL: vcvttpd2uqqs xmm22, xmm23
+0x62,0xa5,0xfd,0x08,0x6c,0xf7
+
+# ATT: vcvttpd2uqqs %xmm23, %xmm22 {%k7}
+# INTEL: vcvttpd2uqqs xmm22 {k7}, xmm23
+0x62,0xa5,0xfd,0x0f,0x6c,0xf7
+
+# ATT: vcvttpd2uqqs %xmm23, %xmm22 {%k7} {z}
+# INTEL: vcvttpd2uqqs xmm22 {k7} {z}, xmm23
+0x62,0xa5,0xfd,0x8f,0x6c,0xf7
+
+# ATT: vcvttpd2uqqs %ymm23, %ymm22
+# INTEL: vcvttpd2uqqs ymm22, ymm23
+0x62,0xa5,0xfd,0x28,0x6c,0xf7
+
+# ATT: vcvttpd2uqqs {sae}, %ymm23, %ymm22
+# INTEL: vcvttpd2uqqs ymm22, ymm23, {sae}
+0x62,0xa5,0xf9,0x18,0x6c,0xf7
+
+# ATT: vcvttpd2uqqs %ymm23, %ymm22 {%k7}
+# INTEL: vcvttpd2uqqs ymm22 {k7}, ymm23
+0x62,0xa5,0xfd,0x2f,0x6c,0xf7
+
+# ATT: vcvttpd2uqqs {sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vcvttpd2uqqs ymm22 {k7} {z}, ymm23, {sae}
+0x62,0xa5,0xf9,0x9f,0x6c,0xf7
+
+# ATT: vcvttpd2uqqs %zmm23, %zmm22
+# INTEL: vcvttpd2uqqs zmm22, zmm23
+0x62,0xa5,0xfd,0x48,0x6c,0xf7
+
+# ATT: vcvttpd2uqqs {sae}, %zmm23, %zmm22
+# INTEL: vcvttpd2uqqs zmm22, zmm23, {sae}
+0x62,0xa5,0xfd,0x18,0x6c,0xf7
+
+# ATT: vcvttpd2uqqs %zmm23, %zmm22 {%k7}
+# INTEL: vcvttpd2uqqs zmm22 {k7}, zmm23
+0x62,0xa5,0xfd,0x4f,0x6c,0xf7
+
+# ATT: vcvttpd2uqqs {sae}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vcvttpd2uqqs zmm22 {k7} {z}, zmm23, {sae}
+0x62,0xa5,0xfd,0x9f,0x6c,0xf7
+
+# ATT: vcvttpd2uqqs 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vcvttpd2uqqs xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0xfd,0x08,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttpd2uqqs 291(%r8,%rax,4), %xmm22 {%k7}
+# INTEL: vcvttpd2uqqs xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0xfd,0x0f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttpd2uqqs (%rip){1to2}, %xmm22
+# INTEL: vcvttpd2uqqs xmm22, qword ptr [rip]{1to2}
+0x62,0xe5,0xfd,0x18,0x6c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vcvttpd2uqqs -512(,%rbp,2), %xmm22
+# INTEL: vcvttpd2uqqs xmm22, xmmword ptr [2*rbp - 512]
+0x62,0xe5,0xfd,0x08,0x6c,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vcvttpd2uqqs 2032(%rcx), %xmm22 {%k7} {z}
+# INTEL: vcvttpd2uqqs xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+0x62,0xe5,0xfd,0x8f,0x6c,0x71,0x7f
+
+# ATT: vcvttpd2uqqs -1024(%rdx){1to2}, %xmm22 {%k7} {z}
+# INTEL: vcvttpd2uqqs xmm22 {k7} {z}, qword ptr [rdx - 1024]{1to2}
+0x62,0xe5,0xfd,0x9f,0x6c,0x72,0x80
+
+# ATT: vcvttpd2uqqs 268435456(%rbp,%r14,8), %ymm22
+# INTEL: vcvttpd2uqqs ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0xfd,0x28,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttpd2uqqs 291(%r8,%rax,4), %ymm22 {%k7}
+# INTEL: vcvttpd2uqqs ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0xfd,0x2f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttpd2uqqs (%rip){1to4}, %ymm22
+# INTEL: vcvttpd2uqqs ymm22, qword ptr [rip]{1to4}
+0x62,0xe5,0xfd,0x38,0x6c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vcvttpd2uqqs -1024(,%rbp,2), %ymm22
+# INTEL: vcvttpd2uqqs ymm22, ymmword ptr [2*rbp - 1024]
+0x62,0xe5,0xfd,0x28,0x6c,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vcvttpd2uqqs 4064(%rcx), %ymm22 {%k7} {z}
+# INTEL: vcvttpd2uqqs ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+0x62,0xe5,0xfd,0xaf,0x6c,0x71,0x7f
+
+# ATT: vcvttpd2uqqs -1024(%rdx){1to4}, %ymm22 {%k7} {z}
+# INTEL: vcvttpd2uqqs ymm22 {k7} {z}, qword ptr [rdx - 1024]{1to4}
+0x62,0xe5,0xfd,0xbf,0x6c,0x72,0x80
+
+# ATT: vcvttpd2uqqs 268435456(%rbp,%r14,8), %zmm22
+# INTEL: vcvttpd2uqqs zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0xfd,0x48,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttpd2uqqs 291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vcvttpd2uqqs zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0xfd,0x4f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttpd2uqqs (%rip){1to8}, %zmm22
+# INTEL: vcvttpd2uqqs zmm22, qword ptr [rip]{1to8}
+0x62,0xe5,0xfd,0x58,0x6c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vcvttpd2uqqs -2048(,%rbp,2), %zmm22
+# INTEL: vcvttpd2uqqs zmm22, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0xfd,0x48,0x6c,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vcvttpd2uqqs 8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vcvttpd2uqqs zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+0x62,0xe5,0xfd,0xcf,0x6c,0x71,0x7f
+
+# ATT: vcvttpd2uqqs -1024(%rdx){1to8}, %zmm22 {%k7} {z}
+# INTEL: vcvttpd2uqqs zmm22 {k7} {z}, qword ptr [rdx - 1024]{1to8}
+0x62,0xe5,0xfd,0xdf,0x6c,0x72,0x80
+
+# ATT: vcvttps2dqs %xmm23, %xmm22
+# INTEL: vcvttps2dqs xmm22, xmm23
+0x62,0xa5,0x7c,0x08,0x6d,0xf7
+
+# ATT: vcvttps2dqs %xmm23, %xmm22 {%k7}
+# INTEL: vcvttps2dqs xmm22 {k7}, xmm23
+0x62,0xa5,0x7c,0x0f,0x6d,0xf7
+
+# ATT: vcvttps2dqs %xmm23, %xmm22 {%k7} {z}
+# INTEL: vcvttps2dqs xmm22 {k7} {z}, xmm23
+0x62,0xa5,0x7c,0x8f,0x6d,0xf7
+
+# ATT: vcvttps2dqs %ymm23, %ymm22
+# INTEL: vcvttps2dqs ymm22, ymm23
+0x62,0xa5,0x7c,0x28,0x6d,0xf7
+
+# ATT: vcvttps2dqs {sae}, %ymm23, %ymm22
+# INTEL: vcvttps2dqs ymm22, ymm23, {sae}
+0x62,0xa5,0x78,0x18,0x6d,0xf7
+
+# ATT: vcvttps2dqs %ymm23, %ymm22 {%k7}
+# INTEL: vcvttps2dqs ymm22 {k7}, ymm23
+0x62,0xa5,0x7c,0x2f,0x6d,0xf7
+
+# ATT: vcvttps2dqs {sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vcvttps2dqs ymm22 {k7} {z}, ymm23, {sae}
+0x62,0xa5,0x78,0x9f,0x6d,0xf7
+
+# ATT: vcvttps2dqs %zmm23, %zmm22
+# INTEL: vcvttps2dqs zmm22, zmm23
+0x62,0xa5,0x7c,0x48,0x6d,0xf7
+
+# ATT: vcvttps2dqs {sae}, %zmm23, %zmm22
+# INTEL: vcvttps2dqs zmm22, zmm23, {sae}
+0x62,0xa5,0x7c,0x18,0x6d,0xf7
+
+# ATT: vcvttps2dqs %zmm23, %zmm22 {%k7}
+# INTEL: vcvttps2dqs zmm22 {k7}, zmm23
+0x62,0xa5,0x7c,0x4f,0x6d,0xf7
+
+# ATT: vcvttps2dqs {sae}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vcvttps2dqs zmm22 {k7} {z}, zmm23, {sae}
+0x62,0xa5,0x7c,0x9f,0x6d,0xf7
+
+# ATT: vcvttps2dqs 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vcvttps2dqs xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7c,0x08,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttps2dqs 291(%r8,%rax,4), %xmm22 {%k7}
+# INTEL: vcvttps2dqs xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7c,0x0f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttps2dqs (%rip){1to4}, %xmm22
+# INTEL: vcvttps2dqs xmm22, dword ptr [rip]{1to4}
+0x62,0xe5,0x7c,0x18,0x6d,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vcvttps2dqs -512(,%rbp,2), %xmm22
+# INTEL: vcvttps2dqs xmm22, xmmword ptr [2*rbp - 512]
+0x62,0xe5,0x7c,0x08,0x6d,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vcvttps2dqs 2032(%rcx), %xmm22 {%k7} {z}
+# INTEL: vcvttps2dqs xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+0x62,0xe5,0x7c,0x8f,0x6d,0x71,0x7f
+
+# ATT: vcvttps2dqs -512(%rdx){1to4}, %xmm22 {%k7} {z}
+# INTEL: vcvttps2dqs xmm22 {k7} {z}, dword ptr [rdx - 512]{1to4}
+0x62,0xe5,0x7c,0x9f,0x6d,0x72,0x80
+
+# ATT: vcvttps2dqs 268435456(%rbp,%r14,8), %ymm22
+# INTEL: vcvttps2dqs ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7c,0x28,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttps2dqs 291(%r8,%rax,4), %ymm22 {%k7}
+# INTEL: vcvttps2dqs ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7c,0x2f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttps2dqs (%rip){1to8}, %ymm22
+# INTEL: vcvttps2dqs ymm22, dword ptr [rip]{1to8}
+0x62,0xe5,0x7c,0x38,0x6d,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vcvttps2dqs -1024(,%rbp,2), %ymm22
+# INTEL: vcvttps2dqs ymm22, ymmword ptr [2*rbp - 1024]
+0x62,0xe5,0x7c,0x28,0x6d,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vcvttps2dqs 4064(%rcx), %ymm22 {%k7} {z}
+# INTEL: vcvttps2dqs ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+0x62,0xe5,0x7c,0xaf,0x6d,0x71,0x7f
+
+# ATT: vcvttps2dqs -512(%rdx){1to8}, %ymm22 {%k7} {z}
+# INTEL: vcvttps2dqs ymm22 {k7} {z}, dword ptr [rdx - 512]{1to8}
+0x62,0xe5,0x7c,0xbf,0x6d,0x72,0x80
+
+# ATT: vcvttps2dqs 268435456(%rbp,%r14,8), %zmm22
+# INTEL: vcvttps2dqs zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7c,0x48,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttps2dqs 291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vcvttps2dqs zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7c,0x4f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttps2dqs (%rip){1to16}, %zmm22
+# INTEL: vcvttps2dqs zmm22, dword ptr [rip]{1to16}
+0x62,0xe5,0x7c,0x58,0x6d,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vcvttps2dqs -2048(,%rbp,2), %zmm22
+# INTEL: vcvttps2dqs zmm22, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0x7c,0x48,0x6d,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vcvttps2dqs 8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vcvttps2dqs zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+0x62,0xe5,0x7c,0xcf,0x6d,0x71,0x7f
+
+# ATT: vcvttps2dqs -512(%rdx){1to16}, %zmm22 {%k7} {z}
+# INTEL: vcvttps2dqs zmm22 {k7} {z}, dword ptr [rdx - 512]{1to16}
+0x62,0xe5,0x7c,0xdf,0x6d,0x72,0x80
+
+# ATT: vcvttps2qqs %xmm23, %xmm22
+# INTEL: vcvttps2qqs xmm22, xmm23
+0x62,0xa5,0x7d,0x08,0x6d,0xf7
+
+# ATT: vcvttps2qqs %xmm23, %xmm22 {%k7}
+# INTEL: vcvttps2qqs xmm22 {k7}, xmm23
+0x62,0xa5,0x7d,0x0f,0x6d,0xf7
+
+# ATT: vcvttps2qqs %xmm23, %xmm22 {%k7} {z}
+# INTEL: vcvttps2qqs xmm22 {k7} {z}, xmm23
+0x62,0xa5,0x7d,0x8f,0x6d,0xf7
+
+# ATT: vcvttps2qqs %xmm23, %ymm22
+# INTEL: vcvttps2qqs ymm22, xmm23
+0x62,0xa5,0x7d,0x28,0x6d,0xf7
+
+# ATT: vcvttps2qqs {sae}, %xmm23, %ymm22
+# INTEL: vcvttps2qqs ymm22, xmm23, {sae}
+0x62,0xa5,0x79,0x18,0x6d,0xf7
+
+# ATT: vcvttps2qqs %xmm23, %ymm22 {%k7}
+# INTEL: vcvttps2qqs ymm22 {k7}, xmm23
+0x62,0xa5,0x7d,0x2f,0x6d,0xf7
+
+# ATT: vcvttps2qqs {sae}, %xmm23, %ymm22 {%k7} {z}
+# INTEL: vcvttps2qqs ymm22 {k7} {z}, xmm23, {sae}
+0x62,0xa5,0x79,0x9f,0x6d,0xf7
+
+# ATT: vcvttps2qqs %ymm23, %zmm22
+# INTEL: vcvttps2qqs zmm22, ymm23
+0x62,0xa5,0x7d,0x48,0x6d,0xf7
+
+# ATT: vcvttps2qqs {sae}, %ymm23, %zmm22
+# INTEL: vcvttps2qqs zmm22, ymm23, {sae}
+0x62,0xa5,0x7d,0x18,0x6d,0xf7
+
+# ATT: vcvttps2qqs %ymm23, %zmm22 {%k7}
+# INTEL: vcvttps2qqs zmm22 {k7}, ymm23
+0x62,0xa5,0x7d,0x4f,0x6d,0xf7
+
+# ATT: vcvttps2qqs {sae}, %ymm23, %zmm22 {%k7} {z}
+# INTEL: vcvttps2qqs zmm22 {k7} {z}, ymm23, {sae}
+0x62,0xa5,0x7d,0x9f,0x6d,0xf7
+
+# ATT: vcvttps2qqs 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vcvttps2qqs xmm22, qword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7d,0x08,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttps2qqs 291(%r8,%rax,4), %xmm22 {%k7}
+# INTEL: vcvttps2qqs xmm22 {k7}, qword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7d,0x0f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttps2qqs (%rip){1to2}, %xmm22
+# INTEL: vcvttps2qqs xmm22, dword ptr [rip]{1to2}
+0x62,0xe5,0x7d,0x18,0x6d,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vcvttps2qqs -256(,%rbp,2), %xmm22
+# INTEL: vcvttps2qqs xmm22, qword ptr [2*rbp - 256]
+0x62,0xe5,0x7d,0x08,0x6d,0x34,0x6d,0x00,0xff,0xff,0xff
+
+# ATT: vcvttps2qqs 1016(%rcx), %xmm22 {%k7} {z}
+# INTEL: vcvttps2qqs xmm22 {k7} {z}, qword ptr [rcx + 1016]
+0x62,0xe5,0x7d,0x8f,0x6d,0x71,0x7f
+
+# ATT: vcvttps2qqs -512(%rdx){1to2}, %xmm22 {%k7} {z}
+# INTEL: vcvttps2qqs xmm22 {k7} {z}, dword ptr [rdx - 512]{1to2}
+0x62,0xe5,0x7d,0x9f,0x6d,0x72,0x80
+
+# ATT: vcvttps2qqs 268435456(%rbp,%r14,8), %ymm22
+# INTEL: vcvttps2qqs ymm22, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7d,0x28,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttps2qqs 291(%r8,%rax,4), %ymm22 {%k7}
+# INTEL: vcvttps2qqs ymm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7d,0x2f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttps2qqs (%rip){1to4}, %ymm22
+# INTEL: vcvttps2qqs ymm22, dword ptr [rip]{1to4}
+0x62,0xe5,0x7d,0x38,0x6d,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vcvttps2qqs -512(,%rbp,2), %ymm22
+# INTEL: vcvttps2qqs ymm22, xmmword ptr [2*rbp - 512]
+0x62,0xe5,0x7d,0x28,0x6d,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vcvttps2qqs 2032(%rcx), %ymm22 {%k7} {z}
+# INTEL: vcvttps2qqs ymm22 {k7} {z}, xmmword ptr [rcx + 2032]
+0x62,0xe5,0x7d,0xaf,0x6d,0x71,0x7f
+
+# ATT: vcvttps2qqs -512(%rdx){1to4}, %ymm22 {%k7} {z}
+# INTEL: vcvttps2qqs ymm22 {k7} {z}, dword ptr [rdx - 512]{1to4}
+0x62,0xe5,0x7d,0xbf,0x6d,0x72,0x80
+
+# ATT: vcvttps2qqs 268435456(%rbp,%r14,8), %zmm22
+# INTEL: vcvttps2qqs zmm22, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7d,0x48,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttps2qqs 291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vcvttps2qqs zmm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7d,0x4f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttps2qqs (%rip){1to8}, %zmm22
+# INTEL: vcvttps2qqs zmm22, dword ptr [rip]{1to8}
+0x62,0xe5,0x7d,0x58,0x6d,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vcvttps2qqs -1024(,%rbp,2), %zmm22
+# INTEL: vcvttps2qqs zmm22, ymmword ptr [2*rbp - 1024]
+0x62,0xe5,0x7d,0x48,0x6d,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vcvttps2qqs 4064(%rcx), %zmm22 {%k7} {z}
+# INTEL: vcvttps2qqs zmm22 {k7} {z}, ymmword ptr [rcx + 4064]
+0x62,0xe5,0x7d,0xcf,0x6d,0x71,0x7f
+
+# ATT: vcvttps2qqs -512(%rdx){1to8}, %zmm22 {%k7} {z}
+# INTEL: vcvttps2qqs zmm22 {k7} {z}, dword ptr [rdx - 512]{1to8}
+0x62,0xe5,0x7d,0xdf,0x6d,0x72,0x80
+
+# ATT: vcvttps2udqs %xmm23, %xmm22
+# INTEL: vcvttps2udqs xmm22, xmm23
+0x62,0xa5,0x7c,0x08,0x6c,0xf7
+
+# ATT: vcvttps2udqs %xmm23, %xmm22 {%k7}
+# INTEL: vcvttps2udqs xmm22 {k7}, xmm23
+0x62,0xa5,0x7c,0x0f,0x6c,0xf7
+
+# ATT: vcvttps2udqs %xmm23, %xmm22 {%k7} {z}
+# INTEL: vcvttps2udqs xmm22 {k7} {z}, xmm23
+0x62,0xa5,0x7c,0x8f,0x6c,0xf7
+
+# ATT: vcvttps2udqs %ymm23, %ymm22
+# INTEL: vcvttps2udqs ymm22, ymm23
+0x62,0xa5,0x7c,0x28,0x6c,0xf7
+
+# ATT: vcvttps2udqs {sae}, %ymm23, %ymm22
+# INTEL: vcvttps2udqs ymm22, ymm23, {sae}
+0x62,0xa5,0x78,0x18,0x6c,0xf7
+
+# ATT: vcvttps2udqs %ymm23, %ymm22 {%k7}
+# INTEL: vcvttps2udqs ymm22 {k7}, ymm23
+0x62,0xa5,0x7c,0x2f,0x6c,0xf7
+
+# ATT: vcvttps2udqs {sae}, %ymm23, %ymm22 {%k7} {z}
+# INTEL: vcvttps2udqs ymm22 {k7} {z}, ymm23, {sae}
+0x62,0xa5,0x78,0x9f,0x6c,0xf7
+
+# ATT: vcvttps2udqs %zmm23, %zmm22
+# INTEL: vcvttps2udqs zmm22, zmm23
+0x62,0xa5,0x7c,0x48,0x6c,0xf7
+
+# ATT: vcvttps2udqs {sae}, %zmm23, %zmm22
+# INTEL: vcvttps2udqs zmm22, zmm23, {sae}
+0x62,0xa5,0x7c,0x18,0x6c,0xf7
+
+# ATT: vcvttps2udqs %zmm23, %zmm22 {%k7}
+# INTEL: vcvttps2udqs zmm22 {k7}, zmm23
+0x62,0xa5,0x7c,0x4f,0x6c,0xf7
+
+# ATT: vcvttps2udqs {sae}, %zmm23, %zmm22 {%k7} {z}
+# INTEL: vcvttps2udqs zmm22 {k7} {z}, zmm23, {sae}
+0x62,0xa5,0x7c,0x9f,0x6c,0xf7
+
+# ATT: vcvttps2udqs 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vcvttps2udqs xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7c,0x08,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttps2udqs 291(%r8,%rax,4), %xmm22 {%k7}
+# INTEL: vcvttps2udqs xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7c,0x0f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttps2udqs (%rip){1to4}, %xmm22
+# INTEL: vcvttps2udqs xmm22, dword ptr [rip]{1to4}
+0x62,0xe5,0x7c,0x18,0x6c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vcvttps2udqs -512(,%rbp,2), %xmm22
+# INTEL: vcvttps2udqs xmm22, xmmword ptr [2*rbp - 512]
+0x62,0xe5,0x7c,0x08,0x6c,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vcvttps2udqs 2032(%rcx), %xmm22 {%k7} {z}
+# INTEL: vcvttps2udqs xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+0x62,0xe5,0x7c,0x8f,0x6c,0x71,0x7f
+
+# ATT: vcvttps2udqs -512(%rdx){1to4}, %xmm22 {%k7} {z}
+# INTEL: vcvttps2udqs xmm22 {k7} {z}, dword ptr [rdx - 512]{1to4}
+0x62,0xe5,0x7c,0x9f,0x6c,0x72,0x80
+
+# ATT: vcvttps2udqs 268435456(%rbp,%r14,8), %ymm22
+# INTEL: vcvttps2udqs ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7c,0x28,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttps2udqs 291(%r8,%rax,4), %ymm22 {%k7}
+# INTEL: vcvttps2udqs ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7c,0x2f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttps2udqs (%rip){1to8}, %ymm22
+# INTEL: vcvttps2udqs ymm22, dword ptr [rip]{1to8}
+0x62,0xe5,0x7c,0x38,0x6c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vcvttps2udqs -1024(,%rbp,2), %ymm22
+# INTEL: vcvttps2udqs ymm22, ymmword ptr [2*rbp - 1024]
+0x62,0xe5,0x7c,0x28,0x6c,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vcvttps2udqs 4064(%rcx), %ymm22 {%k7} {z}
+# INTEL: vcvttps2udqs ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+0x62,0xe5,0x7c,0xaf,0x6c,0x71,0x7f
+
+# ATT: vcvttps2udqs -512(%rdx){1to8}, %ymm22 {%k7} {z}
+# INTEL: vcvttps2udqs ymm22 {k7} {z}, dword ptr [rdx - 512]{1to8}
+0x62,0xe5,0x7c,0xbf,0x6c,0x72,0x80
+
+# ATT: vcvttps2udqs 268435456(%rbp,%r14,8), %zmm22
+# INTEL: vcvttps2udqs zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7c,0x48,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttps2udqs 291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vcvttps2udqs zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7c,0x4f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttps2udqs (%rip){1to16}, %zmm22
+# INTEL: vcvttps2udqs zmm22, dword ptr [rip]{1to16}
+0x62,0xe5,0x7c,0x58,0x6c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vcvttps2udqs -2048(,%rbp,2), %zmm22
+# INTEL: vcvttps2udqs zmm22, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0x7c,0x48,0x6c,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vcvttps2udqs 8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vcvttps2udqs zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+0x62,0xe5,0x7c,0xcf,0x6c,0x71,0x7f
+
+# ATT: vcvttps2udqs -512(%rdx){1to16}, %zmm22 {%k7} {z}
+# INTEL: vcvttps2udqs zmm22 {k7} {z}, dword ptr [rdx - 512]{1to16}
+0x62,0xe5,0x7c,0xdf,0x6c,0x72,0x80
+
+# ATT: vcvttps2uqqs %xmm23, %xmm22
+# INTEL: vcvttps2uqqs xmm22, xmm23
+0x62,0xa5,0x7d,0x08,0x6c,0xf7
+
+# ATT: vcvttps2uqqs %xmm23, %xmm22 {%k7}
+# INTEL: vcvttps2uqqs xmm22 {k7}, xmm23
+0x62,0xa5,0x7d,0x0f,0x6c,0xf7
+
+# ATT: vcvttps2uqqs %xmm23, %xmm22 {%k7} {z}
+# INTEL: vcvttps2uqqs xmm22 {k7} {z}, xmm23
+0x62,0xa5,0x7d,0x8f,0x6c,0xf7
+
+# ATT: vcvttps2uqqs %xmm23, %ymm22
+# INTEL: vcvttps2uqqs ymm22, xmm23
+0x62,0xa5,0x7d,0x28,0x6c,0xf7
+
+# ATT: vcvttps2uqqs {sae}, %xmm23, %ymm22
+# INTEL: vcvttps2uqqs ymm22, xmm23, {sae}
+0x62,0xa5,0x79,0x18,0x6c,0xf7
+
+# ATT: vcvttps2uqqs %xmm23, %ymm22 {%k7}
+# INTEL: vcvttps2uqqs ymm22 {k7}, xmm23
+0x62,0xa5,0x7d,0x2f,0x6c,0xf7
+
+# ATT: vcvttps2uqqs {sae}, %xmm23, %ymm22 {%k7} {z}
+# INTEL: vcvttps2uqqs ymm22 {k7} {z}, xmm23, {sae}
+0x62,0xa5,0x79,0x9f,0x6c,0xf7
+
+# ATT: vcvttps2uqqs %ymm23, %zmm22
+# INTEL: vcvttps2uqqs zmm22, ymm23
+0x62,0xa5,0x7d,0x48,0x6c,0xf7
+
+# ATT: vcvttps2uqqs {sae}, %ymm23, %zmm22
+# INTEL: vcvttps2uqqs zmm22, ymm23, {sae}
+0x62,0xa5,0x7d,0x18,0x6c,0xf7
+
+# ATT: vcvttps2uqqs %ymm23, %zmm22 {%k7}
+# INTEL: vcvttps2uqqs zmm22 {k7}, ymm23
+0x62,0xa5,0x7d,0x4f,0x6c,0xf7
+
+# ATT: vcvttps2uqqs {sae}, %ymm23, %zmm22 {%k7} {z}
+# INTEL: vcvttps2uqqs zmm22 {k7} {z}, ymm23, {sae}
+0x62,0xa5,0x7d,0x9f,0x6c,0xf7
+
+# ATT: vcvttps2uqqs 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vcvttps2uqqs xmm22, qword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7d,0x08,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttps2uqqs 291(%r8,%rax,4), %xmm22 {%k7}
+# INTEL: vcvttps2uqqs xmm22 {k7}, qword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7d,0x0f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttps2uqqs (%rip){1to2}, %xmm22
+# INTEL: vcvttps2uqqs xmm22, dword ptr [rip]{1to2}
+0x62,0xe5,0x7d,0x18,0x6c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vcvttps2uqqs -256(,%rbp,2), %xmm22
+# INTEL: vcvttps2uqqs xmm22, qword ptr [2*rbp - 256]
+0x62,0xe5,0x7d,0x08,0x6c,0x34,0x6d,0x00,0xff,0xff,0xff
+
+# ATT: vcvttps2uqqs 1016(%rcx), %xmm22 {%k7} {z}
+# INTEL: vcvttps2uqqs xmm22 {k7} {z}, qword ptr [rcx + 1016]
+0x62,0xe5,0x7d,0x8f,0x6c,0x71,0x7f
+
+# ATT: vcvttps2uqqs -512(%rdx){1to2}, %xmm22 {%k7} {z}
+# INTEL: vcvttps2uqqs xmm22 {k7} {z}, dword ptr [rdx - 512]{1to2}
+0x62,0xe5,0x7d,0x9f,0x6c,0x72,0x80
+
+# ATT: vcvttps2uqqs 268435456(%rbp,%r14,8), %ymm22
+# INTEL: vcvttps2uqqs ymm22, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7d,0x28,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttps2uqqs 291(%r8,%rax,4), %ymm22 {%k7}
+# INTEL: vcvttps2uqqs ymm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7d,0x2f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttps2uqqs (%rip){1to4}, %ymm22
+# INTEL: vcvttps2uqqs ymm22, dword ptr [rip]{1to4}
+0x62,0xe5,0x7d,0x38,0x6c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vcvttps2uqqs -512(,%rbp,2), %ymm22
+# INTEL: vcvttps2uqqs ymm22, xmmword ptr [2*rbp - 512]
+0x62,0xe5,0x7d,0x28,0x6c,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vcvttps2uqqs 2032(%rcx), %ymm22 {%k7} {z}
+# INTEL: vcvttps2uqqs ymm22 {k7} {z}, xmmword ptr [rcx + 2032]
+0x62,0xe5,0x7d,0xaf,0x6c,0x71,0x7f
+
+# ATT: vcvttps2uqqs -512(%rdx){1to4}, %ymm22 {%k7} {z}
+# INTEL: vcvttps2uqqs ymm22 {k7} {z}, dword ptr [rdx - 512]{1to4}
+0x62,0xe5,0x7d,0xbf,0x6c,0x72,0x80
+
+# ATT: vcvttps2uqqs 268435456(%rbp,%r14,8), %zmm22
+# INTEL: vcvttps2uqqs zmm22, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7d,0x48,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttps2uqqs 291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vcvttps2uqqs zmm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7d,0x4f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttps2uqqs (%rip){1to8}, %zmm22
+# INTEL: vcvttps2uqqs zmm22, dword ptr [rip]{1to8}
+0x62,0xe5,0x7d,0x58,0x6c,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vcvttps2uqqs -1024(,%rbp,2), %zmm22
+# INTEL: vcvttps2uqqs zmm22, ymmword ptr [2*rbp - 1024]
+0x62,0xe5,0x7d,0x48,0x6c,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vcvttps2uqqs 4064(%rcx), %zmm22 {%k7} {z}
+# INTEL: vcvttps2uqqs zmm22 {k7} {z}, ymmword ptr [rcx + 4064]
+0x62,0xe5,0x7d,0xcf,0x6c,0x71,0x7f
+
+# ATT: vcvttps2uqqs -512(%rdx){1to8}, %zmm22 {%k7} {z}
+# INTEL: vcvttps2uqqs zmm22 {k7} {z}, dword ptr [rdx - 512]{1to8}
+0x62,0xe5,0x7d,0xdf,0x6c,0x72,0x80
+
+# ATT: vcvttsd2sis %xmm22, %ecx
+# INTEL: vcvttsd2sis ecx, xmm22
+0x62,0xb5,0x7f,0x08,0x6d,0xce
+
+# ATT: vcvttsd2sis {sae}, %xmm22, %ecx
+# INTEL: vcvttsd2sis ecx, xmm22, {sae}
+0x62,0xb5,0x7f,0x18,0x6d,0xce
+
+# ATT: vcvttsd2sis %xmm22, %r9
+# INTEL: vcvttsd2sis r9, xmm22
+0x62,0x35,0xff,0x08,0x6d,0xce
+
+# ATT: vcvttsd2sis {sae}, %xmm22, %r9
+# INTEL: vcvttsd2sis r9, xmm22, {sae}
+0x62,0x35,0xff,0x18,0x6d,0xce
+
+# ATT: vcvttsd2sis 268435456(%rbp,%r14,8), %ecx
+# INTEL: vcvttsd2sis ecx, qword ptr [rbp + 8*r14 + 268435456]
+0x62,0xb5,0x7f,0x08,0x6d,0x8c,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttsd2sis 291(%r8,%rax,4), %ecx
+# INTEL: vcvttsd2sis ecx, qword ptr [r8 + 4*rax + 291]
+0x62,0xd5,0x7f,0x08,0x6d,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttsd2sis (%rip), %ecx
+# INTEL: vcvttsd2sis ecx, qword ptr [rip]
+0x62,0xf5,0x7f,0x08,0x6d,0x0d,0x00,0x00,0x00,0x00
+
+# ATT: vcvttsd2sis -256(,%rbp,2), %ecx
+# INTEL: vcvttsd2sis ecx, qword ptr [2*rbp - 256]
+0x62,0xf5,0x7f,0x08,0x6d,0x0c,0x6d,0x00,0xff,0xff,0xff
+
+# ATT: vcvttsd2sis 1016(%rcx), %ecx
+# INTEL: vcvttsd2sis ecx, qword ptr [rcx + 1016]
+0x62,0xf5,0x7f,0x08,0x6d,0x49,0x7f
+
+# ATT: vcvttsd2sis -1024(%rdx), %ecx
+# INTEL: vcvttsd2sis ecx, qword ptr [rdx - 1024]
+0x62,0xf5,0x7f,0x08,0x6d,0x4a,0x80
+
+# ATT: vcvttsd2sis 268435456(%rbp,%r14,8), %r9
+# INTEL: vcvttsd2sis r9, qword ptr [rbp + 8*r14 + 268435456]
+0x62,0x35,0xff,0x08,0x6d,0x8c,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttsd2sis 291(%r8,%rax,4), %r9
+# INTEL: vcvttsd2sis r9, qword ptr [r8 + 4*rax + 291]
+0x62,0x55,0xff,0x08,0x6d,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttsd2sis (%rip), %r9
+# INTEL: vcvttsd2sis r9, qword ptr [rip]
+0x62,0x75,0xff,0x08,0x6d,0x0d,0x00,0x00,0x00,0x00
+
+# ATT: vcvttsd2sis -256(,%rbp,2), %r9
+# INTEL: vcvttsd2sis r9, qword ptr [2*rbp - 256]
+0x62,0x75,0xff,0x08,0x6d,0x0c,0x6d,0x00,0xff,0xff,0xff
+
+# ATT: vcvttsd2sis 1016(%rcx), %r9
+# INTEL: vcvttsd2sis r9, qword ptr [rcx + 1016]
+0x62,0x75,0xff,0x08,0x6d,0x49,0x7f
+
+# ATT: vcvttsd2sis -1024(%rdx), %r9
+# INTEL: vcvttsd2sis r9, qword ptr [rdx - 1024]
+0x62,0x75,0xff,0x08,0x6d,0x4a,0x80
+
+# ATT: vcvttsd2usis %xmm22, %ecx
+# INTEL: vcvttsd2usis ecx, xmm22
+0x62,0xb5,0x7f,0x08,0x6c,0xce
+
+# ATT: vcvttsd2usis {sae}, %xmm22, %ecx
+# INTEL: vcvttsd2usis ecx, xmm22, {sae}
+0x62,0xb5,0x7f,0x18,0x6c,0xce
+
+# ATT: vcvttsd2usis %xmm22, %r9
+# INTEL: vcvttsd2usis r9, xmm22
+0x62,0x35,0xff,0x08,0x6c,0xce
+
+# ATT: vcvttsd2usis {sae}, %xmm22, %r9
+# INTEL: vcvttsd2usis r9, xmm22, {sae}
+0x62,0x35,0xff,0x18,0x6c,0xce
+
+# ATT: vcvttsd2usis 268435456(%rbp,%r14,8), %ecx
+# INTEL: vcvttsd2usis ecx, qword ptr [rbp + 8*r14 + 268435456]
+0x62,0xb5,0x7f,0x08,0x6c,0x8c,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttsd2usis 291(%r8,%rax,4), %ecx
+# INTEL: vcvttsd2usis ecx, qword ptr [r8 + 4*rax + 291]
+0x62,0xd5,0x7f,0x08,0x6c,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttsd2usis (%rip), %ecx
+# INTEL: vcvttsd2usis ecx, qword ptr [rip]
+0x62,0xf5,0x7f,0x08,0x6c,0x0d,0x00,0x00,0x00,0x00
+
+# ATT: vcvttsd2usis -256(,%rbp,2), %ecx
+# INTEL: vcvttsd2usis ecx, qword ptr [2*rbp - 256]
+0x62,0xf5,0x7f,0x08,0x6c,0x0c,0x6d,0x00,0xff,0xff,0xff
+
+# ATT: vcvttsd2usis 1016(%rcx), %ecx
+# INTEL: vcvttsd2usis ecx, qword ptr [rcx + 1016]
+0x62,0xf5,0x7f,0x08,0x6c,0x49,0x7f
+
+# ATT: vcvttsd2usis -1024(%rdx), %ecx
+# INTEL: vcvttsd2usis ecx, qword ptr [rdx - 1024]
+0x62,0xf5,0x7f,0x08,0x6c,0x4a,0x80
+
+# ATT: vcvttsd2usis 268435456(%rbp,%r14,8), %r9
+# INTEL: vcvttsd2usis r9, qword ptr [rbp + 8*r14 + 268435456]
+0x62,0x35,0xff,0x08,0x6c,0x8c,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttsd2usis 291(%r8,%rax,4), %r9
+# INTEL: vcvttsd2usis r9, qword ptr [r8 + 4*rax + 291]
+0x62,0x55,0xff,0x08,0x6c,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttsd2usis (%rip), %r9
+# INTEL: vcvttsd2usis r9, qword ptr [rip]
+0x62,0x75,0xff,0x08,0x6c,0x0d,0x00,0x00,0x00,0x00
+
+# ATT: vcvttsd2usis -256(,%rbp,2), %r9
+# INTEL: vcvttsd2usis r9, qword ptr [2*rbp - 256]
+0x62,0x75,0xff,0x08,0x6c,0x0c,0x6d,0x00,0xff,0xff,0xff
+
+# ATT: vcvttsd2usis 1016(%rcx), %r9
+# INTEL: vcvttsd2usis r9, qword ptr [rcx + 1016]
+0x62,0x75,0xff,0x08,0x6c,0x49,0x7f
+
+# ATT: vcvttsd2usis -1024(%rdx), %r9
+# INTEL: vcvttsd2usis r9, qword ptr [rdx - 1024]
+0x62,0x75,0xff,0x08,0x6c,0x4a,0x80
+
+# ATT: vcvttss2sis %xmm22, %ecx
+# INTEL: vcvttss2sis ecx, xmm22
+0x62,0xb5,0x7e,0x08,0x6d,0xce
+
+# ATT: vcvttss2sis {sae}, %xmm22, %ecx
+# INTEL: vcvttss2sis ecx, xmm22, {sae}
+0x62,0xb5,0x7e,0x18,0x6d,0xce
+
+# ATT: vcvttss2sis %xmm22, %r9
+# INTEL: vcvttss2sis r9, xmm22
+0x62,0x35,0xfe,0x08,0x6d,0xce
+
+# ATT: vcvttss2sis {sae}, %xmm22, %r9
+# INTEL: vcvttss2sis r9, xmm22, {sae}
+0x62,0x35,0xfe,0x18,0x6d,0xce
+
+# ATT: vcvttss2sis 268435456(%rbp,%r14,8), %ecx
+# INTEL: vcvttss2sis ecx, dword ptr [rbp + 8*r14 + 268435456]
+0x62,0xb5,0x7e,0x08,0x6d,0x8c,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttss2sis 291(%r8,%rax,4), %ecx
+# INTEL: vcvttss2sis ecx, dword ptr [r8 + 4*rax + 291]
+0x62,0xd5,0x7e,0x08,0x6d,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttss2sis (%rip), %ecx
+# INTEL: vcvttss2sis ecx, dword ptr [rip]
+0x62,0xf5,0x7e,0x08,0x6d,0x0d,0x00,0x00,0x00,0x00
+
+# ATT: vcvttss2sis -128(,%rbp,2), %ecx
+# INTEL: vcvttss2sis ecx, dword ptr [2*rbp - 128]
+0x62,0xf5,0x7e,0x08,0x6d,0x0c,0x6d,0x80,0xff,0xff,0xff
+
+# ATT: vcvttss2sis 508(%rcx), %ecx
+# INTEL: vcvttss2sis ecx, dword ptr [rcx + 508]
+0x62,0xf5,0x7e,0x08,0x6d,0x49,0x7f
+
+# ATT: vcvttss2sis -512(%rdx), %ecx
+# INTEL: vcvttss2sis ecx, dword ptr [rdx - 512]
+0x62,0xf5,0x7e,0x08,0x6d,0x4a,0x80
+
+# ATT: vcvttss2sis 268435456(%rbp,%r14,8), %r9
+# INTEL: vcvttss2sis r9, dword ptr [rbp + 8*r14 + 268435456]
+0x62,0x35,0xfe,0x08,0x6d,0x8c,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttss2sis 291(%r8,%rax,4), %r9
+# INTEL: vcvttss2sis r9, dword ptr [r8 + 4*rax + 291]
+0x62,0x55,0xfe,0x08,0x6d,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttss2sis (%rip), %r9
+# INTEL: vcvttss2sis r9, dword ptr [rip]
+0x62,0x75,0xfe,0x08,0x6d,0x0d,0x00,0x00,0x00,0x00
+
+# ATT: vcvttss2sis -128(,%rbp,2), %r9
+# INTEL: vcvttss2sis r9, dword ptr [2*rbp - 128]
+0x62,0x75,0xfe,0x08,0x6d,0x0c,0x6d,0x80,0xff,0xff,0xff
+
+# ATT: vcvttss2sis 508(%rcx), %r9
+# INTEL: vcvttss2sis r9, dword ptr [rcx + 508]
+0x62,0x75,0xfe,0x08,0x6d,0x49,0x7f
+
+# ATT: vcvttss2sis -512(%rdx), %r9
+# INTEL: vcvttss2sis r9, dword ptr [rdx - 512]
+0x62,0x75,0xfe,0x08,0x6d,0x4a,0x80
+
+# ATT: vcvttss2usis %xmm22, %ecx
+# INTEL: vcvttss2usis ecx, xmm22
+0x62,0xb5,0x7e,0x08,0x6c,0xce
+
+# ATT: vcvttss2usis {sae}, %xmm22, %ecx
+# INTEL: vcvttss2usis ecx, xmm22, {sae}
+0x62,0xb5,0x7e,0x18,0x6c,0xce
+
+# ATT: vcvttss2usis %xmm22, %r9
+# INTEL: vcvttss2usis r9, xmm22
+0x62,0x35,0xfe,0x08,0x6c,0xce
+
+# ATT: vcvttss2usis {sae}, %xmm22, %r9
+# INTEL: vcvttss2usis r9, xmm22, {sae}
+0x62,0x35,0xfe,0x18,0x6c,0xce
+
+# ATT: vcvttss2usis 268435456(%rbp,%r14,8), %ecx
+# INTEL: vcvttss2usis ecx, dword ptr [rbp + 8*r14 + 268435456]
+0x62,0xb5,0x7e,0x08,0x6c,0x8c,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttss2usis 291(%r8,%rax,4), %ecx
+# INTEL: vcvttss2usis ecx, dword ptr [r8 + 4*rax + 291]
+0x62,0xd5,0x7e,0x08,0x6c,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttss2usis (%rip), %ecx
+# INTEL: vcvttss2usis ecx, dword ptr [rip]
+0x62,0xf5,0x7e,0x08,0x6c,0x0d,0x00,0x00,0x00,0x00
+
+# ATT: vcvttss2usis -128(,%rbp,2), %ecx
+# INTEL: vcvttss2usis ecx, dword ptr [2*rbp - 128]
+0x62,0xf5,0x7e,0x08,0x6c,0x0c,0x6d,0x80,0xff,0xff,0xff
+
+# ATT: vcvttss2usis 508(%rcx), %ecx
+# INTEL: vcvttss2usis ecx, dword ptr [rcx + 508]
+0x62,0xf5,0x7e,0x08,0x6c,0x49,0x7f
+
+# ATT: vcvttss2usis -512(%rdx), %ecx
+# INTEL: vcvttss2usis ecx, dword ptr [rdx - 512]
+0x62,0xf5,0x7e,0x08,0x6c,0x4a,0x80
+
+# ATT: vcvttss2usis 268435456(%rbp,%r14,8), %r9
+# INTEL: vcvttss2usis r9, dword ptr [rbp + 8*r14 + 268435456]
+0x62,0x35,0xfe,0x08,0x6c,0x8c,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcvttss2usis 291(%r8,%rax,4), %r9
+# INTEL: vcvttss2usis r9, dword ptr [r8 + 4*rax + 291]
+0x62,0x55,0xfe,0x08,0x6c,0x8c,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcvttss2usis (%rip), %r9
+# INTEL: vcvttss2usis r9, dword ptr [rip]
+0x62,0x75,0xfe,0x08,0x6c,0x0d,0x00,0x00,0x00,0x00
+
+# ATT: vcvttss2usis -128(,%rbp,2), %r9
+# INTEL: vcvttss2usis r9, dword ptr [2*rbp - 128]
+0x62,0x75,0xfe,0x08,0x6c,0x0c,0x6d,0x80,0xff,0xff,0xff
+
+# ATT: vcvttss2usis 508(%rcx), %r9
+# INTEL: vcvttss2usis r9, dword ptr [rcx + 508]
+0x62,0x75,0xfe,0x08,0x6c,0x49,0x7f
+
+# ATT: vcvttss2usis -512(%rdx), %r9
+# INTEL: vcvttss2usis r9, dword ptr [rdx - 512]
+0x62,0x75,0xfe,0x08,0x6c,0x4a,0x80
+
diff --git a/llvm/test/MC/X86/avx10_2satcvtds-32-att.s b/llvm/test/MC/X86/avx10_2satcvtds-32-att.s
new file mode 100644
index 00000000000000..ec59839150b5ff
--- /dev/null
+++ b/llvm/test/MC/X86/avx10_2satcvtds-32-att.s
@@ -0,0 +1,1042 @@
+// RUN: llvm-mc -triple i386 --show-encoding %s | FileCheck %s
+
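+// Each case below asserts the exact EVEX encoding emitted by the assembler.
+// For illustration only (not matched by FileCheck), a single instruction can
+// be round-tripped through a locally built llvm-mc, e.g.:
+//   echo "vcvttsd2sis %xmm2, %ecx" | llvm-mc -triple i386 --show-encoding
+// which is expected to echo the instruction together with
+// "encoding: [0x62,0xf5,0x7f,0x08,0x6d,0xca]".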
+// CHECK: vcvttsd2sis %xmm2, %ecx
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6d,0xca]
+ vcvttsd2sis %xmm2, %ecx
+
+// CHECK: vcvttsd2sis {sae}, %xmm2, %ecx
+// CHECK: encoding: [0x62,0xf5,0x7f,0x18,0x6d,0xca]
+ vcvttsd2sis {sae}, %xmm2, %ecx
+
+// CHECK: vcvttsd2sis 268435456(%esp,%esi,8), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6d,0x8c,0xf4,0x00,0x00,0x00,0x10]
+ vcvttsd2sis 268435456(%esp,%esi,8), %ecx
+
+// CHECK: vcvttsd2sis 291(%edi,%eax,4), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6d,0x8c,0x87,0x23,0x01,0x00,0x00]
+ vcvttsd2sis 291(%edi,%eax,4), %ecx
+
+// CHECK: vcvttsd2sis (%eax), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6d,0x08]
+ vcvttsd2sis (%eax), %ecx
+
+// CHECK: vcvttsd2sis -256(,%ebp,2), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6d,0x0c,0x6d,0x00,0xff,0xff,0xff]
+ vcvttsd2sis -256(,%ebp,2), %ecx
+
+// CHECK: vcvttsd2sis 1016(%ecx), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6d,0x49,0x7f]
+ vcvttsd2sis 1016(%ecx), %ecx
+
+// CHECK: vcvttsd2sis -1024(%edx), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6d,0x4a,0x80]
+ vcvttsd2sis -1024(%edx), %ecx
+
+// CHECK: vcvttsd2usis %xmm2, %ecx
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6c,0xca]
+ vcvttsd2usis %xmm2, %ecx
+
+// CHECK: vcvttsd2usis {sae}, %xmm2, %ecx
+// CHECK: encoding: [0x62,0xf5,0x7f,0x18,0x6c,0xca]
+ vcvttsd2usis {sae}, %xmm2, %ecx
+
+// CHECK: vcvttsd2usis 268435456(%esp,%esi,8), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6c,0x8c,0xf4,0x00,0x00,0x00,0x10]
+ vcvttsd2usis 268435456(%esp,%esi,8), %ecx
+
+// CHECK: vcvttsd2usis 291(%edi,%eax,4), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6c,0x8c,0x87,0x23,0x01,0x00,0x00]
+ vcvttsd2usis 291(%edi,%eax,4), %ecx
+
+// CHECK: vcvttsd2usis (%eax), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6c,0x08]
+ vcvttsd2usis (%eax), %ecx
+
+// CHECK: vcvttsd2usis -256(,%ebp,2), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6c,0x0c,0x6d,0x00,0xff,0xff,0xff]
+ vcvttsd2usis -256(,%ebp,2), %ecx
+
+// CHECK: vcvttsd2usis 1016(%ecx), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6c,0x49,0x7f]
+ vcvttsd2usis 1016(%ecx), %ecx
+
+// CHECK: vcvttsd2usis -1024(%edx), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6c,0x4a,0x80]
+ vcvttsd2usis -1024(%edx), %ecx
+
+// CHECK: vcvttss2sis %xmm2, %ecx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6d,0xca]
+ vcvttss2sis %xmm2, %ecx
+
+// CHECK: vcvttss2sis {sae}, %xmm2, %ecx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x6d,0xca]
+ vcvttss2sis {sae}, %xmm2, %ecx
+
+// CHECK: vcvttss2sis 268435456(%esp,%esi,8), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6d,0x8c,0xf4,0x00,0x00,0x00,0x10]
+ vcvttss2sis 268435456(%esp,%esi,8), %ecx
+
+// CHECK: vcvttss2sis 291(%edi,%eax,4), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6d,0x8c,0x87,0x23,0x01,0x00,0x00]
+ vcvttss2sis 291(%edi,%eax,4), %ecx
+
+// CHECK: vcvttss2sis (%eax), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6d,0x08]
+ vcvttss2sis (%eax), %ecx
+
+// CHECK: vcvttss2sis -128(,%ebp,2), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6d,0x0c,0x6d,0x80,0xff,0xff,0xff]
+ vcvttss2sis -128(,%ebp,2), %ecx
+
+// CHECK: vcvttss2sis 508(%ecx), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6d,0x49,0x7f]
+ vcvttss2sis 508(%ecx), %ecx
+
+// CHECK: vcvttss2sis -512(%edx), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6d,0x4a,0x80]
+ vcvttss2sis -512(%edx), %ecx
+
+// CHECK: vcvttss2usis %xmm2, %ecx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6c,0xca]
+ vcvttss2usis %xmm2, %ecx
+
+// CHECK: vcvttss2usis {sae}, %xmm2, %ecx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x6c,0xca]
+ vcvttss2usis {sae}, %xmm2, %ecx
+
+// CHECK: vcvttss2usis 268435456(%esp,%esi,8), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6c,0x8c,0xf4,0x00,0x00,0x00,0x10]
+ vcvttss2usis 268435456(%esp,%esi,8), %ecx
+
+// CHECK: vcvttss2usis 291(%edi,%eax,4), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6c,0x8c,0x87,0x23,0x01,0x00,0x00]
+ vcvttss2usis 291(%edi,%eax,4), %ecx
+
+// CHECK: vcvttss2usis (%eax), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6c,0x08]
+ vcvttss2usis (%eax), %ecx
+
+// CHECK: vcvttss2usis -128(,%ebp,2), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6c,0x0c,0x6d,0x80,0xff,0xff,0xff]
+ vcvttss2usis -128(,%ebp,2), %ecx
+
+// CHECK: vcvttss2usis 508(%ecx), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6c,0x49,0x7f]
+ vcvttss2usis 508(%ecx), %ecx
+
+// CHECK: vcvttss2usis -512(%edx), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6c,0x4a,0x80]
+ vcvttss2usis -512(%edx), %ecx
+
+// CHECK: vcvttpd2dqs %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0xfc,0x08,0x6d,0xd3]
+ vcvttpd2dqs %xmm3, %xmm2
+
+// CHECK: vcvttpd2dqs %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x0f,0x6d,0xd3]
+ vcvttpd2dqs %xmm3, %xmm2 {%k7}
+
+// CHECK: vcvttpd2dqs %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x8f,0x6d,0xd3]
+ vcvttpd2dqs %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vcvttpd2dqs %ymm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0xfc,0x28,0x6d,0xd3]
+ vcvttpd2dqs %ymm3, %xmm2
+
+// CHECK: vcvttpd2dqs {sae}, %ymm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0xf8,0x18,0x6d,0xd3]
+ vcvttpd2dqs {sae}, %ymm3, %xmm2
+
+// CHECK: vcvttpd2dqs %ymm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x2f,0x6d,0xd3]
+ vcvttpd2dqs %ymm3, %xmm2 {%k7}
+
+// CHECK: vcvttpd2dqs {sae}, %ymm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xf8,0x9f,0x6d,0xd3]
+ vcvttpd2dqs {sae}, %ymm3, %xmm2 {%k7} {z}
+
+// CHECK: vcvttpd2dqs %zmm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0xfc,0x48,0x6d,0xd3]
+ vcvttpd2dqs %zmm3, %ymm2
+
+// CHECK: vcvttpd2dqs {sae}, %zmm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0xfc,0x18,0x6d,0xd3]
+ vcvttpd2dqs {sae}, %zmm3, %ymm2
+
+// CHECK: vcvttpd2dqs %zmm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x4f,0x6d,0xd3]
+ vcvttpd2dqs %zmm3, %ymm2 {%k7}
+
+// CHECK: vcvttpd2dqs {sae}, %zmm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x9f,0x6d,0xd3]
+ vcvttpd2dqs {sae}, %zmm3, %ymm2 {%k7} {z}
+
+// CHECK: vcvttpd2dqsx 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf5,0xfc,0x08,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttpd2dqsx 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vcvttpd2dqsx 291(%edi,%eax,4), %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x0f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttpd2dqsx 291(%edi,%eax,4), %xmm2 {%k7}
+
+// CHECK: vcvttpd2dqs (%eax){1to2}, %xmm2
+// CHECK: encoding: [0x62,0xf5,0xfc,0x18,0x6d,0x10]
+ vcvttpd2dqs (%eax){1to2}, %xmm2
+
+// CHECK: vcvttpd2dqsx -512(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf5,0xfc,0x08,0x6d,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttpd2dqsx -512(,%ebp,2), %xmm2
+
+// CHECK: vcvttpd2dqsx 2032(%ecx), %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x8f,0x6d,0x51,0x7f]
+ vcvttpd2dqsx 2032(%ecx), %xmm2 {%k7} {z}
+
+// CHECK: vcvttpd2dqs -1024(%edx){1to2}, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x9f,0x6d,0x52,0x80]
+ vcvttpd2dqs -1024(%edx){1to2}, %xmm2 {%k7} {z}
+
+// CHECK: vcvttpd2dqs (%eax){1to4}, %xmm2
+// CHECK: encoding: [0x62,0xf5,0xfc,0x38,0x6d,0x10]
+ vcvttpd2dqs (%eax){1to4}, %xmm2
+
+// CHECK: vcvttpd2dqsy -1024(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf5,0xfc,0x28,0x6d,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttpd2dqsy -1024(,%ebp,2), %xmm2
+
+// CHECK: vcvttpd2dqsy 4064(%ecx), %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfc,0xaf,0x6d,0x51,0x7f]
+ vcvttpd2dqsy 4064(%ecx), %xmm2 {%k7} {z}
+
+// CHECK: vcvttpd2dqs -1024(%edx){1to4}, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfc,0xbf,0x6d,0x52,0x80]
+ vcvttpd2dqs -1024(%edx){1to4}, %xmm2 {%k7} {z}
+
+// CHECK: vcvttpd2dqs 268435456(%esp,%esi,8), %ymm2
+// CHECK: encoding: [0x62,0xf5,0xfc,0x48,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttpd2dqs 268435456(%esp,%esi,8), %ymm2
+
+// CHECK: vcvttpd2dqs 291(%edi,%eax,4), %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x4f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttpd2dqs 291(%edi,%eax,4), %ymm2 {%k7}
+
+// CHECK: vcvttpd2dqs (%eax){1to8}, %ymm2
+// CHECK: encoding: [0x62,0xf5,0xfc,0x58,0x6d,0x10]
+ vcvttpd2dqs (%eax){1to8}, %ymm2
+
+// CHECK: vcvttpd2dqs -2048(,%ebp,2), %ymm2
+// CHECK: encoding: [0x62,0xf5,0xfc,0x48,0x6d,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vcvttpd2dqs -2048(,%ebp,2), %ymm2
+
+// CHECK: vcvttpd2dqs 8128(%ecx), %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfc,0xcf,0x6d,0x51,0x7f]
+ vcvttpd2dqs 8128(%ecx), %ymm2 {%k7} {z}
+
+// CHECK: vcvttpd2dqs -1024(%edx){1to8}, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfc,0xdf,0x6d,0x52,0x80]
+ vcvttpd2dqs -1024(%edx){1to8}, %ymm2 {%k7} {z}
+
+// CHECK: vcvttpd2qqs %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0xfd,0x08,0x6d,0xd3]
+ vcvttpd2qqs %xmm3, %xmm2
+
+// CHECK: vcvttpd2qqs %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x0f,0x6d,0xd3]
+ vcvttpd2qqs %xmm3, %xmm2 {%k7}
+
+// CHECK: vcvttpd2qqs %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x8f,0x6d,0xd3]
+ vcvttpd2qqs %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vcvttpd2qqs %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0xfd,0x28,0x6d,0xd3]
+ vcvttpd2qqs %ymm3, %ymm2
+
+// CHECK: vcvttpd2qqs {sae}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0xf9,0x18,0x6d,0xd3]
+ vcvttpd2qqs {sae}, %ymm3, %ymm2
+
+// CHECK: vcvttpd2qqs %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x2f,0x6d,0xd3]
+ vcvttpd2qqs %ymm3, %ymm2 {%k7}
+
+// CHECK: vcvttpd2qqs {sae}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xf9,0x9f,0x6d,0xd3]
+ vcvttpd2qqs {sae}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vcvttpd2qqs %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0xfd,0x48,0x6d,0xd3]
+ vcvttpd2qqs %zmm3, %zmm2
+
+// CHECK: vcvttpd2qqs {sae}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0xfd,0x18,0x6d,0xd3]
+ vcvttpd2qqs {sae}, %zmm3, %zmm2
+
+// CHECK: vcvttpd2qqs %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x4f,0x6d,0xd3]
+ vcvttpd2qqs %zmm3, %zmm2 {%k7}
+
+// CHECK: vcvttpd2qqs {sae}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x9f,0x6d,0xd3]
+ vcvttpd2qqs {sae}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vcvttpd2qqs 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf5,0xfd,0x08,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttpd2qqs 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vcvttpd2qqs 291(%edi,%eax,4), %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x0f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttpd2qqs 291(%edi,%eax,4), %xmm2 {%k7}
+
+// CHECK: vcvttpd2qqs (%eax){1to2}, %xmm2
+// CHECK: encoding: [0x62,0xf5,0xfd,0x18,0x6d,0x10]
+ vcvttpd2qqs (%eax){1to2}, %xmm2
+
+// CHECK: vcvttpd2qqs -512(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf5,0xfd,0x08,0x6d,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttpd2qqs -512(,%ebp,2), %xmm2
+
+// CHECK: vcvttpd2qqs 2032(%ecx), %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x8f,0x6d,0x51,0x7f]
+ vcvttpd2qqs 2032(%ecx), %xmm2 {%k7} {z}
+
+// CHECK: vcvttpd2qqs -1024(%edx){1to2}, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x9f,0x6d,0x52,0x80]
+ vcvttpd2qqs -1024(%edx){1to2}, %xmm2 {%k7} {z}
+
+// CHECK: vcvttpd2qqs 268435456(%esp,%esi,8), %ymm2
+// CHECK: encoding: [0x62,0xf5,0xfd,0x28,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttpd2qqs 268435456(%esp,%esi,8), %ymm2
+
+// CHECK: vcvttpd2qqs 291(%edi,%eax,4), %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x2f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttpd2qqs 291(%edi,%eax,4), %ymm2 {%k7}
+
+// CHECK: vcvttpd2qqs (%eax){1to4}, %ymm2
+// CHECK: encoding: [0x62,0xf5,0xfd,0x38,0x6d,0x10]
+ vcvttpd2qqs (%eax){1to4}, %ymm2
+
+// CHECK: vcvttpd2qqs -1024(,%ebp,2), %ymm2
+// CHECK: encoding: [0x62,0xf5,0xfd,0x28,0x6d,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttpd2qqs -1024(,%ebp,2), %ymm2
+
+// CHECK: vcvttpd2qqs 4064(%ecx), %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfd,0xaf,0x6d,0x51,0x7f]
+ vcvttpd2qqs 4064(%ecx), %ymm2 {%k7} {z}
+
+// CHECK: vcvttpd2qqs -1024(%edx){1to4}, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfd,0xbf,0x6d,0x52,0x80]
+ vcvttpd2qqs -1024(%edx){1to4}, %ymm2 {%k7} {z}
+
+// CHECK: vcvttpd2qqs 268435456(%esp,%esi,8), %zmm2
+// CHECK: encoding: [0x62,0xf5,0xfd,0x48,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttpd2qqs 268435456(%esp,%esi,8), %zmm2
+
+// CHECK: vcvttpd2qqs 291(%edi,%eax,4), %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x4f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttpd2qqs 291(%edi,%eax,4), %zmm2 {%k7}
+
+// CHECK: vcvttpd2qqs (%eax){1to8}, %zmm2
+// CHECK: encoding: [0x62,0xf5,0xfd,0x58,0x6d,0x10]
+ vcvttpd2qqs (%eax){1to8}, %zmm2
+
+// CHECK: vcvttpd2qqs -2048(,%ebp,2), %zmm2
+// CHECK: encoding: [0x62,0xf5,0xfd,0x48,0x6d,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vcvttpd2qqs -2048(,%ebp,2), %zmm2
+
+// CHECK: vcvttpd2qqs 8128(%ecx), %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfd,0xcf,0x6d,0x51,0x7f]
+ vcvttpd2qqs 8128(%ecx), %zmm2 {%k7} {z}
+
+// CHECK: vcvttpd2qqs -1024(%edx){1to8}, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfd,0xdf,0x6d,0x52,0x80]
+ vcvttpd2qqs -1024(%edx){1to8}, %zmm2 {%k7} {z}
+
+// CHECK: vcvttpd2udqs %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0xfc,0x08,0x6c,0xd3]
+ vcvttpd2udqs %xmm3, %xmm2
+
+// CHECK: vcvttpd2udqs %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x0f,0x6c,0xd3]
+ vcvttpd2udqs %xmm3, %xmm2 {%k7}
+
+// CHECK: vcvttpd2udqs %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x8f,0x6c,0xd3]
+ vcvttpd2udqs %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vcvttpd2udqs %ymm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0xfc,0x28,0x6c,0xd3]
+ vcvttpd2udqs %ymm3, %xmm2
+
+// CHECK: vcvttpd2udqs {sae}, %ymm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0xf8,0x18,0x6c,0xd3]
+ vcvttpd2udqs {sae}, %ymm3, %xmm2
+
+// CHECK: vcvttpd2udqs %ymm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x2f,0x6c,0xd3]
+ vcvttpd2udqs %ymm3, %xmm2 {%k7}
+
+// CHECK: vcvttpd2udqs {sae}, %ymm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xf8,0x9f,0x6c,0xd3]
+ vcvttpd2udqs {sae}, %ymm3, %xmm2 {%k7} {z}
+
+// CHECK: vcvttpd2udqs %zmm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0xfc,0x48,0x6c,0xd3]
+ vcvttpd2udqs %zmm3, %ymm2
+
+// CHECK: vcvttpd2udqs {sae}, %zmm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0xfc,0x18,0x6c,0xd3]
+ vcvttpd2udqs {sae}, %zmm3, %ymm2
+
+// CHECK: vcvttpd2udqs %zmm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x4f,0x6c,0xd3]
+ vcvttpd2udqs %zmm3, %ymm2 {%k7}
+
+// CHECK: vcvttpd2udqs {sae}, %zmm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x9f,0x6c,0xd3]
+ vcvttpd2udqs {sae}, %zmm3, %ymm2 {%k7} {z}
+
+// CHECK: vcvttpd2udqsx 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf5,0xfc,0x08,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttpd2udqsx 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vcvttpd2udqsx 291(%edi,%eax,4), %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x0f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttpd2udqsx 291(%edi,%eax,4), %xmm2 {%k7}
+
+// CHECK: vcvttpd2udqs (%eax){1to2}, %xmm2
+// CHECK: encoding: [0x62,0xf5,0xfc,0x18,0x6c,0x10]
+ vcvttpd2udqs (%eax){1to2}, %xmm2
+
+// CHECK: vcvttpd2udqsx -512(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf5,0xfc,0x08,0x6c,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttpd2udqsx -512(,%ebp,2), %xmm2
+
+// CHECK: vcvttpd2udqsx 2032(%ecx), %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x8f,0x6c,0x51,0x7f]
+ vcvttpd2udqsx 2032(%ecx), %xmm2 {%k7} {z}
+
+// CHECK: vcvttpd2udqs -1024(%edx){1to2}, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x9f,0x6c,0x52,0x80]
+ vcvttpd2udqs -1024(%edx){1to2}, %xmm2 {%k7} {z}
+
+// CHECK: vcvttpd2udqs (%eax){1to4}, %xmm2
+// CHECK: encoding: [0x62,0xf5,0xfc,0x38,0x6c,0x10]
+ vcvttpd2udqs (%eax){1to4}, %xmm2
+
+// CHECK: vcvttpd2udqsy -1024(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf5,0xfc,0x28,0x6c,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttpd2udqsy -1024(,%ebp,2), %xmm2
+
+// CHECK: vcvttpd2udqsy 4064(%ecx), %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfc,0xaf,0x6c,0x51,0x7f]
+ vcvttpd2udqsy 4064(%ecx), %xmm2 {%k7} {z}
+
+// CHECK: vcvttpd2udqs -1024(%edx){1to4}, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfc,0xbf,0x6c,0x52,0x80]
+ vcvttpd2udqs -1024(%edx){1to4}, %xmm2 {%k7} {z}
+
+// CHECK: vcvttpd2udqs 268435456(%esp,%esi,8), %ymm2
+// CHECK: encoding: [0x62,0xf5,0xfc,0x48,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttpd2udqs 268435456(%esp,%esi,8), %ymm2
+
+// CHECK: vcvttpd2udqs 291(%edi,%eax,4), %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x4f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttpd2udqs 291(%edi,%eax,4), %ymm2 {%k7}
+
+// CHECK: vcvttpd2udqs (%eax){1to8}, %ymm2
+// CHECK: encoding: [0x62,0xf5,0xfc,0x58,0x6c,0x10]
+ vcvttpd2udqs (%eax){1to8}, %ymm2
+
+// CHECK: vcvttpd2udqs -2048(,%ebp,2), %ymm2
+// CHECK: encoding: [0x62,0xf5,0xfc,0x48,0x6c,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vcvttpd2udqs -2048(,%ebp,2), %ymm2
+
+// CHECK: vcvttpd2udqs 8128(%ecx), %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfc,0xcf,0x6c,0x51,0x7f]
+ vcvttpd2udqs 8128(%ecx), %ymm2 {%k7} {z}
+
+// CHECK: vcvttpd2udqs -1024(%edx){1to8}, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfc,0xdf,0x6c,0x52,0x80]
+ vcvttpd2udqs -1024(%edx){1to8}, %ymm2 {%k7} {z}
+
+// CHECK: vcvttpd2uqqs %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0xfd,0x08,0x6c,0xd3]
+ vcvttpd2uqqs %xmm3, %xmm2
+
+// CHECK: vcvttpd2uqqs %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x0f,0x6c,0xd3]
+ vcvttpd2uqqs %xmm3, %xmm2 {%k7}
+
+// CHECK: vcvttpd2uqqs %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x8f,0x6c,0xd3]
+ vcvttpd2uqqs %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vcvttpd2uqqs %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0xfd,0x28,0x6c,0xd3]
+ vcvttpd2uqqs %ymm3, %ymm2
+
+// CHECK: vcvttpd2uqqs {sae}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0xf9,0x18,0x6c,0xd3]
+ vcvttpd2uqqs {sae}, %ymm3, %ymm2
+
+// CHECK: vcvttpd2uqqs %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x2f,0x6c,0xd3]
+ vcvttpd2uqqs %ymm3, %ymm2 {%k7}
+
+// CHECK: vcvttpd2uqqs {sae}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xf9,0x9f,0x6c,0xd3]
+ vcvttpd2uqqs {sae}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vcvttpd2uqqs %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0xfd,0x48,0x6c,0xd3]
+ vcvttpd2uqqs %zmm3, %zmm2
+
+// CHECK: vcvttpd2uqqs {sae}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0xfd,0x18,0x6c,0xd3]
+ vcvttpd2uqqs {sae}, %zmm3, %zmm2
+
+// CHECK: vcvttpd2uqqs %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x4f,0x6c,0xd3]
+ vcvttpd2uqqs %zmm3, %zmm2 {%k7}
+
+// CHECK: vcvttpd2uqqs {sae}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x9f,0x6c,0xd3]
+ vcvttpd2uqqs {sae}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vcvttpd2uqqs 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf5,0xfd,0x08,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttpd2uqqs 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vcvttpd2uqqs 291(%edi,%eax,4), %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x0f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttpd2uqqs 291(%edi,%eax,4), %xmm2 {%k7}
+
+// CHECK: vcvttpd2uqqs (%eax){1to2}, %xmm2
+// CHECK: encoding: [0x62,0xf5,0xfd,0x18,0x6c,0x10]
+ vcvttpd2uqqs (%eax){1to2}, %xmm2
+
+// CHECK: vcvttpd2uqqs -512(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf5,0xfd,0x08,0x6c,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttpd2uqqs -512(,%ebp,2), %xmm2
+
+// CHECK: vcvttpd2uqqs 2032(%ecx), %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x8f,0x6c,0x51,0x7f]
+ vcvttpd2uqqs 2032(%ecx), %xmm2 {%k7} {z}
+
+// CHECK: vcvttpd2uqqs -1024(%edx){1to2}, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x9f,0x6c,0x52,0x80]
+ vcvttpd2uqqs -1024(%edx){1to2}, %xmm2 {%k7} {z}
+
+// CHECK: vcvttpd2uqqs 268435456(%esp,%esi,8), %ymm2
+// CHECK: encoding: [0x62,0xf5,0xfd,0x28,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttpd2uqqs 268435456(%esp,%esi,8), %ymm2
+
+// CHECK: vcvttpd2uqqs 291(%edi,%eax,4), %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x2f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttpd2uqqs 291(%edi,%eax,4), %ymm2 {%k7}
+
+// CHECK: vcvttpd2uqqs (%eax){1to4}, %ymm2
+// CHECK: encoding: [0x62,0xf5,0xfd,0x38,0x6c,0x10]
+ vcvttpd2uqqs (%eax){1to4}, %ymm2
+
+// CHECK: vcvttpd2uqqs -1024(,%ebp,2), %ymm2
+// CHECK: encoding: [0x62,0xf5,0xfd,0x28,0x6c,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttpd2uqqs -1024(,%ebp,2), %ymm2
+
+// CHECK: vcvttpd2uqqs 4064(%ecx), %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfd,0xaf,0x6c,0x51,0x7f]
+ vcvttpd2uqqs 4064(%ecx), %ymm2 {%k7} {z}
+
+// CHECK: vcvttpd2uqqs -1024(%edx){1to4}, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfd,0xbf,0x6c,0x52,0x80]
+ vcvttpd2uqqs -1024(%edx){1to4}, %ymm2 {%k7} {z}
+
+// CHECK: vcvttpd2uqqs 268435456(%esp,%esi,8), %zmm2
+// CHECK: encoding: [0x62,0xf5,0xfd,0x48,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttpd2uqqs 268435456(%esp,%esi,8), %zmm2
+
+// CHECK: vcvttpd2uqqs 291(%edi,%eax,4), %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x4f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttpd2uqqs 291(%edi,%eax,4), %zmm2 {%k7}
+
+// CHECK: vcvttpd2uqqs (%eax){1to8}, %zmm2
+// CHECK: encoding: [0x62,0xf5,0xfd,0x58,0x6c,0x10]
+ vcvttpd2uqqs (%eax){1to8}, %zmm2
+
+// CHECK: vcvttpd2uqqs -2048(,%ebp,2), %zmm2
+// CHECK: encoding: [0x62,0xf5,0xfd,0x48,0x6c,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vcvttpd2uqqs -2048(,%ebp,2), %zmm2
+
+// CHECK: vcvttpd2uqqs 8128(%ecx), %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfd,0xcf,0x6c,0x51,0x7f]
+ vcvttpd2uqqs 8128(%ecx), %zmm2 {%k7} {z}
+
+// CHECK: vcvttpd2uqqs -1024(%edx){1to8}, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0xfd,0xdf,0x6c,0x52,0x80]
+ vcvttpd2uqqs -1024(%edx){1to8}, %zmm2 {%k7} {z}
+
+// CHECK: vcvttps2dqs %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x6d,0xd3]
+ vcvttps2dqs %xmm3, %xmm2
+
+// CHECK: vcvttps2dqs %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x0f,0x6d,0xd3]
+ vcvttps2dqs %xmm3, %xmm2 {%k7}
+
+// CHECK: vcvttps2dqs %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x8f,0x6d,0xd3]
+ vcvttps2dqs %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vcvttps2dqs %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x6d,0xd3]
+ vcvttps2dqs %ymm3, %ymm2
+
+// CHECK: vcvttps2dqs {sae}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x78,0x18,0x6d,0xd3]
+ vcvttps2dqs {sae}, %ymm3, %ymm2
+
+// CHECK: vcvttps2dqs %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x2f,0x6d,0xd3]
+ vcvttps2dqs %ymm3, %ymm2 {%k7}
+
+// CHECK: vcvttps2dqs {sae}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x78,0x9f,0x6d,0xd3]
+ vcvttps2dqs {sae}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vcvttps2dqs %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x6d,0xd3]
+ vcvttps2dqs %zmm3, %zmm2
+
+// CHECK: vcvttps2dqs {sae}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x6d,0xd3]
+ vcvttps2dqs {sae}, %zmm3, %zmm2
+
+// CHECK: vcvttps2dqs %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x4f,0x6d,0xd3]
+ vcvttps2dqs %zmm3, %zmm2 {%k7}
+
+// CHECK: vcvttps2dqs {sae}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x9f,0x6d,0xd3]
+ vcvttps2dqs {sae}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vcvttps2dqs 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttps2dqs 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vcvttps2dqs 291(%edi,%eax,4), %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x0f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttps2dqs 291(%edi,%eax,4), %xmm2 {%k7}
+
+// CHECK: vcvttps2dqs (%eax){1to4}, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x6d,0x10]
+ vcvttps2dqs (%eax){1to4}, %xmm2
+
+// CHECK: vcvttps2dqs -512(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x6d,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttps2dqs -512(,%ebp,2), %xmm2
+
+// CHECK: vcvttps2dqs 2032(%ecx), %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x8f,0x6d,0x51,0x7f]
+ vcvttps2dqs 2032(%ecx), %xmm2 {%k7} {z}
+
+// CHECK: vcvttps2dqs -512(%edx){1to4}, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x9f,0x6d,0x52,0x80]
+ vcvttps2dqs -512(%edx){1to4}, %xmm2 {%k7} {z}
+
+// CHECK: vcvttps2dqs 268435456(%esp,%esi,8), %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttps2dqs 268435456(%esp,%esi,8), %ymm2
+
+// CHECK: vcvttps2dqs 291(%edi,%eax,4), %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x2f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttps2dqs 291(%edi,%eax,4), %ymm2 {%k7}
+
+// CHECK: vcvttps2dqs (%eax){1to8}, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7c,0x38,0x6d,0x10]
+ vcvttps2dqs (%eax){1to8}, %ymm2
+
+// CHECK: vcvttps2dqs -1024(,%ebp,2), %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x6d,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttps2dqs -1024(,%ebp,2), %ymm2
+
+// CHECK: vcvttps2dqs 4064(%ecx), %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xaf,0x6d,0x51,0x7f]
+ vcvttps2dqs 4064(%ecx), %ymm2 {%k7} {z}
+
+// CHECK: vcvttps2dqs -512(%edx){1to8}, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xbf,0x6d,0x52,0x80]
+ vcvttps2dqs -512(%edx){1to8}, %ymm2 {%k7} {z}
+
+// CHECK: vcvttps2dqs 268435456(%esp,%esi,8), %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttps2dqs 268435456(%esp,%esi,8), %zmm2
+
+// CHECK: vcvttps2dqs 291(%edi,%eax,4), %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x4f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttps2dqs 291(%edi,%eax,4), %zmm2 {%k7}
+
+// CHECK: vcvttps2dqs (%eax){1to16}, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7c,0x58,0x6d,0x10]
+ vcvttps2dqs (%eax){1to16}, %zmm2
+
+// CHECK: vcvttps2dqs -2048(,%ebp,2), %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x6d,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vcvttps2dqs -2048(,%ebp,2), %zmm2
+
+// CHECK: vcvttps2dqs 8128(%ecx), %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xcf,0x6d,0x51,0x7f]
+ vcvttps2dqs 8128(%ecx), %zmm2 {%k7} {z}
+
+// CHECK: vcvttps2dqs -512(%edx){1to16}, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xdf,0x6d,0x52,0x80]
+ vcvttps2dqs -512(%edx){1to16}, %zmm2 {%k7} {z}
+
+// CHECK: vcvttps2qqs %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6d,0xd3]
+ vcvttps2qqs %xmm3, %xmm2
+
+// CHECK: vcvttps2qqs %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x6d,0xd3]
+ vcvttps2qqs %xmm3, %xmm2 {%k7}
+
+// CHECK: vcvttps2qqs %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x6d,0xd3]
+ vcvttps2qqs %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vcvttps2qqs %xmm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x6d,0xd3]
+ vcvttps2qqs %xmm3, %ymm2
+
+// CHECK: vcvttps2qqs {sae}, %xmm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x79,0x18,0x6d,0xd3]
+ vcvttps2qqs {sae}, %xmm3, %ymm2
+
+// CHECK: vcvttps2qqs %xmm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x6d,0xd3]
+ vcvttps2qqs %xmm3, %ymm2 {%k7}
+
+// CHECK: vcvttps2qqs {sae}, %xmm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x79,0x9f,0x6d,0xd3]
+ vcvttps2qqs {sae}, %xmm3, %ymm2 {%k7} {z}
+
+// CHECK: vcvttps2qqs %ymm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x6d,0xd3]
+ vcvttps2qqs %ymm3, %zmm2
+
+// CHECK: vcvttps2qqs {sae}, %ymm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x6d,0xd3]
+ vcvttps2qqs {sae}, %ymm3, %zmm2
+
+// CHECK: vcvttps2qqs %ymm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x6d,0xd3]
+ vcvttps2qqs %ymm3, %zmm2 {%k7}
+
+// CHECK: vcvttps2qqs {sae}, %ymm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x6d,0xd3]
+ vcvttps2qqs {sae}, %ymm3, %zmm2 {%k7} {z}
+
+// CHECK: vcvttps2qqs 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttps2qqs 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vcvttps2qqs 291(%edi,%eax,4), %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttps2qqs 291(%edi,%eax,4), %xmm2 {%k7}
+
+// CHECK: vcvttps2qqs (%eax){1to2}, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x6d,0x10]
+ vcvttps2qqs (%eax){1to2}, %xmm2
+
+// CHECK: vcvttps2qqs -256(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6d,0x14,0x6d,0x00,0xff,0xff,0xff]
+ vcvttps2qqs -256(,%ebp,2), %xmm2
+
+// CHECK: vcvttps2qqs 1016(%ecx), %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x6d,0x51,0x7f]
+ vcvttps2qqs 1016(%ecx), %xmm2 {%k7} {z}
+
+// CHECK: vcvttps2qqs -512(%edx){1to2}, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x6d,0x52,0x80]
+ vcvttps2qqs -512(%edx){1to2}, %xmm2 {%k7} {z}
+
+// CHECK: vcvttps2qqs 268435456(%esp,%esi,8), %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttps2qqs 268435456(%esp,%esi,8), %ymm2
+
+// CHECK: vcvttps2qqs 291(%edi,%eax,4), %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttps2qqs 291(%edi,%eax,4), %ymm2 {%k7}
+
+// CHECK: vcvttps2qqs (%eax){1to4}, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x6d,0x10]
+ vcvttps2qqs (%eax){1to4}, %ymm2
+
+// CHECK: vcvttps2qqs -512(,%ebp,2), %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x6d,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttps2qqs -512(,%ebp,2), %ymm2
+
+// CHECK: vcvttps2qqs 2032(%ecx), %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x6d,0x51,0x7f]
+ vcvttps2qqs 2032(%ecx), %ymm2 {%k7} {z}
+
+// CHECK: vcvttps2qqs -512(%edx){1to4}, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x6d,0x52,0x80]
+ vcvttps2qqs -512(%edx){1to4}, %ymm2 {%k7} {z}
+
+// CHECK: vcvttps2qqs 268435456(%esp,%esi,8), %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttps2qqs 268435456(%esp,%esi,8), %zmm2
+
+// CHECK: vcvttps2qqs 291(%edi,%eax,4), %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttps2qqs 291(%edi,%eax,4), %zmm2 {%k7}
+
+// CHECK: vcvttps2qqs (%eax){1to8}, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x6d,0x10]
+ vcvttps2qqs (%eax){1to8}, %zmm2
+
+// CHECK: vcvttps2qqs -1024(,%ebp,2), %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x6d,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttps2qqs -1024(,%ebp,2), %zmm2
+
+// CHECK: vcvttps2qqs 4064(%ecx), %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x6d,0x51,0x7f]
+ vcvttps2qqs 4064(%ecx), %zmm2 {%k7} {z}
+
+// CHECK: vcvttps2qqs -512(%edx){1to8}, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x6d,0x52,0x80]
+ vcvttps2qqs -512(%edx){1to8}, %zmm2 {%k7} {z}
+
+// CHECK: vcvttps2udqs %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x6c,0xd3]
+ vcvttps2udqs %xmm3, %xmm2
+
+// CHECK: vcvttps2udqs %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x0f,0x6c,0xd3]
+ vcvttps2udqs %xmm3, %xmm2 {%k7}
+
+// CHECK: vcvttps2udqs %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x8f,0x6c,0xd3]
+ vcvttps2udqs %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vcvttps2udqs %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x6c,0xd3]
+ vcvttps2udqs %ymm3, %ymm2
+
+// CHECK: vcvttps2udqs {sae}, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x78,0x18,0x6c,0xd3]
+ vcvttps2udqs {sae}, %ymm3, %ymm2
+
+// CHECK: vcvttps2udqs %ymm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x2f,0x6c,0xd3]
+ vcvttps2udqs %ymm3, %ymm2 {%k7}
+
+// CHECK: vcvttps2udqs {sae}, %ymm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x78,0x9f,0x6c,0xd3]
+ vcvttps2udqs {sae}, %ymm3, %ymm2 {%k7} {z}
+
+// CHECK: vcvttps2udqs %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x6c,0xd3]
+ vcvttps2udqs %zmm3, %zmm2
+
+// CHECK: vcvttps2udqs {sae}, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x6c,0xd3]
+ vcvttps2udqs {sae}, %zmm3, %zmm2
+
+// CHECK: vcvttps2udqs %zmm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x4f,0x6c,0xd3]
+ vcvttps2udqs %zmm3, %zmm2 {%k7}
+
+// CHECK: vcvttps2udqs {sae}, %zmm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x9f,0x6c,0xd3]
+ vcvttps2udqs {sae}, %zmm3, %zmm2 {%k7} {z}
+
+// CHECK: vcvttps2udqs 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttps2udqs 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vcvttps2udqs 291(%edi,%eax,4), %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x0f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttps2udqs 291(%edi,%eax,4), %xmm2 {%k7}
+
+// CHECK: vcvttps2udqs (%eax){1to4}, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x6c,0x10]
+ vcvttps2udqs (%eax){1to4}, %xmm2
+
+// CHECK: vcvttps2udqs -512(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x6c,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttps2udqs -512(,%ebp,2), %xmm2
+
+// CHECK: vcvttps2udqs 2032(%ecx), %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x8f,0x6c,0x51,0x7f]
+ vcvttps2udqs 2032(%ecx), %xmm2 {%k7} {z}
+
+// CHECK: vcvttps2udqs -512(%edx){1to4}, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x9f,0x6c,0x52,0x80]
+ vcvttps2udqs -512(%edx){1to4}, %xmm2 {%k7} {z}
+
+// CHECK: vcvttps2udqs 268435456(%esp,%esi,8), %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttps2udqs 268435456(%esp,%esi,8), %ymm2
+
+// CHECK: vcvttps2udqs 291(%edi,%eax,4), %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x2f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttps2udqs 291(%edi,%eax,4), %ymm2 {%k7}
+
+// CHECK: vcvttps2udqs (%eax){1to8}, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7c,0x38,0x6c,0x10]
+ vcvttps2udqs (%eax){1to8}, %ymm2
+
+// CHECK: vcvttps2udqs -1024(,%ebp,2), %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x6c,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttps2udqs -1024(,%ebp,2), %ymm2
+
+// CHECK: vcvttps2udqs 4064(%ecx), %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xaf,0x6c,0x51,0x7f]
+ vcvttps2udqs 4064(%ecx), %ymm2 {%k7} {z}
+
+// CHECK: vcvttps2udqs -512(%edx){1to8}, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xbf,0x6c,0x52,0x80]
+ vcvttps2udqs -512(%edx){1to8}, %ymm2 {%k7} {z}
+
+// CHECK: vcvttps2udqs 268435456(%esp,%esi,8), %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttps2udqs 268435456(%esp,%esi,8), %zmm2
+
+// CHECK: vcvttps2udqs 291(%edi,%eax,4), %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x4f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttps2udqs 291(%edi,%eax,4), %zmm2 {%k7}
+
+// CHECK: vcvttps2udqs (%eax){1to16}, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7c,0x58,0x6c,0x10]
+ vcvttps2udqs (%eax){1to16}, %zmm2
+
+// CHECK: vcvttps2udqs -2048(,%ebp,2), %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x6c,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vcvttps2udqs -2048(,%ebp,2), %zmm2
+
+// CHECK: vcvttps2udqs 8128(%ecx), %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xcf,0x6c,0x51,0x7f]
+ vcvttps2udqs 8128(%ecx), %zmm2 {%k7} {z}
+
+// CHECK: vcvttps2udqs -512(%edx){1to16}, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xdf,0x6c,0x52,0x80]
+ vcvttps2udqs -512(%edx){1to16}, %zmm2 {%k7} {z}
+
+// CHECK: vcvttps2uqqs %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6c,0xd3]
+ vcvttps2uqqs %xmm3, %xmm2
+
+// CHECK: vcvttps2uqqs %xmm3, %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x6c,0xd3]
+ vcvttps2uqqs %xmm3, %xmm2 {%k7}
+
+// CHECK: vcvttps2uqqs %xmm3, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x6c,0xd3]
+ vcvttps2uqqs %xmm3, %xmm2 {%k7} {z}
+
+// CHECK: vcvttps2uqqs %xmm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x6c,0xd3]
+ vcvttps2uqqs %xmm3, %ymm2
+
+// CHECK: vcvttps2uqqs {sae}, %xmm3, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x79,0x18,0x6c,0xd3]
+ vcvttps2uqqs {sae}, %xmm3, %ymm2
+
+// CHECK: vcvttps2uqqs %xmm3, %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x6c,0xd3]
+ vcvttps2uqqs %xmm3, %ymm2 {%k7}
+
+// CHECK: vcvttps2uqqs {sae}, %xmm3, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x79,0x9f,0x6c,0xd3]
+ vcvttps2uqqs {sae}, %xmm3, %ymm2 {%k7} {z}
+
+// CHECK: vcvttps2uqqs %ymm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x6c,0xd3]
+ vcvttps2uqqs %ymm3, %zmm2
+
+// CHECK: vcvttps2uqqs {sae}, %ymm3, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x6c,0xd3]
+ vcvttps2uqqs {sae}, %ymm3, %zmm2
+
+// CHECK: vcvttps2uqqs %ymm3, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x6c,0xd3]
+ vcvttps2uqqs %ymm3, %zmm2 {%k7}
+
+// CHECK: vcvttps2uqqs {sae}, %ymm3, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x6c,0xd3]
+ vcvttps2uqqs {sae}, %ymm3, %zmm2 {%k7} {z}
+
+// CHECK: vcvttps2uqqs 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttps2uqqs 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vcvttps2uqqs 291(%edi,%eax,4), %xmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttps2uqqs 291(%edi,%eax,4), %xmm2 {%k7}
+
+// CHECK: vcvttps2uqqs (%eax){1to2}, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x6c,0x10]
+ vcvttps2uqqs (%eax){1to2}, %xmm2
+
+// CHECK: vcvttps2uqqs -256(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6c,0x14,0x6d,0x00,0xff,0xff,0xff]
+ vcvttps2uqqs -256(,%ebp,2), %xmm2
+
+// CHECK: vcvttps2uqqs 1016(%ecx), %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x6c,0x51,0x7f]
+ vcvttps2uqqs 1016(%ecx), %xmm2 {%k7} {z}
+
+// CHECK: vcvttps2uqqs -512(%edx){1to2}, %xmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x6c,0x52,0x80]
+ vcvttps2uqqs -512(%edx){1to2}, %xmm2 {%k7} {z}
+
+// CHECK: vcvttps2uqqs 268435456(%esp,%esi,8), %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttps2uqqs 268435456(%esp,%esi,8), %ymm2
+
+// CHECK: vcvttps2uqqs 291(%edi,%eax,4), %ymm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttps2uqqs 291(%edi,%eax,4), %ymm2 {%k7}
+
+// CHECK: vcvttps2uqqs (%eax){1to4}, %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x6c,0x10]
+ vcvttps2uqqs (%eax){1to4}, %ymm2
+
+// CHECK: vcvttps2uqqs -512(,%ebp,2), %ymm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x6c,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttps2uqqs -512(,%ebp,2), %ymm2
+
+// CHECK: vcvttps2uqqs 2032(%ecx), %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x6c,0x51,0x7f]
+ vcvttps2uqqs 2032(%ecx), %ymm2 {%k7} {z}
+
+// CHECK: vcvttps2uqqs -512(%edx){1to4}, %ymm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x6c,0x52,0x80]
+ vcvttps2uqqs -512(%edx){1to4}, %ymm2 {%k7} {z}
+
+// CHECK: vcvttps2uqqs 268435456(%esp,%esi,8), %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttps2uqqs 268435456(%esp,%esi,8), %zmm2
+
+// CHECK: vcvttps2uqqs 291(%edi,%eax,4), %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttps2uqqs 291(%edi,%eax,4), %zmm2 {%k7}
+
+// CHECK: vcvttps2uqqs (%eax){1to8}, %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x6c,0x10]
+ vcvttps2uqqs (%eax){1to8}, %zmm2
+
+// CHECK: vcvttps2uqqs -1024(,%ebp,2), %zmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x6c,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttps2uqqs -1024(,%ebp,2), %zmm2
+
+// CHECK: vcvttps2uqqs 4064(%ecx), %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x6c,0x51,0x7f]
+ vcvttps2uqqs 4064(%ecx), %zmm2 {%k7} {z}
+
+// CHECK: vcvttps2uqqs -512(%edx){1to8}, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x6c,0x52,0x80]
+ vcvttps2uqqs -512(%edx){1to8}, %zmm2 {%k7} {z}
+
diff --git a/llvm/test/MC/X86/avx10_2satcvtds-32-intel.s b/llvm/test/MC/X86/avx10_2satcvtds-32-intel.s
new file mode 100644
index 00000000000000..37a090de2f2ceb
--- /dev/null
+++ b/llvm/test/MC/X86/avx10_2satcvtds-32-intel.s
@@ -0,0 +1,1042 @@
+// RUN: llvm-mc -triple i386 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
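+// Conventions: operands are written destination first (Intel syntax);
+// {sae} suppresses all floating-point exceptions on rounding-capable forms,
+// {k7} applies an opmask register, {z} selects zeroing- rather than
+// merging-masking, and {1toN} broadcasts one memory element to N lanes.
+// Each instruction is paired with the canonical printing and EVEX encoding
+// that FileCheck verifies.
+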
+// CHECK: vcvttsd2sis ecx, xmm2
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6d,0xca]
+ vcvttsd2sis ecx, xmm2
+
+// CHECK: vcvttsd2sis ecx, xmm2, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7f,0x18,0x6d,0xca]
+ vcvttsd2sis ecx, xmm2, {sae}
+
+// CHECK: vcvttsd2sis ecx, qword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6d,0x8c,0xf4,0x00,0x00,0x00,0x10]
+ vcvttsd2sis ecx, qword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttsd2sis ecx, qword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6d,0x8c,0x87,0x23,0x01,0x00,0x00]
+ vcvttsd2sis ecx, qword ptr [edi + 4*eax + 291]
+
+// CHECK: vcvttsd2sis ecx, qword ptr [eax]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6d,0x08]
+ vcvttsd2sis ecx, qword ptr [eax]
+
+// CHECK: vcvttsd2sis ecx, qword ptr [2*ebp - 256]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6d,0x0c,0x6d,0x00,0xff,0xff,0xff]
+ vcvttsd2sis ecx, qword ptr [2*ebp - 256]
+
+// CHECK: vcvttsd2sis ecx, qword ptr [ecx + 1016]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6d,0x49,0x7f]
+ vcvttsd2sis ecx, qword ptr [ecx + 1016]
+
+// CHECK: vcvttsd2sis ecx, qword ptr [edx - 1024]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6d,0x4a,0x80]
+ vcvttsd2sis ecx, qword ptr [edx - 1024]
+
+// CHECK: vcvttsd2usis ecx, xmm2
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6c,0xca]
+ vcvttsd2usis ecx, xmm2
+
+// CHECK: vcvttsd2usis ecx, xmm2, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7f,0x18,0x6c,0xca]
+ vcvttsd2usis ecx, xmm2, {sae}
+
+// CHECK: vcvttsd2usis ecx, qword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6c,0x8c,0xf4,0x00,0x00,0x00,0x10]
+ vcvttsd2usis ecx, qword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttsd2usis ecx, qword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6c,0x8c,0x87,0x23,0x01,0x00,0x00]
+ vcvttsd2usis ecx, qword ptr [edi + 4*eax + 291]
+
+// CHECK: vcvttsd2usis ecx, qword ptr [eax]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6c,0x08]
+ vcvttsd2usis ecx, qword ptr [eax]
+
+// CHECK: vcvttsd2usis ecx, qword ptr [2*ebp - 256]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6c,0x0c,0x6d,0x00,0xff,0xff,0xff]
+ vcvttsd2usis ecx, qword ptr [2*ebp - 256]
+
+// CHECK: vcvttsd2usis ecx, qword ptr [ecx + 1016]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6c,0x49,0x7f]
+ vcvttsd2usis ecx, qword ptr [ecx + 1016]
+
+// CHECK: vcvttsd2usis ecx, qword ptr [edx - 1024]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6c,0x4a,0x80]
+ vcvttsd2usis ecx, qword ptr [edx - 1024]
+
+// CHECK: vcvttss2sis ecx, xmm2
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6d,0xca]
+ vcvttss2sis ecx, xmm2
+
+// CHECK: vcvttss2sis ecx, xmm2, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x6d,0xca]
+ vcvttss2sis ecx, xmm2, {sae}
+
+// CHECK: vcvttss2sis ecx, dword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6d,0x8c,0xf4,0x00,0x00,0x00,0x10]
+ vcvttss2sis ecx, dword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttss2sis ecx, dword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6d,0x8c,0x87,0x23,0x01,0x00,0x00]
+ vcvttss2sis ecx, dword ptr [edi + 4*eax + 291]
+
+// CHECK: vcvttss2sis ecx, dword ptr [eax]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6d,0x08]
+ vcvttss2sis ecx, dword ptr [eax]
+
+// CHECK: vcvttss2sis ecx, dword ptr [2*ebp - 128]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6d,0x0c,0x6d,0x80,0xff,0xff,0xff]
+ vcvttss2sis ecx, dword ptr [2*ebp - 128]
+
+// CHECK: vcvttss2sis ecx, dword ptr [ecx + 508]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6d,0x49,0x7f]
+ vcvttss2sis ecx, dword ptr [ecx + 508]
+
+// CHECK: vcvttss2sis ecx, dword ptr [edx - 512]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6d,0x4a,0x80]
+ vcvttss2sis ecx, dword ptr [edx - 512]
+
+// CHECK: vcvttss2usis ecx, xmm2
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6c,0xca]
+ vcvttss2usis ecx, xmm2
+
+// CHECK: vcvttss2usis ecx, xmm2, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7e,0x18,0x6c,0xca]
+ vcvttss2usis ecx, xmm2, {sae}
+
+// CHECK: vcvttss2usis ecx, dword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6c,0x8c,0xf4,0x00,0x00,0x00,0x10]
+ vcvttss2usis ecx, dword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttss2usis ecx, dword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6c,0x8c,0x87,0x23,0x01,0x00,0x00]
+ vcvttss2usis ecx, dword ptr [edi + 4*eax + 291]
+
+// CHECK: vcvttss2usis ecx, dword ptr [eax]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6c,0x08]
+ vcvttss2usis ecx, dword ptr [eax]
+
+// CHECK: vcvttss2usis ecx, dword ptr [2*ebp - 128]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6c,0x0c,0x6d,0x80,0xff,0xff,0xff]
+ vcvttss2usis ecx, dword ptr [2*ebp - 128]
+
+// CHECK: vcvttss2usis ecx, dword ptr [ecx + 508]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6c,0x49,0x7f]
+ vcvttss2usis ecx, dword ptr [ecx + 508]
+
+// CHECK: vcvttss2usis ecx, dword ptr [edx - 512]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6c,0x4a,0x80]
+ vcvttss2usis ecx, dword ptr [edx - 512]
+
+// CHECK: vcvttpd2dqs xmm2, xmm3
+// CHECK: encoding: [0x62,0xf5,0xfc,0x08,0x6d,0xd3]
+ vcvttpd2dqs xmm2, xmm3
+
+// CHECK: vcvttpd2dqs xmm2 {k7}, xmm3
+// CHECK: encoding: [0x62,0xf5,0xfc,0x0f,0x6d,0xd3]
+ vcvttpd2dqs xmm2 {k7}, xmm3
+
+// CHECK: vcvttpd2dqs xmm2 {k7} {z}, xmm3
+// CHECK: encoding: [0x62,0xf5,0xfc,0x8f,0x6d,0xd3]
+ vcvttpd2dqs xmm2 {k7} {z}, xmm3
+
+// CHECK: vcvttpd2dqs xmm2, ymm3
+// CHECK: encoding: [0x62,0xf5,0xfc,0x28,0x6d,0xd3]
+ vcvttpd2dqs xmm2, ymm3
+
+// CHECK: vcvttpd2dqs xmm2, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0xf8,0x18,0x6d,0xd3]
+ vcvttpd2dqs xmm2, ymm3, {sae}
+
+// CHECK: vcvttpd2dqs xmm2 {k7}, ymm3
+// CHECK: encoding: [0x62,0xf5,0xfc,0x2f,0x6d,0xd3]
+ vcvttpd2dqs xmm2 {k7}, ymm3
+
+// CHECK: vcvttpd2dqs xmm2 {k7} {z}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0xf8,0x9f,0x6d,0xd3]
+ vcvttpd2dqs xmm2 {k7} {z}, ymm3, {sae}
+
+// CHECK: vcvttpd2dqs ymm2, zmm3
+// CHECK: encoding: [0x62,0xf5,0xfc,0x48,0x6d,0xd3]
+ vcvttpd2dqs ymm2, zmm3
+
+// CHECK: vcvttpd2dqs ymm2, zmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x18,0x6d,0xd3]
+ vcvttpd2dqs ymm2, zmm3, {sae}
+
+// CHECK: vcvttpd2dqs ymm2 {k7}, zmm3
+// CHECK: encoding: [0x62,0xf5,0xfc,0x4f,0x6d,0xd3]
+ vcvttpd2dqs ymm2 {k7}, zmm3
+
+// CHECK: vcvttpd2dqs ymm2 {k7} {z}, zmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x9f,0x6d,0xd3]
+ vcvttpd2dqs ymm2 {k7} {z}, zmm3, {sae}
+
+// CHECK: vcvttpd2dqs xmm2, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0xfc,0x08,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttpd2dqs xmm2, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttpd2dqs xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0xfc,0x0f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttpd2dqs xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vcvttpd2dqs xmm2, qword ptr [eax]{1to2}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x18,0x6d,0x10]
+ vcvttpd2dqs xmm2, qword ptr [eax]{1to2}
+
+// CHECK: vcvttpd2dqs xmm2, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf5,0xfc,0x08,0x6d,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttpd2dqs xmm2, xmmword ptr [2*ebp - 512]
+
+// CHECK: vcvttpd2dqs xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0xfc,0x8f,0x6d,0x51,0x7f]
+ vcvttpd2dqs xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+
+// CHECK: vcvttpd2dqs xmm2 {k7} {z}, qword ptr [edx - 1024]{1to2}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x9f,0x6d,0x52,0x80]
+ vcvttpd2dqs xmm2 {k7} {z}, qword ptr [edx - 1024]{1to2}
+
+// CHECK: vcvttpd2dqs xmm2, qword ptr [eax]{1to4}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x38,0x6d,0x10]
+ vcvttpd2dqs xmm2, qword ptr [eax]{1to4}
+
+// CHECK: vcvttpd2dqs xmm2, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf5,0xfc,0x28,0x6d,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttpd2dqs xmm2, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vcvttpd2dqs xmm2 {k7} {z}, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0xfc,0xaf,0x6d,0x51,0x7f]
+ vcvttpd2dqs xmm2 {k7} {z}, ymmword ptr [ecx + 4064]
+
+// CHECK: vcvttpd2dqs xmm2 {k7} {z}, qword ptr [edx - 1024]{1to4}
+// CHECK: encoding: [0x62,0xf5,0xfc,0xbf,0x6d,0x52,0x80]
+ vcvttpd2dqs xmm2 {k7} {z}, qword ptr [edx - 1024]{1to4}
+
+// CHECK: vcvttpd2dqs ymm2, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0xfc,0x48,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttpd2dqs ymm2, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttpd2dqs ymm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0xfc,0x4f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttpd2dqs ymm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vcvttpd2dqs ymm2, qword ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x58,0x6d,0x10]
+ vcvttpd2dqs ymm2, qword ptr [eax]{1to8}
+
+// CHECK: vcvttpd2dqs ymm2, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf5,0xfc,0x48,0x6d,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vcvttpd2dqs ymm2, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vcvttpd2dqs ymm2 {k7} {z}, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0xfc,0xcf,0x6d,0x51,0x7f]
+ vcvttpd2dqs ymm2 {k7} {z}, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvttpd2dqs ymm2 {k7} {z}, qword ptr [edx - 1024]{1to8}
+// CHECK: encoding: [0x62,0xf5,0xfc,0xdf,0x6d,0x52,0x80]
+ vcvttpd2dqs ymm2 {k7} {z}, qword ptr [edx - 1024]{1to8}
+
+// CHECK: vcvttpd2qqs xmm2, xmm3
+// CHECK: encoding: [0x62,0xf5,0xfd,0x08,0x6d,0xd3]
+ vcvttpd2qqs xmm2, xmm3
+
+// CHECK: vcvttpd2qqs xmm2 {k7}, xmm3
+// CHECK: encoding: [0x62,0xf5,0xfd,0x0f,0x6d,0xd3]
+ vcvttpd2qqs xmm2 {k7}, xmm3
+
+// CHECK: vcvttpd2qqs xmm2 {k7} {z}, xmm3
+// CHECK: encoding: [0x62,0xf5,0xfd,0x8f,0x6d,0xd3]
+ vcvttpd2qqs xmm2 {k7} {z}, xmm3
+
+// CHECK: vcvttpd2qqs ymm2, ymm3
+// CHECK: encoding: [0x62,0xf5,0xfd,0x28,0x6d,0xd3]
+ vcvttpd2qqs ymm2, ymm3
+
+// CHECK: vcvttpd2qqs ymm2, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0xf9,0x18,0x6d,0xd3]
+ vcvttpd2qqs ymm2, ymm3, {sae}
+
+// CHECK: vcvttpd2qqs ymm2 {k7}, ymm3
+// CHECK: encoding: [0x62,0xf5,0xfd,0x2f,0x6d,0xd3]
+ vcvttpd2qqs ymm2 {k7}, ymm3
+
+// CHECK: vcvttpd2qqs ymm2 {k7} {z}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0xf9,0x9f,0x6d,0xd3]
+ vcvttpd2qqs ymm2 {k7} {z}, ymm3, {sae}
+
+// CHECK: vcvttpd2qqs zmm2, zmm3
+// CHECK: encoding: [0x62,0xf5,0xfd,0x48,0x6d,0xd3]
+ vcvttpd2qqs zmm2, zmm3
+
+// CHECK: vcvttpd2qqs zmm2, zmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x18,0x6d,0xd3]
+ vcvttpd2qqs zmm2, zmm3, {sae}
+
+// CHECK: vcvttpd2qqs zmm2 {k7}, zmm3
+// CHECK: encoding: [0x62,0xf5,0xfd,0x4f,0x6d,0xd3]
+ vcvttpd2qqs zmm2 {k7}, zmm3
+
+// CHECK: vcvttpd2qqs zmm2 {k7} {z}, zmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x9f,0x6d,0xd3]
+ vcvttpd2qqs zmm2 {k7} {z}, zmm3, {sae}
+
+// CHECK: vcvttpd2qqs xmm2, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0xfd,0x08,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttpd2qqs xmm2, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttpd2qqs xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0xfd,0x0f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttpd2qqs xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vcvttpd2qqs xmm2, qword ptr [eax]{1to2}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x18,0x6d,0x10]
+ vcvttpd2qqs xmm2, qword ptr [eax]{1to2}
+
+// CHECK: vcvttpd2qqs xmm2, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf5,0xfd,0x08,0x6d,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttpd2qqs xmm2, xmmword ptr [2*ebp - 512]
+
+// CHECK: vcvttpd2qqs xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0xfd,0x8f,0x6d,0x51,0x7f]
+ vcvttpd2qqs xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+
+// CHECK: vcvttpd2qqs xmm2 {k7} {z}, qword ptr [edx - 1024]{1to2}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x9f,0x6d,0x52,0x80]
+ vcvttpd2qqs xmm2 {k7} {z}, qword ptr [edx - 1024]{1to2}
+
+// CHECK: vcvttpd2qqs ymm2, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0xfd,0x28,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttpd2qqs ymm2, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttpd2qqs ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0xfd,0x2f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttpd2qqs ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vcvttpd2qqs ymm2, qword ptr [eax]{1to4}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x38,0x6d,0x10]
+ vcvttpd2qqs ymm2, qword ptr [eax]{1to4}
+
+// CHECK: vcvttpd2qqs ymm2, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf5,0xfd,0x28,0x6d,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttpd2qqs ymm2, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vcvttpd2qqs ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0xfd,0xaf,0x6d,0x51,0x7f]
+ vcvttpd2qqs ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+
+// CHECK: vcvttpd2qqs ymm2 {k7} {z}, qword ptr [edx - 1024]{1to4}
+// CHECK: encoding: [0x62,0xf5,0xfd,0xbf,0x6d,0x52,0x80]
+ vcvttpd2qqs ymm2 {k7} {z}, qword ptr [edx - 1024]{1to4}
+
+// CHECK: vcvttpd2qqs zmm2, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0xfd,0x48,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttpd2qqs zmm2, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttpd2qqs zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0xfd,0x4f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttpd2qqs zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vcvttpd2qqs zmm2, qword ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x58,0x6d,0x10]
+ vcvttpd2qqs zmm2, qword ptr [eax]{1to8}
+
+// CHECK: vcvttpd2qqs zmm2, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf5,0xfd,0x48,0x6d,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vcvttpd2qqs zmm2, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vcvttpd2qqs zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0xfd,0xcf,0x6d,0x51,0x7f]
+ vcvttpd2qqs zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvttpd2qqs zmm2 {k7} {z}, qword ptr [edx - 1024]{1to8}
+// CHECK: encoding: [0x62,0xf5,0xfd,0xdf,0x6d,0x52,0x80]
+ vcvttpd2qqs zmm2 {k7} {z}, qword ptr [edx - 1024]{1to8}
+
+// CHECK: vcvttpd2udqs xmm2, xmm3
+// CHECK: encoding: [0x62,0xf5,0xfc,0x08,0x6c,0xd3]
+ vcvttpd2udqs xmm2, xmm3
+
+// CHECK: vcvttpd2udqs xmm2 {k7}, xmm3
+// CHECK: encoding: [0x62,0xf5,0xfc,0x0f,0x6c,0xd3]
+ vcvttpd2udqs xmm2 {k7}, xmm3
+
+// CHECK: vcvttpd2udqs xmm2 {k7} {z}, xmm3
+// CHECK: encoding: [0x62,0xf5,0xfc,0x8f,0x6c,0xd3]
+ vcvttpd2udqs xmm2 {k7} {z}, xmm3
+
+// CHECK: vcvttpd2udqs xmm2, ymm3
+// CHECK: encoding: [0x62,0xf5,0xfc,0x28,0x6c,0xd3]
+ vcvttpd2udqs xmm2, ymm3
+
+// CHECK: vcvttpd2udqs xmm2, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0xf8,0x18,0x6c,0xd3]
+ vcvttpd2udqs xmm2, ymm3, {sae}
+
+// CHECK: vcvttpd2udqs xmm2 {k7}, ymm3
+// CHECK: encoding: [0x62,0xf5,0xfc,0x2f,0x6c,0xd3]
+ vcvttpd2udqs xmm2 {k7}, ymm3
+
+// CHECK: vcvttpd2udqs xmm2 {k7} {z}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0xf8,0x9f,0x6c,0xd3]
+ vcvttpd2udqs xmm2 {k7} {z}, ymm3, {sae}
+
+// CHECK: vcvttpd2udqs ymm2, zmm3
+// CHECK: encoding: [0x62,0xf5,0xfc,0x48,0x6c,0xd3]
+ vcvttpd2udqs ymm2, zmm3
+
+// CHECK: vcvttpd2udqs ymm2, zmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x18,0x6c,0xd3]
+ vcvttpd2udqs ymm2, zmm3, {sae}
+
+// CHECK: vcvttpd2udqs ymm2 {k7}, zmm3
+// CHECK: encoding: [0x62,0xf5,0xfc,0x4f,0x6c,0xd3]
+ vcvttpd2udqs ymm2 {k7}, zmm3
+
+// CHECK: vcvttpd2udqs ymm2 {k7} {z}, zmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x9f,0x6c,0xd3]
+ vcvttpd2udqs ymm2 {k7} {z}, zmm3, {sae}
+
+// CHECK: vcvttpd2udqs xmm2, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0xfc,0x08,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttpd2udqs xmm2, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttpd2udqs xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0xfc,0x0f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttpd2udqs xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vcvttpd2udqs xmm2, qword ptr [eax]{1to2}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x18,0x6c,0x10]
+ vcvttpd2udqs xmm2, qword ptr [eax]{1to2}
+
+// CHECK: vcvttpd2udqs xmm2, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf5,0xfc,0x08,0x6c,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttpd2udqs xmm2, xmmword ptr [2*ebp - 512]
+
+// CHECK: vcvttpd2udqs xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0xfc,0x8f,0x6c,0x51,0x7f]
+ vcvttpd2udqs xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+
+// CHECK: vcvttpd2udqs xmm2 {k7} {z}, qword ptr [edx - 1024]{1to2}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x9f,0x6c,0x52,0x80]
+ vcvttpd2udqs xmm2 {k7} {z}, qword ptr [edx - 1024]{1to2}
+
+// CHECK: vcvttpd2udqs xmm2, qword ptr [eax]{1to4}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x38,0x6c,0x10]
+ vcvttpd2udqs xmm2, qword ptr [eax]{1to4}
+
+// CHECK: vcvttpd2udqs xmm2, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf5,0xfc,0x28,0x6c,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttpd2udqs xmm2, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vcvttpd2udqs xmm2 {k7} {z}, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0xfc,0xaf,0x6c,0x51,0x7f]
+ vcvttpd2udqs xmm2 {k7} {z}, ymmword ptr [ecx + 4064]
+
+// CHECK: vcvttpd2udqs xmm2 {k7} {z}, qword ptr [edx - 1024]{1to4}
+// CHECK: encoding: [0x62,0xf5,0xfc,0xbf,0x6c,0x52,0x80]
+ vcvttpd2udqs xmm2 {k7} {z}, qword ptr [edx - 1024]{1to4}
+
+// CHECK: vcvttpd2udqs ymm2, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0xfc,0x48,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttpd2udqs ymm2, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttpd2udqs ymm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0xfc,0x4f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttpd2udqs ymm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vcvttpd2udqs ymm2, qword ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf5,0xfc,0x58,0x6c,0x10]
+ vcvttpd2udqs ymm2, qword ptr [eax]{1to8}
+
+// CHECK: vcvttpd2udqs ymm2, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf5,0xfc,0x48,0x6c,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vcvttpd2udqs ymm2, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vcvttpd2udqs ymm2 {k7} {z}, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0xfc,0xcf,0x6c,0x51,0x7f]
+ vcvttpd2udqs ymm2 {k7} {z}, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvttpd2udqs ymm2 {k7} {z}, qword ptr [edx - 1024]{1to8}
+// CHECK: encoding: [0x62,0xf5,0xfc,0xdf,0x6c,0x52,0x80]
+ vcvttpd2udqs ymm2 {k7} {z}, qword ptr [edx - 1024]{1to8}
+
+// CHECK: vcvttpd2uqqs xmm2, xmm3
+// CHECK: encoding: [0x62,0xf5,0xfd,0x08,0x6c,0xd3]
+ vcvttpd2uqqs xmm2, xmm3
+
+// CHECK: vcvttpd2uqqs xmm2 {k7}, xmm3
+// CHECK: encoding: [0x62,0xf5,0xfd,0x0f,0x6c,0xd3]
+ vcvttpd2uqqs xmm2 {k7}, xmm3
+
+// CHECK: vcvttpd2uqqs xmm2 {k7} {z}, xmm3
+// CHECK: encoding: [0x62,0xf5,0xfd,0x8f,0x6c,0xd3]
+ vcvttpd2uqqs xmm2 {k7} {z}, xmm3
+
+// CHECK: vcvttpd2uqqs ymm2, ymm3
+// CHECK: encoding: [0x62,0xf5,0xfd,0x28,0x6c,0xd3]
+ vcvttpd2uqqs ymm2, ymm3
+
+// CHECK: vcvttpd2uqqs ymm2, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0xf9,0x18,0x6c,0xd3]
+ vcvttpd2uqqs ymm2, ymm3, {sae}
+
+// CHECK: vcvttpd2uqqs ymm2 {k7}, ymm3
+// CHECK: encoding: [0x62,0xf5,0xfd,0x2f,0x6c,0xd3]
+ vcvttpd2uqqs ymm2 {k7}, ymm3
+
+// CHECK: vcvttpd2uqqs ymm2 {k7} {z}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0xf9,0x9f,0x6c,0xd3]
+ vcvttpd2uqqs ymm2 {k7} {z}, ymm3, {sae}
+
+// CHECK: vcvttpd2uqqs zmm2, zmm3
+// CHECK: encoding: [0x62,0xf5,0xfd,0x48,0x6c,0xd3]
+ vcvttpd2uqqs zmm2, zmm3
+
+// CHECK: vcvttpd2uqqs zmm2, zmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x18,0x6c,0xd3]
+ vcvttpd2uqqs zmm2, zmm3, {sae}
+
+// CHECK: vcvttpd2uqqs zmm2 {k7}, zmm3
+// CHECK: encoding: [0x62,0xf5,0xfd,0x4f,0x6c,0xd3]
+ vcvttpd2uqqs zmm2 {k7}, zmm3
+
+// CHECK: vcvttpd2uqqs zmm2 {k7} {z}, zmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x9f,0x6c,0xd3]
+ vcvttpd2uqqs zmm2 {k7} {z}, zmm3, {sae}
+
+// CHECK: vcvttpd2uqqs xmm2, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0xfd,0x08,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttpd2uqqs xmm2, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttpd2uqqs xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0xfd,0x0f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttpd2uqqs xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vcvttpd2uqqs xmm2, qword ptr [eax]{1to2}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x18,0x6c,0x10]
+ vcvttpd2uqqs xmm2, qword ptr [eax]{1to2}
+
+// CHECK: vcvttpd2uqqs xmm2, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf5,0xfd,0x08,0x6c,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttpd2uqqs xmm2, xmmword ptr [2*ebp - 512]
+
+// CHECK: vcvttpd2uqqs xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0xfd,0x8f,0x6c,0x51,0x7f]
+ vcvttpd2uqqs xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+
+// CHECK: vcvttpd2uqqs xmm2 {k7} {z}, qword ptr [edx - 1024]{1to2}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x9f,0x6c,0x52,0x80]
+ vcvttpd2uqqs xmm2 {k7} {z}, qword ptr [edx - 1024]{1to2}
+
+// CHECK: vcvttpd2uqqs ymm2, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0xfd,0x28,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttpd2uqqs ymm2, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttpd2uqqs ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0xfd,0x2f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttpd2uqqs ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vcvttpd2uqqs ymm2, qword ptr [eax]{1to4}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x38,0x6c,0x10]
+ vcvttpd2uqqs ymm2, qword ptr [eax]{1to4}
+
+// CHECK: vcvttpd2uqqs ymm2, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf5,0xfd,0x28,0x6c,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttpd2uqqs ymm2, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vcvttpd2uqqs ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0xfd,0xaf,0x6c,0x51,0x7f]
+ vcvttpd2uqqs ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+
+// CHECK: vcvttpd2uqqs ymm2 {k7} {z}, qword ptr [edx - 1024]{1to4}
+// CHECK: encoding: [0x62,0xf5,0xfd,0xbf,0x6c,0x52,0x80]
+ vcvttpd2uqqs ymm2 {k7} {z}, qword ptr [edx - 1024]{1to4}
+
+// CHECK: vcvttpd2uqqs zmm2, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0xfd,0x48,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttpd2uqqs zmm2, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttpd2uqqs zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0xfd,0x4f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttpd2uqqs zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vcvttpd2uqqs zmm2, qword ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf5,0xfd,0x58,0x6c,0x10]
+ vcvttpd2uqqs zmm2, qword ptr [eax]{1to8}
+
+// CHECK: vcvttpd2uqqs zmm2, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf5,0xfd,0x48,0x6c,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vcvttpd2uqqs zmm2, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vcvttpd2uqqs zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0xfd,0xcf,0x6c,0x51,0x7f]
+ vcvttpd2uqqs zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvttpd2uqqs zmm2 {k7} {z}, qword ptr [edx - 1024]{1to8}
+// CHECK: encoding: [0x62,0xf5,0xfd,0xdf,0x6c,0x52,0x80]
+ vcvttpd2uqqs zmm2 {k7} {z}, qword ptr [edx - 1024]{1to8}
+
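+// Single-precision sources follow; since the element type narrows from
+// double to single, the embedded-broadcast memory operand narrows
+// accordingly, from qword {1toN} to dword {1toN}.
+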
+// CHECK: vcvttps2dqs xmm2, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x6d,0xd3]
+ vcvttps2dqs xmm2, xmm3
+
+// CHECK: vcvttps2dqs xmm2 {k7}, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7c,0x0f,0x6d,0xd3]
+ vcvttps2dqs xmm2 {k7}, xmm3
+
+// CHECK: vcvttps2dqs xmm2 {k7} {z}, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7c,0x8f,0x6d,0xd3]
+ vcvttps2dqs xmm2 {k7} {z}, xmm3
+
+// CHECK: vcvttps2dqs ymm2, ymm3
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x6d,0xd3]
+ vcvttps2dqs ymm2, ymm3
+
+// CHECK: vcvttps2dqs ymm2, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x78,0x18,0x6d,0xd3]
+ vcvttps2dqs ymm2, ymm3, {sae}
+
+// CHECK: vcvttps2dqs ymm2 {k7}, ymm3
+// CHECK: encoding: [0x62,0xf5,0x7c,0x2f,0x6d,0xd3]
+ vcvttps2dqs ymm2 {k7}, ymm3
+
+// CHECK: vcvttps2dqs ymm2 {k7} {z}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x78,0x9f,0x6d,0xd3]
+ vcvttps2dqs ymm2 {k7} {z}, ymm3, {sae}
+
+// CHECK: vcvttps2dqs zmm2, zmm3
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x6d,0xd3]
+ vcvttps2dqs zmm2, zmm3
+
+// CHECK: vcvttps2dqs zmm2, zmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x6d,0xd3]
+ vcvttps2dqs zmm2, zmm3, {sae}
+
+// CHECK: vcvttps2dqs zmm2 {k7}, zmm3
+// CHECK: encoding: [0x62,0xf5,0x7c,0x4f,0x6d,0xd3]
+ vcvttps2dqs zmm2 {k7}, zmm3
+
+// CHECK: vcvttps2dqs zmm2 {k7} {z}, zmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x9f,0x6d,0xd3]
+ vcvttps2dqs zmm2 {k7} {z}, zmm3, {sae}
+
+// CHECK: vcvttps2dqs xmm2, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttps2dqs xmm2, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttps2dqs xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x0f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttps2dqs xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vcvttps2dqs xmm2, dword ptr [eax]{1to4}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x6d,0x10]
+ vcvttps2dqs xmm2, dword ptr [eax]{1to4}
+
+// CHECK: vcvttps2dqs xmm2, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x6d,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttps2dqs xmm2, xmmword ptr [2*ebp - 512]
+
+// CHECK: vcvttps2dqs xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x8f,0x6d,0x51,0x7f]
+ vcvttps2dqs xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+
+// CHECK: vcvttps2dqs xmm2 {k7} {z}, dword ptr [edx - 512]{1to4}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x9f,0x6d,0x52,0x80]
+ vcvttps2dqs xmm2 {k7} {z}, dword ptr [edx - 512]{1to4}
+
+// CHECK: vcvttps2dqs ymm2, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttps2dqs ymm2, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttps2dqs ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x2f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttps2dqs ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vcvttps2dqs ymm2, dword ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x38,0x6d,0x10]
+ vcvttps2dqs ymm2, dword ptr [eax]{1to8}
+
+// CHECK: vcvttps2dqs ymm2, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x6d,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttps2dqs ymm2, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vcvttps2dqs ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0x7c,0xaf,0x6d,0x51,0x7f]
+ vcvttps2dqs ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+
+// CHECK: vcvttps2dqs ymm2 {k7} {z}, dword ptr [edx - 512]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xbf,0x6d,0x52,0x80]
+ vcvttps2dqs ymm2 {k7} {z}, dword ptr [edx - 512]{1to8}
+
+// CHECK: vcvttps2dqs zmm2, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttps2dqs zmm2, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttps2dqs zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x4f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttps2dqs zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vcvttps2dqs zmm2, dword ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x58,0x6d,0x10]
+ vcvttps2dqs zmm2, dword ptr [eax]{1to16}
+
+// CHECK: vcvttps2dqs zmm2, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x6d,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vcvttps2dqs zmm2, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vcvttps2dqs zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x7c,0xcf,0x6d,0x51,0x7f]
+ vcvttps2dqs zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvttps2dqs zmm2 {k7} {z}, dword ptr [edx - 512]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xdf,0x6d,0x52,0x80]
+ vcvttps2dqs zmm2 {k7} {z}, dword ptr [edx - 512]{1to16}
+
+// CHECK: vcvttps2qqs xmm2, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6d,0xd3]
+ vcvttps2qqs xmm2, xmm3
+
+// CHECK: vcvttps2qqs xmm2 {k7}, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x6d,0xd3]
+ vcvttps2qqs xmm2 {k7}, xmm3
+
+// CHECK: vcvttps2qqs xmm2 {k7} {z}, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x6d,0xd3]
+ vcvttps2qqs xmm2 {k7} {z}, xmm3
+
+// CHECK: vcvttps2qqs ymm2, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x6d,0xd3]
+ vcvttps2qqs ymm2, xmm3
+
+// CHECK: vcvttps2qqs ymm2, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x79,0x18,0x6d,0xd3]
+ vcvttps2qqs ymm2, xmm3, {sae}
+
+// CHECK: vcvttps2qqs ymm2 {k7}, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x6d,0xd3]
+ vcvttps2qqs ymm2 {k7}, xmm3
+
+// CHECK: vcvttps2qqs ymm2 {k7} {z}, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x79,0x9f,0x6d,0xd3]
+ vcvttps2qqs ymm2 {k7} {z}, xmm3, {sae}
+
+// CHECK: vcvttps2qqs zmm2, ymm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x6d,0xd3]
+ vcvttps2qqs zmm2, ymm3
+
+// CHECK: vcvttps2qqs zmm2, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x6d,0xd3]
+ vcvttps2qqs zmm2, ymm3, {sae}
+
+// CHECK: vcvttps2qqs zmm2 {k7}, ymm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x6d,0xd3]
+ vcvttps2qqs zmm2 {k7}, ymm3
+
+// CHECK: vcvttps2qqs zmm2 {k7} {z}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x6d,0xd3]
+ vcvttps2qqs zmm2 {k7} {z}, ymm3, {sae}
+
+// CHECK: vcvttps2qqs xmm2, qword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttps2qqs xmm2, qword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttps2qqs xmm2 {k7}, qword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttps2qqs xmm2 {k7}, qword ptr [edi + 4*eax + 291]
+
+// CHECK: vcvttps2qqs xmm2, dword ptr [eax]{1to2}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x6d,0x10]
+ vcvttps2qqs xmm2, dword ptr [eax]{1to2}
+
+// CHECK: vcvttps2qqs xmm2, qword ptr [2*ebp - 256]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6d,0x14,0x6d,0x00,0xff,0xff,0xff]
+ vcvttps2qqs xmm2, qword ptr [2*ebp - 256]
+
+// CHECK: vcvttps2qqs xmm2 {k7} {z}, qword ptr [ecx + 1016]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x6d,0x51,0x7f]
+ vcvttps2qqs xmm2 {k7} {z}, qword ptr [ecx + 1016]
+
+// CHECK: vcvttps2qqs xmm2 {k7} {z}, dword ptr [edx - 512]{1to2}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x6d,0x52,0x80]
+ vcvttps2qqs xmm2 {k7} {z}, dword ptr [edx - 512]{1to2}
+
+// CHECK: vcvttps2qqs ymm2, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttps2qqs ymm2, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttps2qqs ymm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttps2qqs ymm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vcvttps2qqs ymm2, dword ptr [eax]{1to4}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x6d,0x10]
+ vcvttps2qqs ymm2, dword ptr [eax]{1to4}
+
+// CHECK: vcvttps2qqs ymm2, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x6d,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttps2qqs ymm2, xmmword ptr [2*ebp - 512]
+
+// CHECK: vcvttps2qqs ymm2 {k7} {z}, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x6d,0x51,0x7f]
+ vcvttps2qqs ymm2 {k7} {z}, xmmword ptr [ecx + 2032]
+
+// CHECK: vcvttps2qqs ymm2 {k7} {z}, dword ptr [edx - 512]{1to4}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x6d,0x52,0x80]
+ vcvttps2qqs ymm2 {k7} {z}, dword ptr [edx - 512]{1to4}
+
+// CHECK: vcvttps2qqs zmm2, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x6d,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttps2qqs zmm2, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttps2qqs zmm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x6d,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttps2qqs zmm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vcvttps2qqs zmm2, dword ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x6d,0x10]
+ vcvttps2qqs zmm2, dword ptr [eax]{1to8}
+
+// CHECK: vcvttps2qqs zmm2, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x6d,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttps2qqs zmm2, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vcvttps2qqs zmm2 {k7} {z}, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x6d,0x51,0x7f]
+ vcvttps2qqs zmm2 {k7} {z}, ymmword ptr [ecx + 4064]
+
+// CHECK: vcvttps2qqs zmm2 {k7} {z}, dword ptr [edx - 512]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x6d,0x52,0x80]
+ vcvttps2qqs zmm2 {k7} {z}, dword ptr [edx - 512]{1to8}
+
+// CHECK: vcvttps2udqs xmm2, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x6c,0xd3]
+ vcvttps2udqs xmm2, xmm3
+
+// CHECK: vcvttps2udqs xmm2 {k7}, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7c,0x0f,0x6c,0xd3]
+ vcvttps2udqs xmm2 {k7}, xmm3
+
+// CHECK: vcvttps2udqs xmm2 {k7} {z}, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7c,0x8f,0x6c,0xd3]
+ vcvttps2udqs xmm2 {k7} {z}, xmm3
+
+// CHECK: vcvttps2udqs ymm2, ymm3
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x6c,0xd3]
+ vcvttps2udqs ymm2, ymm3
+
+// CHECK: vcvttps2udqs ymm2, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x78,0x18,0x6c,0xd3]
+ vcvttps2udqs ymm2, ymm3, {sae}
+
+// CHECK: vcvttps2udqs ymm2 {k7}, ymm3
+// CHECK: encoding: [0x62,0xf5,0x7c,0x2f,0x6c,0xd3]
+ vcvttps2udqs ymm2 {k7}, ymm3
+
+// CHECK: vcvttps2udqs ymm2 {k7} {z}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x78,0x9f,0x6c,0xd3]
+ vcvttps2udqs ymm2 {k7} {z}, ymm3, {sae}
+
+// CHECK: vcvttps2udqs zmm2, zmm3
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x6c,0xd3]
+ vcvttps2udqs zmm2, zmm3
+
+// CHECK: vcvttps2udqs zmm2, zmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x6c,0xd3]
+ vcvttps2udqs zmm2, zmm3, {sae}
+
+// CHECK: vcvttps2udqs zmm2 {k7}, zmm3
+// CHECK: encoding: [0x62,0xf5,0x7c,0x4f,0x6c,0xd3]
+ vcvttps2udqs zmm2 {k7}, zmm3
+
+// CHECK: vcvttps2udqs zmm2 {k7} {z}, zmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x9f,0x6c,0xd3]
+ vcvttps2udqs zmm2 {k7} {z}, zmm3, {sae}
+
+// CHECK: vcvttps2udqs xmm2, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttps2udqs xmm2, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttps2udqs xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x0f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttps2udqs xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vcvttps2udqs xmm2, dword ptr [eax]{1to4}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x18,0x6c,0x10]
+ vcvttps2udqs xmm2, dword ptr [eax]{1to4}
+
+// CHECK: vcvttps2udqs xmm2, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0x6c,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttps2udqs xmm2, xmmword ptr [2*ebp - 512]
+
+// CHECK: vcvttps2udqs xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x8f,0x6c,0x51,0x7f]
+ vcvttps2udqs xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+
+// CHECK: vcvttps2udqs xmm2 {k7} {z}, dword ptr [edx - 512]{1to4}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x9f,0x6c,0x52,0x80]
+ vcvttps2udqs xmm2 {k7} {z}, dword ptr [edx - 512]{1to4}
+
+// CHECK: vcvttps2udqs ymm2, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttps2udqs ymm2, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttps2udqs ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x2f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttps2udqs ymm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vcvttps2udqs ymm2, dword ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x38,0x6c,0x10]
+ vcvttps2udqs ymm2, dword ptr [eax]{1to8}
+
+// CHECK: vcvttps2udqs ymm2, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x28,0x6c,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttps2udqs ymm2, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vcvttps2udqs ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0x7c,0xaf,0x6c,0x51,0x7f]
+ vcvttps2udqs ymm2 {k7} {z}, ymmword ptr [ecx + 4064]
+
+// CHECK: vcvttps2udqs ymm2 {k7} {z}, dword ptr [edx - 512]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xbf,0x6c,0x52,0x80]
+ vcvttps2udqs ymm2 {k7} {z}, dword ptr [edx - 512]{1to8}
+
+// CHECK: vcvttps2udqs zmm2, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttps2udqs zmm2, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttps2udqs zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x4f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttps2udqs zmm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vcvttps2udqs zmm2, dword ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7c,0x58,0x6c,0x10]
+ vcvttps2udqs zmm2, dword ptr [eax]{1to16}
+
+// CHECK: vcvttps2udqs zmm2, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x48,0x6c,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vcvttps2udqs zmm2, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vcvttps2udqs zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf5,0x7c,0xcf,0x6c,0x51,0x7f]
+ vcvttps2udqs zmm2 {k7} {z}, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvttps2udqs zmm2 {k7} {z}, dword ptr [edx - 512]{1to16}
+// CHECK: encoding: [0x62,0xf5,0x7c,0xdf,0x6c,0x52,0x80]
+ vcvttps2udqs zmm2 {k7} {z}, dword ptr [edx - 512]{1to16}
+
+// CHECK: vcvttps2uqqs xmm2, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6c,0xd3]
+ vcvttps2uqqs xmm2, xmm3
+
+// CHECK: vcvttps2uqqs xmm2 {k7}, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x6c,0xd3]
+ vcvttps2uqqs xmm2 {k7}, xmm3
+
+// CHECK: vcvttps2uqqs xmm2 {k7} {z}, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x6c,0xd3]
+ vcvttps2uqqs xmm2 {k7} {z}, xmm3
+
+// CHECK: vcvttps2uqqs ymm2, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x6c,0xd3]
+ vcvttps2uqqs ymm2, xmm3
+
+// CHECK: vcvttps2uqqs ymm2, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x79,0x18,0x6c,0xd3]
+ vcvttps2uqqs ymm2, xmm3, {sae}
+
+// CHECK: vcvttps2uqqs ymm2 {k7}, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x6c,0xd3]
+ vcvttps2uqqs ymm2 {k7}, xmm3
+
+// CHECK: vcvttps2uqqs ymm2 {k7} {z}, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x79,0x9f,0x6c,0xd3]
+ vcvttps2uqqs ymm2 {k7} {z}, xmm3, {sae}
+
+// CHECK: vcvttps2uqqs zmm2, ymm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x6c,0xd3]
+ vcvttps2uqqs zmm2, ymm3
+
+// CHECK: vcvttps2uqqs zmm2, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x6c,0xd3]
+ vcvttps2uqqs zmm2, ymm3, {sae}
+
+// CHECK: vcvttps2uqqs zmm2 {k7}, ymm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x6c,0xd3]
+ vcvttps2uqqs zmm2 {k7}, ymm3
+
+// CHECK: vcvttps2uqqs zmm2 {k7} {z}, ymm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x6c,0xd3]
+ vcvttps2uqqs zmm2 {k7} {z}, ymm3, {sae}
+
+// CHECK: vcvttps2uqqs xmm2, qword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttps2uqqs xmm2, qword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttps2uqqs xmm2 {k7}, qword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x0f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttps2uqqs xmm2 {k7}, qword ptr [edi + 4*eax + 291]
+
+// CHECK: vcvttps2uqqs xmm2, dword ptr [eax]{1to2}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x18,0x6c,0x10]
+ vcvttps2uqqs xmm2, dword ptr [eax]{1to2}
+
+// CHECK: vcvttps2uqqs xmm2, qword ptr [2*ebp - 256]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6c,0x14,0x6d,0x00,0xff,0xff,0xff]
+ vcvttps2uqqs xmm2, qword ptr [2*ebp - 256]
+
+// CHECK: vcvttps2uqqs xmm2 {k7} {z}, qword ptr [ecx + 1016]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x8f,0x6c,0x51,0x7f]
+ vcvttps2uqqs xmm2 {k7} {z}, qword ptr [ecx + 1016]
+
+// CHECK: vcvttps2uqqs xmm2 {k7} {z}, dword ptr [edx - 512]{1to2}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x9f,0x6c,0x52,0x80]
+ vcvttps2uqqs xmm2 {k7} {z}, dword ptr [edx - 512]{1to2}
+
+// CHECK: vcvttps2uqqs ymm2, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttps2uqqs ymm2, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttps2uqqs ymm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x2f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttps2uqqs ymm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vcvttps2uqqs ymm2, dword ptr [eax]{1to4}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x38,0x6c,0x10]
+ vcvttps2uqqs ymm2, dword ptr [eax]{1to4}
+
+// CHECK: vcvttps2uqqs ymm2, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x28,0x6c,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttps2uqqs ymm2, xmmword ptr [2*ebp - 512]
+
+// CHECK: vcvttps2uqqs ymm2 {k7} {z}, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf5,0x7d,0xaf,0x6c,0x51,0x7f]
+ vcvttps2uqqs ymm2 {k7} {z}, xmmword ptr [ecx + 2032]
+
+// CHECK: vcvttps2uqqs ymm2 {k7} {z}, dword ptr [edx - 512]{1to4}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xbf,0x6c,0x52,0x80]
+ vcvttps2uqqs ymm2 {k7} {z}, dword ptr [edx - 512]{1to4}
+
+// CHECK: vcvttps2uqqs zmm2, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x6c,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcvttps2uqqs zmm2, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvttps2uqqs zmm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x4f,0x6c,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcvttps2uqqs zmm2 {k7}, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vcvttps2uqqs zmm2, dword ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7d,0x58,0x6c,0x10]
+ vcvttps2uqqs zmm2, dword ptr [eax]{1to8}
+
+// CHECK: vcvttps2uqqs zmm2, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x48,0x6c,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttps2uqqs zmm2, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vcvttps2uqqs zmm2 {k7} {z}, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf5,0x7d,0xcf,0x6c,0x51,0x7f]
+ vcvttps2uqqs zmm2 {k7} {z}, ymmword ptr [ecx + 4064]
+
+// CHECK: vcvttps2uqqs zmm2 {k7} {z}, dword ptr [edx - 512]{1to8}
+// CHECK: encoding: [0x62,0xf5,0x7d,0xdf,0x6c,0x52,0x80]
+ vcvttps2uqqs zmm2 {k7} {z}, dword ptr [edx - 512]{1to8}
+
diff --git a/llvm/test/MC/X86/avx10_2satcvtds-64-att.s b/llvm/test/MC/X86/avx10_2satcvtds-64-att.s
new file mode 100644
index 00000000000000..c653bf52219a45
--- /dev/null
+++ b/llvm/test/MC/X86/avx10_2satcvtds-64-att.s
@@ -0,0 +1,1170 @@
+// RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s
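+
+// The cases below cover each instruction in register, {sae}, {%k7} masking,
+// {z} zeroing, memory, and {1toN} embedded-broadcast forms. A single case
+// can be re-checked in isolation through the same tool as the RUN line, e.g.:
+//   echo "vcvttsd2sis %xmm22, %ecx" | llvm-mc -triple x86_64 --show-encoding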
+
+// CHECK: vcvttsd2sis %xmm22, %ecx
+// CHECK: encoding: [0x62,0xb5,0x7f,0x08,0x6d,0xce]
+ vcvttsd2sis %xmm22, %ecx
+
+// CHECK: vcvttsd2sis {sae}, %xmm22, %ecx
+// CHECK: encoding: [0x62,0xb5,0x7f,0x18,0x6d,0xce]
+ vcvttsd2sis {sae}, %xmm22, %ecx
+
+// CHECK: vcvttsd2sis %xmm22, %r9
+// CHECK: encoding: [0x62,0x35,0xff,0x08,0x6d,0xce]
+ vcvttsd2sis %xmm22, %r9
+
+// CHECK: vcvttsd2sis {sae}, %xmm22, %r9
+// CHECK: encoding: [0x62,0x35,0xff,0x18,0x6d,0xce]
+ vcvttsd2sis {sae}, %xmm22, %r9
+
+// CHECK: vcvttsd2sis 268435456(%rbp,%r14,8), %ecx
+// CHECK: encoding: [0x62,0xb5,0x7f,0x08,0x6d,0x8c,0xf5,0x00,0x00,0x00,0x10]
+ vcvttsd2sis 268435456(%rbp,%r14,8), %ecx
+
+// CHECK: vcvttsd2sis 291(%r8,%rax,4), %ecx
+// CHECK: encoding: [0x62,0xd5,0x7f,0x08,0x6d,0x8c,0x80,0x23,0x01,0x00,0x00]
+ vcvttsd2sis 291(%r8,%rax,4), %ecx
+
+// CHECK: vcvttsd2sis (%rip), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6d,0x0d,0x00,0x00,0x00,0x00]
+ vcvttsd2sis (%rip), %ecx
+
+// CHECK: vcvttsd2sis -256(,%rbp,2), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6d,0x0c,0x6d,0x00,0xff,0xff,0xff]
+ vcvttsd2sis -256(,%rbp,2), %ecx
+
+// CHECK: vcvttsd2sis 1016(%rcx), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6d,0x49,0x7f]
+ vcvttsd2sis 1016(%rcx), %ecx
+
+// CHECK: vcvttsd2sis -1024(%rdx), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6d,0x4a,0x80]
+ vcvttsd2sis -1024(%rdx), %ecx
+
+// CHECK: vcvttsd2sis 268435456(%rbp,%r14,8), %r9
+// CHECK: encoding: [0x62,0x35,0xff,0x08,0x6d,0x8c,0xf5,0x00,0x00,0x00,0x10]
+ vcvttsd2sis 268435456(%rbp,%r14,8), %r9
+
+// CHECK: vcvttsd2sis 291(%r8,%rax,4), %r9
+// CHECK: encoding: [0x62,0x55,0xff,0x08,0x6d,0x8c,0x80,0x23,0x01,0x00,0x00]
+ vcvttsd2sis 291(%r8,%rax,4), %r9
+
+// CHECK: vcvttsd2sis (%rip), %r9
+// CHECK: encoding: [0x62,0x75,0xff,0x08,0x6d,0x0d,0x00,0x00,0x00,0x00]
+ vcvttsd2sis (%rip), %r9
+
+// CHECK: vcvttsd2sis -256(,%rbp,2), %r9
+// CHECK: encoding: [0x62,0x75,0xff,0x08,0x6d,0x0c,0x6d,0x00,0xff,0xff,0xff]
+ vcvttsd2sis -256(,%rbp,2), %r9
+
+// CHECK: vcvttsd2sis 1016(%rcx), %r9
+// CHECK: encoding: [0x62,0x75,0xff,0x08,0x6d,0x49,0x7f]
+ vcvttsd2sis 1016(%rcx), %r9
+
+// CHECK: vcvttsd2sis -1024(%rdx), %r9
+// CHECK: encoding: [0x62,0x75,0xff,0x08,0x6d,0x4a,0x80]
+ vcvttsd2sis -1024(%rdx), %r9
+
+// CHECK: vcvttsd2usis %xmm22, %ecx
+// CHECK: encoding: [0x62,0xb5,0x7f,0x08,0x6c,0xce]
+ vcvttsd2usis %xmm22, %ecx
+
+// CHECK: vcvttsd2usis {sae}, %xmm22, %ecx
+// CHECK: encoding: [0x62,0xb5,0x7f,0x18,0x6c,0xce]
+ vcvttsd2usis {sae}, %xmm22, %ecx
+
+// CHECK: vcvttsd2usis %xmm22, %r9
+// CHECK: encoding: [0x62,0x35,0xff,0x08,0x6c,0xce]
+ vcvttsd2usis %xmm22, %r9
+
+// CHECK: vcvttsd2usis {sae}, %xmm22, %r9
+// CHECK: encoding: [0x62,0x35,0xff,0x18,0x6c,0xce]
+ vcvttsd2usis {sae}, %xmm22, %r9
+
+// CHECK: vcvttsd2usis 268435456(%rbp,%r14,8), %ecx
+// CHECK: encoding: [0x62,0xb5,0x7f,0x08,0x6c,0x8c,0xf5,0x00,0x00,0x00,0x10]
+ vcvttsd2usis 268435456(%rbp,%r14,8), %ecx
+
+// CHECK: vcvttsd2usis 291(%r8,%rax,4), %ecx
+// CHECK: encoding: [0x62,0xd5,0x7f,0x08,0x6c,0x8c,0x80,0x23,0x01,0x00,0x00]
+ vcvttsd2usis 291(%r8,%rax,4), %ecx
+
+// CHECK: vcvttsd2usis (%rip), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6c,0x0d,0x00,0x00,0x00,0x00]
+ vcvttsd2usis (%rip), %ecx
+
+// CHECK: vcvttsd2usis -256(,%rbp,2), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6c,0x0c,0x6d,0x00,0xff,0xff,0xff]
+ vcvttsd2usis -256(,%rbp,2), %ecx
+
+// CHECK: vcvttsd2usis 1016(%rcx), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6c,0x49,0x7f]
+ vcvttsd2usis 1016(%rcx), %ecx
+
+// CHECK: vcvttsd2usis -1024(%rdx), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6c,0x4a,0x80]
+ vcvttsd2usis -1024(%rdx), %ecx
+
+// CHECK: vcvttsd2usis 268435456(%rbp,%r14,8), %r9
+// CHECK: encoding: [0x62,0x35,0xff,0x08,0x6c,0x8c,0xf5,0x00,0x00,0x00,0x10]
+ vcvttsd2usis 268435456(%rbp,%r14,8), %r9
+
+// CHECK: vcvttsd2usis 291(%r8,%rax,4), %r9
+// CHECK: encoding: [0x62,0x55,0xff,0x08,0x6c,0x8c,0x80,0x23,0x01,0x00,0x00]
+ vcvttsd2usis 291(%r8,%rax,4), %r9
+
+// CHECK: vcvttsd2usis (%rip), %r9
+// CHECK: encoding: [0x62,0x75,0xff,0x08,0x6c,0x0d,0x00,0x00,0x00,0x00]
+ vcvttsd2usis (%rip), %r9
+
+// CHECK: vcvttsd2usis -256(,%rbp,2), %r9
+// CHECK: encoding: [0x62,0x75,0xff,0x08,0x6c,0x0c,0x6d,0x00,0xff,0xff,0xff]
+ vcvttsd2usis -256(,%rbp,2), %r9
+
+// CHECK: vcvttsd2usis 1016(%rcx), %r9
+// CHECK: encoding: [0x62,0x75,0xff,0x08,0x6c,0x49,0x7f]
+ vcvttsd2usis 1016(%rcx), %r9
+
+// CHECK: vcvttsd2usis -1024(%rdx), %r9
+// CHECK: encoding: [0x62,0x75,0xff,0x08,0x6c,0x4a,0x80]
+ vcvttsd2usis -1024(%rdx), %r9
+
+// CHECK: vcvttss2sis %xmm22, %ecx
+// CHECK: encoding: [0x62,0xb5,0x7e,0x08,0x6d,0xce]
+ vcvttss2sis %xmm22, %ecx
+
+// CHECK: vcvttss2sis {sae}, %xmm22, %ecx
+// CHECK: encoding: [0x62,0xb5,0x7e,0x18,0x6d,0xce]
+ vcvttss2sis {sae}, %xmm22, %ecx
+
+// CHECK: vcvttss2sis %xmm22, %r9
+// CHECK: encoding: [0x62,0x35,0xfe,0x08,0x6d,0xce]
+ vcvttss2sis %xmm22, %r9
+
+// CHECK: vcvttss2sis {sae}, %xmm22, %r9
+// CHECK: encoding: [0x62,0x35,0xfe,0x18,0x6d,0xce]
+ vcvttss2sis {sae}, %xmm22, %r9
+
+// CHECK: vcvttss2sis 268435456(%rbp,%r14,8), %ecx
+// CHECK: encoding: [0x62,0xb5,0x7e,0x08,0x6d,0x8c,0xf5,0x00,0x00,0x00,0x10]
+ vcvttss2sis 268435456(%rbp,%r14,8), %ecx
+
+// CHECK: vcvttss2sis 291(%r8,%rax,4), %ecx
+// CHECK: encoding: [0x62,0xd5,0x7e,0x08,0x6d,0x8c,0x80,0x23,0x01,0x00,0x00]
+ vcvttss2sis 291(%r8,%rax,4), %ecx
+
+// CHECK: vcvttss2sis (%rip), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6d,0x0d,0x00,0x00,0x00,0x00]
+ vcvttss2sis (%rip), %ecx
+
+// CHECK: vcvttss2sis -128(,%rbp,2), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6d,0x0c,0x6d,0x80,0xff,0xff,0xff]
+ vcvttss2sis -128(,%rbp,2), %ecx
+
+// CHECK: vcvttss2sis 508(%rcx), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6d,0x49,0x7f]
+ vcvttss2sis 508(%rcx), %ecx
+
+// CHECK: vcvttss2sis -512(%rdx), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6d,0x4a,0x80]
+ vcvttss2sis -512(%rdx), %ecx
+
+// CHECK: vcvttss2sis 268435456(%rbp,%r14,8), %r9
+// CHECK: encoding: [0x62,0x35,0xfe,0x08,0x6d,0x8c,0xf5,0x00,0x00,0x00,0x10]
+ vcvttss2sis 268435456(%rbp,%r14,8), %r9
+
+// CHECK: vcvttss2sis 291(%r8,%rax,4), %r9
+// CHECK: encoding: [0x62,0x55,0xfe,0x08,0x6d,0x8c,0x80,0x23,0x01,0x00,0x00]
+ vcvttss2sis 291(%r8,%rax,4), %r9
+
+// CHECK: vcvttss2sis (%rip), %r9
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x6d,0x0d,0x00,0x00,0x00,0x00]
+ vcvttss2sis (%rip), %r9
+
+// CHECK: vcvttss2sis -128(,%rbp,2), %r9
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x6d,0x0c,0x6d,0x80,0xff,0xff,0xff]
+ vcvttss2sis -128(,%rbp,2), %r9
+
+// CHECK: vcvttss2sis 508(%rcx), %r9
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x6d,0x49,0x7f]
+ vcvttss2sis 508(%rcx), %r9
+
+// CHECK: vcvttss2sis -512(%rdx), %r9
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x6d,0x4a,0x80]
+ vcvttss2sis -512(%rdx), %r9
+
+// CHECK: vcvttss2usis %xmm22, %ecx
+// CHECK: encoding: [0x62,0xb5,0x7e,0x08,0x6c,0xce]
+ vcvttss2usis %xmm22, %ecx
+
+// CHECK: vcvttss2usis {sae}, %xmm22, %ecx
+// CHECK: encoding: [0x62,0xb5,0x7e,0x18,0x6c,0xce]
+ vcvttss2usis {sae}, %xmm22, %ecx
+
+// CHECK: vcvttss2usis %xmm22, %r9
+// CHECK: encoding: [0x62,0x35,0xfe,0x08,0x6c,0xce]
+ vcvttss2usis %xmm22, %r9
+
+// CHECK: vcvttss2usis {sae}, %xmm22, %r9
+// CHECK: encoding: [0x62,0x35,0xfe,0x18,0x6c,0xce]
+ vcvttss2usis {sae}, %xmm22, %r9
+
+// CHECK: vcvttss2usis 268435456(%rbp,%r14,8), %ecx
+// CHECK: encoding: [0x62,0xb5,0x7e,0x08,0x6c,0x8c,0xf5,0x00,0x00,0x00,0x10]
+ vcvttss2usis 268435456(%rbp,%r14,8), %ecx
+
+// CHECK: vcvttss2usis 291(%r8,%rax,4), %ecx
+// CHECK: encoding: [0x62,0xd5,0x7e,0x08,0x6c,0x8c,0x80,0x23,0x01,0x00,0x00]
+ vcvttss2usis 291(%r8,%rax,4), %ecx
+
+// CHECK: vcvttss2usis (%rip), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6c,0x0d,0x00,0x00,0x00,0x00]
+ vcvttss2usis (%rip), %ecx
+
+// CHECK: vcvttss2usis -128(,%rbp,2), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6c,0x0c,0x6d,0x80,0xff,0xff,0xff]
+ vcvttss2usis -128(,%rbp,2), %ecx
+
+// CHECK: vcvttss2usis 508(%rcx), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6c,0x49,0x7f]
+ vcvttss2usis 508(%rcx), %ecx
+
+// CHECK: vcvttss2usis -512(%rdx), %ecx
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6c,0x4a,0x80]
+ vcvttss2usis -512(%rdx), %ecx
+
+// CHECK: vcvttss2usis 268435456(%rbp,%r14,8), %r9
+// CHECK: encoding: [0x62,0x35,0xfe,0x08,0x6c,0x8c,0xf5,0x00,0x00,0x00,0x10]
+ vcvttss2usis 268435456(%rbp,%r14,8), %r9
+
+// CHECK: vcvttss2usis 291(%r8,%rax,4), %r9
+// CHECK: encoding: [0x62,0x55,0xfe,0x08,0x6c,0x8c,0x80,0x23,0x01,0x00,0x00]
+ vcvttss2usis 291(%r8,%rax,4), %r9
+
+// CHECK: vcvttss2usis (%rip), %r9
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x6c,0x0d,0x00,0x00,0x00,0x00]
+ vcvttss2usis (%rip), %r9
+
+// CHECK: vcvttss2usis -128(,%rbp,2), %r9
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x6c,0x0c,0x6d,0x80,0xff,0xff,0xff]
+ vcvttss2usis -128(,%rbp,2), %r9
+
+// CHECK: vcvttss2usis 508(%rcx), %r9
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x6c,0x49,0x7f]
+ vcvttss2usis 508(%rcx), %r9
+
+// CHECK: vcvttss2usis -512(%rdx), %r9
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x6c,0x4a,0x80]
+ vcvttss2usis -512(%rdx), %r9
+
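+// Packed forms follow the scalar ones. In AT&T syntax an explicit x/y
+// suffix (vcvttpd2dqsx/vcvttpd2dqsy below) disambiguates the memory operand
+// width where the xmm destination could come from either a 128-bit or a
+// 256-bit double-precision source.
+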
+// CHECK: vcvttpd2dqs %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0xfc,0x08,0x6d,0xf7]
+ vcvttpd2dqs %xmm23, %xmm22
+
+// CHECK: vcvttpd2dqs %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0xfc,0x0f,0x6d,0xf7]
+ vcvttpd2dqs %xmm23, %xmm22 {%k7}
+
+// CHECK: vcvttpd2dqs %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0xfc,0x8f,0x6d,0xf7]
+ vcvttpd2dqs %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vcvttpd2dqs %ymm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0xfc,0x28,0x6d,0xf7]
+ vcvttpd2dqs %ymm23, %xmm22
+
+// CHECK: vcvttpd2dqs {sae}, %ymm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0xf8,0x18,0x6d,0xf7]
+ vcvttpd2dqs {sae}, %ymm23, %xmm22
+
+// CHECK: vcvttpd2dqs %ymm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0xfc,0x2f,0x6d,0xf7]
+ vcvttpd2dqs %ymm23, %xmm22 {%k7}
+
+// CHECK: vcvttpd2dqs {sae}, %ymm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0xf8,0x9f,0x6d,0xf7]
+ vcvttpd2dqs {sae}, %ymm23, %xmm22 {%k7} {z}
+
+// CHECK: vcvttpd2dqs %zmm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0xfc,0x48,0x6d,0xf7]
+ vcvttpd2dqs %zmm23, %ymm22
+
+// CHECK: vcvttpd2dqs {sae}, %zmm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0xfc,0x18,0x6d,0xf7]
+ vcvttpd2dqs {sae}, %zmm23, %ymm22
+
+// CHECK: vcvttpd2dqs %zmm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0xfc,0x4f,0x6d,0xf7]
+ vcvttpd2dqs %zmm23, %ymm22 {%k7}
+
+// CHECK: vcvttpd2dqs {sae}, %zmm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0xfc,0x9f,0x6d,0xf7]
+ vcvttpd2dqs {sae}, %zmm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvttpd2dqsx 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa5,0xfc,0x08,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttpd2dqsx 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vcvttpd2dqsx 291(%r8,%rax,4), %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0xfc,0x0f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttpd2dqsx 291(%r8,%rax,4), %xmm22 {%k7}
+
+// CHECK: vcvttpd2dqs (%rip){1to2}, %xmm22
+// CHECK: encoding: [0x62,0xe5,0xfc,0x18,0x6d,0x35,0x00,0x00,0x00,0x00]
+ vcvttpd2dqs (%rip){1to2}, %xmm22
+
+// CHECK: vcvttpd2dqsx -512(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe5,0xfc,0x08,0x6d,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttpd2dqsx -512(,%rbp,2), %xmm22
+
+// CHECK: vcvttpd2dqsx 2032(%rcx), %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfc,0x8f,0x6d,0x71,0x7f]
+ vcvttpd2dqsx 2032(%rcx), %xmm22 {%k7} {z}
+
+// CHECK: vcvttpd2dqs -1024(%rdx){1to2}, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfc,0x9f,0x6d,0x72,0x80]
+ vcvttpd2dqs -1024(%rdx){1to2}, %xmm22 {%k7} {z}
+
+// CHECK: vcvttpd2dqs (%rip){1to4}, %xmm22
+// CHECK: encoding: [0x62,0xe5,0xfc,0x38,0x6d,0x35,0x00,0x00,0x00,0x00]
+ vcvttpd2dqs (%rip){1to4}, %xmm22
+
+// CHECK: vcvttpd2dqsy -1024(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe5,0xfc,0x28,0x6d,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttpd2dqsy -1024(,%rbp,2), %xmm22
+
+// CHECK: vcvttpd2dqsy 4064(%rcx), %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfc,0xaf,0x6d,0x71,0x7f]
+ vcvttpd2dqsy 4064(%rcx), %xmm22 {%k7} {z}
+
+// CHECK: vcvttpd2dqs -1024(%rdx){1to4}, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfc,0xbf,0x6d,0x72,0x80]
+ vcvttpd2dqs -1024(%rdx){1to4}, %xmm22 {%k7} {z}
+
+// CHECK: vcvttpd2dqs 268435456(%rbp,%r14,8), %ymm22
+// CHECK: encoding: [0x62,0xa5,0xfc,0x48,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttpd2dqs 268435456(%rbp,%r14,8), %ymm22
+
+// CHECK: vcvttpd2dqs 291(%r8,%rax,4), %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0xfc,0x4f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttpd2dqs 291(%r8,%rax,4), %ymm22 {%k7}
+
+// CHECK: vcvttpd2dqs (%rip){1to8}, %ymm22
+// CHECK: encoding: [0x62,0xe5,0xfc,0x58,0x6d,0x35,0x00,0x00,0x00,0x00]
+ vcvttpd2dqs (%rip){1to8}, %ymm22
+
+// CHECK: vcvttpd2dqs -2048(,%rbp,2), %ymm22
+// CHECK: encoding: [0x62,0xe5,0xfc,0x48,0x6d,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vcvttpd2dqs -2048(,%rbp,2), %ymm22
+
+// CHECK: vcvttpd2dqs 8128(%rcx), %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfc,0xcf,0x6d,0x71,0x7f]
+ vcvttpd2dqs 8128(%rcx), %ymm22 {%k7} {z}
+
+// CHECK: vcvttpd2dqs -1024(%rdx){1to8}, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfc,0xdf,0x6d,0x72,0x80]
+ vcvttpd2dqs -1024(%rdx){1to8}, %ymm22 {%k7} {z}
+
+// CHECK: vcvttpd2qqs %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0xfd,0x08,0x6d,0xf7]
+ vcvttpd2qqs %xmm23, %xmm22
+
+// CHECK: vcvttpd2qqs %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0xfd,0x0f,0x6d,0xf7]
+ vcvttpd2qqs %xmm23, %xmm22 {%k7}
+
+// CHECK: vcvttpd2qqs %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0xfd,0x8f,0x6d,0xf7]
+ vcvttpd2qqs %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vcvttpd2qqs %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0xfd,0x28,0x6d,0xf7]
+ vcvttpd2qqs %ymm23, %ymm22
+
+// CHECK: vcvttpd2qqs {sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0xf9,0x18,0x6d,0xf7]
+ vcvttpd2qqs {sae}, %ymm23, %ymm22
+
+// CHECK: vcvttpd2qqs %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0xfd,0x2f,0x6d,0xf7]
+ vcvttpd2qqs %ymm23, %ymm22 {%k7}
+
+// CHECK: vcvttpd2qqs {sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0xf9,0x9f,0x6d,0xf7]
+ vcvttpd2qqs {sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvttpd2qqs %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa5,0xfd,0x48,0x6d,0xf7]
+ vcvttpd2qqs %zmm23, %zmm22
+
+// CHECK: vcvttpd2qqs {sae}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa5,0xfd,0x18,0x6d,0xf7]
+ vcvttpd2qqs {sae}, %zmm23, %zmm22
+
+// CHECK: vcvttpd2qqs %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0xfd,0x4f,0x6d,0xf7]
+ vcvttpd2qqs %zmm23, %zmm22 {%k7}
+
+// CHECK: vcvttpd2qqs {sae}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0xfd,0x9f,0x6d,0xf7]
+ vcvttpd2qqs {sae}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vcvttpd2qqs 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa5,0xfd,0x08,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttpd2qqs 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vcvttpd2qqs 291(%r8,%rax,4), %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0xfd,0x0f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttpd2qqs 291(%r8,%rax,4), %xmm22 {%k7}
+
+// CHECK: vcvttpd2qqs (%rip){1to2}, %xmm22
+// CHECK: encoding: [0x62,0xe5,0xfd,0x18,0x6d,0x35,0x00,0x00,0x00,0x00]
+ vcvttpd2qqs (%rip){1to2}, %xmm22
+
+// CHECK: vcvttpd2qqs -512(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe5,0xfd,0x08,0x6d,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttpd2qqs -512(,%rbp,2), %xmm22
+
+// CHECK: vcvttpd2qqs 2032(%rcx), %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfd,0x8f,0x6d,0x71,0x7f]
+ vcvttpd2qqs 2032(%rcx), %xmm22 {%k7} {z}
+
+// CHECK: vcvttpd2qqs -1024(%rdx){1to2}, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfd,0x9f,0x6d,0x72,0x80]
+ vcvttpd2qqs -1024(%rdx){1to2}, %xmm22 {%k7} {z}
+
+// CHECK: vcvttpd2qqs 268435456(%rbp,%r14,8), %ymm22
+// CHECK: encoding: [0x62,0xa5,0xfd,0x28,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttpd2qqs 268435456(%rbp,%r14,8), %ymm22
+
+// CHECK: vcvttpd2qqs 291(%r8,%rax,4), %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0xfd,0x2f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttpd2qqs 291(%r8,%rax,4), %ymm22 {%k7}
+
+// CHECK: vcvttpd2qqs (%rip){1to4}, %ymm22
+// CHECK: encoding: [0x62,0xe5,0xfd,0x38,0x6d,0x35,0x00,0x00,0x00,0x00]
+ vcvttpd2qqs (%rip){1to4}, %ymm22
+
+// CHECK: vcvttpd2qqs -1024(,%rbp,2), %ymm22
+// CHECK: encoding: [0x62,0xe5,0xfd,0x28,0x6d,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttpd2qqs -1024(,%rbp,2), %ymm22
+
+// CHECK: vcvttpd2qqs 4064(%rcx), %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfd,0xaf,0x6d,0x71,0x7f]
+ vcvttpd2qqs 4064(%rcx), %ymm22 {%k7} {z}
+
+// CHECK: vcvttpd2qqs -1024(%rdx){1to4}, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfd,0xbf,0x6d,0x72,0x80]
+ vcvttpd2qqs -1024(%rdx){1to4}, %ymm22 {%k7} {z}
+
+// CHECK: vcvttpd2qqs 268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa5,0xfd,0x48,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttpd2qqs 268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vcvttpd2qqs 291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0xfd,0x4f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttpd2qqs 291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vcvttpd2qqs (%rip){1to8}, %zmm22
+// CHECK: encoding: [0x62,0xe5,0xfd,0x58,0x6d,0x35,0x00,0x00,0x00,0x00]
+ vcvttpd2qqs (%rip){1to8}, %zmm22
+
+// CHECK: vcvttpd2qqs -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe5,0xfd,0x48,0x6d,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vcvttpd2qqs -2048(,%rbp,2), %zmm22
+
+// CHECK: vcvttpd2qqs 8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfd,0xcf,0x6d,0x71,0x7f]
+ vcvttpd2qqs 8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vcvttpd2qqs -1024(%rdx){1to8}, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfd,0xdf,0x6d,0x72,0x80]
+ vcvttpd2qqs -1024(%rdx){1to8}, %zmm22 {%k7} {z}
+
+// CHECK: vcvttpd2udqs %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0xfc,0x08,0x6c,0xf7]
+ vcvttpd2udqs %xmm23, %xmm22
+
+// CHECK: vcvttpd2udqs %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0xfc,0x0f,0x6c,0xf7]
+ vcvttpd2udqs %xmm23, %xmm22 {%k7}
+
+// CHECK: vcvttpd2udqs %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0xfc,0x8f,0x6c,0xf7]
+ vcvttpd2udqs %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vcvttpd2udqs %ymm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0xfc,0x28,0x6c,0xf7]
+ vcvttpd2udqs %ymm23, %xmm22
+
+// CHECK: vcvttpd2udqs {sae}, %ymm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0xf8,0x18,0x6c,0xf7]
+ vcvttpd2udqs {sae}, %ymm23, %xmm22
+
+// CHECK: vcvttpd2udqs %ymm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0xfc,0x2f,0x6c,0xf7]
+ vcvttpd2udqs %ymm23, %xmm22 {%k7}
+
+// CHECK: vcvttpd2udqs {sae}, %ymm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0xf8,0x9f,0x6c,0xf7]
+ vcvttpd2udqs {sae}, %ymm23, %xmm22 {%k7} {z}
+
+// CHECK: vcvttpd2udqs %zmm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0xfc,0x48,0x6c,0xf7]
+ vcvttpd2udqs %zmm23, %ymm22
+
+// CHECK: vcvttpd2udqs {sae}, %zmm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0xfc,0x18,0x6c,0xf7]
+ vcvttpd2udqs {sae}, %zmm23, %ymm22
+
+// CHECK: vcvttpd2udqs %zmm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0xfc,0x4f,0x6c,0xf7]
+ vcvttpd2udqs %zmm23, %ymm22 {%k7}
+
+// CHECK: vcvttpd2udqs {sae}, %zmm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0xfc,0x9f,0x6c,0xf7]
+ vcvttpd2udqs {sae}, %zmm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvttpd2udqsx 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa5,0xfc,0x08,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttpd2udqsx 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vcvttpd2udqsx 291(%r8,%rax,4), %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0xfc,0x0f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttpd2udqsx 291(%r8,%rax,4), %xmm22 {%k7}
+
+// CHECK: vcvttpd2udqs (%rip){1to2}, %xmm22
+// CHECK: encoding: [0x62,0xe5,0xfc,0x18,0x6c,0x35,0x00,0x00,0x00,0x00]
+ vcvttpd2udqs (%rip){1to2}, %xmm22
+
+// CHECK: vcvttpd2udqsx -512(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe5,0xfc,0x08,0x6c,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttpd2udqsx -512(,%rbp,2), %xmm22
+
+// CHECK: vcvttpd2udqsx 2032(%rcx), %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfc,0x8f,0x6c,0x71,0x7f]
+ vcvttpd2udqsx 2032(%rcx), %xmm22 {%k7} {z}
+
+// CHECK: vcvttpd2udqs -1024(%rdx){1to2}, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfc,0x9f,0x6c,0x72,0x80]
+ vcvttpd2udqs -1024(%rdx){1to2}, %xmm22 {%k7} {z}
+
+// CHECK: vcvttpd2udqs (%rip){1to4}, %xmm22
+// CHECK: encoding: [0x62,0xe5,0xfc,0x38,0x6c,0x35,0x00,0x00,0x00,0x00]
+ vcvttpd2udqs (%rip){1to4}, %xmm22
+
+// CHECK: vcvttpd2udqsy -1024(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe5,0xfc,0x28,0x6c,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttpd2udqsy -1024(,%rbp,2), %xmm22
+
+// CHECK: vcvttpd2udqsy 4064(%rcx), %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfc,0xaf,0x6c,0x71,0x7f]
+ vcvttpd2udqsy 4064(%rcx), %xmm22 {%k7} {z}
+
+// CHECK: vcvttpd2udqs -1024(%rdx){1to4}, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfc,0xbf,0x6c,0x72,0x80]
+ vcvttpd2udqs -1024(%rdx){1to4}, %xmm22 {%k7} {z}
+
+// CHECK: vcvttpd2udqs 268435456(%rbp,%r14,8), %ymm22
+// CHECK: encoding: [0x62,0xa5,0xfc,0x48,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttpd2udqs 268435456(%rbp,%r14,8), %ymm22
+
+// CHECK: vcvttpd2udqs 291(%r8,%rax,4), %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0xfc,0x4f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttpd2udqs 291(%r8,%rax,4), %ymm22 {%k7}
+
+// CHECK: vcvttpd2udqs (%rip){1to8}, %ymm22
+// CHECK: encoding: [0x62,0xe5,0xfc,0x58,0x6c,0x35,0x00,0x00,0x00,0x00]
+ vcvttpd2udqs (%rip){1to8}, %ymm22
+
+// CHECK: vcvttpd2udqs -2048(,%rbp,2), %ymm22
+// CHECK: encoding: [0x62,0xe5,0xfc,0x48,0x6c,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vcvttpd2udqs -2048(,%rbp,2), %ymm22
+
+// CHECK: vcvttpd2udqs 8128(%rcx), %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfc,0xcf,0x6c,0x71,0x7f]
+ vcvttpd2udqs 8128(%rcx), %ymm22 {%k7} {z}
+
+// CHECK: vcvttpd2udqs -1024(%rdx){1to8}, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfc,0xdf,0x6c,0x72,0x80]
+ vcvttpd2udqs -1024(%rdx){1to8}, %ymm22 {%k7} {z}
+
+// CHECK: vcvttpd2uqqs %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0xfd,0x08,0x6c,0xf7]
+ vcvttpd2uqqs %xmm23, %xmm22
+
+// CHECK: vcvttpd2uqqs %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0xfd,0x0f,0x6c,0xf7]
+ vcvttpd2uqqs %xmm23, %xmm22 {%k7}
+
+// CHECK: vcvttpd2uqqs %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0xfd,0x8f,0x6c,0xf7]
+ vcvttpd2uqqs %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vcvttpd2uqqs %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0xfd,0x28,0x6c,0xf7]
+ vcvttpd2uqqs %ymm23, %ymm22
+
+// CHECK: vcvttpd2uqqs {sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0xf9,0x18,0x6c,0xf7]
+ vcvttpd2uqqs {sae}, %ymm23, %ymm22
+
+// CHECK: vcvttpd2uqqs %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0xfd,0x2f,0x6c,0xf7]
+ vcvttpd2uqqs %ymm23, %ymm22 {%k7}
+
+// CHECK: vcvttpd2uqqs {sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0xf9,0x9f,0x6c,0xf7]
+ vcvttpd2uqqs {sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvttpd2uqqs %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa5,0xfd,0x48,0x6c,0xf7]
+ vcvttpd2uqqs %zmm23, %zmm22
+
+// CHECK: vcvttpd2uqqs {sae}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa5,0xfd,0x18,0x6c,0xf7]
+ vcvttpd2uqqs {sae}, %zmm23, %zmm22
+
+// CHECK: vcvttpd2uqqs %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0xfd,0x4f,0x6c,0xf7]
+ vcvttpd2uqqs %zmm23, %zmm22 {%k7}
+
+// CHECK: vcvttpd2uqqs {sae}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0xfd,0x9f,0x6c,0xf7]
+ vcvttpd2uqqs {sae}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vcvttpd2uqqs 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa5,0xfd,0x08,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttpd2uqqs 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vcvttpd2uqqs 291(%r8,%rax,4), %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0xfd,0x0f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttpd2uqqs 291(%r8,%rax,4), %xmm22 {%k7}
+
+// CHECK: vcvttpd2uqqs (%rip){1to2}, %xmm22
+// CHECK: encoding: [0x62,0xe5,0xfd,0x18,0x6c,0x35,0x00,0x00,0x00,0x00]
+ vcvttpd2uqqs (%rip){1to2}, %xmm22
+
+// CHECK: vcvttpd2uqqs -512(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe5,0xfd,0x08,0x6c,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttpd2uqqs -512(,%rbp,2), %xmm22
+
+// CHECK: vcvttpd2uqqs 2032(%rcx), %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfd,0x8f,0x6c,0x71,0x7f]
+ vcvttpd2uqqs 2032(%rcx), %xmm22 {%k7} {z}
+
+// CHECK: vcvttpd2uqqs -1024(%rdx){1to2}, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfd,0x9f,0x6c,0x72,0x80]
+ vcvttpd2uqqs -1024(%rdx){1to2}, %xmm22 {%k7} {z}
+
+// CHECK: vcvttpd2uqqs 268435456(%rbp,%r14,8), %ymm22
+// CHECK: encoding: [0x62,0xa5,0xfd,0x28,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttpd2uqqs 268435456(%rbp,%r14,8), %ymm22
+
+// CHECK: vcvttpd2uqqs 291(%r8,%rax,4), %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0xfd,0x2f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttpd2uqqs 291(%r8,%rax,4), %ymm22 {%k7}
+
+// CHECK: vcvttpd2uqqs (%rip){1to4}, %ymm22
+// CHECK: encoding: [0x62,0xe5,0xfd,0x38,0x6c,0x35,0x00,0x00,0x00,0x00]
+ vcvttpd2uqqs (%rip){1to4}, %ymm22
+
+// CHECK: vcvttpd2uqqs -1024(,%rbp,2), %ymm22
+// CHECK: encoding: [0x62,0xe5,0xfd,0x28,0x6c,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttpd2uqqs -1024(,%rbp,2), %ymm22
+
+// CHECK: vcvttpd2uqqs 4064(%rcx), %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfd,0xaf,0x6c,0x71,0x7f]
+ vcvttpd2uqqs 4064(%rcx), %ymm22 {%k7} {z}
+
+// CHECK: vcvttpd2uqqs -1024(%rdx){1to4}, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfd,0xbf,0x6c,0x72,0x80]
+ vcvttpd2uqqs -1024(%rdx){1to4}, %ymm22 {%k7} {z}
+
+// CHECK: vcvttpd2uqqs 268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa5,0xfd,0x48,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttpd2uqqs 268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vcvttpd2uqqs 291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0xfd,0x4f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttpd2uqqs 291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vcvttpd2uqqs (%rip){1to8}, %zmm22
+// CHECK: encoding: [0x62,0xe5,0xfd,0x58,0x6c,0x35,0x00,0x00,0x00,0x00]
+ vcvttpd2uqqs (%rip){1to8}, %zmm22
+
+// CHECK: vcvttpd2uqqs -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe5,0xfd,0x48,0x6c,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vcvttpd2uqqs -2048(,%rbp,2), %zmm22
+
+// CHECK: vcvttpd2uqqs 8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfd,0xcf,0x6c,0x71,0x7f]
+ vcvttpd2uqqs 8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vcvttpd2uqqs -1024(%rdx){1to8}, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfd,0xdf,0x6c,0x72,0x80]
+ vcvttpd2uqqs -1024(%rdx){1to8}, %zmm22 {%k7} {z}
+
+// CHECK: vcvttps2dqs %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7c,0x08,0x6d,0xf7]
+ vcvttps2dqs %xmm23, %xmm22
+
+// CHECK: vcvttps2dqs %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x7c,0x0f,0x6d,0xf7]
+ vcvttps2dqs %xmm23, %xmm22 {%k7}
+
+// CHECK: vcvttps2dqs %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x7c,0x8f,0x6d,0xf7]
+ vcvttps2dqs %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vcvttps2dqs %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x7c,0x28,0x6d,0xf7]
+ vcvttps2dqs %ymm23, %ymm22
+
+// CHECK: vcvttps2dqs {sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x78,0x18,0x6d,0xf7]
+ vcvttps2dqs {sae}, %ymm23, %ymm22
+
+// CHECK: vcvttps2dqs %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x7c,0x2f,0x6d,0xf7]
+ vcvttps2dqs %ymm23, %ymm22 {%k7}
+
+// CHECK: vcvttps2dqs {sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x78,0x9f,0x6d,0xf7]
+ vcvttps2dqs {sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvttps2dqs %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa5,0x7c,0x48,0x6d,0xf7]
+ vcvttps2dqs %zmm23, %zmm22
+
+// CHECK: vcvttps2dqs {sae}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa5,0x7c,0x18,0x6d,0xf7]
+ vcvttps2dqs {sae}, %zmm23, %zmm22
+
+// CHECK: vcvttps2dqs %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x7c,0x4f,0x6d,0xf7]
+ vcvttps2dqs %zmm23, %zmm22 {%k7}
+
+// CHECK: vcvttps2dqs {sae}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x7c,0x9f,0x6d,0xf7]
+ vcvttps2dqs {sae}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vcvttps2dqs 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7c,0x08,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttps2dqs 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vcvttps2dqs 291(%r8,%rax,4), %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7c,0x0f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttps2dqs 291(%r8,%rax,4), %xmm22 {%k7}
+
+// CHECK: vcvttps2dqs (%rip){1to4}, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7c,0x18,0x6d,0x35,0x00,0x00,0x00,0x00]
+ vcvttps2dqs (%rip){1to4}, %xmm22
+
+// CHECK: vcvttps2dqs -512(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7c,0x08,0x6d,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttps2dqs -512(,%rbp,2), %xmm22
+
+// CHECK: vcvttps2dqs 2032(%rcx), %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7c,0x8f,0x6d,0x71,0x7f]
+ vcvttps2dqs 2032(%rcx), %xmm22 {%k7} {z}
+
+// CHECK: vcvttps2dqs -512(%rdx){1to4}, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7c,0x9f,0x6d,0x72,0x80]
+ vcvttps2dqs -512(%rdx){1to4}, %xmm22 {%k7} {z}
+
+// CHECK: vcvttps2dqs 268435456(%rbp,%r14,8), %ymm22
+// CHECK: encoding: [0x62,0xa5,0x7c,0x28,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttps2dqs 268435456(%rbp,%r14,8), %ymm22
+
+// CHECK: vcvttps2dqs 291(%r8,%rax,4), %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7c,0x2f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttps2dqs 291(%r8,%rax,4), %ymm22 {%k7}
+
+// CHECK: vcvttps2dqs (%rip){1to8}, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x7c,0x38,0x6d,0x35,0x00,0x00,0x00,0x00]
+ vcvttps2dqs (%rip){1to8}, %ymm22
+
+// CHECK: vcvttps2dqs -1024(,%rbp,2), %ymm22
+// CHECK: encoding: [0x62,0xe5,0x7c,0x28,0x6d,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttps2dqs -1024(,%rbp,2), %ymm22
+
+// CHECK: vcvttps2dqs 4064(%rcx), %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7c,0xaf,0x6d,0x71,0x7f]
+ vcvttps2dqs 4064(%rcx), %ymm22 {%k7} {z}
+
+// CHECK: vcvttps2dqs -512(%rdx){1to8}, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7c,0xbf,0x6d,0x72,0x80]
+ vcvttps2dqs -512(%rdx){1to8}, %ymm22 {%k7} {z}
+
+// CHECK: vcvttps2dqs 268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa5,0x7c,0x48,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttps2dqs 268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vcvttps2dqs 291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7c,0x4f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttps2dqs 291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vcvttps2dqs (%rip){1to16}, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x7c,0x58,0x6d,0x35,0x00,0x00,0x00,0x00]
+ vcvttps2dqs (%rip){1to16}, %zmm22
+
+// CHECK: vcvttps2dqs -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe5,0x7c,0x48,0x6d,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vcvttps2dqs -2048(,%rbp,2), %zmm22
+
+// CHECK: vcvttps2dqs 8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7c,0xcf,0x6d,0x71,0x7f]
+ vcvttps2dqs 8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vcvttps2dqs -512(%rdx){1to16}, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7c,0xdf,0x6d,0x72,0x80]
+ vcvttps2dqs -512(%rdx){1to16}, %zmm22 {%k7} {z}
+
+// CHECK: vcvttps2qqs %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x6d,0xf7]
+ vcvttps2qqs %xmm23, %xmm22
+
+// CHECK: vcvttps2qqs %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x0f,0x6d,0xf7]
+ vcvttps2qqs %xmm23, %xmm22 {%k7}
+
+// CHECK: vcvttps2qqs %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x8f,0x6d,0xf7]
+ vcvttps2qqs %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vcvttps2qqs %xmm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x6d,0xf7]
+ vcvttps2qqs %xmm23, %ymm22
+
+// CHECK: vcvttps2qqs {sae}, %xmm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x79,0x18,0x6d,0xf7]
+ vcvttps2qqs {sae}, %xmm23, %ymm22
+
+// CHECK: vcvttps2qqs %xmm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x2f,0x6d,0xf7]
+ vcvttps2qqs %xmm23, %ymm22 {%k7}
+
+// CHECK: vcvttps2qqs {sae}, %xmm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x79,0x9f,0x6d,0xf7]
+ vcvttps2qqs {sae}, %xmm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvttps2qqs %ymm23, %zmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x6d,0xf7]
+ vcvttps2qqs %ymm23, %zmm22
+
+// CHECK: vcvttps2qqs {sae}, %ymm23, %zmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x18,0x6d,0xf7]
+ vcvttps2qqs {sae}, %ymm23, %zmm22
+
+// CHECK: vcvttps2qqs %ymm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x4f,0x6d,0xf7]
+ vcvttps2qqs %ymm23, %zmm22 {%k7}
+
+// CHECK: vcvttps2qqs {sae}, %ymm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x9f,0x6d,0xf7]
+ vcvttps2qqs {sae}, %ymm23, %zmm22 {%k7} {z}
+
+// CHECK: vcvttps2qqs 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttps2qqs 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vcvttps2qqs 291(%r8,%rax,4), %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7d,0x0f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttps2qqs 291(%r8,%rax,4), %xmm22 {%k7}
+
+// CHECK: vcvttps2qqs (%rip){1to2}, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x18,0x6d,0x35,0x00,0x00,0x00,0x00]
+ vcvttps2qqs (%rip){1to2}, %xmm22
+
+// CHECK: vcvttps2qqs -256(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6d,0x34,0x6d,0x00,0xff,0xff,0xff]
+ vcvttps2qqs -256(,%rbp,2), %xmm22
+
+// CHECK: vcvttps2qqs 1016(%rcx), %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x8f,0x6d,0x71,0x7f]
+ vcvttps2qqs 1016(%rcx), %xmm22 {%k7} {z}
+
+// CHECK: vcvttps2qqs -512(%rdx){1to2}, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x9f,0x6d,0x72,0x80]
+ vcvttps2qqs -512(%rdx){1to2}, %xmm22 {%k7} {z}
+
+// CHECK: vcvttps2qqs 268435456(%rbp,%r14,8), %ymm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttps2qqs 268435456(%rbp,%r14,8), %ymm22
+
+// CHECK: vcvttps2qqs 291(%r8,%rax,4), %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7d,0x2f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttps2qqs 291(%r8,%rax,4), %ymm22 {%k7}
+
+// CHECK: vcvttps2qqs (%rip){1to4}, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x38,0x6d,0x35,0x00,0x00,0x00,0x00]
+ vcvttps2qqs (%rip){1to4}, %ymm22
+
+// CHECK: vcvttps2qqs -512(,%rbp,2), %ymm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x28,0x6d,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttps2qqs -512(,%rbp,2), %ymm22
+
+// CHECK: vcvttps2qqs 2032(%rcx), %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xaf,0x6d,0x71,0x7f]
+ vcvttps2qqs 2032(%rcx), %ymm22 {%k7} {z}
+
+// CHECK: vcvttps2qqs -512(%rdx){1to4}, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xbf,0x6d,0x72,0x80]
+ vcvttps2qqs -512(%rdx){1to4}, %ymm22 {%k7} {z}
+
+// CHECK: vcvttps2qqs 268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttps2qqs 268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vcvttps2qqs 291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7d,0x4f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttps2qqs 291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vcvttps2qqs (%rip){1to8}, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x58,0x6d,0x35,0x00,0x00,0x00,0x00]
+ vcvttps2qqs (%rip){1to8}, %zmm22
+
+// CHECK: vcvttps2qqs -1024(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x48,0x6d,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttps2qqs -1024(,%rbp,2), %zmm22
+
+// CHECK: vcvttps2qqs 4064(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xcf,0x6d,0x71,0x7f]
+ vcvttps2qqs 4064(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vcvttps2qqs -512(%rdx){1to8}, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xdf,0x6d,0x72,0x80]
+ vcvttps2qqs -512(%rdx){1to8}, %zmm22 {%k7} {z}
+
+// CHECK: vcvttps2udqs %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7c,0x08,0x6c,0xf7]
+ vcvttps2udqs %xmm23, %xmm22
+
+// CHECK: vcvttps2udqs %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x7c,0x0f,0x6c,0xf7]
+ vcvttps2udqs %xmm23, %xmm22 {%k7}
+
+// CHECK: vcvttps2udqs %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x7c,0x8f,0x6c,0xf7]
+ vcvttps2udqs %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vcvttps2udqs %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x7c,0x28,0x6c,0xf7]
+ vcvttps2udqs %ymm23, %ymm22
+
+// CHECK: vcvttps2udqs {sae}, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x78,0x18,0x6c,0xf7]
+ vcvttps2udqs {sae}, %ymm23, %ymm22
+
+// CHECK: vcvttps2udqs %ymm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x7c,0x2f,0x6c,0xf7]
+ vcvttps2udqs %ymm23, %ymm22 {%k7}
+
+// CHECK: vcvttps2udqs {sae}, %ymm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x78,0x9f,0x6c,0xf7]
+ vcvttps2udqs {sae}, %ymm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvttps2udqs %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa5,0x7c,0x48,0x6c,0xf7]
+ vcvttps2udqs %zmm23, %zmm22
+
+// CHECK: vcvttps2udqs {sae}, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa5,0x7c,0x18,0x6c,0xf7]
+ vcvttps2udqs {sae}, %zmm23, %zmm22
+
+// CHECK: vcvttps2udqs %zmm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x7c,0x4f,0x6c,0xf7]
+ vcvttps2udqs %zmm23, %zmm22 {%k7}
+
+// CHECK: vcvttps2udqs {sae}, %zmm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x7c,0x9f,0x6c,0xf7]
+ vcvttps2udqs {sae}, %zmm23, %zmm22 {%k7} {z}
+
+// CHECK: vcvttps2udqs 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7c,0x08,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttps2udqs 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vcvttps2udqs 291(%r8,%rax,4), %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7c,0x0f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttps2udqs 291(%r8,%rax,4), %xmm22 {%k7}
+
+// CHECK: vcvttps2udqs (%rip){1to4}, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7c,0x18,0x6c,0x35,0x00,0x00,0x00,0x00]
+ vcvttps2udqs (%rip){1to4}, %xmm22
+
+// CHECK: vcvttps2udqs -512(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7c,0x08,0x6c,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttps2udqs -512(,%rbp,2), %xmm22
+
+// CHECK: vcvttps2udqs 2032(%rcx), %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7c,0x8f,0x6c,0x71,0x7f]
+ vcvttps2udqs 2032(%rcx), %xmm22 {%k7} {z}
+
+// CHECK: vcvttps2udqs -512(%rdx){1to4}, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7c,0x9f,0x6c,0x72,0x80]
+ vcvttps2udqs -512(%rdx){1to4}, %xmm22 {%k7} {z}
+
+// CHECK: vcvttps2udqs 268435456(%rbp,%r14,8), %ymm22
+// CHECK: encoding: [0x62,0xa5,0x7c,0x28,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttps2udqs 268435456(%rbp,%r14,8), %ymm22
+
+// CHECK: vcvttps2udqs 291(%r8,%rax,4), %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7c,0x2f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttps2udqs 291(%r8,%rax,4), %ymm22 {%k7}
+
+// CHECK: vcvttps2udqs (%rip){1to8}, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x7c,0x38,0x6c,0x35,0x00,0x00,0x00,0x00]
+ vcvttps2udqs (%rip){1to8}, %ymm22
+
+// CHECK: vcvttps2udqs -1024(,%rbp,2), %ymm22
+// CHECK: encoding: [0x62,0xe5,0x7c,0x28,0x6c,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttps2udqs -1024(,%rbp,2), %ymm22
+
+// CHECK: vcvttps2udqs 4064(%rcx), %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7c,0xaf,0x6c,0x71,0x7f]
+ vcvttps2udqs 4064(%rcx), %ymm22 {%k7} {z}
+
+// CHECK: vcvttps2udqs -512(%rdx){1to8}, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7c,0xbf,0x6c,0x72,0x80]
+ vcvttps2udqs -512(%rdx){1to8}, %ymm22 {%k7} {z}
+
+// CHECK: vcvttps2udqs 268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa5,0x7c,0x48,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttps2udqs 268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vcvttps2udqs 291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7c,0x4f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttps2udqs 291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vcvttps2udqs (%rip){1to16}, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x7c,0x58,0x6c,0x35,0x00,0x00,0x00,0x00]
+ vcvttps2udqs (%rip){1to16}, %zmm22
+
+// CHECK: vcvttps2udqs -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe5,0x7c,0x48,0x6c,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vcvttps2udqs -2048(,%rbp,2), %zmm22
+
+// CHECK: vcvttps2udqs 8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7c,0xcf,0x6c,0x71,0x7f]
+ vcvttps2udqs 8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vcvttps2udqs -512(%rdx){1to16}, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7c,0xdf,0x6c,0x72,0x80]
+ vcvttps2udqs -512(%rdx){1to16}, %zmm22 {%k7} {z}
+
+// CHECK: vcvttps2uqqs %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x6c,0xf7]
+ vcvttps2uqqs %xmm23, %xmm22
+
+// CHECK: vcvttps2uqqs %xmm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x0f,0x6c,0xf7]
+ vcvttps2uqqs %xmm23, %xmm22 {%k7}
+
+// CHECK: vcvttps2uqqs %xmm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x8f,0x6c,0xf7]
+ vcvttps2uqqs %xmm23, %xmm22 {%k7} {z}
+
+// CHECK: vcvttps2uqqs %xmm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x6c,0xf7]
+ vcvttps2uqqs %xmm23, %ymm22
+
+// CHECK: vcvttps2uqqs {sae}, %xmm23, %ymm22
+// CHECK: encoding: [0x62,0xa5,0x79,0x18,0x6c,0xf7]
+ vcvttps2uqqs {sae}, %xmm23, %ymm22
+
+// CHECK: vcvttps2uqqs %xmm23, %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x2f,0x6c,0xf7]
+ vcvttps2uqqs %xmm23, %ymm22 {%k7}
+
+// CHECK: vcvttps2uqqs {sae}, %xmm23, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x79,0x9f,0x6c,0xf7]
+ vcvttps2uqqs {sae}, %xmm23, %ymm22 {%k7} {z}
+
+// CHECK: vcvttps2uqqs %ymm23, %zmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x6c,0xf7]
+ vcvttps2uqqs %ymm23, %zmm22
+
+// CHECK: vcvttps2uqqs {sae}, %ymm23, %zmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x18,0x6c,0xf7]
+ vcvttps2uqqs {sae}, %ymm23, %zmm22
+
+// CHECK: vcvttps2uqqs %ymm23, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x4f,0x6c,0xf7]
+ vcvttps2uqqs %ymm23, %zmm22 {%k7}
+
+// CHECK: vcvttps2uqqs {sae}, %ymm23, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x9f,0x6c,0xf7]
+ vcvttps2uqqs {sae}, %ymm23, %zmm22 {%k7} {z}
+
+// CHECK: vcvttps2uqqs 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttps2uqqs 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vcvttps2uqqs 291(%r8,%rax,4), %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7d,0x0f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttps2uqqs 291(%r8,%rax,4), %xmm22 {%k7}
+
+// CHECK: vcvttps2uqqs (%rip){1to2}, %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x18,0x6c,0x35,0x00,0x00,0x00,0x00]
+ vcvttps2uqqs (%rip){1to2}, %xmm22
+
+// CHECK: vcvttps2uqqs -256(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6c,0x34,0x6d,0x00,0xff,0xff,0xff]
+ vcvttps2uqqs -256(,%rbp,2), %xmm22
+
+// CHECK: vcvttps2uqqs 1016(%rcx), %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x8f,0x6c,0x71,0x7f]
+ vcvttps2uqqs 1016(%rcx), %xmm22 {%k7} {z}
+
+// CHECK: vcvttps2uqqs -512(%rdx){1to2}, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x9f,0x6c,0x72,0x80]
+ vcvttps2uqqs -512(%rdx){1to2}, %xmm22 {%k7} {z}
+
+// CHECK: vcvttps2uqqs 268435456(%rbp,%r14,8), %ymm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttps2uqqs 268435456(%rbp,%r14,8), %ymm22
+
+// CHECK: vcvttps2uqqs 291(%r8,%rax,4), %ymm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7d,0x2f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttps2uqqs 291(%r8,%rax,4), %ymm22 {%k7}
+
+// CHECK: vcvttps2uqqs (%rip){1to4}, %ymm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x38,0x6c,0x35,0x00,0x00,0x00,0x00]
+ vcvttps2uqqs (%rip){1to4}, %ymm22
+
+// CHECK: vcvttps2uqqs -512(,%rbp,2), %ymm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x28,0x6c,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttps2uqqs -512(,%rbp,2), %ymm22
+
+// CHECK: vcvttps2uqqs 2032(%rcx), %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xaf,0x6c,0x71,0x7f]
+ vcvttps2uqqs 2032(%rcx), %ymm22 {%k7} {z}
+
+// CHECK: vcvttps2uqqs -512(%rdx){1to4}, %ymm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xbf,0x6c,0x72,0x80]
+ vcvttps2uqqs -512(%rdx){1to4}, %ymm22 {%k7} {z}
+
+// CHECK: vcvttps2uqqs 268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttps2uqqs 268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vcvttps2uqqs 291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7d,0x4f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttps2uqqs 291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vcvttps2uqqs (%rip){1to8}, %zmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x58,0x6c,0x35,0x00,0x00,0x00,0x00]
+ vcvttps2uqqs (%rip){1to8}, %zmm22
+
+// CHECK: vcvttps2uqqs -1024(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x48,0x6c,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttps2uqqs -1024(,%rbp,2), %zmm22
+
+// CHECK: vcvttps2uqqs 4064(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xcf,0x6c,0x71,0x7f]
+ vcvttps2uqqs 4064(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vcvttps2uqqs -512(%rdx){1to8}, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xdf,0x6c,0x72,0x80]
+ vcvttps2uqqs -512(%rdx){1to8}, %zmm22 {%k7} {z}
+
diff --git a/llvm/test/MC/X86/avx10_2satcvtds-64-intel.s b/llvm/test/MC/X86/avx10_2satcvtds-64-intel.s
new file mode 100644
index 00000000000000..9e9af84c054eff
--- /dev/null
+++ b/llvm/test/MC/X86/avx10_2satcvtds-64-intel.s
@@ -0,0 +1,1170 @@
+// RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vcvttsd2sis ecx, xmm22
+// CHECK: encoding: [0x62,0xb5,0x7f,0x08,0x6d,0xce]
+ vcvttsd2sis ecx, xmm22
+
+// CHECK: vcvttsd2sis ecx, xmm22, {sae}
+// CHECK: encoding: [0x62,0xb5,0x7f,0x18,0x6d,0xce]
+ vcvttsd2sis ecx, xmm22, {sae}
+
+// CHECK: vcvttsd2sis r9, xmm22
+// CHECK: encoding: [0x62,0x35,0xff,0x08,0x6d,0xce]
+ vcvttsd2sis r9, xmm22
+
+// CHECK: vcvttsd2sis r9, xmm22, {sae}
+// CHECK: encoding: [0x62,0x35,0xff,0x18,0x6d,0xce]
+ vcvttsd2sis r9, xmm22, {sae}
+
+// CHECK: vcvttsd2sis ecx, qword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xb5,0x7f,0x08,0x6d,0x8c,0xf5,0x00,0x00,0x00,0x10]
+ vcvttsd2sis ecx, qword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttsd2sis ecx, qword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xd5,0x7f,0x08,0x6d,0x8c,0x80,0x23,0x01,0x00,0x00]
+ vcvttsd2sis ecx, qword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttsd2sis ecx, qword ptr [rip]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6d,0x0d,0x00,0x00,0x00,0x00]
+ vcvttsd2sis ecx, qword ptr [rip]
+
+// CHECK: vcvttsd2sis ecx, qword ptr [2*rbp - 256]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6d,0x0c,0x6d,0x00,0xff,0xff,0xff]
+ vcvttsd2sis ecx, qword ptr [2*rbp - 256]
+
+// CHECK: vcvttsd2sis ecx, qword ptr [rcx + 1016]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6d,0x49,0x7f]
+ vcvttsd2sis ecx, qword ptr [rcx + 1016]
+
+// CHECK: vcvttsd2sis ecx, qword ptr [rdx - 1024]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6d,0x4a,0x80]
+ vcvttsd2sis ecx, qword ptr [rdx - 1024]
+
+// CHECK: vcvttsd2sis r9, qword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x35,0xff,0x08,0x6d,0x8c,0xf5,0x00,0x00,0x00,0x10]
+ vcvttsd2sis r9, qword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttsd2sis r9, qword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0x55,0xff,0x08,0x6d,0x8c,0x80,0x23,0x01,0x00,0x00]
+ vcvttsd2sis r9, qword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttsd2sis r9, qword ptr [rip]
+// CHECK: encoding: [0x62,0x75,0xff,0x08,0x6d,0x0d,0x00,0x00,0x00,0x00]
+ vcvttsd2sis r9, qword ptr [rip]
+
+// CHECK: vcvttsd2sis r9, qword ptr [2*rbp - 256]
+// CHECK: encoding: [0x62,0x75,0xff,0x08,0x6d,0x0c,0x6d,0x00,0xff,0xff,0xff]
+ vcvttsd2sis r9, qword ptr [2*rbp - 256]
+
+// CHECK: vcvttsd2sis r9, qword ptr [rcx + 1016]
+// CHECK: encoding: [0x62,0x75,0xff,0x08,0x6d,0x49,0x7f]
+ vcvttsd2sis r9, qword ptr [rcx + 1016]
+
+// CHECK: vcvttsd2sis r9, qword ptr [rdx - 1024]
+// CHECK: encoding: [0x62,0x75,0xff,0x08,0x6d,0x4a,0x80]
+ vcvttsd2sis r9, qword ptr [rdx - 1024]
+
+// CHECK: vcvttsd2usis ecx, xmm22
+// CHECK: encoding: [0x62,0xb5,0x7f,0x08,0x6c,0xce]
+ vcvttsd2usis ecx, xmm22
+
+// CHECK: vcvttsd2usis ecx, xmm22, {sae}
+// CHECK: encoding: [0x62,0xb5,0x7f,0x18,0x6c,0xce]
+ vcvttsd2usis ecx, xmm22, {sae}
+
+// CHECK: vcvttsd2usis r9, xmm22
+// CHECK: encoding: [0x62,0x35,0xff,0x08,0x6c,0xce]
+ vcvttsd2usis r9, xmm22
+
+// CHECK: vcvttsd2usis r9, xmm22, {sae}
+// CHECK: encoding: [0x62,0x35,0xff,0x18,0x6c,0xce]
+ vcvttsd2usis r9, xmm22, {sae}
+
+// CHECK: vcvttsd2usis ecx, qword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xb5,0x7f,0x08,0x6c,0x8c,0xf5,0x00,0x00,0x00,0x10]
+ vcvttsd2usis ecx, qword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttsd2usis ecx, qword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xd5,0x7f,0x08,0x6c,0x8c,0x80,0x23,0x01,0x00,0x00]
+ vcvttsd2usis ecx, qword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttsd2usis ecx, qword ptr [rip]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6c,0x0d,0x00,0x00,0x00,0x00]
+ vcvttsd2usis ecx, qword ptr [rip]
+
+// CHECK: vcvttsd2usis ecx, qword ptr [2*rbp - 256]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6c,0x0c,0x6d,0x00,0xff,0xff,0xff]
+ vcvttsd2usis ecx, qword ptr [2*rbp - 256]
+
+// CHECK: vcvttsd2usis ecx, qword ptr [rcx + 1016]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6c,0x49,0x7f]
+ vcvttsd2usis ecx, qword ptr [rcx + 1016]
+
+// CHECK: vcvttsd2usis ecx, qword ptr [rdx - 1024]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x6c,0x4a,0x80]
+ vcvttsd2usis ecx, qword ptr [rdx - 1024]
+
+// CHECK: vcvttsd2usis r9, qword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x35,0xff,0x08,0x6c,0x8c,0xf5,0x00,0x00,0x00,0x10]
+ vcvttsd2usis r9, qword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttsd2usis r9, qword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0x55,0xff,0x08,0x6c,0x8c,0x80,0x23,0x01,0x00,0x00]
+ vcvttsd2usis r9, qword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttsd2usis r9, qword ptr [rip]
+// CHECK: encoding: [0x62,0x75,0xff,0x08,0x6c,0x0d,0x00,0x00,0x00,0x00]
+ vcvttsd2usis r9, qword ptr [rip]
+
+// CHECK: vcvttsd2usis r9, qword ptr [2*rbp - 256]
+// CHECK: encoding: [0x62,0x75,0xff,0x08,0x6c,0x0c,0x6d,0x00,0xff,0xff,0xff]
+ vcvttsd2usis r9, qword ptr [2*rbp - 256]
+
+// CHECK: vcvttsd2usis r9, qword ptr [rcx + 1016]
+// CHECK: encoding: [0x62,0x75,0xff,0x08,0x6c,0x49,0x7f]
+ vcvttsd2usis r9, qword ptr [rcx + 1016]
+
+// CHECK: vcvttsd2usis r9, qword ptr [rdx - 1024]
+// CHECK: encoding: [0x62,0x75,0xff,0x08,0x6c,0x4a,0x80]
+ vcvttsd2usis r9, qword ptr [rdx - 1024]
+
+// CHECK: vcvttss2sis ecx, xmm22
+// CHECK: encoding: [0x62,0xb5,0x7e,0x08,0x6d,0xce]
+ vcvttss2sis ecx, xmm22
+
+// CHECK: vcvttss2sis ecx, xmm22, {sae}
+// CHECK: encoding: [0x62,0xb5,0x7e,0x18,0x6d,0xce]
+ vcvttss2sis ecx, xmm22, {sae}
+
+// CHECK: vcvttss2sis r9, xmm22
+// CHECK: encoding: [0x62,0x35,0xfe,0x08,0x6d,0xce]
+ vcvttss2sis r9, xmm22
+
+// CHECK: vcvttss2sis r9, xmm22, {sae}
+// CHECK: encoding: [0x62,0x35,0xfe,0x18,0x6d,0xce]
+ vcvttss2sis r9, xmm22, {sae}
+
+// CHECK: vcvttss2sis ecx, dword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xb5,0x7e,0x08,0x6d,0x8c,0xf5,0x00,0x00,0x00,0x10]
+ vcvttss2sis ecx, dword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttss2sis ecx, dword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xd5,0x7e,0x08,0x6d,0x8c,0x80,0x23,0x01,0x00,0x00]
+ vcvttss2sis ecx, dword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttss2sis ecx, dword ptr [rip]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6d,0x0d,0x00,0x00,0x00,0x00]
+ vcvttss2sis ecx, dword ptr [rip]
+
+// CHECK: vcvttss2sis ecx, dword ptr [2*rbp - 128]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6d,0x0c,0x6d,0x80,0xff,0xff,0xff]
+ vcvttss2sis ecx, dword ptr [2*rbp - 128]
+
+// CHECK: vcvttss2sis ecx, dword ptr [rcx + 508]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6d,0x49,0x7f]
+ vcvttss2sis ecx, dword ptr [rcx + 508]
+
+// CHECK: vcvttss2sis ecx, dword ptr [rdx - 512]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6d,0x4a,0x80]
+ vcvttss2sis ecx, dword ptr [rdx - 512]
+
+// CHECK: vcvttss2sis r9, dword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x35,0xfe,0x08,0x6d,0x8c,0xf5,0x00,0x00,0x00,0x10]
+ vcvttss2sis r9, dword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttss2sis r9, dword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0x55,0xfe,0x08,0x6d,0x8c,0x80,0x23,0x01,0x00,0x00]
+ vcvttss2sis r9, dword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttss2sis r9, dword ptr [rip]
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x6d,0x0d,0x00,0x00,0x00,0x00]
+ vcvttss2sis r9, dword ptr [rip]
+
+// CHECK: vcvttss2sis r9, dword ptr [2*rbp - 128]
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x6d,0x0c,0x6d,0x80,0xff,0xff,0xff]
+ vcvttss2sis r9, dword ptr [2*rbp - 128]
+
+// CHECK: vcvttss2sis r9, dword ptr [rcx + 508]
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x6d,0x49,0x7f]
+ vcvttss2sis r9, dword ptr [rcx + 508]
+
+// CHECK: vcvttss2sis r9, dword ptr [rdx - 512]
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x6d,0x4a,0x80]
+ vcvttss2sis r9, dword ptr [rdx - 512]
+
+// CHECK: vcvttss2usis ecx, xmm22
+// CHECK: encoding: [0x62,0xb5,0x7e,0x08,0x6c,0xce]
+ vcvttss2usis ecx, xmm22
+
+// CHECK: vcvttss2usis ecx, xmm22, {sae}
+// CHECK: encoding: [0x62,0xb5,0x7e,0x18,0x6c,0xce]
+ vcvttss2usis ecx, xmm22, {sae}
+
+// CHECK: vcvttss2usis r9, xmm22
+// CHECK: encoding: [0x62,0x35,0xfe,0x08,0x6c,0xce]
+ vcvttss2usis r9, xmm22
+
+// CHECK: vcvttss2usis r9, xmm22, {sae}
+// CHECK: encoding: [0x62,0x35,0xfe,0x18,0x6c,0xce]
+ vcvttss2usis r9, xmm22, {sae}
+
+// CHECK: vcvttss2usis ecx, dword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xb5,0x7e,0x08,0x6c,0x8c,0xf5,0x00,0x00,0x00,0x10]
+ vcvttss2usis ecx, dword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttss2usis ecx, dword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xd5,0x7e,0x08,0x6c,0x8c,0x80,0x23,0x01,0x00,0x00]
+ vcvttss2usis ecx, dword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttss2usis ecx, dword ptr [rip]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6c,0x0d,0x00,0x00,0x00,0x00]
+ vcvttss2usis ecx, dword ptr [rip]
+
+// CHECK: vcvttss2usis ecx, dword ptr [2*rbp - 128]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6c,0x0c,0x6d,0x80,0xff,0xff,0xff]
+ vcvttss2usis ecx, dword ptr [2*rbp - 128]
+
+// CHECK: vcvttss2usis ecx, dword ptr [rcx + 508]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6c,0x49,0x7f]
+ vcvttss2usis ecx, dword ptr [rcx + 508]
+
+// CHECK: vcvttss2usis ecx, dword ptr [rdx - 512]
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6c,0x4a,0x80]
+ vcvttss2usis ecx, dword ptr [rdx - 512]
+
+// CHECK: vcvttss2usis r9, dword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x35,0xfe,0x08,0x6c,0x8c,0xf5,0x00,0x00,0x00,0x10]
+ vcvttss2usis r9, dword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttss2usis r9, dword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0x55,0xfe,0x08,0x6c,0x8c,0x80,0x23,0x01,0x00,0x00]
+ vcvttss2usis r9, dword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttss2usis r9, dword ptr [rip]
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x6c,0x0d,0x00,0x00,0x00,0x00]
+ vcvttss2usis r9, dword ptr [rip]
+
+// CHECK: vcvttss2usis r9, dword ptr [2*rbp - 128]
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x6c,0x0c,0x6d,0x80,0xff,0xff,0xff]
+ vcvttss2usis r9, dword ptr [2*rbp - 128]
+
+// CHECK: vcvttss2usis r9, dword ptr [rcx + 508]
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x6c,0x49,0x7f]
+ vcvttss2usis r9, dword ptr [rcx + 508]
+
+// CHECK: vcvttss2usis r9, dword ptr [rdx - 512]
+// CHECK: encoding: [0x62,0x75,0xfe,0x08,0x6c,0x4a,0x80]
+ vcvttss2usis r9, dword ptr [rdx - 512]
+
+// CHECK: vcvttpd2dqs xmm22, xmm23
+// CHECK: encoding: [0x62,0xa5,0xfc,0x08,0x6d,0xf7]
+ vcvttpd2dqs xmm22, xmm23
+
+// CHECK: vcvttpd2dqs xmm22 {k7}, xmm23
+// CHECK: encoding: [0x62,0xa5,0xfc,0x0f,0x6d,0xf7]
+ vcvttpd2dqs xmm22 {k7}, xmm23
+
+// CHECK: vcvttpd2dqs xmm22 {k7} {z}, xmm23
+// CHECK: encoding: [0x62,0xa5,0xfc,0x8f,0x6d,0xf7]
+ vcvttpd2dqs xmm22 {k7} {z}, xmm23
+
+// CHECK: vcvttpd2dqs xmm22, ymm23
+// CHECK: encoding: [0x62,0xa5,0xfc,0x28,0x6d,0xf7]
+ vcvttpd2dqs xmm22, ymm23
+
+// CHECK: vcvttpd2dqs xmm22, ymm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0xf8,0x18,0x6d,0xf7]
+ vcvttpd2dqs xmm22, ymm23, {sae}
+
+// CHECK: vcvttpd2dqs xmm22 {k7}, ymm23
+// CHECK: encoding: [0x62,0xa5,0xfc,0x2f,0x6d,0xf7]
+ vcvttpd2dqs xmm22 {k7}, ymm23
+
+// CHECK: vcvttpd2dqs xmm22 {k7} {z}, ymm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0xf8,0x9f,0x6d,0xf7]
+ vcvttpd2dqs xmm22 {k7} {z}, ymm23, {sae}
+
+// CHECK: vcvttpd2dqs ymm22, zmm23
+// CHECK: encoding: [0x62,0xa5,0xfc,0x48,0x6d,0xf7]
+ vcvttpd2dqs ymm22, zmm23
+
+// CHECK: vcvttpd2dqs ymm22, zmm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0xfc,0x18,0x6d,0xf7]
+ vcvttpd2dqs ymm22, zmm23, {sae}
+
+// CHECK: vcvttpd2dqs ymm22 {k7}, zmm23
+// CHECK: encoding: [0x62,0xa5,0xfc,0x4f,0x6d,0xf7]
+ vcvttpd2dqs ymm22 {k7}, zmm23
+
+// CHECK: vcvttpd2dqs ymm22 {k7} {z}, zmm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0xfc,0x9f,0x6d,0xf7]
+ vcvttpd2dqs ymm22 {k7} {z}, zmm23, {sae}
+
+// CHECK: vcvttpd2dqs xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0xfc,0x08,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttpd2dqs xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttpd2dqs xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0xfc,0x0f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttpd2dqs xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttpd2dqs xmm22, qword ptr [rip]{1to2}
+// CHECK: encoding: [0x62,0xe5,0xfc,0x18,0x6d,0x35,0x00,0x00,0x00,0x00]
+ vcvttpd2dqs xmm22, qword ptr [rip]{1to2}
+
+// CHECK: vcvttpd2dqs xmm22, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe5,0xfc,0x08,0x6d,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttpd2dqs xmm22, xmmword ptr [2*rbp - 512]
+
+// CHECK: vcvttpd2dqs xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe5,0xfc,0x8f,0x6d,0x71,0x7f]
+ vcvttpd2dqs xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvttpd2dqs xmm22 {k7} {z}, qword ptr [rdx - 1024]{1to2}
+// CHECK: encoding: [0x62,0xe5,0xfc,0x9f,0x6d,0x72,0x80]
+ vcvttpd2dqs xmm22 {k7} {z}, qword ptr [rdx - 1024]{1to2}
+
+// CHECK: vcvttpd2dqs xmm22, qword ptr [rip]{1to4}
+// CHECK: encoding: [0x62,0xe5,0xfc,0x38,0x6d,0x35,0x00,0x00,0x00,0x00]
+ vcvttpd2dqs xmm22, qword ptr [rip]{1to4}
+
+// CHECK: vcvttpd2dqs xmm22, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe5,0xfc,0x28,0x6d,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttpd2dqs xmm22, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vcvttpd2dqs xmm22 {k7} {z}, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe5,0xfc,0xaf,0x6d,0x71,0x7f]
+ vcvttpd2dqs xmm22 {k7} {z}, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvttpd2dqs xmm22 {k7} {z}, qword ptr [rdx - 1024]{1to4}
+// CHECK: encoding: [0x62,0xe5,0xfc,0xbf,0x6d,0x72,0x80]
+ vcvttpd2dqs xmm22 {k7} {z}, qword ptr [rdx - 1024]{1to4}
+
+// CHECK: vcvttpd2dqs ymm22, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0xfc,0x48,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttpd2dqs ymm22, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttpd2dqs ymm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0xfc,0x4f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttpd2dqs ymm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttpd2dqs ymm22, qword ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe5,0xfc,0x58,0x6d,0x35,0x00,0x00,0x00,0x00]
+ vcvttpd2dqs ymm22, qword ptr [rip]{1to8}
+
+// CHECK: vcvttpd2dqs ymm22, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0xfc,0x48,0x6d,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vcvttpd2dqs ymm22, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vcvttpd2dqs ymm22 {k7} {z}, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0xfc,0xcf,0x6d,0x71,0x7f]
+ vcvttpd2dqs ymm22 {k7} {z}, zmmword ptr [rcx + 8128]
+
+// CHECK: vcvttpd2dqs ymm22 {k7} {z}, qword ptr [rdx - 1024]{1to8}
+// CHECK: encoding: [0x62,0xe5,0xfc,0xdf,0x6d,0x72,0x80]
+ vcvttpd2dqs ymm22 {k7} {z}, qword ptr [rdx - 1024]{1to8}
+
+// CHECK: vcvttpd2qqs xmm22, xmm23
+// CHECK: encoding: [0x62,0xa5,0xfd,0x08,0x6d,0xf7]
+ vcvttpd2qqs xmm22, xmm23
+
+// CHECK: vcvttpd2qqs xmm22 {k7}, xmm23
+// CHECK: encoding: [0x62,0xa5,0xfd,0x0f,0x6d,0xf7]
+ vcvttpd2qqs xmm22 {k7}, xmm23
+
+// CHECK: vcvttpd2qqs xmm22 {k7} {z}, xmm23
+// CHECK: encoding: [0x62,0xa5,0xfd,0x8f,0x6d,0xf7]
+ vcvttpd2qqs xmm22 {k7} {z}, xmm23
+
+// CHECK: vcvttpd2qqs ymm22, ymm23
+// CHECK: encoding: [0x62,0xa5,0xfd,0x28,0x6d,0xf7]
+ vcvttpd2qqs ymm22, ymm23
+
+// CHECK: vcvttpd2qqs ymm22, ymm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0xf9,0x18,0x6d,0xf7]
+ vcvttpd2qqs ymm22, ymm23, {sae}
+
+// CHECK: vcvttpd2qqs ymm22 {k7}, ymm23
+// CHECK: encoding: [0x62,0xa5,0xfd,0x2f,0x6d,0xf7]
+ vcvttpd2qqs ymm22 {k7}, ymm23
+
+// CHECK: vcvttpd2qqs ymm22 {k7} {z}, ymm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0xf9,0x9f,0x6d,0xf7]
+ vcvttpd2qqs ymm22 {k7} {z}, ymm23, {sae}
+
+// CHECK: vcvttpd2qqs zmm22, zmm23
+// CHECK: encoding: [0x62,0xa5,0xfd,0x48,0x6d,0xf7]
+ vcvttpd2qqs zmm22, zmm23
+
+// CHECK: vcvttpd2qqs zmm22, zmm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0xfd,0x18,0x6d,0xf7]
+ vcvttpd2qqs zmm22, zmm23, {sae}
+
+// CHECK: vcvttpd2qqs zmm22 {k7}, zmm23
+// CHECK: encoding: [0x62,0xa5,0xfd,0x4f,0x6d,0xf7]
+ vcvttpd2qqs zmm22 {k7}, zmm23
+
+// CHECK: vcvttpd2qqs zmm22 {k7} {z}, zmm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0xfd,0x9f,0x6d,0xf7]
+ vcvttpd2qqs zmm22 {k7} {z}, zmm23, {sae}
+
+// CHECK: vcvttpd2qqs xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0xfd,0x08,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttpd2qqs xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttpd2qqs xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0xfd,0x0f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttpd2qqs xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttpd2qqs xmm22, qword ptr [rip]{1to2}
+// CHECK: encoding: [0x62,0xe5,0xfd,0x18,0x6d,0x35,0x00,0x00,0x00,0x00]
+ vcvttpd2qqs xmm22, qword ptr [rip]{1to2}
+
+// CHECK: vcvttpd2qqs xmm22, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe5,0xfd,0x08,0x6d,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttpd2qqs xmm22, xmmword ptr [2*rbp - 512]
+
+// CHECK: vcvttpd2qqs xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe5,0xfd,0x8f,0x6d,0x71,0x7f]
+ vcvttpd2qqs xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvttpd2qqs xmm22 {k7} {z}, qword ptr [rdx - 1024]{1to2}
+// CHECK: encoding: [0x62,0xe5,0xfd,0x9f,0x6d,0x72,0x80]
+ vcvttpd2qqs xmm22 {k7} {z}, qword ptr [rdx - 1024]{1to2}
+
+// CHECK: vcvttpd2qqs ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0xfd,0x28,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttpd2qqs ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttpd2qqs ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0xfd,0x2f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttpd2qqs ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttpd2qqs ymm22, qword ptr [rip]{1to4}
+// CHECK: encoding: [0x62,0xe5,0xfd,0x38,0x6d,0x35,0x00,0x00,0x00,0x00]
+ vcvttpd2qqs ymm22, qword ptr [rip]{1to4}
+
+// CHECK: vcvttpd2qqs ymm22, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe5,0xfd,0x28,0x6d,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttpd2qqs ymm22, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vcvttpd2qqs ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe5,0xfd,0xaf,0x6d,0x71,0x7f]
+ vcvttpd2qqs ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvttpd2qqs ymm22 {k7} {z}, qword ptr [rdx - 1024]{1to4}
+// CHECK: encoding: [0x62,0xe5,0xfd,0xbf,0x6d,0x72,0x80]
+ vcvttpd2qqs ymm22 {k7} {z}, qword ptr [rdx - 1024]{1to4}
+
+// CHECK: vcvttpd2qqs zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0xfd,0x48,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttpd2qqs zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttpd2qqs zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0xfd,0x4f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttpd2qqs zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttpd2qqs zmm22, qword ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe5,0xfd,0x58,0x6d,0x35,0x00,0x00,0x00,0x00]
+ vcvttpd2qqs zmm22, qword ptr [rip]{1to8}
+
+// CHECK: vcvttpd2qqs zmm22, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0xfd,0x48,0x6d,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vcvttpd2qqs zmm22, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vcvttpd2qqs zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0xfd,0xcf,0x6d,0x71,0x7f]
+ vcvttpd2qqs zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+
+// CHECK: vcvttpd2qqs zmm22 {k7} {z}, qword ptr [rdx - 1024]{1to8}
+// CHECK: encoding: [0x62,0xe5,0xfd,0xdf,0x6d,0x72,0x80]
+ vcvttpd2qqs zmm22 {k7} {z}, qword ptr [rdx - 1024]{1to8}
+
+// CHECK: vcvttpd2udqs xmm22, xmm23
+// CHECK: encoding: [0x62,0xa5,0xfc,0x08,0x6c,0xf7]
+ vcvttpd2udqs xmm22, xmm23
+
+// CHECK: vcvttpd2udqs xmm22 {k7}, xmm23
+// CHECK: encoding: [0x62,0xa5,0xfc,0x0f,0x6c,0xf7]
+ vcvttpd2udqs xmm22 {k7}, xmm23
+
+// CHECK: vcvttpd2udqs xmm22 {k7} {z}, xmm23
+// CHECK: encoding: [0x62,0xa5,0xfc,0x8f,0x6c,0xf7]
+ vcvttpd2udqs xmm22 {k7} {z}, xmm23
+
+// CHECK: vcvttpd2udqs xmm22, ymm23
+// CHECK: encoding: [0x62,0xa5,0xfc,0x28,0x6c,0xf7]
+ vcvttpd2udqs xmm22, ymm23
+
+// CHECK: vcvttpd2udqs xmm22, ymm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0xf8,0x18,0x6c,0xf7]
+ vcvttpd2udqs xmm22, ymm23, {sae}
+
+// CHECK: vcvttpd2udqs xmm22 {k7}, ymm23
+// CHECK: encoding: [0x62,0xa5,0xfc,0x2f,0x6c,0xf7]
+ vcvttpd2udqs xmm22 {k7}, ymm23
+
+// CHECK: vcvttpd2udqs xmm22 {k7} {z}, ymm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0xf8,0x9f,0x6c,0xf7]
+ vcvttpd2udqs xmm22 {k7} {z}, ymm23, {sae}
+
+// CHECK: vcvttpd2udqs ymm22, zmm23
+// CHECK: encoding: [0x62,0xa5,0xfc,0x48,0x6c,0xf7]
+ vcvttpd2udqs ymm22, zmm23
+
+// CHECK: vcvttpd2udqs ymm22, zmm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0xfc,0x18,0x6c,0xf7]
+ vcvttpd2udqs ymm22, zmm23, {sae}
+
+// CHECK: vcvttpd2udqs ymm22 {k7}, zmm23
+// CHECK: encoding: [0x62,0xa5,0xfc,0x4f,0x6c,0xf7]
+ vcvttpd2udqs ymm22 {k7}, zmm23
+
+// CHECK: vcvttpd2udqs ymm22 {k7} {z}, zmm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0xfc,0x9f,0x6c,0xf7]
+ vcvttpd2udqs ymm22 {k7} {z}, zmm23, {sae}
+
+// CHECK: vcvttpd2udqs xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0xfc,0x08,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttpd2udqs xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttpd2udqs xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0xfc,0x0f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttpd2udqs xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttpd2udqs xmm22, qword ptr [rip]{1to2}
+// CHECK: encoding: [0x62,0xe5,0xfc,0x18,0x6c,0x35,0x00,0x00,0x00,0x00]
+ vcvttpd2udqs xmm22, qword ptr [rip]{1to2}
+
+// CHECK: vcvttpd2udqs xmm22, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe5,0xfc,0x08,0x6c,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttpd2udqs xmm22, xmmword ptr [2*rbp - 512]
+
+// CHECK: vcvttpd2udqs xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe5,0xfc,0x8f,0x6c,0x71,0x7f]
+ vcvttpd2udqs xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvttpd2udqs xmm22 {k7} {z}, qword ptr [rdx - 1024]{1to2}
+// CHECK: encoding: [0x62,0xe5,0xfc,0x9f,0x6c,0x72,0x80]
+ vcvttpd2udqs xmm22 {k7} {z}, qword ptr [rdx - 1024]{1to2}
+
+// CHECK: vcvttpd2udqs xmm22, qword ptr [rip]{1to4}
+// CHECK: encoding: [0x62,0xe5,0xfc,0x38,0x6c,0x35,0x00,0x00,0x00,0x00]
+ vcvttpd2udqs xmm22, qword ptr [rip]{1to4}
+
+// CHECK: vcvttpd2udqs xmm22, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe5,0xfc,0x28,0x6c,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttpd2udqs xmm22, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vcvttpd2udqs xmm22 {k7} {z}, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe5,0xfc,0xaf,0x6c,0x71,0x7f]
+ vcvttpd2udqs xmm22 {k7} {z}, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvttpd2udqs xmm22 {k7} {z}, qword ptr [rdx - 1024]{1to4}
+// CHECK: encoding: [0x62,0xe5,0xfc,0xbf,0x6c,0x72,0x80]
+ vcvttpd2udqs xmm22 {k7} {z}, qword ptr [rdx - 1024]{1to4}
+
+// CHECK: vcvttpd2udqs ymm22, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0xfc,0x48,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttpd2udqs ymm22, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttpd2udqs ymm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0xfc,0x4f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttpd2udqs ymm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttpd2udqs ymm22, qword ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe5,0xfc,0x58,0x6c,0x35,0x00,0x00,0x00,0x00]
+ vcvttpd2udqs ymm22, qword ptr [rip]{1to8}
+
+// CHECK: vcvttpd2udqs ymm22, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0xfc,0x48,0x6c,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vcvttpd2udqs ymm22, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vcvttpd2udqs ymm22 {k7} {z}, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0xfc,0xcf,0x6c,0x71,0x7f]
+ vcvttpd2udqs ymm22 {k7} {z}, zmmword ptr [rcx + 8128]
+
+// CHECK: vcvttpd2udqs ymm22 {k7} {z}, qword ptr [rdx - 1024]{1to8}
+// CHECK: encoding: [0x62,0xe5,0xfc,0xdf,0x6c,0x72,0x80]
+ vcvttpd2udqs ymm22 {k7} {z}, qword ptr [rdx - 1024]{1to8}
+
+// CHECK: vcvttpd2uqqs xmm22, xmm23
+// CHECK: encoding: [0x62,0xa5,0xfd,0x08,0x6c,0xf7]
+ vcvttpd2uqqs xmm22, xmm23
+
+// CHECK: vcvttpd2uqqs xmm22 {k7}, xmm23
+// CHECK: encoding: [0x62,0xa5,0xfd,0x0f,0x6c,0xf7]
+ vcvttpd2uqqs xmm22 {k7}, xmm23
+
+// CHECK: vcvttpd2uqqs xmm22 {k7} {z}, xmm23
+// CHECK: encoding: [0x62,0xa5,0xfd,0x8f,0x6c,0xf7]
+ vcvttpd2uqqs xmm22 {k7} {z}, xmm23
+
+// CHECK: vcvttpd2uqqs ymm22, ymm23
+// CHECK: encoding: [0x62,0xa5,0xfd,0x28,0x6c,0xf7]
+ vcvttpd2uqqs ymm22, ymm23
+
+// CHECK: vcvttpd2uqqs ymm22, ymm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0xf9,0x18,0x6c,0xf7]
+ vcvttpd2uqqs ymm22, ymm23, {sae}
+
+// CHECK: vcvttpd2uqqs ymm22 {k7}, ymm23
+// CHECK: encoding: [0x62,0xa5,0xfd,0x2f,0x6c,0xf7]
+ vcvttpd2uqqs ymm22 {k7}, ymm23
+
+// CHECK: vcvttpd2uqqs ymm22 {k7} {z}, ymm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0xf9,0x9f,0x6c,0xf7]
+ vcvttpd2uqqs ymm22 {k7} {z}, ymm23, {sae}
+
+// CHECK: vcvttpd2uqqs zmm22, zmm23
+// CHECK: encoding: [0x62,0xa5,0xfd,0x48,0x6c,0xf7]
+ vcvttpd2uqqs zmm22, zmm23
+
+// CHECK: vcvttpd2uqqs zmm22, zmm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0xfd,0x18,0x6c,0xf7]
+ vcvttpd2uqqs zmm22, zmm23, {sae}
+
+// CHECK: vcvttpd2uqqs zmm22 {k7}, zmm23
+// CHECK: encoding: [0x62,0xa5,0xfd,0x4f,0x6c,0xf7]
+ vcvttpd2uqqs zmm22 {k7}, zmm23
+
+// CHECK: vcvttpd2uqqs zmm22 {k7} {z}, zmm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0xfd,0x9f,0x6c,0xf7]
+ vcvttpd2uqqs zmm22 {k7} {z}, zmm23, {sae}
+
+// CHECK: vcvttpd2uqqs xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0xfd,0x08,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttpd2uqqs xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttpd2uqqs xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0xfd,0x0f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttpd2uqqs xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttpd2uqqs xmm22, qword ptr [rip]{1to2}
+// CHECK: encoding: [0x62,0xe5,0xfd,0x18,0x6c,0x35,0x00,0x00,0x00,0x00]
+ vcvttpd2uqqs xmm22, qword ptr [rip]{1to2}
+
+// CHECK: vcvttpd2uqqs xmm22, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe5,0xfd,0x08,0x6c,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttpd2uqqs xmm22, xmmword ptr [2*rbp - 512]
+
+// CHECK: vcvttpd2uqqs xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe5,0xfd,0x8f,0x6c,0x71,0x7f]
+ vcvttpd2uqqs xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvttpd2uqqs xmm22 {k7} {z}, qword ptr [rdx - 1024]{1to2}
+// CHECK: encoding: [0x62,0xe5,0xfd,0x9f,0x6c,0x72,0x80]
+ vcvttpd2uqqs xmm22 {k7} {z}, qword ptr [rdx - 1024]{1to2}
+
+// CHECK: vcvttpd2uqqs ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0xfd,0x28,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttpd2uqqs ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttpd2uqqs ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0xfd,0x2f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttpd2uqqs ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttpd2uqqs ymm22, qword ptr [rip]{1to4}
+// CHECK: encoding: [0x62,0xe5,0xfd,0x38,0x6c,0x35,0x00,0x00,0x00,0x00]
+ vcvttpd2uqqs ymm22, qword ptr [rip]{1to4}
+
+// CHECK: vcvttpd2uqqs ymm22, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe5,0xfd,0x28,0x6c,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttpd2uqqs ymm22, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vcvttpd2uqqs ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe5,0xfd,0xaf,0x6c,0x71,0x7f]
+ vcvttpd2uqqs ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvttpd2uqqs ymm22 {k7} {z}, qword ptr [rdx - 1024]{1to4}
+// CHECK: encoding: [0x62,0xe5,0xfd,0xbf,0x6c,0x72,0x80]
+ vcvttpd2uqqs ymm22 {k7} {z}, qword ptr [rdx - 1024]{1to4}
+
+// CHECK: vcvttpd2uqqs zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0xfd,0x48,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttpd2uqqs zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttpd2uqqs zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0xfd,0x4f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttpd2uqqs zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttpd2uqqs zmm22, qword ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe5,0xfd,0x58,0x6c,0x35,0x00,0x00,0x00,0x00]
+ vcvttpd2uqqs zmm22, qword ptr [rip]{1to8}
+
+// CHECK: vcvttpd2uqqs zmm22, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0xfd,0x48,0x6c,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vcvttpd2uqqs zmm22, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vcvttpd2uqqs zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0xfd,0xcf,0x6c,0x71,0x7f]
+ vcvttpd2uqqs zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+
+// CHECK: vcvttpd2uqqs zmm22 {k7} {z}, qword ptr [rdx - 1024]{1to8}
+// CHECK: encoding: [0x62,0xe5,0xfd,0xdf,0x6c,0x72,0x80]
+ vcvttpd2uqqs zmm22 {k7} {z}, qword ptr [rdx - 1024]{1to8}
+
+// CHECK: vcvttps2dqs xmm22, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7c,0x08,0x6d,0xf7]
+ vcvttps2dqs xmm22, xmm23
+
+// CHECK: vcvttps2dqs xmm22 {k7}, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7c,0x0f,0x6d,0xf7]
+ vcvttps2dqs xmm22 {k7}, xmm23
+
+// CHECK: vcvttps2dqs xmm22 {k7} {z}, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7c,0x8f,0x6d,0xf7]
+ vcvttps2dqs xmm22 {k7} {z}, xmm23
+
+// CHECK: vcvttps2dqs ymm22, ymm23
+// CHECK: encoding: [0x62,0xa5,0x7c,0x28,0x6d,0xf7]
+ vcvttps2dqs ymm22, ymm23
+
+// CHECK: vcvttps2dqs ymm22, ymm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0x78,0x18,0x6d,0xf7]
+ vcvttps2dqs ymm22, ymm23, {sae}
+
+// CHECK: vcvttps2dqs ymm22 {k7}, ymm23
+// CHECK: encoding: [0x62,0xa5,0x7c,0x2f,0x6d,0xf7]
+ vcvttps2dqs ymm22 {k7}, ymm23
+
+// CHECK: vcvttps2dqs ymm22 {k7} {z}, ymm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0x78,0x9f,0x6d,0xf7]
+ vcvttps2dqs ymm22 {k7} {z}, ymm23, {sae}
+
+// CHECK: vcvttps2dqs zmm22, zmm23
+// CHECK: encoding: [0x62,0xa5,0x7c,0x48,0x6d,0xf7]
+ vcvttps2dqs zmm22, zmm23
+
+// CHECK: vcvttps2dqs zmm22, zmm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0x7c,0x18,0x6d,0xf7]
+ vcvttps2dqs zmm22, zmm23, {sae}
+
+// CHECK: vcvttps2dqs zmm22 {k7}, zmm23
+// CHECK: encoding: [0x62,0xa5,0x7c,0x4f,0x6d,0xf7]
+ vcvttps2dqs zmm22 {k7}, zmm23
+
+// CHECK: vcvttps2dqs zmm22 {k7} {z}, zmm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0x7c,0x9f,0x6d,0xf7]
+ vcvttps2dqs zmm22 {k7} {z}, zmm23, {sae}
+
+// CHECK: vcvttps2dqs xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7c,0x08,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttps2dqs xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttps2dqs xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7c,0x0f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttps2dqs xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttps2dqs xmm22, dword ptr [rip]{1to4}
+// CHECK: encoding: [0x62,0xe5,0x7c,0x18,0x6d,0x35,0x00,0x00,0x00,0x00]
+ vcvttps2dqs xmm22, dword ptr [rip]{1to4}
+
+// CHECK: vcvttps2dqs xmm22, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe5,0x7c,0x08,0x6d,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttps2dqs xmm22, xmmword ptr [2*rbp - 512]
+
+// CHECK: vcvttps2dqs xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe5,0x7c,0x8f,0x6d,0x71,0x7f]
+ vcvttps2dqs xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvttps2dqs xmm22 {k7} {z}, dword ptr [rdx - 512]{1to4}
+// CHECK: encoding: [0x62,0xe5,0x7c,0x9f,0x6d,0x72,0x80]
+ vcvttps2dqs xmm22 {k7} {z}, dword ptr [rdx - 512]{1to4}
+
+// CHECK: vcvttps2dqs ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7c,0x28,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttps2dqs ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttps2dqs ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7c,0x2f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttps2dqs ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttps2dqs ymm22, dword ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x7c,0x38,0x6d,0x35,0x00,0x00,0x00,0x00]
+ vcvttps2dqs ymm22, dword ptr [rip]{1to8}
+
+// CHECK: vcvttps2dqs ymm22, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe5,0x7c,0x28,0x6d,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttps2dqs ymm22, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vcvttps2dqs ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe5,0x7c,0xaf,0x6d,0x71,0x7f]
+ vcvttps2dqs ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvttps2dqs ymm22 {k7} {z}, dword ptr [rdx - 512]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x7c,0xbf,0x6d,0x72,0x80]
+ vcvttps2dqs ymm22 {k7} {z}, dword ptr [rdx - 512]{1to8}
+
+// CHECK: vcvttps2dqs zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7c,0x48,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttps2dqs zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttps2dqs zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7c,0x4f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttps2dqs zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttps2dqs zmm22, dword ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x7c,0x58,0x6d,0x35,0x00,0x00,0x00,0x00]
+ vcvttps2dqs zmm22, dword ptr [rip]{1to16}
+
+// CHECK: vcvttps2dqs zmm22, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0x7c,0x48,0x6d,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vcvttps2dqs zmm22, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vcvttps2dqs zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0x7c,0xcf,0x6d,0x71,0x7f]
+ vcvttps2dqs zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+
+// CHECK: vcvttps2dqs zmm22 {k7} {z}, dword ptr [rdx - 512]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x7c,0xdf,0x6d,0x72,0x80]
+ vcvttps2dqs zmm22 {k7} {z}, dword ptr [rdx - 512]{1to16}
+
+// CHECK: vcvttps2qqs xmm22, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x6d,0xf7]
+ vcvttps2qqs xmm22, xmm23
+
+// CHECK: vcvttps2qqs xmm22 {k7}, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x0f,0x6d,0xf7]
+ vcvttps2qqs xmm22 {k7}, xmm23
+
+// CHECK: vcvttps2qqs xmm22 {k7} {z}, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x8f,0x6d,0xf7]
+ vcvttps2qqs xmm22 {k7} {z}, xmm23
+
+// CHECK: vcvttps2qqs ymm22, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x6d,0xf7]
+ vcvttps2qqs ymm22, xmm23
+
+// CHECK: vcvttps2qqs ymm22, xmm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0x79,0x18,0x6d,0xf7]
+ vcvttps2qqs ymm22, xmm23, {sae}
+
+// CHECK: vcvttps2qqs ymm22 {k7}, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x2f,0x6d,0xf7]
+ vcvttps2qqs ymm22 {k7}, xmm23
+
+// CHECK: vcvttps2qqs ymm22 {k7} {z}, xmm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0x79,0x9f,0x6d,0xf7]
+ vcvttps2qqs ymm22 {k7} {z}, xmm23, {sae}
+
+// CHECK: vcvttps2qqs zmm22, ymm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x6d,0xf7]
+ vcvttps2qqs zmm22, ymm23
+
+// CHECK: vcvttps2qqs zmm22, ymm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x18,0x6d,0xf7]
+ vcvttps2qqs zmm22, ymm23, {sae}
+
+// CHECK: vcvttps2qqs zmm22 {k7}, ymm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x4f,0x6d,0xf7]
+ vcvttps2qqs zmm22 {k7}, ymm23
+
+// CHECK: vcvttps2qqs zmm22 {k7} {z}, ymm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x9f,0x6d,0xf7]
+ vcvttps2qqs zmm22 {k7} {z}, ymm23, {sae}
+
+// CHECK: vcvttps2qqs xmm22, qword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttps2qqs xmm22, qword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttps2qqs xmm22 {k7}, qword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7d,0x0f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttps2qqs xmm22 {k7}, qword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttps2qqs xmm22, dword ptr [rip]{1to2}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x18,0x6d,0x35,0x00,0x00,0x00,0x00]
+ vcvttps2qqs xmm22, dword ptr [rip]{1to2}
+
+// CHECK: vcvttps2qqs xmm22, qword ptr [2*rbp - 256]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6d,0x34,0x6d,0x00,0xff,0xff,0xff]
+ vcvttps2qqs xmm22, qword ptr [2*rbp - 256]
+
+// CHECK: vcvttps2qqs xmm22 {k7} {z}, qword ptr [rcx + 1016]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x8f,0x6d,0x71,0x7f]
+ vcvttps2qqs xmm22 {k7} {z}, qword ptr [rcx + 1016]
+
+// CHECK: vcvttps2qqs xmm22 {k7} {z}, dword ptr [rdx - 512]{1to2}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x9f,0x6d,0x72,0x80]
+ vcvttps2qqs xmm22 {k7} {z}, dword ptr [rdx - 512]{1to2}
+
+// CHECK: vcvttps2qqs ymm22, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttps2qqs ymm22, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttps2qqs ymm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7d,0x2f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttps2qqs ymm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttps2qqs ymm22, dword ptr [rip]{1to4}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x38,0x6d,0x35,0x00,0x00,0x00,0x00]
+ vcvttps2qqs ymm22, dword ptr [rip]{1to4}
+
+// CHECK: vcvttps2qqs ymm22, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x28,0x6d,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttps2qqs ymm22, xmmword ptr [2*rbp - 512]
+
+// CHECK: vcvttps2qqs ymm22 {k7} {z}, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe5,0x7d,0xaf,0x6d,0x71,0x7f]
+ vcvttps2qqs ymm22 {k7} {z}, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvttps2qqs ymm22 {k7} {z}, dword ptr [rdx - 512]{1to4}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xbf,0x6d,0x72,0x80]
+ vcvttps2qqs ymm22 {k7} {z}, dword ptr [rdx - 512]{1to4}
+
+// CHECK: vcvttps2qqs zmm22, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x6d,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttps2qqs zmm22, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttps2qqs zmm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7d,0x4f,0x6d,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttps2qqs zmm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttps2qqs zmm22, dword ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x58,0x6d,0x35,0x00,0x00,0x00,0x00]
+ vcvttps2qqs zmm22, dword ptr [rip]{1to8}
+
+// CHECK: vcvttps2qqs zmm22, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x48,0x6d,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttps2qqs zmm22, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vcvttps2qqs zmm22 {k7} {z}, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe5,0x7d,0xcf,0x6d,0x71,0x7f]
+ vcvttps2qqs zmm22 {k7} {z}, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvttps2qqs zmm22 {k7} {z}, dword ptr [rdx - 512]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xdf,0x6d,0x72,0x80]
+ vcvttps2qqs zmm22 {k7} {z}, dword ptr [rdx - 512]{1to8}
+
+// CHECK: vcvttps2udqs xmm22, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7c,0x08,0x6c,0xf7]
+ vcvttps2udqs xmm22, xmm23
+
+// CHECK: vcvttps2udqs xmm22 {k7}, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7c,0x0f,0x6c,0xf7]
+ vcvttps2udqs xmm22 {k7}, xmm23
+
+// CHECK: vcvttps2udqs xmm22 {k7} {z}, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7c,0x8f,0x6c,0xf7]
+ vcvttps2udqs xmm22 {k7} {z}, xmm23
+
+// CHECK: vcvttps2udqs ymm22, ymm23
+// CHECK: encoding: [0x62,0xa5,0x7c,0x28,0x6c,0xf7]
+ vcvttps2udqs ymm22, ymm23
+
+// CHECK: vcvttps2udqs ymm22, ymm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0x78,0x18,0x6c,0xf7]
+ vcvttps2udqs ymm22, ymm23, {sae}
+
+// CHECK: vcvttps2udqs ymm22 {k7}, ymm23
+// CHECK: encoding: [0x62,0xa5,0x7c,0x2f,0x6c,0xf7]
+ vcvttps2udqs ymm22 {k7}, ymm23
+
+// CHECK: vcvttps2udqs ymm22 {k7} {z}, ymm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0x78,0x9f,0x6c,0xf7]
+ vcvttps2udqs ymm22 {k7} {z}, ymm23, {sae}
+
+// CHECK: vcvttps2udqs zmm22, zmm23
+// CHECK: encoding: [0x62,0xa5,0x7c,0x48,0x6c,0xf7]
+ vcvttps2udqs zmm22, zmm23
+
+// CHECK: vcvttps2udqs zmm22, zmm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0x7c,0x18,0x6c,0xf7]
+ vcvttps2udqs zmm22, zmm23, {sae}
+
+// CHECK: vcvttps2udqs zmm22 {k7}, zmm23
+// CHECK: encoding: [0x62,0xa5,0x7c,0x4f,0x6c,0xf7]
+ vcvttps2udqs zmm22 {k7}, zmm23
+
+// CHECK: vcvttps2udqs zmm22 {k7} {z}, zmm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0x7c,0x9f,0x6c,0xf7]
+ vcvttps2udqs zmm22 {k7} {z}, zmm23, {sae}
+
+// CHECK: vcvttps2udqs xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7c,0x08,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttps2udqs xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttps2udqs xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7c,0x0f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttps2udqs xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttps2udqs xmm22, dword ptr [rip]{1to4}
+// CHECK: encoding: [0x62,0xe5,0x7c,0x18,0x6c,0x35,0x00,0x00,0x00,0x00]
+ vcvttps2udqs xmm22, dword ptr [rip]{1to4}
+
+// CHECK: vcvttps2udqs xmm22, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe5,0x7c,0x08,0x6c,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttps2udqs xmm22, xmmword ptr [2*rbp - 512]
+
+// CHECK: vcvttps2udqs xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe5,0x7c,0x8f,0x6c,0x71,0x7f]
+ vcvttps2udqs xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvttps2udqs xmm22 {k7} {z}, dword ptr [rdx - 512]{1to4}
+// CHECK: encoding: [0x62,0xe5,0x7c,0x9f,0x6c,0x72,0x80]
+ vcvttps2udqs xmm22 {k7} {z}, dword ptr [rdx - 512]{1to4}
+
+// CHECK: vcvttps2udqs ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7c,0x28,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttps2udqs ymm22, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttps2udqs ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7c,0x2f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttps2udqs ymm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttps2udqs ymm22, dword ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x7c,0x38,0x6c,0x35,0x00,0x00,0x00,0x00]
+ vcvttps2udqs ymm22, dword ptr [rip]{1to8}
+
+// CHECK: vcvttps2udqs ymm22, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe5,0x7c,0x28,0x6c,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttps2udqs ymm22, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vcvttps2udqs ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe5,0x7c,0xaf,0x6c,0x71,0x7f]
+ vcvttps2udqs ymm22 {k7} {z}, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvttps2udqs ymm22 {k7} {z}, dword ptr [rdx - 512]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x7c,0xbf,0x6c,0x72,0x80]
+ vcvttps2udqs ymm22 {k7} {z}, dword ptr [rdx - 512]{1to8}
+
+// CHECK: vcvttps2udqs zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7c,0x48,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttps2udqs zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttps2udqs zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7c,0x4f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttps2udqs zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttps2udqs zmm22, dword ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x7c,0x58,0x6c,0x35,0x00,0x00,0x00,0x00]
+ vcvttps2udqs zmm22, dword ptr [rip]{1to16}
+
+// CHECK: vcvttps2udqs zmm22, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0x7c,0x48,0x6c,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vcvttps2udqs zmm22, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vcvttps2udqs zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0x7c,0xcf,0x6c,0x71,0x7f]
+ vcvttps2udqs zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+
+// CHECK: vcvttps2udqs zmm22 {k7} {z}, dword ptr [rdx - 512]{1to16}
+// CHECK: encoding: [0x62,0xe5,0x7c,0xdf,0x6c,0x72,0x80]
+ vcvttps2udqs zmm22 {k7} {z}, dword ptr [rdx - 512]{1to16}
+
+// CHECK: vcvttps2uqqs xmm22, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x6c,0xf7]
+ vcvttps2uqqs xmm22, xmm23
+
+// CHECK: vcvttps2uqqs xmm22 {k7}, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x0f,0x6c,0xf7]
+ vcvttps2uqqs xmm22 {k7}, xmm23
+
+// CHECK: vcvttps2uqqs xmm22 {k7} {z}, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x8f,0x6c,0xf7]
+ vcvttps2uqqs xmm22 {k7} {z}, xmm23
+
+// CHECK: vcvttps2uqqs ymm22, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x6c,0xf7]
+ vcvttps2uqqs ymm22, xmm23
+
+// CHECK: vcvttps2uqqs ymm22, xmm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0x79,0x18,0x6c,0xf7]
+ vcvttps2uqqs ymm22, xmm23, {sae}
+
+// CHECK: vcvttps2uqqs ymm22 {k7}, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x2f,0x6c,0xf7]
+ vcvttps2uqqs ymm22 {k7}, xmm23
+
+// CHECK: vcvttps2uqqs ymm22 {k7} {z}, xmm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0x79,0x9f,0x6c,0xf7]
+ vcvttps2uqqs ymm22 {k7} {z}, xmm23, {sae}
+
+// CHECK: vcvttps2uqqs zmm22, ymm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x6c,0xf7]
+ vcvttps2uqqs zmm22, ymm23
+
+// CHECK: vcvttps2uqqs zmm22, ymm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x18,0x6c,0xf7]
+ vcvttps2uqqs zmm22, ymm23, {sae}
+
+// CHECK: vcvttps2uqqs zmm22 {k7}, ymm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x4f,0x6c,0xf7]
+ vcvttps2uqqs zmm22 {k7}, ymm23
+
+// CHECK: vcvttps2uqqs zmm22 {k7} {z}, ymm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0x7d,0x9f,0x6c,0xf7]
+ vcvttps2uqqs zmm22 {k7} {z}, ymm23, {sae}
+
+// CHECK: vcvttps2uqqs xmm22, qword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttps2uqqs xmm22, qword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttps2uqqs xmm22 {k7}, qword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7d,0x0f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttps2uqqs xmm22 {k7}, qword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttps2uqqs xmm22, dword ptr [rip]{1to2}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x18,0x6c,0x35,0x00,0x00,0x00,0x00]
+ vcvttps2uqqs xmm22, dword ptr [rip]{1to2}
+
+// CHECK: vcvttps2uqqs xmm22, qword ptr [2*rbp - 256]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6c,0x34,0x6d,0x00,0xff,0xff,0xff]
+ vcvttps2uqqs xmm22, qword ptr [2*rbp - 256]
+
+// CHECK: vcvttps2uqqs xmm22 {k7} {z}, qword ptr [rcx + 1016]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x8f,0x6c,0x71,0x7f]
+ vcvttps2uqqs xmm22 {k7} {z}, qword ptr [rcx + 1016]
+
+// CHECK: vcvttps2uqqs xmm22 {k7} {z}, dword ptr [rdx - 512]{1to2}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x9f,0x6c,0x72,0x80]
+ vcvttps2uqqs xmm22 {k7} {z}, dword ptr [rdx - 512]{1to2}
+
+// CHECK: vcvttps2uqqs ymm22, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7d,0x28,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttps2uqqs ymm22, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttps2uqqs ymm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7d,0x2f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttps2uqqs ymm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttps2uqqs ymm22, dword ptr [rip]{1to4}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x38,0x6c,0x35,0x00,0x00,0x00,0x00]
+ vcvttps2uqqs ymm22, dword ptr [rip]{1to4}
+
+// CHECK: vcvttps2uqqs ymm22, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x28,0x6c,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vcvttps2uqqs ymm22, xmmword ptr [2*rbp - 512]
+
+// CHECK: vcvttps2uqqs ymm22 {k7} {z}, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe5,0x7d,0xaf,0x6c,0x71,0x7f]
+ vcvttps2uqqs ymm22 {k7} {z}, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvttps2uqqs ymm22 {k7} {z}, dword ptr [rdx - 512]{1to4}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xbf,0x6c,0x72,0x80]
+ vcvttps2uqqs ymm22 {k7} {z}, dword ptr [rdx - 512]{1to4}
+
+// CHECK: vcvttps2uqqs zmm22, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7d,0x48,0x6c,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvttps2uqqs zmm22, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvttps2uqqs zmm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7d,0x4f,0x6c,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcvttps2uqqs zmm22 {k7}, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcvttps2uqqs zmm22, dword ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x7d,0x58,0x6c,0x35,0x00,0x00,0x00,0x00]
+ vcvttps2uqqs zmm22, dword ptr [rip]{1to8}
+
+// CHECK: vcvttps2uqqs zmm22, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x48,0x6c,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vcvttps2uqqs zmm22, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vcvttps2uqqs zmm22 {k7} {z}, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe5,0x7d,0xcf,0x6c,0x71,0x7f]
+ vcvttps2uqqs zmm22 {k7} {z}, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvttps2uqqs zmm22 {k7} {z}, dword ptr [rdx - 512]{1to8}
+// CHECK: encoding: [0x62,0xe5,0x7d,0xdf,0x6c,0x72,0x80]
+ vcvttps2uqqs zmm22 {k7} {z}, dword ptr [rdx - 512]{1to8}
+
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index 523db92bc543ea..49fd2be3c6064a 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -1324,17 +1324,29 @@ static const X86FoldTableEntry Table1[] = {
{X86::VCVTTNEBF162IUBSZ128rr, X86::VCVTTNEBF162IUBSZ128rm, 0},
{X86::VCVTTNEBF162IUBSZ256rr, X86::VCVTTNEBF162IUBSZ256rm, 0},
{X86::VCVTTNEBF162IUBSZrr, X86::VCVTTNEBF162IUBSZrm, 0},
+ {X86::VCVTTPD2DQSZ128rr, X86::VCVTTPD2DQSZ128rm, 0},
+ {X86::VCVTTPD2DQSZ256rr, X86::VCVTTPD2DQSZ256rm, 0},
+ {X86::VCVTTPD2DQSZrr, X86::VCVTTPD2DQSZrm, 0},
{X86::VCVTTPD2DQYrr, X86::VCVTTPD2DQYrm, 0},
{X86::VCVTTPD2DQZ128rr, X86::VCVTTPD2DQZ128rm, 0},
{X86::VCVTTPD2DQZ256rr, X86::VCVTTPD2DQZ256rm, 0},
{X86::VCVTTPD2DQZrr, X86::VCVTTPD2DQZrm, 0},
{X86::VCVTTPD2DQrr, X86::VCVTTPD2DQrm, 0},
+ {X86::VCVTTPD2QQSZ128rr, X86::VCVTTPD2QQSZ128rm, 0},
+ {X86::VCVTTPD2QQSZ256rr, X86::VCVTTPD2QQSZ256rm, 0},
+ {X86::VCVTTPD2QQSZrr, X86::VCVTTPD2QQSZrm, 0},
{X86::VCVTTPD2QQZ128rr, X86::VCVTTPD2QQZ128rm, 0},
{X86::VCVTTPD2QQZ256rr, X86::VCVTTPD2QQZ256rm, 0},
{X86::VCVTTPD2QQZrr, X86::VCVTTPD2QQZrm, 0},
+ {X86::VCVTTPD2UDQSZ128rr, X86::VCVTTPD2UDQSZ128rm, 0},
+ {X86::VCVTTPD2UDQSZ256rr, X86::VCVTTPD2UDQSZ256rm, 0},
+ {X86::VCVTTPD2UDQSZrr, X86::VCVTTPD2UDQSZrm, 0},
{X86::VCVTTPD2UDQZ128rr, X86::VCVTTPD2UDQZ128rm, 0},
{X86::VCVTTPD2UDQZ256rr, X86::VCVTTPD2UDQZ256rm, 0},
{X86::VCVTTPD2UDQZrr, X86::VCVTTPD2UDQZrm, 0},
+ {X86::VCVTTPD2UQQSZ128rr, X86::VCVTTPD2UQQSZ128rm, 0},
+ {X86::VCVTTPD2UQQSZ256rr, X86::VCVTTPD2UQQSZ256rm, 0},
+ {X86::VCVTTPD2UQQSZrr, X86::VCVTTPD2UQQSZrm, 0},
{X86::VCVTTPD2UQQZ128rr, X86::VCVTTPD2UQQZ128rm, 0},
{X86::VCVTTPD2UQQZ256rr, X86::VCVTTPD2UQQZ256rm, 0},
{X86::VCVTTPD2UQQZrr, X86::VCVTTPD2UQQZrm, 0},
@@ -1362,6 +1374,9 @@ static const X86FoldTableEntry Table1[] = {
{X86::VCVTTPH2WZ128rr, X86::VCVTTPH2WZ128rm, 0},
{X86::VCVTTPH2WZ256rr, X86::VCVTTPH2WZ256rm, 0},
{X86::VCVTTPH2WZrr, X86::VCVTTPH2WZrm, 0},
+ {X86::VCVTTPS2DQSZ128rr, X86::VCVTTPS2DQSZ128rm, 0},
+ {X86::VCVTTPS2DQSZ256rr, X86::VCVTTPS2DQSZ256rm, 0},
+ {X86::VCVTTPS2DQSZrr, X86::VCVTTPS2DQSZrm, 0},
{X86::VCVTTPS2DQYrr, X86::VCVTTPS2DQYrm, 0},
{X86::VCVTTPS2DQZ128rr, X86::VCVTTPS2DQZ128rm, 0},
{X86::VCVTTPS2DQZ256rr, X86::VCVTTPS2DQZ256rm, 0},
@@ -1373,25 +1388,42 @@ static const X86FoldTableEntry Table1[] = {
{X86::VCVTTPS2IUBSZ128rr, X86::VCVTTPS2IUBSZ128rm, 0},
{X86::VCVTTPS2IUBSZ256rr, X86::VCVTTPS2IUBSZ256rm, 0},
{X86::VCVTTPS2IUBSZrr, X86::VCVTTPS2IUBSZrm, 0},
+ {X86::VCVTTPS2QQSZ128rr, X86::VCVTTPS2QQSZ128rm, TB_NO_REVERSE},
+ {X86::VCVTTPS2QQSZ256rr, X86::VCVTTPS2QQSZ256rm, 0},
+ {X86::VCVTTPS2QQSZrr, X86::VCVTTPS2QQSZrm, 0},
{X86::VCVTTPS2QQZ128rr, X86::VCVTTPS2QQZ128rm, TB_NO_REVERSE},
{X86::VCVTTPS2QQZ256rr, X86::VCVTTPS2QQZ256rm, 0},
{X86::VCVTTPS2QQZrr, X86::VCVTTPS2QQZrm, 0},
+ {X86::VCVTTPS2UDQSZ128rr, X86::VCVTTPS2UDQSZ128rm, 0},
+ {X86::VCVTTPS2UDQSZ256rr, X86::VCVTTPS2UDQSZ256rm, 0},
+ {X86::VCVTTPS2UDQSZrr, X86::VCVTTPS2UDQSZrm, 0},
{X86::VCVTTPS2UDQZ128rr, X86::VCVTTPS2UDQZ128rm, 0},
{X86::VCVTTPS2UDQZ256rr, X86::VCVTTPS2UDQZ256rm, 0},
{X86::VCVTTPS2UDQZrr, X86::VCVTTPS2UDQZrm, 0},
+ {X86::VCVTTPS2UQQSZ128rr, X86::VCVTTPS2UQQSZ128rm, TB_NO_REVERSE},
+ {X86::VCVTTPS2UQQSZ256rr, X86::VCVTTPS2UQQSZ256rm, 0},
+ {X86::VCVTTPS2UQQSZrr, X86::VCVTTPS2UQQSZrm, 0},
{X86::VCVTTPS2UQQZ128rr, X86::VCVTTPS2UQQZ128rm, TB_NO_REVERSE},
{X86::VCVTTPS2UQQZ256rr, X86::VCVTTPS2UQQZ256rm, 0},
{X86::VCVTTPS2UQQZrr, X86::VCVTTPS2UQQZrm, 0},
+ {X86::VCVTTSD2SI64Srr, X86::VCVTTSD2SI64Srm, 0},
+ {X86::VCVTTSD2SI64Srr_Int, X86::VCVTTSD2SI64Srm_Int, TB_NO_REVERSE},
{X86::VCVTTSD2SI64Zrr, X86::VCVTTSD2SI64Zrm, 0},
{X86::VCVTTSD2SI64Zrr_Int, X86::VCVTTSD2SI64Zrm_Int, TB_NO_REVERSE},
{X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0},
{X86::VCVTTSD2SI64rr_Int, X86::VCVTTSD2SI64rm_Int, TB_NO_REVERSE},
+ {X86::VCVTTSD2SISrr, X86::VCVTTSD2SISrm, 0},
+ {X86::VCVTTSD2SISrr_Int, X86::VCVTTSD2SISrm_Int, TB_NO_REVERSE},
{X86::VCVTTSD2SIZrr, X86::VCVTTSD2SIZrm, 0},
{X86::VCVTTSD2SIZrr_Int, X86::VCVTTSD2SIZrm_Int, TB_NO_REVERSE},
{X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0},
{X86::VCVTTSD2SIrr_Int, X86::VCVTTSD2SIrm_Int, TB_NO_REVERSE},
+ {X86::VCVTTSD2USI64Srr, X86::VCVTTSD2USI64Srm, 0},
+ {X86::VCVTTSD2USI64Srr_Int, X86::VCVTTSD2USI64Srm_Int, TB_NO_REVERSE},
{X86::VCVTTSD2USI64Zrr, X86::VCVTTSD2USI64Zrm, 0},
{X86::VCVTTSD2USI64Zrr_Int, X86::VCVTTSD2USI64Zrm_Int, TB_NO_REVERSE},
+ {X86::VCVTTSD2USISrr, X86::VCVTTSD2USISrm, 0},
+ {X86::VCVTTSD2USISrr_Int, X86::VCVTTSD2USISrm_Int, TB_NO_REVERSE},
{X86::VCVTTSD2USIZrr, X86::VCVTTSD2USIZrm, 0},
{X86::VCVTTSD2USIZrr_Int, X86::VCVTTSD2USIZrm_Int, TB_NO_REVERSE},
{X86::VCVTTSH2SI64Zrr, X86::VCVTTSH2SI64Zrm, 0},
@@ -1402,16 +1434,24 @@ static const X86FoldTableEntry Table1[] = {
{X86::VCVTTSH2USI64Zrr_Int, X86::VCVTTSH2USI64Zrm_Int, TB_NO_REVERSE},
{X86::VCVTTSH2USIZrr, X86::VCVTTSH2USIZrm, 0},
{X86::VCVTTSH2USIZrr_Int, X86::VCVTTSH2USIZrm_Int, TB_NO_REVERSE},
+ {X86::VCVTTSS2SI64Srr, X86::VCVTTSS2SI64Srm, 0},
+ {X86::VCVTTSS2SI64Srr_Int, X86::VCVTTSS2SI64Srm_Int, TB_NO_REVERSE},
{X86::VCVTTSS2SI64Zrr, X86::VCVTTSS2SI64Zrm, 0},
{X86::VCVTTSS2SI64Zrr_Int, X86::VCVTTSS2SI64Zrm_Int, TB_NO_REVERSE},
{X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0},
{X86::VCVTTSS2SI64rr_Int, X86::VCVTTSS2SI64rm_Int, TB_NO_REVERSE},
+ {X86::VCVTTSS2SISrr, X86::VCVTTSS2SISrm, 0},
+ {X86::VCVTTSS2SISrr_Int, X86::VCVTTSS2SISrm_Int, TB_NO_REVERSE},
{X86::VCVTTSS2SIZrr, X86::VCVTTSS2SIZrm, 0},
{X86::VCVTTSS2SIZrr_Int, X86::VCVTTSS2SIZrm_Int, TB_NO_REVERSE},
{X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0},
{X86::VCVTTSS2SIrr_Int, X86::VCVTTSS2SIrm_Int, TB_NO_REVERSE},
+ {X86::VCVTTSS2USI64Srr, X86::VCVTTSS2USI64Srm, 0},
+ {X86::VCVTTSS2USI64Srr_Int, X86::VCVTTSS2USI64Srm_Int, TB_NO_REVERSE},
{X86::VCVTTSS2USI64Zrr, X86::VCVTTSS2USI64Zrm, 0},
{X86::VCVTTSS2USI64Zrr_Int, X86::VCVTTSS2USI64Zrm_Int, TB_NO_REVERSE},
+ {X86::VCVTTSS2USISrr, X86::VCVTTSS2USISrm, 0},
+ {X86::VCVTTSS2USISrr_Int, X86::VCVTTSS2USISrm_Int, TB_NO_REVERSE},
{X86::VCVTTSS2USIZrr, X86::VCVTTSS2USIZrm, 0},
{X86::VCVTTSS2USIZrr_Int, X86::VCVTTSS2USIZrm_Int, TB_NO_REVERSE},
{X86::VCVTUDQ2PDZ128rr, X86::VCVTUDQ2PDZ128rm, TB_NO_REVERSE},
@@ -2587,15 +2627,27 @@ static const X86FoldTableEntry Table2[] = {
{X86::VCVTTNEBF162IUBSZ128rrkz, X86::VCVTTNEBF162IUBSZ128rmkz, 0},
{X86::VCVTTNEBF162IUBSZ256rrkz, X86::VCVTTNEBF162IUBSZ256rmkz, 0},
{X86::VCVTTNEBF162IUBSZrrkz, X86::VCVTTNEBF162IUBSZrmkz, 0},
+ {X86::VCVTTPD2DQSZ128rrkz, X86::VCVTTPD2DQSZ128rmkz, 0},
+ {X86::VCVTTPD2DQSZ256rrkz, X86::VCVTTPD2DQSZ256rmkz, 0},
+ {X86::VCVTTPD2DQSZrrkz, X86::VCVTTPD2DQSZrmkz, 0},
{X86::VCVTTPD2DQZ128rrkz, X86::VCVTTPD2DQZ128rmkz, 0},
{X86::VCVTTPD2DQZ256rrkz, X86::VCVTTPD2DQZ256rmkz, 0},
{X86::VCVTTPD2DQZrrkz, X86::VCVTTPD2DQZrmkz, 0},
+ {X86::VCVTTPD2QQSZ128rrkz, X86::VCVTTPD2QQSZ128rmkz, 0},
+ {X86::VCVTTPD2QQSZ256rrkz, X86::VCVTTPD2QQSZ256rmkz, 0},
+ {X86::VCVTTPD2QQSZrrkz, X86::VCVTTPD2QQSZrmkz, 0},
{X86::VCVTTPD2QQZ128rrkz, X86::VCVTTPD2QQZ128rmkz, 0},
{X86::VCVTTPD2QQZ256rrkz, X86::VCVTTPD2QQZ256rmkz, 0},
{X86::VCVTTPD2QQZrrkz, X86::VCVTTPD2QQZrmkz, 0},
+ {X86::VCVTTPD2UDQSZ128rrkz, X86::VCVTTPD2UDQSZ128rmkz, 0},
+ {X86::VCVTTPD2UDQSZ256rrkz, X86::VCVTTPD2UDQSZ256rmkz, 0},
+ {X86::VCVTTPD2UDQSZrrkz, X86::VCVTTPD2UDQSZrmkz, 0},
{X86::VCVTTPD2UDQZ128rrkz, X86::VCVTTPD2UDQZ128rmkz, 0},
{X86::VCVTTPD2UDQZ256rrkz, X86::VCVTTPD2UDQZ256rmkz, 0},
{X86::VCVTTPD2UDQZrrkz, X86::VCVTTPD2UDQZrmkz, 0},
+ {X86::VCVTTPD2UQQSZ128rrkz, X86::VCVTTPD2UQQSZ128rmkz, 0},
+ {X86::VCVTTPD2UQQSZ256rrkz, X86::VCVTTPD2UQQSZ256rmkz, 0},
+ {X86::VCVTTPD2UQQSZrrkz, X86::VCVTTPD2UQQSZrmkz, 0},
{X86::VCVTTPD2UQQZ128rrkz, X86::VCVTTPD2UQQZ128rmkz, 0},
{X86::VCVTTPD2UQQZ256rrkz, X86::VCVTTPD2UQQZ256rmkz, 0},
{X86::VCVTTPD2UQQZrrkz, X86::VCVTTPD2UQQZrmkz, 0},
@@ -2623,6 +2675,9 @@ static const X86FoldTableEntry Table2[] = {
{X86::VCVTTPH2WZ128rrkz, X86::VCVTTPH2WZ128rmkz, 0},
{X86::VCVTTPH2WZ256rrkz, X86::VCVTTPH2WZ256rmkz, 0},
{X86::VCVTTPH2WZrrkz, X86::VCVTTPH2WZrmkz, 0},
+ {X86::VCVTTPS2DQSZ128rrkz, X86::VCVTTPS2DQSZ128rmkz, 0},
+ {X86::VCVTTPS2DQSZ256rrkz, X86::VCVTTPS2DQSZ256rmkz, 0},
+ {X86::VCVTTPS2DQSZrrkz, X86::VCVTTPS2DQSZrmkz, 0},
{X86::VCVTTPS2DQZ128rrkz, X86::VCVTTPS2DQZ128rmkz, 0},
{X86::VCVTTPS2DQZ256rrkz, X86::VCVTTPS2DQZ256rmkz, 0},
{X86::VCVTTPS2DQZrrkz, X86::VCVTTPS2DQZrmkz, 0},
@@ -2632,12 +2687,21 @@ static const X86FoldTableEntry Table2[] = {
{X86::VCVTTPS2IUBSZ128rrkz, X86::VCVTTPS2IUBSZ128rmkz, 0},
{X86::VCVTTPS2IUBSZ256rrkz, X86::VCVTTPS2IUBSZ256rmkz, 0},
{X86::VCVTTPS2IUBSZrrkz, X86::VCVTTPS2IUBSZrmkz, 0},
+ {X86::VCVTTPS2QQSZ128rrkz, X86::VCVTTPS2QQSZ128rmkz, TB_NO_REVERSE},
+ {X86::VCVTTPS2QQSZ256rrkz, X86::VCVTTPS2QQSZ256rmkz, 0},
+ {X86::VCVTTPS2QQSZrrkz, X86::VCVTTPS2QQSZrmkz, 0},
{X86::VCVTTPS2QQZ128rrkz, X86::VCVTTPS2QQZ128rmkz, TB_NO_REVERSE},
{X86::VCVTTPS2QQZ256rrkz, X86::VCVTTPS2QQZ256rmkz, 0},
{X86::VCVTTPS2QQZrrkz, X86::VCVTTPS2QQZrmkz, 0},
+ {X86::VCVTTPS2UDQSZ128rrkz, X86::VCVTTPS2UDQSZ128rmkz, 0},
+ {X86::VCVTTPS2UDQSZ256rrkz, X86::VCVTTPS2UDQSZ256rmkz, 0},
+ {X86::VCVTTPS2UDQSZrrkz, X86::VCVTTPS2UDQSZrmkz, 0},
{X86::VCVTTPS2UDQZ128rrkz, X86::VCVTTPS2UDQZ128rmkz, 0},
{X86::VCVTTPS2UDQZ256rrkz, X86::VCVTTPS2UDQZ256rmkz, 0},
{X86::VCVTTPS2UDQZrrkz, X86::VCVTTPS2UDQZrmkz, 0},
+ {X86::VCVTTPS2UQQSZ128rrkz, X86::VCVTTPS2UQQSZ128rmkz, TB_NO_REVERSE},
+ {X86::VCVTTPS2UQQSZ256rrkz, X86::VCVTTPS2UQQSZ256rmkz, 0},
+ {X86::VCVTTPS2UQQSZrrkz, X86::VCVTTPS2UQQSZrmkz, 0},
{X86::VCVTTPS2UQQZ128rrkz, X86::VCVTTPS2UQQZ128rmkz, TB_NO_REVERSE},
{X86::VCVTTPS2UQQZ256rrkz, X86::VCVTTPS2UQQZ256rmkz, 0},
{X86::VCVTTPS2UQQZrrkz, X86::VCVTTPS2UQQZrmkz, 0},
@@ -4187,15 +4251,27 @@ static const X86FoldTableEntry Table3[] = {
{X86::VCVTTNEBF162IUBSZ128rrk, X86::VCVTTNEBF162IUBSZ128rmk, 0},
{X86::VCVTTNEBF162IUBSZ256rrk, X86::VCVTTNEBF162IUBSZ256rmk, 0},
{X86::VCVTTNEBF162IUBSZrrk, X86::VCVTTNEBF162IUBSZrmk, 0},
+ {X86::VCVTTPD2DQSZ128rrk, X86::VCVTTPD2DQSZ128rmk, 0},
+ {X86::VCVTTPD2DQSZ256rrk, X86::VCVTTPD2DQSZ256rmk, 0},
+ {X86::VCVTTPD2DQSZrrk, X86::VCVTTPD2DQSZrmk, 0},
{X86::VCVTTPD2DQZ128rrk, X86::VCVTTPD2DQZ128rmk, 0},
{X86::VCVTTPD2DQZ256rrk, X86::VCVTTPD2DQZ256rmk, 0},
{X86::VCVTTPD2DQZrrk, X86::VCVTTPD2DQZrmk, 0},
+ {X86::VCVTTPD2QQSZ128rrk, X86::VCVTTPD2QQSZ128rmk, 0},
+ {X86::VCVTTPD2QQSZ256rrk, X86::VCVTTPD2QQSZ256rmk, 0},
+ {X86::VCVTTPD2QQSZrrk, X86::VCVTTPD2QQSZrmk, 0},
{X86::VCVTTPD2QQZ128rrk, X86::VCVTTPD2QQZ128rmk, 0},
{X86::VCVTTPD2QQZ256rrk, X86::VCVTTPD2QQZ256rmk, 0},
{X86::VCVTTPD2QQZrrk, X86::VCVTTPD2QQZrmk, 0},
+ {X86::VCVTTPD2UDQSZ128rrk, X86::VCVTTPD2UDQSZ128rmk, 0},
+ {X86::VCVTTPD2UDQSZ256rrk, X86::VCVTTPD2UDQSZ256rmk, 0},
+ {X86::VCVTTPD2UDQSZrrk, X86::VCVTTPD2UDQSZrmk, 0},
{X86::VCVTTPD2UDQZ128rrk, X86::VCVTTPD2UDQZ128rmk, 0},
{X86::VCVTTPD2UDQZ256rrk, X86::VCVTTPD2UDQZ256rmk, 0},
{X86::VCVTTPD2UDQZrrk, X86::VCVTTPD2UDQZrmk, 0},
+ {X86::VCVTTPD2UQQSZ128rrk, X86::VCVTTPD2UQQSZ128rmk, 0},
+ {X86::VCVTTPD2UQQSZ256rrk, X86::VCVTTPD2UQQSZ256rmk, 0},
+ {X86::VCVTTPD2UQQSZrrk, X86::VCVTTPD2UQQSZrmk, 0},
{X86::VCVTTPD2UQQZ128rrk, X86::VCVTTPD2UQQZ128rmk, 0},
{X86::VCVTTPD2UQQZ256rrk, X86::VCVTTPD2UQQZ256rmk, 0},
{X86::VCVTTPD2UQQZrrk, X86::VCVTTPD2UQQZrmk, 0},
@@ -4223,6 +4299,9 @@ static const X86FoldTableEntry Table3[] = {
{X86::VCVTTPH2WZ128rrk, X86::VCVTTPH2WZ128rmk, 0},
{X86::VCVTTPH2WZ256rrk, X86::VCVTTPH2WZ256rmk, 0},
{X86::VCVTTPH2WZrrk, X86::VCVTTPH2WZrmk, 0},
+ {X86::VCVTTPS2DQSZ128rrk, X86::VCVTTPS2DQSZ128rmk, 0},
+ {X86::VCVTTPS2DQSZ256rrk, X86::VCVTTPS2DQSZ256rmk, 0},
+ {X86::VCVTTPS2DQSZrrk, X86::VCVTTPS2DQSZrmk, 0},
{X86::VCVTTPS2DQZ128rrk, X86::VCVTTPS2DQZ128rmk, 0},
{X86::VCVTTPS2DQZ256rrk, X86::VCVTTPS2DQZ256rmk, 0},
{X86::VCVTTPS2DQZrrk, X86::VCVTTPS2DQZrmk, 0},
@@ -4232,12 +4311,21 @@ static const X86FoldTableEntry Table3[] = {
{X86::VCVTTPS2IUBSZ128rrk, X86::VCVTTPS2IUBSZ128rmk, 0},
{X86::VCVTTPS2IUBSZ256rrk, X86::VCVTTPS2IUBSZ256rmk, 0},
{X86::VCVTTPS2IUBSZrrk, X86::VCVTTPS2IUBSZrmk, 0},
+ {X86::VCVTTPS2QQSZ128rrk, X86::VCVTTPS2QQSZ128rmk, TB_NO_REVERSE},
+ {X86::VCVTTPS2QQSZ256rrk, X86::VCVTTPS2QQSZ256rmk, 0},
+ {X86::VCVTTPS2QQSZrrk, X86::VCVTTPS2QQSZrmk, 0},
{X86::VCVTTPS2QQZ128rrk, X86::VCVTTPS2QQZ128rmk, TB_NO_REVERSE},
{X86::VCVTTPS2QQZ256rrk, X86::VCVTTPS2QQZ256rmk, 0},
{X86::VCVTTPS2QQZrrk, X86::VCVTTPS2QQZrmk, 0},
+ {X86::VCVTTPS2UDQSZ128rrk, X86::VCVTTPS2UDQSZ128rmk, 0},
+ {X86::VCVTTPS2UDQSZ256rrk, X86::VCVTTPS2UDQSZ256rmk, 0},
+ {X86::VCVTTPS2UDQSZrrk, X86::VCVTTPS2UDQSZrmk, 0},
{X86::VCVTTPS2UDQZ128rrk, X86::VCVTTPS2UDQZ128rmk, 0},
{X86::VCVTTPS2UDQZ256rrk, X86::VCVTTPS2UDQZ256rmk, 0},
{X86::VCVTTPS2UDQZrrk, X86::VCVTTPS2UDQZrmk, 0},
+ {X86::VCVTTPS2UQQSZ128rrk, X86::VCVTTPS2UQQSZ128rmk, TB_NO_REVERSE},
+ {X86::VCVTTPS2UQQSZ256rrk, X86::VCVTTPS2UQQSZ256rmk, 0},
+ {X86::VCVTTPS2UQQSZrrk, X86::VCVTTPS2UQQSZrmk, 0},
{X86::VCVTTPS2UQQZ128rrk, X86::VCVTTPS2UQQZ128rmk, TB_NO_REVERSE},
{X86::VCVTTPS2UQQZ256rrk, X86::VCVTTPS2UQQZ256rmk, 0},
{X86::VCVTTPS2UQQZrrk, X86::VCVTTPS2UQQZrmk, 0},
@@ -7046,15 +7134,27 @@ static const X86FoldTableEntry BroadcastTable1[] = {
{X86::VCVTTNEBF162IUBSZ128rr, X86::VCVTTNEBF162IUBSZ128rmb, TB_BCAST_SH},
{X86::VCVTTNEBF162IUBSZ256rr, X86::VCVTTNEBF162IUBSZ256rmb, TB_BCAST_SH},
{X86::VCVTTNEBF162IUBSZrr, X86::VCVTTNEBF162IUBSZrmb, TB_BCAST_SH},
+ {X86::VCVTTPD2DQSZ128rr, X86::VCVTTPD2DQSZ128rmb, TB_BCAST_SD},
+ {X86::VCVTTPD2DQSZ256rr, X86::VCVTTPD2DQSZ256rmb, TB_BCAST_SD},
+ {X86::VCVTTPD2DQSZrr, X86::VCVTTPD2DQSZrmb, TB_BCAST_SD},
{X86::VCVTTPD2DQZ128rr, X86::VCVTTPD2DQZ128rmb, TB_BCAST_SD},
{X86::VCVTTPD2DQZ256rr, X86::VCVTTPD2DQZ256rmb, TB_BCAST_SD},
{X86::VCVTTPD2DQZrr, X86::VCVTTPD2DQZrmb, TB_BCAST_SD},
+ {X86::VCVTTPD2QQSZ128rr, X86::VCVTTPD2QQSZ128rmb, TB_BCAST_SD},
+ {X86::VCVTTPD2QQSZ256rr, X86::VCVTTPD2QQSZ256rmb, TB_BCAST_SD},
+ {X86::VCVTTPD2QQSZrr, X86::VCVTTPD2QQSZrmb, TB_BCAST_SD},
{X86::VCVTTPD2QQZ128rr, X86::VCVTTPD2QQZ128rmb, TB_BCAST_SD},
{X86::VCVTTPD2QQZ256rr, X86::VCVTTPD2QQZ256rmb, TB_BCAST_SD},
{X86::VCVTTPD2QQZrr, X86::VCVTTPD2QQZrmb, TB_BCAST_SD},
+ {X86::VCVTTPD2UDQSZ128rr, X86::VCVTTPD2UDQSZ128rmb, TB_BCAST_SD},
+ {X86::VCVTTPD2UDQSZ256rr, X86::VCVTTPD2UDQSZ256rmb, TB_BCAST_SD},
+ {X86::VCVTTPD2UDQSZrr, X86::VCVTTPD2UDQSZrmb, TB_BCAST_SD},
{X86::VCVTTPD2UDQZ128rr, X86::VCVTTPD2UDQZ128rmb, TB_BCAST_SD},
{X86::VCVTTPD2UDQZ256rr, X86::VCVTTPD2UDQZ256rmb, TB_BCAST_SD},
{X86::VCVTTPD2UDQZrr, X86::VCVTTPD2UDQZrmb, TB_BCAST_SD},
+ {X86::VCVTTPD2UQQSZ128rr, X86::VCVTTPD2UQQSZ128rmb, TB_BCAST_SD},
+ {X86::VCVTTPD2UQQSZ256rr, X86::VCVTTPD2UQQSZ256rmb, TB_BCAST_SD},
+ {X86::VCVTTPD2UQQSZrr, X86::VCVTTPD2UQQSZrmb, TB_BCAST_SD},
{X86::VCVTTPD2UQQZ128rr, X86::VCVTTPD2UQQZ128rmb, TB_BCAST_SD},
{X86::VCVTTPD2UQQZ256rr, X86::VCVTTPD2UQQZ256rmb, TB_BCAST_SD},
{X86::VCVTTPD2UQQZrr, X86::VCVTTPD2UQQZrmb, TB_BCAST_SD},
@@ -7082,6 +7182,9 @@ static const X86FoldTableEntry BroadcastTable1[] = {
{X86::VCVTTPH2WZ128rr, X86::VCVTTPH2WZ128rmb, TB_BCAST_SH},
{X86::VCVTTPH2WZ256rr, X86::VCVTTPH2WZ256rmb, TB_BCAST_SH},
{X86::VCVTTPH2WZrr, X86::VCVTTPH2WZrmb, TB_BCAST_SH},
+ {X86::VCVTTPS2DQSZ128rr, X86::VCVTTPS2DQSZ128rmb, TB_BCAST_SS},
+ {X86::VCVTTPS2DQSZ256rr, X86::VCVTTPS2DQSZ256rmb, TB_BCAST_SS},
+ {X86::VCVTTPS2DQSZrr, X86::VCVTTPS2DQSZrmb, TB_BCAST_SS},
{X86::VCVTTPS2DQZ128rr, X86::VCVTTPS2DQZ128rmb, TB_BCAST_SS},
{X86::VCVTTPS2DQZ256rr, X86::VCVTTPS2DQZ256rmb, TB_BCAST_SS},
{X86::VCVTTPS2DQZrr, X86::VCVTTPS2DQZrmb, TB_BCAST_SS},
@@ -7091,12 +7194,21 @@ static const X86FoldTableEntry BroadcastTable1[] = {
{X86::VCVTTPS2IUBSZ128rr, X86::VCVTTPS2IUBSZ128rmb, TB_BCAST_SS},
{X86::VCVTTPS2IUBSZ256rr, X86::VCVTTPS2IUBSZ256rmb, TB_BCAST_SS},
{X86::VCVTTPS2IUBSZrr, X86::VCVTTPS2IUBSZrmb, TB_BCAST_SS},
+ {X86::VCVTTPS2QQSZ128rr, X86::VCVTTPS2QQSZ128rmb, TB_BCAST_SS},
+ {X86::VCVTTPS2QQSZ256rr, X86::VCVTTPS2QQSZ256rmb, TB_BCAST_SS},
+ {X86::VCVTTPS2QQSZrr, X86::VCVTTPS2QQSZrmb, TB_BCAST_SS},
{X86::VCVTTPS2QQZ128rr, X86::VCVTTPS2QQZ128rmb, TB_BCAST_SS},
{X86::VCVTTPS2QQZ256rr, X86::VCVTTPS2QQZ256rmb, TB_BCAST_SS},
{X86::VCVTTPS2QQZrr, X86::VCVTTPS2QQZrmb, TB_BCAST_SS},
+ {X86::VCVTTPS2UDQSZ128rr, X86::VCVTTPS2UDQSZ128rmb, TB_BCAST_SS},
+ {X86::VCVTTPS2UDQSZ256rr, X86::VCVTTPS2UDQSZ256rmb, TB_BCAST_SS},
+ {X86::VCVTTPS2UDQSZrr, X86::VCVTTPS2UDQSZrmb, TB_BCAST_SS},
{X86::VCVTTPS2UDQZ128rr, X86::VCVTTPS2UDQZ128rmb, TB_BCAST_SS},
{X86::VCVTTPS2UDQZ256rr, X86::VCVTTPS2UDQZ256rmb, TB_BCAST_SS},
{X86::VCVTTPS2UDQZrr, X86::VCVTTPS2UDQZrmb, TB_BCAST_SS},
+ {X86::VCVTTPS2UQQSZ128rr, X86::VCVTTPS2UQQSZ128rmb, TB_BCAST_SS},
+ {X86::VCVTTPS2UQQSZ256rr, X86::VCVTTPS2UQQSZ256rmb, TB_BCAST_SS},
+ {X86::VCVTTPS2UQQSZrr, X86::VCVTTPS2UQQSZrmb, TB_BCAST_SS},
{X86::VCVTTPS2UQQZ128rr, X86::VCVTTPS2UQQZ128rmb, TB_BCAST_SS},
{X86::VCVTTPS2UQQZ256rr, X86::VCVTTPS2UQQZ256rmb, TB_BCAST_SS},
{X86::VCVTTPS2UQQZrr, X86::VCVTTPS2UQQZrmb, TB_BCAST_SS},
@@ -7422,15 +7534,27 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VCVTTNEBF162IUBSZ128rrkz, X86::VCVTTNEBF162IUBSZ128rmbkz, TB_BCAST_SH},
{X86::VCVTTNEBF162IUBSZ256rrkz, X86::VCVTTNEBF162IUBSZ256rmbkz, TB_BCAST_SH},
{X86::VCVTTNEBF162IUBSZrrkz, X86::VCVTTNEBF162IUBSZrmbkz, TB_BCAST_SH},
+ {X86::VCVTTPD2DQSZ128rrkz, X86::VCVTTPD2DQSZ128rmbkz, TB_BCAST_SD},
+ {X86::VCVTTPD2DQSZ256rrkz, X86::VCVTTPD2DQSZ256rmbkz, TB_BCAST_SD},
+ {X86::VCVTTPD2DQSZrrkz, X86::VCVTTPD2DQSZrmbkz, TB_BCAST_SD},
{X86::VCVTTPD2DQZ128rrkz, X86::VCVTTPD2DQZ128rmbkz, TB_BCAST_SD},
{X86::VCVTTPD2DQZ256rrkz, X86::VCVTTPD2DQZ256rmbkz, TB_BCAST_SD},
{X86::VCVTTPD2DQZrrkz, X86::VCVTTPD2DQZrmbkz, TB_BCAST_SD},
+ {X86::VCVTTPD2QQSZ128rrkz, X86::VCVTTPD2QQSZ128rmbkz, TB_BCAST_SD},
+ {X86::VCVTTPD2QQSZ256rrkz, X86::VCVTTPD2QQSZ256rmbkz, TB_BCAST_SD},
+ {X86::VCVTTPD2QQSZrrkz, X86::VCVTTPD2QQSZrmbkz, TB_BCAST_SD},
{X86::VCVTTPD2QQZ128rrkz, X86::VCVTTPD2QQZ128rmbkz, TB_BCAST_SD},
{X86::VCVTTPD2QQZ256rrkz, X86::VCVTTPD2QQZ256rmbkz, TB_BCAST_SD},
{X86::VCVTTPD2QQZrrkz, X86::VCVTTPD2QQZrmbkz, TB_BCAST_SD},
+ {X86::VCVTTPD2UDQSZ128rrkz, X86::VCVTTPD2UDQSZ128rmbkz, TB_BCAST_SD},
+ {X86::VCVTTPD2UDQSZ256rrkz, X86::VCVTTPD2UDQSZ256rmbkz, TB_BCAST_SD},
+ {X86::VCVTTPD2UDQSZrrkz, X86::VCVTTPD2UDQSZrmbkz, TB_BCAST_SD},
{X86::VCVTTPD2UDQZ128rrkz, X86::VCVTTPD2UDQZ128rmbkz, TB_BCAST_SD},
{X86::VCVTTPD2UDQZ256rrkz, X86::VCVTTPD2UDQZ256rmbkz, TB_BCAST_SD},
{X86::VCVTTPD2UDQZrrkz, X86::VCVTTPD2UDQZrmbkz, TB_BCAST_SD},
+ {X86::VCVTTPD2UQQSZ128rrkz, X86::VCVTTPD2UQQSZ128rmbkz, TB_BCAST_SD},
+ {X86::VCVTTPD2UQQSZ256rrkz, X86::VCVTTPD2UQQSZ256rmbkz, TB_BCAST_SD},
+ {X86::VCVTTPD2UQQSZrrkz, X86::VCVTTPD2UQQSZrmbkz, TB_BCAST_SD},
{X86::VCVTTPD2UQQZ128rrkz, X86::VCVTTPD2UQQZ128rmbkz, TB_BCAST_SD},
{X86::VCVTTPD2UQQZ256rrkz, X86::VCVTTPD2UQQZ256rmbkz, TB_BCAST_SD},
{X86::VCVTTPD2UQQZrrkz, X86::VCVTTPD2UQQZrmbkz, TB_BCAST_SD},
@@ -7458,6 +7582,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VCVTTPH2WZ128rrkz, X86::VCVTTPH2WZ128rmbkz, TB_BCAST_SH},
{X86::VCVTTPH2WZ256rrkz, X86::VCVTTPH2WZ256rmbkz, TB_BCAST_SH},
{X86::VCVTTPH2WZrrkz, X86::VCVTTPH2WZrmbkz, TB_BCAST_SH},
+ {X86::VCVTTPS2DQSZ128rrkz, X86::VCVTTPS2DQSZ128rmbkz, TB_BCAST_SS},
+ {X86::VCVTTPS2DQSZ256rrkz, X86::VCVTTPS2DQSZ256rmbkz, TB_BCAST_SS},
+ {X86::VCVTTPS2DQSZrrkz, X86::VCVTTPS2DQSZrmbkz, TB_BCAST_SS},
{X86::VCVTTPS2DQZ128rrkz, X86::VCVTTPS2DQZ128rmbkz, TB_BCAST_SS},
{X86::VCVTTPS2DQZ256rrkz, X86::VCVTTPS2DQZ256rmbkz, TB_BCAST_SS},
{X86::VCVTTPS2DQZrrkz, X86::VCVTTPS2DQZrmbkz, TB_BCAST_SS},
@@ -7467,12 +7594,21 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VCVTTPS2IUBSZ128rrkz, X86::VCVTTPS2IUBSZ128rmbkz, TB_BCAST_SS},
{X86::VCVTTPS2IUBSZ256rrkz, X86::VCVTTPS2IUBSZ256rmbkz, TB_BCAST_SS},
{X86::VCVTTPS2IUBSZrrkz, X86::VCVTTPS2IUBSZrmbkz, TB_BCAST_SS},
+ {X86::VCVTTPS2QQSZ128rrkz, X86::VCVTTPS2QQSZ128rmbkz, TB_BCAST_SS},
+ {X86::VCVTTPS2QQSZ256rrkz, X86::VCVTTPS2QQSZ256rmbkz, TB_BCAST_SS},
+ {X86::VCVTTPS2QQSZrrkz, X86::VCVTTPS2QQSZrmbkz, TB_BCAST_SS},
{X86::VCVTTPS2QQZ128rrkz, X86::VCVTTPS2QQZ128rmbkz, TB_BCAST_SS},
{X86::VCVTTPS2QQZ256rrkz, X86::VCVTTPS2QQZ256rmbkz, TB_BCAST_SS},
{X86::VCVTTPS2QQZrrkz, X86::VCVTTPS2QQZrmbkz, TB_BCAST_SS},
+ {X86::VCVTTPS2UDQSZ128rrkz, X86::VCVTTPS2UDQSZ128rmbkz, TB_BCAST_SS},
+ {X86::VCVTTPS2UDQSZ256rrkz, X86::VCVTTPS2UDQSZ256rmbkz, TB_BCAST_SS},
+ {X86::VCVTTPS2UDQSZrrkz, X86::VCVTTPS2UDQSZrmbkz, TB_BCAST_SS},
{X86::VCVTTPS2UDQZ128rrkz, X86::VCVTTPS2UDQZ128rmbkz, TB_BCAST_SS},
{X86::VCVTTPS2UDQZ256rrkz, X86::VCVTTPS2UDQZ256rmbkz, TB_BCAST_SS},
{X86::VCVTTPS2UDQZrrkz, X86::VCVTTPS2UDQZrmbkz, TB_BCAST_SS},
+ {X86::VCVTTPS2UQQSZ128rrkz, X86::VCVTTPS2UQQSZ128rmbkz, TB_BCAST_SS},
+ {X86::VCVTTPS2UQQSZ256rrkz, X86::VCVTTPS2UQQSZ256rmbkz, TB_BCAST_SS},
+ {X86::VCVTTPS2UQQSZrrkz, X86::VCVTTPS2UQQSZrmbkz, TB_BCAST_SS},
{X86::VCVTTPS2UQQZ128rrkz, X86::VCVTTPS2UQQZ128rmbkz, TB_BCAST_SS},
{X86::VCVTTPS2UQQZ256rrkz, X86::VCVTTPS2UQQZ256rmbkz, TB_BCAST_SS},
{X86::VCVTTPS2UQQZrrkz, X86::VCVTTPS2UQQZrmbkz, TB_BCAST_SS},
@@ -8135,15 +8271,27 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VCVTTNEBF162IUBSZ128rrk, X86::VCVTTNEBF162IUBSZ128rmbk, TB_BCAST_SH},
{X86::VCVTTNEBF162IUBSZ256rrk, X86::VCVTTNEBF162IUBSZ256rmbk, TB_BCAST_SH},
{X86::VCVTTNEBF162IUBSZrrk, X86::VCVTTNEBF162IUBSZrmbk, TB_BCAST_SH},
+ {X86::VCVTTPD2DQSZ128rrk, X86::VCVTTPD2DQSZ128rmbk, TB_BCAST_SD},
+ {X86::VCVTTPD2DQSZ256rrk, X86::VCVTTPD2DQSZ256rmbk, TB_BCAST_SD},
+ {X86::VCVTTPD2DQSZrrk, X86::VCVTTPD2DQSZrmbk, TB_BCAST_SD},
{X86::VCVTTPD2DQZ128rrk, X86::VCVTTPD2DQZ128rmbk, TB_BCAST_SD},
{X86::VCVTTPD2DQZ256rrk, X86::VCVTTPD2DQZ256rmbk, TB_BCAST_SD},
{X86::VCVTTPD2DQZrrk, X86::VCVTTPD2DQZrmbk, TB_BCAST_SD},
+ {X86::VCVTTPD2QQSZ128rrk, X86::VCVTTPD2QQSZ128rmbk, TB_BCAST_SD},
+ {X86::VCVTTPD2QQSZ256rrk, X86::VCVTTPD2QQSZ256rmbk, TB_BCAST_SD},
+ {X86::VCVTTPD2QQSZrrk, X86::VCVTTPD2QQSZrmbk, TB_BCAST_SD},
{X86::VCVTTPD2QQZ128rrk, X86::VCVTTPD2QQZ128rmbk, TB_BCAST_SD},
{X86::VCVTTPD2QQZ256rrk, X86::VCVTTPD2QQZ256rmbk, TB_BCAST_SD},
{X86::VCVTTPD2QQZrrk, X86::VCVTTPD2QQZrmbk, TB_BCAST_SD},
+ {X86::VCVTTPD2UDQSZ128rrk, X86::VCVTTPD2UDQSZ128rmbk, TB_BCAST_SD},
+ {X86::VCVTTPD2UDQSZ256rrk, X86::VCVTTPD2UDQSZ256rmbk, TB_BCAST_SD},
+ {X86::VCVTTPD2UDQSZrrk, X86::VCVTTPD2UDQSZrmbk, TB_BCAST_SD},
{X86::VCVTTPD2UDQZ128rrk, X86::VCVTTPD2UDQZ128rmbk, TB_BCAST_SD},
{X86::VCVTTPD2UDQZ256rrk, X86::VCVTTPD2UDQZ256rmbk, TB_BCAST_SD},
{X86::VCVTTPD2UDQZrrk, X86::VCVTTPD2UDQZrmbk, TB_BCAST_SD},
+ {X86::VCVTTPD2UQQSZ128rrk, X86::VCVTTPD2UQQSZ128rmbk, TB_BCAST_SD},
+ {X86::VCVTTPD2UQQSZ256rrk, X86::VCVTTPD2UQQSZ256rmbk, TB_BCAST_SD},
+ {X86::VCVTTPD2UQQSZrrk, X86::VCVTTPD2UQQSZrmbk, TB_BCAST_SD},
{X86::VCVTTPD2UQQZ128rrk, X86::VCVTTPD2UQQZ128rmbk, TB_BCAST_SD},
{X86::VCVTTPD2UQQZ256rrk, X86::VCVTTPD2UQQZ256rmbk, TB_BCAST_SD},
{X86::VCVTTPD2UQQZrrk, X86::VCVTTPD2UQQZrmbk, TB_BCAST_SD},
@@ -8171,6 +8319,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VCVTTPH2WZ128rrk, X86::VCVTTPH2WZ128rmbk, TB_BCAST_SH},
{X86::VCVTTPH2WZ256rrk, X86::VCVTTPH2WZ256rmbk, TB_BCAST_SH},
{X86::VCVTTPH2WZrrk, X86::VCVTTPH2WZrmbk, TB_BCAST_SH},
+ {X86::VCVTTPS2DQSZ128rrk, X86::VCVTTPS2DQSZ128rmbk, TB_BCAST_SS},
+ {X86::VCVTTPS2DQSZ256rrk, X86::VCVTTPS2DQSZ256rmbk, TB_BCAST_SS},
+ {X86::VCVTTPS2DQSZrrk, X86::VCVTTPS2DQSZrmbk, TB_BCAST_SS},
{X86::VCVTTPS2DQZ128rrk, X86::VCVTTPS2DQZ128rmbk, TB_BCAST_SS},
{X86::VCVTTPS2DQZ256rrk, X86::VCVTTPS2DQZ256rmbk, TB_BCAST_SS},
{X86::VCVTTPS2DQZrrk, X86::VCVTTPS2DQZrmbk, TB_BCAST_SS},
@@ -8180,12 +8331,21 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VCVTTPS2IUBSZ128rrk, X86::VCVTTPS2IUBSZ128rmbk, TB_BCAST_SS},
{X86::VCVTTPS2IUBSZ256rrk, X86::VCVTTPS2IUBSZ256rmbk, TB_BCAST_SS},
{X86::VCVTTPS2IUBSZrrk, X86::VCVTTPS2IUBSZrmbk, TB_BCAST_SS},
+ {X86::VCVTTPS2QQSZ128rrk, X86::VCVTTPS2QQSZ128rmbk, TB_BCAST_SS},
+ {X86::VCVTTPS2QQSZ256rrk, X86::VCVTTPS2QQSZ256rmbk, TB_BCAST_SS},
+ {X86::VCVTTPS2QQSZrrk, X86::VCVTTPS2QQSZrmbk, TB_BCAST_SS},
{X86::VCVTTPS2QQZ128rrk, X86::VCVTTPS2QQZ128rmbk, TB_BCAST_SS},
{X86::VCVTTPS2QQZ256rrk, X86::VCVTTPS2QQZ256rmbk, TB_BCAST_SS},
{X86::VCVTTPS2QQZrrk, X86::VCVTTPS2QQZrmbk, TB_BCAST_SS},
+ {X86::VCVTTPS2UDQSZ128rrk, X86::VCVTTPS2UDQSZ128rmbk, TB_BCAST_SS},
+ {X86::VCVTTPS2UDQSZ256rrk, X86::VCVTTPS2UDQSZ256rmbk, TB_BCAST_SS},
+ {X86::VCVTTPS2UDQSZrrk, X86::VCVTTPS2UDQSZrmbk, TB_BCAST_SS},
{X86::VCVTTPS2UDQZ128rrk, X86::VCVTTPS2UDQZ128rmbk, TB_BCAST_SS},
{X86::VCVTTPS2UDQZ256rrk, X86::VCVTTPS2UDQZ256rmbk, TB_BCAST_SS},
{X86::VCVTTPS2UDQZrrk, X86::VCVTTPS2UDQZrmbk, TB_BCAST_SS},
+ {X86::VCVTTPS2UQQSZ128rrk, X86::VCVTTPS2UQQSZ128rmbk, TB_BCAST_SS},
+ {X86::VCVTTPS2UQQSZ256rrk, X86::VCVTTPS2UQQSZ256rmbk, TB_BCAST_SS},
+ {X86::VCVTTPS2UQQSZrrk, X86::VCVTTPS2UQQSZrmbk, TB_BCAST_SS},
{X86::VCVTTPS2UQQZ128rrk, X86::VCVTTPS2UQQZ128rmbk, TB_BCAST_SS},
{X86::VCVTTPS2UQQZ256rrk, X86::VCVTTPS2UQQZ256rmbk, TB_BCAST_SS},
{X86::VCVTTPS2UQQZrrk, X86::VCVTTPS2UQQZrmbk, TB_BCAST_SS},
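[Illustration, not from the patch: the fold-table entries above let the memory-
fold logic replace a register-form convert with its load-folded form. Given the
Table1 entry for VCVTTPD2DQSZ128rr, a snippet like the following can compile to
a single convert with a memory operand; the function name is hypothetical.]

#include <immintrin.h>

// The load of *p can be folded into the conversion, yielding
// vcvttpd2dqs xmm, xmmword ptr [mem] instead of a separate load plus convert.
__m128i fold_example(const __m128d *p) { return _mm_cvttspd_epi32(*p); }
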
>From 4c1676902596d5d4b1ffecf2d3a5f8cd44b5ac03 Mon Sep 17 00:00:00 2001
From: Malay Sanghi <malay.sanghi at intel.com>
Date: Tue, 27 Aug 2024 05:09:32 -0700
Subject: [PATCH 2/8] review1
---
clang/lib/Headers/CMakeLists.txt | 4 +-
clang/lib/Headers/avx10_2_512satcvtdsintrin.h | 5 -
clang/lib/Headers/avx10_2satcvtdsintrin.h | 1181 +++++++++++------
clang/lib/Headers/immintrin.h | 10 +-
.../avx10_2_512satcvtds-builtins-x64-error.c | 12 +-
.../X86/avx10_2_512satcvtds-builtins-x64.c | 36 +-
.../X86/avx10_2_512satcvtds-builtins.c | 12 +-
llvm/lib/Target/X86/X86ISelLowering.cpp | 17 +-
.../X86/avx10_2_512satcvtds-intrinsics.ll | 22 -
.../CodeGen/X86/avx10_2satcvtds-intrinsics.ll | 32 -
10 files changed, 810 insertions(+), 521 deletions(-)
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 9981290628697c..4d8d236ec9caf1 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -149,12 +149,12 @@ set(x86_files
amxintrin.h
avx10_2_512minmaxintrin.h
avx10_2_512niintrin.h
- avx10_2_512satcvtintrin.h
avx10_2_512satcvtdsintrin.h
+ avx10_2_512satcvtintrin.h
avx10_2minmaxintrin.h
avx10_2niintrin.h
- avx10_2satcvtintrin.h
avx10_2satcvtdsintrin.h
+ avx10_2satcvtintrin.h
avx2intrin.h
avx512bf16intrin.h
avx512bitalgintrin.h
diff --git a/clang/lib/Headers/avx10_2_512satcvtdsintrin.h b/clang/lib/Headers/avx10_2_512satcvtdsintrin.h
index e8b815653c3d6e..79a8699bc7e164 100644
--- a/clang/lib/Headers/avx10_2_512satcvtdsintrin.h
+++ b/clang/lib/Headers/avx10_2_512satcvtdsintrin.h
@@ -87,7 +87,6 @@ _mm512_maskz_cvttspd_epu32(__mmask8 U, __m512d A) {
(__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), \
(const int)(R)))
-#ifdef __x86_64__
// 512 bit : Double -> Long
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi64(__m512d A) {
@@ -156,8 +155,6 @@ _mm512_maskz_cvttspd_epu64(__mmask8 U, __m512d A) {
(__v8df)(__m512d)(A), (__v8di)_mm512_setzero_si512(), (__mmask8)(U), \
(const int)(R)))
-#endif
-
// 512 bit: Float -> int
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi32(__m512 A) {
return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
@@ -228,7 +225,6 @@ _mm512_maskz_cvttsps_epu32(__mmask16 U, __m512 A) {
(__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), (__mmask16)(U), \
(const int)(R)))
-#ifdef __x86_64__
// 512 bit : float -> long
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi64(__m256 A) {
return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
@@ -296,7 +292,6 @@ _mm512_maskz_cvttsps_epu64(__mmask8 U, __m256 A) {
((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( \
(__v8sf)(__m256)(A), (__v8di)_mm512_setzero_si512(), (__mmask8)(U), \
(const int)(R)))
-#endif
#undef __DEFAULT_FN_ATTRS
#endif // __AVX10_2_512SATCVTDSINTRIN_H
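[Sketch, not part of the diff: the hunks above drop the __x86_64__ guards, so
the 512-bit quadword converts become usable from 32-bit code as well, using the
_mm512_cvttspd_epi64 intrinsic defined earlier in this header.]

#include <immintrin.h>

// Truncating, saturating double -> signed 64-bit convert; after this change it
// requires only avx10.2-512, not a 64-bit host target.
__m512i wide_convert(__m512d v) { return _mm512_cvttspd_epi64(v); }
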
diff --git a/clang/lib/Headers/avx10_2satcvtdsintrin.h b/clang/lib/Headers/avx10_2satcvtdsintrin.h
index 5588c9ccfa4319..bca34120a8fec8 100644
--- a/clang/lib/Headers/avx10_2satcvtdsintrin.h
+++ b/clang/lib/Headers/avx10_2satcvtdsintrin.h
@@ -20,434 +20,787 @@
__attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \
__min_vector_width__(256)))
-#define _mm_cvtt_roundssd_i32(A, R) \
+#define __DEFAULT_FN_ATTRS128 \
+ __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \
+ __min_vector_width__(128)))
+
+#define _mm_cvtts_roundsd_i32(A, R) \
 ((int)__builtin_ia32_vcvttssd2si32((__v2df)(__m128d)(A), (const int)(R)))
-#define _mm_cvtt_roundssd_si32(A, R) \
+#define _mm_cvtts_roundsd_si32(A, R) \
((int)__builtin_ia32_vcvttssd2si32((__v2df)(__m128d)(A), (const int)(R)))
-
-#ifdef __x86_64__
-#define _mm_cvtt_roundssd_si64(A, R) \
- ((long long)__builtin_ia32_vcvttssd2si64((__v2df)(__m128d)(A), \
- (const int)(R)))
-
-#define _mm_cvtt_roundssd_i64(A, R) \
- ((long long)__builtin_ia32_vcvttssd2si64((__v2df)(__m128d)(A), \
(const int)(R)))
-#endif
-#define _mm_cvtt_roundssd_u32(A, R) \
- ((unsigned int)__builtin_ia32_vcvttssd2usi32((__v2df)(__m128d)(A), \
- (const int)(R)))
+#define _mm_cvtts_roundsd_u32(A, R)                                           \
+  ((unsigned int)__builtin_ia32_vcvttssd2usi32((__v2df)(__m128d)(A),          \
+                                               (const int)(R)))
+
+#define _mm_cvtts_roundss_i32(A, R)                                           \
+  ((int)__builtin_ia32_vcvttsss2si32((__v4sf)(__m128)(A), (const int)(R)))
+
+#define _mm_cvtts_roundss_si32(A, R)                                          \
+  ((int)__builtin_ia32_vcvttsss2si32((__v4sf)(__m128)(A), (const int)(R)))
+
+#define _mm_cvtts_roundss_u32(A, R)                                           \
+  ((unsigned int)__builtin_ia32_vcvttsss2usi32((__v4sf)(__m128)(A),           \
+                                               (const int)(R)))
#ifdef __x86_64__
-#define _mm_cvtt_roundssd_u64(A, R) \
- ((unsigned long long)__builtin_ia32_vcvttssd2usi64((__v2df)(__m128d)(A), \
- (const int)(R)))
-#endif
-
-#define _mm_cvtt_roundsss_i32(A, R) \
- ((int)__builtin_ia32_vcvttsss2si32((__v4sf)(__m128)(A), (const int)(R)))
-
-#define _mm_cvtt_roundsss_si32(A, R) \
- ((int)__builtin_ia32_vcvttsss2si32((__v4sf)(__m128)(A), (const int)(R)))
-
-#ifdef __x86_64__
-#define _mm_cvtt_roundsss_i64(A, R) \
- ((long long)__builtin_ia32_vcvttsss2si64((__v4sf)(__m128)(A), (const int)(R)))
-
-#define _mm_cvtt_roundsss_si64(A, R) \
- ((long long)__builtin_ia32_vcvttsss2si64((__v4sf)(__m128)(A), (const int)(R)))
-#endif
-
-#define _mm_cvtt_roundsss_u32(A, R) \
- ((unsigned int)__builtin_ia32_vcvttsss2usi32((__v4sf)(__m128)(A), \
- (const int)(R)))
-
-#ifdef __x86_64__
-#define _mm_cvtt_roundsss_u64(A, R) \
- ((unsigned long long)__builtin_ia32_vcvttsss2usi64((__v4sf)(__m128)(A), \
- (const int)(R)))
-#endif
-
-// 128 Bit : Double -> int
-#define _mm_cvttspd_epi32(A) \
- ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask( \
- (__v2df)(__m128d)A, (__v4si)(__m128i)_mm_undefined_si128(), \
- (__mmask8)(-1)))
+#define _mm_cvtts_roundss_u64(A, R)                                           \
+  ((unsigned long long)__builtin_ia32_vcvttsss2usi64((__v4sf)(__m128)(A),     \
+                                                     (const int)(R)))
+
+#define _mm_cvtts_roundsd_u64(A, R)                                           \
+  ((unsigned long long)__builtin_ia32_vcvttssd2usi64((__v2df)(__m128d)(A),    \
+                                                     (const int)(R)))
+
+#define _mm_cvtts_roundss_i64(A, R)                                           \
+  ((long long)__builtin_ia32_vcvttsss2si64((__v4sf)(__m128)(A), (const int)(R)))
+
+#define _mm_cvtts_roundss_si64(A, R)                                          \
+  ((long long)__builtin_ia32_vcvttsss2si64((__v4sf)(__m128)(A), (const int)(R)))
+
+#define _mm_cvtts_roundsd_si64(A, R)                                          \
+  ((long long)__builtin_ia32_vcvttssd2si64((__v2df)(__m128d)(A),              \
+                                           (const int)(R)))
+
+#define _mm_cvtts_roundsd_i64(A, R)                                           \
+  ((long long)__builtin_ia32_vcvttssd2si64((__v2df)(__m128d)(A),              \
+                                           (const int)(R)))
+#endif /* __x86_64__ */
+
+// 128 Bit : Double -> int
+#define _mm_cvttspd_epi32(A)                                                  \
+  ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask(                               \
+      (__v2df)(__m128d)A, (__v4si)(__m128i)_mm_undefined_si128(),             \
+      (__mmask8)(-1)))
+
+#define _mm_mask_cvttspd_epi32(W, U, A)                                       \
+  ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask(                               \
+      (__v2df)(__m128d)A, (__v4si)(__m128i)W, (__mmask8)U))
+
+#define _mm_maskz_cvttspd_epi32(U, A)                                         \
+  ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask(                               \
+      (__v2df)(__m128d)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U))
-#define _mm_mask_cvttspd_epi32(W, U, A) \
- ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask( \
- (__v2df)(__m128d)A, (__v4si)(__m128i)W, (__mmask8)U))
+// 256 Bit : Double -> int
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm256_cvttspd_epi32(__m256d A) {
+  return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
+      (__v4df)(__m256d)A, (__v4si)_mm_undefined_si128(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm256_mask_cvttspd_epi32(__m128i W, __mmask8 U, __m256d A) {
+  return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
+      (__v4df)A, (__v4si)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm256_maskz_cvttspd_epi32(__mmask8 U, __m256d A) {
+  return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
+      (__v4df)A, (__v4si)_mm_setzero_si128(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundpd_epi32(A, R)                                      \
+  ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(                         \
+      (__v4df)(__m256d)A, (__v4si)(__m128i)_mm_undefined_si128(),             \
+      (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_cvtts_roundpd_epi32(W, U, A, R)                           \
+  ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(                         \
+      (__v4df)(__m256d)A, (__v4si)(__m128i)W, (__mmask8)U, (int)(R)))
+
+#define _mm256_maskz_cvtts_roundpd_epi32(U, A, R)                             \
+  ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(                         \
+      (__v4df)(__m256d)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U,  \
+      (int)(R)))
+
+// 128 Bit : Double -> uint
+#define _mm_cvttspd_epu32(A)                                                  \
+  ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask(                              \
+      (__v2df)(__m128d)A, (__v4si)(__m128i)_mm_undefined_si128(),             \
+      (__mmask8)(-1)))
+
+#define _mm_mask_cvttspd_epu32(W, U, A)                                       \
+  ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask(                              \
+      ((__v2df)(__m128d)A), (__v4si)(__m128i)W, (__mmask8)U))
+
+#define _mm_maskz_cvttspd_epu32(U, A)                                         \
+  ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask(                              \
+      (__v2df)(__m128d)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U))
+
+// 256 Bit : Double -> uint
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm256_cvttspd_epu32(__m256d A) {
+  return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
+      (__v4df)A, (__v4si)_mm_undefined_si128(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm256_mask_cvttspd_epu32(__m128i W, __mmask8 U, __m256d A) {
+  return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
+      (__v4df)A, (__v4si)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm256_maskz_cvttspd_epu32(__mmask8 U, __m256d A) {
+  return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
+      (__v4df)A, (__v4si)_mm_setzero_si128(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundpd_epu32(A, R)                                      \
+  ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(                        \
+      (__v4df)(__m256d)A, (__v4si)(__m128i)_mm_undefined_si128(),             \
+      (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_cvtts_roundpd_epu32(W, U, A, R)                           \
+  ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(                        \
+      (__v4df)(__m256d)A, (__v4si)(__m128i)W, (__mmask8)U, (int)(R)))
+
+#define _mm256_maskz_cvtts_roundpd_epu32(U, A, R)                             \
+  ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(                        \
+      (__v4df)(__m256d)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U,  \
+      (int)(R)))
+
+// 128 Bit : Double -> long
+#define _mm_cvttspd_epi64(A)                                                  \
+  ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask(                               \
+      (__v2df)(__m128d)A, (__v2di)_mm_undefined_si128(), (__mmask8)-1))
+
+#define _mm_mask_cvttspd_epi64(W, U, A)                                       \
+  ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask((__v2df)(__m128d)A, (__v2di)W, \
+                                               (__mmask8)U))
+
+#define _mm_maskz_cvttspd_epi64(U, A)                                         \
+  ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask(                               \
+      (__v2df)(__m128d)A, (__v2di)_mm_setzero_si128(), (__mmask8)U))
+
+// 256 Bit : Double -> long
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttspd_epi64(__m256d A) {
+  return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
+      (__v4df)A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvttspd_epi64(__m256i W, __mmask8 U, __m256d A) {
+  return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
+      (__v4df)A, (__v4di)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvttspd_epi64(__mmask8 U, __m256d A) {
+  return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
+      (__v4df)A, (__v4di)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundpd_epi64(A, R)                                      \
+  ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(                         \
+      (__v4df)A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1, (int)R))
+
+#define _mm256_mask_cvtts_roundpd_epi64(W, U, A, R)                           \
+  ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask((__v4df)A, (__v4di)W,    \
+                                                     (__mmask8)U, (int)R))
-#define _mm_maskz_cvttspd_epi32(U, A) \
- ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask( \
- (__v2df)(__m128d)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U))
+#define _mm256_maskz_cvtts_roundpd_epi64(U, A, R)                             \
+  ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(                         \
+      (__v4df)A, (__v4di)_mm256_setzero_si256(), (__mmask8)U, (int)R))
-// 256 Bit : Double -> int
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvttspd_epi32(__m256d A) {
- return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
- (__v4df)(__m256d)A, (__v4si)_mm_undefined_si128(), (__mmask8)-1,
- _MM_FROUND_CUR_DIRECTION));
-}
-
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm256_mask_cvttspd_epi32(__m128i W, __mmask8 U, __m256d A) {
- return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
- (__v4df)A, (__v4si)W, U, _MM_FROUND_CUR_DIRECTION));
-}
-
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm256_maskz_cvttspd_epi32(__mmask8 U, __m256d A) {
- return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
- (__v4df)A, (__v4si)_mm_setzero_si128(), U, _MM_FROUND_CUR_DIRECTION));
-}
-
-#define _mm256_cvtts_roundpd_epi32(A, R) \
- ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( \
- (__v4df)(__m256d)A, (__v4si)(__m128i)_mm_undefined_si128(), \
- (__mmask8) - 1, (int)(R)))
-
-#define _mm256_mask_cvtts_roundpd_epi32(W, U, A, R) \
- ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( \
- (__v4df)(__m256d)A, (__v4si)(__m128i)W, (__mmask8)U, (int)(R)))
-
-#define _mm256_maskz_cvtts_roundpd_epi32(U, A, R) \
- ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( \
- (__v4df)(__m256d)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U, \
- (int)(R)))
-
-// 128 Bit : Double -> uint
-#define _mm_cvttspd_epu32(A) \
- ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask( \
- (__v2df)(__m128d)A, (__v4si)(__m128i)_mm_undefined_si128(), \
- (__mmask8)(-1)))
-
-#define _mm_mask_cvttspd_epu32(W, U, A) \
- ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask( \
- ((__v2df)(__m128d)A), (__v4si)(__m128i)W, (__mmask8)U))
-
-#define _mm_maskz_cvttspd_epu32(U, A) \
- ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask( \
- (__v2df)(__m128d)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U))
-
-// 256 Bit : Double -> uint
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvttspd_epu32(__m256d A) {
- return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
- (__v4df)A, (__v4si)_mm_undefined_si128(), (__mmask8)-1,
- _MM_FROUND_CUR_DIRECTION));
-}
-
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm256_mask_cvttspd_epu32(__m128i W, __mmask8 U, __m256d A) {
- return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
- (__v4df)A, (__v4si)W, U, _MM_FROUND_CUR_DIRECTION));
-}
-
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm256_maskz_cvttspd_epu32(__mmask8 U, __m256d A) {
- return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
- (__v4df)A, (__v4si)_mm_setzero_si128(), U, _MM_FROUND_CUR_DIRECTION));
-}
-
-#define _mm256_cvtts_roundpd_epu32(A, R) \
- ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( \
- (__v4df)(__m256d)A, (__v4si)(__m128i)_mm_undefined_si128(), \
- (__mmask8) - 1, (int)(R)))
-
-#define _mm256_mask_cvtts_roundpd_epu32(W, U, A, R) \
- ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( \
- (__v4df)(__m256d)A, (__v4si)(__m128i)W, (__mmask8)U, (int)(R)))
-
-#define _mm256_maskz_cvtts_roundpd_epu32(U, A, R) \
- ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( \
- (__v4df)(__m256d)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U, \
- (int)(R)))
-
-// 128 Bit : Double -> long
-#ifdef __x86_64__
+// 128 Bit : Double -> ulong
+#define _mm_cvttspd_epu64(A)                                                  \
+  ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask(                              \
+      (__v2df)(__m128d)A, (__v2di)_mm_undefined_si128(), (__mmask8)-1))
+
+#define _mm_mask_cvttspd_epu64(W, U, A)                                       \
+  ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask((__v2df)(__m128d)A,           \
+                                                (__v2di)W, (__mmask8)U))
+
+#define _mm_maskz_cvttspd_epu64(U, A)                                         \
+  ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask(                              \
+      (__v2df)(__m128d)A, (__v2di)_mm_setzero_si128(), (__mmask8)U))
+
+// 256 Bit : Double -> ulong
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttspd_epu64(__m256d A) {
+  return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
+      (__v4df)A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvttspd_epu64(__m256i W, __mmask8 U, __m256d A) {
+  return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
+      (__v4df)A, (__v4di)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvttspd_epu64(__mmask8 U, __m256d A) {
+  return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
+      (__v4df)A, (__v4di)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundpd_epu64(A, R)                                      \
+  ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(                        \
+      (__v4df)A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1, (int)R))
+
+#define _mm256_mask_cvtts_roundpd_epu64(W, U, A, R)                           \
+  ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask((__v4df)A, (__v4di)W,   \
+                                                      (__mmask8)U, (int)R))
-#define _mm_cvttspd_epi64(A) \
- ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask( \
- (__v2df)(__m128d)A, (__v2di)_mm_undefined_si128(), (__mmask8) - 1))
-
-#define _mm_mask_cvttspd_epi64(W, U, A) \
- ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask((__v2df)(__m128d)A, (__v2di)W, \
- (__mmask8)U))
-
-#define _mm_maskz_cvttspd_epi64(U, A) \
- ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask( \
- (__v2df)(__m128d)A, (__v2di)_mm_setzero_si128(), (__mmask8)U))
-
-// 256 Bit : Double -> long
-static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttspd_epi64(__m256d A) {
- return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
- (__v4df)A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
- _MM_FROUND_CUR_DIRECTION));
-}
-
-static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_cvttspd_epi64(__m256i W, __mmask8 U, __m256d A) {
- return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
- (__v4df)A, (__v4di)W, U, _MM_FROUND_CUR_DIRECTION));
-}
-
-static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_cvttspd_epi64(__mmask8 U, __m256d A) {
- return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
- (__v4df)A, (__v4di)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
-}
-
-#define _mm256_cvtts_roundpd_epi64(A, R) \
- ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask( \
- (__v4df)A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, (int)R))
-
-#define _mm256_mask_cvtts_roundpd_epi64(W, U, A, R) \
- ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask((__v4df)A, (__v4di)W, \
- (__mmask8)U, (int)R))
-
-#define _mm256_maskz_cvtts_roundpd_epi64(U, A, R) \
- ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask( \
- (__v4df)A, (__v4di)_mm256_setzero_si256(), (__mmask8)U, (int)R))
-
-// 128 Bit : Double -> ulong
-#define _mm_cvttspd_epu64(A) \
- ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask( \
- (__v2df)(__m128d)A, (__v2di)_mm_undefined_si128(), (__mmask8) - 1))
-
-#define _mm_mask_cvttspd_epu64(W, U, A) \
- ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask((__v2df)(__m128d)A, (__v2di)W, \
- (__mmask8)U))
-
-#define _mm_maskz_cvttspd_epu64(U, A) \
- ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask( \
- (__v2df)(__m128d)A, (__v2di)_mm_setzero_si128(), (__mmask8)U))
-
-// 256 Bit : Double -> ulong
-
-static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttspd_epu64(__m256d A) {
- return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
- (__v4df)A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
- _MM_FROUND_CUR_DIRECTION));
-}
-
-static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_cvttspd_epu64(__m256i W, __mmask8 U, __m256d A) {
- return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
- (__v4df)A, (__v4di)W, U, _MM_FROUND_CUR_DIRECTION));
-}
-
-static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_cvttspd_epu64(__mmask8 U, __m256d A) {
- return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
- (__v4df)A, (__v4di)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
-}
-
-#define _mm256_cvtts_roundpd_epu64(A, R) \
- ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( \
- (__v4df)A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, (int)R))
-
-#define _mm256_mask_cvtts_roundpd_epu64(W, U, A, R) \
- ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask((__v4df)A, (__v4di)W, \
+#define _mm256_maskz_cvtts_roundpd_epu64(U, A, R)                             \
+  ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(                        \
+      (__v4df)A, (__v4di)_mm256_setzero_si256(),                              \
(__mmask8)U, (int)R))
-#define _mm256_maskz_cvtts_roundpd_epu64(U, A, R) \
- ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( \
- (__v4df)A, (__v4di)_mm256_setzero_si256(), (__mmask8)U, (int)R))
-#endif
-
-// 128 Bit : float -> int
-#define _mm_cvttsps_epi32(A) \
- ((__m128i)__builtin_ia32_vcvttps2dqs128_mask( \
- (__v4sf)(__m128)A, (__v4si)(__m128i)_mm_undefined_si128(), \
- (__mmask8)(-1)))
-
-#define _mm_mask_cvttsps_epi32(W, U, A) \
- ((__m128i)__builtin_ia32_vcvttps2dqs128_mask( \
- (__v4sf)(__m128)A, (__v4si)(__m128i)W, (__mmask8)U))
-
-#define _mm_maskz_cvttsps_epi32(U, A) \
- ((__m128i)__builtin_ia32_vcvttps2dqs128_mask( \
- (__v4sf)(__m128)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U))
-
-// 256 Bit : float -> int
-static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttsps_epi32(__m256 A) {
- return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
- (__v8sf)A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
- _MM_FROUND_CUR_DIRECTION));
-}
-
-static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_cvttsps_epi32(__m256i W, __mmask8 U, __m256 A) {
- return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
- (__v8sf)(__m256)A, (__v8si)W, U, _MM_FROUND_CUR_DIRECTION));
-}
-
-static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_cvttsps_epi32(__mmask8 U, __m256 A) {
- return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
- (__v8sf)(__m256)A, (__v8si)_mm256_setzero_si256(), U,
- _MM_FROUND_CUR_DIRECTION));
-}
-
-#define _mm256_cvtts_roundps_epi32(A, R) \
- ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( \
- (__v8sf)(__m256)A, (__v8si)(__m256i)_mm256_undefined_si256(), \
- (__mmask8) - 1, (int)(R)))
-
-#define _mm256_mask_cvtts_roundps_epi32(W, U, A, R) \
- ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( \
- (__v8sf)(__m256)A, (__v8si)(__m256i)W, (__mmask8)U, (int)(R)))
-
-#define _mm256_maskz_cvtts_roundps_epi32(U, A, R) \
- ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( \
- (__v8sf)(__m256)A, (__v8si)(__m256i)_mm256_setzero_si256(), (__mmask8)U, \
- (int)(R)))
-
-// 128 Bit : float -> uint
-#define _mm_cvttsps_epu32(A) \
- ((__m128i)__builtin_ia32_vcvttps2udqs128_mask( \
- (__v4sf)(__m128)A, (__v4si)(__m128i)_mm_undefined_si128(), \
- (__mmask8)(-1)))
-
-#define _mm_mask_cvttsps_epu32(W, U, A) \
- ((__m128i)__builtin_ia32_vcvttps2udqs128_mask( \
- (__v4sf)(__m128)A, (__v4si)(__m128i)W, (__mmask8)U))
-
-#define _mm_maskz_cvttsps_epu32(U, A) \
- ((__m128i)__builtin_ia32_vcvttps2udqs128_mask( \
- (__v4sf)(__m128)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U))
-
-// 256 Bit : float -> uint
-
-static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttsps_epu32(__m256 A) {
- return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
- (__v8sf)A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
- _MM_FROUND_CUR_DIRECTION));
-}
-
-static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_cvttsps_epu32(__m256i W, __mmask8 U, __m256 A) {
- return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
- (__v8sf)A, (__v8si)W, U, _MM_FROUND_CUR_DIRECTION));
-}
-
-static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_cvttsps_epu32(__mmask8 U, __m256 A) {
- return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
- (__v8sf)A, (__v8si)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
-}
-
-#define _mm256_cvtts_roundps_epu32(A, R) \
- ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( \
- (__v8sf)(__m256)A, (__v8si)(__m256i)_mm256_undefined_si256(), \
- (__mmask8) - 1, (int)(R)))
-
-#define _mm256_mask_cvtts_roundps_epu32(W, U, A, R) \
- ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( \
- (__v8sf)(__m256)A, (__v8si)(__m256i)W, (__mmask8)U, (int)(R)))
-
-#define _mm256_maskz_cvtts_roundps_epu32(U, A, R) \
- ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( \
- (__v8sf)(__m256)A, (__v8si)(__m256i)_mm256_setzero_si256(), (__mmask8)U, \
- (int)(R)))
-
-// 128 bit : float -> long
-#ifdef __x86_64__
+// 128 Bit : float -> int
+#define _mm_cvttsps_epi32(A)                                                  \
+  ((__m128i)__builtin_ia32_vcvttps2dqs128_mask(                               \
+      (__v4sf)(__m128)A, (__v4si)(__m128i)_mm_undefined_si128(),              \
+      (__mmask8)(-1)))
+
+#define _mm_mask_cvttsps_epi32(W, U, A)                                       \
+  ((__m128i)__builtin_ia32_vcvttps2dqs128_mask(                               \
+      (__v4sf)(__m128)A, (__v4si)(__m128i)W, (__mmask8)U))
+
+#define _mm_maskz_cvttsps_epi32(U, A)                                         \
+  ((__m128i)__builtin_ia32_vcvttps2dqs128_mask(                               \
+      (__v4sf)(__m128)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U))
+
+// 256 Bit : float -> int
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttsps_epi32(__m256 A) {
+  return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
+      (__v8sf)A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvttsps_epi32(__m256i W, __mmask8 U, __m256 A) {
+  return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
+      (__v8sf)(__m256)A, (__v8si)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvttsps_epi32(__mmask8 U, __m256 A) {
+  return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
+      (__v8sf)(__m256)A, (__v8si)_mm256_setzero_si256(), U,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundps_epi32(A, R)                                      \
+  ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(                         \
+      (__v8sf)(__m256)A, (__v8si)(__m256i)_mm256_undefined_si256(),           \
+      (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_cvtts_roundps_epi32(W, U, A, R)                           \
+  ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(                         \
+      (__v8sf)(__m256)A, (__v8si)(__m256i)W, (__mmask8)U, (int)(R)))
+
+#define _mm256_maskz_cvtts_roundps_epi32(U, A, R)                             \
+  ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(                         \
+      (__v8sf)(__m256)A, (__v8si)(__m256i)_mm256_setzero_si256(),             \
+      (__mmask8)U, (int)(R)))
+
+// 128 Bit : float -> uint
+#define _mm_cvttsps_epu32(A)                                                  \
+  ((__m128i)__builtin_ia32_vcvttps2udqs128_mask(                              \
+      (__v4sf)(__m128)A, (__v4si)(__m128i)_mm_undefined_si128(),              \
+      (__mmask8)(-1)))
+
+#define _mm_mask_cvttsps_epu32(W, U, A)                                       \
+  ((__m128i)__builtin_ia32_vcvttps2udqs128_mask(                              \
+      (__v4sf)(__m128)A, (__v4si)(__m128i)W, (__mmask8)U))
+
+#define _mm_maskz_cvttsps_epu32(U, A)                                         \
+  ((__m128i)__builtin_ia32_vcvttps2udqs128_mask(                              \
+      (__v4sf)(__m128)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U))
+
+// 256 Bit : float -> uint
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttsps_epu32(__m256 A) {
+  return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
+      (__v8sf)A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvttsps_epu32(__m256i W, __mmask8 U, __m256 A) {
+  return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
+      (__v8sf)A, (__v8si)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvttsps_epu32(__mmask8 U, __m256 A) {
+  return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
+      (__v8sf)A, (__v8si)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundps_epu32(A, R)                                      \
+  ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(                        \
+      (__v8sf)(__m256)A, (__v8si)(__m256i)_mm256_undefined_si256(),           \
+      (__mmask8)-1, (int)(R)))
+
+#define _mm256_mask_cvtts_roundps_epu32(W, U, A, R)                           \
+  ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(                        \
+      (__v8sf)(__m256)A, (__v8si)(__m256i)W, (__mmask8)U, (int)(R)))
+
+#define _mm256_maskz_cvtts_roundps_epu32(U, A, R)                             \
+  ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(                        \
+      (__v8sf)(__m256)A, (__v8si)(__m256i)_mm256_setzero_si256(),             \
+      (__mmask8)U, (int)(R)))
+
+// 128 bit : float -> long
+#define _mm_cvttsps_epi64(A)                                                  \
+  ((__m128i)__builtin_ia32_vcvttps2qqs128_mask(                               \
+      (__v4sf)(__m128)A, (__v2di)_mm_undefined_si128(), (__mmask8)-1))
+
+#define _mm_mask_cvttsps_epi64(W, U, A)                                       \
+  ((__m128i)__builtin_ia32_vcvttps2qqs128_mask(                               \
+      (__v4sf)(__m128)A, (__v2di)(__m128i)W, (__mmask8)U))
+
+#define _mm_maskz_cvttsps_epi64(U, A)                                         \
+  ((__m128i)__builtin_ia32_vcvttps2qqs128_mask(                               \
+      (__v4sf)(__m128)A, (__v2di)_mm_setzero_si128(), (__mmask8)U))
+
+// 256 bit : float -> long
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttsps_epi64(__m128 A) {
+  return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
+      (__v4sf)A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvttsps_epi64(__m256i W, __mmask8 U, __m128 A) {
+  return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
+      (__v4sf)A, (__v4di)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvttsps_epi64(__mmask8 U, __m128 A) {
+  return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
+      (__v4sf)A, (__v4di)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundps_epi64(A, R)                                      \
+  ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(                         \
+      (__v4sf)(__m128)A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,      \
+      (int)R))
+
+#define _mm256_mask_cvtts_roundps_epi64(W, U, A, R)                           \
+  ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(                         \
+      (__v4sf)(__m128)A, (__v4di)W, (__mmask8)U, (int)R))
+
+#define _mm256_maskz_cvtts_roundps_epi64(U, A, R)                             \
+  ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(                         \
+      (__v4sf)(__m128)A, (__v4di)_mm256_setzero_si256(), (__mmask8)U, (int)R))
-#define _mm_cvttsps_epi64(A) \
- ((__m128i)__builtin_ia32_vcvttps2qqs128_mask( \
- (__v4sf)(__m128)A, (__v2di)_mm_undefined_si128(), (__mmask8) - 1))
-
-#define _mm_mask_cvttsps_epi64(W, U, A) \
- ((__m128i)__builtin_ia32_vcvttps2qqs128_mask( \
- (__v4sf)(__m128)A, (__v2di)(__m128i)W, (__mmask8)U))
-
-#define _mm_maskz_cvttsps_epi64(U, A) \
- ((__m128i)__builtin_ia32_vcvttps2qqs128_mask( \
- (__v4sf)(__m128)A, (__v2di)_mm_setzero_si128(), (__mmask8)U))
-/*
-// 256 bit : float -> long
-*/
-
-static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttsps_epi64(__m128 A) {
- return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
- (__v4sf)A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
- _MM_FROUND_CUR_DIRECTION));
-}
-static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_cvttsps_epi64(__m256i W, __mmask8 U, __m128 A) {
- return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
- (__v4sf)A, (__v4di)W, U, _MM_FROUND_CUR_DIRECTION));
-}
-
-static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_cvttsps_epi64(__mmask8 U, __m128 A) {
- return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
- (__v4sf)A, (__v4di)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
-}
-
-#define _mm256_cvtts_roundps_epi64(A, R) \
- ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( \
- (__v4sf)(__m128)A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, \
- (int)R))
-
-#define _mm256_mask_cvtts_roundps_epi64(W, U, A, R) \
- ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( \
- (__v4sf)(__m128)A, (__v4di)W, (__mmask8)U, (int)R))
-
-#define _mm256_maskz_cvtts_roundps_epi64(U, A, R) \
- ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( \
- (__v4sf)(__m128)A, (__v4di)_mm256_setzero_si256(), (__mmask8)U, (int)R))
-
-// 128 bit : float -> ulong
-#define _mm_cvttsps_epu64(A) \
- ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask( \
- (__v4sf)(__m128)A, (__v2di)_mm_undefined_si128(), (__mmask8) - 1))
-
-#define _mm_mask_cvttsps_epu64(W, U, A) \
- ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask( \
- (__v4sf)(__m128)A, (__v2di)(__m128i)W, (__mmask8)U))
-
-#define _mm_maskz_cvttsps_epu64(U, A) \
- ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask( \
- (__v4sf)(__m128)A, (__v2di)_mm_setzero_si128(), (__mmask8)U))
-/*
-// 256 bit : float -> ulong
-*/
-
-static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttsps_epu64(__m128 A) {
- return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
- (__v4sf)A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
- _MM_FROUND_CUR_DIRECTION));
-}
-
-static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_cvttsps_epu64(__m256i W, __mmask8 U, __m128 A) {
- return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
- (__v4sf)A, (__v4di)W, U, _MM_FROUND_CUR_DIRECTION));
-}
-
-static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_cvttsps_epu64(__mmask8 U, __m128 A) {
- return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
- (__v4sf)A, (__v4di)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
-}
-
-#define _mm256_cvtts_roundps_epu64(A, R) \
- ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( \
- (__v4sf)(__m128)A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, \
- (int)R))
-
-#define _mm256_mask_cvtts_roundps_epu64(W, U, A, R) \
- ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( \
- (__v4sf)(__m128)A, (__v4di)W, (__mmask8)U, (int)R))
-
-#define _mm256_maskz_cvtts_roundps_epu64(U, A, R) \
- ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( \
- (__v4sf)(__m128)A, (__v4di)_mm256_setzero_si256(), (__mmask8)U, (int)R))
-#endif
+// 128 bit : float -> ulong
+#define _mm_cvttsps_epu64(A)                                                  \
+  ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask(                              \
+      (__v4sf)(__m128)A, (__v2di)_mm_undefined_si128(), (__mmask8) - 1))
+
+#define _mm_mask_cvttsps_epu64(W, U, A)                                       \
+  ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask(                              \
+      (__v4sf)(__m128)A, (__v2di)(__m128i)W, (__mmask8)U))
+
+#define _mm_maskz_cvttsps_epu64(U, A)                                         \
+  ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask(                              \
+      (__v4sf)(__m128)A, (__v2di)_mm_setzero_si128(), (__mmask8)U))
+
+// 256 bit : float -> ulong
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttsps_epu64(__m128 A) {
+  return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
+      (__v4sf)A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvttsps_epu64(__m256i W, __mmask8 U, __m128 A) {
+  return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
+      (__v4sf)A, (__v4di)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvttsps_epu64(__mmask8 U, __m128 A) {
+  return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
+      (__v4sf)A, (__v4di)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundps_epu64(A, R)                                      \
+  ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(                        \
+      (__v4sf)(__m128)A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1,    \
+      (int)R))
+
+#define _mm256_mask_cvtts_roundps_epu64(W, U, A, R)                           \
+  ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(                        \
+      (__v4sf)(__m128)A, (__v4di)W, (__mmask8)U, (int)R))
+
+#define _mm256_maskz_cvtts_roundps_epu64(U, A, R)                             \
+  ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(                        \
+      (__v4sf)(__m128)A, (__v4di)_mm256_setzero_si256(), (__mmask8)U, (int)R))
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS
-#endif /*__AVX10_2SATCVTDSINTRIN_H*/
\ No newline at end of file
+#endif /*__AVX10_2SATCVTDSINTRIN_H*/
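
A quick usage sketch of the intrinsics this header adds, for reviewers who
want to try them (not part of the patch; assumes a compiler carrying this
change with the avx10.2-256 feature enabled):

#include <immintrin.h>

// Truncating, saturating float -> 64-bit converts in their unmasked,
// merge-masked, and zero-masked-with-SAE forms.
__m256i cvtts_demo(__m128 v, __m256i src, __mmask8 m) {
  __m256i a = _mm256_cvttsps_epi64(v);
  __m256i b = _mm256_mask_cvttsps_epi64(src, m, v);
  __m256i c = _mm256_maskz_cvtts_roundps_epi64(m, v, _MM_FROUND_NO_EXC);
  return _mm256_add_epi64(_mm256_add_epi64(a, b), c);
}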
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index ec9ad9201a0d24..42714ad418ba71 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -200,14 +200,6 @@
#include <avx512vlbf16intrin.h>
#endif
-#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2__)
-#include <avx10_2satcvtdsintrin.h>
-#endif
-
-#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2_512__)
-#include <avx10_2_512satcvtdsintrin.h>
-#endif
-
#if !defined(__SCE__) || __has_feature(modules) || defined(__PKU__)
#include <pkuintrin.h>
#endif
@@ -659,12 +651,14 @@ _storebe_i64(void * __P, long long __D) {
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2__)
#include <avx10_2minmaxintrin.h>
#include <avx10_2niintrin.h>
+#include <avx10_2satcvtdsintrin.h>
#include <avx10_2satcvtintrin.h>
#endif
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2_512__)
#include <avx10_2_512minmaxintrin.h>
#include <avx10_2_512niintrin.h>
+#include <avx10_2_512satcvtdsintrin.h>
#include <avx10_2_512satcvtintrin.h>
#endif
diff --git a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64-error.c b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64-error.c
index 7c7c94fbf8c89b..2900256914570c 100755
--- a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64-error.c
+++ b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64-error.c
@@ -4,27 +4,27 @@
#include <stddef.h>
long long test_mm_cvttssd_si64(__m128d __A) {
- return _mm_cvtt_roundssd_si64(__A, 22); // expected-error {{invalid rounding argument}}
+ return _mm_cvtts_roundsd_si64(__A, 22); // expected-error {{invalid rounding argument}}
}
long long test_mm_cvttssd_i64(__m128d __A) {
- return _mm_cvtt_roundssd_i64(__A, 22); // expected-error {{invalid rounding argument}}
+ return _mm_cvtts_roundsd_i64(__A, 22); // expected-error {{invalid rounding argument}}
}
unsigned long long test_mm_cvttssd_u64(__m128d __A) {
- return _mm_cvtt_roundssd_u64(__A, 22); // expected-error {{invalid rounding argument}}
+ return _mm_cvtts_roundsd_u64(__A, 22); // expected-error {{invalid rounding argument}}
}
long long test_mm_cvttsss_i64(__m128 __A) {
- return _mm_cvtt_roundsss_i64(__A, 22); // expected-error {{invalid rounding argument}}
+ return _mm_cvtts_roundss_i64(__A, 22); // expected-error {{invalid rounding argument}}
}
long long test_mm_cvttsss_si64(__m128 __A) {
- return _mm_cvtt_roundsss_si64(__A, 22); // expected-error {{invalid rounding argument}}
+ return _mm_cvtts_roundss_si64(__A, 22); // expected-error {{invalid rounding argument}}
}
unsigned long long test_mm_cvttsss_u64(__m128 __A) {
- return _mm_cvtt_roundsss_u64(__A, 22); // expected-error {{invalid rounding argument}}
+ return _mm_cvtts_roundss_u64(__A, 22); // expected-error {{invalid rounding argument}}
}
__m512i test_mm512_cvtts_roundpd_epi64(__m512d A) {
diff --git a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c
index 9e8b7f01c4c816..3c6f7f77100088 100644
--- a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c
+++ b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c
@@ -6,37 +6,37 @@
long long test_mm_cvttssd_si64(__m128d __A) {
// CHECK-LABEL: @test_mm_cvttssd_si64(
// CHECK: @llvm.x86.avx512.vcvttssd2si64(<2 x double>
- return _mm_cvtt_roundssd_si64(__A, _MM_FROUND_NO_EXC);
+ return _mm_cvtts_roundsd_si64(__A, _MM_FROUND_NO_EXC);
}
long long test_mm_cvttssd_i64(__m128d __A) {
// CHECK-LABEL: @test_mm_cvttssd_i64(
// CHECK: @llvm.x86.avx512.vcvttssd2si64(<2 x double>
- return _mm_cvtt_roundssd_i64(__A, _MM_FROUND_NO_EXC);
+ return _mm_cvtts_roundsd_i64(__A, _MM_FROUND_NO_EXC);
}
unsigned long long test_mm_cvttssd_u64(__m128d __A) {
// CHECK-LABEL: @test_mm_cvttssd_u64(
// CHECK: @llvm.x86.avx512.vcvttssd2usi64(<2 x double>
- return _mm_cvtt_roundssd_u64(__A, _MM_FROUND_NO_EXC);
+ return _mm_cvtts_roundsd_u64(__A, _MM_FROUND_NO_EXC);
}
long long test_mm_cvttsss_i64(__m128 __A) {
// CHECK-LABEL: @test_mm_cvttsss_i64(
// CHECK: @llvm.x86.avx512.vcvttsss2si64(<4 x float>
- return _mm_cvtt_roundsss_i64(__A, _MM_FROUND_NO_EXC);
+ return _mm_cvtts_roundss_i64(__A, _MM_FROUND_NO_EXC);
}
long long test_mm_cvttsss_si64(__m128 __A) {
// CHECK-LABEL: @test_mm_cvttsss_si64(
// CHECK: @llvm.x86.avx512.vcvttsss2si64(<4 x float>
- return _mm_cvtt_roundsss_si64(__A, _MM_FROUND_NO_EXC);
+ return _mm_cvtts_roundss_si64(__A, _MM_FROUND_NO_EXC);
}
unsigned long long test_mm_cvttsss_u64(__m128 __A) {
// CHECK-LABEL: @test_mm_cvttsss_u64(
// CHECK: @llvm.x86.avx512.vcvttsss2usi64(<4 x float>
- return _mm_cvtt_roundsss_u64(__A, _MM_FROUND_NO_EXC);
+ return _mm_cvtts_roundss_u64(__A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_cvttspd_epi64(__m512d A) {
@@ -60,19 +60,19 @@ __m512i test_mm512_maskz_cvttspd_epi64(__mmask8 U, __m512d A) {
__m512i test_mm512_cvtts_roundpd_epi64(__m512d A) {
// CHECK-LABEL: test_mm512_cvtts_roundpd_epi64
// CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.round.512(<8 x double>
- return _mm512_cvtts_roundpd_epi64(A, _MM_FROUND_NEARBYINT);
+ return _mm512_cvtts_roundpd_epi64(A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_mask_cvtts_roundpd_epi64(__m512i W, __mmask8 U, __m512d A) {
// CHECK-LABEL: test_mm512_mask_cvtts_roundpd_epi64
// CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.round.512(<8 x double>
- return _mm512_mask_cvtts_roundpd_epi64(W, U, A, _MM_FROUND_NEARBYINT);
+ return _mm512_mask_cvtts_roundpd_epi64(W, U, A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_maskz_cvtts_roundpd_epi64(__mmask8 U, __m512d A) {
// CHECK-LABEL: test_mm512_maskz_cvtts_roundpd_epi64
// CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.round.512(<8 x double>
- return _mm512_maskz_cvtts_roundpd_epi64(U, A, _MM_FROUND_NEARBYINT);
+ return _mm512_maskz_cvtts_roundpd_epi64(U, A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_cvttspd_epu64(__m512d A) {
@@ -96,19 +96,19 @@ __m512i test_mm512_maskz_cvttspd_epu64(__mmask8 U, __m512d A) {
__m512i test_mm512_cvtts_roundpd_epu64(__m512d A) {
// CHECK-LABEL: test_mm512_cvtts_roundpd_epu64
// CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.round.512(<8 x double>
- return _mm512_cvtts_roundpd_epu64(A, _MM_FROUND_NEARBYINT);
+ return _mm512_cvtts_roundpd_epu64(A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_mask_cvtts_roundpd_epu64(__m512i W, __mmask8 U, __m512d A) {
// CHECK-LABEL: test_mm512_mask_cvtts_roundpd_epu64
// CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.round.512(<8 x double>
- return _mm512_mask_cvtts_roundpd_epu64(W, U, A, _MM_FROUND_NEARBYINT);
+ return _mm512_mask_cvtts_roundpd_epu64(W, U, A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_maskz_cvtts_roundpd_epu64(__mmask8 U, __m512d A) {
// CHECK-LABEL: test_mm512_maskz_cvtts_roundpd_epu64
// CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.round.512(<8 x double>
- return _mm512_maskz_cvtts_roundpd_epu64(U, A, _MM_FROUND_NEARBYINT);
+ return _mm512_maskz_cvtts_roundpd_epu64(U, A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_cvttsps_epi64(__m256 A) {
@@ -132,19 +132,19 @@ __m512i test_mm512_maskz_cvttsps_epi64(__mmask8 U, __m256 A) {
__m512i test_mm512_cvtts_roundps_epi64(__m256 A) {
// CHECK-LABEL: test_mm512_cvtts_roundps_epi64
// CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.round.512(<8 x float>
- return _mm512_cvtts_roundps_epi64(A, _MM_FROUND_NEARBYINT);
+ return _mm512_cvtts_roundps_epi64(A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_mask_cvtts_roundps_epi64(__m512i W, __mmask8 U, __m256 A) {
// CHECK-LABEL: test_mm512_mask_cvtts_roundps_epi64
// CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.round.512(<8 x float>
- return _mm512_mask_cvtts_roundps_epi64(W, U, A, _MM_FROUND_NEARBYINT);
+ return _mm512_mask_cvtts_roundps_epi64(W, U, A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_maskz_cvtts_roundps_epi64(__mmask8 U, __m256 A) {
// CHECK-LABEL: test_mm512_maskz_cvtts_roundps_epi64
// CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.round.512(<8 x float>
- return _mm512_maskz_cvtts_roundps_epi64(U, A, _MM_FROUND_NEARBYINT);
+ return _mm512_maskz_cvtts_roundps_epi64(U, A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_cvttsps_epu64(__m256 A) {
@@ -168,17 +168,17 @@ __m512i test_mm512_maskz_cvttsps_epu64(__mmask8 U, __m256 A) {
__m512i test_mm512_cvtts_roundps_epu64(__m256 A) {
// CHECK-LABEL: test_mm512_cvtts_roundps_epu64
// CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.round.512(<8 x float>
- return _mm512_cvtts_roundps_epu64(A, _MM_FROUND_NEARBYINT);
+ return _mm512_cvtts_roundps_epu64(A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_mask_cvtts_roundps_epu64(__m512i W, __mmask8 U, __m256 A) {
// CHECK-LABEL: test_mm512_mask_cvtts_roundps_epu64
// CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.round.512(<8 x float>
- return _mm512_mask_cvtts_roundps_epu64(W, U, A, _MM_FROUND_NEARBYINT);
+ return _mm512_mask_cvtts_roundps_epu64(W, U, A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_maskz_cvtts_roundps_epu64(__mmask8 U, __m256 A) {
// CHECK-LABEL: test_mm512_maskz_cvtts_roundps_epu64
// CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.round.512(<8 x float>
- return _mm512_maskz_cvtts_roundps_epu64(U, A, _MM_FROUND_NEARBYINT);
+ return _mm512_maskz_cvtts_roundps_epu64(U, A, _MM_FROUND_NO_EXC);
}
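
Context for the rounding-argument changes above: these builtins take a SAE
operand rather than a full embedded-rounding operand, so Sema accepts only
_MM_FROUND_CUR_DIRECTION or _MM_FROUND_NO_EXC here; _MM_FROUND_NEARBYINT sets
both bits and is rejected, just like the literal 22 used in the
"invalid rounding argument" tests. For reference, the constants as defined in
Clang's smmintrin.h:

#define _MM_FROUND_CUR_DIRECTION 0x04 /* use MXCSR.RC, exceptions live   */
#define _MM_FROUND_NO_EXC        0x08 /* suppress all exceptions (SAE)   */
#define _MM_FROUND_NEARBYINT     0x0C /* NO_EXC | CUR_DIRECTION          */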
diff --git a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c
index c518d0c5d77884..5ea91d67b461d0 100644
--- a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c
+++ b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c
@@ -6,37 +6,37 @@
int test_mm_cvttssd_i32(__m128d __A) {
// CHECK-LABEL: @test_mm_cvttssd_i32
// CHECK: @llvm.x86.avx512.vcvttssd2si
- return _mm_cvtt_roundssd_i32(__A, _MM_FROUND_NO_EXC);
+ return _mm_cvtts_roundsd_i32(__A, _MM_FROUND_NO_EXC);
}
int test_mm_cvttssd_si32(__m128d __A) {
// CHECK-LABEL: @test_mm_cvttssd_si32(
// CHECK: @llvm.x86.avx512.vcvttssd2si(<2 x double>
- return _mm_cvtt_roundssd_si32(__A, _MM_FROUND_NO_EXC);
+ return _mm_cvtts_roundsd_si32(__A, _MM_FROUND_NO_EXC);
}
unsigned test_mm_cvttssd_u32(__m128d __A) {
// CHECK-LABEL: @test_mm_cvttssd_u32(
// CHECK: @llvm.x86.avx512.vcvttssd2usi(<2 x double>
- return _mm_cvtt_roundssd_u32(__A, _MM_FROUND_NO_EXC);
+ return _mm_cvtts_roundsd_u32(__A, _MM_FROUND_NO_EXC);
}
int test_mm_cvttsss_i32(__m128 __A) {
// CHECK-LABEL: @test_mm_cvttsss_i32(
// CHECK: @llvm.x86.avx512.vcvttsss2si(<4 x float>
- return _mm_cvtt_roundsss_i32(__A, _MM_FROUND_NO_EXC);
+ return _mm_cvtts_roundss_i32(__A, _MM_FROUND_NO_EXC);
}
int test_mm_cvttsss_si32(__m128 __A) {
// CHECK-LABEL: @test_mm_cvttsss_si32(
// CHECK: @llvm.x86.avx512.vcvttsss2si(<4 x float>
- return _mm_cvtt_roundsss_si32(__A, _MM_FROUND_NO_EXC);
+ return _mm_cvtts_roundss_si32(__A, _MM_FROUND_NO_EXC);
}
unsigned test_mm_cvttsss_u32(__m128 __A) {
// CHECK-LABEL: @test_mm_cvttsss_u32(
// CHECK: @llvm.x86.avx512.vcvttsss2usi(<4 x float>
- return _mm_cvtt_roundsss_u32(__A, _MM_FROUND_NO_EXC);
+ return _mm_cvtts_roundss_u32(__A, _MM_FROUND_NO_EXC);
}
__m256i test_mm512_cvttspd_epi32(__m512d A) {
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f4fd8607ba0dc5..cb83bf73344985 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -324,14 +324,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}
- if (Subtarget.hasAVX10_2() || Subtarget.hasAVX10_2_512()) {
- setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Legal);
- setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Legal);
- if (Subtarget.is64Bit()) {
- setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Legal);
- setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Legal);
- }
- } else if (Subtarget.hasSSE2()) {
+ if (Subtarget.hasSSE2()) {
// Custom lowering for saturating float to int conversions.
// We handle promotion to larger result types manually.
for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {
@@ -343,6 +336,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
}
}
+ if (Subtarget.hasAVX10_2()) {
+ setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Legal);
+ setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Legal);
+ if (Subtarget.is64Bit()) {
+ setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Legal);
+ }
+ }
// Handle address space casts between mixed sized pointers.
setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
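
On the reordering above: the SSE2 block registers the generic Custom lowering
first, and the AVX10.2 block (AVX10.2-512 implies AVX10.2, so one check
suffices) then overrides i32/i64 with Legal, since the new VCVTT*S
instructions implement the saturating semantics directly. As a reference, a
scalar C model of what @llvm.fptosi.sat.i32.f64 computes, which these
instructions are understood to match (a sketch, not the lowering code):

#include <limits.h>
#include <math.h>

/* Saturating double -> i32 truncation: NaN -> 0, out-of-range values
   clamp to INT_MIN/INT_MAX, in-range values truncate toward zero. */
static int fptosi_sat_i32(double x) {
  if (isnan(x))
    return 0;
  if (x <= (double)INT_MIN)
    return INT_MIN;
  if (x >= (double)INT_MAX)
    return INT_MAX;
  return (int)x;
}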
diff --git a/llvm/test/CodeGen/X86/avx10_2_512satcvtds-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2_512satcvtds-intrinsics.ll
index 5d3bb704984fb3..5d556dedcf8722 100644
--- a/llvm/test/CodeGen/X86/avx10_2_512satcvtds-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2_512satcvtds-intrinsics.ll
@@ -53,7 +53,6 @@ define <8 x i32> @test_int_x86_mask_vcvtt_pd2dqs_512_undef(<8 x double> %x0, i8
ret <8 x i32> %res
}
-
define <8 x i32> @test_int_x86_mask_vcvtt_pd2dqs_512_default(<8 x double>* %x0) {
; X64-LABEL: test_int_x86_mask_vcvtt_pd2dqs_512_default:
; X64: # %bb.0:
@@ -69,7 +68,6 @@ define <8 x i32> @test_int_x86_mask_vcvtt_pd2dqs_512_default(<8 x double>* %x0)
%res = call <8 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.round.512( <8 x double> %x10, <8 x i32> undef, i8 -1, i32 4)
ret <8 x i32> %res
}
-
declare <8 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.round.512(<8 x double>, <8 x i32>, i8 , i32)
define <8 x i32> @test_int_x86_mask_vcvtt_pd2udqs_512(<8 x double> %x0, <8 x i32> %src, i8 %mask) {
@@ -137,10 +135,8 @@ define <8 x i32> @test_int_x86_mask_vcvtt_pd2udqs_512_default(<8 x double>* %x0)
%res = call <8 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.round.512( <8 x double> %x10, <8 x i32> undef, i8 -1, i32 4)
ret <8 x i32> %res
}
-
declare <8 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.round.512(<8 x double>, <8 x i32>, i8 , i32)
-
define <8 x i64> @test_int_x86_mask_vcvtt_pd2qqs_512(<8 x double> %x0, <8 x i64> %src, i8 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_pd2qqs_512:
; X64: # %bb.0:
@@ -191,7 +187,6 @@ define <8 x i64> @test_int_x86_mask_vcvtt_pd2qqs_512_undef(<8 x double> %x0, i8
ret <8 x i64> %res
}
-
define <8 x i64> @test_int_x86_mask_vcvtt_pd2qqs_512_default(<8 x double>* %x0) {
; X64-LABEL: test_int_x86_mask_vcvtt_pd2qqs_512_default:
; X64: # %bb.0:
@@ -207,7 +202,6 @@ define <8 x i64> @test_int_x86_mask_vcvtt_pd2qqs_512_default(<8 x double>* %x0)
%res = call <8 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.round.512( <8 x double> %x10, <8 x i64> undef, i8 -1, i32 4)
ret <8 x i64> %res
}
-
declare <8 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.round.512(<8 x double>, <8 x i64>, i8 , i32)
define <8 x i64> @test_int_x86_mask_vcvtt_pd2uqqs_512(<8 x double> %x0, <8 x i64> %src, i8 %mask) {
@@ -260,7 +254,6 @@ define <8 x i64> @test_int_x86_mask_vcvtt_pd2uqqs_512_undef(<8 x double> %x0, i8
ret <8 x i64> %res
}
-
define <8 x i64> @test_int_x86_mask_vcvtt_pd2uqqs_512_default(<8 x double>* %x0) {
; X64-LABEL: test_int_x86_mask_vcvtt_pd2uqqs_512_default:
; X64: # %bb.0:
@@ -276,13 +269,8 @@ define <8 x i64> @test_int_x86_mask_vcvtt_pd2uqqs_512_default(<8 x double>* %x0)
%res = call <8 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.round.512( <8 x double> %x10, <8 x i64> undef, i8 -1, i32 4)
ret <8 x i64> %res
}
-
declare <8 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.round.512(<8 x double>, <8 x i64>, i8 , i32)
-
-
-
-
define <16 x i32> @test_int_x86_mask_vcvtt_ps2dqs_512(<16 x float> %x0, <16 x i32> %src, i16 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_ps2dqs_512:
; X64: # %bb.0:
@@ -333,7 +321,6 @@ define <16 x i32> @test_int_x86_mask_vcvtt_ps2dqs_512_undef(<16 x float> %x0, i1
ret <16 x i32> %res
}
-
define <16 x i32> @test_int_x86_mask_vcvtt_ps2dqs_512_default(<16 x float>* %x0) {
; X64-LABEL: test_int_x86_mask_vcvtt_ps2dqs_512_default:
; X64: # %bb.0:
@@ -349,7 +336,6 @@ define <16 x i32> @test_int_x86_mask_vcvtt_ps2dqs_512_default(<16 x float>* %x0)
%res = call <16 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.round.512( <16 x float> %x10, <16 x i32> undef, i16 -1, i32 4)
ret <16 x i32> %res
}
-
declare <16 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.round.512(<16 x float>, <16 x i32>, i16 , i32)
define <16 x i32> @test_int_x86_mask_vcvtt_ps2udqs_512(<16 x float> %x0, <16 x i32> %src, i16 %mask) {
@@ -402,7 +388,6 @@ define <16 x i32> @test_int_x86_mask_vcvtt_ps2udqs_512_undef(<16 x float> %x0, i
ret <16 x i32> %res
}
-
define <16 x i32> @test_int_x86_mask_vcvtt_ps2udqs_512_default(<16 x float>* %x0) {
; X64-LABEL: test_int_x86_mask_vcvtt_ps2udqs_512_default:
; X64: # %bb.0:
@@ -418,8 +403,6 @@ define <16 x i32> @test_int_x86_mask_vcvtt_ps2udqs_512_default(<16 x float>* %x0
%res = call <16 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.round.512( <16 x float> %x10, <16 x i32> undef, i16 -1, i32 4)
ret <16 x i32> %res
}
-
-
declare <16 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.round.512(<16 x float>, <16 x i32>, i16 , i32)
define <8 x i64> @test_int_x86_mask_vcvtt_ps2qqs_512(<8 x float> %x0, <8 x i64> %src, i8 %mask) {
@@ -472,7 +455,6 @@ define <8 x i64> @test_int_x86_mask_vcvtt_ps2qqs_512_undef(<8 x float> %x0, i8 %
ret <8 x i64> %res
}
-
define <8 x i64> @test_int_x86_mask_vcvtt_ps2qqs_512_default(<8 x float> %x0) {
; CHECK-LABEL: test_int_x86_mask_vcvtt_ps2qqs_512_default:
; CHECK: # %bb.0:
@@ -481,7 +463,6 @@ define <8 x i64> @test_int_x86_mask_vcvtt_ps2qqs_512_default(<8 x float> %x0) {
%res = call <8 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.round.512( <8 x float> %x0, <8 x i64> undef, i8 -1, i32 4)
ret <8 x i64> %res
}
-
declare <8 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.round.512(<8 x float>, <8 x i64>, i8 , i32)
define <8 x i64> @test_int_x86_mask_vcvtt_ps2uqqs_512(<8 x float> %x0, <8 x i64> %src, i8 %mask) {
@@ -534,7 +515,6 @@ define <8 x i64> @test_int_x86_mask_vcvtt_ps2uqqs_512_undef(<8 x float> %x0, i8
ret <8 x i64> %res
}
-
define <8 x i64> @test_int_x86_mask_vcvtt_ps2uqqs_512_default(<8 x float> %x0) {
; CHECK-LABEL: test_int_x86_mask_vcvtt_ps2uqqs_512_default:
; CHECK: # %bb.0:
@@ -543,6 +523,4 @@ define <8 x i64> @test_int_x86_mask_vcvtt_ps2uqqs_512_default(<8 x float> %x0) {
%res = call <8 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.round.512( <8 x float> %x0, <8 x i64> undef, i8 -1, i32 4)
ret <8 x i64> %res
}
-
declare <8 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.round.512(<8 x float>, <8 x i64>, i8 , i32)
-
diff --git a/llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll
index 28457c4e4d6329..7dbc97d7e1aaa5 100644
--- a/llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll
@@ -145,7 +145,6 @@ define <4 x i32> @test_int_x86_mask_vcvtt_pd2dqs_256_default(<4 x double>* %xptr
%res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.round.256( <4 x double> %x0, <4 x i32> undef, i8 -1, i32 4)
ret <4 x i32> %res
}
-
declare <4 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.round.256(<4 x double>, <4 x i32>, i8 , i32)
define <4 x i32> @test_int_x86_mask_vcvtt_pd2udqs_256(<4 x double> %x0, <4 x i32> %src, i8 %mask) {
@@ -220,10 +219,8 @@ define <4 x i32> @test_int_x86_mask_vcvtt_pd2udqs_256_default(<4 x double>* %x0)
%res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.round.256( <4 x double> %x10, <4 x i32> undef, i8 -1, i32 4)
ret <4 x i32> %res
}
-
declare <4 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.round.256(<4 x double>, <4 x i32>, i8 , i32)
-
define <4 x i64> @test_int_x86_mask_vcvtt_pd2qqs_256(<4 x double> %x0, <4 x i64> %src, i8 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_pd2qqs_256:
; X64: # %bb.0:
@@ -290,12 +287,8 @@ define <4 x i64> @test_int_x86_mask_vcvtt_pd2qqs_256_default(<4 x double>* %x0)
%res = call <4 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.round.256( <4 x double> %x10, <4 x i64> undef, i8 -1, i32 4)
ret <4 x i64> %res
}
-
-
declare <4 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.round.256(<4 x double>, <4 x i64>, i8 , i32)
-
-
define <4 x i64> @test_int_x86_mask_vcvtt_pd2uqqs_256(<4 x double> %x0, <4 x i64> %src, i8 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_pd2uqqs_256:
; X64: # %bb.0:
@@ -362,12 +355,8 @@ define <4 x i64> @test_int_x86_mask_vcvtt_pd2uqqs_256_default(<4 x double>* %x0)
%res = call <4 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.round.256( <4 x double> %x10, <4 x i64> undef, i8 -1, i32 4)
ret <4 x i64> %res
}
-
-
declare <4 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.round.256(<4 x double>, <4 x i64>, i8 , i32)
-
-
define <8 x i32> @test_int_x86_mask_vcvtt_ps2dqs_256(<8 x float> %x0, <8 x i32> %src, i8 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_ps2dqs_256:
; X64: # %bb.0:
@@ -433,11 +422,8 @@ define <8 x i32> @test_int_x86_mask_vcvtt_ps2dqs_256_default(<8 x float>* %x0) {
%res = call <8 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.round.256( <8 x float> %x10, <8 x i32> undef, i8 -1, i32 4)
ret <8 x i32> %res
}
-
-
declare <8 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.round.256(<8 x float>, <8 x i32>, i8 , i32)
-
define <8 x i32> @test_int_x86_mask_vcvtt_ps2udqs_256(<8 x float> %x0, <8 x i32> %src, i8 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_ps2udqs_256:
; X64: # %bb.0:
@@ -504,10 +490,8 @@ define <8 x i32> @test_int_x86_mask_vcvtt_ps2udqs_256_default(<8 x float>* %x0)
%res = call <8 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.round.256( <8 x float> %x10, <8 x i32> undef, i8 -1, i32 4)
ret <8 x i32> %res
}
-
declare <8 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.round.256(<8 x float>, <8 x i32>, i8 , i32)
-
define <4 x i64> @test_int_x86_maskz_vcvtt_ps2qqs_256_z(<4 x float> %x0, i8 %mask) {
; X64-LABEL: test_int_x86_maskz_vcvtt_ps2qqs_256_z:
; X64: # %bb.0:
@@ -539,15 +523,8 @@ define <4 x i64> @test_int_x86_mask_vcvtt_ps2qqs_256_undef(<4 x float> %x0, i8 %
%res = call <4 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.round.256( <4 x float> %x0, <4 x i64> undef, i8 %mask, i32 4)
ret <4 x i64> %res
}
-
-
-
-
-
declare <4 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.round.256(<4 x float>, <4 x i64>, i8 , i32)
-
-
define <4 x i64> @test_int_x86_mask_vcvtt_ps2uqqs_256(<4 x float> %x0, <4 x i64> %src, i8 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_ps2uqqs_256:
; X64: # %bb.0:
@@ -610,8 +587,6 @@ define <4 x i64> @test_int_x86_mask_vcvtt_ps2uqqs_256_default(<4 x float> %x0) {
declare <4 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.round.256(<4 x float>, <4 x i64>, i8 , i32)
-
-
define <4 x i32> @test_int_x86_mask_vcvtt_pd2dqs_128(<2 x double> %x0, <4 x i32> %src, i8 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_pd2dqs_128:
; X64: # %bb.0:
@@ -671,7 +646,6 @@ define <4 x i32> @test_int_x86_mask_vcvtt_pd2dqs_128_default(<2 x double> %x0) {
%res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.128( <2 x double> %x0, <4 x i32> undef, i8 -1)
ret <4 x i32> %res
}
-
declare <4 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.128(<2 x double>, <4 x i32>, i8)
define <4 x i32> @test_int_x86_mask_vcvtt_pd2udqs_128(<2 x double> %x0, <4 x i32> %src, i8 %mask) {
@@ -733,7 +707,6 @@ define <4 x i32> @test_int_x86_mask_vcvtt_pd2udqs_128_default(<2 x double> %x0)
%res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.128( <2 x double> %x0, <4 x i32> undef, i8 -1)
ret <4 x i32> %res
}
-
declare <4 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.128(<2 x double>, <4 x i32>, i8)
define <2 x i64> @test_int_x86_mask_vcvtt_pd2qqs_128(<2 x double> %x0, <2 x i64> %src, i8 %mask) {
@@ -795,7 +768,6 @@ define <2 x i64> @test_int_x86_mask_vcvtt_pd2qqs_128_default(<2 x double> %x0) {
%res = call <2 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.128( <2 x double> %x0, <2 x i64> undef, i8 -1)
ret <2 x i64> %res
}
-
declare <2 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.128(<2 x double>, <2 x i64>, i8)
define <2 x i64> @test_int_x86_mask_vcvtt_pd2uqqs_128(<2 x double> %x0, <2 x i64> %src, i8 %mask) {
@@ -857,7 +829,6 @@ define <2 x i64> @test_int_x86_mask_vcvtt_pd2uqqs_128_default(<2 x double> %x0)
%res = call <2 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.128( <2 x double> %x0, <2 x i64> undef, i8 -1)
ret <2 x i64> %res
}
-
declare <2 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.128(<2 x double>, <2 x i64>, i8)
define <2 x i64> @test_int_x86_mask_vcvtt_ps2qqs_128_default(<4 x float> %x0) {
@@ -926,7 +897,6 @@ define <4 x i32> @test_int_x86_mask_vcvtt_ps2dqs_128_default(<4 x float> %x0) {
}
declare <4 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.128(<4 x float>, <4 x i32>, i8)
-
define <4 x i32> @test_int_x86_mask_vcvtt_ps2udqs_128(<4 x float> %x0, <4 x i32> %src, i8 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_ps2udqs_128:
; X64: # %bb.0:
@@ -984,7 +954,6 @@ define <4 x i32> @test_int_x86_mask_vcvtt_ps2udqs_128_default(<4 x float> %x0) {
}
declare <4 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.128(<4 x float>, <4 x i32>, i8)
-
define <2 x i64> @test_int_x86_mask_vcvtt_ps2qqs_128_undef(<4 x float> %x0, i8 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_ps2qqs_128_undef:
; X64: # %bb.0:
@@ -1034,7 +1003,6 @@ define <2 x i64> @test_int_x86_mask_vcvtt_ps2qqs_128(<4 x float> %x0, <2 x i64>
%res = call <2 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.128( <4 x float> %x0, <2 x i64> %src, i8 %mask)
ret <2 x i64> %res
}
-
declare <2 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.128(<4 x float>, <2 x i64>, i8)
define <2 x i64> @test_int_x86_mask_vcvtt_ps2uqqs_128(<4 x float> %x0, <2 x i64> %src, i8 %mask) {
>From f99a6ed507e849fb47d4a061f068d1966bd0579d Mon Sep 17 00:00:00 2001
From: Malay Sanghi <malay.sanghi at intel.com>
Date: Wed, 4 Sep 2024 21:06:40 +0800
Subject: [PATCH 3/8] Match builtin name with asm
Fix error in header
---
clang/include/clang/Basic/BuiltinsX86.def | 8 +-
clang/include/clang/Basic/BuiltinsX86_64.def | 8 +-
clang/lib/Headers/avx10_2satcvtdsintrin.h | 1167 ++++++-----------
clang/lib/Sema/SemaX86.cpp | 16 +-
.../X86/avx10_2_512satcvtds-builtins-x64.c | 12 +-
.../X86/avx10_2_512satcvtds-builtins.c | 12 +-
llvm/include/llvm/IR/IntrinsicsX86.td | 16 +-
llvm/lib/Target/X86/X86IntrinsicsInfo.h | 16 +-
.../CodeGen/X86/avx10_2satcvtds-intrinsics.ll | 48 +-
.../X86/avx10_2satcvtds-x64-intrinsics.ll | 32 +-
10 files changed, 488 insertions(+), 847 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index e8c6b4b7973a1f..3f47e34cc9098c 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -2123,10 +2123,10 @@ TARGET_BUILTIN(__builtin_ia32_vpdpwuuds128, "V4iV4iV4iV4i", "nV:128:", "avxvnnii
TARGET_BUILTIN(__builtin_ia32_vpdpwuuds256, "V8iV8iV8iV8i", "nV:256:", "avxvnniint16|avx10.2-256")
// AVX10.2 SATCVT-DS
-TARGET_BUILTIN(__builtin_ia32_vcvttssd2si32, "iV2dIi", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttssd2usi32, "UiV2dIi", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttsss2si32, "iV4fIi", "ncV:128:", "avx10.2-256")
-TARGET_BUILTIN(__builtin_ia32_vcvttsss2usi32, "UiV4fIi", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttsd2sis32, "iV2dIi", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttsd2usis32, "UiV2dIi", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttss2sis32, "iV4fIi", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttss2usis32, "UiV4fIi", "ncV:128:", "avx10.2-256")
TARGET_BUILTIN(__builtin_ia32_vcvttpd2dqs128_mask, "V4iV2dV4iUc", "nV:128:", "avx10.2-256")
TARGET_BUILTIN(__builtin_ia32_vcvttpd2dqs256_round_mask, "V4iV4dV4iUcIi", "nV:256:", "avx10.2-256")
TARGET_BUILTIN(__builtin_ia32_vcvttpd2dqs512_round_mask, "V8iV8dV8iUcIi", "nV:512:", "avx10.2-512")
diff --git a/clang/include/clang/Basic/BuiltinsX86_64.def b/clang/include/clang/Basic/BuiltinsX86_64.def
index ed9b17b8bd7b8e..4bd71ba274b968 100644
--- a/clang/include/clang/Basic/BuiltinsX86_64.def
+++ b/clang/include/clang/Basic/BuiltinsX86_64.def
@@ -100,10 +100,10 @@ TARGET_BUILTIN(__builtin_ia32_vcvttsh2usi64, "UOiV8xIi", "ncV:128:", "avx512fp16
TARGET_BUILTIN(__builtin_ia32_directstore_u64, "vULi*ULi", "n", "movdiri")
// AVX10.2 SATCVT-DS
-TARGET_BUILTIN(__builtin_ia32_vcvttssd2si64, "OiV2dIi", "ncV:128:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvttssd2usi64, "UOiV2dIi", "ncV:128:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvttsss2si64, "OiV4fIi", "ncV:128:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvttsss2usi64, "UOiV4fIi", "ncV:128:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttsd2sis64, "OiV2dIi", "ncV:128:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttsd2usis64, "UOiV2dIi", "ncV:128:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttss2sis64, "OiV4fIi", "ncV:128:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttss2usis64, "UOiV4fIi", "ncV:128:", "avx10.2-512")
// UINTR
TARGET_BUILTIN(__builtin_ia32_clui, "v", "n", "uintr")
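
A note on the rename: the builtins now mirror the assembly mnemonics
(VCVTTSD2SIS, VCVTTSS2USIS, ...), where the trailing 's' marks the saturating
form, instead of the earlier doubled-'s' spelling. Direct use looks like the
sketch below; user code would normally go through the _mm_cvtts_round*
wrappers instead:

#include <immintrin.h>

long long cvtt_sat_demo(__m128d v) {
  /* Saturating truncation of the low double to a signed 64-bit integer,
     suppressing FP exceptions; x86-64 only, per BuiltinsX86_64.def. */
  return __builtin_ia32_vcvttsd2sis64((__v2df)v, _MM_FROUND_NO_EXC);
}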
diff --git a/clang/lib/Headers/avx10_2satcvtdsintrin.h b/clang/lib/Headers/avx10_2satcvtdsintrin.h
index bca34120a8fec8..75c6114a5bb2e1 100644
--- a/clang/lib/Headers/avx10_2satcvtdsintrin.h
+++ b/clang/lib/Headers/avx10_2satcvtdsintrin.h
@@ -25,782 +25,423 @@
__min_vector_width__(128)))
#define _mm_cvtts_roundsd_i32(A, R) \
- ((int)__builtin_ia32_vcvttssd2si32((__v2df)(__m128)(A), (const int)(R)))
+ ((int)__builtin_ia32_vcvttsd2sis32((__v2df)(__m128)(A), (const int)(R)))
#define _mm_cvtts_roundsd_si32(A, R) \
- ((int)__builtin_ia32_vcvttssd2si32((__v2df)(__m128d)(A), (const int)(R)))
- (const int)(R)))
+ ((int)__builtin_ia32_vcvttsd2sis32((__v2df)(__m128d)(A), (const int)(R)))
+
+#define _mm_cvtts_roundsd_u32(A, R) \
+ ((unsigned int)__builtin_ia32_vcvttsd2usis32((__v2df)(__m128d)(A), \
+ (const int)(R)))
+
+#define _mm_cvtts_roundss_i32(A, R) \
+ ((int)__builtin_ia32_vcvttss2sis32((__v4sf)(__m128)(A), (const int)(R)))
-#define _mm_cvtts_roundsd_u32(A, R) \
- ((unsigned int) \
- __builtin_ia32_vcvttssd2usi32( \
- (__v2df)(__m128d)(A), \
- (const int)(R)))
-
-#define _mm_cvtts_roundss_i32(A, R) \
- ((int) \
- __builtin_ia32_vcvttsss2si32( \
- (__v4sf)(__m128)(A), \
- (const int)(R)))
-
-#define _mm_cvtts_roundss_si32(A, \
- R) \
- ((int) \
- __builtin_ia32_vcvttsss2si32( \
- (__v4sf)(__m128)(A), \
- (const int)(R)))
-
-#define _mm_cvtts_roundss_u32(A, R) \
- ((unsigned int) \
- __builtin_ia32_vcvttsss2usi32( \
- (__v4sf)(__m128)(A), \
- (const int)(R)))
+#define _mm_cvtts_roundss_si32(A, R) \
+ ((int)__builtin_ia32_vcvttss2sis32((__v4sf)(__m128)(A), (const int)(R)))
+
+#define _mm_cvtts_roundss_u32(A, R) \
+ ((unsigned int)__builtin_ia32_vcvttss2usis32((__v4sf)(__m128)(A), \
+ (const int)(R)))
#ifdef __x86_64__
-#define _mm_cvtts_roundss_u64(A, R) \
- ((unsigned long long) \
- __builtin_ia32_vcvttsss2usi64( \
- (__v4sf)(__m128)(A), \
- (const int)(R)))
-
-#define _mm_cvtts_roundsd_u64(A, R) \
- ((unsigned long long) \
- __builtin_ia32_vcvttssd2usi64( \
- (__v2df)(__m128d)(A), \
- (const int)(R)))
-
-#define _mm_cvtts_roundss_i64(A, R) \
- ((long long) \
- __builtin_ia32_vcvttsss2si64( \
- (__v4sf)(__m128)(A), \
- (const int)(R)))
-
-#define _mm_cvtts_roundss_si64(A, \
- R) \
- ((long long) \
- __builtin_ia32_vcvttsss2si64( \
- (__v4sf)(__m128)(A), \
- (const int)(R)))
-
-#define _mm_cvtts_roundsd_si64(A, \
- R) \
- ((long long) \
- __builtin_ia32_vcvttssd2si64( \
- (__v2df)(__m128d)(A), \
- (const int)(R)))
-
-#define _mm_cvtts_roundsd_i64(A, R) \
- ((long long)__builtin_ia32_vcvttssd2si64((__v2df)(__m128d)(A), \
-#endif /* __x86_64__ */
+#define _mm_cvtts_roundss_u64(A, R) \
+ ((unsigned long long)__builtin_ia32_vcvttss2usis64((__v4sf)(__m128)(A), \
+ (const int)(R)))
- // 128 Bit : Double -> int
-#define _mm_cvttspd_epi32(A) \
- ((__m128i) \
- __builtin_ia32_vcvttpd2dqs128_mask( \
- (__v2df)(__m128d)A, \
- (__v4si)(__m128i) \
- _mm_undefined_si128(), \
- (__mmask8)(-1)))
-
-#define _mm_mask_cvttspd_epi32( \
- W, U, A) \
- ((__m128i) \
- __builtin_ia32_vcvttpd2dqs128_mask( \
- (__v2df)(__m128d)A, \
- (__v4si)(__m128i)W, \
- (__mmask8)U))
-
-#define _mm_maskz_cvttspd_epi32(U, \
- A) \
- ((__m128i) \
- __builtin_ia32_vcvttpd2dqs128_mask( \
- (__v2df)(__m128d)A, \
- (__v4si)(__m128i) \
- _mm_setzero_si128(), \
- (__mmask8)U))
+#define _mm_cvtts_roundsd_u64(A, R) \
+ ((unsigned long long)__builtin_ia32_vcvttsd2usis64((__v2df)(__m128d)(A), \
+ (const int)(R)))
-// 256 Bit : Double -> int
-static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm256_cvttspd_epi32(__m256d A) {
- return (
- (__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
- (__v4df)(__m256d)A,
- (__v4si)
- _mm_undefined_si128(),
- (__mmask8)-1,
- _MM_FROUND_CUR_DIRECTION));
- }
-
- static __inline__ __m128i
- __DEFAULT_FN_ATTRS128
- _mm256_mask_cvttspd_epi32(
- __m128i W, __mmask8 U,
- __m256d A) {
- return (
- (__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
- (__v4df)A, (__v4si)W, U,
- _MM_FROUND_CUR_DIRECTION));
- }
-
- static __inline__ __m128i
- __DEFAULT_FN_ATTRS128
- _mm256_maskz_cvttspd_epi32(
- __mmask8 U, __m256d A) {
- return (
- (__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
- (__v4df)A,
- (__v4si)
- _mm_setzero_si128(),
- U,
- _MM_FROUND_CUR_DIRECTION));
- }
-
-#define _mm256_cvtts_roundpd_epi32( \
- A, R) \
- ((__m128i) \
- __builtin_ia32_vcvttpd2dqs256_round_mask( \
- (__v4df)(__m256d)A, \
- (__v4si)(__m128i) \
- _mm_undefined_si128(), \
- (__mmask8) - 1, \
- (int)(R)))
-
-#define _mm256_mask_cvtts_roundpd_epi32( \
- W, U, A, R) \
- ((__m128i) \
- __builtin_ia32_vcvttpd2dqs256_round_mask( \
- (__v4df)(__m256d)A, \
- (__v4si)(__m128i)W, \
- (__mmask8)U, (int)(R)))
-
-#define _mm256_maskz_cvtts_roundpd_epi32( \
- U, A, R) \
- ((__m128i) \
- __builtin_ia32_vcvttpd2dqs256_round_mask( \
- (__v4df)(__m256d)A, \
- (__v4si)(__m128i) \
- _mm_setzero_si128(), \
- (__mmask8)U, (int)(R)))
-
- // 128 Bit : Double -> uint
-#define _mm_cvttspd_epu32(A) \
- ((__m128i) \
- __builtin_ia32_vcvttpd2udqs128_mask( \
- (__v2df)(__m128d)A, \
- (__v4si)(__m128i) \
- _mm_undefined_si128(), \
- (__mmask8)(-1)))
-
-#define _mm_mask_cvttspd_epu32( \
- W, U, A) \
- ((__m128i) \
- __builtin_ia32_vcvttpd2udqs128_mask( \
- ((__v2df)(__m128d)A), \
- (__v4si)(__m128i)W, \
- (__mmask8)U))
-
-#define _mm_maskz_cvttspd_epu32(U, \
- A) \
- ((__m128i) \
- __builtin_ia32_vcvttpd2udqs128_mask( \
- (__v2df)(__m128d)A, \
- (__v4si)(__m128i) \
- _mm_setzero_si128(), \
- (__mmask8)U))
-
- // 256 Bit : Double -> uint
- static __inline__ __m128i
- __DEFAULT_FN_ATTRS128
- _mm256_cvttspd_epu32(__m256d A) {
- return (
- (__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
- (__v4df)A,
- (__v4si)
- _mm_undefined_si128(),
- (__mmask8)-1,
- _MM_FROUND_CUR_DIRECTION));
- }
-
- static __inline__ __m128i
- __DEFAULT_FN_ATTRS128
- _mm256_mask_cvttspd_epu32(
- __m128i W, __mmask8 U,
- __m256d A) {
- return (
- (__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
- (__v4df)A, (__v4si)W, U,
- _MM_FROUND_CUR_DIRECTION));
- }
-
- static __inline__ __m128i
- __DEFAULT_FN_ATTRS128
- _mm256_maskz_cvttspd_epu32(
- __mmask8 U, __m256d A) {
- return (
- (__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
- (__v4df)A,
- (__v4si)
- _mm_setzero_si128(),
- U,
- _MM_FROUND_CUR_DIRECTION));
- }
-
-#define _mm256_cvtts_roundpd_epu32( \
- A, R) \
- ((__m128i) \
- __builtin_ia32_vcvttpd2udqs256_round_mask( \
- (__v4df)(__m256d)A, \
- (__v4si)(__m128i) \
- _mm_undefined_si128(), \
- (__mmask8) - 1, \
- (int)(R)))
-
-#define _mm256_mask_cvtts_roundpd_epu32( \
- W, U, A, R) \
- ((__m128i) \
- __builtin_ia32_vcvttpd2udqs256_round_mask( \
- (__v4df)(__m256d)A, \
- (__v4si)(__m128i)W, \
- (__mmask8)U, (int)(R)))
-
-#define _mm256_maskz_cvtts_roundpd_epu32( \
- U, A, R) \
- ((__m128i) \
- __builtin_ia32_vcvttpd2udqs256_round_mask( \
- (__v4df)(__m256d)A, \
- (__v4si)(__m128i) \
- _mm_setzero_si128(), \
- (__mmask8)U, (int)(R)))
-
- // 128 Bit : Double -> long
-#define _mm_cvttspd_epi64(A) \
- ((__m128i) \
- __builtin_ia32_vcvttpd2qqs128_mask( \
- (__v2df)(__m128d)A, \
- (__v2di) \
- _mm_undefined_si128(), \
- (__mmask8) - 1))
-
-#define _mm_mask_cvttspd_epi64( \
- W, U, A) \
- ((__m128i) \
- __builtin_ia32_vcvttpd2qqs128_mask( \
- (__v2df)(__m128d)A, \
- (__v2di)W, (__mmask8)U))
-
-#define _mm_maskz_cvttspd_epi64(U, \
- A) \
- ((__m128i) \
- __builtin_ia32_vcvttpd2qqs128_mask( \
- (__v2df)(__m128d)A, \
- (__v2di) \
- _mm_setzero_si128(), \
- (__mmask8)U))
-
- // 256 Bit : Double -> long
- static __inline__ __m256i
- __DEFAULT_FN_ATTRS
- _mm256_cvttspd_epi64(__m256d A) {
- return (
- (__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
- (__v4df)A,
- (__v4di)
- _mm256_undefined_si256(),
- (__mmask8)-1,
- _MM_FROUND_CUR_DIRECTION));
- }
-
- static __inline__ __m256i
- __DEFAULT_FN_ATTRS
- _mm256_mask_cvttspd_epi64(
- __m256i W, __mmask8 U,
- __m256d A) {
- return (
- (__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
- (__v4df)A, (__v4di)W, U,
- _MM_FROUND_CUR_DIRECTION));
- }
-
- static __inline__ __m256i
- __DEFAULT_FN_ATTRS
- _mm256_maskz_cvttspd_epi64(
- __mmask8 U, __m256d A) {
- return (
- (__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
- (__v4df)A,
- (__v4di)
- _mm256_setzero_si256(),
- U,
- _MM_FROUND_CUR_DIRECTION));
- }
-
-#define _mm256_cvtts_roundpd_epi64( \
- A, R) \
- ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask( \
- (__v4df)A, \
- (__v4di) \
- _mm256_undefined_si256(), \
- (__mmask8) - 1, (int)R))
-
-#define _mm256_mask_cvtts_roundpd_epi64( \
- W, U, A, R) \
- ((__m256i) \
- __builtin_ia32_vcvttpd2qqs256_round_mask( \
- (__v4df)A, (__v4di)W, \
- (__mmask8)U, (int)R))
+#define _mm_cvtts_roundss_i64(A, R) \
+ ((long long)__builtin_ia32_vcvttss2sis64((__v4sf)(__m128)(A), (const int)(R)))
-#define _mm256_maskz_cvtts_roundpd_epi64( \
- U, A, R) \
- ((__m256i) \
- __builtin_ia32_vcvttpd2qqs256_round_mask( \
- (__v4df)A, \
- (__v4di) \
- _mm256_setzero_si256(), \
- (__mmask8)U, (int)R))
+#define _mm_cvtts_roundss_si64(A, R) \
+ ((long long)__builtin_ia32_vcvttss2sis64((__v4sf)(__m128)(A), (const int)(R)))
- // 128 Bit : Double -> ulong
-#define _mm_cvttspd_epu64(A) \
- ((__m128i) \
- __builtin_ia32_vcvttpd2uqqs128_mask( \
- (__v2df)(__m128d)A, \
- (__v2di) \
- _mm_undefined_si128(), \
- (__mmask8) - 1))
-
-#define _mm_mask_cvttspd_epu64( \
- W, U, A) \
- ((__m128i) \
- __builtin_ia32_vcvttpd2uqqs128_mask( \
- (__v2df)(__m128d)A, \
- (__v2di)W, (__mmask8)U))
-
-#define _mm_maskz_cvttspd_epu64(U, \
- A) \
- ((__m128i) \
- __builtin_ia32_vcvttpd2uqqs128_mask( \
- (__v2df)(__m128d)A, \
- (__v2di) \
- _mm_setzero_si128(), \
- (__mmask8)U))
-
- // 256 Bit : Double -> ulong
-
- static __inline__ __m256i
- __DEFAULT_FN_ATTRS
- _mm256_cvttspd_epu64(__m256d A) {
- return (
- (__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
- (__v4df)A,
- (__v4di)
- _mm256_undefined_si256(),
- (__mmask8)-1,
- _MM_FROUND_CUR_DIRECTION));
- }
-
- static __inline__ __m256i
- __DEFAULT_FN_ATTRS
- _mm256_mask_cvttspd_epu64(
- __m256i W, __mmask8 U,
- __m256d A) {
- return (
- (__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
- (__v4df)A, (__v4di)W, U,
- _MM_FROUND_CUR_DIRECTION));
- }
-
- static __inline__ __m256i
- __DEFAULT_FN_ATTRS
- _mm256_maskz_cvttspd_epu64(
- __mmask8 U, __m256d A) {
- return (
- (__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
- (__v4df)A,
- (__v4di)
- _mm256_setzero_si256(),
- U,
- _MM_FROUND_CUR_DIRECTION));
- }
-
-#define _mm256_cvtts_roundpd_epu64( \
- A, R) \
- ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( \
- (__v4df)A, \
- (__v4di) \
- _mm256_undefined_si256(), \
- (__mmask8) - 1, (int)R))
-
-#define _mm256_mask_cvtts_roundpd_epu64( \
- W, U, A, R) \
- ((__m256i) \
- __builtin_ia32_vcvttpd2uqqs256_round_mask( \
- (__v4df)A, (__v4di)W, \
- (__mmask8)U, (int)R))
+#define _mm_cvtts_roundsd_si64(A, R) \
+ ((long long)__builtin_ia32_vcvttsd2sis64((__v2df)(__m128d)(A), \
+ (const int)(R)))
-#define _mm256_maskz_cvtts_roundpd_epu64( \
- U, A, R) \
- ((__m256i) \
- __builtin_ia32_vcvttpd2uqqs256_round_mask( \
- (__v4df)A, \
- (__v4di) \
- _mm256_setzero_si256(), \
- (__mmask8)U, (int)R))
+#define _mm_cvtts_roundsd_i64(A, R) \
+ ((long long)__builtin_ia32_vcvttsd2sis64((__v2df)(__m128d)(A), \
+ (const int)(R)))
+#endif /* __x86_64__ */
- // 128 Bit : float -> int
-#define _mm_cvttsps_epi32(A) \
- ((__m128i) \
- __builtin_ia32_vcvttps2dqs128_mask( \
- (__v4sf)(__m128)A, \
- (__v4si)(__m128i) \
- _mm_undefined_si128(), \
- (__mmask8)(-1)))
-
-#define _mm_mask_cvttsps_epi32( \
- W, U, A) \
- ((__m128i) \
- __builtin_ia32_vcvttps2dqs128_mask( \
- (__v4sf)(__m128)A, \
- (__v4si)(__m128i)W, \
- (__mmask8)U))
-
-#define _mm_maskz_cvttsps_epi32(U, \
- A) \
- ((__m128i) \
- __builtin_ia32_vcvttps2dqs128_mask( \
- (__v4sf)(__m128)A, \
- (__v4si)(__m128i) \
- _mm_setzero_si128(), \
- (__mmask8)U))
-
- // 256 Bit : float -> int
- static __inline__ __m256i
- __DEFAULT_FN_ATTRS
- _mm256_cvttsps_epi32(__m256 A) {
- return (
- (__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
- (__v8sf)A,
- (__v8si)
- _mm256_undefined_si256(),
- (__mmask8)-1,
- _MM_FROUND_CUR_DIRECTION));
- }
-
- static __inline__ __m256i
- __DEFAULT_FN_ATTRS
- _mm256_mask_cvttsps_epi32(
- __m256i W, __mmask8 U,
- __m256 A) {
- return (
- (__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
- (__v8sf)(__m256)A,
- (__v8si)W, U,
- _MM_FROUND_CUR_DIRECTION));
- }
-
- static __inline__ __m256i
- __DEFAULT_FN_ATTRS
- _mm256_maskz_cvttsps_epi32(
- __mmask8 U, __m256 A) {
- return (
- (__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
- (__v8sf)(__m256)A,
- (__v8si)
- _mm256_setzero_si256(),
- U,
- _MM_FROUND_CUR_DIRECTION));
- }
-
-#define _mm256_cvtts_roundps_epi32( \
- A, R) \
- ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( \
- (__v8sf)(__m256)A, \
- (__v8si)(__m256i) \
- _mm256_undefined_si256(), \
- (__mmask8) - 1, (int)(R)))
-
-#define _mm256_mask_cvtts_roundps_epi32( \
- W, U, A, R) \
- ((__m256i) \
- __builtin_ia32_vcvttps2dqs256_round_mask( \
- (__v8sf)(__m256)A, \
- (__v8si)(__m256i)W, \
- (__mmask8)U, (int)(R)))
-
-#define _mm256_maskz_cvtts_roundps_epi32( \
- U, A, R) \
- ((__m256i) \
- __builtin_ia32_vcvttps2dqs256_round_mask( \
- (__v8sf)(__m256)A, \
- (__v8si)(__m256i) \
- _mm256_setzero_si256(), \
- (__mmask8)U, (int)(R)))
-
- // 128 Bit : float -> uint
-#define _mm_cvttsps_epu32(A) \
- ((__m128i) \
- __builtin_ia32_vcvttps2udqs128_mask( \
- (__v4sf)(__m128)A, \
- (__v4si)(__m128i) \
- _mm_undefined_si128(), \
- (__mmask8)(-1)))
-
-#define _mm_mask_cvttsps_epu32( \
- W, U, A) \
- ((__m128i) \
- __builtin_ia32_vcvttps2udqs128_mask( \
- (__v4sf)(__m128)A, \
- (__v4si)(__m128i)W, \
- (__mmask8)U))
-
-#define _mm_maskz_cvttsps_epu32(U, \
- A) \
- ((__m128i) \
- __builtin_ia32_vcvttps2udqs128_mask( \
- (__v4sf)(__m128)A, \
- (__v4si)(__m128i) \
- _mm_setzero_si128(), \
- (__mmask8)U))
-
- // 256 Bit : float -> uint
-
- static __inline__ __m256i
- __DEFAULT_FN_ATTRS
- _mm256_cvttsps_epu32(__m256 A) {
- return (
- (__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
- (__v8sf)A,
- (__v8si)
- _mm256_undefined_si256(),
- (__mmask8)-1,
- _MM_FROUND_CUR_DIRECTION));
- }
-
- static __inline__ __m256i
- __DEFAULT_FN_ATTRS
- _mm256_mask_cvttsps_epu32(
- __m256i W, __mmask8 U,
- __m256 A) {
- return (
- (__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
- (__v8sf)A, (__v8si)W, U,
- _MM_FROUND_CUR_DIRECTION));
- }
-
- static __inline__ __m256i
- __DEFAULT_FN_ATTRS
- _mm256_maskz_cvttsps_epu32(
- __mmask8 U, __m256 A) {
- return (
- (__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
- (__v8sf)A,
- (__v8si)
- _mm256_setzero_si256(),
- U,
- _MM_FROUND_CUR_DIRECTION));
- }
-
-#define _mm256_cvtts_roundps_epu32( \
- A, R) \
- ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( \
- (__v8sf)(__m256)A, \
- (__v8si)(__m256i) \
- _mm256_undefined_si256(), \
- (__mmask8) - 1, (int)(R)))
-
-#define _mm256_mask_cvtts_roundps_epu32( \
- W, U, A, R) \
- ((__m256i) \
- __builtin_ia32_vcvttps2udqs256_round_mask( \
- (__v8sf)(__m256)A, \
- (__v8si)(__m256i)W, \
- (__mmask8)U, (int)(R)))
-
-#define _mm256_maskz_cvtts_roundps_epu32( \
- U, A, R) \
- ((__m256i) \
- __builtin_ia32_vcvttps2udqs256_round_mask( \
- (__v8sf)(__m256)A, \
- (__v8si)(__m256i) \
- _mm256_setzero_si256(), \
- (__mmask8)U, (int)(R)))
-
- // 128 bit : float -> long
-#define _mm_cvttsps_epi64(A) \
- ((__m128i) \
- __builtin_ia32_vcvttps2qqs128_mask( \
- (__v4sf)(__m128)A, \
- (__v2di) \
- _mm_undefined_si128(), \
- (__mmask8) - 1))
-
-#define _mm_mask_cvttsps_epi64( \
- W, U, A) \
- ((__m128i) \
- __builtin_ia32_vcvttps2qqs128_mask( \
- (__v4sf)(__m128)A, \
- (__v2di)(__m128i)W, \
- (__mmask8)U))
-
-#define _mm_maskz_cvttsps_epi64(U, \
- A) \
- ((__m128i) \
- __builtin_ia32_vcvttps2qqs128_mask( \
- (__v4sf)(__m128)A, \
- (__v2di) \
- _mm_setzero_si128(), \
- (__mmask8)U))
- /*
- // 256 bit : float -> long
- */
-
- static __inline__ __m256i
- __DEFAULT_FN_ATTRS
- _mm256_cvttsps_epi64(__m128 A) {
- return (
- (__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
- (__v4sf)A,
- (__v4di)
- _mm256_undefined_si256(),
- (__mmask8)-1,
- _MM_FROUND_CUR_DIRECTION));
- }
- static __inline__ __m256i
- __DEFAULT_FN_ATTRS
- _mm256_mask_cvttsps_epi64(
- __m256i W, __mmask8 U,
- __m128 A) {
- return (
- (__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
- (__v4sf)A, (__v4di)W, U,
- _MM_FROUND_CUR_DIRECTION));
- }
-
- static __inline__ __m256i
- __DEFAULT_FN_ATTRS
- _mm256_maskz_cvttsps_epi64(
- __mmask8 U, __m128 A) {
- return (
- (__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
- (__v4sf)A,
- (__v4di)
- _mm256_setzero_si256(),
- U,
- _MM_FROUND_CUR_DIRECTION));
- }
-
-#define _mm256_cvtts_roundps_epi64( \
- A, R) \
- ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( \
- (__v4sf)(__m128)A, \
- (__v4di) \
- _mm256_undefined_si256(), \
- (__mmask8) - 1, (int)R))
-
-#define _mm256_mask_cvtts_roundps_epi64( \
- W, U, A, R) \
- ((__m256i) \
- __builtin_ia32_vcvttps2qqs256_round_mask( \
- (__v4sf)(__m128)A, \
- (__v4di)W, (__mmask8)U, \
- (int)R))
-
-#define _mm256_maskz_cvtts_roundps_epi64( \
- U, A, R) \
- ((__m256i) \
- __builtin_ia32_vcvttps2qqs256_round_mask( \
- (__v4sf)(__m128)A, \
- (__v4di) \
- _mm256_setzero_si256(), \
- (__mmask8)U, (int)R))
+// 128 Bit : Double -> int
+#define _mm_cvttspd_epi32(A) \
+ ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask( \
+ (__v2df)(__m128d)A, (__v4si)(__m128i)_mm_undefined_si128(), \
+ (__mmask8)(-1)))
- // 128 bit : float -> ulong
-#define _mm_cvttsps_epu64(A) \
- ((__m128i) \
- __builtin_ia32_vcvttps2uqqs128_mask( \
- (__v4sf)(__m128)A, \
- (__v2di) \
- _mm_undefined_si128(), \
- (__mmask8) - 1))
-
-#define _mm_mask_cvttsps_epu64( \
- W, U, A) \
- ((__m128i) \
- __builtin_ia32_vcvttps2uqqs128_mask( \
- (__v4sf)(__m128)A, \
- (__v2di)(__m128i)W, \
- (__mmask8)U))
-
-#define _mm_maskz_cvttsps_epu64(U, \
- A) \
- ((__m128i) \
- __builtin_ia32_vcvttps2uqqs128_mask( \
- (__v4sf)(__m128)A, \
- (__v2di) \
- _mm_setzero_si128(), \
- (__mmask8)U))
- /*
- // 256 bit : float -> ulong
- */
-
- static __inline__ __m256i
- __DEFAULT_FN_ATTRS
- _mm256_cvttsps_epu64(__m128 A) {
- return (
- (__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
- (__v4sf)A,
- (__v4di)
- _mm256_undefined_si256(),
- (__mmask8)-1,
- _MM_FROUND_CUR_DIRECTION));
- }
-
- static __inline__ __m256i
- __DEFAULT_FN_ATTRS
- _mm256_mask_cvttsps_epu64(
- __m256i W, __mmask8 U,
- __m128 A) {
- return (
- (__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
- (__v4sf)A, (__v4di)W, U,
- _MM_FROUND_CUR_DIRECTION));
- }
-
- static __inline__ __m256i
- __DEFAULT_FN_ATTRS
- _mm256_maskz_cvttsps_epu64(
- __mmask8 U, __m128 A) {
- return (
- (__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
- (__v4sf)A,
- (__v4di)
- _mm256_setzero_si256(),
- U,
- _MM_FROUND_CUR_DIRECTION));
- }
-
-#define _mm256_cvtts_roundps_epu64( \
- A, R) \
- ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( \
- (__v4sf)(__m128)A, \
- (__v4di) \
- _mm256_undefined_si256(), \
- (__mmask8) - 1, (int)R))
-
-#define _mm256_mask_cvtts_roundps_epu64( \
- W, U, A, R) \
- ((__m256i) \
- __builtin_ia32_vcvttps2uqqs256_round_mask( \
- (__v4sf)(__m128)A, \
- (__v4di)W, (__mmask8)U, \
- (int)R))
-
-#define _mm256_maskz_cvtts_roundps_epu64( \
- U, A, R) \
- ((__m256i) \
- __builtin_ia32_vcvttps2uqqs256_round_mask( \
- (__v4sf)(__m128)A, \
- (__v4di) \
- _mm256_setzero_si256(), \
+#define _mm_mask_cvttspd_epi32(W, U, A) \
+ ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask( \
+ (__v2df)(__m128d)A, (__v4si)(__m128i)W, (__mmask8)U))
+
+#define _mm_maskz_cvttspd_epi32(U, A) \
+ ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask( \
+ (__v2df)(__m128d)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U))
+
+// 256 Bit : Double -> int
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm256_cvttspd_epi32(__m256d A) {
+ return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
+ (__v4df)(__m256d)A, (__v4si)_mm_undefined_si128(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm256_mask_cvttspd_epi32(__m128i W, __mmask8 U, __m256d A) {
+ return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
+ (__v4df)A, (__v4si)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm256_maskz_cvttspd_epi32(__mmask8 U, __m256d A) {
+ return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
+ (__v4df)A, (__v4si)_mm_setzero_si128(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundpd_epi32(A, R) \
+ ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( \
+ (__v4df)(__m256d)A, (__v4si)(__m128i)_mm_undefined_si128(), \
+ (__mmask8) - 1, (int)(R)))
+
+#define _mm256_mask_cvtts_roundpd_epi32(W, U, A, R) \
+ ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( \
+ (__v4df)(__m256d)A, (__v4si)(__m128i)W, (__mmask8)U, (int)(R)))
+
+#define _mm256_maskz_cvtts_roundpd_epi32(U, A, R) \
+ ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( \
+ (__v4df)(__m256d)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U, \
+ (int)(R)))
+
+// 128 Bit : Double -> uint
+#define _mm_cvttspd_epu32(A) \
+ ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask( \
+ (__v2df)(__m128d)A, (__v4si)(__m128i)_mm_undefined_si128(), \
+ (__mmask8)(-1)))
+
+#define _mm_mask_cvttspd_epu32(W, U, A) \
+ ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask( \
+ ((__v2df)(__m128d)A), (__v4si)(__m128i)W, (__mmask8)U))
+
+#define _mm_maskz_cvttspd_epu32(U, A) \
+ ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask( \
+ (__v2df)(__m128d)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U))
+
+// 256 Bit : Double -> uint
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm256_cvttspd_epu32(__m256d A) {
+ return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
+ (__v4df)A, (__v4si)_mm_undefined_si128(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm256_mask_cvttspd_epu32(__m128i W, __mmask8 U, __m256d A) {
+ return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
+ (__v4df)A, (__v4si)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm256_maskz_cvttspd_epu32(__mmask8 U, __m256d A) {
+ return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
+ (__v4df)A, (__v4si)_mm_setzero_si128(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundpd_epu32(A, R) \
+ ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( \
+ (__v4df)(__m256d)A, (__v4si)(__m128i)_mm_undefined_si128(), \
+ (__mmask8) - 1, (int)(R)))
+
+#define _mm256_mask_cvtts_roundpd_epu32(W, U, A, R) \
+ ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( \
+ (__v4df)(__m256d)A, (__v4si)(__m128i)W, (__mmask8)U, (int)(R)))
+
+#define _mm256_maskz_cvtts_roundpd_epu32(U, A, R) \
+ ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( \
+ (__v4df)(__m256d)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U, \
+ (int)(R)))
+
+// 128 Bit : Double -> long
+#define _mm_cvttspd_epi64(A) \
+ ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask( \
+ (__v2df)(__m128d)A, (__v2di)_mm_undefined_si128(), (__mmask8) - 1))
+
+#define _mm_mask_cvttspd_epi64(W, U, A) \
+ ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask((__v2df)(__m128d)A, (__v2di)W, \
+ (__mmask8)U))
+
+#define _mm_maskz_cvttspd_epi64(U, A) \
+ ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask( \
+ (__v2df)(__m128d)A, (__v2di)_mm_setzero_si128(), (__mmask8)U))
+
+// 256 Bit : Double -> long
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttspd_epi64(__m256d A) {
+ return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
+ (__v4df)A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvttspd_epi64(__m256i W, __mmask8 U, __m256d A) {
+ return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
+ (__v4df)A, (__v4di)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvttspd_epi64(__mmask8 U, __m256d A) {
+ return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
+ (__v4df)A, (__v4di)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundpd_epi64(A, R) \
+ ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask( \
+ (__v4df)A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, (int)R))
+
+#define _mm256_mask_cvtts_roundpd_epi64(W, U, A, R) \
+ ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask((__v4df)A, (__v4di)W, \
+ (__mmask8)U, (int)R))
+
+#define _mm256_maskz_cvtts_roundpd_epi64(U, A, R) \
+ ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask( \
+ (__v4df)A, (__v4di)_mm256_setzero_si256(), (__mmask8)U, (int)R))
+
+// 128 Bit : Double -> ulong
+#define _mm_cvttspd_epu64(A) \
+ ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask( \
+ (__v2df)(__m128d)A, (__v2di)_mm_undefined_si128(), (__mmask8) - 1))
+
+#define _mm_mask_cvttspd_epu64(W, U, A) \
+ ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask((__v2df)(__m128d)A, (__v2di)W, \
+ (__mmask8)U))
+
+#define _mm_maskz_cvttspd_epu64(U, A) \
+ ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask( \
+ (__v2df)(__m128d)A, (__v2di)_mm_setzero_si128(), (__mmask8)U))
+
+// 256 Bit : Double -> ulong
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttspd_epu64(__m256d A) {
+ return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
+ (__v4df)A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvttspd_epu64(__m256i W, __mmask8 U, __m256d A) {
+ return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
+ (__v4df)A, (__v4di)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvttspd_epu64(__mmask8 U, __m256d A) {
+ return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
+ (__v4df)A, (__v4di)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundpd_epu64(A, R) \
+ ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( \
+ (__v4df)A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, (int)R))
+
+#define _mm256_mask_cvtts_roundpd_epu64(W, U, A, R) \
+ ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask((__v4df)A, (__v4di)W, \
+                                                      (__mmask8)U, (int)R))
+
+#define _mm256_maskz_cvtts_roundpd_epu64(U, A, R) \
+ ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( \
+ (__v4df)A, (__v4di)_mm256_setzero_si256(), (__mmask8)U, (int)R))
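+
+// Illustrative only, not part of the patch (sketch): as with the 32-bit
+// forms, negative inputs clamp to 0 and overflow clamps to UINT64_MAX:
+//   __m128i r = _mm_cvttspd_epu64(_mm_set_pd(-1.0, 3.9)); // r = {3, 0}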
+
+// 128 Bit : float -> int
+#define _mm_cvttsps_epi32(A) \
+ ((__m128i)__builtin_ia32_vcvttps2dqs128_mask( \
+ (__v4sf)(__m128)A, (__v4si)(__m128i)_mm_undefined_si128(), \
+ (__mmask8)(-1)))
+
+#define _mm_mask_cvttsps_epi32(W, U, A) \
+ ((__m128i)__builtin_ia32_vcvttps2dqs128_mask( \
+ (__v4sf)(__m128)A, (__v4si)(__m128i)W, (__mmask8)U))
+
+#define _mm_maskz_cvttsps_epi32(U, A) \
+ ((__m128i)__builtin_ia32_vcvttps2dqs128_mask( \
+ (__v4sf)(__m128)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U))
+
+// 256 Bit : float -> int
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttsps_epi32(__m256 A) {
+ return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
+ (__v8sf)A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvttsps_epi32(__m256i W, __mmask8 U, __m256 A) {
+ return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
+ (__v8sf)(__m256)A, (__v8si)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvttsps_epi32(__mmask8 U, __m256 A) {
+ return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
+ (__v8sf)(__m256)A, (__v8si)_mm256_setzero_si256(), U,
+ _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundps_epi32(A, R) \
+ ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( \
+ (__v8sf)(__m256)A, (__v8si)(__m256i)_mm256_undefined_si256(), \
+ (__mmask8) - 1, (int)(R)))
+
+#define _mm256_mask_cvtts_roundps_epi32(W, U, A, R) \
+ ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( \
+ (__v8sf)(__m256)A, (__v8si)(__m256i)W, (__mmask8)U, (int)(R)))
+
+#define _mm256_maskz_cvtts_roundps_epi32(U, A, R) \
+ ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( \
+ (__v8sf)(__m256)A, (__v8si)(__m256i)_mm256_setzero_si256(), (__mmask8)U, \
+ (int)(R)))
+
+// 128 Bit : float -> uint
+#define _mm_cvttsps_epu32(A) \
+ ((__m128i)__builtin_ia32_vcvttps2udqs128_mask( \
+ (__v4sf)(__m128)A, (__v4si)(__m128i)_mm_undefined_si128(), \
+ (__mmask8)(-1)))
+
+#define _mm_mask_cvttsps_epu32(W, U, A) \
+ ((__m128i)__builtin_ia32_vcvttps2udqs128_mask( \
+ (__v4sf)(__m128)A, (__v4si)(__m128i)W, (__mmask8)U))
+
+#define _mm_maskz_cvttsps_epu32(U, A) \
+ ((__m128i)__builtin_ia32_vcvttps2udqs128_mask( \
+ (__v4sf)(__m128)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U))
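+
+// Illustrative only, not part of the patch: negative floats saturate to 0
+// and values above UINT32_MAX saturate to UINT32_MAX. Sketch (assumes
+// -mavx10.2-256):
+//   __m128  v = _mm_set_ps(5e9f, -1.0f, 1.5f, 0.0f);
+//   __m128i r = _mm_cvttsps_epu32(v);      // r = {0, 1, 0, UINT32_MAX}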
+
+// 256 Bit : float -> uint
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttsps_epu32(__m256 A) {
+ return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
+ (__v8sf)A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvttsps_epu32(__m256i W, __mmask8 U, __m256 A) {
+ return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
+ (__v8sf)A, (__v8si)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvttsps_epu32(__mmask8 U, __m256 A) {
+ return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
+ (__v8sf)A, (__v8si)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundps_epu32(A, R) \
+ ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( \
+ (__v8sf)(__m256)A, (__v8si)(__m256i)_mm256_undefined_si256(), \
+ (__mmask8) - 1, (int)(R)))
+
+#define _mm256_mask_cvtts_roundps_epu32(W, U, A, R) \
+ ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( \
+ (__v8sf)(__m256)A, (__v8si)(__m256i)W, (__mmask8)U, (int)(R)))
+
+#define _mm256_maskz_cvtts_roundps_epu32(U, A, R) \
+ ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( \
+ (__v8sf)(__m256)A, (__v8si)(__m256i)_mm256_setzero_si256(), (__mmask8)U, \
+ (int)(R)))
+
+// 128 bit : float -> long
+#define _mm_cvttsps_epi64(A) \
+ ((__m128i)__builtin_ia32_vcvttps2qqs128_mask( \
+ (__v4sf)(__m128)A, (__v2di)_mm_undefined_si128(), (__mmask8) - 1))
+
+#define _mm_mask_cvttsps_epi64(W, U, A) \
+ ((__m128i)__builtin_ia32_vcvttps2qqs128_mask( \
+ (__v4sf)(__m128)A, (__v2di)(__m128i)W, (__mmask8)U))
+
+#define _mm_maskz_cvttsps_epi64(U, A) \
+ ((__m128i)__builtin_ia32_vcvttps2qqs128_mask( \
+ (__v4sf)(__m128)A, (__v2di)_mm_setzero_si128(), (__mmask8)U))
+/*
+// 256 bit : float -> long
+*/
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttsps_epi64(__m128 A) {
+ return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
+ (__v4sf)A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION));
+}
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvttsps_epi64(__m256i W, __mmask8 U, __m128 A) {
+ return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
+ (__v4sf)A, (__v4di)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvttsps_epi64(__mmask8 U, __m128 A) {
+ return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
+ (__v4sf)A, (__v4di)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundps_epi64(A, R) \
+ ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( \
+ (__v4sf)(__m128)A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, \
+ (int)R))
+
+#define _mm256_mask_cvtts_roundps_epi64(W, U, A, R) \
+ ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( \
+ (__v4sf)(__m128)A, (__v4di)W, (__mmask8)U, (int)R))
+
+#define _mm256_maskz_cvtts_roundps_epi64(U, A, R) \
+ ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( \
+ (__v4sf)(__m128)A, (__v4di)_mm256_setzero_si256(), (__mmask8)U, (int)R))
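+
+// Illustrative only, not part of the patch: note the widening shape of the
+// float -> 64-bit forms; the source is the four floats of an __m128 and the
+// result fills an __m256i. Sketch:
+//   __m128  v = _mm_set_ps(1e30f, -1.5f, 2.0f, 0.0f);
+//   __m256i r = _mm256_cvttsps_epi64(v);   // r = {0, 2, -1, INT64_MAX}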
+
+// 128 bit : float -> ulong
+#define _mm_cvttsps_epu64(A) \
+ ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask( \
+ (__v4sf)(__m128)A, (__v2di)_mm_undefined_si128(), (__mmask8) - 1))
+
+#define _mm_mask_cvttsps_epu64(W, U, A) \
+ ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask( \
+ (__v4sf)(__m128)A, (__v2di)(__m128i)W, (__mmask8)U))
+
+#define _mm_maskz_cvttsps_epu64(U, A) \
+ ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask( \
+ (__v4sf)(__m128)A, (__v2di)_mm_setzero_si128(), (__mmask8)U))
+/*
+// 256 bit : float -> ulong
+*/
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttsps_epu64(__m128 A) {
+ return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
+ (__v4sf)A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvttsps_epu64(__m256i W, __mmask8 U, __m128 A) {
+ return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
+ (__v4sf)A, (__v4di)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvttsps_epu64(__mmask8 U, __m128 A) {
+ return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
+ (__v4sf)A, (__v4di)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm256_cvtts_roundps_epu64(A, R) \
+ ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( \
+ (__v4sf)(__m128)A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, \
+ (int)R))
+
+#define _mm256_mask_cvtts_roundps_epu64(W, U, A, R) \
+ ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( \
+ (__v4sf)(__m128)A, (__v4di)W, (__mmask8)U, (int)R))
+
+#define _mm256_maskz_cvtts_roundps_epu64(U, A, R) \
+ ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( \
+ (__v4sf)(__m128)A, (__v4di)_mm256_setzero_si256(), (__mmask8)U, (int)R))
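+
+// Illustrative only, not part of the patch: a minimal standalone caller
+// (function name and target attribute shown are a hypothetical sketch):
+//   #include <immintrin.h>
+//   __attribute__((target("avx10.2-256")))
+//   __m256i to_u64_sat(__m128 v) {
+//     return _mm256_cvtts_roundps_epu64(v, _MM_FROUND_NO_EXC);
+//   }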
+
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS
-#endif /*__AVX10_2SATCVTDSINTRIN_H*/
+#endif // __AVX10_2SATCVTDSINTRIN_H
diff --git a/clang/lib/Sema/SemaX86.cpp b/clang/lib/Sema/SemaX86.cpp
index 452a53e8883335..6a4d78f0ca9084 100644
--- a/clang/lib/Sema/SemaX86.cpp
+++ b/clang/lib/Sema/SemaX86.cpp
@@ -46,14 +46,14 @@ bool SemaX86::CheckBuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_vcvttsh2si64:
case X86::BI__builtin_ia32_vcvttsh2usi32:
case X86::BI__builtin_ia32_vcvttsh2usi64:
- case X86::BI__builtin_ia32_vcvttssd2si32:
- case X86::BI__builtin_ia32_vcvttssd2usi32:
- case X86::BI__builtin_ia32_vcvttsss2si32:
- case X86::BI__builtin_ia32_vcvttsss2usi32:
- case X86::BI__builtin_ia32_vcvttssd2si64:
- case X86::BI__builtin_ia32_vcvttssd2usi64:
- case X86::BI__builtin_ia32_vcvttsss2si64:
- case X86::BI__builtin_ia32_vcvttsss2usi64:
+ case X86::BI__builtin_ia32_vcvttsd2sis32:
+ case X86::BI__builtin_ia32_vcvttsd2usis32:
+ case X86::BI__builtin_ia32_vcvttss2sis32:
+ case X86::BI__builtin_ia32_vcvttss2usis32:
+ case X86::BI__builtin_ia32_vcvttsd2sis64:
+ case X86::BI__builtin_ia32_vcvttsd2usis64:
+ case X86::BI__builtin_ia32_vcvttss2sis64:
+ case X86::BI__builtin_ia32_vcvttss2usis64:
ArgNum = 1;
break;
case X86::BI__builtin_ia32_maxpd512:
diff --git a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c
index 3c6f7f77100088..32b1a2094c4da3 100644
--- a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c
+++ b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c
@@ -5,37 +5,37 @@
long long test_mm_cvttssd_si64(__m128d __A) {
// CHECK-LABEL: @test_mm_cvttssd_si64(
- // CHECK: @llvm.x86.avx512.vcvttssd2si64(<2 x double>
+ // CHECK: @llvm.x86.avx512.vcvttsd2sis64(<2 x double>
return _mm_cvtts_roundsd_si64(__A, _MM_FROUND_NO_EXC);
}
long long test_mm_cvttssd_i64(__m128d __A) {
// CHECK-LABEL: @test_mm_cvttssd_i64(
- // CHECK: @llvm.x86.avx512.vcvttssd2si64(<2 x double>
+ // CHECK: @llvm.x86.avx512.vcvttsd2sis64(<2 x double>
return _mm_cvtts_roundsd_i64(__A, _MM_FROUND_NO_EXC);
}
unsigned long long test_mm_cvttssd_u64(__m128d __A) {
// CHECK-LABEL: @test_mm_cvttssd_u64(
- // CHECK: @llvm.x86.avx512.vcvttssd2usi64(<2 x double>
+ // CHECK: @llvm.x86.avx512.vcvttsd2usis64(<2 x double>
return _mm_cvtts_roundsd_u64(__A, _MM_FROUND_NO_EXC);
}
long long test_mm_cvttsss_i64(__m128 __A) {
// CHECK-LABEL: @test_mm_cvttsss_i64(
- // CHECK: @llvm.x86.avx512.vcvttsss2si64(<4 x float>
+ // CHECK: @llvm.x86.avx512.vcvttss2sis64(<4 x float>
return _mm_cvtts_roundss_i64(__A, _MM_FROUND_NO_EXC);
}
long long test_mm_cvttsss_si64(__m128 __A) {
// CHECK-LABEL: @test_mm_cvttsss_si64(
- // CHECK: @llvm.x86.avx512.vcvttsss2si64(<4 x float>
+ // CHECK: @llvm.x86.avx512.vcvttss2sis64(<4 x float>
return _mm_cvtts_roundss_si64(__A, _MM_FROUND_NO_EXC);
}
unsigned long long test_mm_cvttsss_u64(__m128 __A) {
// CHECK-LABEL: @test_mm_cvttsss_u64(
- // CHECK: @llvm.x86.avx512.vcvttsss2usi64(<4 x float>
+ // CHECK: @llvm.x86.avx512.vcvttss2usis64(<4 x float>
return _mm_cvtts_roundss_u64(__A, _MM_FROUND_NO_EXC);
}
diff --git a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c
index 5ea91d67b461d0..889ebc221c79e4 100644
--- a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c
+++ b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c
@@ -5,37 +5,37 @@
int test_mm_cvttssd_i32(__m128d __A) {
// CHECK-LABEL: @test_mm_cvttssd_i32
- // CHECK: @llvm.x86.avx512.vcvttssd2si
+ // CHECK: @llvm.x86.avx512.vcvttsd2sis
return _mm_cvtts_roundsd_i32(__A, _MM_FROUND_NO_EXC);
}
int test_mm_cvttssd_si32(__m128d __A) {
// CHECK-LABEL: @test_mm_cvttssd_si32(
- // CHECK: @llvm.x86.avx512.vcvttssd2si(<2 x double>
+ // CHECK: @llvm.x86.avx512.vcvttsd2sis(<2 x double>
return _mm_cvtts_roundsd_si32(__A, _MM_FROUND_NO_EXC);
}
unsigned test_mm_cvttssd_u32(__m128d __A) {
// CHECK-LABEL: @test_mm_cvttssd_u32(
- // CHECK: @llvm.x86.avx512.vcvttssd2usi(<2 x double>
+ // CHECK: @llvm.x86.avx512.vcvttsd2usis(<2 x double>
return _mm_cvtts_roundsd_u32(__A, _MM_FROUND_NO_EXC);
}
int test_mm_cvttsss_i32(__m128 __A) {
// CHECK-LABEL: @test_mm_cvttsss_i32(
- // CHECK: @llvm.x86.avx512.vcvttsss2si(<4 x float>
+ // CHECK: @llvm.x86.avx512.vcvttss2sis(<4 x float>
return _mm_cvtts_roundss_i32(__A, _MM_FROUND_NO_EXC);
}
int test_mm_cvttsss_si32(__m128 __A) {
// CHECK-LABEL: @test_mm_cvttsss_si32(
- // CHECK: @llvm.x86.avx512.vcvttsss2si(<4 x float>
+ // CHECK: @llvm.x86.avx512.vcvttss2sis(<4 x float>
return _mm_cvtts_roundss_si32(__A, _MM_FROUND_NO_EXC);
}
unsigned test_mm_cvttsss_u32(__m128 __A) {
// CHECK-LABEL: @test_mm_cvttsss_u32(
- // CHECK: @llvm.x86.avx512.vcvttsss2usi(<4 x float>
+ // CHECK: @llvm.x86.avx512.vcvttss2usis(<4 x float>
return _mm_cvtts_roundss_u32(__A, _MM_FROUND_NO_EXC);
}
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 6efda49fe3d8c4..5df5348c993b76 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -5522,28 +5522,28 @@ let TargetPrefix = "x86" in {
// conversion with saturation
let TargetPrefix = "x86" in {
- def int_x86_avx512_vcvttsss2si : ClangBuiltin<"__builtin_ia32_vcvttsss2si32">,
+ def int_x86_avx512_vcvttss2sis : ClangBuiltin<"__builtin_ia32_vcvttss2sis32">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_vcvttsss2si64 : ClangBuiltin<"__builtin_ia32_vcvttsss2si64">,
+ def int_x86_avx512_vcvttss2sis64 : ClangBuiltin<"__builtin_ia32_vcvttss2sis64">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_vcvttsss2usi : ClangBuiltin<"__builtin_ia32_vcvttsss2usi32">,
+ def int_x86_avx512_vcvttss2usis : ClangBuiltin<"__builtin_ia32_vcvttss2usis32">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_vcvttsss2usi64 : ClangBuiltin<"__builtin_ia32_vcvttsss2usi64">,
+ def int_x86_avx512_vcvttss2usis64 : ClangBuiltin<"__builtin_ia32_vcvttss2usis64">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_vcvttssd2si : ClangBuiltin<"__builtin_ia32_vcvttssd2si32">,
+ def int_x86_avx512_vcvttsd2sis : ClangBuiltin<"__builtin_ia32_vcvttsd2sis32">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_vcvttssd2si64 : ClangBuiltin<"__builtin_ia32_vcvttssd2si64">,
+ def int_x86_avx512_vcvttsd2sis64 : ClangBuiltin<"__builtin_ia32_vcvttsd2sis64">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_vcvttssd2usi : ClangBuiltin<"__builtin_ia32_vcvttssd2usi32">,
+ def int_x86_avx512_vcvttsd2usis : ClangBuiltin<"__builtin_ia32_vcvttsd2usis32">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_vcvttssd2usi64 : ClangBuiltin<"__builtin_ia32_vcvttssd2usi64">,
+ def int_x86_avx512_vcvttsd2usis64 : ClangBuiltin<"__builtin_ia32_vcvttsd2usis64">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
def int_x86_avx512_mask_vcvttpd2dqs_128 : ClangBuiltin<"__builtin_ia32_vcvttpd2dqs128_mask">,
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 77c3947c3801e6..0e6c82e4e2e96c 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -1574,21 +1574,21 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::CVTS2UI_RND),
X86_INTRINSIC_DATA(avx512_vcvtss2usi64, INTR_TYPE_1OP, X86ISD::CVTS2UI,
X86ISD::CVTS2UI_RND),
- X86_INTRINSIC_DATA(avx512_vcvttssd2si, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SIS,
+ X86_INTRINSIC_DATA(avx512_vcvttsd2sis, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SIS,
X86ISD::CVTTS2SIS_SAE),
- X86_INTRINSIC_DATA(avx512_vcvttssd2si64, INTR_TYPE_1OP_SAE,
+ X86_INTRINSIC_DATA(avx512_vcvttsd2sis64, INTR_TYPE_1OP_SAE,
X86ISD::CVTTS2SIS, X86ISD::CVTTS2SIS_SAE),
- X86_INTRINSIC_DATA(avx512_vcvttssd2usi, INTR_TYPE_1OP_SAE,
+ X86_INTRINSIC_DATA(avx512_vcvttsd2usis, INTR_TYPE_1OP_SAE,
X86ISD::CVTTS2UIS, X86ISD::CVTTS2UIS_SAE),
- X86_INTRINSIC_DATA(avx512_vcvttssd2usi64, INTR_TYPE_1OP_SAE,
+ X86_INTRINSIC_DATA(avx512_vcvttsd2usis64, INTR_TYPE_1OP_SAE,
X86ISD::CVTTS2UIS, X86ISD::CVTTS2UIS_SAE),
- X86_INTRINSIC_DATA(avx512_vcvttsss2si, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SIS,
+ X86_INTRINSIC_DATA(avx512_vcvttss2sis, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SIS,
X86ISD::CVTTS2SIS_SAE),
- X86_INTRINSIC_DATA(avx512_vcvttsss2si64, INTR_TYPE_1OP_SAE,
+ X86_INTRINSIC_DATA(avx512_vcvttss2sis64, INTR_TYPE_1OP_SAE,
X86ISD::CVTTS2SIS, X86ISD::CVTTS2SIS_SAE),
- X86_INTRINSIC_DATA(avx512_vcvttsss2usi, INTR_TYPE_1OP_SAE,
+ X86_INTRINSIC_DATA(avx512_vcvttss2usis, INTR_TYPE_1OP_SAE,
X86ISD::CVTTS2UIS, X86ISD::CVTTS2UIS_SAE),
- X86_INTRINSIC_DATA(avx512_vcvttsss2usi64, INTR_TYPE_1OP_SAE,
+ X86_INTRINSIC_DATA(avx512_vcvttss2usis64, INTR_TYPE_1OP_SAE,
X86ISD::CVTTS2UIS, X86ISD::CVTTS2UIS_SAE),
X86_INTRINSIC_DATA(avx512_vfmadd_f32, INTR_TYPE_3OP, ISD::FMA,
diff --git a/llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll
index 7dbc97d7e1aaa5..84374216c0e89f 100644
--- a/llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll
@@ -2,77 +2,77 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X64
; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X86
-define i32 @test_x86_avx512_vcvttssd2usi(<2 x double> %a0) {
-; CHECK-LABEL: test_x86_avx512_vcvttssd2usi:
+define i32 @test_x86_avx512_vcvttsd2usis(<2 x double> %a0) {
+; CHECK-LABEL: test_x86_avx512_vcvttsd2usis:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttsd2usis %xmm0, %ecx # encoding: [0x62,0xf5,0x7f,0x08,0x6c,0xc8]
; CHECK-NEXT: vcvttsd2usis {sae}, %xmm0, %eax # encoding: [0x62,0xf5,0x7f,0x18,0x6c,0xc0]
; CHECK-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res0 = call i32 @llvm.x86.avx512.vcvttssd2usi(<2 x double> %a0, i32 4) ;
- %res1 = call i32 @llvm.x86.avx512.vcvttssd2usi(<2 x double> %a0, i32 8) ;
+ %res0 = call i32 @llvm.x86.avx512.vcvttsd2usis(<2 x double> %a0, i32 4) ;
+ %res1 = call i32 @llvm.x86.avx512.vcvttsd2usis(<2 x double> %a0, i32 8) ;
%res2 = add i32 %res0, %res1
ret i32 %res2
}
-declare i32 @llvm.x86.avx512.vcvttssd2usi(<2 x double>, i32) nounwind readnone
+declare i32 @llvm.x86.avx512.vcvttsd2usis(<2 x double>, i32) nounwind readnone
-define i32 @test_x86_avx512_vcvttssd2si(<2 x double> %a0) {
-; CHECK-LABEL: test_x86_avx512_vcvttssd2si:
+define i32 @test_x86_avx512_vcvttsd2sis(<2 x double> %a0) {
+; CHECK-LABEL: test_x86_avx512_vcvttsd2sis:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttsd2sis %xmm0, %ecx # encoding: [0x62,0xf5,0x7f,0x08,0x6d,0xc8]
; CHECK-NEXT: vcvttsd2sis {sae}, %xmm0, %eax # encoding: [0x62,0xf5,0x7f,0x18,0x6d,0xc0]
; CHECK-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res0 = call i32 @llvm.x86.avx512.vcvttssd2si(<2 x double> %a0, i32 4) ;
- %res1 = call i32 @llvm.x86.avx512.vcvttssd2si(<2 x double> %a0, i32 8) ;
+ %res0 = call i32 @llvm.x86.avx512.vcvttsd2sis(<2 x double> %a0, i32 4) ;
+ %res1 = call i32 @llvm.x86.avx512.vcvttsd2sis(<2 x double> %a0, i32 8) ;
%res2 = add i32 %res0, %res1
ret i32 %res2
}
-declare i32 @llvm.x86.avx512.vcvttssd2si(<2 x double>, i32) nounwind readnone
+declare i32 @llvm.x86.avx512.vcvttsd2sis(<2 x double>, i32) nounwind readnone
-define i32 @test_x86_avx512_vcvttsss2si(<4 x float> %a0) {
-; CHECK-LABEL: test_x86_avx512_vcvttsss2si:
+define i32 @test_x86_avx512_vcvttss2sis(<4 x float> %a0) {
+; CHECK-LABEL: test_x86_avx512_vcvttss2sis:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttss2sis {sae}, %xmm0, %ecx # encoding: [0x62,0xf5,0x7e,0x18,0x6d,0xc8]
; CHECK-NEXT: vcvttss2sis %xmm0, %eax # encoding: [0x62,0xf5,0x7e,0x08,0x6d,0xc0]
; CHECK-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res0 = call i32 @llvm.x86.avx512.vcvttsss2si(<4 x float> %a0, i32 8) ;
- %res1 = call i32 @llvm.x86.avx512.vcvttsss2si(<4 x float> %a0, i32 4) ;
+ %res0 = call i32 @llvm.x86.avx512.vcvttss2sis(<4 x float> %a0, i32 8) ;
+ %res1 = call i32 @llvm.x86.avx512.vcvttss2sis(<4 x float> %a0, i32 4) ;
%res2 = add i32 %res0, %res1
ret i32 %res2
}
-declare i32 @llvm.x86.avx512.vcvttsss2si(<4 x float>, i32) nounwind readnone
+declare i32 @llvm.x86.avx512.vcvttss2sis(<4 x float>, i32) nounwind readnone
-define i32 @test_x86_avx512_vcvttsss2si_load(ptr %a0) {
-; X64-LABEL: test_x86_avx512_vcvttsss2si_load:
+define i32 @test_x86_avx512_vcvttss2sis_load(ptr %a0) {
+; X64-LABEL: test_x86_avx512_vcvttss2sis_load:
; X64: # %bb.0:
; X64-NEXT: vcvttss2sis (%rdi), %eax # encoding: [0x62,0xf5,0x7e,0x08,0x6d,0x07]
; X64-NEXT: retq # encoding: [0xc3]
;
-; X86-LABEL: test_x86_avx512_vcvttsss2si_load:
+; X86-LABEL: test_x86_avx512_vcvttss2sis_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vcvttss2sis (%eax), %eax # encoding: [0x62,0xf5,0x7e,0x08,0x6d,0x00]
; X86-NEXT: retl # encoding: [0xc3]
%a1 = load <4 x float>, ptr %a0
- %res = call i32 @llvm.x86.avx512.vcvttsss2si(<4 x float> %a1, i32 4) ;
+ %res = call i32 @llvm.x86.avx512.vcvttss2sis(<4 x float> %a1, i32 4) ;
ret i32 %res
}
-define i32 @test_x86_avx512_vcvttsss2usi(<4 x float> %a0) {
-; CHECK-LABEL: test_x86_avx512_vcvttsss2usi:
+define i32 @test_x86_avx512_vcvttss2usis(<4 x float> %a0) {
+; CHECK-LABEL: test_x86_avx512_vcvttss2usis:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttss2usis {sae}, %xmm0, %ecx # encoding: [0x62,0xf5,0x7e,0x18,0x6c,0xc8]
; CHECK-NEXT: vcvttss2usis %xmm0, %eax # encoding: [0x62,0xf5,0x7e,0x08,0x6c,0xc0]
; CHECK-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res0 = call i32 @llvm.x86.avx512.vcvttsss2usi(<4 x float> %a0, i32 8) ;
- %res1 = call i32 @llvm.x86.avx512.vcvttsss2usi(<4 x float> %a0, i32 4) ;
+ %res0 = call i32 @llvm.x86.avx512.vcvttss2usis(<4 x float> %a0, i32 8) ;
+ %res1 = call i32 @llvm.x86.avx512.vcvttss2usis(<4 x float> %a0, i32 4) ;
%res2 = add i32 %res0, %res1
ret i32 %res2
}
-declare i32 @llvm.x86.avx512.vcvttsss2usi(<4 x float>, i32) nounwind readnone
+declare i32 @llvm.x86.avx512.vcvttss2usis(<4 x float>, i32) nounwind readnone
define <4 x i32> @test_int_x86_mask_vcvtt_pd2dqs_256(<4 x double> %x0, <4 x i32> %src, i8 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_pd2dqs_256:
diff --git a/llvm/test/CodeGen/X86/avx10_2satcvtds-x64-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2satcvtds-x64-intrinsics.ll
index 5cf613e89ba502..460411e4222dee 100644
--- a/llvm/test/CodeGen/X86/avx10_2satcvtds-x64-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2satcvtds-x64-intrinsics.ll
@@ -8,12 +8,12 @@ define i64 @test_x86_avx512_vcvttsd2si64(<2 x double> %a0) {
; CHECK-NEXT: vcvttsd2sis {sae}, %xmm0, %rax # encoding: [0x62,0xf5,0xff,0x18,0x6d,0xc0]
; CHECK-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res0 = call i64 @llvm.x86.avx512.vcvttssd2si64(<2 x double> %a0, i32 4) ;
- %res1 = call i64 @llvm.x86.avx512.vcvttssd2si64(<2 x double> %a0, i32 8) ;
+ %res0 = call i64 @llvm.x86.avx512.vcvttsd2sis64(<2 x double> %a0, i32 4) ;
+ %res1 = call i64 @llvm.x86.avx512.vcvttsd2sis64(<2 x double> %a0, i32 8) ;
%res2 = add i64 %res0, %res1
ret i64 %res2
}
-declare i64 @llvm.x86.avx512.vcvttssd2si64(<2 x double>, i32) nounwind readnone
+declare i64 @llvm.x86.avx512.vcvttsd2sis64(<2 x double>, i32) nounwind readnone
define i64 @test_x86_avx512_vcvttsd2usi64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_vcvttsd2usi64:
@@ -22,37 +22,37 @@ define i64 @test_x86_avx512_vcvttsd2usi64(<2 x double> %a0) {
; CHECK-NEXT: vcvttsd2usis {sae}, %xmm0, %rax # encoding: [0x62,0xf5,0xff,0x18,0x6c,0xc0]
; CHECK-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res0 = call i64 @llvm.x86.avx512.vcvttssd2usi64(<2 x double> %a0, i32 4) ;
- %res1 = call i64 @llvm.x86.avx512.vcvttssd2usi64(<2 x double> %a0, i32 8) ;
+ %res0 = call i64 @llvm.x86.avx512.vcvttsd2usis64(<2 x double> %a0, i32 4) ;
+ %res1 = call i64 @llvm.x86.avx512.vcvttsd2usis64(<2 x double> %a0, i32 8) ;
%res2 = add i64 %res0, %res1
ret i64 %res2
}
-declare i64 @llvm.x86.avx512.vcvttssd2usi64(<2 x double>, i32) nounwind readnone
+declare i64 @llvm.x86.avx512.vcvttsd2usis64(<2 x double>, i32) nounwind readnone
-define i64 @test_x86_avx512_vcvttsss2si64(<4 x float> %a0) {
-; CHECK-LABEL: test_x86_avx512_vcvttsss2si64:
+define i64 @test_x86_avx512_vcvttss2sis64(<4 x float> %a0) {
+; CHECK-LABEL: test_x86_avx512_vcvttss2sis64:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttss2sis %xmm0, %rcx # encoding: [0x62,0xf5,0xfe,0x08,0x6d,0xc8]
; CHECK-NEXT: vcvttss2sis {sae}, %xmm0, %rax # encoding: [0x62,0xf5,0xfe,0x18,0x6d,0xc0]
; CHECK-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res0 = call i64 @llvm.x86.avx512.vcvttsss2si64(<4 x float> %a0, i32 4) ;
- %res1 = call i64 @llvm.x86.avx512.vcvttsss2si64(<4 x float> %a0, i32 8) ;
+ %res0 = call i64 @llvm.x86.avx512.vcvttss2sis64(<4 x float> %a0, i32 4) ;
+ %res1 = call i64 @llvm.x86.avx512.vcvttss2sis64(<4 x float> %a0, i32 8) ;
%res2 = add i64 %res0, %res1
ret i64 %res2
}
-declare i64 @llvm.x86.avx512.vcvttsss2si64(<4 x float>, i32) nounwind readnone
+declare i64 @llvm.x86.avx512.vcvttss2sis64(<4 x float>, i32) nounwind readnone
-define i64 @test_x86_avx512_vcvttsss2usi64(<4 x float> %a0) {
-; CHECK-LABEL: test_x86_avx512_vcvttsss2usi64:
+define i64 @test_x86_avx512_vcvttss2usis64(<4 x float> %a0) {
+; CHECK-LABEL: test_x86_avx512_vcvttss2usis64:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttss2usis %xmm0, %rcx # encoding: [0x62,0xf5,0xfe,0x08,0x6c,0xc8]
; CHECK-NEXT: vcvttss2usis {sae}, %xmm0, %rax # encoding: [0x62,0xf5,0xfe,0x18,0x6c,0xc0]
; CHECK-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res0 = call i64 @llvm.x86.avx512.vcvttsss2usi64(<4 x float> %a0, i32 4) ;
- %res1 = call i64 @llvm.x86.avx512.vcvttsss2usi64(<4 x float> %a0, i32 8) ;
+ %res0 = call i64 @llvm.x86.avx512.vcvttss2usis64(<4 x float> %a0, i32 4) ;
+ %res1 = call i64 @llvm.x86.avx512.vcvttss2usis64(<4 x float> %a0, i32 8) ;
%res2 = add i64 %res0, %res1
ret i64 %res2
}
-declare i64 @llvm.x86.avx512.vcvttsss2usi64(<4 x float>, i32) nounwind readnone
+declare i64 @llvm.x86.avx512.vcvttss2usis64(<4 x float>, i32) nounwind readnone
>From 90548f990114dc0b94277ea669b317a6102b2f56 Mon Sep 17 00:00:00 2001
From: Malay Sanghi <malay.sanghi at intel.com>
Date: Thu, 5 Sep 2024 17:00:48 +0800
Subject: [PATCH 4/8] review4
---
clang/include/clang/Basic/BuiltinsX86_64.def | 8 ++--
clang/lib/Headers/avx10_2satcvtdsintrin.h | 28 ++++++-------
.../X86/avx10_2_512satcvtds-builtins.c | 42 +++----------------
.../X86/avx10_2satcvtds-builtins-x64.c | 39 +++++++++++++++++
.../CodeGen/X86/avx10_2satcvtds-builtins.c | 13 ++++--
5 files changed, 69 insertions(+), 61 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86_64.def b/clang/include/clang/Basic/BuiltinsX86_64.def
index 4bd71ba274b968..db381aa77e7612 100644
--- a/clang/include/clang/Basic/BuiltinsX86_64.def
+++ b/clang/include/clang/Basic/BuiltinsX86_64.def
@@ -100,10 +100,10 @@ TARGET_BUILTIN(__builtin_ia32_vcvttsh2usi64, "UOiV8xIi", "ncV:128:", "avx512fp16
TARGET_BUILTIN(__builtin_ia32_directstore_u64, "vULi*ULi", "n", "movdiri")
// AVX10.2 SATCVT-DS
-TARGET_BUILTIN(__builtin_ia32_vcvttsd2sis64, "OiV2dIi", "ncV:128:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvttsd2usis64, "UOiV2dIi", "ncV:128:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvttss2sis64, "OiV4fIi", "ncV:128:", "avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_vcvttss2usis64, "UOiV4fIi", "ncV:128:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttsd2sis64, "OiV2dIi", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttsd2usis64, "UOiV2dIi", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttss2sis64, "OiV4fIi", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttss2usis64, "UOiV4fIi", "ncV:128:", "avx10.2-256")
// UINTR
TARGET_BUILTIN(__builtin_ia32_clui, "v", "n", "uintr")
diff --git a/clang/lib/Headers/avx10_2satcvtdsintrin.h b/clang/lib/Headers/avx10_2satcvtdsintrin.h
index 75c6114a5bb2e1..4e91b4a17f0c4b 100644
--- a/clang/lib/Headers/avx10_2satcvtdsintrin.h
+++ b/clang/lib/Headers/avx10_2satcvtdsintrin.h
@@ -68,7 +68,7 @@
(const int)(R)))
#endif /* __x86_64__ */
-// 128 Bit : Double -> int
+// 128 Bit : Double -> int
#define _mm_cvttspd_epi32(A) \
((__m128i)__builtin_ia32_vcvttpd2dqs128_mask( \
(__v2df)(__m128d)A, (__v4si)(__m128i)_mm_undefined_si128(), \
@@ -82,7 +82,7 @@
((__m128i)__builtin_ia32_vcvttpd2dqs128_mask( \
(__v2df)(__m128d)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U))
-// 256 Bit : Double -> int
+// 256 Bit : Double -> int
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm256_cvttspd_epi32(__m256d A) {
return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
@@ -116,7 +116,7 @@ _mm256_maskz_cvttspd_epi32(__mmask8 U, __m256d A) {
(__v4df)(__m256d)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U, \
(int)(R)))
-// 128 Bit : Double -> uint
+// 128 Bit : Double -> uint
#define _mm_cvttspd_epu32(A) \
((__m128i)__builtin_ia32_vcvttpd2udqs128_mask( \
(__v2df)(__m128d)A, (__v4si)(__m128i)_mm_undefined_si128(), \
@@ -130,7 +130,7 @@ _mm256_maskz_cvttspd_epi32(__mmask8 U, __m256d A) {
((__m128i)__builtin_ia32_vcvttpd2udqs128_mask( \
(__v2df)(__m128d)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U))
-// 256 Bit : Double -> uint
+// 256 Bit : Double -> uint
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm256_cvttspd_epu32(__m256d A) {
return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
@@ -164,7 +164,7 @@ _mm256_maskz_cvttspd_epu32(__mmask8 U, __m256d A) {
(__v4df)(__m256d)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U, \
(int)(R)))
-// 128 Bit : Double -> long
+// 128 Bit : Double -> long
#define _mm_cvttspd_epi64(A) \
((__m128i)__builtin_ia32_vcvttpd2qqs128_mask( \
(__v2df)(__m128d)A, (__v2di)_mm_undefined_si128(), (__mmask8) - 1))
@@ -177,7 +177,7 @@ _mm256_maskz_cvttspd_epu32(__mmask8 U, __m256d A) {
((__m128i)__builtin_ia32_vcvttpd2qqs128_mask( \
(__v2df)(__m128d)A, (__v2di)_mm_setzero_si128(), (__mmask8)U))
-// 256 Bit : Double -> long
+// 256 Bit : Double -> long
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttspd_epi64(__m256d A) {
return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
(__v4df)A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
@@ -208,7 +208,7 @@ _mm256_maskz_cvttspd_epi64(__mmask8 U, __m256d A) {
((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask( \
(__v4df)A, (__v4di)_mm256_setzero_si256(), (__mmask8)U, (int)R))
-// 128 Bit : Double -> ulong
+// 128 Bit : Double -> ulong
#define _mm_cvttspd_epu64(A) \
((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask( \
(__v2df)(__m128d)A, (__v2di)_mm_undefined_si128(), (__mmask8) - 1))
@@ -221,7 +221,7 @@ _mm256_maskz_cvttspd_epi64(__mmask8 U, __m256d A) {
((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask( \
(__v2df)(__m128d)A, (__v2di)_mm_setzero_si128(), (__mmask8)U))
-// 256 Bit : Double -> ulong
+// 256 Bit : Double -> ulong
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttspd_epu64(__m256d A) {
return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
@@ -253,7 +253,7 @@ _mm256_maskz_cvttspd_epu64(__mmask8 U, __m256d A) {
((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( \
(__v4df)A, (__v4di)_mm256_setzero_si256(), (__mmask8)U, (int)R))
-// 128 Bit : float -> int
+// 128 Bit : float -> int
#define _mm_cvttsps_epi32(A) \
((__m128i)__builtin_ia32_vcvttps2dqs128_mask( \
(__v4sf)(__m128)A, (__v4si)(__m128i)_mm_undefined_si128(), \
@@ -267,7 +267,7 @@ _mm256_maskz_cvttspd_epu64(__mmask8 U, __m256d A) {
((__m128i)__builtin_ia32_vcvttps2dqs128_mask( \
(__v4sf)(__m128)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U))
-// 256 Bit : float -> int
+// 256 Bit : float -> int
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttsps_epi32(__m256 A) {
return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
(__v8sf)A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
@@ -301,7 +301,7 @@ _mm256_maskz_cvttsps_epi32(__mmask8 U, __m256 A) {
(__v8sf)(__m256)A, (__v8si)(__m256i)_mm256_setzero_si256(), (__mmask8)U, \
(int)(R)))
-// 128 Bit : float -> uint
+// 128 Bit : float -> uint
#define _mm_cvttsps_epu32(A) \
((__m128i)__builtin_ia32_vcvttps2udqs128_mask( \
(__v4sf)(__m128)A, (__v4si)(__m128i)_mm_undefined_si128(), \
@@ -315,7 +315,7 @@ _mm256_maskz_cvttsps_epi32(__mmask8 U, __m256 A) {
((__m128i)__builtin_ia32_vcvttps2udqs128_mask( \
(__v4sf)(__m128)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U))
-// 256 Bit : float -> uint
+// 256 Bit : float -> uint
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttsps_epu32(__m256 A) {
return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
@@ -361,9 +361,7 @@ _mm256_maskz_cvttsps_epu32(__mmask8 U, __m256 A) {
#define _mm_maskz_cvttsps_epi64(U, A) \
((__m128i)__builtin_ia32_vcvttps2qqs128_mask( \
(__v4sf)(__m128)A, (__v2di)_mm_setzero_si128(), (__mmask8)U))
-/*
// 256 bit : float -> long
-*/
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttsps_epi64(__m128 A) {
return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
@@ -407,9 +405,7 @@ _mm256_maskz_cvttsps_epi64(__mmask8 U, __m128 A) {
#define _mm_maskz_cvttsps_epu64(U, A) \
((__m128i)__builtin_ia32_vcvttps2uqqs128_mask( \
(__v4sf)(__m128)A, (__v2di)_mm_setzero_si128(), (__mmask8)U))
-/*
// 256 bit : float -> ulong
-*/
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttsps_epu64(__m128 A) {
return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
diff --git a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c
index 889ebc221c79e4..af92975d5953a5 100644
--- a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c
+++ b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c
@@ -1,44 +1,9 @@
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-unknown-unknown -target-feature +avx10.2-512 -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386 -target-feature +avx10.2-512 -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X86
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64 -target-feature +avx10.2-512 -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X64
#include <immintrin.h>
#include <stddef.h>
-int test_mm_cvttssd_i32(__m128d __A) {
- // CHECK-LABEL: @test_mm_cvttssd_i32
- // CHECK: @llvm.x86.avx512.vcvttsd2sis
- return _mm_cvtts_roundsd_i32(__A, _MM_FROUND_NO_EXC);
-}
-
-int test_mm_cvttssd_si32(__m128d __A) {
- // CHECK-LABEL: @test_mm_cvttssd_si32(
- // CHECK: @llvm.x86.avx512.vcvttsd2sis(<2 x double>
- return _mm_cvtts_roundsd_si32(__A, _MM_FROUND_NO_EXC);
-}
-
-unsigned test_mm_cvttssd_u32(__m128d __A) {
- // CHECK-LABEL: @test_mm_cvttssd_u32(
- // CHECK: @llvm.x86.avx512.vcvttsd2usis(<2 x double>
- return _mm_cvtts_roundsd_u32(__A, _MM_FROUND_NO_EXC);
-}
-
-int test_mm_cvttsss_i32(__m128 __A) {
- // CHECK-LABEL: @test_mm_cvttsss_i32(
- // CHECK: @llvm.x86.avx512.vcvttss2sis(<4 x float>
- return _mm_cvtts_roundss_i32(__A, _MM_FROUND_NO_EXC);
-}
-
-int test_mm_cvttsss_si32(__m128 __A) {
- // CHECK-LABEL: @test_mm_cvttsss_si32(
- // CHECK: @llvm.x86.avx512.vcvttss2sis(<4 x float>
- return _mm_cvtts_roundss_si32(__A, _MM_FROUND_NO_EXC);
-}
-
-unsigned test_mm_cvttsss_u32(__m128 __A) {
- // CHECK-LABEL: @test_mm_cvttsss_u32(
- // CHECK: @llvm.x86.avx512.vcvttss2usis(<4 x float>
- return _mm_cvtts_roundss_u32(__A, _MM_FROUND_NO_EXC);
-}
-
__m256i test_mm512_cvttspd_epi32(__m512d A) {
// CHECK-LABEL: test_mm512_cvttspd_epi32
// CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.round.512(<8 x double>
@@ -181,3 +146,6 @@ __m512i test_mm512_maskz_cvtts_roundps_epu32(__mmask8 U, __m512 A) {
// CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.round.512(<16 x float>
return _mm512_maskz_cvtts_roundps_epu32(U, A, _MM_FROUND_NO_EXC);
}
+
+// X64: {{.*}}
+// X86: {{.*}}
\ No newline at end of file
diff --git a/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c b/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c
index c72283c449ea9e..5d0af555465ff5 100644
--- a/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c
+++ b/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c
@@ -3,6 +3,45 @@
#include <immintrin.h>
#include <stddef.h>
+// scalar
+
+int test_mm_cvttssd_i32(__m128d __A) {
+ // CHECK-LABEL: @test_mm_cvttssd_i32
+ // CHECK: @llvm.x86.avx512.vcvttsd2sis
+ return _mm_cvtts_roundsd_i32(__A, _MM_FROUND_NO_EXC);
+}
+
+int test_mm_cvttssd_si32(__m128d __A) {
+ // CHECK-LABEL: @test_mm_cvttssd_si32(
+ // CHECK: @llvm.x86.avx512.vcvttsd2sis(<2 x double>
+ return _mm_cvtts_roundsd_si32(__A, _MM_FROUND_NO_EXC);
+}
+
+unsigned test_mm_cvttssd_u32(__m128d __A) {
+ // CHECK-LABEL: @test_mm_cvttssd_u32(
+ // CHECK: @llvm.x86.avx512.vcvttsd2usis(<2 x double>
+ return _mm_cvtts_roundsd_u32(__A, _MM_FROUND_NO_EXC);
+}
+
+int test_mm_cvttsss_i32(__m128 __A) {
+ // CHECK-LABEL: @test_mm_cvttsss_i32(
+ // CHECK: @llvm.x86.avx512.vcvttss2sis(<4 x float>
+ return _mm_cvtts_roundss_i32(__A, _MM_FROUND_NO_EXC);
+}
+
+int test_mm_cvttsss_si32(__m128 __A) {
+ // CHECK-LABEL: @test_mm_cvttsss_si32(
+ // CHECK: @llvm.x86.avx512.vcvttss2sis(<4 x float>
+ return _mm_cvtts_roundss_si32(__A, _MM_FROUND_NO_EXC);
+}
+
+unsigned test_mm_cvttsss_u32(__m128 __A) {
+ // CHECK-LABEL: @test_mm_cvttsss_u32(
+ // CHECK: @llvm.x86.avx512.vcvttss2usis(<4 x float>
+ return _mm_cvtts_roundss_u32(__A, _MM_FROUND_NO_EXC);
+}
+
+// vector
// 128 bit
__m128i test_mm_cvttspd_epi64(__m128d A){
// CHECK-LABEL: @test_mm_cvttspd_epi64
diff --git a/clang/test/CodeGen/X86/avx10_2satcvtds-builtins.c b/clang/test/CodeGen/X86/avx10_2satcvtds-builtins.c
index 5eee57ddc6a837..b537f91ed3a3d7 100644
--- a/clang/test/CodeGen/X86/avx10_2satcvtds-builtins.c
+++ b/clang/test/CodeGen/X86/avx10_2satcvtds-builtins.c
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-unknown-unknown -target-feature +avx10.2-256 -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386 -target-feature +avx10.2-256 -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X86
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64 -target-feature +avx10.2-256 -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X64
#include <immintrin.h>
#include <stddef.h>
@@ -16,7 +17,7 @@ __m128i test_mm_mask_cvttspd_epi32(__m128i W, __mmask8 U, __m128d A){
}
__m128i test_mm_maskz_cvttspd_epi32( __mmask8 U, __m128d A){
-// CHECK-LABEL: @test_mm_maskz_cvttspd_epi32
+// CHECK-LABEL: @test_mm_maskz_cvttspd_epi32(
// CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.128(<2 x double>
return _mm_maskz_cvttspd_epi32(U,A);
}
@@ -38,11 +39,13 @@ __m128i test_mm256_maskz_cvttspd_epi32(__mmask8 U, __m256d A){
// CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.round.256(<4 x double>
return _mm256_maskz_cvttspd_epi32(U,A);
}
+
__m128i test_mm256_cvtts_roundpd_epi32(__m256d A){
// CHECK-LABEL: @test_mm256_cvtts_roundpd_epi32
// CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.round.256(<4 x double>
return _mm256_cvtts_roundpd_epi32(A, _MM_FROUND_NEARBYINT);
}
+
__m128i test_mm256_mask_cvtts_roundpd_epi32(__m128i W,__mmask8 U, __m256d A){
// CHECK-LABEL: @test_mm256_mask_cvtts_roundpd_epi32
// CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.round.256(<4 x double>
@@ -97,6 +100,7 @@ __m128i test_mm256_cvtts_roundpd_epu32(__m256d A){
// CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.round.256(<4 x double>
return _mm256_cvtts_roundpd_epu32(A, _MM_FROUND_NEARBYINT);
}
+
__m128i test_mm256_mask_cvtts_roundpd_epu32(__m128i W,__mmask8 U, __m256d A){
// CHECK-LABEL: @test_mm256_mask_cvtts_roundpd_epu32
// CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.round.256(<4 x double>
@@ -127,7 +131,6 @@ __m128i test_mm_maskz_cvttsps_epi32( __mmask8 U, __m128 A){
return _mm_maskz_cvttsps_epi32(U,A);
}
-
__m256i test_mm256_cvttsps_epi32(__m256 A){
// CHECK-LABEL: @test_mm256_cvttsps_epi32
// CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.round.256(<8 x float>
@@ -146,12 +149,12 @@ __m256i test_mm256_maskz_cvttsps_epi32(__mmask8 U, __m256 A){
return _mm256_maskz_cvttsps_epi32(U,A);
}
-
__m256i test_mm256_cvtts_roundps_epi32(__m256 A){
// CHECK-LABEL: @test_mm256_cvtts_roundps_epi32
// CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.round.256(<8 x float>
return _mm256_cvtts_roundps_epi32(A, _MM_FROUND_NEARBYINT);
}
+
__m256i test_mm256_mask_cvtts_roundps_epi32(__m256i W,__mmask8 U, __m256 A){
// CHECK-LABEL: @test_mm256_mask_cvtts_roundps_epi32
// CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.round.256(<8 x float>
@@ -218,3 +221,5 @@ __m256i test_mm256_maskz_cvtts_roundps_epu32(__mmask8 U, __m256 A){
return _mm256_maskz_cvtts_roundps_epu32(U,A,_MM_FROUND_NEARBYINT);
}
+// X64: {{.*}}
+// X86: {{.*}}
>From f7110dd6f4492ef3c08867f2a5f994ed4b7ed430 Mon Sep 17 00:00:00 2001
From: Malay Sanghi <malay.sanghi at intel.com>
Date: Fri, 6 Sep 2024 14:02:04 +0800
Subject: [PATCH 5/8] review5
---
clang/lib/Headers/avx10_2satcvtdsintrin.h | 19 +-
.../X86/avx10_2_512satcvtds-builtins-x64.c | 60 ++---
.../X86/avx10_2_512satcvtds-builtins.c | 48 ++--
.../X86/avx10_2satcvtds-builtins-x64.c | 98 ++++----
.../CodeGen/X86/avx10_2satcvtds-builtins.c | 72 +++---
llvm/include/llvm/IR/IntrinsicsX86.td | 64 ++---
llvm/lib/Target/X86/X86InstrAVX10.td | 231 +++++++++---------
llvm/lib/Target/X86/X86IntrinsicsInfo.h | 131 +++++-----
.../X86/avx10_2_512satcvtds-intrinsics.ll | 80 +++---
.../CodeGen/X86/avx10_2satcvtds-intrinsics.ll | 182 +++++++-------
.../X86/avx10_2satcvtds-x64-intrinsics.ll | 24 +-
11 files changed, 502 insertions(+), 507 deletions(-)
diff --git a/clang/lib/Headers/avx10_2satcvtdsintrin.h b/clang/lib/Headers/avx10_2satcvtdsintrin.h
index 4e91b4a17f0c4b..af79ff4e044fec 100644
--- a/clang/lib/Headers/avx10_2satcvtdsintrin.h
+++ b/clang/lib/Headers/avx10_2satcvtdsintrin.h
@@ -20,10 +20,6 @@
__attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \
__min_vector_width__(256)))
-#define __DEFAULT_FN_ATTRS128 \
- __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \
- __min_vector_width__(128)))
-
#define _mm_cvtts_roundsd_i32(A, R) \
  ((int)__builtin_ia32_vcvttsd2sis32((__v2df)(__m128d)(A), (const int)(R)))
@@ -83,20 +79,19 @@
(__v2df)(__m128d)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U))
// 256 Bit : Double -> int
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm256_cvttspd_epi32(__m256d A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvttspd_epi32(__m256d A) {
return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
(__v4df)(__m256d)A, (__v4si)_mm_undefined_si128(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm256_mask_cvttspd_epi32(__m128i W, __mmask8 U, __m256d A) {
return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
(__v4df)A, (__v4si)W, U, _MM_FROUND_CUR_DIRECTION));
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm256_maskz_cvttspd_epi32(__mmask8 U, __m256d A) {
return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
(__v4df)A, (__v4si)_mm_setzero_si128(), U, _MM_FROUND_CUR_DIRECTION));
@@ -131,20 +126,19 @@ _mm256_maskz_cvttspd_epi32(__mmask8 U, __m256d A) {
(__v2df)(__m128d)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U))
// 256 Bit : Double -> uint
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm256_cvttspd_epu32(__m256d A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvttspd_epu32(__m256d A) {
return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
(__v4df)A, (__v4si)_mm_undefined_si128(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm256_mask_cvttspd_epu32(__m128i W, __mmask8 U, __m256d A) {
return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
(__v4df)A, (__v4si)W, U, _MM_FROUND_CUR_DIRECTION));
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm256_maskz_cvttspd_epu32(__mmask8 U, __m256d A) {
return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
(__v4df)A, (__v4si)_mm_setzero_si128(), U, _MM_FROUND_CUR_DIRECTION));
@@ -438,6 +432,5 @@ _mm256_maskz_cvttsps_epu64(__mmask8 U, __m128 A) {
((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( \
(__v4sf)(__m128)A, (__v4di)_mm256_setzero_si256(), (__mmask8)U, (int)R))
-#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS
#endif // __AVX10_2SATCVTDSINTRIN_H
diff --git a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c
index 32b1a2094c4da3..8c8959a03d7bd4 100644
--- a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c
+++ b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c
@@ -5,180 +5,180 @@
long long test_mm_cvttssd_si64(__m128d __A) {
// CHECK-LABEL: @test_mm_cvttssd_si64(
- // CHECK: @llvm.x86.avx512.vcvttsd2sis64(<2 x double>
+ // CHECK: @llvm.x86.avx10.vcvttsd2sis64(<2 x double>
return _mm_cvtts_roundsd_si64(__A, _MM_FROUND_NO_EXC);
}
long long test_mm_cvttssd_i64(__m128d __A) {
// CHECK-LABEL: @test_mm_cvttssd_i64(
- // CHECK: @llvm.x86.avx512.vcvttsd2sis64(<2 x double>
+ // CHECK: @llvm.x86.avx10.vcvttsd2sis64(<2 x double>
return _mm_cvtts_roundsd_i64(__A, _MM_FROUND_NO_EXC);
}
unsigned long long test_mm_cvttssd_u64(__m128d __A) {
// CHECK-LABEL: @test_mm_cvttssd_u64(
- // CHECK: @llvm.x86.avx512.vcvttsd2usis64(<2 x double>
+ // CHECK: @llvm.x86.avx10.vcvttsd2usis64(<2 x double>
return _mm_cvtts_roundsd_u64(__A, _MM_FROUND_NO_EXC);
}
long long test_mm_cvttsss_i64(__m128 __A) {
// CHECK-LABEL: @test_mm_cvttsss_i64(
- // CHECK: @llvm.x86.avx512.vcvttss2sis64(<4 x float>
+ // CHECK: @llvm.x86.avx10.vcvttss2sis64(<4 x float>
return _mm_cvtts_roundss_i64(__A, _MM_FROUND_NO_EXC);
}
long long test_mm_cvttsss_si64(__m128 __A) {
// CHECK-LABEL: @test_mm_cvttsss_si64(
- // CHECK: @llvm.x86.avx512.vcvttss2sis64(<4 x float>
+ // CHECK: @llvm.x86.avx10.vcvttss2sis64(<4 x float>
return _mm_cvtts_roundss_si64(__A, _MM_FROUND_NO_EXC);
}
unsigned long long test_mm_cvttsss_u64(__m128 __A) {
// CHECK-LABEL: @test_mm_cvttsss_u64(
- // CHECK: @llvm.x86.avx512.vcvttss2usis64(<4 x float>
+ // CHECK: @llvm.x86.avx10.vcvttss2usis64(<4 x float>
return _mm_cvtts_roundss_u64(__A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_cvttspd_epi64(__m512d A) {
// CHECK-LABEL: test_mm512_cvttspd_epi64
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.round.512(<8 x double>
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.512(<8 x double>
return _mm512_cvttspd_epi64(A);
}
__m512i test_mm512_mask_cvttspd_epi64(__m512i W, __mmask8 U, __m512d A) {
// CHECK-LABEL: test_mm512_mask_cvttspd_epi64
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.round.512(<8 x double>
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.512(<8 x double>
return _mm512_mask_cvttspd_epi64(W, U, A);
}
__m512i test_mm512_maskz_cvttspd_epi64(__mmask8 U, __m512d A) {
// CHECK-LABEL: test_mm512_maskz_cvttspd_epi64
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.round.512(<8 x double>
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.512(<8 x double>
return _mm512_maskz_cvttspd_epi64(U, A);
}
__m512i test_mm512_cvtts_roundpd_epi64(__m512d A) {
// CHECK-LABEL: test_mm512_cvtts_roundpd_epi64
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.round.512(<8 x double>
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.512(<8 x double>
return _mm512_cvtts_roundpd_epi64(A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_mask_cvtts_roundpd_epi64(__m512i W, __mmask8 U, __m512d A) {
// CHECK-LABEL: test_mm512_mask_cvtts_roundpd_epi64
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.round.512(<8 x double>
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.512(<8 x double>
return _mm512_mask_cvtts_roundpd_epi64(W, U, A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_maskz_cvtts_roundpd_epi64(__mmask8 U, __m512d A) {
// CHECK-LABEL: test_mm512_maskz_cvtts_roundpd_epi64
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.round.512(<8 x double>
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.512(<8 x double>
return _mm512_maskz_cvtts_roundpd_epi64(U, A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_cvttspd_epu64(__m512d A) {
// CHECK-LABEL: test_mm512_cvttspd_epu64
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.round.512(<8 x double>
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.512(<8 x double>
return _mm512_cvttspd_epu64(A);
}
__m512i test_mm512_mask_cvttspd_epu64(__m512i W, __mmask8 U, __m512d A) {
// CHECK-LABEL: test_mm512_mask_cvttspd_epu64
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.round.512(<8 x double>
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.512(<8 x double>
return _mm512_mask_cvttspd_epu64(W, U, A);
}
__m512i test_mm512_maskz_cvttspd_epu64(__mmask8 U, __m512d A) {
// CHECK-LABEL: test_mm512_maskz_cvttspd_epu64
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.round.512(<8 x double>
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.512(<8 x double>
return _mm512_maskz_cvttspd_epu64(U, A);
}
__m512i test_mm512_cvtts_roundpd_epu64(__m512d A) {
// CHECK-LABEL: test_mm512_cvtts_roundpd_epu64
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.round.512(<8 x double>
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.512(<8 x double>
return _mm512_cvtts_roundpd_epu64(A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_mask_cvtts_roundpd_epu64(__m512i W, __mmask8 U, __m512d A) {
// CHECK-LABEL: test_mm512_mask_cvtts_roundpd_epu64
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.round.512(<8 x double>
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.512(<8 x double>
return _mm512_mask_cvtts_roundpd_epu64(W, U, A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_maskz_cvtts_roundpd_epu64(__mmask8 U, __m512d A) {
// CHECK-LABEL: test_mm512_maskz_cvtts_roundpd_epu64
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.round.512(<8 x double>
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.512(<8 x double>
return _mm512_maskz_cvtts_roundpd_epu64(U, A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_cvttsps_epi64(__m256 A) {
// CHECK-LABEL: test_mm512_cvttsps_epi64
- // CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.round.512(<8 x float>
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.512(<8 x float>
return _mm512_cvttsps_epi64(A);
}
__m512i test_mm512_mask_cvttsps_epi64(__m512i W, __mmask8 U, __m256 A) {
// CHECK-LABEL: test_mm512_mask_cvttsps_epi64
- // CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.round.512(<8 x float>
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.512(<8 x float>
return _mm512_mask_cvttsps_epi64(W, U, A);
}
__m512i test_mm512_maskz_cvttsps_epi64(__mmask8 U, __m256 A) {
// CHECK-LABEL: test_mm512_maskz_cvttsps_epi64
- // CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.round.512(<8 x float>
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.512(<8 x float>
return _mm512_maskz_cvttsps_epi64(U, A);
}
__m512i test_mm512_cvtts_roundps_epi64(__m256 A) {
// CHECK-LABEL: test_mm512_cvtts_roundps_epi64
- // CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.round.512(<8 x float>
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.512(<8 x float>
return _mm512_cvtts_roundps_epi64(A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_mask_cvtts_roundps_epi64(__m512i W, __mmask8 U, __m256 A) {
// CHECK-LABEL: test_mm512_mask_cvtts_roundps_epi64
- // CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.round.512(<8 x float>
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.512(<8 x float>
return _mm512_mask_cvtts_roundps_epi64(W, U, A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_maskz_cvtts_roundps_epi64(__mmask8 U, __m256 A) {
// CHECK-LABEL: test_mm512_maskz_cvtts_roundps_epi64
- // CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.round.512(<8 x float>
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.512(<8 x float>
return _mm512_maskz_cvtts_roundps_epi64(U, A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_cvttsps_epu64(__m256 A) {
// CHECK-LABEL: test_mm512_cvttsps_epu64
- // CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.round.512(<8 x float>
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.512(<8 x float>
return _mm512_cvttsps_epu64(A);
}
__m512i test_mm512_mask_cvttsps_epu64(__m512i W, __mmask8 U, __m256 A) {
// CHECK-LABEL: test_mm512_mask_cvttsps_epu64
- // CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.round.512(<8 x float>
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.512(<8 x float>
return _mm512_mask_cvttsps_epu64(W, U, A);
}
__m512i test_mm512_maskz_cvttsps_epu64(__mmask8 U, __m256 A) {
// CHECK-LABEL: test_mm512_maskz_cvttsps_epu64
- // CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.round.512(<8 x float>
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.512(<8 x float>
return _mm512_maskz_cvttsps_epu64(U, A);
}
__m512i test_mm512_cvtts_roundps_epu64(__m256 A) {
// CHECK-LABEL: test_mm512_cvtts_roundps_epu64
- // CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.round.512(<8 x float>
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.512(<8 x float>
return _mm512_cvtts_roundps_epu64(A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_mask_cvtts_roundps_epu64(__m512i W, __mmask8 U, __m256 A) {
// CHECK-LABEL: test_mm512_mask_cvtts_roundps_epu64
- // CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.round.512(<8 x float>
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.512(<8 x float>
return _mm512_mask_cvtts_roundps_epu64(W, U, A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_maskz_cvtts_roundps_epu64(__mmask8 U, __m256 A) {
// CHECK-LABEL: test_mm512_maskz_cvtts_roundps_epu64
- // CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.round.512(<8 x float>
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.512(<8 x float>
return _mm512_maskz_cvtts_roundps_epu64(U, A, _MM_FROUND_NO_EXC);
}
diff --git a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c
index af92975d5953a5..cccee04627d22e 100644
--- a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c
+++ b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c
@@ -6,144 +6,144 @@
__m256i test_mm512_cvttspd_epi32(__m512d A) {
// CHECK-LABEL: test_mm512_cvttspd_epi32
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.round.512(<8 x double>
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.512(<8 x double>
return _mm512_cvttspd_epi32(A);
}
__m256i test_mm512_mask_cvttspd_epi32(__m256i W, __mmask8 U, __m512d A) {
// CHECK-LABEL: test_mm512_mask_cvttspd_epi32
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.round.512(<8 x double>
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.512(<8 x double>
return _mm512_mask_cvttspd_epi32(W, U, A);
}
__m256i test_mm512_maskz_cvttspd_epi32(__mmask8 U, __m512d A) {
// CHECK-LABEL: test_mm512_maskz_cvttspd_epi32
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.round.512(<8 x double>
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.512(<8 x double>
return _mm512_maskz_cvttspd_epi32(U, A);
}
__m256i test_mm512_cvtts_roundpd_epi32(__m512d A) {
// CHECK-LABEL: test_mm512_cvtts_roundpd_epi32
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.round.512(<8 x double>
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.512(<8 x double>
return _mm512_cvtts_roundpd_epi32(A, _MM_FROUND_NO_EXC);
}
__m256i test_mm512_mask_cvtts_roundpd_epi32(__m256i W, __mmask8 U, __m512d A) {
// CHECK-LABEL: test_mm512_mask_cvtts_roundpd_epi32
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.round.512(<8 x double>
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.512(<8 x double>
return _mm512_mask_cvtts_roundpd_epi32(W, U, A, _MM_FROUND_NO_EXC);
}
__m256i test_mm512_maskz_cvtts_roundpd_epi32(__mmask8 U, __m512d A) {
// CHECK-LABEL: test_mm512_maskz_cvtts_roundpd_epi32
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.round.512(<8 x double>
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.512(<8 x double>
return _mm512_maskz_cvtts_roundpd_epi32(U, A, _MM_FROUND_NO_EXC);
}
__m256i test_mm512_cvttspd_epu32(__m512d A) {
// CHECK-LABEL: test_mm512_cvttspd_epu32
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.round.512(<8 x double>
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.512(<8 x double>
return _mm512_cvttspd_epu32(A);
}
__m256i test_mm512_mask_cvttspd_epu32(__m256i W, __mmask8 U, __m512d A) {
// CHECK-LABEL: test_mm512_mask_cvttspd_epu32
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.round.512(<8 x double>
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.512(<8 x double>
return _mm512_mask_cvttspd_epu32(W, U, A);
}
__m256i test_mm512_maskz_cvttspd_epu32(__mmask8 U, __m512d A) {
// CHECK-LABEL: test_mm512_maskz_cvttspd_epu32
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.round.512(<8 x double>
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.512(<8 x double>
return _mm512_maskz_cvttspd_epu32(U, A);
}
__m256i test_mm512_cvtts_roundpd_epu32(__m512d A) {
// CHECK-LABEL: test_mm512_cvtts_roundpd_epu32
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.round.512(<8 x double>
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.512(<8 x double>
return _mm512_cvtts_roundpd_epu32(A, _MM_FROUND_NO_EXC);
}
__m256i test_mm512_mask_cvtts_roundpd_epu32(__m256i W, __mmask8 U, __m512d A) {
// CHECK-LABEL: test_mm512_mask_cvtts_roundpd_epu32
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.round.512(<8 x double>
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.512(<8 x double>
return _mm512_mask_cvtts_roundpd_epu32(W, U, A, _MM_FROUND_NO_EXC);
}
__m256i test_mm512_maskz_cvtts_roundpd_epu32(__mmask8 U, __m512d A) {
// CHECK-LABEL: test_mm512_maskz_cvtts_roundpd_epu32
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.round.512(<8 x double>
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.512(<8 x double>
return _mm512_maskz_cvtts_roundpd_epu32(U, A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_cvttsps_epi32(__m512 A) {
// CHECK-LABEL: test_mm512_cvttsps_epi32
- // CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.round.512(<16 x float>
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.512(<16 x float>
return _mm512_cvttsps_epi32(A);
}
__m512i test_mm512_mask_cvttsps_epi32(__m512i W, __mmask8 U, __m512 A) {
// CHECK-LABEL: test_mm512_mask_cvttsps_epi32
- // CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.round.512(<16 x float>
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.512(<16 x float>
return _mm512_mask_cvttsps_epi32(W, U, A);
}
__m512i test_mm512_maskz_cvttsps_epi32(__mmask8 U, __m512 A) {
// CHECK-LABEL: test_mm512_maskz_cvttsps_epi32
- // CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.round.512(<16 x float>
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.512(<16 x float>
return _mm512_maskz_cvttsps_epi32(U, A);
}
__m512i test_mm512_cvtts_roundps_epi32(__m512 A) {
// CHECK-LABEL: test_mm512_cvtts_roundps_epi32
- // CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.round.512(<16 x float>
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.512(<16 x float>
return _mm512_cvtts_roundps_epi32(A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_mask_cvtts_roundps_epi32(__m512i W, __mmask8 U, __m512 A) {
// CHECK-LABEL: test_mm512_mask_cvtts_roundps_epi32
- // CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.round.512(<16 x float>
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.512(<16 x float>
return _mm512_mask_cvtts_roundps_epi32(W, U, A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_maskz_cvtts_roundps_epi32(__mmask8 U, __m512 A) {
// CHECK-LABEL: test_mm512_maskz_cvtts_roundps_epi32
- // CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.round.512(<16 x float>
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.512(<16 x float>
return _mm512_maskz_cvtts_roundps_epi32(U, A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_cvttsps_epu32(__m512 A) {
// CHECK-LABEL: test_mm512_cvttsps_epu32
- // CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.round.512(<16 x float>
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.512(<16 x float>
return _mm512_cvttsps_epu32(A);
}
__m512i test_mm512_mask_cvttsps_epu32(__m512i W, __mmask8 U, __m512 A) {
// CHECK-LABEL: test_mm512_mask_cvttsps_epu32
- // CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.round.512(<16 x float>
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.512(<16 x float>
return _mm512_mask_cvttsps_epu32(W, U, A);
}
__m512i test_mm512_maskz_cvttsps_epu32(__mmask8 U, __m512 A) {
// CHECK-LABEL: test_mm512_maskz_cvttsps_epu32
- // CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.round.512(<16 x float>
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.512(<16 x float>
return _mm512_maskz_cvttsps_epu32(U, A);
}
__m512i test_mm512_cvtts_roundps_epu32(__m512 A) {
// CHECK-LABEL: test_mm512_cvtts_roundps_epu32
- // CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.round.512(<16 x float>
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.512(<16 x float>
return _mm512_cvtts_roundps_epu32(A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_mask_cvtts_roundps_epu32(__m512i W, __mmask8 U, __m512 A) {
// CHECK-LABEL: test_mm512_mask_cvtts_roundps_epu32
- // CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.round.512(<16 x float>
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.512(<16 x float>
return _mm512_mask_cvtts_roundps_epu32(W, U, A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_maskz_cvtts_roundps_epu32(__mmask8 U, __m512 A) {
// CHECK-LABEL: test_mm512_maskz_cvtts_roundps_epu32
- // CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.round.512(<16 x float>
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.512(<16 x float>
return _mm512_maskz_cvtts_roundps_epu32(U, A, _MM_FROUND_NO_EXC);
}
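
[For reference, not from the patch: the masked forms tested above follow the usual AVX-512 write-mask convention; a sketch.]

#include <immintrin.h>

/* Lanes with a 0 bit in U keep the corresponding element of W; lanes
   with a 1 bit receive the saturated, truncated conversion of A. */
__m256i masked_cvt(__m256i W, __mmask8 U, __m512d A) {
  return _mm512_mask_cvttspd_epi32(W, U, A);
}
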
diff --git a/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c b/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c
index 5d0af555465ff5..00384731a51f71 100644
--- a/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c
+++ b/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c
@@ -7,37 +7,37 @@
int test_mm_cvttssd_i32(__m128d __A) {
// CHECK-LABEL: @test_mm_cvttssd_i32
- // CHECK: @llvm.x86.avx512.vcvttsd2sis
+ // CHECK: @llvm.x86.avx10.vcvttsd2sis
return _mm_cvtts_roundsd_i32(__A, _MM_FROUND_NO_EXC);
}
int test_mm_cvttssd_si32(__m128d __A) {
// CHECK-LABEL: @test_mm_cvttssd_si32(
- // CHECK: @llvm.x86.avx512.vcvttsd2sis(<2 x double>
+ // CHECK: @llvm.x86.avx10.vcvttsd2sis(<2 x double>
return _mm_cvtts_roundsd_si32(__A, _MM_FROUND_NO_EXC);
}
unsigned test_mm_cvttssd_u32(__m128d __A) {
// CHECK-LABEL: @test_mm_cvttssd_u32(
- // CHECK: @llvm.x86.avx512.vcvttsd2usis(<2 x double>
+ // CHECK: @llvm.x86.avx10.vcvttsd2usis(<2 x double>
return _mm_cvtts_roundsd_u32(__A, _MM_FROUND_NO_EXC);
}
int test_mm_cvttsss_i32(__m128 __A) {
// CHECK-LABEL: @test_mm_cvttsss_i32(
- // CHECK: @llvm.x86.avx512.vcvttss2sis(<4 x float>
+ // CHECK: @llvm.x86.avx10.vcvttss2sis(<4 x float>
return _mm_cvtts_roundss_i32(__A, _MM_FROUND_NO_EXC);
}
int test_mm_cvttsss_si32(__m128 __A) {
// CHECK-LABEL: @test_mm_cvttsss_si32(
- // CHECK: @llvm.x86.avx512.vcvttss2sis(<4 x float>
+ // CHECK: @llvm.x86.avx10.vcvttss2sis(<4 x float>
return _mm_cvtts_roundss_si32(__A, _MM_FROUND_NO_EXC);
}
unsigned test_mm_cvttsss_u32(__m128 __A) {
// CHECK-LABEL: @test_mm_cvttsss_u32(
- // CHECK: @llvm.x86.avx512.vcvttss2usis(<4 x float>
+ // CHECK: @llvm.x86.avx10.vcvttss2usis(<4 x float>
return _mm_cvtts_roundss_u32(__A, _MM_FROUND_NO_EXC);
}
@@ -45,218 +45,218 @@ unsigned test_mm_cvttsss_u32(__m128 __A) {
// 128 bit
__m128i test_mm_cvttspd_epi64(__m128d A){
// CHECK-LABEL: @test_mm_cvttspd_epi64
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.128(<2 x double>
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.128(<2 x double>
return _mm_cvttspd_epi64(A);
}
__m128i test_mm_mask_cvttspd_epi64(__m128i W, __mmask8 U, __m128d A){
// CHECK-LABEL: @test_mm_mask_cvttspd_epi64
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.128(<2 x double>
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.128(<2 x double>
return _mm_mask_cvttspd_epi64(W, U, A);
}
__m128i test_mm_maskz_cvttspd_epi64(__mmask8 U,__m128d A){
// CHECK-LABEL: @test_mm_maskz_cvttspd_epi64
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.128(<2 x double>
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.128(<2 x double>
return _mm_maskz_cvttspd_epi64(U, A);
}
__m128i test_mm_cvttspd_epu64(__m128d A){
// CHECK-LABEL: @test_mm_cvttspd_epu64
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.128(<2 x double>
- return _mm_cvttspd_epu64( A);
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.128(<2 x double>
+ return _mm_cvttspd_epu64(A);
}
__m128i test_mm_mask_cvttspd_epu64(__m128i W, __mmask8 U, __m128d A){
// CHECK-LABEL: @test_mm_mask_cvttspd_epu64
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.128(<2 x double>
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.128(<2 x double>
return _mm_mask_cvttspd_epu64(W, U, A);
}
__m128i test_mm_maskz_cvttspd_epu64(__mmask8 U,__m128d A){
// CHECK-LABEL: @test_mm_maskz_cvttspd_epu64
- // CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.128(<2 x double>
+ // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.128(<2 x double>
return _mm_maskz_cvttspd_epu64(U, A);
}
// 256 bit
__m256i test_mm256_cvttspd_epi64(__m256d A){
// CHECK-LABEL: @test_mm256_cvttspd_epi64
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.round.256(<4 x double>
- return _mm256_cvttspd_epi64( A);
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.256(<4 x double>
+ return _mm256_cvttspd_epi64(A);
}
__m256i test_mm256_mask_cvttspd_epi64(__m256i W,__mmask8 U, __m256d A){
// CHECK-LABEL: @test_mm256_mask_cvttspd_epi64
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.round.256(<4 x double>
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.256(<4 x double>
return _mm256_mask_cvttspd_epi64(W,U, A);
}
__m256i test_mm256_maskz_cvttspd_epi64(__mmask8 U, __m256d A){
// CHECK-LABEL: @test_mm256_maskz_cvttspd_epi64
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.round.256(<4 x double>
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.256(<4 x double>
return _mm256_maskz_cvttspd_epi64(U, A);
}
__m256i test_mm256_cvtts_roundpd_epi64(__m256d A){
// CHECK-LABEL: @test_mm256_cvtts_roundpd_epi64
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.round.256(<4 x double>
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.256(<4 x double>
  return _mm256_cvtts_roundpd_epi64(A, _MM_FROUND_NEARBYINT);
}
__m256i test_mm256_mask_cvtts_roundpd_epi64(__m256i W,__mmask8 U, __m256d A){
// CHECK-LABEL: @test_mm256_mask_cvtts_roundpd_epi64
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.round.256(<4 x double>
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.256(<4 x double>
  return _mm256_mask_cvtts_roundpd_epi64(W, U, A, _MM_FROUND_NEARBYINT);
}
__m256i test_mm256_maskz_cvtts_roundpd_epi64(__mmask8 U, __m256d A){
// CHECK-LABEL: @test_mm256_maskz_cvtts_roundpd_epi64
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2qqs.round.256(<4 x double>
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.256(<4 x double>
  return _mm256_maskz_cvtts_roundpd_epi64(U, A, _MM_FROUND_NEARBYINT);
}
__m256i test_mm256_cvttspd_epu64(__m256d A){
// CHECK-LABEL: @test_mm256_cvttspd_epu64
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.round.256(<4 x double>
- return _mm256_cvttspd_epu64( A);
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.256(<4 x double>
+ return _mm256_cvttspd_epu64(A);
}
__m256i test_mm256_mask_cvttspd_epu64(__m256i W,__mmask8 U, __m256d A){
// CHECK-LABEL: @test_mm256_mask_cvttspd_epu64
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.round.256(<4 x double>
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.256(<4 x double>
return _mm256_mask_cvttspd_epu64(W,U, A);
}
__m256i test_mm256_maskz_cvttspd_epu64(__mmask8 U, __m256d A){
// CHECK-LABEL: @test_mm256_maskz_cvttspd_epu64
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.round.256(<4 x double>
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.256(<4 x double>
return _mm256_maskz_cvttspd_epu64(U, A);
}
__m256i test_mm256_cvtts_roundpd_epu64(__m256d A){
// CHECK-LABEL: @test_mm256_cvtts_roundpd_epu64
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.round.256(<4 x double>
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.256(<4 x double>
  return _mm256_cvtts_roundpd_epu64(A, _MM_FROUND_NEARBYINT);
}
__m256i test_mm256_mask_cvtts_roundpd_epu64(__m256i W,__mmask8 U, __m256d A){
// CHECK-LABEL: @test_mm256_mask_cvtts_roundpd_epu64
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.round.256(<4 x double>
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.256(<4 x double>
  return _mm256_mask_cvtts_roundpd_epu64(W, U, A, _MM_FROUND_NEARBYINT);
}
__m256i test_mm256_maskz_cvtts_roundpd_epu64(__mmask8 U, __m256d A){
// CHECK-LABEL: @test_mm256_maskz_cvtts_roundpd_epu64
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2uqqs.round.256(<4 x double>
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.256(<4 x double>
  return _mm256_maskz_cvtts_roundpd_epu64(U, A, _MM_FROUND_NEARBYINT);
}
// 128 bit
__m128i test_mm_cvttsps_epi64(__m128 A){
// CHECK-LABEL: @test_mm_cvttsps_epi64
- // CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.128(<4 x float>
- return _mm_cvttsps_epi64( A);
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.128(<4 x float>
+ return _mm_cvttsps_epi64(A);
}
__m128i test_mm_mask_cvttsps_epi64(__m128i W, __mmask8 U, __m128 A){
// CHECK-LABEL: @test_mm_mask_cvttsps_epi64
- // CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.128(<4 x float>
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.128(<4 x float>
return _mm_mask_cvttsps_epi64(W, U, A);
}
__m128i test_mm_maskz_cvttsps_epi64(__mmask8 U,__m128 A){
// CHECK-LABEL: @test_mm_maskz_cvttsps_epi64
- // CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.128(<4 x float>
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.128(<4 x float>
return _mm_maskz_cvttsps_epi64(U, A);
}
__m128i test_mm_cvttsps_epu64(__m128 A){
// CHECK-LABEL: @test_mm_cvttsps_epu64
- // CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.128(<4 x float>
- return _mm_cvttsps_epu64( A);
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.128(<4 x float>
+ return _mm_cvttsps_epu64(A);
}
__m128i test_mm_mask_cvttsps_epu64(__m128i W, __mmask8 U, __m128 A){
// CHECK-LABEL: @test_mm_mask_cvttsps_epu64
- // CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.128(<4 x float>
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.128(<4 x float>
return _mm_mask_cvttsps_epu64(W, U, A);
}
__m128i test_mm_maskz_cvttsps_epu64(__mmask8 U,__m128 A){
// CHECK-LABEL: @test_mm_maskz_cvttsps_epu64
- // CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.128(<4 x float>
+ // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.128(<4 x float>
return _mm_maskz_cvttsps_epu64(U, A);
}
__m256i test_mm256_cvttsps_epi64(__m128 A){
// CHECK-LABEL: @test_mm256_cvttsps_epi64
-// CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.round.256(<4 x float>
- return _mm256_cvttsps_epi64( A);
+// CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.256(<4 x float>
+ return _mm256_cvttsps_epi64(A);
}
__m256i test_mm256_mask_cvttsps_epi64(__m256i W,__mmask8 U, __m128 A){
// CHECK-LABEL: @test_mm256_mask_cvttsps_epi64
-// CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.round.256(<4 x float>
+// CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.256(<4 x float>
return _mm256_mask_cvttsps_epi64(W,U, A);
}
__m256i test_mm256_maskz_cvttsps_epi64(__mmask8 U, __m128 A){
// CHECK-LABEL: @test_mm256_maskz_cvttsps_epi64
-// CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.round.256(<4 x float>
+// CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.256(<4 x float>
return _mm256_maskz_cvttsps_epi64(U, A);
}
__m256i test_mm256_cvtts_roundps_epi64(__m128 A){
// CHECK-LABEL: @test_mm256_cvtts_roundps_epi64
-// CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.round.256(<4 x float>
+// CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.256(<4 x float>
  return _mm256_cvtts_roundps_epi64(A, _MM_FROUND_NEARBYINT);
}
__m256i test_mm256_mask_cvtts_roundps_epi64(__m256i W,__mmask8 U, __m128 A){
// CHECK-LABEL: @test_mm256_mask_cvtts_roundps_epi64
-// CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.round.256(<4 x float>
+// CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.256(<4 x float>
  return _mm256_mask_cvtts_roundps_epi64(W, U, A, _MM_FROUND_NEARBYINT);
}
__m256i test_mm256_maskz_cvtts_roundps_epi64(__mmask8 U, __m128 A){
// CHECK-LABEL: @test_mm256_maskz_cvtts_roundps_epi64
-// CHECK: @llvm.x86.avx512.mask.vcvttps2qqs.round.256(<4 x float>
+// CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.256(<4 x float>
  return _mm256_maskz_cvtts_roundps_epi64(U, A, _MM_FROUND_NEARBYINT);
}
__m256i test_mm256_cvttsps_epu64(__m128 A){
// CHECK-LABEL: @test_mm256_cvttsps_epu64
-// CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.round.256(<4 x float>
- return _mm256_cvttsps_epu64( A);
+// CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.256(<4 x float>
+ return _mm256_cvttsps_epu64(A);
}
__m256i test_mm256_mask_cvttsps_epu64(__m256i W,__mmask8 U, __m128 A){
// CHECK-LABEL: @test_mm256_mask_cvttsps_epu64
-// CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.round.256(<4 x float>
+// CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.256(<4 x float>
return _mm256_mask_cvttsps_epu64(W,U, A);
}
__m256i test_mm256_maskz_cvttsps_epu64(__mmask8 U, __m128 A){
// CHECK-LABEL: @test_mm256_maskz_cvttsps_epu64
-// CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.round.256(<4 x float>
+// CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.256(<4 x float>
return _mm256_maskz_cvttsps_epu64(U, A);
}
__m256i test_mm256_cvtts_roundps_epu64(__m128 A){
// CHECK-LABEL: @test_mm256_cvtts_roundps_epu64
-// CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.round.256(<4 x float>
+// CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.256(<4 x float>
  return _mm256_cvtts_roundps_epu64(A, _MM_FROUND_NEARBYINT);
}
__m256i test_mm256_mask_cvtts_roundps_epu64(__m256i W,__mmask8 U, __m128 A){
// CHECK-LABEL: @test_mm256_mask_cvtts_roundps_epu64
-// CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.round.256(<4 x float>
+// CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.256(<4 x float>
  return _mm256_mask_cvtts_roundps_epu64(W, U, A, _MM_FROUND_NEARBYINT);
}
__m256i test_mm256_maskz_cvtts_roundps_epu64(__mmask8 U, __m128 A){
// CHECK-LABEL: @test_mm256_maskz_cvtts_roundps_epu64
-// CHECK: @llvm.x86.avx512.mask.vcvttps2uqqs.round.256(<4 x float>
+// CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.256(<4 x float>
  return _mm256_maskz_cvtts_roundps_epu64(U, A, _MM_FROUND_NEARBYINT);
}
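
[A sketch of the scalar form covered at the top of this file; illustrative only, not from the patch.]

#include <immintrin.h>

/* Scalar double -> int with truncation and saturation; out-of-range
   inputs clamp to INT_MIN/INT_MAX rather than producing the integer
   indefinite value. */
int scalar_cvt(__m128d x) {
  return _mm_cvtts_roundsd_i32(x, _MM_FROUND_NO_EXC);
}
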
diff --git a/clang/test/CodeGen/X86/avx10_2satcvtds-builtins.c b/clang/test/CodeGen/X86/avx10_2satcvtds-builtins.c
index b537f91ed3a3d7..bb90f6a086fa2b 100644
--- a/clang/test/CodeGen/X86/avx10_2satcvtds-builtins.c
+++ b/clang/test/CodeGen/X86/avx10_2satcvtds-builtins.c
@@ -6,218 +6,218 @@
__m128i test_mm_cvttspd_epi32(__m128d A){
// CHECK-LABEL: @test_mm_cvttspd_epi32
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.128(<2 x double>
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.128(<2 x double>
return _mm_cvttspd_epi32(A);
}
__m128i test_mm_mask_cvttspd_epi32(__m128i W, __mmask8 U, __m128d A){
// CHECK-LABEL: @test_mm_mask_cvttspd_epi32
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.128(<2 x double>
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.128(<2 x double>
return _mm_mask_cvttspd_epi32(W,U,A);
}
__m128i test_mm_maskz_cvttspd_epi32( __mmask8 U, __m128d A){
// CHECK-LABEL: @test_mm_maskz_cvttspd_epi32(
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.128(<2 x double>
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.128(<2 x double>
return _mm_maskz_cvttspd_epi32(U,A);
}
__m128i test_mm256_cvttspd_epi32(__m256d A){
// CHECK-LABEL: @test_mm256_cvttspd_epi32
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.round.256(<4 x double>
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.256(<4 x double>
return _mm256_cvttspd_epi32(A);
}
__m128i test_mm256_mask_cvttspd_epi32(__m128i W,__mmask8 U, __m256d A){
// CHECK-LABEL: @test_mm256_mask_cvttspd_epi32
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.round.256(<4 x double>
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.256(<4 x double>
return _mm256_mask_cvttspd_epi32(W,U,A);
}
__m128i test_mm256_maskz_cvttspd_epi32(__mmask8 U, __m256d A){
// CHECK-LABEL: @test_mm256_maskz_cvttspd_epi32
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.round.256(<4 x double>
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.256(<4 x double>
return _mm256_maskz_cvttspd_epi32(U,A);
}
__m128i test_mm256_cvtts_roundpd_epi32(__m256d A){
// CHECK-LABEL: @test_mm256_cvtts_roundpd_epi32
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.round.256(<4 x double>
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.256(<4 x double>
return _mm256_cvtts_roundpd_epi32(A, _MM_FROUND_NEARBYINT);
}
__m128i test_mm256_mask_cvtts_roundpd_epi32(__m128i W,__mmask8 U, __m256d A){
// CHECK-LABEL: @test_mm256_mask_cvtts_roundpd_epi32
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.round.256(<4 x double>
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.256(<4 x double>
return _mm256_mask_cvtts_roundpd_epi32(W,U,A,_MM_FROUND_NEARBYINT);
}
__m128i test_mm256_maskz_cvtts_roundpd_epi32(__mmask8 U, __m256d A){
// CHECK-LABEL: @test_mm256_maskz_cvtts_roundpd_epi32
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2dqs.round.256(<4 x double>
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.256(<4 x double>
return _mm256_maskz_cvtts_roundpd_epi32(U,A,_MM_FROUND_NEARBYINT);
}
__m128i test_mm_cvttspd_epu32(__m128d A){
// CHECK-LABEL: @test_mm_cvttspd_epu32
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.128(<2 x double>
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.128(<2 x double>
return _mm_cvttspd_epu32(A);
}
__m128i test_mm_mask_cvttspd_epu32(__m128i W, __mmask8 U, __m128d A){
// CHECK-LABEL: @test_mm_mask_cvttspd_epu32
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.128(<2 x double>
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.128(<2 x double>
return _mm_mask_cvttspd_epu32(W,U,A);
}
__m128i test_mm_maskz_cvttspd_epu32( __mmask8 U, __m128d A){
// CHECK-LABEL: @test_mm_maskz_cvttspd_epu32
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.128(<2 x double>
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.128(<2 x double>
return _mm_maskz_cvttspd_epu32(U,A);
}
__m128i test_mm256_cvttspd_epu32(__m256d A){
// CHECK-LABEL: @test_mm256_cvttspd_epu32
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.round.256(<4 x double>
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.256(<4 x double>
return _mm256_cvttspd_epu32(A);
}
__m128i test_mm256_mask_cvttspd_epu32(__m128i W,__mmask8 U, __m256d A){
// CHECK-LABEL: @test_mm256_mask_cvttspd_epu32
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.round.256(<4 x double>
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.256(<4 x double>
return _mm256_mask_cvttspd_epu32(W,U,A);
}
__m128i test_mm256_maskz_cvttspd_epu32(__mmask8 U, __m256d A){
// CHECK-LABEL: @test_mm256_maskz_cvttspd_epu32
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.round.256(<4 x double>
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.256(<4 x double>
return _mm256_maskz_cvttspd_epu32(U,A);
}
__m128i test_mm256_cvtts_roundpd_epu32(__m256d A){
// CHECK-LABEL: @test_mm256_cvtts_roundpd_epu32
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.round.256(<4 x double>
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.256(<4 x double>
return _mm256_cvtts_roundpd_epu32(A, _MM_FROUND_NEARBYINT);
}
__m128i test_mm256_mask_cvtts_roundpd_epu32(__m128i W,__mmask8 U, __m256d A){
// CHECK-LABEL: @test_mm256_mask_cvtts_roundpd_epu32
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.round.256(<4 x double>
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.256(<4 x double>
return _mm256_mask_cvtts_roundpd_epu32(W,U,A,_MM_FROUND_NEARBYINT);
}
__m128i test_mm256_maskz_cvtts_roundpd_epu32(__mmask8 U, __m256d A){
// CHECK-LABEL: @test_mm256_maskz_cvtts_roundpd_epu32
-// CHECK: @llvm.x86.avx512.mask.vcvttpd2udqs.round.256(<4 x double>
+// CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.256(<4 x double>
return _mm256_maskz_cvtts_roundpd_epu32(U,A,_MM_FROUND_NEARBYINT);
}
__m128i test_mm_cvttsps_epi32(__m128 A){
// CHECK-LABEL: @test_mm_cvttsps_epi32
-// CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.128(<4 x float>
+// CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.128(<4 x float>
return _mm_cvttsps_epi32(A);
}
__m128i test_mm_mask_cvttsps_epi32(__m128i W, __mmask8 U, __m128 A){
// CHECK-LABEL: @test_mm_mask_cvttsps_epi32
-// CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.128(<4 x float>
+// CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.128(<4 x float>
return _mm_mask_cvttsps_epi32(W,U,A);
}
__m128i test_mm_maskz_cvttsps_epi32( __mmask8 U, __m128 A){
// CHECK-LABEL: @test_mm_maskz_cvttsps_epi32
-// CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.128(<4 x float>
+// CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.128(<4 x float>
return _mm_maskz_cvttsps_epi32(U,A);
}
__m256i test_mm256_cvttsps_epi32(__m256 A){
// CHECK-LABEL: @test_mm256_cvttsps_epi32
-// CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.round.256(<8 x float>
+// CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.256(<8 x float>
return _mm256_cvttsps_epi32(A);
}
__m256i test_mm256_mask_cvttsps_epi32(__m256i W,__mmask8 U, __m256 A){
// CHECK-LABEL: @test_mm256_mask_cvttsps_epi32
-// CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.round.256(<8 x float>
+// CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.256(<8 x float>
return _mm256_mask_cvttsps_epi32(W,U,A);
}
__m256i test_mm256_maskz_cvttsps_epi32(__mmask8 U, __m256 A){
// CHECK-LABEL: @test_mm256_maskz_cvttsps_epi32
-// CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.round.256(<8 x float>
+// CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.256(<8 x float>
return _mm256_maskz_cvttsps_epi32(U,A);
}
__m256i test_mm256_cvtts_roundps_epi32(__m256 A){
// CHECK-LABEL: @test_mm256_cvtts_roundps_epi32
-// CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.round.256(<8 x float>
+// CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.256(<8 x float>
return _mm256_cvtts_roundps_epi32(A, _MM_FROUND_NEARBYINT);
}
__m256i test_mm256_mask_cvtts_roundps_epi32(__m256i W,__mmask8 U, __m256 A){
// CHECK-LABEL: @test_mm256_mask_cvtts_roundps_epi32
-// CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.round.256(<8 x float>
+// CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.256(<8 x float>
return _mm256_mask_cvtts_roundps_epi32(W,U,A,_MM_FROUND_NEARBYINT);
}
__m256i test_mm256_maskz_cvtts_roundps_epi32(__mmask8 U, __m256 A){
// CHECK-LABEL: @test_mm256_maskz_cvtts_roundps_epi32
-// CHECK: @llvm.x86.avx512.mask.vcvttps2dqs.round.256(<8 x float>
+// CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.256(<8 x float>
return _mm256_maskz_cvtts_roundps_epi32(U,A,_MM_FROUND_NEARBYINT);
}
__m128i test_mm_cvttsps_epu32(__m128 A){
// CHECK-LABEL: @test_mm_cvttsps_epu32
-// CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.128(<4 x float>
+// CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.128(<4 x float>
return _mm_cvttsps_epu32(A);
}
__m128i test_mm_mask_cvttsps_epu32(__m128i W, __mmask8 U, __m128 A){
// CHECK-LABEL: @test_mm_mask_cvttsps_epu32
-// CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.128(<4 x float>
+// CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.128(<4 x float>
return _mm_mask_cvttsps_epu32(W,U,A);
}
__m128i test_mm_maskz_cvttsps_epu32( __mmask8 U, __m128 A){
// CHECK-LABEL: @test_mm_maskz_cvttsps_epu32
-// CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.128(<4 x float>
+// CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.128(<4 x float>
return _mm_maskz_cvttsps_epu32(U,A);
}
__m256i test_mm256_cvttsps_epu32(__m256 A){
// CHECK-LABEL: @test_mm256_cvttsps_epu32
-// CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.round.256(<8 x float>
+// CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.256(<8 x float>
return _mm256_cvttsps_epu32(A);
}
__m256i test_mm256_mask_cvttsps_epu32(__m256i W,__mmask8 U, __m256 A){
// CHECK-LABEL: @test_mm256_mask_cvttsps_epu32
-// CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.round.256(<8 x float>
+// CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.256(<8 x float>
return _mm256_mask_cvttsps_epu32(W,U,A);
}
__m256i test_mm256_maskz_cvttsps_epu32(__mmask8 U, __m256 A){
// CHECK-LABEL: @test_mm256_maskz_cvttsps_epu32
-// CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.round.256(<8 x float>
+// CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.256(<8 x float>
return _mm256_maskz_cvttsps_epu32(U,A);
}
__m256i test_mm256_cvtts_roundps_epu32(__m256 A){
// CHECK-LABEL: @test_mm256_cvtts_roundps_epu32
-// CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.round.256(<8 x float>
+// CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.256(<8 x float>
return _mm256_cvtts_roundps_epu32(A, _MM_FROUND_NEARBYINT);
}
__m256i test_mm256_mask_cvtts_roundps_epu32(__m256i W,__mmask8 U, __m256 A){
// CHECK-LABEL: @test_mm256_mask_cvtts_roundps_epu32
-// CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.round.256(<8 x float>
+// CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.256(<8 x float>
return _mm256_mask_cvtts_roundps_epu32(W,U,A,_MM_FROUND_NEARBYINT);
}
__m256i test_mm256_maskz_cvtts_roundps_epu32(__mmask8 U, __m256 A){
// CHECK-LABEL: @test_mm256_maskz_cvtts_roundps_epu32
-// CHECK: @llvm.x86.avx512.mask.vcvttps2udqs.round.256(<8 x float>
+// CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.256(<8 x float>
return _mm256_maskz_cvtts_roundps_epu32(U,A,_MM_FROUND_NEARBYINT);
}
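
[Also illustrative: the 256-bit rounding variants take an explicit rounding argument, as the tests above do with _MM_FROUND_NEARBYINT.]

#include <immintrin.h>

__m256i vec_cvt(__m256 v) {
  /* Saturating truncation of 8 floats to unsigned 32-bit ints. */
  return _mm256_cvtts_roundps_epu32(v, _MM_FROUND_NEARBYINT);
}
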
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 5df5348c993b76..5262e3154ff721 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -5522,100 +5522,100 @@ let TargetPrefix = "x86" in {
// conversion with saturation
let TargetPrefix = "x86" in {
- def int_x86_avx512_vcvttss2sis : ClangBuiltin<"__builtin_ia32_vcvttss2sis32">,
+ def int_x86_avx10_vcvttss2sis : ClangBuiltin<"__builtin_ia32_vcvttss2sis32">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_vcvttss2sis64 : ClangBuiltin<"__builtin_ia32_vcvttss2sis64">,
+ def int_x86_avx10_vcvttss2sis64 : ClangBuiltin<"__builtin_ia32_vcvttss2sis64">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_vcvttss2usis : ClangBuiltin<"__builtin_ia32_vcvttss2usis32">,
+ def int_x86_avx10_vcvttss2usis : ClangBuiltin<"__builtin_ia32_vcvttss2usis32">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_vcvttss2usis64 : ClangBuiltin<"__builtin_ia32_vcvttss2usis64">,
+ def int_x86_avx10_vcvttss2usis64 : ClangBuiltin<"__builtin_ia32_vcvttss2usis64">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_vcvttsd2sis : ClangBuiltin<"__builtin_ia32_vcvttsd2sis32">,
+ def int_x86_avx10_vcvttsd2sis : ClangBuiltin<"__builtin_ia32_vcvttsd2sis32">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_vcvttsd2sis64 : ClangBuiltin<"__builtin_ia32_vcvttsd2sis64">,
+ def int_x86_avx10_vcvttsd2sis64 : ClangBuiltin<"__builtin_ia32_vcvttsd2sis64">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_vcvttsd2usis : ClangBuiltin<"__builtin_ia32_vcvttsd2usis32">,
+ def int_x86_avx10_vcvttsd2usis : ClangBuiltin<"__builtin_ia32_vcvttsd2usis32">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_vcvttsd2usis64 : ClangBuiltin<"__builtin_ia32_vcvttsd2usis64">,
+ def int_x86_avx10_vcvttsd2usis64 : ClangBuiltin<"__builtin_ia32_vcvttsd2usis64">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_mask_vcvttpd2dqs_128 : ClangBuiltin<"__builtin_ia32_vcvttpd2dqs128_mask">,
+ def int_x86_avx10_mask_vcvttpd2dqs_128 : ClangBuiltin<"__builtin_ia32_vcvttpd2dqs128_mask">,
DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_vcvttpd2dqs_round_256: ClangBuiltin<"__builtin_ia32_vcvttpd2dqs256_round_mask">,
+ def int_x86_avx10_mask_vcvttpd2dqs_round_256: ClangBuiltin<"__builtin_ia32_vcvttpd2dqs256_round_mask">,
DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_mask_vcvttpd2dqs_round_512 : ClangBuiltin<"__builtin_ia32_vcvttpd2dqs512_round_mask">,
+ def int_x86_avx10_mask_vcvttpd2dqs_round_512 : ClangBuiltin<"__builtin_ia32_vcvttpd2dqs512_round_mask">,
DefaultAttrsIntrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_mask_vcvttpd2udqs_128 : ClangBuiltin<"__builtin_ia32_vcvttpd2udqs128_mask">,
+ def int_x86_avx10_mask_vcvttpd2udqs_128 : ClangBuiltin<"__builtin_ia32_vcvttpd2udqs128_mask">,
DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty,llvm_v4i32_ty, llvm_i8_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_vcvttpd2udqs_round_256: ClangBuiltin<"__builtin_ia32_vcvttpd2udqs256_round_mask">,
+ def int_x86_avx10_mask_vcvttpd2udqs_round_256: ClangBuiltin<"__builtin_ia32_vcvttpd2udqs256_round_mask">,
DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_mask_vcvttpd2udqs_round_512 : ClangBuiltin<"__builtin_ia32_vcvttpd2udqs512_round_mask">,
+ def int_x86_avx10_mask_vcvttpd2udqs_round_512 : ClangBuiltin<"__builtin_ia32_vcvttpd2udqs512_round_mask">,
DefaultAttrsIntrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_mask_vcvttpd2qqs_128 : ClangBuiltin<"__builtin_ia32_vcvttpd2qqs128_mask">,
+ def int_x86_avx10_mask_vcvttpd2qqs_128 : ClangBuiltin<"__builtin_ia32_vcvttpd2qqs128_mask">,
DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty,llvm_v2i64_ty, llvm_i8_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_vcvttpd2qqs_round_256: ClangBuiltin<"__builtin_ia32_vcvttpd2qqs256_round_mask">,
+ def int_x86_avx10_mask_vcvttpd2qqs_round_256: ClangBuiltin<"__builtin_ia32_vcvttpd2qqs256_round_mask">,
DefaultAttrsIntrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_mask_vcvttpd2qqs_round_512 : ClangBuiltin<"__builtin_ia32_vcvttpd2qqs512_round_mask">,
+ def int_x86_avx10_mask_vcvttpd2qqs_round_512 : ClangBuiltin<"__builtin_ia32_vcvttpd2qqs512_round_mask">,
DefaultAttrsIntrinsic<[llvm_v8i64_ty], [llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_mask_vcvttpd2uqqs_128 : ClangBuiltin<"__builtin_ia32_vcvttpd2uqqs128_mask">,
+ def int_x86_avx10_mask_vcvttpd2uqqs_128 : ClangBuiltin<"__builtin_ia32_vcvttpd2uqqs128_mask">,
DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty,llvm_v2i64_ty, llvm_i8_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_vcvttpd2uqqs_round_256: ClangBuiltin<"__builtin_ia32_vcvttpd2uqqs256_round_mask">,
+ def int_x86_avx10_mask_vcvttpd2uqqs_round_256: ClangBuiltin<"__builtin_ia32_vcvttpd2uqqs256_round_mask">,
DefaultAttrsIntrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_mask_vcvttpd2uqqs_round_512 : ClangBuiltin<"__builtin_ia32_vcvttpd2uqqs512_round_mask">,
+ def int_x86_avx10_mask_vcvttpd2uqqs_round_512 : ClangBuiltin<"__builtin_ia32_vcvttpd2uqqs512_round_mask">,
DefaultAttrsIntrinsic<[llvm_v8i64_ty], [llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_mask_vcvttps2dqs_128 : ClangBuiltin<"__builtin_ia32_vcvttps2dqs128_mask">,
+ def int_x86_avx10_mask_vcvttps2dqs_128 : ClangBuiltin<"__builtin_ia32_vcvttps2dqs128_mask">,
DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_vcvttps2dqs_round_256: ClangBuiltin<"__builtin_ia32_vcvttps2dqs256_round_mask">,
+ def int_x86_avx10_mask_vcvttps2dqs_round_256: ClangBuiltin<"__builtin_ia32_vcvttps2dqs256_round_mask">,
DefaultAttrsIntrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_mask_vcvttps2dqs_round_512 : ClangBuiltin<"__builtin_ia32_vcvttps2dqs512_round_mask">,
+ def int_x86_avx10_mask_vcvttps2dqs_round_512 : ClangBuiltin<"__builtin_ia32_vcvttps2dqs512_round_mask">,
DefaultAttrsIntrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_mask_vcvttps2udqs_128 : ClangBuiltin<"__builtin_ia32_vcvttps2udqs128_mask">,
+ def int_x86_avx10_mask_vcvttps2udqs_128 : ClangBuiltin<"__builtin_ia32_vcvttps2udqs128_mask">,
DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_vcvttps2udqs_round_256: ClangBuiltin<"__builtin_ia32_vcvttps2udqs256_round_mask">,
+ def int_x86_avx10_mask_vcvttps2udqs_round_256: ClangBuiltin<"__builtin_ia32_vcvttps2udqs256_round_mask">,
DefaultAttrsIntrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_mask_vcvttps2udqs_round_512 : ClangBuiltin<"__builtin_ia32_vcvttps2udqs512_round_mask">,
+ def int_x86_avx10_mask_vcvttps2udqs_round_512 : ClangBuiltin<"__builtin_ia32_vcvttps2udqs512_round_mask">,
DefaultAttrsIntrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_mask_vcvttps2qqs_128 : ClangBuiltin<"__builtin_ia32_vcvttps2qqs128_mask">,
+ def int_x86_avx10_mask_vcvttps2qqs_128 : ClangBuiltin<"__builtin_ia32_vcvttps2qqs128_mask">,
DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty, llvm_v2i64_ty, llvm_i8_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_vcvttps2qqs_round_256: ClangBuiltin<"__builtin_ia32_vcvttps2qqs256_round_mask">,
+ def int_x86_avx10_mask_vcvttps2qqs_round_256: ClangBuiltin<"__builtin_ia32_vcvttps2qqs256_round_mask">,
DefaultAttrsIntrinsic<[llvm_v4i64_ty], [llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_mask_vcvttps2qqs_round_512 : ClangBuiltin<"__builtin_ia32_vcvttps2qqs512_round_mask">,
+ def int_x86_avx10_mask_vcvttps2qqs_round_512 : ClangBuiltin<"__builtin_ia32_vcvttps2qqs512_round_mask">,
DefaultAttrsIntrinsic<[llvm_v8i64_ty], [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_mask_vcvttps2uqqs_128 : ClangBuiltin<"__builtin_ia32_vcvttps2uqqs128_mask">,
+ def int_x86_avx10_mask_vcvttps2uqqs_128 : ClangBuiltin<"__builtin_ia32_vcvttps2uqqs128_mask">,
DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty,llvm_v2i64_ty, llvm_i8_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_vcvttps2uqqs_round_256: ClangBuiltin<"__builtin_ia32_vcvttps2uqqs256_round_mask">,
+ def int_x86_avx10_mask_vcvttps2uqqs_round_256: ClangBuiltin<"__builtin_ia32_vcvttps2uqqs256_round_mask">,
DefaultAttrsIntrinsic<[llvm_v4i64_ty], [llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_mask_vcvttps2uqqs_round_512 : ClangBuiltin<"__builtin_ia32_vcvttps2uqqs512_round_mask">,
+ def int_x86_avx10_mask_vcvttps2uqqs_round_512 : ClangBuiltin<"__builtin_ia32_vcvttps2uqqs512_round_mask">,
DefaultAttrsIntrinsic<[llvm_v8i64_ty], [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
}
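
[Each renamed intrinsic keeps its ClangBuiltin binding, so the user-facing path should be unchanged; a sketch, with the expansion chain inferred from the bindings above.]

#include <immintrin.h>

/* _mm_cvtts_roundss_si32 should expand to __builtin_ia32_vcvttss2sis32,
   which is bound above to int_x86_avx10_vcvttss2sis. */
int cvt_si32(__m128 x) {
  return _mm_cvtts_roundss_si32(x, _MM_FROUND_NO_EXC);
}
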
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index ef435540eb63ea..ada2bbaffd6645 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -630,26 +630,26 @@ defm VCVTTPS2IUBS : avx10_sat_cvt_base<0x6a, "vcvttps2iubs", SchedWriteVecIMul,
//-------------------------------------------------
// Convert Double to Signed/Unsigned Doubleword with truncation.
-multiclass avx512_cvttpd2dqs<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
- SDNode MaskOpNode, SDNode OpNodeSAE,
- X86SchedWriteWidths sched> {
- let Predicates = [HasAVX10_2, HasAVX10_2_512] in {
+multiclass avx10_cvttpd2dqs<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDNode MaskOpNode, SDNode OpNodeSAE,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasAVX10_2_512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
MaskOpNode, sched.ZMM>,
- avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
- OpNodeSAE, sched.ZMM>, EVEX_V512;
+ avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
+ OpNodeSAE, sched.ZMM>, EVEX_V512;
}
- let Predicates = [HasAVX10_2, HasVLX] in {
+ let Predicates = [HasAVX10_2] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
- null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
- f128mem, VK2WM>, EVEX_V128;
+ null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
+ f128mem, VK2WM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
- MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
+ MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
}
- let Predicates = [HasAVX10_2, HasVLX], hasEVEX_U=1 in {
+ let Predicates = [HasAVX10_2], hasEVEX_U=1 in {
defm Z256 : avx512_vcvt_fp_sae<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNodeSAE,
- sched.YMM>, EVEX_V256;
+ sched.YMM>, EVEX_V256;
}
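
[As a reading aid, not from the patch: the hasEVEX_U-gated Z256 form above is roughly what backs the 256-bit SAE intrinsics.]

#include <immintrin.h>

/* 256-bit double -> 32-bit int conversion with an explicit rounding
   argument; exercises the EVEX.U-encoded Z256 variant. */
__m128i cvt_pd_epi32(__m256d v) {
  return _mm256_cvtts_roundpd_epi32(v, _MM_FROUND_NEARBYINT);
}
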
@@ -706,110 +706,111 @@ multiclass avx512_cvttpd2dqs<bits<8> opc, string OpcodeStr, SDPatternOperator Op
}
// Convert Double to Signed/Unsigned Quadword with truncation and saturation enabled
-multiclass avx512_cvttpd2qqs<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
- SDNode MaskOpNode, SDNode OpNodeRnd,
- X86SchedWriteWidths sched> {
- let Predicates = [HasDQI, HasAVX10_2, HasAVX10_2_512] in {
+multiclass avx10_cvttpd2qqs<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDNode MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasAVX10_2_512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
- MaskOpNode, sched.ZMM>,
- avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
- OpNodeRnd, sched.ZMM>, EVEX_V512;
+ MaskOpNode, sched.ZMM>,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
+ OpNodeRnd, sched.ZMM>, EVEX_V512;
}
- let Predicates = [HasDQI, HasAVX10_2, HasVLX] in {
+ let Predicates = [HasAVX10_2] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
- MaskOpNode, sched.XMM>, EVEX_V128;
+ MaskOpNode, sched.XMM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
- MaskOpNode, sched.YMM>, EVEX_V256;
+ MaskOpNode, sched.YMM>, EVEX_V256;
}
- let Predicates = [HasDQI, HasAVX10_2, HasVLX], hasEVEX_U=1 in {
+ let Predicates = [HasAVX10_2], hasEVEX_U=1 in {
defm Z256 : avx512_vcvt_fp_sae<opc, OpcodeStr, v4i64x_info, v4f64x_info,
- OpNodeRnd, sched.YMM>, EVEX_V256;
+ OpNodeRnd, sched.YMM>, EVEX_V256;
}
}
// Convert Float to Signed/Unsigned Quadword with truncation
-multiclass avx512_cvttps2qqs<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
- SDNode MaskOpNode, SDNode OpNodeRnd,
- X86SchedWriteWidths sched> {
- let Predicates = [HasDQI, HasAVX10_2, HasAVX10_2_512] in {
+multiclass avx10_cvttps2qqs<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDNode MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasAVX10_2_512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
- MaskOpNode, sched.ZMM>,
+ MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
- OpNodeRnd, sched.ZMM>, EVEX_V512;
+ OpNodeRnd, sched.ZMM>, EVEX_V512;
}
- let Predicates = [HasDQI, HasAVX10_2, HasVLX] in {
+ let Predicates = [HasAVX10_2] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
- MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
- (v2i64 (OpNode (bc_v4f32 (v2f64
- (scalar_to_vector (loadf64 addr:$src)))))),
- (v2i64 (MaskOpNode (bc_v4f32 (v2f64
- (scalar_to_vector (loadf64 addr:$src))))))>,
- EVEX_V128;
+ MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
+ (v2i64 (OpNode (bc_v4f32 (v2f64
+ (scalar_to_vector (loadf64 addr:$src)))))),
+ (v2i64 (MaskOpNode (bc_v4f32 (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))>,
+ EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
- MaskOpNode, sched.YMM>, EVEX_V256;
+ MaskOpNode, sched.YMM>, EVEX_V256;
}
- let Predicates = [HasDQI, HasAVX10_2, HasVLX], hasEVEX_U=1 in {
+ let Predicates = [HasAVX10_2], hasEVEX_U=1 in {
defm Z256 : avx512_vcvt_fp_sae<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNodeRnd,
- sched.YMM>, EVEX_V256;
+ sched.YMM>, EVEX_V256;
}
}
// Convert Float to Signed/Unsigned Doubleword with truncation
-multiclass avx512_cvttps2dqs<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+multiclass avx10_cvttps2dqs<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode,
SDNode OpNodeSAE, X86SchedWriteWidths sched> {
- let Predicates = [HasAVX10_2, HasAVX10_2_512] in {
+ let Predicates = [HasAVX10_2_512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
- MaskOpNode, sched.ZMM>,
- avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
- OpNodeSAE, sched.ZMM>, EVEX_V512;
+ MaskOpNode, sched.ZMM>,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
+ OpNodeSAE, sched.ZMM>, EVEX_V512;
}
- let Predicates = [HasAVX10_2, HasVLX] in {
+ let Predicates = [HasAVX10_2] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
- MaskOpNode, sched.XMM>, EVEX_V128;
+ MaskOpNode, sched.XMM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
- MaskOpNode, sched.YMM>, EVEX_V256;
+ MaskOpNode, sched.YMM>, EVEX_V256;
}
- let Predicates = [HasAVX10_2, HasVLX], hasEVEX_U=1 in {
+ let Predicates = [HasAVX10_2], hasEVEX_U=1 in {
defm Z256 : avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f32x_info,
- OpNodeSAE, sched.YMM>, EVEX_V256;
+ OpNodeSAE, sched.YMM>, EVEX_V256;
}
}
-defm VCVTTPD2DQS : avx512_cvttpd2dqs<0x6D, "vcvttpd2dqs", X86cvttp2sis,
- X86cvttp2sis, X86cvttp2sisSAE,
- SchedWriteCvtPD2DQ>,
- PD, REX_W, T_MAP5,PS, EVEX_CD8<64, CD8VF>;
-defm VCVTTPD2UDQS : avx512_cvttpd2dqs<0x6C, "vcvttpd2udqs", X86cvttp2uis,
- X86cvttp2uis, X86cvttp2uisSAE,
- SchedWriteCvtPD2DQ>,
- REX_W, T_MAP5,PS, EVEX_CD8<64, CD8VF>;
-defm VCVTTPS2DQS : avx512_cvttps2dqs<0x6D, "vcvttps2dqs", X86cvttp2sis,
- X86cvttp2sis, X86cvttp2sisSAE,
- SchedWriteCvtPS2DQ>, T_MAP5,PS,
- EVEX_CD8<32, CD8VF>;
-defm VCVTTPS2UDQS : avx512_cvttps2dqs<0x6C, "vcvttps2udqs", X86cvttp2uis,
- X86cvttp2uis, X86cvttp2uisSAE,
- SchedWriteCvtPS2DQ>, T_MAP5,PS, EVEX_CD8<32, CD8VF>;
-defm VCVTTPD2QQS : avx512_cvttpd2qqs<0x6D, "vcvttpd2qqs", X86cvttp2sis,
- X86cvttp2sis, X86cvttp2sisSAE,
- SchedWriteCvtPD2DQ>, REX_W, T_MAP5,PD,
- EVEX_CD8<64, CD8VF>;
-defm VCVTTPS2QQS : avx512_cvttps2qqs<0x6D, "vcvttps2qqs", X86cvttp2sis,
- X86cvttp2sis, X86cvttp2sisSAE,
- SchedWriteCvtPS2DQ>, T_MAP5,PD,
- EVEX_CD8<32, CD8VH>;
-defm VCVTTPD2UQQS : avx512_cvttpd2qqs<0x6C, "vcvttpd2uqqs", X86cvttp2uis,
- X86cvttp2uis, X86cvttp2uisSAE,
- SchedWriteCvtPD2DQ>, REX_W, T_MAP5,PD,
- EVEX_CD8<64, CD8VF>;
-defm VCVTTPS2UQQS : avx512_cvttps2qqs<0x6C, "vcvttps2uqqs", X86cvttp2uis,
- X86cvttp2uis, X86cvttp2uisSAE,
- SchedWriteCvtPS2DQ>, T_MAP5,PD,
- EVEX_CD8<32, CD8VH>;
+defm VCVTTPD2DQS : avx10_cvttpd2dqs<0x6D, "vcvttpd2dqs", X86cvttp2sis,
+ X86cvttp2sis, X86cvttp2sisSAE,
+ SchedWriteCvtPD2DQ>,
+ PD, REX_W, T_MAP5,PS, EVEX_CD8<64, CD8VF>;
+defm VCVTTPD2UDQS : avx10_cvttpd2dqs<0x6C, "vcvttpd2udqs", X86cvttp2uis,
+ X86cvttp2uis, X86cvttp2uisSAE,
+ SchedWriteCvtPD2DQ>,
+ REX_W, T_MAP5,PS, EVEX_CD8<64, CD8VF>;
+defm VCVTTPS2DQS : avx10_cvttps2dqs<0x6D, "vcvttps2dqs", X86cvttp2sis,
+ X86cvttp2sis, X86cvttp2sisSAE,
+ SchedWriteCvtPS2DQ>, T_MAP5,PS,
+ EVEX_CD8<32, CD8VF>;
+defm VCVTTPS2UDQS : avx10_cvttps2dqs<0x6C, "vcvttps2udqs", X86cvttp2uis,
+ X86cvttp2uis, X86cvttp2uisSAE,
+ SchedWriteCvtPS2DQ>, T_MAP5,PS,
+ EVEX_CD8<32, CD8VF>;
+defm VCVTTPD2QQS : avx10_cvttpd2qqs<0x6D, "vcvttpd2qqs", X86cvttp2sis,
+ X86cvttp2sis, X86cvttp2sisSAE,
+ SchedWriteCvtPD2DQ>, REX_W, T_MAP5,PD,
+ EVEX_CD8<64, CD8VF>;
+defm VCVTTPS2QQS : avx10_cvttps2qqs<0x6D, "vcvttps2qqs", X86cvttp2sis,
+ X86cvttp2sis, X86cvttp2sisSAE,
+ SchedWriteCvtPS2DQ>, T_MAP5,PD,
+ EVEX_CD8<32, CD8VH>;
+defm VCVTTPD2UQQS : avx10_cvttpd2qqs<0x6C, "vcvttpd2uqqs", X86cvttp2uis,
+ X86cvttp2uis, X86cvttp2uisSAE,
+ SchedWriteCvtPD2DQ>, REX_W, T_MAP5,PD,
+ EVEX_CD8<64, CD8VF>;
+defm VCVTTPS2UQQS : avx10_cvttps2qqs<0x6C, "vcvttps2uqqs", X86cvttp2uis,
+ X86cvttp2uis, X86cvttp2uisSAE,
+ SchedWriteCvtPS2DQ>, T_MAP5,PD,
+ EVEX_CD8<32, CD8VH>;
let Predicates = [HasAVX10_2] in {
// Special patterns to allow use of X86mcvttp2si for masking. Instruction
@@ -869,10 +870,10 @@ def : Pat<(X86mcvttp2uis (v2f64 (X86VBroadcastld64 addr:$src)),
}
// Convert scalar float/double to signed/unsigned int 32/64 with truncation and saturation.
-multiclass avx512_cvt_s_ds<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
- X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
- SDNode OpNodeInt, SDNode OpNodeSAE,
- X86FoldableSchedWrite sched> {
+multiclass avx10_cvt_s_ds<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
+ X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
+ SDNode OpNodeInt, SDNode OpNodeSAE,
+ X86FoldableSchedWrite sched> {
let Predicates = [HasAVX10_2], ExeDomain = _SrcRC.ExeDomain in {
let isCodeGenOnly = 1 in {
def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
@@ -903,38 +904,38 @@ multiclass avx512_cvt_s_ds<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
}
}
-defm VCVTTSS2SIS: avx512_cvt_s_ds<0x6D, "vcvttss2sis", f32x_info, i32x_info,
- fp_to_sint_sat, X86cvttss2Int,
- X86cvttss2IntSAE, WriteCvtSS2I>,
+defm VCVTTSS2SIS: avx10_cvt_s_ds<0x6D, "vcvttss2sis", f32x_info, i32x_info,
+ fp_to_sint_sat, X86cvttss2Int,
+ X86cvttss2IntSAE, WriteCvtSS2I>,
+ T_MAP5,XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSS2SI64S: avx10_cvt_s_ds<0x6D, "vcvttss2sis", f32x_info, i64x_info,
+ fp_to_sint_sat, X86cvttss2Int,
+ X86cvttss2IntSAE, WriteCvtSS2I>,
+ REX_W, T_MAP5,XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSD2SIS: avx10_cvt_s_ds<0x6D, "vcvttsd2sis", f64x_info, i32x_info,
+ fp_to_sint_sat, X86cvttss2Int,
+ X86cvttss2IntSAE, WriteCvtSD2I>,
+ T_MAP5,XD, EVEX_CD8<64, CD8VT1>;
+defm VCVTTSD2SI64S: avx10_cvt_s_ds<0x6D, "vcvttsd2sis", f64x_info, i64x_info,
+ fp_to_sint_sat, X86cvttss2Int,
+ X86cvttss2IntSAE, WriteCvtSD2I>,
+ REX_W, T_MAP5,XD, EVEX_CD8<64, CD8VT1>;
+defm VCVTTSS2USIS: avx10_cvt_s_ds<0x6C, "vcvttss2usis", f32x_info, i32x_info,
+ fp_to_uint_sat, X86cvttss2UInt,
+ X86cvttss2UIntSAE, WriteCvtSS2I>,
T_MAP5,XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTTSS2SI64S: avx512_cvt_s_ds<0x6D, "vcvttss2sis", f32x_info, i64x_info,
- fp_to_sint_sat, X86cvttss2Int,
- X86cvttss2IntSAE, WriteCvtSS2I>,
- REX_W, T_MAP5,XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTTSD2SIS: avx512_cvt_s_ds<0x6D, "vcvttsd2sis", f64x_info, i32x_info,
- fp_to_sint_sat, X86cvttss2Int,
- X86cvttss2IntSAE, WriteCvtSD2I>,
+defm VCVTTSS2USI64S: avx10_cvt_s_ds<0x6C, "vcvttss2usis", f32x_info, i64x_info,
+ fp_to_uint_sat, X86cvttss2UInt,
+ X86cvttss2UIntSAE, WriteCvtSS2I>,
+ T_MAP5,XS,REX_W, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSD2USIS: avx10_cvt_s_ds<0x6C, "vcvttsd2usis", f64x_info, i32x_info,
+ fp_to_uint_sat, X86cvttss2UInt,
+ X86cvttss2UIntSAE, WriteCvtSD2I>,
T_MAP5,XD, EVEX_CD8<64, CD8VT1>;
-defm VCVTTSD2SI64S: avx512_cvt_s_ds<0x6D, "vcvttsd2sis", f64x_info, i64x_info,
- fp_to_sint_sat, X86cvttss2Int,
- X86cvttss2IntSAE, WriteCvtSD2I>,
- REX_W, T_MAP5,XD, EVEX_CD8<64, CD8VT1>;
-defm VCVTTSS2USIS: avx512_cvt_s_ds<0x6C, "vcvttss2usis", f32x_info, i32x_info,
- fp_to_uint_sat, X86cvttss2UInt,
- X86cvttss2UIntSAE, WriteCvtSS2I>,
- T_MAP5,XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTTSS2USI64S: avx512_cvt_s_ds<0x6C, "vcvttss2usis", f32x_info, i64x_info,
- fp_to_uint_sat, X86cvttss2UInt,
- X86cvttss2UIntSAE, WriteCvtSS2I>,
- T_MAP5,XS,REX_W, EVEX_CD8<32, CD8VT1>;
-defm VCVTTSD2USIS: avx512_cvt_s_ds<0x6C, "vcvttsd2usis", f64x_info, i32x_info,
- fp_to_uint_sat, X86cvttss2UInt,
- X86cvttss2UIntSAE, WriteCvtSD2I>,
- T_MAP5,XD, EVEX_CD8<64, CD8VT1>;
-defm VCVTTSD2USI64S: avx512_cvt_s_ds<0x6C, "vcvttsd2usis", f64x_info, i64x_info,
- fp_to_uint_sat, X86cvttss2UInt,
- X86cvttss2UIntSAE, WriteCvtSD2I>,
- T_MAP5,XD, REX_W, EVEX_CD8<64, CD8VT1>;
+defm VCVTTSD2USI64S: avx10_cvt_s_ds<0x6C, "vcvttsd2usis", f64x_info, i64x_info,
+ fp_to_uint_sat, X86cvttss2UInt,
+ X86cvttss2UIntSAE, WriteCvtSD2I>,
+ T_MAP5,XD, REX_W, EVEX_CD8<64, CD8VT1>;
//-------------------------------------------------
// AVX10 CONVERT instructions
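
[Not from the patch: with the simplified predicates above, the 128- and 256-bit forms require only HasAVX10_2, with no separate VLX or DQI feature check, e.g.:]

#include <immintrin.h>

/* 128-bit double -> signed 64-bit saturating truncation. */
__m128i cvt128(__m128d x) {
  return _mm_cvttspd_epi64(x);
}
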
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 0e6c82e4e2e96c..86fd04046d16a0 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -569,12 +569,36 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
X86_INTRINSIC_DATA(avx10_mask_vcvttpd2dq256, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
+ X86_INTRINSIC_DATA(avx10_mask_vcvttpd2dqs_128, CVTPD2DQ_MASK,
+ X86ISD::CVTTP2SIS, X86ISD::MCVTTP2SIS),
+ X86_INTRINSIC_DATA(avx10_mask_vcvttpd2dqs_round_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SIS, X86ISD::CVTTP2SIS_SAE),
+ X86_INTRINSIC_DATA(avx10_mask_vcvttpd2dqs_round_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SIS, X86ISD::CVTTP2SIS_SAE),
X86_INTRINSIC_DATA(avx10_mask_vcvttpd2qq256, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
+ X86_INTRINSIC_DATA(avx10_mask_vcvttpd2qqs_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SIS, 0),
+ X86_INTRINSIC_DATA(avx10_mask_vcvttpd2qqs_round_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SIS, X86ISD::CVTTP2SIS_SAE),
+ X86_INTRINSIC_DATA(avx10_mask_vcvttpd2qqs_round_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SIS, X86ISD::CVTTP2SIS_SAE),
X86_INTRINSIC_DATA(avx10_mask_vcvttpd2udq256, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
+ X86_INTRINSIC_DATA(avx10_mask_vcvttpd2udqs_128, CVTPD2DQ_MASK,
+ X86ISD::CVTTP2UIS, X86ISD::MCVTTP2SIS),
+ X86_INTRINSIC_DATA(avx10_mask_vcvttpd2udqs_round_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UIS, X86ISD::CVTTP2UIS_SAE),
+ X86_INTRINSIC_DATA(avx10_mask_vcvttpd2udqs_round_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UIS, X86ISD::CVTTP2UIS_SAE),
X86_INTRINSIC_DATA(avx10_mask_vcvttpd2uqq256, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
+ X86_INTRINSIC_DATA(avx10_mask_vcvttpd2uqqs_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UIS, 0),
+ X86_INTRINSIC_DATA(avx10_mask_vcvttpd2uqqs_round_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UIS, X86ISD::CVTTP2UIS_SAE),
+ X86_INTRINSIC_DATA(avx10_mask_vcvttpd2uqqs_round_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UIS, X86ISD::CVTTP2UIS_SAE),
X86_INTRINSIC_DATA(avx10_mask_vcvttph2dq256, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
X86_INTRINSIC_DATA(avx10_mask_vcvttph2ibs128, INTR_TYPE_1OP_MASK,
@@ -601,6 +625,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
X86_INTRINSIC_DATA(avx10_mask_vcvttps2dq256, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
+ X86_INTRINSIC_DATA(avx10_mask_vcvttps2dqs_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SIS, 0),
+ X86_INTRINSIC_DATA(avx10_mask_vcvttps2dqs_round_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SIS, X86ISD::CVTTP2SIS_SAE),
+ X86_INTRINSIC_DATA(avx10_mask_vcvttps2dqs_round_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SIS, X86ISD::CVTTP2SIS_SAE),
X86_INTRINSIC_DATA(avx10_mask_vcvttps2ibs128, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2IBS, 0),
X86_INTRINSIC_DATA(avx10_mask_vcvttps2ibs256, INTR_TYPE_1OP_MASK,
@@ -615,10 +645,28 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::CVTTP2IUBS, X86ISD::CVTTP2IUBS_SAE),
X86_INTRINSIC_DATA(avx10_mask_vcvttps2qq256, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
+ X86_INTRINSIC_DATA(avx10_mask_vcvttps2qqs_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SIS, 0),
+ X86_INTRINSIC_DATA(avx10_mask_vcvttps2qqs_round_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SIS, X86ISD::CVTTP2SIS_SAE),
+ X86_INTRINSIC_DATA(avx10_mask_vcvttps2qqs_round_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SIS, X86ISD::CVTTP2SIS_SAE),
X86_INTRINSIC_DATA(avx10_mask_vcvttps2udq256, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
+ X86_INTRINSIC_DATA(avx10_mask_vcvttps2udqs_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UIS, 0),
+ X86_INTRINSIC_DATA(avx10_mask_vcvttps2udqs_round_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UIS, X86ISD::CVTTP2UIS_SAE),
+ X86_INTRINSIC_DATA(avx10_mask_vcvttps2udqs_round_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UIS, X86ISD::CVTTP2UIS_SAE),
X86_INTRINSIC_DATA(avx10_mask_vcvttps2uqq256, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
+ X86_INTRINSIC_DATA(avx10_mask_vcvttps2uqqs_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UIS, 0),
+ X86_INTRINSIC_DATA(avx10_mask_vcvttps2uqqs_round_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UIS, X86ISD::CVTTP2UIS_SAE),
+ X86_INTRINSIC_DATA(avx10_mask_vcvttps2uqqs_round_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UIS, X86ISD::CVTTP2UIS_SAE),
X86_INTRINSIC_DATA(avx10_mask_vfcmaddcph256, CFMA_OP_MASK, X86ISD::VFCMADDC,
X86ISD::VFCMADDC_RND),
X86_INTRINSIC_DATA(avx10_mask_vfcmulcph256, INTR_TYPE_2OP_MASK,
@@ -757,6 +805,22 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::CVTTP2IUBS, 0),
X86_INTRINSIC_DATA(avx10_vcvttnebf162iubs512, INTR_TYPE_1OP,
X86ISD::CVTTP2IUBS, 0),
+ X86_INTRINSIC_DATA(avx10_vcvttsd2sis, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SIS,
+ X86ISD::CVTTS2SIS_SAE),
+ X86_INTRINSIC_DATA(avx10_vcvttsd2sis64, INTR_TYPE_1OP_SAE,
+ X86ISD::CVTTS2SIS, X86ISD::CVTTS2SIS_SAE),
+ X86_INTRINSIC_DATA(avx10_vcvttsd2usis, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2UIS,
+ X86ISD::CVTTS2UIS_SAE),
+ X86_INTRINSIC_DATA(avx10_vcvttsd2usis64, INTR_TYPE_1OP_SAE,
+ X86ISD::CVTTS2UIS, X86ISD::CVTTS2UIS_SAE),
+ X86_INTRINSIC_DATA(avx10_vcvttss2sis, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SIS,
+ X86ISD::CVTTS2SIS_SAE),
+ X86_INTRINSIC_DATA(avx10_vcvttss2sis64, INTR_TYPE_1OP_SAE,
+ X86ISD::CVTTS2SIS, X86ISD::CVTTS2SIS_SAE),
+ X86_INTRINSIC_DATA(avx10_vcvttss2usis, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2UIS,
+ X86ISD::CVTTS2UIS_SAE),
+ X86_INTRINSIC_DATA(avx10_vcvttss2usis64, INTR_TYPE_1OP_SAE,
+ X86ISD::CVTTS2UIS, X86ISD::CVTTS2UIS_SAE),
X86_INTRINSIC_DATA(avx10_vdivpd256, INTR_TYPE_2OP, ISD::FDIV,
X86ISD::FDIV_RND),
X86_INTRINSIC_DATA(avx10_vdivph256, INTR_TYPE_2OP, ISD::FDIV,
@@ -834,6 +898,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::FSUB_RND),
X86_INTRINSIC_DATA(avx10_vsubps256, INTR_TYPE_2OP, ISD::FSUB,
X86ISD::FSUB_RND),
X86_INTRINSIC_DATA(avx2_mpsadbw, INTR_TYPE_3OP_IMM8, X86ISD::MPSADBW, 0),
X86_INTRINSIC_DATA(avx2_packssdw, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
@@ -1351,54 +1416,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::CVTPS2PH, X86ISD::MCVTPS2PH),
X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_512, CVTPS2PH_MASK,
X86ISD::CVTPS2PH, X86ISD::MCVTPS2PH),
- X86_INTRINSIC_DATA(avx512_mask_vcvttpd2dqs_128, CVTPD2DQ_MASK,
- X86ISD::CVTTP2SIS, X86ISD::MCVTTP2SIS),
- X86_INTRINSIC_DATA(avx512_mask_vcvttpd2dqs_round_256, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2SIS, X86ISD::CVTTP2SIS_SAE),
- X86_INTRINSIC_DATA(avx512_mask_vcvttpd2dqs_round_512, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2SIS, X86ISD::CVTTP2SIS_SAE),
- X86_INTRINSIC_DATA(avx512_mask_vcvttpd2qqs_128, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2SIS, 0),
- X86_INTRINSIC_DATA(avx512_mask_vcvttpd2qqs_round_256, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2SIS, X86ISD::CVTTP2SIS_SAE),
- X86_INTRINSIC_DATA(avx512_mask_vcvttpd2qqs_round_512, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2SIS, X86ISD::CVTTP2SIS_SAE),
- X86_INTRINSIC_DATA(avx512_mask_vcvttpd2udqs_128, CVTPD2DQ_MASK,
- X86ISD::CVTTP2UIS, X86ISD::MCVTTP2SIS),
- X86_INTRINSIC_DATA(avx512_mask_vcvttpd2udqs_round_256, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2UIS, X86ISD::CVTTP2UIS_SAE),
- X86_INTRINSIC_DATA(avx512_mask_vcvttpd2udqs_round_512, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2UIS, X86ISD::CVTTP2UIS_SAE),
- X86_INTRINSIC_DATA(avx512_mask_vcvttpd2uqqs_128, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2UIS, 0),
- X86_INTRINSIC_DATA(avx512_mask_vcvttpd2uqqs_round_256, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2UIS, X86ISD::CVTTP2UIS_SAE),
- X86_INTRINSIC_DATA(avx512_mask_vcvttpd2uqqs_round_512, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2UIS, X86ISD::CVTTP2UIS_SAE),
- X86_INTRINSIC_DATA(avx512_mask_vcvttps2dqs_128, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2SIS, 0),
- X86_INTRINSIC_DATA(avx512_mask_vcvttps2dqs_round_256, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2SIS, X86ISD::CVTTP2SIS_SAE),
- X86_INTRINSIC_DATA(avx512_mask_vcvttps2dqs_round_512, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2SIS, X86ISD::CVTTP2SIS_SAE),
- X86_INTRINSIC_DATA(avx512_mask_vcvttps2qqs_128, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2SIS, 0),
- X86_INTRINSIC_DATA(avx512_mask_vcvttps2qqs_round_256, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2SIS, X86ISD::CVTTP2SIS_SAE),
- X86_INTRINSIC_DATA(avx512_mask_vcvttps2qqs_round_512, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2SIS, X86ISD::CVTTP2SIS_SAE),
- X86_INTRINSIC_DATA(avx512_mask_vcvttps2udqs_128, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2UIS, 0),
- X86_INTRINSIC_DATA(avx512_mask_vcvttps2udqs_round_256, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2UIS, X86ISD::CVTTP2UIS_SAE),
- X86_INTRINSIC_DATA(avx512_mask_vcvttps2udqs_round_512, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2UIS, X86ISD::CVTTP2UIS_SAE),
- X86_INTRINSIC_DATA(avx512_mask_vcvttps2uqqs_128, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2UIS, 0),
- X86_INTRINSIC_DATA(avx512_mask_vcvttps2uqqs_round_256, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2UIS, X86ISD::CVTTP2UIS_SAE),
- X86_INTRINSIC_DATA(avx512_mask_vcvttps2uqqs_round_512, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2UIS, X86ISD::CVTTP2UIS_SAE),
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_128, FIXUPIMM_MASKZ,
X86ISD::VFIXUPIMM, 0),
@@ -1574,23 +1591,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::CVTS2UI_RND),
X86_INTRINSIC_DATA(avx512_vcvtss2usi64, INTR_TYPE_1OP, X86ISD::CVTS2UI,
X86ISD::CVTS2UI_RND),
- X86_INTRINSIC_DATA(avx512_vcvttsd2sis, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SIS,
- X86ISD::CVTTS2SIS_SAE),
- X86_INTRINSIC_DATA(avx512_vcvttsd2sis64, INTR_TYPE_1OP_SAE,
- X86ISD::CVTTS2SIS, X86ISD::CVTTS2SIS_SAE),
- X86_INTRINSIC_DATA(avx512_vcvttsd2usis, INTR_TYPE_1OP_SAE,
- X86ISD::CVTTS2UIS, X86ISD::CVTTS2UIS_SAE),
- X86_INTRINSIC_DATA(avx512_vcvttsd2usis64, INTR_TYPE_1OP_SAE,
- X86ISD::CVTTS2UIS, X86ISD::CVTTS2UIS_SAE),
- X86_INTRINSIC_DATA(avx512_vcvttss2sis, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SIS,
- X86ISD::CVTTS2SIS_SAE),
- X86_INTRINSIC_DATA(avx512_vcvttss2sis64, INTR_TYPE_1OP_SAE,
- X86ISD::CVTTS2SIS, X86ISD::CVTTS2SIS_SAE),
- X86_INTRINSIC_DATA(avx512_vcvttss2usis, INTR_TYPE_1OP_SAE,
- X86ISD::CVTTS2UIS, X86ISD::CVTTS2UIS_SAE),
- X86_INTRINSIC_DATA(avx512_vcvttss2usis64, INTR_TYPE_1OP_SAE,
- X86ISD::CVTTS2UIS, X86ISD::CVTTS2UIS_SAE),
-
X86_INTRINSIC_DATA(avx512_vfmadd_f32, INTR_TYPE_3OP, ISD::FMA,
X86ISD::FMADD_RND),
X86_INTRINSIC_DATA(avx512_vfmadd_f64, INTR_TYPE_3OP, ISD::FMA,
@@ -1603,6 +1603,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::FMADDSUB_RND),
X86_INTRINSIC_DATA(avx512_vfmaddsub_ps_512, INTR_TYPE_3OP, X86ISD::FMADDSUB,
X86ISD::FMADDSUB_RND),
X86_INTRINSIC_DATA(avx512_vpdpbusd_128, INTR_TYPE_3OP, X86ISD::VPDPBUSD, 0),
X86_INTRINSIC_DATA(avx512_vpdpbusd_256, INTR_TYPE_3OP, X86ISD::VPDPBUSD, 0),
X86_INTRINSIC_DATA(avx512_vpdpbusd_512, INTR_TYPE_3OP, X86ISD::VPDPBUSD, 0),
diff --git a/llvm/test/CodeGen/X86/avx10_2_512satcvtds-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2_512satcvtds-intrinsics.ll
index 5d556dedcf8722..652c35c7770918 100644
--- a/llvm/test/CodeGen/X86/avx10_2_512satcvtds-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2_512satcvtds-intrinsics.ll
@@ -17,7 +17,7 @@ define <8 x i32> @test_int_x86_mask_vcvtt_pd2dqs_512(<8 x double> %x0, <8 x i32>
; X86-NEXT: vcvttpd2dqs %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0xfc,0x49,0x6d,0xc8]
; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.round.512( <8 x double> %x0, <8 x i32> %src, i8 %mask, i32 4)
+ %res = call <8 x i32> @llvm.x86.avx10.mask.vcvttpd2dqs.round.512(<8 x double> %x0, <8 x i32> %src, i8 %mask, i32 4)
ret <8 x i32> %res
}
@@ -33,7 +33,7 @@ define <8 x i32> @test_int_x86_maskz_vcvtt_pd2dqs_512_z(<8 x double> %x0, i8 %ma
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttpd2dqs %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0xc9,0x6d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.round.512( <8 x double> %x0, <8 x i32> zeroinitializer, i8 %mask, i32 4)
+ %res = call <8 x i32> @llvm.x86.avx10.mask.vcvttpd2dqs.round.512(<8 x double> %x0, <8 x i32> zeroinitializer, i8 %mask, i32 4)
ret <8 x i32> %res
}
@@ -49,7 +49,7 @@ define <8 x i32> @test_int_x86_mask_vcvtt_pd2dqs_512_undef(<8 x double> %x0, i8
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttpd2dqs %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0xc9,0x6d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.round.512( <8 x double> %x0, <8 x i32> undef, i8 %mask, i32 4)
+ %res = call <8 x i32> @llvm.x86.avx10.mask.vcvttpd2dqs.round.512(<8 x double> %x0, <8 x i32> undef, i8 %mask, i32 4)
ret <8 x i32> %res
}
@@ -65,10 +65,10 @@ define <8 x i32> @test_int_x86_mask_vcvtt_pd2dqs_512_default(<8 x double>* %x0)
; X86-NEXT: vcvttpd2dqs (%eax), %ymm0 # encoding: [0x62,0xf5,0xfc,0x48,0x6d,0x00]
; X86-NEXT: retl # encoding: [0xc3]
%x10 = load <8 x double>, <8 x double> * %x0
- %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.round.512( <8 x double> %x10, <8 x i32> undef, i8 -1, i32 4)
+ %res = call <8 x i32> @llvm.x86.avx10.mask.vcvttpd2dqs.round.512(<8 x double> %x10, <8 x i32> undef, i8 -1, i32 4)
ret <8 x i32> %res
}
-declare <8 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.round.512(<8 x double>, <8 x i32>, i8 , i32)
+declare <8 x i32> @llvm.x86.avx10.mask.vcvttpd2dqs.round.512(<8 x double>, <8 x i32>, i8 , i32)
define <8 x i32> @test_int_x86_mask_vcvtt_pd2udqs_512(<8 x double> %x0, <8 x i32> %src, i8 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_pd2udqs_512:
@@ -84,7 +84,7 @@ define <8 x i32> @test_int_x86_mask_vcvtt_pd2udqs_512(<8 x double> %x0, <8 x i32
; X86-NEXT: vcvttpd2udqs %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0xfc,0x49,0x6c,0xc8]
; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.round.512( <8 x double> %x0, <8 x i32> %src, i8 %mask, i32 4)
+ %res = call <8 x i32> @llvm.x86.avx10.mask.vcvttpd2udqs.round.512(<8 x double> %x0, <8 x i32> %src, i8 %mask, i32 4)
ret <8 x i32> %res
}
@@ -100,7 +100,7 @@ define <8 x i32> @test_int_x86_maskz_vcvtt_pd2udqs_512_z(<8 x double> %x0, i8 %m
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttpd2udqs %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0xc9,0x6c,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.round.512( <8 x double> %x0, <8 x i32> zeroinitializer, i8 %mask, i32 4)
+ %res = call <8 x i32> @llvm.x86.avx10.mask.vcvttpd2udqs.round.512(<8 x double> %x0, <8 x i32> zeroinitializer, i8 %mask, i32 4)
ret <8 x i32> %res
}
@@ -116,7 +116,7 @@ define <8 x i32> @test_int_x86_mask_vcvtt_pd2udqs_512_undef(<8 x double> %x0, i8
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttpd2udqs %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0xc9,0x6c,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.round.512( <8 x double> %x0, <8 x i32> undef, i8 %mask, i32 4)
+ %res = call <8 x i32> @llvm.x86.avx10.mask.vcvttpd2udqs.round.512(<8 x double> %x0, <8 x i32> undef, i8 %mask, i32 4)
ret <8 x i32> %res
}
@@ -132,10 +132,10 @@ define <8 x i32> @test_int_x86_mask_vcvtt_pd2udqs_512_default(<8 x double>* %x0)
; X86-NEXT: vcvttpd2udqs (%eax), %ymm0 # encoding: [0x62,0xf5,0xfc,0x48,0x6c,0x00]
; X86-NEXT: retl # encoding: [0xc3]
%x10 = load <8 x double>, <8 x double> * %x0
- %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.round.512( <8 x double> %x10, <8 x i32> undef, i8 -1, i32 4)
+ %res = call <8 x i32> @llvm.x86.avx10.mask.vcvttpd2udqs.round.512(<8 x double> %x10, <8 x i32> undef, i8 -1, i32 4)
ret <8 x i32> %res
}
-declare <8 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.round.512(<8 x double>, <8 x i32>, i8 , i32)
+declare <8 x i32> @llvm.x86.avx10.mask.vcvttpd2udqs.round.512(<8 x double>, <8 x i32>, i8 , i32)
define <8 x i64> @test_int_x86_mask_vcvtt_pd2qqs_512(<8 x double> %x0, <8 x i64> %src, i8 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_pd2qqs_512:
@@ -151,7 +151,7 @@ define <8 x i64> @test_int_x86_mask_vcvtt_pd2qqs_512(<8 x double> %x0, <8 x i64>
; X86-NEXT: vcvttpd2qqs %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf5,0xfd,0x49,0x6d,0xc8]
; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.round.512( <8 x double> %x0, <8 x i64> %src, i8 %mask, i32 4)
+ %res = call <8 x i64> @llvm.x86.avx10.mask.vcvttpd2qqs.round.512(<8 x double> %x0, <8 x i64> %src, i8 %mask, i32 4)
ret <8 x i64> %res
}
@@ -167,7 +167,7 @@ define <8 x i64> @test_int_x86_maskz_vcvtt_pd2qqs_512_z(<8 x double> %x0, i8 %ma
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttpd2qqs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0xc9,0x6d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.round.512( <8 x double> %x0, <8 x i64> zeroinitializer, i8 %mask, i32 4)
+ %res = call <8 x i64> @llvm.x86.avx10.mask.vcvttpd2qqs.round.512(<8 x double> %x0, <8 x i64> zeroinitializer, i8 %mask, i32 4)
ret <8 x i64> %res
}
@@ -183,7 +183,7 @@ define <8 x i64> @test_int_x86_mask_vcvtt_pd2qqs_512_undef(<8 x double> %x0, i8
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttpd2qqs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0xc9,0x6d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.round.512( <8 x double> %x0, <8 x i64> undef, i8 %mask, i32 4)
+ %res = call <8 x i64> @llvm.x86.avx10.mask.vcvttpd2qqs.round.512(<8 x double> %x0, <8 x i64> undef, i8 %mask, i32 4)
ret <8 x i64> %res
}
@@ -199,10 +199,10 @@ define <8 x i64> @test_int_x86_mask_vcvtt_pd2qqs_512_default(<8 x double>* %x0)
; X86-NEXT: vcvttpd2qqs (%eax), %zmm0 # encoding: [0x62,0xf5,0xfd,0x48,0x6d,0x00]
; X86-NEXT: retl # encoding: [0xc3]
%x10 = load <8 x double>, <8 x double>* %x0
- %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.round.512( <8 x double> %x10, <8 x i64> undef, i8 -1, i32 4)
+ %res = call <8 x i64> @llvm.x86.avx10.mask.vcvttpd2qqs.round.512(<8 x double> %x10, <8 x i64> undef, i8 -1, i32 4)
ret <8 x i64> %res
}
-declare <8 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.round.512(<8 x double>, <8 x i64>, i8 , i32)
+declare <8 x i64> @llvm.x86.avx10.mask.vcvttpd2qqs.round.512(<8 x double>, <8 x i64>, i8 , i32)
define <8 x i64> @test_int_x86_mask_vcvtt_pd2uqqs_512(<8 x double> %x0, <8 x i64> %src, i8 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_pd2uqqs_512:
@@ -218,7 +218,7 @@ define <8 x i64> @test_int_x86_mask_vcvtt_pd2uqqs_512(<8 x double> %x0, <8 x i64
; X86-NEXT: vcvttpd2uqqs %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf5,0xfd,0x49,0x6c,0xc8]
; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.round.512( <8 x double> %x0, <8 x i64> %src, i8 %mask, i32 4)
+ %res = call <8 x i64> @llvm.x86.avx10.mask.vcvttpd2uqqs.round.512(<8 x double> %x0, <8 x i64> %src, i8 %mask, i32 4)
ret <8 x i64> %res
}
@@ -234,7 +234,7 @@ define <8 x i64> @test_int_x86_maskz_vcvtt_pd2uqqs_512_z(<8 x double> %x0, i8 %m
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttpd2uqqs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0xc9,0x6c,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.round.512( <8 x double> %x0, <8 x i64> zeroinitializer, i8 %mask, i32 4)
+ %res = call <8 x i64> @llvm.x86.avx10.mask.vcvttpd2uqqs.round.512(<8 x double> %x0, <8 x i64> zeroinitializer, i8 %mask, i32 4)
ret <8 x i64> %res
}
@@ -250,7 +250,7 @@ define <8 x i64> @test_int_x86_mask_vcvtt_pd2uqqs_512_undef(<8 x double> %x0, i8
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttpd2uqqs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0xc9,0x6c,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.round.512( <8 x double> %x0, <8 x i64> undef, i8 %mask, i32 4)
+ %res = call <8 x i64> @llvm.x86.avx10.mask.vcvttpd2uqqs.round.512(<8 x double> %x0, <8 x i64> undef, i8 %mask, i32 4)
ret <8 x i64> %res
}
@@ -266,10 +266,10 @@ define <8 x i64> @test_int_x86_mask_vcvtt_pd2uqqs_512_default(<8 x double>* %x0)
; X86-NEXT: vcvttpd2uqqs (%eax), %zmm0 # encoding: [0x62,0xf5,0xfd,0x48,0x6c,0x00]
; X86-NEXT: retl # encoding: [0xc3]
%x10 = load <8 x double>, <8 x double>* %x0
- %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.round.512( <8 x double> %x10, <8 x i64> undef, i8 -1, i32 4)
+ %res = call <8 x i64> @llvm.x86.avx10.mask.vcvttpd2uqqs.round.512(<8 x double> %x10, <8 x i64> undef, i8 -1, i32 4)
ret <8 x i64> %res
}
-declare <8 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.round.512(<8 x double>, <8 x i64>, i8 , i32)
+declare <8 x i64> @llvm.x86.avx10.mask.vcvttpd2uqqs.round.512(<8 x double>, <8 x i64>, i8 , i32)
define <16 x i32> @test_int_x86_mask_vcvtt_ps2dqs_512(<16 x float> %x0, <16 x i32> %src, i16 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_ps2dqs_512:
@@ -285,7 +285,7 @@ define <16 x i32> @test_int_x86_mask_vcvtt_ps2dqs_512(<16 x float> %x0, <16 x i3
; X86-NEXT: vcvttps2dqs %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf5,0x7c,0x49,0x6d,0xc8]
; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <16 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.round.512( <16 x float> %x0, <16 x i32> %src, i16 %mask, i32 4)
+ %res = call <16 x i32> @llvm.x86.avx10.mask.vcvttps2dqs.round.512(<16 x float> %x0, <16 x i32> %src, i16 %mask, i32 4)
ret <16 x i32> %res
}
@@ -301,7 +301,7 @@ define <16 x i32> @test_int_x86_maskz_vcvtt_ps2dqs_512_z(<16 x float> %x0, i16 %
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttps2dqs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xc9,0x6d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <16 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.round.512( <16 x float> %x0, <16 x i32> zeroinitializer, i16 %mask, i32 4)
+ %res = call <16 x i32> @llvm.x86.avx10.mask.vcvttps2dqs.round.512(<16 x float> %x0, <16 x i32> zeroinitializer, i16 %mask, i32 4)
ret <16 x i32> %res
}
@@ -317,7 +317,7 @@ define <16 x i32> @test_int_x86_mask_vcvtt_ps2dqs_512_undef(<16 x float> %x0, i1
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttps2dqs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xc9,0x6d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <16 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.round.512( <16 x float> %x0, <16 x i32> undef, i16 %mask, i32 4)
+ %res = call <16 x i32> @llvm.x86.avx10.mask.vcvttps2dqs.round.512(<16 x float> %x0, <16 x i32> undef, i16 %mask, i32 4)
ret <16 x i32> %res
}
@@ -333,10 +333,10 @@ define <16 x i32> @test_int_x86_mask_vcvtt_ps2dqs_512_default(<16 x float>* %x0)
; X86-NEXT: vcvttps2dqs (%eax), %zmm0 # encoding: [0x62,0xf5,0x7c,0x48,0x6d,0x00]
; X86-NEXT: retl # encoding: [0xc3]
%x10 = load <16 x float>, <16 x float>* %x0
- %res = call <16 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.round.512( <16 x float> %x10, <16 x i32> undef, i16 -1, i32 4)
+ %res = call <16 x i32> @llvm.x86.avx10.mask.vcvttps2dqs.round.512(<16 x float> %x10, <16 x i32> undef, i16 -1, i32 4)
ret <16 x i32> %res
}
-declare <16 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.round.512(<16 x float>, <16 x i32>, i16 , i32)
+declare <16 x i32> @llvm.x86.avx10.mask.vcvttps2dqs.round.512(<16 x float>, <16 x i32>, i16 , i32)
define <16 x i32> @test_int_x86_mask_vcvtt_ps2udqs_512(<16 x float> %x0, <16 x i32> %src, i16 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_ps2udqs_512:
@@ -352,7 +352,7 @@ define <16 x i32> @test_int_x86_mask_vcvtt_ps2udqs_512(<16 x float> %x0, <16 x i
; X86-NEXT: vcvttps2udqs %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf5,0x7c,0x49,0x6c,0xc8]
; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <16 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.round.512( <16 x float> %x0, <16 x i32> %src, i16 %mask, i32 4)
+ %res = call <16 x i32> @llvm.x86.avx10.mask.vcvttps2udqs.round.512(<16 x float> %x0, <16 x i32> %src, i16 %mask, i32 4)
ret <16 x i32> %res
}
@@ -368,7 +368,7 @@ define <16 x i32> @test_int_x86_maskz_vcvtt_ps2udqs_512_z(<16 x float> %x0, i16
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttps2udqs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xc9,0x6c,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <16 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.round.512( <16 x float> %x0, <16 x i32> zeroinitializer, i16 %mask, i32 4)
+ %res = call <16 x i32> @llvm.x86.avx10.mask.vcvttps2udqs.round.512(<16 x float> %x0, <16 x i32> zeroinitializer, i16 %mask, i32 4)
ret <16 x i32> %res
}
@@ -384,7 +384,7 @@ define <16 x i32> @test_int_x86_mask_vcvtt_ps2udqs_512_undef(<16 x float> %x0, i
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttps2udqs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xc9,0x6c,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <16 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.round.512( <16 x float> %x0, <16 x i32> undef, i16 %mask, i32 4)
+ %res = call <16 x i32> @llvm.x86.avx10.mask.vcvttps2udqs.round.512(<16 x float> %x0, <16 x i32> undef, i16 %mask, i32 4)
ret <16 x i32> %res
}
@@ -400,10 +400,10 @@ define <16 x i32> @test_int_x86_mask_vcvtt_ps2udqs_512_default(<16 x float>* %x0
-; X86-NEXT: vcvttps2dqs (%eax), %zmm0 # encoding: [0x62,0xf5,0x7c,0x48,0x6d,0x00]
+; X86-NEXT: vcvttps2udqs (%eax), %zmm0 # encoding: [0x62,0xf5,0x7c,0x48,0x6c,0x00]
; X86-NEXT: retl # encoding: [0xc3]
%x10 = load <16 x float>, <16 x float>* %x0
- %res = call <16 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.round.512( <16 x float> %x10, <16 x i32> undef, i16 -1, i32 4)
+ %res = call <16 x i32> @llvm.x86.avx10.mask.vcvttps2udqs.round.512(<16 x float> %x10, <16 x i32> undef, i16 -1, i32 4)
ret <16 x i32> %res
}
-declare <16 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.round.512(<16 x float>, <16 x i32>, i16 , i32)
+declare <16 x i32> @llvm.x86.avx10.mask.vcvttps2udqs.round.512(<16 x float>, <16 x i32>, i16 , i32)
define <8 x i64> @test_int_x86_mask_vcvtt_ps2qqs_512(<8 x float> %x0, <8 x i64> %src, i8 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_ps2qqs_512:
@@ -419,7 +419,7 @@ define <8 x i64> @test_int_x86_mask_vcvtt_ps2qqs_512(<8 x float> %x0, <8 x i64>
; X86-NEXT: vcvttps2qqs %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x49,0x6d,0xc8]
; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.round.512( <8 x float> %x0, <8 x i64> %src, i8 %mask, i32 4)
+ %res = call <8 x i64> @llvm.x86.avx10.mask.vcvttps2qqs.round.512(<8 x float> %x0, <8 x i64> %src, i8 %mask, i32 4)
ret <8 x i64> %res
}
@@ -435,7 +435,7 @@ define <8 x i64> @test_int_x86_maskz_vcvtt_ps2qqs_512_z(<8 x float> %x0, i8 %mas
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttps2qqs %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x6d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.round.512( <8 x float> %x0, <8 x i64> zeroinitializer, i8 %mask, i32 4)
+ %res = call <8 x i64> @llvm.x86.avx10.mask.vcvttps2qqs.round.512(<8 x float> %x0, <8 x i64> zeroinitializer, i8 %mask, i32 4)
ret <8 x i64> %res
}
@@ -451,7 +451,7 @@ define <8 x i64> @test_int_x86_mask_vcvtt_ps2qqs_512_undef(<8 x float> %x0, i8 %
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttps2qqs %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x6d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.round.512( <8 x float> %x0, <8 x i64> undef, i8 %mask, i32 4)
+ %res = call <8 x i64> @llvm.x86.avx10.mask.vcvttps2qqs.round.512(<8 x float> %x0, <8 x i64> undef, i8 %mask, i32 4)
ret <8 x i64> %res
}
@@ -460,10 +460,10 @@ define <8 x i64> @test_int_x86_mask_vcvtt_ps2qqs_512_default(<8 x float> %x0) {
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttps2qqs %ymm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x6d,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.round.512( <8 x float> %x0, <8 x i64> undef, i8 -1, i32 4)
+ %res = call <8 x i64> @llvm.x86.avx10.mask.vcvttps2qqs.round.512(<8 x float> %x0, <8 x i64> undef, i8 -1, i32 4)
ret <8 x i64> %res
}
-declare <8 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.round.512(<8 x float>, <8 x i64>, i8 , i32)
+declare <8 x i64> @llvm.x86.avx10.mask.vcvttps2qqs.round.512(<8 x float>, <8 x i64>, i8 , i32)
define <8 x i64> @test_int_x86_mask_vcvtt_ps2uqqs_512(<8 x float> %x0, <8 x i64> %src, i8 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_ps2uqqs_512:
@@ -479,7 +479,7 @@ define <8 x i64> @test_int_x86_mask_vcvtt_ps2uqqs_512(<8 x float> %x0, <8 x i64>
; X86-NEXT: vcvttps2uqqs %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x49,0x6c,0xc8]
; X86-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.round.512( <8 x float> %x0, <8 x i64> %src, i8 %mask, i32 4)
+ %res = call <8 x i64> @llvm.x86.avx10.mask.vcvttps2uqqs.round.512(<8 x float> %x0, <8 x i64> %src, i8 %mask, i32 4)
ret <8 x i64> %res
}
@@ -495,7 +495,7 @@ define <8 x i64> @test_int_x86_maskz_vcvtt_ps2uqqs_512_z(<8 x float> %x0, i8 %ma
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttps2uqqs %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x6c,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.round.512( <8 x float> %x0, <8 x i64> zeroinitializer, i8 %mask, i32 4)
+ %res = call <8 x i64> @llvm.x86.avx10.mask.vcvttps2uqqs.round.512(<8 x float> %x0, <8 x i64> zeroinitializer, i8 %mask, i32 4)
ret <8 x i64> %res
}
@@ -511,7 +511,7 @@ define <8 x i64> @test_int_x86_mask_vcvtt_ps2uqqs_512_undef(<8 x float> %x0, i8
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttps2uqqs %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x6c,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.round.512( <8 x float> %x0, <8 x i64> undef, i8 %mask, i32 4)
+ %res = call <8 x i64> @llvm.x86.avx10.mask.vcvttps2uqqs.round.512(<8 x float> %x0, <8 x i64> undef, i8 %mask, i32 4)
ret <8 x i64> %res
}
@@ -520,7 +520,7 @@ define <8 x i64> @test_int_x86_mask_vcvtt_ps2uqqs_512_default(<8 x float> %x0) {
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttps2uqqs %ymm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x6c,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <8 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.round.512( <8 x float> %x0, <8 x i64> undef, i8 -1, i32 4)
+ %res = call <8 x i64> @llvm.x86.avx10.mask.vcvttps2uqqs.round.512(<8 x float> %x0, <8 x i64> undef, i8 -1, i32 4)
ret <8 x i64> %res
}
-declare <8 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.round.512(<8 x float>, <8 x i64>, i8 , i32)
+declare <8 x i64> @llvm.x86.avx10.mask.vcvttps2uqqs.round.512(<8 x float>, <8 x i64>, i8 , i32)
diff --git a/llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll
index 84374216c0e89f..922ac92be174a8 100644
--- a/llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll
@@ -9,12 +9,12 @@ define i32 @test_x86_avx512_vcvttsd2usis(<2 x double> %a0) {
; CHECK-NEXT: vcvttsd2usis {sae}, %xmm0, %eax # encoding: [0x62,0xf5,0x7f,0x18,0x6c,0xc0]
; CHECK-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res0 = call i32 @llvm.x86.avx512.vcvttsd2usis(<2 x double> %a0, i32 4) ;
- %res1 = call i32 @llvm.x86.avx512.vcvttsd2usis(<2 x double> %a0, i32 8) ;
+ %res0 = call i32 @llvm.x86.avx10.vcvttsd2usis(<2 x double> %a0, i32 4) ;
+ %res1 = call i32 @llvm.x86.avx10.vcvttsd2usis(<2 x double> %a0, i32 8) ;
%res2 = add i32 %res0, %res1
ret i32 %res2
}
-declare i32 @llvm.x86.avx512.vcvttsd2usis(<2 x double>, i32) nounwind readnone
+declare i32 @llvm.x86.avx10.vcvttsd2usis(<2 x double>, i32) nounwind readnone
define i32 @test_x86_avx512_vcvttsd2sis(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_vcvttsd2sis:
@@ -23,12 +23,12 @@ define i32 @test_x86_avx512_vcvttsd2sis(<2 x double> %a0) {
; CHECK-NEXT: vcvttsd2sis {sae}, %xmm0, %eax # encoding: [0x62,0xf5,0x7f,0x18,0x6d,0xc0]
; CHECK-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res0 = call i32 @llvm.x86.avx512.vcvttsd2sis(<2 x double> %a0, i32 4) ;
- %res1 = call i32 @llvm.x86.avx512.vcvttsd2sis(<2 x double> %a0, i32 8) ;
+ %res0 = call i32 @llvm.x86.avx10.vcvttsd2sis(<2 x double> %a0, i32 4) ;
+ %res1 = call i32 @llvm.x86.avx10.vcvttsd2sis(<2 x double> %a0, i32 8) ;
%res2 = add i32 %res0, %res1
ret i32 %res2
}
-declare i32 @llvm.x86.avx512.vcvttsd2sis(<2 x double>, i32) nounwind readnone
+declare i32 @llvm.x86.avx10.vcvttsd2sis(<2 x double>, i32) nounwind readnone
define i32 @test_x86_avx512_vcvttss2sis(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_vcvttss2sis:
@@ -37,12 +37,12 @@ define i32 @test_x86_avx512_vcvttss2sis(<4 x float> %a0) {
; CHECK-NEXT: vcvttss2sis %xmm0, %eax # encoding: [0x62,0xf5,0x7e,0x08,0x6d,0xc0]
; CHECK-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res0 = call i32 @llvm.x86.avx512.vcvttss2sis(<4 x float> %a0, i32 8) ;
- %res1 = call i32 @llvm.x86.avx512.vcvttss2sis(<4 x float> %a0, i32 4) ;
+ %res0 = call i32 @llvm.x86.avx10.vcvttss2sis(<4 x float> %a0, i32 8) ;
+ %res1 = call i32 @llvm.x86.avx10.vcvttss2sis(<4 x float> %a0, i32 4) ;
%res2 = add i32 %res0, %res1
ret i32 %res2
}
-declare i32 @llvm.x86.avx512.vcvttss2sis(<4 x float>, i32) nounwind readnone
+declare i32 @llvm.x86.avx10.vcvttss2sis(<4 x float>, i32) nounwind readnone
define i32 @test_x86_avx512_vcvttss2sis_load(ptr %a0) {
; X64-LABEL: test_x86_avx512_vcvttss2sis_load:
@@ -56,7 +56,7 @@ define i32 @test_x86_avx512_vcvttss2sis_load(ptr %a0) {
; X86-NEXT: vcvttss2sis (%eax), %eax # encoding: [0x62,0xf5,0x7e,0x08,0x6d,0x00]
; X86-NEXT: retl # encoding: [0xc3]
%a1 = load <4 x float>, ptr %a0
- %res = call i32 @llvm.x86.avx512.vcvttss2sis(<4 x float> %a1, i32 4) ;
+ %res = call i32 @llvm.x86.avx10.vcvttss2sis(<4 x float> %a1, i32 4) ;
ret i32 %res
}
@@ -67,12 +67,12 @@ define i32 @test_x86_avx512_vcvttss2usis(<4 x float> %a0) {
; CHECK-NEXT: vcvttss2usis %xmm0, %eax # encoding: [0x62,0xf5,0x7e,0x08,0x6c,0xc0]
; CHECK-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res0 = call i32 @llvm.x86.avx512.vcvttss2usis(<4 x float> %a0, i32 8) ;
- %res1 = call i32 @llvm.x86.avx512.vcvttss2usis(<4 x float> %a0, i32 4) ;
+ %res0 = call i32 @llvm.x86.avx10.vcvttss2usis(<4 x float> %a0, i32 8) ;
+ %res1 = call i32 @llvm.x86.avx10.vcvttss2usis(<4 x float> %a0, i32 4) ;
%res2 = add i32 %res0, %res1
ret i32 %res2
}
-declare i32 @llvm.x86.avx512.vcvttss2usis(<4 x float>, i32) nounwind readnone
+declare i32 @llvm.x86.avx10.vcvttss2usis(<4 x float>, i32) nounwind readnone
define <4 x i32> @test_int_x86_mask_vcvtt_pd2dqs_256(<4 x double> %x0, <4 x i32> %src, i8 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_pd2dqs_256:
@@ -90,7 +90,7 @@ define <4 x i32> @test_int_x86_mask_vcvtt_pd2dqs_256(<4 x double> %x0, <4 x i32>
; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.round.256( <4 x double> %x0, <4 x i32> %src, i8 %mask, i32 4)
+ %res = call <4 x i32> @llvm.x86.avx10.mask.vcvttpd2dqs.round.256( <4 x double> %x0, <4 x i32> %src, i8 %mask, i32 4)
ret <4 x i32> %res
}
@@ -108,7 +108,7 @@ define <4 x i32> @test_int_x86_maskz_vcvtt_pd2dqs_256_z(<4 x double> %x0, i8 %ma
; X86-NEXT: vcvttpd2dqs %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0xa9,0x6d,0xc0]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.round.256( <4 x double> %x0, <4 x i32> zeroinitializer, i8 %mask, i32 4)
+ %res = call <4 x i32> @llvm.x86.avx10.mask.vcvttpd2dqs.round.256( <4 x double> %x0, <4 x i32> zeroinitializer, i8 %mask, i32 4)
ret <4 x i32> %res
}
@@ -126,7 +126,7 @@ define <4 x i32> @test_int_x86_mask_vcvtt_pd2dqs_256_undef(<4 x double> %x0, i8
; X86-NEXT: vcvttpd2dqs %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0xa9,0x6d,0xc0]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.round.256( <4 x double> %x0, <4 x i32> undef, i8 %mask, i32 4)
+ %res = call <4 x i32> @llvm.x86.avx10.mask.vcvttpd2dqs.round.256( <4 x double> %x0, <4 x i32> undef, i8 %mask, i32 4)
ret <4 x i32> %res
}
@@ -142,10 +142,10 @@ define <4 x i32> @test_int_x86_mask_vcvtt_pd2dqs_256_default(<4 x double>* %xptr
; X86-NEXT: vcvttpd2dqsy (%eax), %xmm0 # encoding: [0x62,0xf5,0xfc,0x28,0x6d,0x00]
; X86-NEXT: retl # encoding: [0xc3]
%x0 = load <4 x double>, <4 x double> * %xptr
- %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.round.256( <4 x double> %x0, <4 x i32> undef, i8 -1, i32 4)
+ %res = call <4 x i32> @llvm.x86.avx10.mask.vcvttpd2dqs.round.256( <4 x double> %x0, <4 x i32> undef, i8 -1, i32 4)
ret <4 x i32> %res
}
-declare <4 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.round.256(<4 x double>, <4 x i32>, i8 , i32)
+declare <4 x i32> @llvm.x86.avx10.mask.vcvttpd2dqs.round.256(<4 x double>, <4 x i32>, i8 , i32)
define <4 x i32> @test_int_x86_mask_vcvtt_pd2udqs_256(<4 x double> %x0, <4 x i32> %src, i8 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_pd2udqs_256:
@@ -163,7 +163,7 @@ define <4 x i32> @test_int_x86_mask_vcvtt_pd2udqs_256(<4 x double> %x0, <4 x i32
; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.round.256( <4 x double> %x0, <4 x i32> %src, i8 %mask, i32 4)
+ %res = call <4 x i32> @llvm.x86.avx10.mask.vcvttpd2udqs.round.256( <4 x double> %x0, <4 x i32> %src, i8 %mask, i32 4)
ret <4 x i32> %res
}
@@ -181,7 +181,7 @@ define <4 x i32> @test_int_x86_maskz_vcvtt_pd2udqs_256_z(<4 x double> %x0, i8 %m
; X86-NEXT: vcvttpd2udqs %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0xa9,0x6c,0xc0]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.round.256( <4 x double> %x0, <4 x i32> zeroinitializer, i8 %mask, i32 4)
+ %res = call <4 x i32> @llvm.x86.avx10.mask.vcvttpd2udqs.round.256( <4 x double> %x0, <4 x i32> zeroinitializer, i8 %mask, i32 4)
ret <4 x i32> %res
}
@@ -199,7 +199,7 @@ define <4 x i32> @test_int_x86_mask_vcvtt_pd2udqs_256_undef(<4 x double> %x0, i8
; X86-NEXT: vcvttpd2udqs %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0xa9,0x6c,0xc0]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.round.256( <4 x double> %x0, <4 x i32> undef, i8 %mask, i32 4)
+ %res = call <4 x i32> @llvm.x86.avx10.mask.vcvttpd2udqs.round.256( <4 x double> %x0, <4 x i32> undef, i8 %mask, i32 4)
ret <4 x i32> %res
}
@@ -216,10 +216,10 @@ define <4 x i32> @test_int_x86_mask_vcvtt_pd2udqs_256_default(<4 x double>* %x0)
; X86-NEXT: vcvttpd2udqsy (%eax), %xmm0 # encoding: [0x62,0xf5,0xfc,0x28,0x6c,0x00]
; X86-NEXT: retl # encoding: [0xc3]
%x10 = load <4 x double>, <4 x double> * %x0
- %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.round.256( <4 x double> %x10, <4 x i32> undef, i8 -1, i32 4)
+ %res = call <4 x i32> @llvm.x86.avx10.mask.vcvttpd2udqs.round.256( <4 x double> %x10, <4 x i32> undef, i8 -1, i32 4)
ret <4 x i32> %res
}
-declare <4 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.round.256(<4 x double>, <4 x i32>, i8 , i32)
+declare <4 x i32> @llvm.x86.avx10.mask.vcvttpd2udqs.round.256(<4 x double>, <4 x i32>, i8 , i32)
define <4 x i64> @test_int_x86_mask_vcvtt_pd2qqs_256(<4 x double> %x0, <4 x i64> %src, i8 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_pd2qqs_256:
@@ -235,7 +235,7 @@ define <4 x i64> @test_int_x86_mask_vcvtt_pd2qqs_256(<4 x double> %x0, <4 x i64>
; X86-NEXT: vcvttpd2qqs %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0xfd,0x29,0x6d,0xc8]
; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <4 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.round.256( <4 x double> %x0, <4 x i64> %src, i8 %mask, i32 4)
+ %res = call <4 x i64> @llvm.x86.avx10.mask.vcvttpd2qqs.round.256( <4 x double> %x0, <4 x i64> %src, i8 %mask, i32 4)
ret <4 x i64> %res
}
@@ -251,7 +251,7 @@ define <4 x i64> @test_int_x86_maskz_vcvtt_pd2qqs_256_z(<4 x double> %x0, i8 %ma
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttpd2qqs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0xa9,0x6d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <4 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.round.256( <4 x double> %x0, <4 x i64> zeroinitializer, i8 %mask, i32 4)
+ %res = call <4 x i64> @llvm.x86.avx10.mask.vcvttpd2qqs.round.256( <4 x double> %x0, <4 x i64> zeroinitializer, i8 %mask, i32 4)
ret <4 x i64> %res
}
@@ -267,7 +267,7 @@ define <4 x i64> @test_int_x86_mask_vcvtt_pd2qqs_256_undef(<4 x double> %x0, i8
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttpd2qqs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0xa9,0x6d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <4 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.round.256( <4 x double> %x0, <4 x i64> undef, i8 %mask, i32 4)
+ %res = call <4 x i64> @llvm.x86.avx10.mask.vcvttpd2qqs.round.256( <4 x double> %x0, <4 x i64> undef, i8 %mask, i32 4)
ret <4 x i64> %res
}
@@ -284,10 +284,10 @@ define <4 x i64> @test_int_x86_mask_vcvtt_pd2qqs_256_default(<4 x double>* %x0)
; X86-NEXT: vcvttpd2qqs (%eax), %ymm0 # encoding: [0x62,0xf5,0xfd,0x28,0x6d,0x00]
; X86-NEXT: retl # encoding: [0xc3]
%x10 = load <4 x double>, <4 x double>* %x0
- %res = call <4 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.round.256( <4 x double> %x10, <4 x i64> undef, i8 -1, i32 4)
+ %res = call <4 x i64> @llvm.x86.avx10.mask.vcvttpd2qqs.round.256( <4 x double> %x10, <4 x i64> undef, i8 -1, i32 4)
ret <4 x i64> %res
}
-declare <4 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.round.256(<4 x double>, <4 x i64>, i8 , i32)
+declare <4 x i64> @llvm.x86.avx10.mask.vcvttpd2qqs.round.256(<4 x double>, <4 x i64>, i8 , i32)
define <4 x i64> @test_int_x86_mask_vcvtt_pd2uqqs_256(<4 x double> %x0, <4 x i64> %src, i8 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_pd2uqqs_256:
@@ -303,7 +303,7 @@ define <4 x i64> @test_int_x86_mask_vcvtt_pd2uqqs_256(<4 x double> %x0, <4 x i64
; X86-NEXT: vcvttpd2uqqs %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0xfd,0x29,0x6c,0xc8]
; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <4 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.round.256( <4 x double> %x0, <4 x i64> %src, i8 %mask, i32 4)
+ %res = call <4 x i64> @llvm.x86.avx10.mask.vcvttpd2uqqs.round.256( <4 x double> %x0, <4 x i64> %src, i8 %mask, i32 4)
ret <4 x i64> %res
}
@@ -319,7 +319,7 @@ define <4 x i64> @test_int_x86_maskz_vcvtt_pd2uqqs_256_z(<4 x double> %x0, i8 %m
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttpd2uqqs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0xa9,0x6c,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <4 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.round.256( <4 x double> %x0, <4 x i64> zeroinitializer, i8 %mask, i32 4)
+ %res = call <4 x i64> @llvm.x86.avx10.mask.vcvttpd2uqqs.round.256( <4 x double> %x0, <4 x i64> zeroinitializer, i8 %mask, i32 4)
ret <4 x i64> %res
}
@@ -335,7 +335,7 @@ define <4 x i64> @test_int_x86_mask_vcvtt_pd2uqqs_256_undef(<4 x double> %x0, i8
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttpd2uqqs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0xa9,0x6c,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <4 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.round.256( <4 x double> %x0, <4 x i64> undef, i8 %mask, i32 4)
+ %res = call <4 x i64> @llvm.x86.avx10.mask.vcvttpd2uqqs.round.256( <4 x double> %x0, <4 x i64> undef, i8 %mask, i32 4)
ret <4 x i64> %res
}
@@ -352,10 +352,10 @@ define <4 x i64> @test_int_x86_mask_vcvtt_pd2uqqs_256_default(<4 x double>* %x0)
; X86-NEXT: vcvttpd2uqqs (%eax), %ymm0 # encoding: [0x62,0xf5,0xfd,0x28,0x6c,0x00]
; X86-NEXT: retl # encoding: [0xc3]
%x10 = load <4 x double>, <4 x double>* %x0
- %res = call <4 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.round.256( <4 x double> %x10, <4 x i64> undef, i8 -1, i32 4)
+ %res = call <4 x i64> @llvm.x86.avx10.mask.vcvttpd2uqqs.round.256( <4 x double> %x10, <4 x i64> undef, i8 -1, i32 4)
ret <4 x i64> %res
}
-declare <4 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.round.256(<4 x double>, <4 x i64>, i8 , i32)
+declare <4 x i64> @llvm.x86.avx10.mask.vcvttpd2uqqs.round.256(<4 x double>, <4 x i64>, i8 , i32)
define <8 x i32> @test_int_x86_mask_vcvtt_ps2dqs_256(<8 x float> %x0, <8 x i32> %src, i8 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_ps2dqs_256:
@@ -371,7 +371,7 @@ define <8 x i32> @test_int_x86_mask_vcvtt_ps2dqs_256(<8 x float> %x0, <8 x i32>
; X86-NEXT: vcvttps2dqs %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0x7c,0x29,0x6d,0xc8]
; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.round.256( <8 x float> %x0, <8 x i32> %src, i8 %mask, i32 4)
+ %res = call <8 x i32> @llvm.x86.avx10.mask.vcvttps2dqs.round.256( <8 x float> %x0, <8 x i32> %src, i8 %mask, i32 4)
ret <8 x i32> %res
}
@@ -387,7 +387,7 @@ define <8 x i32> @test_int_x86_maskz_vcvtt_ps2dqs_256_z(<8 x float> %x0, i8 %mas
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttps2dqs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xa9,0x6d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.round.256( <8 x float> %x0, <8 x i32> zeroinitializer, i8 %mask, i32 4)
+ %res = call <8 x i32> @llvm.x86.avx10.mask.vcvttps2dqs.round.256( <8 x float> %x0, <8 x i32> zeroinitializer, i8 %mask, i32 4)
ret <8 x i32> %res
}
@@ -403,7 +403,7 @@ define <8 x i32> @test_int_x86_mask_vcvtt_ps2dqs_256_undef(<8 x float> %x0, i8 %
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttps2dqs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xa9,0x6d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.round.256( <8 x float> %x0, <8 x i32> undef, i8 %mask, i32 4)
+ %res = call <8 x i32> @llvm.x86.avx10.mask.vcvttps2dqs.round.256( <8 x float> %x0, <8 x i32> undef, i8 %mask, i32 4)
ret <8 x i32> %res
}
@@ -419,10 +419,10 @@ define <8 x i32> @test_int_x86_mask_vcvtt_ps2dqs_256_default(<8 x float>* %x0) {
; X86-NEXT: vcvttps2dqs (%eax), %ymm0 # encoding: [0x62,0xf5,0x7c,0x28,0x6d,0x00]
; X86-NEXT: retl # encoding: [0xc3]
%x10 = load <8 x float>, <8 x float>* %x0
- %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.round.256( <8 x float> %x10, <8 x i32> undef, i8 -1, i32 4)
+ %res = call <8 x i32> @llvm.x86.avx10.mask.vcvttps2dqs.round.256( <8 x float> %x10, <8 x i32> undef, i8 -1, i32 4)
ret <8 x i32> %res
}
-declare <8 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.round.256(<8 x float>, <8 x i32>, i8 , i32)
+declare <8 x i32> @llvm.x86.avx10.mask.vcvttps2dqs.round.256(<8 x float>, <8 x i32>, i8 , i32)
define <8 x i32> @test_int_x86_mask_vcvtt_ps2udqs_256(<8 x float> %x0, <8 x i32> %src, i8 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_ps2udqs_256:
@@ -438,7 +438,7 @@ define <8 x i32> @test_int_x86_mask_vcvtt_ps2udqs_256(<8 x float> %x0, <8 x i32>
; X86-NEXT: vcvttps2udqs %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0x7c,0x29,0x6c,0xc8]
; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.round.256( <8 x float> %x0, <8 x i32> %src, i8 %mask, i32 4)
+ %res = call <8 x i32> @llvm.x86.avx10.mask.vcvttps2udqs.round.256( <8 x float> %x0, <8 x i32> %src, i8 %mask, i32 4)
ret <8 x i32> %res
}
@@ -454,7 +454,7 @@ define <8 x i32> @test_int_x86_maskz_vcvtt_ps2udqs_256_z(<8 x float> %x0, i8 %ma
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttps2udqs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xa9,0x6c,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.round.256( <8 x float> %x0, <8 x i32> zeroinitializer, i8 %mask, i32 4)
+ %res = call <8 x i32> @llvm.x86.avx10.mask.vcvttps2udqs.round.256( <8 x float> %x0, <8 x i32> zeroinitializer, i8 %mask, i32 4)
ret <8 x i32> %res
}
@@ -470,7 +470,7 @@ define <8 x i32> @test_int_x86_mask_vcvtt_ps2udqs_256_undef(<8 x float> %x0, i8
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttps2udqs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xa9,0x6c,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.round.256( <8 x float> %x0, <8 x i32> undef, i8 %mask, i32 4)
+ %res = call <8 x i32> @llvm.x86.avx10.mask.vcvttps2udqs.round.256( <8 x float> %x0, <8 x i32> undef, i8 %mask, i32 4)
ret <8 x i32> %res
}
@@ -487,10 +487,10 @@ define <8 x i32> @test_int_x86_mask_vcvtt_ps2udqs_256_default(<8 x float>* %x0)
; X86-NEXT: vcvttps2udqs (%eax), %ymm0 # encoding: [0x62,0xf5,0x7c,0x28,0x6c,0x00]
; X86-NEXT: retl # encoding: [0xc3]
%x10 = load <8 x float>, <8 x float>* %x0
- %res = call <8 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.round.256( <8 x float> %x10, <8 x i32> undef, i8 -1, i32 4)
+ %res = call <8 x i32> @llvm.x86.avx10.mask.vcvttps2udqs.round.256( <8 x float> %x10, <8 x i32> undef, i8 -1, i32 4)
ret <8 x i32> %res
}
-declare <8 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.round.256(<8 x float>, <8 x i32>, i8 , i32)
+declare <8 x i32> @llvm.x86.avx10.mask.vcvttps2udqs.round.256(<8 x float>, <8 x i32>, i8 , i32)
define <4 x i64> @test_int_x86_maskz_vcvtt_ps2qqs_256_z(<4 x float> %x0, i8 %mask) {
; X64-LABEL: test_int_x86_maskz_vcvtt_ps2qqs_256_z:
@@ -504,7 +504,7 @@ define <4 x i64> @test_int_x86_maskz_vcvtt_ps2qqs_256_z(<4 x float> %x0, i8 %mas
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttps2qqs %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x6d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <4 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.round.256( <4 x float> %x0, <4 x i64> zeroinitializer, i8 %mask, i32 4)
+ %res = call <4 x i64> @llvm.x86.avx10.mask.vcvttps2qqs.round.256( <4 x float> %x0, <4 x i64> zeroinitializer, i8 %mask, i32 4)
ret <4 x i64> %res
}
@@ -520,10 +520,10 @@ define <4 x i64> @test_int_x86_mask_vcvtt_ps2qqs_256_undef(<4 x float> %x0, i8 %
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttps2qqs %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x6d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <4 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.round.256( <4 x float> %x0, <4 x i64> undef, i8 %mask, i32 4)
+ %res = call <4 x i64> @llvm.x86.avx10.mask.vcvttps2qqs.round.256( <4 x float> %x0, <4 x i64> undef, i8 %mask, i32 4)
ret <4 x i64> %res
}
-declare <4 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.round.256(<4 x float>, <4 x i64>, i8 , i32)
+declare <4 x i64> @llvm.x86.avx10.mask.vcvttps2qqs.round.256(<4 x float>, <4 x i64>, i8 , i32)
define <4 x i64> @test_int_x86_mask_vcvtt_ps2uqqs_256(<4 x float> %x0, <4 x i64> %src, i8 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_ps2uqqs_256:
@@ -539,7 +539,7 @@ define <4 x i64> @test_int_x86_mask_vcvtt_ps2uqqs_256(<4 x float> %x0, <4 x i64>
; X86-NEXT: vcvttps2uqqs %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x6c,0xc8]
; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <4 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.round.256( <4 x float> %x0, <4 x i64> %src, i8 %mask, i32 4)
+ %res = call <4 x i64> @llvm.x86.avx10.mask.vcvttps2uqqs.round.256( <4 x float> %x0, <4 x i64> %src, i8 %mask, i32 4)
ret <4 x i64> %res
}
@@ -555,7 +555,7 @@ define <4 x i64> @test_int_x86_maskz_vcvtt_ps2uqqs_256_z(<4 x float> %x0, i8 %ma
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttps2uqqs %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x6c,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <4 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.round.256( <4 x float> %x0, <4 x i64> zeroinitializer, i8 %mask, i32 4)
+ %res = call <4 x i64> @llvm.x86.avx10.mask.vcvttps2uqqs.round.256( <4 x float> %x0, <4 x i64> zeroinitializer, i8 %mask, i32 4)
ret <4 x i64> %res
}
@@ -571,7 +571,7 @@ define <4 x i64> @test_int_x86_mask_vcvtt_ps2uqqs_256_undef(<4 x float> %x0, i8
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttps2uqqs %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x6c,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <4 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.round.256( <4 x float> %x0, <4 x i64> undef, i8 %mask, i32 4)
+ %res = call <4 x i64> @llvm.x86.avx10.mask.vcvttps2uqqs.round.256( <4 x float> %x0, <4 x i64> undef, i8 %mask, i32 4)
ret <4 x i64> %res
}
@@ -581,11 +581,11 @@ define <4 x i64> @test_int_x86_mask_vcvtt_ps2uqqs_256_default(<4 x float> %x0) {
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttps2uqqs %xmm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x6c,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <4 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.round.256( <4 x float> %x0, <4 x i64> undef, i8 -1, i32 4)
+ %res = call <4 x i64> @llvm.x86.avx10.mask.vcvttps2uqqs.round.256( <4 x float> %x0, <4 x i64> undef, i8 -1, i32 4)
ret <4 x i64> %res
}
-declare <4 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.round.256(<4 x float>, <4 x i64>, i8 , i32)
+declare <4 x i64> @llvm.x86.avx10.mask.vcvttps2uqqs.round.256(<4 x float>, <4 x i64>, i8 , i32)
define <4 x i32> @test_int_x86_mask_vcvtt_pd2dqs_128(<2 x double> %x0, <4 x i32> %src, i8 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_pd2dqs_128:
@@ -601,7 +601,7 @@ define <4 x i32> @test_int_x86_mask_vcvtt_pd2dqs_128(<2 x double> %x0, <4 x i32>
; X86-NEXT: vcvttpd2dqs %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0xfc,0x09,0x6d,0xc8]
; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.128( <2 x double> %x0, <4 x i32> %src, i8 %mask)
+ %res = call <4 x i32> @llvm.x86.avx10.mask.vcvttpd2dqs.128( <2 x double> %x0, <4 x i32> %src, i8 %mask)
ret <4 x i32> %res
}
@@ -617,7 +617,7 @@ define <4 x i32> @test_int_x86_maskz_vcvtt_pd2dqs_128_z(<2 x double> %x0, i8 %ma
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttpd2dqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0x89,0x6d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.128( <2 x double> %x0, <4 x i32> zeroinitializer, i8 %mask)
+ %res = call <4 x i32> @llvm.x86.avx10.mask.vcvttpd2dqs.128( <2 x double> %x0, <4 x i32> zeroinitializer, i8 %mask)
ret <4 x i32> %res
}
@@ -633,7 +633,7 @@ define <4 x i32> @test_int_x86_mask_vcvtt_pd2dqs_128_undef(<2 x double> %x0, i8
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttpd2dqs %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf5,0xfc,0x09,0x6d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.128( <2 x double> %x0, <4 x i32> undef, i8 %mask)
+ %res = call <4 x i32> @llvm.x86.avx10.mask.vcvttpd2dqs.128( <2 x double> %x0, <4 x i32> undef, i8 %mask)
ret <4 x i32> %res
}
@@ -643,10 +643,10 @@ define <4 x i32> @test_int_x86_mask_vcvtt_pd2dqs_128_default(<2 x double> %x0) {
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttpd2dqs %xmm0, %xmm0 # encoding: [0x62,0xf5,0xfc,0x08,0x6d,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.128( <2 x double> %x0, <4 x i32> undef, i8 -1)
+ %res = call <4 x i32> @llvm.x86.avx10.mask.vcvttpd2dqs.128( <2 x double> %x0, <4 x i32> undef, i8 -1)
ret <4 x i32> %res
}
-declare <4 x i32> @llvm.x86.avx512.mask.vcvttpd2dqs.128(<2 x double>, <4 x i32>, i8)
+declare <4 x i32> @llvm.x86.avx10.mask.vcvttpd2dqs.128(<2 x double>, <4 x i32>, i8)
define <4 x i32> @test_int_x86_mask_vcvtt_pd2udqs_128(<2 x double> %x0, <4 x i32> %src, i8 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_pd2udqs_128:
@@ -662,7 +662,7 @@ define <4 x i32> @test_int_x86_mask_vcvtt_pd2udqs_128(<2 x double> %x0, <4 x i32
; X86-NEXT: vcvttpd2dqs %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0xfc,0x09,0x6d,0xc8]
; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.128( <2 x double> %x0, <4 x i32> %src, i8 %mask)
+ %res = call <4 x i32> @llvm.x86.avx10.mask.vcvttpd2udqs.128( <2 x double> %x0, <4 x i32> %src, i8 %mask)
ret <4 x i32> %res
}
@@ -678,7 +678,7 @@ define <4 x i32> @test_int_x86_maskz_vcvtt_pd2udqs_128_z(<2 x double> %x0, i8 %m
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttpd2dqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfc,0x89,0x6d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.128( <2 x double> %x0, <4 x i32> zeroinitializer, i8 %mask)
+ %res = call <4 x i32> @llvm.x86.avx10.mask.vcvttpd2udqs.128( <2 x double> %x0, <4 x i32> zeroinitializer, i8 %mask)
ret <4 x i32> %res
}
@@ -694,7 +694,7 @@ define <4 x i32> @test_int_x86_mask_vcvtt_pd2udqs_128_undef(<2 x double> %x0, i8
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttpd2dqs %xmm0, %xmm0 {%k1} # encoding: [0x62,0xf5,0xfc,0x09,0x6d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.128( <2 x double> %x0, <4 x i32> undef, i8 %mask)
+ %res = call <4 x i32> @llvm.x86.avx10.mask.vcvttpd2udqs.128( <2 x double> %x0, <4 x i32> undef, i8 %mask)
ret <4 x i32> %res
}
@@ -704,10 +704,10 @@ define <4 x i32> @test_int_x86_mask_vcvtt_pd2udqs_128_default(<2 x double> %x0)
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttpd2udqs %xmm0, %xmm0 # encoding: [0x62,0xf5,0xfc,0x08,0x6c,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.128( <2 x double> %x0, <4 x i32> undef, i8 -1)
+ %res = call <4 x i32> @llvm.x86.avx10.mask.vcvttpd2udqs.128( <2 x double> %x0, <4 x i32> undef, i8 -1)
ret <4 x i32> %res
}
-declare <4 x i32> @llvm.x86.avx512.mask.vcvttpd2udqs.128(<2 x double>, <4 x i32>, i8)
+declare <4 x i32> @llvm.x86.avx10.mask.vcvttpd2udqs.128(<2 x double>, <4 x i32>, i8)
define <2 x i64> @test_int_x86_mask_vcvtt_pd2qqs_128(<2 x double> %x0, <2 x i64> %src, i8 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_pd2qqs_128:
@@ -723,7 +723,7 @@ define <2 x i64> @test_int_x86_mask_vcvtt_pd2qqs_128(<2 x double> %x0, <2 x i64>
; X86-NEXT: vcvttpd2qqs %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0xfd,0x09,0x6d,0xc8]
; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.128( <2 x double> %x0, <2 x i64> %src, i8 %mask)
+ %res = call <2 x i64> @llvm.x86.avx10.mask.vcvttpd2qqs.128( <2 x double> %x0, <2 x i64> %src, i8 %mask)
ret <2 x i64> %res
}
@@ -739,7 +739,7 @@ define <2 x i64> @test_int_x86_maskz_vcvtt_pd2qqs_128_z(<2 x double> %x0, i8 %ma
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttpd2qqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0x89,0x6d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.128( <2 x double> %x0, <2 x i64> zeroinitializer, i8 %mask)
+ %res = call <2 x i64> @llvm.x86.avx10.mask.vcvttpd2qqs.128( <2 x double> %x0, <2 x i64> zeroinitializer, i8 %mask)
ret <2 x i64> %res
}
@@ -755,7 +755,7 @@ define <2 x i64> @test_int_x86_mask_vcvtt_pd2qqs_128_undef(<2 x double> %x0, i8
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttpd2qqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0x89,0x6d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.128( <2 x double> %x0, <2 x i64> undef, i8 %mask)
+ %res = call <2 x i64> @llvm.x86.avx10.mask.vcvttpd2qqs.128( <2 x double> %x0, <2 x i64> undef, i8 %mask)
ret <2 x i64> %res
}
@@ -765,10 +765,10 @@ define <2 x i64> @test_int_x86_mask_vcvtt_pd2qqs_128_default(<2 x double> %x0) {
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttpd2qqs %xmm0, %xmm0 # encoding: [0x62,0xf5,0xfd,0x08,0x6d,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.128( <2 x double> %x0, <2 x i64> undef, i8 -1)
+ %res = call <2 x i64> @llvm.x86.avx10.mask.vcvttpd2qqs.128( <2 x double> %x0, <2 x i64> undef, i8 -1)
ret <2 x i64> %res
}
-declare <2 x i64> @llvm.x86.avx512.mask.vcvttpd2qqs.128(<2 x double>, <2 x i64>, i8)
+declare <2 x i64> @llvm.x86.avx10.mask.vcvttpd2qqs.128(<2 x double>, <2 x i64>, i8)
define <2 x i64> @test_int_x86_mask_vcvtt_pd2uqqs_128(<2 x double> %x0, <2 x i64> %src, i8 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_pd2uqqs_128:
@@ -784,7 +784,7 @@ define <2 x i64> @test_int_x86_mask_vcvtt_pd2uqqs_128(<2 x double> %x0, <2 x i64
; X86-NEXT: vcvttpd2uqqs %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0xfd,0x09,0x6c,0xc8]
; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.128( <2 x double> %x0, <2 x i64> %src, i8 %mask)
+ %res = call <2 x i64> @llvm.x86.avx10.mask.vcvttpd2uqqs.128( <2 x double> %x0, <2 x i64> %src, i8 %mask)
ret <2 x i64> %res
}
@@ -800,7 +800,7 @@ define <2 x i64> @test_int_x86_maskz_vcvtt_pd2uqqs_128_z(<2 x double> %x0, i8 %m
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttpd2uqqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0x89,0x6c,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.128( <2 x double> %x0, <2 x i64> zeroinitializer, i8 %mask)
+ %res = call <2 x i64> @llvm.x86.avx10.mask.vcvttpd2uqqs.128( <2 x double> %x0, <2 x i64> zeroinitializer, i8 %mask)
ret <2 x i64> %res
}
@@ -816,7 +816,7 @@ define <2 x i64> @test_int_x86_mask_vcvtt_pd2uqqs_128_undef(<2 x double> %x0, i8
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttpd2uqqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfd,0x89,0x6c,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.128( <2 x double> %x0, <2 x i64> undef, i8 %mask)
+ %res = call <2 x i64> @llvm.x86.avx10.mask.vcvttpd2uqqs.128( <2 x double> %x0, <2 x i64> undef, i8 %mask)
ret <2 x i64> %res
}
@@ -826,17 +826,17 @@ define <2 x i64> @test_int_x86_mask_vcvtt_pd2uqqs_128_default(<2 x double> %x0)
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttpd2uqqs %xmm0, %xmm0 # encoding: [0x62,0xf5,0xfd,0x08,0x6c,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.128( <2 x double> %x0, <2 x i64> undef, i8 -1)
+ %res = call <2 x i64> @llvm.x86.avx10.mask.vcvttpd2uqqs.128( <2 x double> %x0, <2 x i64> undef, i8 -1)
ret <2 x i64> %res
}
-declare <2 x i64> @llvm.x86.avx512.mask.vcvttpd2uqqs.128(<2 x double>, <2 x i64>, i8)
+declare <2 x i64> @llvm.x86.avx10.mask.vcvttpd2uqqs.128(<2 x double>, <2 x i64>, i8)
define <2 x i64> @test_int_x86_mask_vcvtt_ps2qqs_128_default(<4 x float> %x0) {
; CHECK-LABEL: test_int_x86_mask_vcvtt_ps2qqs_128_default:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttps2qqs %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x6d,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.128( <4 x float> %x0, <2 x i64> undef, i8 -1)
+ %res = call <2 x i64> @llvm.x86.avx10.mask.vcvttps2qqs.128( <4 x float> %x0, <2 x i64> undef, i8 -1)
ret <2 x i64> %res
}
@@ -854,7 +854,7 @@ define <4 x i32> @test_int_x86_mask_vcvtt_ps2dqs_128(<4 x float> %x0, <4 x i32>
; X86-NEXT: vcvttps2dqs %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0x7c,0x09,0x6d,0xc8]
; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.128( <4 x float> %x0, <4 x i32> %src, i8 %mask)
+ %res = call <4 x i32> @llvm.x86.avx10.mask.vcvttps2dqs.128( <4 x float> %x0, <4 x i32> %src, i8 %mask)
ret <4 x i32> %res
}
define <4 x i32> @test_int_x86_maskz_vcvtt_ps2dqs_128_z(<4 x float> %x0, i8 %mask) {
@@ -869,7 +869,7 @@ define <4 x i32> @test_int_x86_maskz_vcvtt_ps2dqs_128_z(<4 x float> %x0, i8 %mas
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttps2dqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0x89,0x6d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.128( <4 x float> %x0, <4 x i32> zeroinitializer, i8 %mask)
+ %res = call <4 x i32> @llvm.x86.avx10.mask.vcvttps2dqs.128( <4 x float> %x0, <4 x i32> zeroinitializer, i8 %mask)
ret <4 x i32> %res
}
define <4 x i32> @test_int_x86_mask_vcvtt_ps2dqs_128_undef(<4 x float> %x0, i8 %mask) {
@@ -884,7 +884,7 @@ define <4 x i32> @test_int_x86_mask_vcvtt_ps2dqs_128_undef(<4 x float> %x0, i8 %
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttps2dqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0x89,0x6d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.128( <4 x float> %x0, <4 x i32> undef, i8 %mask)
+ %res = call <4 x i32> @llvm.x86.avx10.mask.vcvttps2dqs.128( <4 x float> %x0, <4 x i32> undef, i8 %mask)
ret <4 x i32> %res
}
define <4 x i32> @test_int_x86_mask_vcvtt_ps2dqs_128_default(<4 x float> %x0) {
@@ -892,10 +892,10 @@ define <4 x i32> @test_int_x86_mask_vcvtt_ps2dqs_128_default(<4 x float> %x0) {
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttps2dqs %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7c,0x08,0x6d,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.128( <4 x float> %x0, <4 x i32> undef, i8 -1)
+ %res = call <4 x i32> @llvm.x86.avx10.mask.vcvttps2dqs.128( <4 x float> %x0, <4 x i32> undef, i8 -1)
ret <4 x i32> %res
}
-declare <4 x i32> @llvm.x86.avx512.mask.vcvttps2dqs.128(<4 x float>, <4 x i32>, i8)
+declare <4 x i32> @llvm.x86.avx10.mask.vcvttps2dqs.128(<4 x float>, <4 x i32>, i8)
define <4 x i32> @test_int_x86_mask_vcvtt_ps2udqs_128(<4 x float> %x0, <4 x i32> %src, i8 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_ps2udqs_128:
@@ -911,7 +911,7 @@ define <4 x i32> @test_int_x86_mask_vcvtt_ps2udqs_128(<4 x float> %x0, <4 x i32>
; X86-NEXT: vcvttps2udqs %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0x7c,0x09,0x6c,0xc8]
; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.128( <4 x float> %x0, <4 x i32> %src, i8 %mask)
+ %res = call <4 x i32> @llvm.x86.avx10.mask.vcvttps2udqs.128( <4 x float> %x0, <4 x i32> %src, i8 %mask)
ret <4 x i32> %res
}
define <4 x i32> @test_int_x86_maskz_vcvtt_ps2udqs_128_z(<4 x float> %x0, i8 %mask) {
@@ -926,7 +926,7 @@ define <4 x i32> @test_int_x86_maskz_vcvtt_ps2udqs_128_z(<4 x float> %x0, i8 %ma
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttps2udqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0x89,0x6c,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.128( <4 x float> %x0, <4 x i32> zeroinitializer, i8 %mask)
+ %res = call <4 x i32> @llvm.x86.avx10.mask.vcvttps2udqs.128( <4 x float> %x0, <4 x i32> zeroinitializer, i8 %mask)
ret <4 x i32> %res
}
define <4 x i32> @test_int_x86_mask_vcvtt_ps2udqs_128_undef(<4 x float> %x0, i8 %mask) {
@@ -941,7 +941,7 @@ define <4 x i32> @test_int_x86_mask_vcvtt_ps2udqs_128_undef(<4 x float> %x0, i8
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttps2udqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0x89,0x6c,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.128( <4 x float> %x0, <4 x i32> undef, i8 %mask)
+ %res = call <4 x i32> @llvm.x86.avx10.mask.vcvttps2udqs.128( <4 x float> %x0, <4 x i32> undef, i8 %mask)
ret <4 x i32> %res
}
define <4 x i32> @test_int_x86_mask_vcvtt_ps2udqs_128_default(<4 x float> %x0) {
@@ -949,10 +949,10 @@ define <4 x i32> @test_int_x86_mask_vcvtt_ps2udqs_128_default(<4 x float> %x0) {
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttps2udqs %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7c,0x08,0x6c,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <4 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.128( <4 x float> %x0, <4 x i32> undef, i8 -1)
+ %res = call <4 x i32> @llvm.x86.avx10.mask.vcvttps2udqs.128( <4 x float> %x0, <4 x i32> undef, i8 -1)
ret <4 x i32> %res
}
-declare <4 x i32> @llvm.x86.avx512.mask.vcvttps2udqs.128(<4 x float>, <4 x i32>, i8)
+declare <4 x i32> @llvm.x86.avx10.mask.vcvttps2udqs.128(<4 x float>, <4 x i32>, i8)
define <2 x i64> @test_int_x86_mask_vcvtt_ps2qqs_128_undef(<4 x float> %x0, i8 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_ps2qqs_128_undef:
@@ -966,7 +966,7 @@ define <2 x i64> @test_int_x86_mask_vcvtt_ps2qqs_128_undef(<4 x float> %x0, i8 %
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttps2qqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x6d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.128( <4 x float> %x0, <2 x i64> undef, i8 %mask)
+ %res = call <2 x i64> @llvm.x86.avx10.mask.vcvttps2qqs.128( <4 x float> %x0, <2 x i64> undef, i8 %mask)
ret <2 x i64> %res
}
@@ -982,7 +982,7 @@ define <2 x i64> @test_int_x86_maskz_vcvtt_ps2qqs_128_z(<4 x float> %x0, i8 %mas
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttps2qqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x6d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.128( <4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask)
+ %res = call <2 x i64> @llvm.x86.avx10.mask.vcvttps2qqs.128( <4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask)
ret <2 x i64> %res
}
@@ -1000,10 +1000,10 @@ define <2 x i64> @test_int_x86_mask_vcvtt_ps2qqs_128(<4 x float> %x0, <2 x i64>
; X86-NEXT: vcvttps2qqs %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x6d,0xc8]
; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.128( <4 x float> %x0, <2 x i64> %src, i8 %mask)
+ %res = call <2 x i64> @llvm.x86.avx10.mask.vcvttps2qqs.128( <4 x float> %x0, <2 x i64> %src, i8 %mask)
ret <2 x i64> %res
}
-declare <2 x i64> @llvm.x86.avx512.mask.vcvttps2qqs.128(<4 x float>, <2 x i64>, i8)
+declare <2 x i64> @llvm.x86.avx10.mask.vcvttps2qqs.128(<4 x float>, <2 x i64>, i8)
define <2 x i64> @test_int_x86_mask_vcvtt_ps2uqqs_128(<4 x float> %x0, <2 x i64> %src, i8 %mask) {
; X64-LABEL: test_int_x86_mask_vcvtt_ps2uqqs_128:
@@ -1019,7 +1019,7 @@ define <2 x i64> @test_int_x86_mask_vcvtt_ps2uqqs_128(<4 x float> %x0, <2 x i64>
; X86-NEXT: vcvttps2uqqs %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x6c,0xc8]
; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.128( <4 x float> %x0, <2 x i64> %src, i8 %mask)
+ %res = call <2 x i64> @llvm.x86.avx10.mask.vcvttps2uqqs.128( <4 x float> %x0, <2 x i64> %src, i8 %mask)
ret <2 x i64> %res
}
@@ -1035,7 +1035,7 @@ define <2 x i64> @test_int_x86_mask_vcvtt_ps2uqqs_128_undef(<4 x float> %x0, i8
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttps2uqqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x6c,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.128( <4 x float> %x0, <2 x i64> undef, i8 %mask)
+ %res = call <2 x i64> @llvm.x86.avx10.mask.vcvttps2uqqs.128( <4 x float> %x0, <2 x i64> undef, i8 %mask)
ret <2 x i64> %res
}
@@ -1044,7 +1044,7 @@ define <2 x i64> @test_int_x86_mask_vcvtt_ps2uqqs_128_default(<4 x float> %x0) {
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttps2uqqs %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x6c,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.128( <4 x float> %x0, <2 x i64> undef, i8 -1)
+ %res = call <2 x i64> @llvm.x86.avx10.mask.vcvttps2uqqs.128( <4 x float> %x0, <2 x i64> undef, i8 -1)
ret <2 x i64> %res
}
define <2 x i64> @test_int_x86_maskz_vcvtt_ps2uqqs_128_z(<4 x float> %x0, i8 %mask) {
@@ -1059,8 +1059,8 @@ define <2 x i64> @test_int_x86_maskz_vcvtt_ps2uqqs_128_z(<4 x float> %x0, i8 %ma
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvttps2uqqs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x6c,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
- %res = call <2 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.128( <4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask)
+ %res = call <2 x i64> @llvm.x86.avx10.mask.vcvttps2uqqs.128( <4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask)
ret <2 x i64> %res
}
-declare <2 x i64> @llvm.x86.avx512.mask.vcvttps2uqqs.128(<4 x float>, <2 x i64>, i8)
+declare <2 x i64> @llvm.x86.avx10.mask.vcvttps2uqqs.128(<4 x float>, <2 x i64>, i8)
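(For reference: the merge-masking these 128-bit tests check corresponds to C-level usage like the sketch below. It assumes the avx10_2satcvtdsintrin.h wrappers from this series and -mavx10.2-256; convert_low_lane_only is a hypothetical helper name, not part of the patch.)

#include <immintrin.h>

/* Merge masking on the two converted lanes: a 0 bit in U keeps that lane
   from src, a 1 bit receives the truncating, saturating conversion of a.
   Only bit 0 is set here, so just the low double is converted. */
static __m128i convert_low_lane_only(__m128i src, __m128d a) {
  return _mm_mask_cvttspd_epi32(src, /*U=*/0x1, a);
}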
diff --git a/llvm/test/CodeGen/X86/avx10_2satcvtds-x64-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2satcvtds-x64-intrinsics.ll
index 460411e4222dee..f5be929bc85ce9 100644
--- a/llvm/test/CodeGen/X86/avx10_2satcvtds-x64-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2satcvtds-x64-intrinsics.ll
@@ -8,12 +8,12 @@ define i64 @test_x86_avx512_vcvttsd2si64(<2 x double> %a0) {
; CHECK-NEXT: vcvttsd2sis {sae}, %xmm0, %rax # encoding: [0x62,0xf5,0xff,0x18,0x6d,0xc0]
; CHECK-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res0 = call i64 @llvm.x86.avx512.vcvttsd2sis64(<2 x double> %a0, i32 4) ;
- %res1 = call i64 @llvm.x86.avx512.vcvttsd2sis64(<2 x double> %a0, i32 8) ;
+ %res0 = call i64 @llvm.x86.avx10.vcvttsd2sis64(<2 x double> %a0, i32 4) ;
+ %res1 = call i64 @llvm.x86.avx10.vcvttsd2sis64(<2 x double> %a0, i32 8) ;
%res2 = add i64 %res0, %res1
ret i64 %res2
}
-declare i64 @llvm.x86.avx512.vcvttsd2sis64(<2 x double>, i32) nounwind readnone
+declare i64 @llvm.x86.avx10.vcvttsd2sis64(<2 x double>, i32) nounwind readnone
define i64 @test_x86_avx512_vcvttsd2usi64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_vcvttsd2usi64:
@@ -22,12 +22,12 @@ define i64 @test_x86_avx512_vcvttsd2usi64(<2 x double> %a0) {
; CHECK-NEXT: vcvttsd2usis {sae}, %xmm0, %rax # encoding: [0x62,0xf5,0xff,0x18,0x6c,0xc0]
; CHECK-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res0 = call i64 @llvm.x86.avx512.vcvttsd2usis64(<2 x double> %a0, i32 4) ;
- %res1 = call i64 @llvm.x86.avx512.vcvttsd2usis64(<2 x double> %a0, i32 8) ;
+ %res0 = call i64 @llvm.x86.avx10.vcvttsd2usis64(<2 x double> %a0, i32 4) ;
+ %res1 = call i64 @llvm.x86.avx10.vcvttsd2usis64(<2 x double> %a0, i32 8) ;
%res2 = add i64 %res0, %res1
ret i64 %res2
}
-declare i64 @llvm.x86.avx512.vcvttsd2usis64(<2 x double>, i32) nounwind readnone
+declare i64 @llvm.x86.avx10.vcvttsd2usis64(<2 x double>, i32) nounwind readnone
define i64 @test_x86_avx512_vcvttss2sis64(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_vcvttss2sis64:
@@ -36,12 +36,12 @@ define i64 @test_x86_avx512_vcvttss2sis64(<4 x float> %a0) {
; CHECK-NEXT: vcvttss2sis {sae}, %xmm0, %rax # encoding: [0x62,0xf5,0xfe,0x18,0x6d,0xc0]
; CHECK-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res0 = call i64 @llvm.x86.avx512.vcvttss2sis64(<4 x float> %a0, i32 4) ;
- %res1 = call i64 @llvm.x86.avx512.vcvttss2sis64(<4 x float> %a0, i32 8) ;
+ %res0 = call i64 @llvm.x86.avx10.vcvttss2sis64(<4 x float> %a0, i32 4) ;
+ %res1 = call i64 @llvm.x86.avx10.vcvttss2sis64(<4 x float> %a0, i32 8) ;
%res2 = add i64 %res0, %res1
ret i64 %res2
}
-declare i64 @llvm.x86.avx512.vcvttss2sis64(<4 x float>, i32) nounwind readnone
+declare i64 @llvm.x86.avx10.vcvttss2sis64(<4 x float>, i32) nounwind readnone
define i64 @test_x86_avx512_vcvttss2usis64(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_vcvttss2usis64:
@@ -50,9 +50,9 @@ define i64 @test_x86_avx512_vcvttss2usis64(<4 x float> %a0) {
; CHECK-NEXT: vcvttss2usis {sae}, %xmm0, %rax # encoding: [0x62,0xf5,0xfe,0x18,0x6c,0xc0]
; CHECK-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
; CHECK-NEXT: retq # encoding: [0xc3]
- %res0 = call i64 @llvm.x86.avx512.vcvttss2usis64(<4 x float> %a0, i32 4) ;
- %res1 = call i64 @llvm.x86.avx512.vcvttss2usis64(<4 x float> %a0, i32 8) ;
+ %res0 = call i64 @llvm.x86.avx10.vcvttss2usis64(<4 x float> %a0, i32 4) ;
+ %res1 = call i64 @llvm.x86.avx10.vcvttss2usis64(<4 x float> %a0, i32 8) ;
%res2 = add i64 %res0, %res1
ret i64 %res2
}
-declare i64 @llvm.x86.avx512.vcvttss2usis64(<4 x float>, i32) nounwind readnone
+declare i64 @llvm.x86.avx10.vcvttss2usis64(<4 x float>, i32) nounwind readnone
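(Usage sketch, not part of the patch: the i32 4 and i32 8 rounding arguments in these scalar tests are _MM_FROUND_CUR_DIRECTION and _MM_FROUND_NO_EXC. The snippet below assumes this series applied and -mavx10.2-256; the clamped result is the saturation behaviour these instructions exist for, stated here as an expectation rather than a verified output.)

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m128d big = _mm_set_sd(1e300); /* far above INT_MAX */
  /* Plain vcvttsd2si returns the integer-indefinite 0x80000000 on
     overflow; the saturating vcvttsd2sis form clamps to INT_MAX. */
  int r = _mm_cvtts_roundsd_i32(big, _MM_FROUND_NO_EXC);
  printf("%d\n", r); /* expected: 2147483647 */
  return 0;
}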
>From 0ccaae1f26e38ba2f6ca000355c8ec6139dde78d Mon Sep 17 00:00:00 2001
From: Malay Sanghi <malay.sanghi at intel.com>
Date: Mon, 9 Sep 2024 19:39:13 +0800
Subject: [PATCH 6/8] review6
---
clang/lib/Headers/avx10_2satcvtdsintrin.h | 202 +++++++++++++---------
1 file changed, 122 insertions(+), 80 deletions(-)
diff --git a/clang/lib/Headers/avx10_2satcvtdsintrin.h b/clang/lib/Headers/avx10_2satcvtdsintrin.h
index af79ff4e044fec..85a4a532abde23 100644
--- a/clang/lib/Headers/avx10_2satcvtdsintrin.h
+++ b/clang/lib/Headers/avx10_2satcvtdsintrin.h
@@ -65,23 +65,27 @@
#endif /* __x86_64__ */
// 128 Bit : Double -> int
-#define _mm_cvttspd_epi32(A) \
- ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask( \
- (__v2df)(__m128d)A, (__v4si)(__m128i)_mm_undefined_si128(), \
- (__mmask8)(-1)))
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttspd_epi32(__m128d A) {
+ return ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask(
+ (__v2df)A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1)));
+}
-#define _mm_mask_cvttspd_epi32(W, U, A) \
- ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask( \
- (__v2df)(__m128d)A, (__v4si)(__m128i)W, (__mmask8)U))
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttspd_epi32(__m128i W,
+ __mmask8 U,
+ __m128d A) {
+ return ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask((__v2df)A, (__v4si)W, U));
+}
-#define _mm_maskz_cvttspd_epi32(U, A) \
- ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask( \
- (__v2df)(__m128d)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U))
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvttspd_epi32(__mmask8 U, __m128d A) {
+ return ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask(
+ (__v2df)A, (__v4si)(__m128i)_mm_setzero_si128(), U));
+}
// 256 Bit : Double -> int
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvttspd_epi32(__m256d A) {
return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
- (__v4df)(__m256d)A, (__v4si)_mm_undefined_si128(), (__mmask8)-1,
+ (__v4df)A, (__v4si)_mm_undefined_si128(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
@@ -112,18 +116,23 @@ _mm256_maskz_cvttspd_epi32(__mmask8 U, __m256d A) {
(int)(R)))
// 128 Bit : Double -> uint
-#define _mm_cvttspd_epu32(A) \
- ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask( \
- (__v2df)(__m128d)A, (__v4si)(__m128i)_mm_undefined_si128(), \
- (__mmask8)(-1)))
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttspd_epu32(__m128d A) {
+ return ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask(
+ (__v2df)A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1)));
+}
-#define _mm_mask_cvttspd_epu32(W, U, A) \
- ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask( \
- ((__v2df)(__m128d)A), (__v4si)(__m128i)W, (__mmask8)U))
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttspd_epu32(__m128i W,
+ __mmask8 U,
+ __m128d A) {
+ return ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask(
+ (__v2df)A, (__v4si)(__m128i)W, (__mmask8)U));
+}
-#define _mm_maskz_cvttspd_epu32(U, A) \
- ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask( \
- (__v2df)(__m128d)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U))
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvttspd_epu32(__mmask8 U, __m128d A) {
+ return ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask(
+ (__v2df)A, (__v4si)(__m128i)_mm_setzero_si128(), U));
+}
// 256 Bit : Double -> uint
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvttspd_epu32(__m256d A) {
@@ -159,17 +168,23 @@ _mm256_maskz_cvttspd_epu32(__mmask8 U, __m256d A) {
(int)(R)))
// 128 Bit : Double -> long
-#define _mm_cvttspd_epi64(A) \
- ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask( \
- (__v2df)(__m128d)A, (__v2di)_mm_undefined_si128(), (__mmask8) - 1))
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttspd_epi64(__m128d A) {
+ return ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask(
+ (__v2df)A, (__v2di)_mm_undefined_si128(), (__mmask8)-1));
+}
-#define _mm_mask_cvttspd_epi64(W, U, A) \
- ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask((__v2df)(__m128d)A, (__v2di)W, \
- (__mmask8)U))
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttspd_epi64(__m128i W,
+ __mmask8 U,
+ __m128d A) {
+ return ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask((__v2df)A, (__v2di)W,
+ (__mmask8)U));
+}
-#define _mm_maskz_cvttspd_epi64(U, A) \
- ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask( \
- (__v2df)(__m128d)A, (__v2di)_mm_setzero_si128(), (__mmask8)U))
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvttspd_epi64(__mmask8 U, __m128d A) {
+ return ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask(
+ (__v2df)A, (__v2di)_mm_setzero_si128(), (__mmask8)U));
+}
// 256 Bit : Double -> long
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttspd_epi64(__m256d A) {
@@ -203,17 +218,23 @@ _mm256_maskz_cvttspd_epi64(__mmask8 U, __m256d A) {
(__v4df)A, (__v4di)_mm256_setzero_si256(), (__mmask8)U, (int)R))
// 128 Bit : Double -> ulong
-#define _mm_cvttspd_epu64(A) \
- ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask( \
- (__v2df)(__m128d)A, (__v2di)_mm_undefined_si128(), (__mmask8) - 1))
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttspd_epu64(__m128d A) {
+ return ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask(
+ (__v2df)A, (__v2di)_mm_undefined_si128(), (__mmask8)-1));
+}
-#define _mm_mask_cvttspd_epu64(W, U, A) \
- ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask((__v2df)(__m128d)A, (__v2di)W, \
- (__mmask8)U))
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttspd_epu64(__m128i W,
+ __mmask8 U,
+ __m128d A) {
+ return ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask((__v2df)A, (__v2di)W,
+ (__mmask8)U));
+}
-#define _mm_maskz_cvttspd_epu64(U, A) \
- ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask( \
- (__v2df)(__m128d)A, (__v2di)_mm_setzero_si128(), (__mmask8)U))
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvttspd_epu64(__mmask8 U, __m128d A) {
+ return ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask(
+ (__v2df)A, (__v2di)_mm_setzero_si128(), (__mmask8)U));
+}
// 256 Bit : Double -> ulong
@@ -248,18 +269,23 @@ _mm256_maskz_cvttspd_epu64(__mmask8 U, __m256d A) {
(__v4df)A, (__v4di)_mm256_setzero_si256(), (__mmask8)U, (int)R))
// 128 Bit : float -> int
-#define _mm_cvttsps_epi32(A) \
- ((__m128i)__builtin_ia32_vcvttps2dqs128_mask( \
- (__v4sf)(__m128)A, (__v4si)(__m128i)_mm_undefined_si128(), \
- (__mmask8)(-1)))
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttsps_epi32(__m128 A) {
+ return ((__m128i)__builtin_ia32_vcvttps2dqs128_mask(
+ (__v4sf)A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1)));
+}
-#define _mm_mask_cvttsps_epi32(W, U, A) \
- ((__m128i)__builtin_ia32_vcvttps2dqs128_mask( \
- (__v4sf)(__m128)A, (__v4si)(__m128i)W, (__mmask8)U))
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttsps_epi32(__m128i W,
+ __mmask8 U,
+ __m128 A) {
+ return ((__m128i)__builtin_ia32_vcvttps2dqs128_mask((__v4sf)A, (__v4si)W,
+ (__mmask8)U));
+}
-#define _mm_maskz_cvttsps_epi32(U, A) \
- ((__m128i)__builtin_ia32_vcvttps2dqs128_mask( \
- (__v4sf)(__m128)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U))
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvttsps_epi32(__mmask8 U,
+ __m128 A) {
+ return ((__m128i)__builtin_ia32_vcvttps2dqs128_mask(
+ (__v4sf)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U));
+}
// 256 Bit : float -> int
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttsps_epi32(__m256 A) {
@@ -271,14 +297,13 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttsps_epi32(__m256 A) {
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_cvttsps_epi32(__m256i W, __mmask8 U, __m256 A) {
return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
- (__v8sf)(__m256)A, (__v8si)W, U, _MM_FROUND_CUR_DIRECTION));
+ (__v8sf)A, (__v8si)W, U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_cvttsps_epi32(__mmask8 U, __m256 A) {
return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
- (__v8sf)(__m256)A, (__v8si)_mm256_setzero_si256(), U,
- _MM_FROUND_CUR_DIRECTION));
+ (__v8sf)A, (__v8si)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
}
#define _mm256_cvtts_roundps_epi32(A, R) \
@@ -296,18 +321,23 @@ _mm256_maskz_cvttsps_epi32(__mmask8 U, __m256 A) {
(int)(R)))
// 128 Bit : float -> uint
-#define _mm_cvttsps_epu32(A) \
- ((__m128i)__builtin_ia32_vcvttps2udqs128_mask( \
- (__v4sf)(__m128)A, (__v4si)(__m128i)_mm_undefined_si128(), \
- (__mmask8)(-1)))
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttsps_epu32(__m128 A) {
+ return ((__m128i)__builtin_ia32_vcvttps2udqs128_mask(
+ (__v4sf)A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1)));
+}
-#define _mm_mask_cvttsps_epu32(W, U, A) \
- ((__m128i)__builtin_ia32_vcvttps2udqs128_mask( \
- (__v4sf)(__m128)A, (__v4si)(__m128i)W, (__mmask8)U))
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttsps_epu32(__m128i W,
+ __mmask8 U,
+ __m128 A) {
+ return ((__m128i)__builtin_ia32_vcvttps2udqs128_mask((__v4sf)A, (__v4si)W,
+ (__mmask8)U));
+}
-#define _mm_maskz_cvttsps_epu32(U, A) \
- ((__m128i)__builtin_ia32_vcvttps2udqs128_mask( \
- (__v4sf)(__m128)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U))
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvttsps_epu32(__mmask8 U,
+ __m128 A) {
+ return ((__m128i)__builtin_ia32_vcvttps2udqs128_mask(
+ (__v4sf)A, (__v4si)_mm_setzero_si128(), (__mmask8)U));
+}
// 256 Bit : float -> uint
@@ -344,17 +374,23 @@ _mm256_maskz_cvttsps_epu32(__mmask8 U, __m256 A) {
(int)(R)))
// 128 bit : float -> long
-#define _mm_cvttsps_epi64(A) \
- ((__m128i)__builtin_ia32_vcvttps2qqs128_mask( \
- (__v4sf)(__m128)A, (__v2di)_mm_undefined_si128(), (__mmask8) - 1))
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttsps_epi64(__m128 A) {
+ return ((__m128i)__builtin_ia32_vcvttps2qqs128_mask(
+ (__v4sf)A, (__v2di)_mm_undefined_si128(), (__mmask8)-1));
+}
-#define _mm_mask_cvttsps_epi64(W, U, A) \
- ((__m128i)__builtin_ia32_vcvttps2qqs128_mask( \
- (__v4sf)(__m128)A, (__v2di)(__m128i)W, (__mmask8)U))
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttsps_epi64(__m128i W,
+ __mmask8 U,
+ __m128 A) {
+ return ((__m128i)__builtin_ia32_vcvttps2qqs128_mask(
+ (__v4sf)A, (__v2di)(__m128i)W, (__mmask8)U));
+}
-#define _mm_maskz_cvttsps_epi64(U, A) \
- ((__m128i)__builtin_ia32_vcvttps2qqs128_mask( \
- (__v4sf)(__m128)A, (__v2di)_mm_setzero_si128(), (__mmask8)U))
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvttsps_epi64(__mmask8 U,
+ __m128 A) {
+ return ((__m128i)__builtin_ia32_vcvttps2qqs128_mask(
+ (__v4sf)A, (__v2di)_mm_setzero_si128(), (__mmask8)U));
+}
// 256 bit : float -> long
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttsps_epi64(__m128 A) {
@@ -388,17 +424,23 @@ _mm256_maskz_cvttsps_epi64(__mmask8 U, __m128 A) {
(__v4sf)(__m128)A, (__v4di)_mm256_setzero_si256(), (__mmask8)U, (int)R))
// 128 bit : float -> ulong
-#define _mm_cvttsps_epu64(A) \
- ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask( \
- (__v4sf)(__m128)A, (__v2di)_mm_undefined_si128(), (__mmask8) - 1))
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttsps_epu64(__m128 A) {
+ return ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask(
+ (__v4sf)A, (__v2di)_mm_undefined_si128(), (__mmask8)-1));
+}
-#define _mm_mask_cvttsps_epu64(W, U, A) \
- ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask( \
- (__v4sf)(__m128)A, (__v2di)(__m128i)W, (__mmask8)U))
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttsps_epu64(__m128i W,
+ __mmask8 U,
+ __m128 A) {
+ return ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask(
+ (__v4sf)A, (__v2di)(__m128i)W, (__mmask8)U));
+}
-#define _mm_maskz_cvttsps_epu64(U, A) \
- ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask( \
- (__v4sf)(__m128)A, (__v2di)_mm_setzero_si128(), (__mmask8)U))
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvttsps_epu64(__mmask8 U,
+ __m128 A) {
+ return ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask(
+ (__v4sf)A, (__v2di)_mm_setzero_si128(), (__mmask8)U));
+}
// 256 bit : float -> ulong
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttsps_epu64(__m128 A) {
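(The change above replaces the remaining #define wrappers with static inline functions, which gives them real prototypes: argument types are checked at the call site instead of being coerced through casts in a macro body. A contrast sketch, with illustrative names cvt_macro/cvt_inline that are not from the patch:)

#include <immintrin.h>

/* Macro form: any argument is force-cast to __m128d, so passing e.g. an
   __m128 silently bitcasts the float bits and converts garbage. */
#define cvt_macro(A)                                                         \
  ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask(                              \
      (__v2df)(__m128d)(A), (__v4si)_mm_setzero_si128(), (__mmask8)-1))

/* Inline form: an __m128 argument is diagnosed at compile time, and the
   function appears properly in diagnostics and debug info. */
static inline __m128i cvt_inline(__m128d A) {
  return (__m128i)__builtin_ia32_vcvttpd2dqs128_mask(
      (__v2df)A, (__v4si)_mm_setzero_si128(), (__mmask8)-1);
}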
>From 4f25bcb64fdca9abd9d4492808233db0f3398f7d Mon Sep 17 00:00:00 2001
From: Malay Sanghi <malay.sanghi at intel.com>
Date: Tue, 10 Sep 2024 12:22:29 +0800
Subject: [PATCH 7/8] review7
---
clang/lib/Headers/avx10_2satcvtdsintrin.h | 141 +++++++++++-----------
1 file changed, 71 insertions(+), 70 deletions(-)
diff --git a/clang/lib/Headers/avx10_2satcvtdsintrin.h b/clang/lib/Headers/avx10_2satcvtdsintrin.h
index 85a4a532abde23..9c47617b20b4c6 100644
--- a/clang/lib/Headers/avx10_2satcvtdsintrin.h
+++ b/clang/lib/Headers/avx10_2satcvtdsintrin.h
@@ -16,10 +16,14 @@
#define __AVX10_2SATCVTDSINTRIN_H
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS \
+#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \
__min_vector_width__(256)))
+#define __DEFAULT_FN_ATTRS128 \
+ __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \
+ __min_vector_width__(128)))
+
#define _mm_cvtts_roundsd_i32(A, R) \
((int)__builtin_ia32_vcvttsd2sis32((__v2df)(__m128d)(A), (const int)(R)))
@@ -65,37 +69,37 @@
#endif /* __x86_64__ */
// 128 Bit : Double -> int
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttspd_epi32(__m128d A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epi32(__m128d A) {
return ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask(
(__v2df)A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1)));
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttspd_epi32(__m128i W,
- __mmask8 U,
- __m128d A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttspd_epi32(__m128i W, __mmask8 U, __m128d A) {
return ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask((__v2df)A, (__v4si)W, U));
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvttspd_epi32(__mmask8 U, __m128d A) {
return ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask(
(__v2df)A, (__v4si)(__m128i)_mm_setzero_si128(), U));
}
// 256 Bit : Double -> int
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvttspd_epi32(__m256d A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
+_mm256_cvttspd_epi32(__m256d A) {
return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
(__v4df)A, (__v4si)_mm_undefined_si128(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttspd_epi32(__m128i W, __mmask8 U, __m256d A) {
return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
(__v4df)A, (__v4si)W, U, _MM_FROUND_CUR_DIRECTION));
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttspd_epi32(__mmask8 U, __m256d A) {
return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
(__v4df)A, (__v4si)_mm_setzero_si128(), U, _MM_FROUND_CUR_DIRECTION));
@@ -116,38 +120,38 @@ _mm256_maskz_cvttspd_epi32(__mmask8 U, __m256d A) {
(int)(R)))
// 128 Bit : Double -> uint
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttspd_epu32(__m128d A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epu32(__m128d A) {
return ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask(
(__v2df)A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1)));
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttspd_epu32(__m128i W,
- __mmask8 U,
- __m128d A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttspd_epu32(__m128i W, __mmask8 U, __m128d A) {
return ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask(
(__v2df)A, (__v4si)(__m128i)W, (__mmask8)U));
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvttspd_epu32(__mmask8 U, __m128d A) {
return ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask(
(__v2df)A, (__v4si)(__m128i)_mm_setzero_si128(), U));
}
// 256 Bit : Double -> uint
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvttspd_epu32(__m256d A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
+_mm256_cvttspd_epu32(__m256d A) {
return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
(__v4df)A, (__v4si)_mm_undefined_si128(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttspd_epu32(__m128i W, __mmask8 U, __m256d A) {
return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
(__v4df)A, (__v4si)W, U, _MM_FROUND_CUR_DIRECTION));
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttspd_epu32(__mmask8 U, __m256d A) {
return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
(__v4df)A, (__v4si)_mm_setzero_si128(), U, _MM_FROUND_CUR_DIRECTION));
@@ -168,38 +172,38 @@ _mm256_maskz_cvttspd_epu32(__mmask8 U, __m256d A) {
(int)(R)))
// 128 Bit : Double -> long
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttspd_epi64(__m128d A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epi64(__m128d A) {
return ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask(
(__v2df)A, (__v2di)_mm_undefined_si128(), (__mmask8)-1));
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttspd_epi64(__m128i W,
- __mmask8 U,
- __m128d A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttspd_epi64(__m128i W, __mmask8 U, __m128d A) {
return ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask((__v2df)A, (__v2di)W,
(__mmask8)U));
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvttspd_epi64(__mmask8 U, __m128d A) {
return ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask(
(__v2df)A, (__v2di)_mm_setzero_si128(), (__mmask8)U));
}
// 256 Bit : Double -> long
-static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttspd_epi64(__m256d A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttspd_epi64(__m256d A) {
return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
(__v4df)A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttspd_epi64(__m256i W, __mmask8 U, __m256d A) {
return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
(__v4df)A, (__v4di)W, U, _MM_FROUND_CUR_DIRECTION));
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttspd_epi64(__mmask8 U, __m256d A) {
return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
(__v4df)A, (__v4di)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
@@ -218,19 +222,18 @@ _mm256_maskz_cvttspd_epi64(__mmask8 U, __m256d A) {
(__v4df)A, (__v4di)_mm256_setzero_si256(), (__mmask8)U, (int)R))
// 128 Bit : Double -> ulong
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttspd_epu64(__m128d A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epu64(__m128d A) {
return ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask(
(__v2df)A, (__v2di)_mm_undefined_si128(), (__mmask8)-1));
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttspd_epu64(__m128i W,
- __mmask8 U,
- __m128d A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttspd_epu64(__m128i W, __mmask8 U, __m128d A) {
return ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask((__v2df)A, (__v2di)W,
(__mmask8)U));
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvttspd_epu64(__mmask8 U, __m128d A) {
return ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask(
(__v2df)A, (__v2di)_mm_setzero_si128(), (__mmask8)U));
@@ -238,19 +241,20 @@ _mm_maskz_cvttspd_epu64(__mmask8 U, __m128d A) {
// 256 Bit : Double -> ulong
-static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttspd_epu64(__m256d A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttspd_epu64(__m256d A) {
return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
(__v4df)A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttspd_epu64(__m256i W, __mmask8 U, __m256d A) {
return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
(__v4df)A, (__v4di)W, U, _MM_FROUND_CUR_DIRECTION));
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttspd_epu64(__mmask8 U, __m256d A) {
return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
(__v4df)A, (__v4di)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
@@ -269,38 +273,37 @@ _mm256_maskz_cvttspd_epu64(__mmask8 U, __m256d A) {
(__v4df)A, (__v4di)_mm256_setzero_si256(), (__mmask8)U, (int)R))
// 128 Bit : float -> int
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttsps_epi32(__m128 A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epi32(__m128 A) {
return ((__m128i)__builtin_ia32_vcvttps2dqs128_mask(
(__v4sf)A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1)));
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttsps_epi32(__m128i W,
- __mmask8 U,
- __m128 A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttsps_epi32(__m128i W, __mmask8 U, __m128 A) {
return ((__m128i)__builtin_ia32_vcvttps2dqs128_mask((__v4sf)A, (__v4si)W,
(__mmask8)U));
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvttsps_epi32(__mmask8 U,
- __m128 A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttsps_epi32(__mmask8 U, __m128 A) {
return ((__m128i)__builtin_ia32_vcvttps2dqs128_mask(
(__v4sf)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U));
}
// 256 Bit : float -> int
-static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttsps_epi32(__m256 A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttsps_epi32(__m256 A) {
return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
(__v8sf)A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttsps_epi32(__m256i W, __mmask8 U, __m256 A) {
return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
(__v8sf)A, (__v8si)W, U, _MM_FROUND_CUR_DIRECTION));
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttsps_epi32(__mmask8 U, __m256 A) {
return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
(__v8sf)A, (__v8si)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
@@ -321,39 +324,38 @@ _mm256_maskz_cvttsps_epi32(__mmask8 U, __m256 A) {
(int)(R)))
// 128 Bit : float -> uint
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttsps_epu32(__m128 A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epu32(__m128 A) {
return ((__m128i)__builtin_ia32_vcvttps2udqs128_mask(
(__v4sf)A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1)));
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttsps_epu32(__m128i W,
- __mmask8 U,
- __m128 A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttsps_epu32(__m128i W, __mmask8 U, __m128 A) {
return ((__m128i)__builtin_ia32_vcvttps2udqs128_mask((__v4sf)A, (__v4si)W,
(__mmask8)U));
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvttsps_epu32(__mmask8 U,
- __m128 A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttsps_epu32(__mmask8 U, __m128 A) {
return ((__m128i)__builtin_ia32_vcvttps2udqs128_mask(
(__v4sf)A, (__v4si)_mm_setzero_si128(), (__mmask8)U));
}
// 256 Bit : float -> uint
-static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttsps_epu32(__m256 A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttsps_epu32(__m256 A) {
return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
(__v8sf)A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttsps_epu32(__m256i W, __mmask8 U, __m256 A) {
return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
(__v8sf)A, (__v8si)W, U, _MM_FROUND_CUR_DIRECTION));
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttsps_epu32(__mmask8 U, __m256 A) {
return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
(__v8sf)A, (__v8si)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
@@ -374,37 +376,36 @@ _mm256_maskz_cvttsps_epu32(__mmask8 U, __m256 A) {
(int)(R)))
// 128 bit : float -> long
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttsps_epi64(__m128 A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epi64(__m128 A) {
return ((__m128i)__builtin_ia32_vcvttps2qqs128_mask(
(__v4sf)A, (__v2di)_mm_undefined_si128(), (__mmask8)-1));
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttsps_epi64(__m128i W,
- __mmask8 U,
- __m128 A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttsps_epi64(__m128i W, __mmask8 U, __m128 A) {
return ((__m128i)__builtin_ia32_vcvttps2qqs128_mask(
(__v4sf)A, (__v2di)(__m128i)W, (__mmask8)U));
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvttsps_epi64(__mmask8 U,
- __m128 A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttsps_epi64(__mmask8 U, __m128 A) {
return ((__m128i)__builtin_ia32_vcvttps2qqs128_mask(
(__v4sf)A, (__v2di)_mm_setzero_si128(), (__mmask8)U));
}
// 256 bit : float -> long
-static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttsps_epi64(__m128 A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttsps_epi64(__m128 A) {
return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
(__v4sf)A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttsps_epi64(__m256i W, __mmask8 U, __m128 A) {
return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
(__v4sf)A, (__v4di)W, U, _MM_FROUND_CUR_DIRECTION));
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttsps_epi64(__mmask8 U, __m128 A) {
return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
(__v4sf)A, (__v4di)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
@@ -424,38 +425,37 @@ _mm256_maskz_cvttsps_epi64(__mmask8 U, __m128 A) {
(__v4sf)(__m128)A, (__v4di)_mm256_setzero_si256(), (__mmask8)U, (int)R))
// 128 bit : float -> ulong
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttsps_epu64(__m128 A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epu64(__m128 A) {
return ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask(
(__v4sf)A, (__v2di)_mm_undefined_si128(), (__mmask8)-1));
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttsps_epu64(__m128i W,
- __mmask8 U,
- __m128 A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttsps_epu64(__m128i W, __mmask8 U, __m128 A) {
return ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask(
(__v4sf)A, (__v2di)(__m128i)W, (__mmask8)U));
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvttsps_epu64(__mmask8 U,
- __m128 A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttsps_epu64(__mmask8 U, __m128 A) {
return ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask(
(__v4sf)A, (__v2di)_mm_setzero_si128(), (__mmask8)U));
}
// 256 bit : float -> ulong
-static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttsps_epu64(__m128 A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttsps_epu64(__m128 A) {
return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
(__v4sf)A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttsps_epu64(__m256i W, __mmask8 U, __m128 A) {
return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
(__v4sf)A, (__v4di)W, U, _MM_FROUND_CUR_DIRECTION));
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttsps_epu64(__mmask8 U, __m128 A) {
return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
(__v4sf)A, (__v4di)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
@@ -474,5 +474,6 @@ _mm256_maskz_cvttsps_epu64(__mmask8 U, __m128 A) {
((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( \
(__v4sf)(__m128)A, (__v4di)_mm256_setzero_si256(), (__mmask8)U, (int)R))
-#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
#endif // __AVX10_2SATCVTDSINTRIN_H
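(On the attribute split above: as I understand it, __min_vector_width__ records the minimum legal vector width a function needs, and that value propagates to the caller when an always_inline intrinsic is inlined. Tagging the XMM-only wrappers with 128 instead of 256 therefore avoids inflating a caller's min-legal-vector-width under -mprefer-vector-width=128. A sketch of the pattern; ATTRS128 is an illustrative stand-in for the header's macro:)

#include <immintrin.h>

/* XMM-only wrapper advertising a 128-bit minimum vector width, so
   inlining it does not widen the caller's min-legal-vector-width. */
#define ATTRS128                                                             \
  __attribute__((__always_inline__, __nodebug__,                             \
                 __target__("avx10.2-256"), __min_vector_width__(128)))

static __inline__ __m128i ATTRS128 maskz_cvt_sketch(__mmask8 U, __m128 A) {
  return (__m128i)__builtin_ia32_vcvttps2dqs128_mask(
      (__v4sf)A, (__v4si)_mm_setzero_si128(), (__mmask8)U);
}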
>From 173b7265460528345f09572f973c665717dc1590 Mon Sep 17 00:00:00 2001
From: Malay Sanghi <malay.sanghi at intel.com>
Date: Tue, 10 Sep 2024 16:20:00 +0800
Subject: [PATCH 8/8] format
---
clang/lib/Headers/avx10_2_512satcvtdsintrin.h | 246 +++++------
clang/lib/Headers/avx10_2satcvtdsintrin.h | 397 +++++++++---------
2 files changed, 333 insertions(+), 310 deletions(-)
diff --git a/clang/lib/Headers/avx10_2_512satcvtdsintrin.h b/clang/lib/Headers/avx10_2_512satcvtdsintrin.h
index 79a8699bc7e164..5970ab03314441 100644
--- a/clang/lib/Headers/avx10_2_512satcvtdsintrin.h
+++ b/clang/lib/Headers/avx10_2_512satcvtdsintrin.h
@@ -20,278 +20,284 @@
__min_vector_width__(512)))
// 512 bit : Double -> Int
-static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi32(__m512d A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi32(__m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
- (__v8df)A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
+ (__v8df)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttspd_epi32(__m256i W, __mmask8 U, __m512d A) {
+_mm512_mask_cvttspd_epi32(__m256i __W, __mmask8 __U, __m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
- (__v8df)A, (__v8si)W, U, _MM_FROUND_CUR_DIRECTION));
+ (__v8df)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttspd_epi32(__mmask8 U, __m512d A) {
+_mm512_maskz_cvttspd_epi32(__mmask8 __U, __m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
- (__v8df)A, (__v8si)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
+ (__v8df)__A, (__v8si)_mm256_setzero_si256(), __U,
+ _MM_FROUND_CUR_DIRECTION));
}
-#define _mm512_cvtts_roundpd_epi32(A, R) \
+#define _mm512_cvtts_roundpd_epi32(__A, __R) \
((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( \
- (__v8df)(__m512d)(A), (__v8si)_mm256_undefined_si256(), (__mmask8) - 1, \
- (const int)(R)))
+ (__v8df)(__m512d)(__A), (__v8si)_mm256_undefined_si256(), \
+ (__mmask8) - 1, (const int)(__R)))
-#define _mm512_mask_cvtts_roundpd_epi32(W, U, A, R) \
+#define _mm512_mask_cvtts_roundpd_epi32(__W, __U, __A, __R) \
((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( \
- (__v8df)(__m512d)(A), (__v8si)(__m256i)(W), (__mmask8)(U), \
- (const int)(R)))
+ (__v8df)(__m512d)(__A), (__v8si)(__m256i)(__W), (__mmask8)(__U), \
+ (const int)(__R)))
-#define _mm512_maskz_cvtts_roundpd_epi32(U, A, R) \
+#define _mm512_maskz_cvtts_roundpd_epi32(__U, __A, __R) \
((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( \
- (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), \
- (const int)(R)))
+ (__v8df)(__m512d)(__A), (__v8si)_mm256_setzero_si256(), (__mmask8)(__U), \
+ (const int)(__R)))
// 512 bit : Double -> uInt
-static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu32(__m512d A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu32(__m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
- (__v8df)A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
+ (__v8df)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttspd_epu32(__m256i W, __mmask8 U, __m512d A) {
+_mm512_mask_cvttspd_epu32(__m256i __W, __mmask8 __U, __m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
- (__v8df)A, (__v8si)W, U, _MM_FROUND_CUR_DIRECTION));
+ (__v8df)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttspd_epu32(__mmask8 U, __m512d A) {
+_mm512_maskz_cvttspd_epu32(__mmask8 __U, __m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
- (__v8df)A, (__v8si)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
+ (__v8df)__A, (__v8si)_mm256_setzero_si256(), __U,
+ _MM_FROUND_CUR_DIRECTION));
}
-#define _mm512_cvtts_roundpd_epu32(A, R) \
+#define _mm512_cvtts_roundpd_epu32(__A, __R) \
((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( \
- (__v8df)(__m512d)(A), (__v8si)_mm256_undefined_si256(), (__mmask8) - 1, \
- (const int)(R)))
+ (__v8df)(__m512d)(__A), (__v8si)_mm256_undefined_si256(), \
+ (__mmask8) - 1, (const int)(__R)))
-#define _mm512_mask_cvtts_roundpd_epu32(W, U, A, R) \
+#define _mm512_mask_cvtts_roundpd_epu32(__W, __U, __A, __R) \
((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( \
- (__v8df)(__m512d)(A), (__v8si)(__m256i)(W), (__mmask8)(U), \
- (const int)(R)))
+ (__v8df)(__m512d)(__A), (__v8si)(__m256i)(__W), (__mmask8)(__U), \
+ (const int)(__R)))
-#define _mm512_maskz_cvtts_roundpd_epu32(U, A, R) \
+#define _mm512_maskz_cvtts_roundpd_epu32(__U, __A, __R) \
((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( \
- (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), \
- (const int)(R)))
+ (__v8df)(__m512d)(__A), (__v8si)_mm256_setzero_si256(), (__mmask8)(__U), \
+ (const int)(__R)))
// 512 bit : Double -> Long
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi64(__m512d A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi64(__m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
- (__v8df)A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
+ (__v8df)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttspd_epi64(__m512i W, __mmask8 U, __m512d A) {
+_mm512_mask_cvttspd_epi64(__m512i __W, __mmask8 __U, __m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
- (__v8df)A, (__v8di)W, U, _MM_FROUND_CUR_DIRECTION));
+ (__v8df)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttspd_epi64(__mmask8 U, __m512d A) {
+_mm512_maskz_cvttspd_epi64(__mmask8 __U, __m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
- (__v8df)A, (__v8di)_mm512_setzero_si512(), U, _MM_FROUND_CUR_DIRECTION));
+ (__v8df)__A, (__v8di)_mm512_setzero_si512(), __U,
+ _MM_FROUND_CUR_DIRECTION));
}
-#define _mm512_cvtts_roundpd_epi64(A, R) \
+#define _mm512_cvtts_roundpd_epi64(__A, __R) \
((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( \
- (__v8df)(__m512d)(A), (__v8di)_mm512_undefined_epi32(), (__mmask8) - 1, \
- (const int)(R)))
+ (__v8df)(__m512d)(__A), (__v8di)_mm512_undefined_epi32(), \
+ (__mmask8) - 1, (const int)(__R)))
-#define _mm512_mask_cvtts_roundpd_epi64(W, U, A, R) \
+#define _mm512_mask_cvtts_roundpd_epi64(__W, __U, __A, __R) \
((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( \
- (__v8df)(__m512d)(A), (__v8di)(__m512i)(W), (__mmask8)(U), \
- (const int)(R)))
+ (__v8df)(__m512d)(__A), (__v8di)(__m512i)(__W), (__mmask8)(__U), \
+ (const int)(__R)))
-#define _mm512_maskz_cvtts_roundpd_epi64(U, A, R) \
+#define _mm512_maskz_cvtts_roundpd_epi64(__U, __A, __R) \
((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( \
- (__v8df)(__m512d)(A), (__v8di)_mm512_setzero_si512(), (__mmask8)(U), \
- (const int)(R)))
+ (__v8df)(__m512d)(__A), (__v8di)_mm512_setzero_si512(), (__mmask8)(__U), \
+ (const int)(__R)))
// 512 bit : Double -> ULong
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu64(__m512d A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu64(__m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
- (__v8df)A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
+ (__v8df)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttspd_epu64(__m512i W, __mmask8 U, __m512d A) {
+_mm512_mask_cvttspd_epu64(__m512i __W, __mmask8 __U, __m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
- (__v8df)A, (__v8di)W, U, _MM_FROUND_CUR_DIRECTION));
+ (__v8df)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttspd_epu64(__mmask8 U, __m512d A) {
+_mm512_maskz_cvttspd_epu64(__mmask8 __U, __m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
- (__v8df)A, (__v8di)_mm512_setzero_si512(), U, _MM_FROUND_CUR_DIRECTION));
+ (__v8df)__A, (__v8di)_mm512_setzero_si512(), __U,
+ _MM_FROUND_CUR_DIRECTION));
}
-#define _mm512_cvtts_roundpd_epu64(A, R) \
+#define _mm512_cvtts_roundpd_epu64(__A, __R) \
((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( \
- (__v8df)(__m512d)(A), (__v8di)_mm512_undefined_epi32(), (__mmask8) - 1, \
- (const int)(R)))
+ (__v8df)(__m512d)(__A), (__v8di)_mm512_undefined_epi32(), \
+ (__mmask8) - 1, (const int)(__R)))
-#define _mm512_mask_cvtts_roundpd_epu64(W, U, A, R) \
+#define _mm512_mask_cvtts_roundpd_epu64(__W, __U, __A, __R) \
((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( \
- (__v8df)(__m512d)(A), (__v8di)(__m512i)(W), (__mmask8)(U), \
- (const int)(R)))
+ (__v8df)(__m512d)(__A), (__v8di)(__m512i)(__W), (__mmask8)(__U), \
+ (const int)(__R)))
-#define _mm512_maskz_cvtts_roundpd_epu64(U, A, R) \
+#define _mm512_maskz_cvtts_roundpd_epu64(__U, __A, __R) \
((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( \
- (__v8df)(__m512d)(A), (__v8di)_mm512_setzero_si512(), (__mmask8)(U), \
- (const int)(R)))
+ (__v8df)(__m512d)(__A), (__v8di)_mm512_setzero_si512(), (__mmask8)(__U), \
+ (const int)(__R)))
// 512 bit: Float -> int
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi32(__m512 A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi32(__m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
- (__v16sf)(A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1,
+ (__v16sf)(__A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttsps_epi32(__m512i W, __mmask16 U, __m512 A) {
+_mm512_mask_cvttsps_epi32(__m512i __W, __mmask16 __U, __m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
- (__v16sf)(A), (__v16si)(W), U, _MM_FROUND_CUR_DIRECTION));
+ (__v16sf)(__A), (__v16si)(__W), __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttsps_epi32(__mmask16 U, __m512 A) {
+_mm512_maskz_cvttsps_epi32(__mmask16 __U, __m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
- (__v16sf)(A), (__v16si)_mm512_setzero_si512(), U,
+ (__v16sf)(__A), (__v16si)_mm512_setzero_si512(), __U,
_MM_FROUND_CUR_DIRECTION));
}
-#define _mm512_cvtts_roundps_epi32(A, R) \
+#define _mm512_cvtts_roundps_epi32(__A, __R) \
((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( \
- (__v16sf)(__m512)(A), (__v16si)_mm512_undefined_epi32(), \
- (__mmask16) - 1, (const int)(R)))
+ (__v16sf)(__m512)(__A), (__v16si)_mm512_undefined_epi32(), \
+ (__mmask16) - 1, (const int)(__R)))
-#define _mm512_mask_cvtts_roundps_epi32(W, U, A, R) \
+#define _mm512_mask_cvtts_roundps_epi32(__W, __U, __A, __R) \
((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( \
- (__v16sf)(__m512)(A), (__v16si)(__m512i)(W), (__mmask16)(U), \
- (const int)(R)))
+ (__v16sf)(__m512)(__A), (__v16si)(__m512i)(__W), (__mmask16)(__U), \
+ (const int)(__R)))
-#define _mm512_maskz_cvtts_roundps_epi32(U, A, R) \
+#define _mm512_maskz_cvtts_roundps_epi32(__U, __A, __R) \
((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( \
- (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), (__mmask16)(U), \
- (const int)(R)))
+ (__v16sf)(__m512)(__A), (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)(__U), (const int)(__R)))
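
A sketch of the explicit-rounding macros; for these truncating converts the
rounding operand is effectively an exception-suppression control, so
_MM_FROUND_NO_EXC (or _MM_FROUND_CUR_DIRECTION) is what one would normally
pass (names below are illustrative):

#include <immintrin.h>

static __m512i cvt_ps_epi32_sae(__m512 a, __m512i w, __mmask16 k) {
  // SAE forms: suppress FP exceptions for the conversion.
  __m512i r0 = _mm512_cvtts_roundps_epi32(a, _MM_FROUND_NO_EXC);
  __m512i r1 = _mm512_mask_cvtts_roundps_epi32(w, k, a, _MM_FROUND_NO_EXC);
  return _mm512_or_si512(r0, r1); // combine so both stay live
}
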
// 512 bit: Float -> uint
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu32(__m512 A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu32(__m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
- (__v16sf)(A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1,
+ (__v16sf)(__A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttsps_epu32(__m512i W, __mmask16 U, __m512 A) {
+_mm512_mask_cvttsps_epu32(__m512i __W, __mmask16 __U, __m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
- (__v16sf)(A), (__v16si)(W), U, _MM_FROUND_CUR_DIRECTION));
+ (__v16sf)(__A), (__v16si)(__W), __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttsps_epu32(__mmask16 U, __m512 A) {
+_mm512_maskz_cvttsps_epu32(__mmask16 __U, __m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
- (__v16sf)(A), (__v16si)_mm512_setzero_si512(), U,
+ (__v16sf)(__A), (__v16si)_mm512_setzero_si512(), __U,
_MM_FROUND_CUR_DIRECTION));
}
-#define _mm512_cvtts_roundps_epu32(A, R) \
+#define _mm512_cvtts_roundps_epu32(__A, __R) \
((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( \
- (__v16sf)(__m512)(A), (__v16si)_mm512_undefined_epi32(), \
- (__mmask16) - 1, (const int)(R)))
+ (__v16sf)(__m512)(__A), (__v16si)_mm512_undefined_epi32(), \
+ (__mmask16) - 1, (const int)(__R)))
-#define _mm512_mask_cvtts_roundps_epu32(W, U, A, R) \
+#define _mm512_mask_cvtts_roundps_epu32(__W, __U, __A, __R) \
((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( \
- (__v16sf)(__m512)(A), (__v16si)(__m512i)(W), (__mmask16)(U), \
- (const int)(R)))
+ (__v16sf)(__m512)(__A), (__v16si)(__m512i)(__W), (__mmask16)(__U), \
+ (const int)(__R)))
-#define _mm512_maskz_cvtts_roundps_epu32(U, A, R) \
+#define _mm512_maskz_cvtts_roundps_epu32(__U, __A, __R) \
((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( \
- (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), (__mmask16)(U), \
- (const int)(R)))
+ (__v16sf)(__m512)(__A), (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)(__U), (const int)(__R)))
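
The extra "s" in these names is the saturating behavior: out-of-range inputs
clamp to the destination type's limits instead of producing the legacy
integer-indefinite value, and (matching llvm.fptoui.sat semantics) negative
and NaN inputs should map to 0 for the unsigned forms. A small illustration,
with expected values stated under that assumption:

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

static void show_epu32_saturation(void) {
  // -1.0f and 5e9f are both unrepresentable as uint32_t.
  __m512 a = _mm512_setr_ps(-1.0f, 5e9f, 42.5f, 0.0f, 0, 0, 0, 0,
                            0, 0, 0, 0, 0, 0, 0, 0);
  uint32_t out[16];
  _mm512_storeu_si512(out, _mm512_cvttsps_epu32(a));
  printf("%u %u %u %u\n", out[0], out[1], out[2], out[3]); // 0 4294967295 42 0
}
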
// 512 bit : float -> long
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi64(__m256 A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi64(__m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
- (__v8sf)A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
+ (__v8sf)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttsps_epi64(__m512i W, __mmask8 U, __m256 A) {
+_mm512_mask_cvttsps_epi64(__m512i __W, __mmask8 __U, __m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
- (__v8sf)A, (__v8di)W, U, _MM_FROUND_CUR_DIRECTION));
+ (__v8sf)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttsps_epi64(__mmask8 U, __m256 A) {
+_mm512_maskz_cvttsps_epi64(__mmask8 __U, __m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
- (__v8sf)A, (__v8di)_mm512_setzero_si512(), U, _MM_FROUND_CUR_DIRECTION));
+ (__v8sf)__A, (__v8di)_mm512_setzero_si512(), __U,
+ _MM_FROUND_CUR_DIRECTION));
}
-#define _mm512_cvtts_roundps_epi64(A, R) \
+#define _mm512_cvtts_roundps_epi64(__A, __R) \
((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( \
- (__v8sf)(__m256)(A), (__v8di)_mm512_undefined_epi32(), (__mmask8) - 1, \
- (const int)(R)))
+ (__v8sf)(__m256)(__A), (__v8di)_mm512_undefined_epi32(), (__mmask8) - 1, \
+ (const int)(__R)))
-#define _mm512_mask_cvtts_roundps_epi64(W, U, A, R) \
+#define _mm512_mask_cvtts_roundps_epi64(__W, __U, __A, __R) \
((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( \
- (__v8sf)(__m256)(A), (__v8di)(__m512i)(W), (__mmask8)(U), \
- (const int)(R)))
+ (__v8sf)(__m256)(__A), (__v8di)(__m512i)(__W), (__mmask8)(__U), \
+ (const int)(__R)))
-#define _mm512_maskz_cvtts_roundps_epi64(U, A, R) \
+#define _mm512_maskz_cvtts_roundps_epi64(__U, __A, __R) \
((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( \
- (__v8sf)(__m256)(A), (__v8di)_mm512_setzero_si512(), (__mmask8)(U), \
- (const int)(R)))
+ (__v8sf)(__m256)(__A), (__v8di)_mm512_setzero_si512(), (__mmask8)(__U), \
+ (const int)(__R)))
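
Note the narrow source type here: the float -> 64-bit forms read eight floats
from a __m256 and widen into a full __m512i, e.g. (illustrative names):

#include <immintrin.h>

static __m512i widen_ps_to_epi64(__m256 eight_floats, __m512i w, __mmask8 k) {
  // Eight 32-bit floats in, eight 64-bit signed results out.
  return _mm512_mask_cvttsps_epi64(w, k, eight_floats);
}
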
// 512 bit : float -> ulong
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu64(__m256 A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu64(__m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
- (__v8sf)A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
+ (__v8sf)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttsps_epu64(__m512i W, __mmask8 U, __m256 A) {
+_mm512_mask_cvttsps_epu64(__m512i __W, __mmask8 __U, __m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
- (__v8sf)A, (__v8di)W, U, _MM_FROUND_CUR_DIRECTION));
+ (__v8sf)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttsps_epu64(__mmask8 U, __m256 A) {
+_mm512_maskz_cvttsps_epu64(__mmask8 __U, __m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
- (__v8sf)A, (__v8di)_mm512_setzero_si512(), U, _MM_FROUND_CUR_DIRECTION));
+ (__v8sf)__A, (__v8di)_mm512_setzero_si512(), __U,
+ _MM_FROUND_CUR_DIRECTION));
}
-#define _mm512_cvtts_roundps_epu64(A, R) \
+#define _mm512_cvtts_roundps_epu64(__A, __R) \
((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( \
- (__v8sf)(__m256)(A), (__v8di)_mm512_undefined_epi32(), (__mmask8) - 1, \
- (const int)(R)))
+ (__v8sf)(__m256)(__A), (__v8di)_mm512_undefined_epi32(), (__mmask8) - 1, \
+ (const int)(__R)))
-#define _mm512_mask_cvtts_roundps_epu64(W, U, A, R) \
+#define _mm512_mask_cvtts_roundps_epu64(__W, __U, __A, __R) \
((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( \
- (__v8sf)(__m256)(A), (__v8di)(__m512i)(W), (__mmask8)(U), \
- (const int)(R)))
+ (__v8sf)(__m256)(__A), (__v8di)(__m512i)(__W), (__mmask8)(__U), \
+ (const int)(__R)))
-#define _mm512_maskz_cvtts_roundps_epu64(U, A, R) \
+#define _mm512_maskz_cvtts_roundps_epu64(__U, __A, __R) \
((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( \
- (__v8sf)(__m256)(A), (__v8di)_mm512_setzero_si512(), (__mmask8)(U), \
- (const int)(R)))
+ (__v8sf)(__m256)(__A), (__v8di)_mm512_setzero_si512(), (__mmask8)(__U), \
+ (const int)(__R)))
#undef __DEFAULT_FN_ATTRS
#endif // __AVX10_2_512SATCVTDSINTRIN_H
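
For reference, a translation unit exercising this header might look like the
following minimal sketch (flag and file names are illustrative; the feature
string matches the target attribute used in the header):

// cc -O2 -mavx10.2-512 -c demo512.c
#include <immintrin.h>

__m512i trunc_sat_epi32(__m512 v) { return _mm512_cvttsps_epi32(v); }
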
diff --git a/clang/lib/Headers/avx10_2satcvtdsintrin.h b/clang/lib/Headers/avx10_2satcvtdsintrin.h
index 9c47617b20b4c6..59028436311e73 100644
--- a/clang/lib/Headers/avx10_2satcvtdsintrin.h
+++ b/clang/lib/Headers/avx10_2satcvtdsintrin.h
@@ -24,455 +24,472 @@
__attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \
__min_vector_width__(128)))
-#define _mm_cvtts_roundsd_i32(A, R) \
- ((int)__builtin_ia32_vcvttsd2sis32((__v2df)(__m128)(A), (const int)(R)))
+#define _mm_cvtts_roundsd_i32(__A, __R) \
+ ((int)__builtin_ia32_vcvttsd2sis32((__v2df)(__m128d)(__A), (const int)(__R)))
-#define _mm_cvtts_roundsd_si32(A, R) \
- ((int)__builtin_ia32_vcvttsd2sis32((__v2df)(__m128d)(A), (const int)(R)))
+#define _mm_cvtts_roundsd_si32(__A, __R) \
+ ((int)__builtin_ia32_vcvttsd2sis32((__v2df)(__m128d)(__A), (const int)(__R)))
-#define _mm_cvtts_roundsd_u32(A, R) \
- ((unsigned int)__builtin_ia32_vcvttsd2usis32((__v2df)(__m128d)(A), \
- (const int)(R)))
+#define _mm_cvtts_roundsd_u32(__A, __R) \
+ ((unsigned int)__builtin_ia32_vcvttsd2usis32((__v2df)(__m128d)(__A), \
+ (const int)(__R)))
-#define _mm_cvtts_roundss_i32(A, R) \
- ((int)__builtin_ia32_vcvttss2sis32((__v4sf)(__m128)(A), (const int)(R)))
+#define _mm_cvtts_roundss_i32(__A, __R) \
+ ((int)__builtin_ia32_vcvttss2sis32((__v4sf)(__m128)(__A), (const int)(__R)))
-#define _mm_cvtts_roundss_si32(A, R) \
- ((int)__builtin_ia32_vcvttss2sis32((__v4sf)(__m128)(A), (const int)(R)))
+#define _mm_cvtts_roundss_si32(__A, __R) \
+ ((int)__builtin_ia32_vcvttss2sis32((__v4sf)(__m128)(__A), (const int)(__R)))
-#define _mm_cvtts_roundss_u32(A, R) \
- ((unsigned int)__builtin_ia32_vcvttss2usis32((__v4sf)(__m128)(A), \
- (const int)(R)))
+#define _mm_cvtts_roundss_u32(__A, __R) \
+ ((unsigned int)__builtin_ia32_vcvttss2usis32((__v4sf)(__m128)(__A), \
+ (const int)(__R)))
#ifdef __x86_64__
-#define _mm_cvtts_roundss_u64(A, R) \
- ((unsigned long long)__builtin_ia32_vcvttss2usis64((__v4sf)(__m128)(A), \
- (const int)(R)))
+#define _mm_cvtts_roundss_u64(__A, __R) \
+ ((unsigned long long)__builtin_ia32_vcvttss2usis64((__v4sf)(__m128)(__A), \
+ (const int)(__R)))
-#define _mm_cvtts_roundsd_u64(A, R) \
- ((unsigned long long)__builtin_ia32_vcvttsd2usis64((__v2df)(__m128d)(A), \
- (const int)(R)))
+#define _mm_cvtts_roundsd_u64(__A, __R) \
+ ((unsigned long long)__builtin_ia32_vcvttsd2usis64((__v2df)(__m128d)(__A), \
+ (const int)(__R)))
-#define _mm_cvtts_roundss_i64(A, R) \
- ((long long)__builtin_ia32_vcvttss2sis64((__v4sf)(__m128)(A), (const int)(R)))
+#define _mm_cvtts_roundss_i64(__A, __R) \
+ ((long long)__builtin_ia32_vcvttss2sis64((__v4sf)(__m128)(__A), \
+ (const int)(__R)))
-#define _mm_cvtts_roundss_si64(A, R) \
- ((long long)__builtin_ia32_vcvttss2sis64((__v4sf)(__m128)(A), (const int)(R)))
+#define _mm_cvtts_roundss_si64(__A, __R) \
+ ((long long)__builtin_ia32_vcvttss2sis64((__v4sf)(__m128)(__A), \
+ (const int)(__R)))
-#define _mm_cvtts_roundsd_si64(A, R) \
- ((long long)__builtin_ia32_vcvttsd2sis64((__v2df)(__m128d)(A), \
- (const int)(R)))
+#define _mm_cvtts_roundsd_si64(__A, __R) \
+ ((long long)__builtin_ia32_vcvttsd2sis64((__v2df)(__m128d)(__A), \
+ (const int)(__R)))
-#define _mm_cvtts_roundsd_i64(A, R) \
- ((long long)__builtin_ia32_vcvttsd2sis64((__v2df)(__m128d)(A), \
- (const int)(R)))
+#define _mm_cvtts_roundsd_i64(__A, __R) \
+ ((long long)__builtin_ia32_vcvttsd2sis64((__v2df)(__m128d)(__A), \
+ (const int)(__R)))
#endif /* __x86_64__ */
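
A sketch of the scalar forms, including the 64-bit-result variants that are
only available under __x86_64__ (names are illustrative):

#include <immintrin.h>

static int scalar_demo(__m128d d, __m128 s) {
  int i32 = _mm_cvtts_roundsd_i32(d, _MM_FROUND_NO_EXC);
  unsigned u32 = _mm_cvtts_roundss_u32(s, _MM_FROUND_NO_EXC);
#ifdef __x86_64__
  long long i64 = _mm_cvtts_roundsd_i64(d, _MM_FROUND_NO_EXC);
  i32 ^= (int)i64;
#endif
  return i32 ^ (int)u32;
}
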
// 128 Bit : Double -> int
-static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epi32(__m128d A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epi32(__m128d __A) {
return ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask(
- (__v2df)A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1)));
+ (__v2df)__A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1)));
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_cvttspd_epi32(__m128i W, __mmask8 U, __m128d A) {
- return ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask((__v2df)A, (__v4si)W, U));
+_mm_mask_cvttspd_epi32(__m128i __W, __mmask8 __U, __m128d __A) {
+ return ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask((__v2df)__A, (__v4si)__W,
+ __U));
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_cvttspd_epi32(__mmask16 U, __m128d A) {
+_mm_maskz_cvttspd_epi32(__mmask8 __U, __m128d __A) {
return ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask(
- (__v2df)A, (__v4si)(__m128i)_mm_setzero_si128(), U));
+ (__v2df)__A, (__v4si)(__m128i)_mm_setzero_si128(), __U));
}
// 256 Bit : Double -> int
static __inline__ __m128i __DEFAULT_FN_ATTRS256
-_mm256_cvttspd_epi32(__m256d A) {
+_mm256_cvttspd_epi32(__m256d __A) {
return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
- (__v4df)A, (__v4si)_mm_undefined_si128(), (__mmask8)-1,
+ (__v4df)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m128i __DEFAULT_FN_ATTRS256
-_mm256_mask_cvttspd_epi32(__m128i W, __mmask8 U, __m256d A) {
+_mm256_mask_cvttspd_epi32(__m128i __W, __mmask8 __U, __m256d __A) {
return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
- (__v4df)A, (__v4si)W, U, _MM_FROUND_CUR_DIRECTION));
+ (__v4df)__A, (__v4si)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m128i __DEFAULT_FN_ATTRS256
-_mm256_maskz_cvttspd_epi32(__mmask8 U, __m256d A) {
+_mm256_maskz_cvttspd_epi32(__mmask8 __U, __m256d __A) {
return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
- (__v4df)A, (__v4si)_mm_setzero_si128(), U, _MM_FROUND_CUR_DIRECTION));
+ (__v4df)__A, (__v4si)_mm_setzero_si128(), __U, _MM_FROUND_CUR_DIRECTION));
}
-#define _mm256_cvtts_roundpd_epi32(A, R) \
+#define _mm256_cvtts_roundpd_epi32(__A, __R) \
((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( \
- (__v4df)(__m256d)A, (__v4si)(__m128i)_mm_undefined_si128(), \
- (__mmask8) - 1, (int)(R)))
+ (__v4df)(__m256d)(__A), (__v4si)(__m128i)_mm_undefined_si128(), \
+ (__mmask8) - 1, (int)(__R)))
-#define _mm256_mask_cvtts_roundpd_epi32(W, U, A, R) \
+#define _mm256_mask_cvtts_roundpd_epi32(__W, __U, __A, __R) \
((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( \
- (__v4df)(__m256d)A, (__v4si)(__m128i)W, (__mmask8)U, (int)(R)))
+ (__v4df)(__m256d)(__A), (__v4si)(__m128i)(__W), (__mmask8)(__U), (int)(__R)))
-#define _mm256_maskz_cvtts_roundpd_epi32(U, A, R) \
+#define _mm256_maskz_cvtts_roundpd_epi32(__U, __A, __R) \
((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( \
- (__v4df)(__m256d)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U, \
- (int)(R)))
+ (__v4df)(__m256d)(__A), (__v4si)(__m128i)_mm_setzero_si128(), \
+ (__mmask8)(__U), (int)(__R)))
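
Both the 128- and 256-bit double -> int32 forms return a __m128i, since four
doubles yield at most four 32-bit lanes; a quick sketch (illustrative names):

#include <immintrin.h>

static __m128i narrow_pd_epi32(__m128d lo, __m256d hi, __mmask8 k) {
  __m128i a = _mm_cvttspd_epi32(lo);             // two results in the low half
  __m128i b = _mm256_maskz_cvttspd_epi32(k, hi); // four results
  return _mm_add_epi32(a, b);                    // combine so both stay live
}
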
// 128 Bit : Double -> uint
-static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epu32(__m128d A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epu32(__m128d __A) {
return ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask(
- (__v2df)A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1)));
+ (__v2df)__A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1)));
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_cvttspd_epu32(__m128i W, __mmask8 U, __m128d A) {
+_mm_mask_cvttspd_epu32(__m128i __W, __mmask8 __U, __m128d __A) {
return ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask(
- (__v2df)A, (__v4si)(__m128i)W, (__mmask8)U));
+ (__v2df)__A, (__v4si)(__m128i)__W, (__mmask8)__U));
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_cvttspd_epu32(__mmask8 U, __m128d A) {
+_mm_maskz_cvttspd_epu32(__mmask8 __U, __m128d __A) {
return ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask(
- (__v2df)A, (__v4si)(__m128i)_mm_setzero_si128(), U));
+ (__v2df)__A, (__v4si)(__m128i)_mm_setzero_si128(), __U));
}
// 256 Bit : Double -> uint
static __inline__ __m128i __DEFAULT_FN_ATTRS256
-_mm256_cvttspd_epu32(__m256d A) {
+_mm256_cvttspd_epu32(__m256d __A) {
return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
- (__v4df)A, (__v4si)_mm_undefined_si128(), (__mmask8)-1,
+ (__v4df)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m128i __DEFAULT_FN_ATTRS256
-_mm256_mask_cvttspd_epu32(__m128i W, __mmask8 U, __m256d A) {
+_mm256_mask_cvttspd_epu32(__m128i __W, __mmask8 __U, __m256d __A) {
return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
- (__v4df)A, (__v4si)W, U, _MM_FROUND_CUR_DIRECTION));
+ (__v4df)__A, (__v4si)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m128i __DEFAULT_FN_ATTRS256
-_mm256_maskz_cvttspd_epu32(__mmask8 U, __m256d A) {
+_mm256_maskz_cvttspd_epu32(__mmask8 __U, __m256d __A) {
return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
- (__v4df)A, (__v4si)_mm_setzero_si128(), U, _MM_FROUND_CUR_DIRECTION));
+ (__v4df)__A, (__v4si)_mm_setzero_si128(), __U, _MM_FROUND_CUR_DIRECTION));
}
-#define _mm256_cvtts_roundpd_epu32(A, R) \
+#define _mm256_cvtts_roundpd_epu32(__A, __R) \
((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( \
- (__v4df)(__m256d)A, (__v4si)(__m128i)_mm_undefined_si128(), \
- (__mmask8) - 1, (int)(R)))
+ (__v4df)(__m256d)(__A), (__v4si)(__m128i)_mm_undefined_si128(), \
+ (__mmask8) - 1, (int)(__R)))
-#define _mm256_mask_cvtts_roundpd_epu32(W, U, A, R) \
+#define _mm256_mask_cvtts_roundpd_epu32(__W, __U, __A, __R) \
((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( \
- (__v4df)(__m256d)A, (__v4si)(__m128i)W, (__mmask8)U, (int)(R)))
+ (__v4df)(__m256d)(__A), (__v4si)(__m128i)(__W), (__mmask8)(__U), (int)(__R)))
-#define _mm256_maskz_cvtts_roundpd_epu32(U, A, R) \
+#define _mm256_maskz_cvtts_roundpd_epu32(__U, __A, __R) \
((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( \
- (__v4df)(__m256d)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U, \
- (int)(R)))
+ (__v4df)(__m256d)(__A), (__v4si)(__m128i)_mm_setzero_si128(), \
+ (__mmask8)(__U), (int)(__R)))
// 128 Bit : Double -> long
-static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epi64(__m128d A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epi64(__m128d __A) {
return ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask(
- (__v2df)A, (__v2di)_mm_undefined_si128(), (__mmask8)-1));
+ (__v2df)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1));
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_cvttspd_epi64(__m128i W, __mmask8 U, __m128d A) {
- return ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask((__v2df)A, (__v2di)W,
- (__mmask8)U));
+_mm_mask_cvttspd_epi64(__m128i __W, __mmask8 __U, __m128d __A) {
+ return ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask((__v2df)__A, (__v2di)__W,
+ (__mmask8)__U));
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_cvttspd_epi64(__mmask8 U, __m128d A) {
+_mm_maskz_cvttspd_epi64(__mmask8 __U, __m128d __A) {
return ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask(
- (__v2df)A, (__v2di)_mm_setzero_si128(), (__mmask8)U));
+ (__v2df)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U));
}
// 256 Bit : Double -> long
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_cvttspd_epi64(__m256d A) {
+_mm256_cvttspd_epi64(__m256d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
- (__v4df)A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
+ (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_cvttspd_epi64(__m256i W, __mmask8 U, __m256d A) {
+_mm256_mask_cvttspd_epi64(__m256i __W, __mmask8 __U, __m256d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
- (__v4df)A, (__v4di)W, U, _MM_FROUND_CUR_DIRECTION));
+ (__v4df)__A, (__v4di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_cvttspd_epi64(__mmask8 U, __m256d A) {
+_mm256_maskz_cvttspd_epi64(__mmask8 __U, __m256d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
- (__v4df)A, (__v4di)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
+ (__v4df)__A, (__v4di)_mm256_setzero_si256(), __U,
+ _MM_FROUND_CUR_DIRECTION));
}
-#define _mm256_cvtts_roundpd_epi64(A, R) \
+#define _mm256_cvtts_roundpd_epi64(__A, __R) \
((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask( \
- (__v4df)A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, (int)R))
+ (__v4df)(__A), (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, \
+ (int)(__R)))
-#define _mm256_mask_cvtts_roundpd_epi64(W, U, A, R) \
- ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask((__v4df)A, (__v4di)W, \
- (__mmask8)U, (int)R))
+#define _mm256_mask_cvtts_roundpd_epi64(__W, __U, __A, __R) \
+ ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask( \
+ (__v4df)(__A), (__v4di)(__W), (__mmask8)(__U), (int)(__R)))
-#define _mm256_maskz_cvtts_roundpd_epi64(U, A, R) \
+#define _mm256_maskz_cvtts_roundpd_epi64(__U, __A, __R) \
((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask( \
- (__v4df)A, (__v4di)_mm256_setzero_si256(), (__mmask8)U, (int)R))
+ (__v4df)(__A), (__v4di)_mm256_setzero_si256(), (__mmask8)(__U), (int)(__R)))
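
The 256-bit rounding macros follow the same shape as the 512-bit ones; a
minimal sketch (illustrative names):

#include <immintrin.h>

static __m256i pd_epi64_sae(__m256d a, __m256i w, __mmask8 k) {
  return _mm256_mask_cvtts_roundpd_epi64(w, k, a, _MM_FROUND_NO_EXC);
}
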
// 128 Bit : Double -> ulong
-static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epu64(__m128d A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epu64(__m128d __A) {
return ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask(
- (__v2df)A, (__v2di)_mm_undefined_si128(), (__mmask8)-1));
+ (__v2df)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1));
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_cvttspd_epu64(__m128i W, __mmask8 U, __m128d A) {
- return ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask((__v2df)A, (__v2di)W,
- (__mmask8)U));
+_mm_mask_cvttspd_epu64(__m128i __W, __mmask8 __U, __m128d __A) {
+ return ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask((__v2df)__A, (__v2di)__W,
+ (__mmask8)__U));
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_cvttspd_epu64(__mmask8 U, __m128d A) {
+_mm_maskz_cvttspd_epu64(__mmask8 __U, __m128d __A) {
return ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask(
- (__v2df)A, (__v2di)_mm_setzero_si128(), (__mmask8)U));
+ (__v2df)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U));
}
// 256 Bit : Double -> ulong
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_cvttspd_epu64(__m256d A) {
+_mm256_cvttspd_epu64(__m256d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
- (__v4df)A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
+ (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_cvttspd_epu64(__m256i W, __mmask8 U, __m256d A) {
+_mm256_mask_cvttspd_epu64(__m256i __W, __mmask8 __U, __m256d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
- (__v4df)A, (__v4di)W, U, _MM_FROUND_CUR_DIRECTION));
+ (__v4df)__A, (__v4di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_cvttspd_epu64(__mmask8 U, __m256d A) {
+_mm256_maskz_cvttspd_epu64(__mmask8 __U, __m256d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
- (__v4df)A, (__v4di)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
+ (__v4df)__A, (__v4di)_mm256_setzero_si256(), __U,
+ _MM_FROUND_CUR_DIRECTION));
}
-#define _mm256_cvtts_roundpd_epu64(A, R) \
+#define _mm256_cvtts_roundpd_epu64(__A, __R) \
((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( \
- (__v4df)A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, (int)R))
+ (__v4df)(__A), (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, \
+ (int)(__R)))
-#define _mm256_mask_cvtts_roundpd_epu64(W, U, A, R) \
- ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask((__v4df)A, (__v4di)W, \
- (__mmask8)U, (int)R))
+#define _mm256_mask_cvtts_roundpd_epu64(__W, __U, __A, __R) \
+ ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( \
+ (__v4df)(__A), (__v4di)(__W), (__mmask8)(__U), (int)(__R)))
-#define _mm256_maskz_cvtts_roundpd_epu64(U, A, R) \
+#define _mm256_maskz_cvtts_roundpd_epu64(__U, __A, __R) \
((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( \
- (__v4df)A, (__v4di)_mm256_setzero_si256(), (__mmask8)U, (int)R))
+ (__v4df)(__A), (__v4di)_mm256_setzero_si256(), (__mmask8)(__U), (int)(__R)))
// 128 Bit : float -> int
-static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epi32(__m128 A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epi32(__m128 __A) {
return ((__m128i)__builtin_ia32_vcvttps2dqs128_mask(
- (__v4sf)A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1)));
+ (__v4sf)__A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1)));
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_cvttsps_epi32(__m128i W, __mmask8 U, __m128 A) {
- return ((__m128i)__builtin_ia32_vcvttps2dqs128_mask((__v4sf)A, (__v4si)W,
- (__mmask8)U));
+_mm_mask_cvttsps_epi32(__m128i __W, __mmask8 __U, __m128 __A) {
+ return ((__m128i)__builtin_ia32_vcvttps2dqs128_mask((__v4sf)__A, (__v4si)__W,
+ (__mmask8)__U));
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_cvttsps_epi32(__mmask8 U, __m128 A) {
+_mm_maskz_cvttsps_epi32(__mmask8 __U, __m128 __A) {
return ((__m128i)__builtin_ia32_vcvttps2dqs128_mask(
- (__v4sf)A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)U));
+ (__v4sf)__A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)__U));
}
// 256 Bit : float -> int
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttsps_epi32(__m256 A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttsps_epi32(__m256 __A) {
return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
- (__v8sf)A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
+ (__v8sf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_cvttsps_epi32(__m256i W, __mmask8 U, __m256 A) {
+_mm256_mask_cvttsps_epi32(__m256i __W, __mmask8 __U, __m256 __A) {
return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
- (__v8sf)A, (__v8si)W, U, _MM_FROUND_CUR_DIRECTION));
+ (__v8sf)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_cvttsps_epi32(__mmask8 U, __m256 A) {
+_mm256_maskz_cvttsps_epi32(__mmask8 __U, __m256 __A) {
return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
- (__v8sf)A, (__v8si)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
+ (__v8sf)__A, (__v8si)_mm256_setzero_si256(), __U,
+ _MM_FROUND_CUR_DIRECTION));
}
-#define _mm256_cvtts_roundps_epi32(A, R) \
+#define _mm256_cvtts_roundps_epi32(__A, __R) \
((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( \
- (__v8sf)(__m256)A, (__v8si)(__m256i)_mm256_undefined_si256(), \
- (__mmask8) - 1, (int)(R)))
+ (__v8sf)(__m256)(__A), (__v8si)(__m256i)_mm256_undefined_si256(), \
+ (__mmask8) - 1, (int)(__R)))
-#define _mm256_mask_cvtts_roundps_epi32(W, U, A, R) \
+#define _mm256_mask_cvtts_roundps_epi32(__W, __U, __A, __R) \
((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( \
- (__v8sf)(__m256)A, (__v8si)(__m256i)W, (__mmask8)U, (int)(R)))
+ (__v8sf)(__m256)(__A), (__v8si)(__m256i)(__W), (__mmask8)(__U), (int)(__R)))
-#define _mm256_maskz_cvtts_roundps_epi32(U, A, R) \
+#define _mm256_maskz_cvtts_roundps_epi32(__U, __A, __R) \
((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( \
- (__v8sf)(__m256)A, (__v8si)(__m256i)_mm256_setzero_si256(), (__mmask8)U, \
- (int)(R)))
+ (__v8sf)(__m256)(__A), (__v8si)(__m256i)_mm256_setzero_si256(), \
+ (__mmask8)(__U), (int)(__R)))
// 128 Bit : float -> uint
-static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epu32(__m128 A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epu32(__m128 __A) {
return ((__m128i)__builtin_ia32_vcvttps2udqs128_mask(
- (__v4sf)A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1)));
+ (__v4sf)__A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1)));
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_cvttsps_epu32(__m128i W, __mmask8 U, __m128 A) {
- return ((__m128i)__builtin_ia32_vcvttps2udqs128_mask((__v4sf)A, (__v4si)W,
- (__mmask8)U));
+_mm_mask_cvttsps_epu32(__m128i __W, __mmask8 __U, __m128 __A) {
+ return ((__m128i)__builtin_ia32_vcvttps2udqs128_mask((__v4sf)__A, (__v4si)__W,
+ (__mmask8)__U));
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_cvttsps_epu32(__mmask8 U, __m128 A) {
+_mm_maskz_cvttsps_epu32(__mmask8 __U, __m128 __A) {
return ((__m128i)__builtin_ia32_vcvttps2udqs128_mask(
- (__v4sf)A, (__v4si)_mm_setzero_si128(), (__mmask8)U));
+ (__v4sf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U));
}
// 256 Bit : float -> uint
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttsps_epu32(__m256 A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttsps_epu32(__m256 __A) {
return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
- (__v8sf)A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
+ (__v8sf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_cvttsps_epu32(__m256i W, __mmask8 U, __m256 A) {
+_mm256_mask_cvttsps_epu32(__m256i __W, __mmask8 __U, __m256 __A) {
return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
- (__v8sf)A, (__v8si)W, U, _MM_FROUND_CUR_DIRECTION));
+ (__v8sf)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_cvttsps_epu32(__mmask8 U, __m256 A) {
+_mm256_maskz_cvttsps_epu32(__mmask8 __U, __m256 __A) {
return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
- (__v8sf)A, (__v8si)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
+ (__v8sf)__A, (__v8si)_mm256_setzero_si256(), __U,
+ _MM_FROUND_CUR_DIRECTION));
}
-#define _mm256_cvtts_roundps_epu32(A, R) \
+#define _mm256_cvtts_roundps_epu32(__A, __R) \
((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( \
- (__v8sf)(__m256)A, (__v8si)(__m256i)_mm256_undefined_si256(), \
- (__mmask8) - 1, (int)(R)))
+ (__v8sf)(__m256)(__A), (__v8si)(__m256i)_mm256_undefined_si256(), \
+ (__mmask8) - 1, (int)(__R)))
-#define _mm256_mask_cvtts_roundps_epu32(W, U, A, R) \
+#define _mm256_mask_cvtts_roundps_epu32(__W, __U, __A, __R) \
((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( \
- (__v8sf)(__m256)A, (__v8si)(__m256i)W, (__mmask8)U, (int)(R)))
+ (__v8sf)(__m256)(__A), (__v8si)(__m256i)(__W), (__mmask8)(__U), (int)(__R)))
-#define _mm256_maskz_cvtts_roundps_epu32(U, A, R) \
+#define _mm256_maskz_cvtts_roundps_epu32(__U, __A, __R) \
((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( \
- (__v8sf)(__m256)A, (__v8si)(__m256i)_mm256_setzero_si256(), (__mmask8)U, \
- (int)(R)))
+ (__v8sf)(__m256)(__A), (__v8si)(__m256i)_mm256_setzero_si256(), \
+ (__mmask8)(__U), (int)(__R)))
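
The 128-bit forms have no rounding variants (the underlying builtins take
only the source, the passthrough, and the mask), so usage is simply
(illustrative name):

#include <immintrin.h>

static __m128i ps_epu32_128(__m128 a, __mmask8 k) {
  return _mm_maskz_cvttsps_epu32(k, a);
}
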
// 128 bit : float -> long
-static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epi64(__m128 A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epi64(__m128 __A) {
return ((__m128i)__builtin_ia32_vcvttps2qqs128_mask(
- (__v4sf)A, (__v2di)_mm_undefined_si128(), (__mmask8)-1));
+ (__v4sf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1));
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_cvttsps_epi64(__m128i W, __mmask8 U, __m128 A) {
+_mm_mask_cvttsps_epi64(__m128i __W, __mmask8 __U, __m128 __A) {
return ((__m128i)__builtin_ia32_vcvttps2qqs128_mask(
- (__v4sf)A, (__v2di)(__m128i)W, (__mmask8)U));
+ (__v4sf)__A, (__v2di)(__m128i)__W, (__mmask8)__U));
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_cvttsps_epi64(__mmask8 U, __m128 A) {
+_mm_maskz_cvttsps_epi64(__mmask8 __U, __m128 __A) {
return ((__m128i)__builtin_ia32_vcvttps2qqs128_mask(
- (__v4sf)A, (__v2di)_mm_setzero_si128(), (__mmask8)U));
+ (__v4sf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U));
}
// 256 bit : float -> long
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttsps_epi64(__m128 A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttsps_epi64(__m128 __A) {
return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
- (__v4sf)A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
+ (__v4sf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_cvttsps_epi64(__m256i W, __mmask8 U, __m128 A) {
+_mm256_mask_cvttsps_epi64(__m256i __W, __mmask8 __U, __m128 __A) {
return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
- (__v4sf)A, (__v4di)W, U, _MM_FROUND_CUR_DIRECTION));
+ (__v4sf)__A, (__v4di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_cvttsps_epi64(__mmask8 U, __m128 A) {
+_mm256_maskz_cvttsps_epi64(__mmask8 __U, __m128 __A) {
return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
- (__v4sf)A, (__v4di)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
+ (__v4sf)__A, (__v4di)_mm256_setzero_si256(), __U,
+ _MM_FROUND_CUR_DIRECTION));
}
-#define _mm256_cvtts_roundps_epi64(A, R) \
+#define _mm256_cvtts_roundps_epi64(__A, __R) \
((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( \
- (__v4sf)(__m128)A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, \
- (int)R))
+ (__v4sf)(__m128)(__A), (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, \
+ (int)(__R)))
-#define _mm256_mask_cvtts_roundps_epi64(W, U, A, R) \
+#define _mm256_mask_cvtts_roundps_epi64(__W, __U, __A, __R) \
((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( \
- (__v4sf)(__m128)A, (__v4di)W, (__mmask8)U, (int)R))
+ (__v4sf)(__m128)(__A), (__v4di)(__W), (__mmask8)(__U), (int)(__R)))
-#define _mm256_maskz_cvtts_roundps_epi64(U, A, R) \
+#define _mm256_maskz_cvtts_roundps_epi64(__U, __A, __R) \
((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( \
- (__v4sf)(__m128)A, (__v4di)_mm256_setzero_si256(), (__mmask8)U, (int)R))
+ (__v4sf)(__m128)(__A), (__v4di)_mm256_setzero_si256(), (__mmask8)(__U), \
+ (int)(__R)))
// 128 bit : float -> ulong
-static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epu64(__m128 A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epu64(__m128 __A) {
return ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask(
- (__v4sf)A, (__v2di)_mm_undefined_si128(), (__mmask8)-1));
+ (__v4sf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1));
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_cvttsps_epu64(__m128i W, __mmask8 U, __m128 A) {
+_mm_mask_cvttsps_epu64(__m128i __W, __mmask8 __U, __m128 __A) {
return ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask(
- (__v4sf)A, (__v2di)(__m128i)W, (__mmask8)U));
+ (__v4sf)__A, (__v2di)(__m128i)__W, (__mmask8)__U));
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_cvttsps_epu64(__mmask8 U, __m128 A) {
+_mm_maskz_cvttsps_epu64(__mmask8 __U, __m128 __A) {
return ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask(
- (__v4sf)A, (__v2di)_mm_setzero_si128(), (__mmask8)U));
+ (__v4sf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U));
}
// 256 bit : float -> ulong
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttsps_epu64(__m128 A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttsps_epu64(__m128 __A) {
return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
- (__v4sf)A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
+ (__v4sf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_cvttsps_epu64(__m256i W, __mmask8 U, __m128 A) {
+_mm256_mask_cvttsps_epu64(__m256i __W, __mmask8 __U, __m128 __A) {
return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
- (__v4sf)A, (__v4di)W, U, _MM_FROUND_CUR_DIRECTION));
+ (__v4sf)__A, (__v4di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_cvttsps_epu64(__mmask8 U, __m128 A) {
+_mm256_maskz_cvttsps_epu64(__mmask8 __U, __m128 __A) {
return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
- (__v4sf)A, (__v4di)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
+ (__v4sf)__A, (__v4di)_mm256_setzero_si256(), __U,
+ _MM_FROUND_CUR_DIRECTION));
}
-#define _mm256_cvtts_roundps_epu64(A, R) \
+#define _mm256_cvtts_roundps_epu64(__A, __R) \
((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( \
- (__v4sf)(__m128)A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, \
- (int)R))
+ (__v4sf)(__m128)(__A), (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, \
+ (int)(__R)))
-#define _mm256_mask_cvtts_roundps_epu64(W, U, A, R) \
+#define _mm256_mask_cvtts_roundps_epu64(__W, __U, __A, __R) \
((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( \
- (__v4sf)(__m128)A, (__v4di)W, (__mmask8)U, (int)R))
+ (__v4sf)(__m128)(__A), (__v4di)(__W), (__mmask8)(__U), (int)(__R)))
-#define _mm256_maskz_cvtts_roundps_epu64(U, A, R) \
+#define _mm256_maskz_cvtts_roundps_epu64(__U, __A, __R) \
((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( \
- (__v4sf)(__m128)A, (__v4di)_mm256_setzero_si256(), (__mmask8)U, (int)R))
+ (__v4sf)(__m128)(__A), (__v4di)_mm256_setzero_si256(), (__mmask8)(__U), \
+ (int)(__R)))
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
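
Putting it together, a self-contained sketch for the 256-bit header (file and
flag names are illustrative; the expected output assumes the saturating
semantics described above):

// cc -O2 -mavx10.2-256 demo256.c && ./a.out
#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  __m256d d = _mm256_setr_pd(-1.0, 1e20, 3.7, -2.5);
  uint32_t out[4];
  _mm_storeu_si128((__m128i *)out, _mm256_cvttspd_epu32(d));
  printf("%u %u %u %u\n", out[0], out[1], out[2], out[3]); // 0 4294967295 3 0
  return 0;
}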