[clang] [llvm] [X86][AVX10.2] Support saturated converts (PR #102592)

via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 9 02:54:19 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Malay Sanghi (MalaySanghi)

<details>
<summary>Changes</summary>

Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965

---

Patch is 494.14 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/102592.diff


31 Files Affected:

- (modified) clang/include/clang/Basic/BuiltinsX86.def (+30) 
- (modified) clang/include/clang/Basic/BuiltinsX86_64.def (+6) 
- (modified) clang/lib/Headers/CMakeLists.txt (+2) 
- (added) clang/lib/Headers/avx10_2_512satcvtdsintrin.h (+302) 
- (added) clang/lib/Headers/avx10_2satcvtdsintrin.h (+453) 
- (modified) clang/lib/Headers/immintrin.h (+8) 
- (modified) clang/lib/Sema/SemaX86.cpp (+26) 
- (added) clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-errors.c (+52) 
- (added) clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64-error.c (+76) 
- (added) clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c (+184) 
- (added) clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c (+183) 
- (added) clang/test/CodeGen/X86/avx10_2satcvtds-builtins-errors.c (+57) 
- (added) clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c (+223) 
- (added) clang/test/CodeGen/X86/avx10_2satcvtds-builtins.c (+220) 
- (modified) llvm/include/llvm/IR/IntrinsicsX86.td (+100) 
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+21-2) 
- (modified) llvm/lib/Target/X86/X86ISelLowering.h (+18) 
- (modified) llvm/lib/Target/X86/X86InstrAVX10.td (+311) 
- (modified) llvm/lib/Target/X86/X86InstrFragmentsSIMD.td (+12) 
- (modified) llvm/lib/Target/X86/X86IntrinsicsInfo.h (+65-1) 
- (added) llvm/test/CodeGen/X86/avx10_2_512satcvtds-intrinsics.ll (+548) 
- (added) llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll (+115) 
- (added) llvm/test/CodeGen/X86/avx10_2satcvtds-intrinsics.ll (+1098) 
- (added) llvm/test/CodeGen/X86/avx10_2satcvtds-x64-intrinsics.ll (+58) 
- (added) llvm/test/MC/Disassembler/X86/avx10.2-satcvtds-32.txt (+1043) 
- (added) llvm/test/MC/Disassembler/X86/avx10.2-satcvtds-64.txt (+1171) 
- (added) llvm/test/MC/X86/avx10_2satcvtds-32-att.s (+1042) 
- (added) llvm/test/MC/X86/avx10_2satcvtds-32-intel.s (+1042) 
- (added) llvm/test/MC/X86/avx10_2satcvtds-64-att.s (+1170) 
- (added) llvm/test/MC/X86/avx10_2satcvtds-64-intel.s (+1170) 
- (modified) llvm/test/TableGen/x86-fold-tables.inc (+160) 


``````````diff
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index a696cf117908e2..a8639c341d0a43 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -2122,6 +2122,36 @@ TARGET_BUILTIN(__builtin_ia32_vpdpwuud256, "V8iV8iV8iV8i", "nV:256:", "avxvnniin
 TARGET_BUILTIN(__builtin_ia32_vpdpwuuds128, "V4iV4iV4iV4i", "nV:128:", "avxvnniint16|avx10.2-256")
 TARGET_BUILTIN(__builtin_ia32_vpdpwuuds256, "V8iV8iV8iV8i", "nV:256:", "avxvnniint16|avx10.2-256")
 
+// AVX10.2 SATCVT-DS
+TARGET_BUILTIN(__builtin_ia32_vcvttssd2si32, "iV2dIi", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttssd2usi32, "UiV2dIi", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttsss2si32, "iV4fIi", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttsss2usi32, "UiV4fIi", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2dqs128_mask, "V4iV2dV4iUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2dqs256_round_mask, "V4iV4dV4iUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2dqs512_round_mask, "V8iV8dV8iUcIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2udqs128_mask, "V4iV2dV4iUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2udqs256_round_mask, "V4iV4dV4iUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2udqs512_round_mask, "V8iV8dV8iUcIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2qqs128_mask,  "V2OiV2dV2OiUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2qqs256_round_mask, "V4OiV4dV4OiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2qqs512_round_mask, "V8OiV8dV8OiUcIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2uqqs128_mask, "V2OiV2dV2OiUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2uqqs256_round_mask, "V4OiV4dV4OiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2uqqs512_round_mask, "V8OiV8dV8OiUcIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2dqs128_mask, "V4iV4fV4iUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2dqs256_round_mask, "V8iV8fV8iUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2dqs512_round_mask, "V16iV16fV16iUsIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2udqs128_mask, "V4iV4fV4iUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2udqs256_round_mask, "V8iV8fV8iUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2udqs512_round_mask, "V16iV16fV16iUsIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2qqs128_mask, "V2OiV4fV2OiUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2qqs256_round_mask, "V4OiV4fV4OiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2qqs512_round_mask, "V8OiV8fV8OiUcIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2uqqs128_mask, "V2OiV4fV2OiUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2uqqs256_round_mask, "V4OiV4fV4OiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2uqqs512_round_mask, "V8OiV8fV8OiUcIi", "nV:512:", "avx10.2-512")
+
 // AVX-NE-CONVERT
 TARGET_BUILTIN(__builtin_ia32_vbcstnebf162ps128, "V4fyC*", "nV:128:", "avxneconvert")
 TARGET_BUILTIN(__builtin_ia32_vbcstnebf162ps256, "V8fyC*", "nV:256:", "avxneconvert")
diff --git a/clang/include/clang/Basic/BuiltinsX86_64.def b/clang/include/clang/Basic/BuiltinsX86_64.def
index 5e00916d4b25ae..ed9b17b8bd7b8e 100644
--- a/clang/include/clang/Basic/BuiltinsX86_64.def
+++ b/clang/include/clang/Basic/BuiltinsX86_64.def
@@ -99,6 +99,12 @@ TARGET_BUILTIN(__builtin_ia32_vcvttsh2si64, "OiV8xIi", "ncV:128:", "avx512fp16")
 TARGET_BUILTIN(__builtin_ia32_vcvttsh2usi64, "UOiV8xIi", "ncV:128:", "avx512fp16")
 TARGET_BUILTIN(__builtin_ia32_directstore_u64, "vULi*ULi", "n", "movdiri")
 
+// AVX10.2 SATCVT-DS
+TARGET_BUILTIN(__builtin_ia32_vcvttssd2si64, "OiV2dIi", "ncV:128:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttssd2usi64, "UOiV2dIi", "ncV:128:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttsss2si64, "OiV4fIi", "ncV:128:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttsss2usi64, "UOiV4fIi", "ncV:128:", "avx10.2-512")
+
 // UINTR
 TARGET_BUILTIN(__builtin_ia32_clui, "v", "n", "uintr")
 TARGET_BUILTIN(__builtin_ia32_stui, "v", "n", "uintr")
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index b61aeca6bbc910..9981290628697c 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -150,9 +150,11 @@ set(x86_files
   avx10_2_512minmaxintrin.h
   avx10_2_512niintrin.h
   avx10_2_512satcvtintrin.h
+  avx10_2_512satcvtdsintrin.h
   avx10_2minmaxintrin.h
   avx10_2niintrin.h
   avx10_2satcvtintrin.h
+  avx10_2satcvtdsintrin.h
   avx2intrin.h
   avx512bf16intrin.h
   avx512bitalgintrin.h
diff --git a/clang/lib/Headers/avx10_2_512satcvtdsintrin.h b/clang/lib/Headers/avx10_2_512satcvtdsintrin.h
new file mode 100644
index 00000000000000..e8b815653c3d6e
--- /dev/null
+++ b/clang/lib/Headers/avx10_2_512satcvtdsintrin.h
@@ -0,0 +1,302 @@
+/*===----- avx10_2_512satcvtdsintrin.h - AVX10_2_512SATCVTDS intrinsics ----===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error                                                                         \
+    "Never use <avx10_2_512satcvtdsintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX10_2_512SATCVTDSINTRIN_H
+#define __AVX10_2_512SATCVTDSINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"),    \
+                 __min_vector_width__(512)))
+
+// 512 bit : Double -> Int
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi32(__m512d A) {
+  return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
+      (__v8df)A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttspd_epi32(__m256i W, __mmask8 U, __m512d A) {
+  return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
+      (__v8df)A, (__v8si)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttspd_epi32(__mmask8 U, __m512d A) {
+  return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
+      (__v8df)A, (__v8si)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm512_cvtts_roundpd_epi32(A, R)                                       \
+  ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(                          \
+      (__v8df)(__m512d)(A), (__v8si)_mm256_undefined_si256(), (__mmask8) - 1,  \
+      (const int)(R)))
+
+#define _mm512_mask_cvtts_roundpd_epi32(W, U, A, R)                            \
+  ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(                          \
+      (__v8df)(__m512d)(A), (__v8si)(__m256i)(W), (__mmask8)(U),               \
+      (const int)(R)))
+
+#define _mm512_maskz_cvtts_roundpd_epi32(U, A, R)                              \
+  ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(                          \
+      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U),     \
+      (const int)(R)))
+
+// 512 bit : Double -> uInt
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu32(__m512d A) {
+  return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
+      (__v8df)A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttspd_epu32(__m256i W, __mmask8 U, __m512d A) {
+  return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
+      (__v8df)A, (__v8si)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttspd_epu32(__mmask8 U, __m512d A) {
+  return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
+      (__v8df)A, (__v8si)_mm256_setzero_si256(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm512_cvtts_roundpd_epu32(A, R)                                       \
+  ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(                         \
+      (__v8df)(__m512d)(A), (__v8si)_mm256_undefined_si256(), (__mmask8) - 1,  \
+      (const int)(R)))
+
+#define _mm512_mask_cvtts_roundpd_epu32(W, U, A, R)                            \
+  ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(                         \
+      (__v8df)(__m512d)(A), (__v8si)(__m256i)(W), (__mmask8)(U),               \
+      (const int)(R)))
+
+#define _mm512_maskz_cvtts_roundpd_epu32(U, A, R)                              \
+  ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(                         \
+      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U),     \
+      (const int)(R)))
+
+#ifdef __x86_64__
+//  512 bit : Double -> Long
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi64(__m512d A) {
+  return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
+      (__v8df)A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttspd_epi64(__m512i W, __mmask8 U, __m512d A) {
+  return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
+      (__v8df)A, (__v8di)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttspd_epi64(__mmask8 U, __m512d A) {
+  return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
+      (__v8df)A, (__v8di)_mm512_setzero_si512(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm512_cvtts_roundpd_epi64(A, R)                                       \
+  ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(                          \
+      (__v8df)(__m512d)(A), (__v8di)_mm512_undefined_epi32(), (__mmask8) - 1,  \
+      (const int)(R)))
+
+#define _mm512_mask_cvtts_roundpd_epi64(W, U, A, R)                            \
+  ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(                          \
+      (__v8df)(__m512d)(A), (__v8di)(__m512i)(W), (__mmask8)(U),               \
+      (const int)(R)))
+
+#define _mm512_maskz_cvtts_roundpd_epi64(U, A, R)                              \
+  ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(                          \
+      (__v8df)(__m512d)(A), (__v8di)_mm512_setzero_si512(), (__mmask8)(U),     \
+      (const int)(R)))
+
+// 512 bit : Double -> ULong
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu64(__m512d A) {
+  return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
+      (__v8df)A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttspd_epu64(__m512i W, __mmask8 U, __m512d A) {
+  return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
+      (__v8df)A, (__v8di)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttspd_epu64(__mmask8 U, __m512d A) {
+  return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
+      (__v8df)A, (__v8di)_mm512_setzero_si512(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm512_cvtts_roundpd_epu64(A, R)                                       \
+  ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(                         \
+      (__v8df)(__m512d)(A), (__v8di)_mm512_undefined_epi32(), (__mmask8) - 1,  \
+      (const int)(R)))
+
+#define _mm512_mask_cvtts_roundpd_epu64(W, U, A, R)                            \
+  ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(                         \
+      (__v8df)(__m512d)(A), (__v8di)(__m512i)(W), (__mmask8)(U),               \
+      (const int)(R)))
+
+#define _mm512_maskz_cvtts_roundpd_epu64(U, A, R)                              \
+  ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(                         \
+      (__v8df)(__m512d)(A), (__v8di)_mm512_setzero_si512(), (__mmask8)(U),     \
+      (const int)(R)))
+
+#endif
+
+// 512 bit: Float -> int
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi32(__m512 A) {
+  return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
+      (__v16sf)(A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttsps_epi32(__m512i W, __mmask16 U, __m512 A) {
+  return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
+      (__v16sf)(A), (__v16si)(W), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttsps_epi32(__mmask16 U, __m512 A) {
+  return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
+      (__v16sf)(A), (__v16si)_mm512_setzero_si512(), U,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm512_cvtts_roundps_epi32(A, R)                                       \
+  ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(                          \
+      (__v16sf)(__m512)(A), (__v16si)_mm512_undefined_epi32(),                 \
+      (__mmask16) - 1, (const int)(R)))
+
+#define _mm512_mask_cvtts_roundps_epi32(W, U, A, R)                            \
+  ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(                          \
+      (__v16sf)(__m512)(A), (__v16si)(__m512i)(W), (__mmask16)(U),             \
+      (const int)(R)))
+
+#define _mm512_maskz_cvtts_roundps_epi32(U, A, R)                              \
+  ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(                          \
+      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), (__mmask16)(U),   \
+      (const int)(R)))
+
+// 512 bit: Float -> uint
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu32(__m512 A) {
+  return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
+      (__v16sf)(A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttsps_epu32(__m512i W, __mmask16 U, __m512 A) {
+  return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
+      (__v16sf)(A), (__v16si)(W), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttsps_epu32(__mmask16 U, __m512 A) {
+  return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
+      (__v16sf)(A), (__v16si)_mm512_setzero_si512(), U,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm512_cvtts_roundps_epu32(A, R)                                       \
+  ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(                         \
+      (__v16sf)(__m512)(A), (__v16si)_mm512_undefined_epi32(),                 \
+      (__mmask16) - 1, (const int)(R)))
+
+#define _mm512_mask_cvtts_roundps_epu32(W, U, A, R)                            \
+  ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(                         \
+      (__v16sf)(__m512)(A), (__v16si)(__m512i)(W), (__mmask16)(U),             \
+      (const int)(R)))
+
+#define _mm512_maskz_cvtts_roundps_epu32(U, A, R)                              \
+  ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(                         \
+      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), (__mmask16)(U),   \
+      (const int)(R)))
+
+#ifdef __x86_64__
+// 512 bit : float -> long
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi64(__m256 A) {
+  return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
+      (__v8sf)A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttsps_epi64(__m512i W, __mmask8 U, __m256 A) {
+  return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
+      (__v8sf)A, (__v8di)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttsps_epi64(__mmask8 U, __m256 A) {
+  return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
+      (__v8sf)A, (__v8di)_mm512_setzero_si512(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm512_cvtts_roundps_epi64(A, R)                                       \
+  ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(                          \
+      (__v8sf)(__m256)(A), (__v8di)_mm512_undefined_epi32(), (__mmask8) - 1,   \
+      (const int)(R)))
+
+#define _mm512_mask_cvtts_roundps_epi64(W, U, A, R)                            \
+  ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(                          \
+      (__v8sf)(__m256)(A), (__v8di)(__m512i)(W), (__mmask8)(U),                \
+      (const int)(R)))
+
+#define _mm512_maskz_cvtts_roundps_epi64(U, A, R)                              \
+  ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(                          \
+      (__v8sf)(__m256)(A), (__v8di)_mm512_setzero_si512(), (__mmask8)(U),      \
+      (const int)(R)))
+
+// 512 bit : float -> ulong
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu64(__m256 A) {
+  return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
+      (__v8sf)A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttsps_epu64(__m512i W, __mmask8 U, __m256 A) {
+  return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
+      (__v8sf)A, (__v8di)W, U, _MM_FROUND_CUR_DIRECTION));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttsps_epu64(__mmask8 U, __m256 A) {
+  return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
+      (__v8sf)A, (__v8di)_mm512_setzero_si512(), U, _MM_FROUND_CUR_DIRECTION));
+}
+
+#define _mm512_cvtts_roundps_epu64(A, R)                                       \
+  ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(                         \
+      (__v8sf)(__m256)(A), (__v8di)_mm512_undefined_epi32(), (__mmask8) - 1,   \
+      (const int)(R)))
+
+#define _mm512_mask_cvtts_roundps_epu64(W, U, A, R)                            \
+  ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(                         \
+      (__v8sf)(__m256)(A), (__v8di)(__m512i)(W), (__mmask8)(U),                \
+      (const int)(R)))
+
+#define _mm512_maskz_cvtts_roundps_epu64(U, A, R)                              \
+  ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(                         \
+      (__v8sf)(__m256)(A), (__v8di)_mm512_setzero_si512(), (__mmask8)(U),      \
+      (const int)(R)))
+#endif
+
+#undef __DEFAULT_FN_ATTRS
+#endif // __AVX10_2_512SATCVTDSINTRIN_H
diff --git a/clang/lib/Headers/avx10_2satcvtdsintrin.h b/clang/lib/Headers/avx10_2satcvtdsintrin.h
new file mode 100644
index 00000000000000..5588c9ccfa4319
--- /dev/null
+++ b/clang/lib/Headers/avx10_2satcvtdsintrin.h
@@ -0,0 +1,453 @@
+/*===----------- avx10_2satcvtdsintrin.h - AVX512SATCVTDS intrinsics --------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error                                                                         \
+    "Never use <avx10_2satcvtdsintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __AVX10_2SATCVTDSINTRIN_H
+#define __AVX10_2SATCVTDSINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"),    \
+                 __min_vector_width__(256)))
+
+#define _mm_cvtt_roundssd_i32(A, R)                                            \
+  ((int)__builtin_ia32_vcvttssd2si32((__v2df)(__m128)(A), (const int)(R)))
+
+#define _mm_cvtt_roundssd_si32(A, R)                                           \
+  ((int)__builtin_ia32_vcvttssd2si32...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/102592


More information about the llvm-commits mailing list