[clang] [llvm] [X86][AVX10.2] Support AVX10.2-SATCVT-DS new instructions. (PR #102592)

Phoebe Wang via cfe-commits cfe-commits at lists.llvm.org
Thu Sep 5 05:40:20 PDT 2024


================
@@ -0,0 +1,443 @@
+/*===----------- avx10_2satcvtdsintrin.h - AVX10.2-SATCVT-DS intrinsics -----===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error                                                                         \
+    "Never use <avx10_2satcvtdsintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __AVX10_2SATCVTDSINTRIN_H
+#define __AVX10_2SATCVTDSINTRIN_H
+
+/* Define the default attributes for the functions in this file.
+ * Both variants request always-inline, no debug info and the "avx10.2-256"
+ * target; they differ only in __min_vector_width__ (256 vs. 128).
+ * NOTE(review): per the review feedback on this patch, __min_vector_width__
+ * must be the widest vector among a function's parameters and return value,
+ * so pick the variant by the widest operand, not by the return type alone. */
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"),    \
+                 __min_vector_width__(256)))
+
+#define __DEFAULT_FN_ATTRS128                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"),    \
+                 __min_vector_width__(128)))
+
+/* Expands to __builtin_ia32_vcvttsd2sis32 on A (cast through __m128d to
+ * __v2df) with R forwarded as a constant int; the result is cast to int.
+ * The source operand is a double vector, so the intermediate cast is __m128d
+ * (not __m128), matching _mm_cvtts_roundsd_si32, which has the same
+ * expansion. */
+#define _mm_cvtts_roundsd_i32(A, R)                                            \
+  ((int)__builtin_ia32_vcvttsd2sis32((__v2df)(__m128d)(A), (const int)(R)))
+
+/* Expands to __builtin_ia32_vcvttsd2sis32 on A (cast through __m128d to
+ * __v2df) with R forwarded as a constant int; the result is cast to int.
+ * NOTE(review): R is presumably the rounding/exception-control immediate --
+ * confirm against the instruction specification. */
+#define _mm_cvtts_roundsd_si32(A, R)                                           \
+  ((int)__builtin_ia32_vcvttsd2sis32((__v2df)(__m128d)(A), (const int)(R)))
+
+/* Same shape as above but calls __builtin_ia32_vcvttsd2usis32 and casts the
+ * result to unsigned int. */
+#define _mm_cvtts_roundsd_u32(A, R)                                            \
+  ((unsigned int)__builtin_ia32_vcvttsd2usis32((__v2df)(__m128d)(A),           \
+                                               (const int)(R)))
+
+/* Single-precision source: A is cast through __m128 to __v4sf and passed to
+ * __builtin_ia32_vcvttss2sis32 together with R; the result is cast to int. */
+#define _mm_cvtts_roundss_i32(A, R)                                            \
+  ((int)__builtin_ia32_vcvttss2sis32((__v4sf)(__m128)(A), (const int)(R)))
+
+/* Identical expansion to _mm_cvtts_roundss_i32; only the macro name differs. */
+#define _mm_cvtts_roundss_si32(A, R)                                           \
+  ((int)__builtin_ia32_vcvttss2sis32((__v4sf)(__m128)(A), (const int)(R)))
+
+/* Calls __builtin_ia32_vcvttss2usis32 on A (as __v4sf) with R and casts the
+ * result to unsigned int. */
+#define _mm_cvtts_roundss_u32(A, R)                                            \
+  ((unsigned int)__builtin_ia32_vcvttss2usis32((__v4sf)(__m128)(A),            \
+                                               (const int)(R)))
+
+/* 64-bit result variants; only defined when __x86_64__ is defined.
+ * Each macro casts A to the builtin's vector type (__v4sf for "ss",
+ * __v2df for "sd"), forwards R as a constant int, and casts the builtin's
+ * result to unsigned long long ("u64") or long long ("i64"/"si64").
+ * The "i64" and "si64" spellings of each source type expand identically. */
+#ifdef __x86_64__
+#define _mm_cvtts_roundss_u64(A, R)                                            \
+  ((unsigned long long)__builtin_ia32_vcvttss2usis64((__v4sf)(__m128)(A),      \
+                                                     (const int)(R)))
+
+#define _mm_cvtts_roundsd_u64(A, R)                                            \
+  ((unsigned long long)__builtin_ia32_vcvttsd2usis64((__v2df)(__m128d)(A),     \
+                                                     (const int)(R)))
+
+#define _mm_cvtts_roundss_i64(A, R)                                            \
+  ((long long)__builtin_ia32_vcvttss2sis64((__v4sf)(__m128)(A), (const int)(R)))
+
+#define _mm_cvtts_roundss_si64(A, R)                                           \
+  ((long long)__builtin_ia32_vcvttss2sis64((__v4sf)(__m128)(A), (const int)(R)))
+
+#define _mm_cvtts_roundsd_si64(A, R)                                           \
+  ((long long)__builtin_ia32_vcvttsd2sis64((__v2df)(__m128d)(A),               \
+                                           (const int)(R)))
+
+#define _mm_cvtts_roundsd_i64(A, R)                                            \
+  ((long long)__builtin_ia32_vcvttsd2sis64((__v2df)(__m128d)(A),               \
+                                           (const int)(R)))
+#endif /* __x86_64__ */
+
+// 128 Bit : Double -> int
+// All three macros expand to
+//   __builtin_ia32_vcvttpd2dqs128_mask(source, passthrough, mask)
+// with A as the __v2df source and the result cast to __m128i:
+//   _mm_cvttspd_epi32       - passthrough _mm_undefined_si128(), mask -1
+//   _mm_mask_cvttspd_epi32  - passthrough W, mask U
+//   _mm_maskz_cvttspd_epi32 - passthrough _mm_setzero_si128(), mask U
+// Every macro argument is parenthesized so that an expression argument is
+// cast as a whole rather than having the cast bind to its first operand.
+// NOTE(review): presumably result elements whose mask bit is clear are taken
+// from the passthrough operand -- confirm against the instruction spec.
+#define _mm_cvttspd_epi32(A)                                                   \
+  ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask(                                \
+      (__v2df)(__m128d)(A), (__v4si)(__m128i)_mm_undefined_si128(),            \
+      (__mmask8)(-1)))
+
+#define _mm_mask_cvttspd_epi32(W, U, A)                                        \
+  ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask(                                \
+      (__v2df)(__m128d)(A), (__v4si)(__m128i)(W), (__mmask8)(U)))
+
+#define _mm_maskz_cvttspd_epi32(U, A)                                          \
+  ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask(                                \
+      (__v2df)(__m128d)(A), (__v4si)(__m128i)_mm_setzero_si128(),              \
+      (__mmask8)(U)))
+
+// 256 Bit : Double -> int
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
----------------
phoebewang wrote:

This is not correct: `__min_vector_width__` must be the maximum vector width across all parameters and the return value.

https://github.com/llvm/llvm-project/pull/102592


More information about the cfe-commits mailing list