[clang] [llvm] [X86][AVX10.2] Remove YMM rounding from VCVT[,T]PS2I[,U]BS (PR #132426)

via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 21 10:10:39 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Phoebe Wang (phoebewang)

<details>
<summary>Changes</summary>

Ref: https://cdrdv2.intel.com/v1/dl/getContent/784343

---

Patch is 125.69 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/132426.diff


15 Files Affected:

- (modified) clang/include/clang/Basic/BuiltinsX86.td (+8-8) 
- (modified) clang/lib/Headers/avx10_2satcvtintrin.h (+32-164) 
- (modified) clang/lib/Sema/SemaX86.cpp (-8) 
- (removed) clang/test/CodeGen/X86/avx10_2_512satcvt-builtins-error.c (-198) 
- (modified) clang/test/CodeGen/X86/avx10_2satcvt-builtins.c (-142) 
- (modified) llvm/include/llvm/IR/IntrinsicsX86.td (+16-16) 
- (modified) llvm/lib/Target/X86/X86InstrAVX10.td (-19) 
- (modified) llvm/lib/Target/X86/X86IntrinsicsInfo.h (+12-12) 
- (modified) llvm/test/CodeGen/X86/avx10_2satcvt-intrinsics.ll (+32-416) 
- (modified) llvm/test/MC/Disassembler/X86/avx10.2-satcvt-32.txt (-64) 
- (modified) llvm/test/MC/Disassembler/X86/avx10.2-satcvt-64.txt (-64) 
- (modified) llvm/test/MC/X86/avx10.2satcvt-32-att.s (-64) 
- (modified) llvm/test/MC/X86/avx10.2satcvt-32-intel.s (-64) 
- (modified) llvm/test/MC/X86/avx10.2satcvt-64-att.s (-64) 
- (modified) llvm/test/MC/X86/avx10.2satcvt-64-intel.s (-64) 


``````````diff
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index ea0d6df4a33c2..0e3e6a120f83b 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -4886,7 +4886,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
 }
 
 let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
-  def vcvtph2ibs256_mask : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, _Float16>, _Vector<16, unsigned short>, unsigned short, _Constant int)">;
+  def vcvtph2ibs256_mask : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, _Float16>, _Vector<16, unsigned short>, unsigned short)">;
 }
 
 let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
@@ -4898,7 +4898,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
 }
 
 let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
-  def vcvtph2iubs256_mask : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, _Float16>, _Vector<16, unsigned short>, unsigned short, _Constant int)">;
+  def vcvtph2iubs256_mask : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, _Float16>, _Vector<16, unsigned short>, unsigned short)">;
 }
 
 let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
@@ -4910,7 +4910,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
 }
 
 let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
-  def vcvtps2ibs256_mask : X86Builtin<"_Vector<8, unsigned int>(_Vector<8, float>, _Vector<8, unsigned int>, unsigned char, _Constant int)">;
+  def vcvtps2ibs256_mask : X86Builtin<"_Vector<8, unsigned int>(_Vector<8, float>, _Vector<8, unsigned int>, unsigned char)">;
 }
 
 let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
@@ -4922,7 +4922,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
 }
 
 let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
-  def vcvtps2iubs256_mask : X86Builtin<"_Vector<8, unsigned int>(_Vector<8, float>, _Vector<8, unsigned int>, unsigned char, _Constant int)">;
+  def vcvtps2iubs256_mask : X86Builtin<"_Vector<8, unsigned int>(_Vector<8, float>, _Vector<8, unsigned int>, unsigned char)">;
 }
 
 let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
@@ -4958,7 +4958,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
 }
 
 let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
-  def vcvttph2ibs256_mask : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, _Float16>, _Vector<16, unsigned short>, unsigned short, _Constant int)">;
+  def vcvttph2ibs256_mask : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, _Float16>, _Vector<16, unsigned short>, unsigned short)">;
 }
 
 let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
@@ -4970,7 +4970,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
 }
 
 let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
-  def vcvttph2iubs256_mask : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, _Float16>, _Vector<16, unsigned short>, unsigned short, _Constant int)">;
+  def vcvttph2iubs256_mask : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, _Float16>, _Vector<16, unsigned short>, unsigned short)">;
 }
 
 let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
@@ -4982,7 +4982,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
 }
 
 let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
-  def vcvttps2ibs256_mask : X86Builtin<"_Vector<8, unsigned int>(_Vector<8, float>, _Vector<8, unsigned int>, unsigned char, _Constant int)">;
+  def vcvttps2ibs256_mask : X86Builtin<"_Vector<8, unsigned int>(_Vector<8, float>, _Vector<8, unsigned int>, unsigned char)">;
 }
 
 let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
@@ -4994,7 +4994,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
 }
 
 let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
-  def vcvttps2iubs256_mask : X86Builtin<"_Vector<8, unsigned int>(_Vector<8, float>, _Vector<8, unsigned int>, unsigned char, _Constant int)">;
+  def vcvttps2iubs256_mask : X86Builtin<"_Vector<8, unsigned int>(_Vector<8, float>, _Vector<8, unsigned int>, unsigned char)">;
 }
 
 let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
diff --git a/clang/lib/Headers/avx10_2satcvtintrin.h b/clang/lib/Headers/avx10_2satcvtintrin.h
index f0e6eba4bab81..2f1fad9eff730 100644
--- a/clang/lib/Headers/avx10_2satcvtintrin.h
+++ b/clang/lib/Headers/avx10_2satcvtintrin.h
@@ -66,7 +66,7 @@
 
 #define _mm_ipcvts_ph_epi8(A)                                                  \
   ((__m128i)__builtin_ia32_vcvtph2ibs128_mask(                                 \
-      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8) - 1))
+      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))
 
 #define _mm_mask_ipcvts_ph_epi8(W, U, A)                                       \
   ((__m128i)__builtin_ia32_vcvtph2ibs128_mask((__v8hf)(__m128h)(A),            \
@@ -78,36 +78,20 @@
 
 #define _mm256_ipcvts_ph_epi8(A)                                               \
   ((__m256i)__builtin_ia32_vcvtph2ibs256_mask(                                 \
-      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16) - 1, \
-      _MM_FROUND_CUR_DIRECTION))
+      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1))
 
 #define _mm256_mask_ipcvts_ph_epi8(W, U, A)                                    \
   ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A),           \
-                                              (__v16hu)(W), (__mmask16)(U),    \
-                                              _MM_FROUND_CUR_DIRECTION))
+                                              (__v16hu)(W), (__mmask16)(U)))
 
 #define _mm256_maskz_ipcvts_ph_epi8(U, A)                                      \
   ((__m256i)__builtin_ia32_vcvtph2ibs256_mask(                                 \
       (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()),                \
-      (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
-
-#define _mm256_ipcvts_roundph_epi8(A, R)                                       \
-  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A),           \
-                                              (__v16hu)_mm256_setzero_si256(), \
-                                              (__mmask16) - 1, (const int)R))
-
-#define _mm256_mask_ipcvts_roundph_epi8(W, U, A, R)                            \
-  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask(                                 \
-      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R))
-
-#define _mm256_maskz_ipcvts_roundph_epi8(U, A, R)                              \
-  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A),           \
-                                              (__v16hu)_mm256_setzero_si256(), \
-                                              (__mmask16)(U), (const int)R))
+      (__mmask16)(U)))
 
 #define _mm_ipcvts_ph_epu8(A)                                                  \
   ((__m128i)__builtin_ia32_vcvtph2iubs128_mask(                                \
-      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8) - 1))
+      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))
 
 #define _mm_mask_ipcvts_ph_epu8(W, U, A)                                       \
   ((__m128i)__builtin_ia32_vcvtph2iubs128_mask((__v8hf)(__m128h)(A),           \
@@ -119,36 +103,20 @@
 
 #define _mm256_ipcvts_ph_epu8(A)                                               \
   ((__m256i)__builtin_ia32_vcvtph2iubs256_mask(                                \
-      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16) - 1, \
-      _MM_FROUND_CUR_DIRECTION))
+      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1))
 
 #define _mm256_mask_ipcvts_ph_epu8(W, U, A)                                    \
   ((__m256i)__builtin_ia32_vcvtph2iubs256_mask((__v16hf)(__m256h)(A),          \
-                                               (__v16hu)(W), (__mmask16)(U),   \
-                                               _MM_FROUND_CUR_DIRECTION))
+                                               (__v16hu)(W), (__mmask16)(U)))
 
 #define _mm256_maskz_ipcvts_ph_epu8(U, A)                                      \
   ((__m256i)__builtin_ia32_vcvtph2iubs256_mask(                                \
       (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()),                \
-      (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
-
-#define _mm256_ipcvts_roundph_epu8(A, R)                                       \
-  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask(                                \
-      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16) - 1, \
-      (const int)R))
-
-#define _mm256_mask_ipcvts_roundph_epu8(W, U, A, R)                            \
-  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask(                                \
-      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R))
-
-#define _mm256_maskz_ipcvts_roundph_epu8(U, A, R)                              \
-  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask(                                \
-      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U),  \
-      (const int)R))
+      (__mmask16)(U)))
 
 #define _mm_ipcvts_ps_epi8(A)                                                  \
   ((__m128i)__builtin_ia32_vcvtps2ibs128_mask(                                 \
-      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8) - 1))
+      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))
 
 #define _mm_mask_ipcvts_ps_epi8(W, U, A)                                       \
   ((__m128i)__builtin_ia32_vcvtps2ibs128_mask((__v4sf)(__m128)(A),             \
@@ -160,36 +128,19 @@
 
 #define _mm256_ipcvts_ps_epi8(A)                                               \
   ((__m256i)__builtin_ia32_vcvtps2ibs256_mask(                                 \
-      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8) - 1,     \
-      _MM_FROUND_CUR_DIRECTION))
+      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1))
 
 #define _mm256_mask_ipcvts_ps_epi8(W, U, A)                                    \
   ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A),             \
-                                              (__v8su)(W), (__mmask8)(U),      \
-                                              _MM_FROUND_CUR_DIRECTION))
+                                              (__v8su)(W), (__mmask8)(U)))
 
 #define _mm256_maskz_ipcvts_ps_epi8(U, A)                                      \
   ((__m256i)__builtin_ia32_vcvtps2ibs256_mask(                                 \
-      (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U),    \
-      _MM_FROUND_CUR_DIRECTION))
-
-#define _mm256_ipcvts_roundps_epi8(A, R)                                       \
-  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A),             \
-                                              (__v8su)_mm256_setzero_si256(),  \
-                                              (__mmask8) - 1, (const int)R))
-
-#define _mm256_mask_ipcvts_roundps_epi8(W, U, A, R)                            \
-  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask(                                 \
-      (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R))
-
-#define _mm256_maskz_ipcvts_roundps_epi8(U, A, R)                              \
-  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A),             \
-                                              (__v8su)_mm256_setzero_si256(),  \
-                                              (__mmask8)(U), (const int)R))
+      (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U)))
 
 #define _mm_ipcvts_ps_epu8(A)                                                  \
   ((__m128i)__builtin_ia32_vcvtps2iubs128_mask(                                \
-      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8) - 1))
+      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))
 
 #define _mm_mask_ipcvts_ps_epu8(W, U, A)                                       \
   ((__m128i)__builtin_ia32_vcvtps2iubs128_mask((__v4sf)(__m128)(A),            \
@@ -201,32 +152,15 @@
 
 #define _mm256_ipcvts_ps_epu8(A)                                               \
   ((__m256i)__builtin_ia32_vcvtps2iubs256_mask(                                \
-      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8) - 1,     \
-      _MM_FROUND_CUR_DIRECTION))
+      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1))
 
 #define _mm256_mask_ipcvts_ps_epu8(W, U, A)                                    \
   ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A),            \
-                                               (__v8su)(W), (__mmask8)(U),     \
-                                               _MM_FROUND_CUR_DIRECTION))
+                                               (__v8su)(W), (__mmask8)(U)))
 
 #define _mm256_maskz_ipcvts_ps_epu8(U, A)                                      \
   ((__m256i)__builtin_ia32_vcvtps2iubs256_mask(                                \
-      (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U),    \
-      _MM_FROUND_CUR_DIRECTION))
-
-#define _mm256_ipcvts_roundps_epu8(A, R)                                       \
-  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A),            \
-                                               (__v8su)_mm256_setzero_si256(), \
-                                               (__mmask8) - 1, (const int)R))
-
-#define _mm256_mask_ipcvts_roundps_epu8(W, U, A, R)                            \
-  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask(                                \
-      (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R))
-
-#define _mm256_maskz_ipcvts_roundps_epu8(U, A, R)                              \
-  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A),            \
-                                               (__v8su)_mm256_setzero_si256(), \
-                                               (__mmask8)(U), (const int)R))
+      (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U)))
 
 #define _mm_ipcvtts_bf16_epi8(A)                                               \
   ((__m128i)__builtin_ia32_vcvttbf162ibs128((__v8bf)(__m128bh)(A)))
@@ -280,7 +214,7 @@
 
 #define _mm_ipcvtts_ph_epi8(A)                                                 \
   ((__m128i)__builtin_ia32_vcvttph2ibs128_mask(                                \
-      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8) - 1))
+      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))
 
 #define _mm_mask_ipcvtts_ph_epi8(W, U, A)                                      \
   ((__m128i)__builtin_ia32_vcvttph2ibs128_mask((__v8hf)(__m128h)(A),           \
@@ -292,36 +226,20 @@
 
 #define _mm256_ipcvtts_ph_epi8(A)                                              \
   ((__m256i)__builtin_ia32_vcvttph2ibs256_mask(                                \
-      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16) - 1, \
-      _MM_FROUND_CUR_DIRECTION))
+      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1))
 
 #define _mm256_mask_ipcvtts_ph_epi8(W, U, A)                                   \
   ((__m256i)__builtin_ia32_vcvttph2ibs256_mask((__v16hf)(__m256h)(A),          \
-                                               (__v16hu)(W), (__mmask16)(U),   \
-                                               _MM_FROUND_CUR_DIRECTION))
+                                               (__v16hu)(W), (__mmask16)(U)))
 
 #define _mm256_maskz_ipcvtts_ph_epi8(U, A)                                     \
   ((__m256i)__builtin_ia32_vcvttph2ibs256_mask(                                \
       (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()),                \
-      (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
-
-#define _mm256_ipcvtts_roundph_epi8(A, R)                                      \
-  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask(                                \
-      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16) - 1, \
-      (const int)R))
-
-#define _mm256_mask_ipcvtts_roundph_epi8(W, U, A, R)                           \
-  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask(                                \
-      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R))
-
-#define _mm256_maskz_ipcvtts_roundph_epi8(U, A, R)                             \
-  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask(                                \
-      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U),  \
-      (const int)R))
+      (__mmask16)(U)))
 
 #define _mm_ipcvtts_ph_epu8(A)                                                 \
   ((__m128i)__builtin_ia32_vcvttph2iubs128_mask(                               \
-      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8) - 1))
+      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))
 
 #define _mm_mask_ipcvtts_ph_epu8(W, U, A)                                      \
   ((__m128i)__builtin_ia32_vcvttph2iubs128_mask((__v8hf)(__m128h)(A),          \
@@ -333,36 +251,20 @@
 
 #define _mm256_ipcvtts_ph_epu8(A)                                              \
   ((__m256i)__builtin_ia32_vcvttph2iubs256_mask(                               \
-      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16) - 1, \
-      _MM_FROUND_CUR_DIRECTION))
+      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1))
 
 #define _mm256_mask_ipcvtts_ph_epu8(W, U, A)                                   \
   ((__m256i)__builtin_ia32_vcvttph2iubs256_mask((__v16hf)(__m256h)(A),         \
-                                                (__v16hu)(W), (__mmask16)(U),  \
-                                                _MM_FROUND_CUR_DIRECTION))
+                                                (__v16hu)(W), (__mmask16)(U)))
 
 #define _mm256_maskz_ipcvtts_ph_epu8(U, A)                                     \
   ((__m256i)__builtin_ia32_vcvttph2iubs256_mask(                               \
       (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()),                \
-      (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
-
-#define _mm256_ipcvtts_roundph_epu8(A, R)                                      \
-  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask(                               \
-      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16) - 1, \
-      (const int)R))
-
-#define _mm256_mask_ipcvtts_roundph_epu8(W, U, A, R)                           \
-  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask(                               \
-      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R))
-
-#define _mm256_maskz_ipcvtts_roundph_epu8(U, A, R)                             \
-  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask(                               \
-      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U),  \
-      (const int)R))
+      (__mmask16)(U)))
 
 #define _mm_ipcvtts_ps_epi8(A)                                                 \
   ((__m128i)__builtin_ia32_vcvttps2ibs128_mask(                                \
-      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8) - 1))
+      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))
 
 #define _mm_mask_ipcvtts_ps_epi8(W, U, A)                                      \
   ((__m128i)__builtin_ia32_vcvttps2ibs128_mask((__v4sf)(__m128)(A),            \
@@ -374,36 +276,19 @@
 
 #define _mm256_ipcvtts_ps_epi8(A)                                              \
   ((__m256i)__builtin_ia32_vcvttps2ibs256_mask(                                \
-      (__v8sf)(__m256)(A), (__v...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/132426


More information about the llvm-commits mailing list