[llvm-branch-commits] [clang] [X86] Backport saturate-convert intrinsics renaming & YMM rounding intrinsics removal in AVX10.2 (PR #135549)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sun Apr 13 07:48:53 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
@llvm/pr-subscribers-clang
Author: Phoebe Wang (phoebewang)
<details>
<summary>Changes</summary>
AVX10.2 YMM rounding instructions are removed from latest AVX10 technical paper. Remove all intrinsics from compiler.
AVX10.2 saturate-convert intrinsics are modified to use "s_" in the name. It is a consensus made with GCC folks to avoid future ambiguity.
---
Patch is 453.86 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/135549.diff
21 Files Affected:
- (modified) clang/lib/Headers/avx10_2_512convertintrin.h (+24-22)
- (modified) clang/lib/Headers/avx10_2_512satcvtdsintrin.h (+28-24)
- (modified) clang/lib/Headers/avx10_2_512satcvtintrin.h (+84-84)
- (modified) clang/lib/Headers/avx10_2convertintrin.h (+50-62)
- (modified) clang/lib/Headers/avx10_2minmaxintrin.h (-45)
- (modified) clang/lib/Headers/avx10_2niintrin.h (-1666)
- (modified) clang/lib/Headers/avx10_2satcvtdsintrin.h (+52-158)
- (modified) clang/lib/Headers/avx10_2satcvtintrin.h (+88-200)
- (modified) clang/test/CodeGen/X86/avx10_2_512convert-builtins.c (+54-54)
- (modified) clang/test/CodeGen/X86/avx10_2_512minmax-error.c (-11)
- (removed) clang/test/CodeGen/X86/avx10_2_512satcvt-builtins-error.c (-198)
- (modified) clang/test/CodeGen/X86/avx10_2_512satcvt-builtins.c (+180-180)
- (modified) clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c (+48-48)
- (modified) clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c (+37-37)
- (modified) clang/test/CodeGen/X86/avx10_2convert-builtins.c (+108-126)
- (modified) clang/test/CodeGen/X86/avx10_2minmax-builtins.c (-54)
- (modified) clang/test/CodeGen/X86/avx10_2ni-builtins.c (-2405)
- (modified) clang/test/CodeGen/X86/avx10_2satcvt-builtins.c (+216-358)
- (removed) clang/test/CodeGen/X86/avx10_2satcvtds-builtins-errors.c (-57)
- (modified) clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c (+84-156)
- (modified) clang/test/CodeGen/X86/avx10_2satcvtds-builtins.c (+74-149)
``````````diff
diff --git a/clang/lib/Headers/avx10_2_512convertintrin.h b/clang/lib/Headers/avx10_2_512convertintrin.h
index 516ccc68672d6..ee8cbf28ca41c 100644
--- a/clang/lib/Headers/avx10_2_512convertintrin.h
+++ b/clang/lib/Headers/avx10_2_512convertintrin.h
@@ -78,20 +78,20 @@ _mm512_maskz_cvtbiasph_bf8(__mmask32 __U, __m512i __A, __m512h __B) {
}
static __inline__ __m256i __DEFAULT_FN_ATTRS512
-_mm512_cvtbiassph_bf8(__m512i __A, __m512h __B) {
+_mm512_cvts_biasph_bf8(__m512i __A, __m512h __B) {
return (__m256i)__builtin_ia32_vcvtbiasph2bf8s_512_mask(
(__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(),
(__mmask32)-1);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiassph_bf8(
+static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvts_biasph_bf8(
__m256i __W, __mmask32 __U, __m512i __A, __m512h __B) {
return (__m256i)__builtin_ia32_vcvtbiasph2bf8s_512_mask(
(__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, (__mmask32)__U);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtbiassph_bf8(__mmask32 __U, __m512i __A, __m512h __B) {
+_mm512_maskz_cvts_biasph_bf8(__mmask32 __U, __m512i __A, __m512h __B) {
return (__m256i)__builtin_ia32_vcvtbiasph2bf8s_512_mask(
(__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(),
(__mmask32)__U);
@@ -118,20 +118,20 @@ _mm512_maskz_cvtbiasph_hf8(__mmask32 __U, __m512i __A, __m512h __B) {
}
static __inline__ __m256i __DEFAULT_FN_ATTRS512
-_mm512_cvtbiassph_hf8(__m512i __A, __m512h __B) {
+_mm512_cvts_biasph_hf8(__m512i __A, __m512h __B) {
return (__m256i)__builtin_ia32_vcvtbiasph2hf8s_512_mask(
(__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(),
(__mmask32)-1);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiassph_hf8(
+static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvts_biasph_hf8(
__m256i __W, __mmask32 __U, __m512i __A, __m512h __B) {
return (__m256i)__builtin_ia32_vcvtbiasph2hf8s_512_mask(
(__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, (__mmask32)__U);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtbiassph_hf8(__mmask32 __U, __m512i __A, __m512h __B) {
+_mm512_maskz_cvts_biasph_hf8(__mmask32 __U, __m512i __A, __m512h __B) {
return (__m256i)__builtin_ia32_vcvtbiasph2hf8s_512_mask(
(__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(),
(__mmask32)__U);
@@ -157,21 +157,21 @@ _mm512_maskz_cvt2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) {
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_cvts2ph_bf8(__m512h __A, __m512h __B) {
+_mm512_cvts_2ph_bf8(__m512h __A, __m512h __B) {
return (__m512i)__builtin_ia32_vcvt2ph2bf8s_512((__v32hf)(__A),
(__v32hf)(__B));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvts2ph_bf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
+_mm512_mask_cvts_2ph_bf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
return (__m512i)__builtin_ia32_selectb_512(
- (__mmask64)__U, (__v64qi)_mm512_cvts2ph_bf8(__A, __B), (__v64qi)__W);
+ (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_bf8(__A, __B), (__v64qi)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvts2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) {
+_mm512_maskz_cvts_2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) {
return (__m512i)__builtin_ia32_selectb_512(
- (__mmask64)__U, (__v64qi)_mm512_cvts2ph_bf8(__A, __B),
+ (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_bf8(__A, __B),
(__v64qi)(__m512i)_mm512_setzero_si512());
}
@@ -195,21 +195,21 @@ _mm512_maskz_cvt2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) {
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_cvts2ph_hf8(__m512h __A, __m512h __B) {
+_mm512_cvts_2ph_hf8(__m512h __A, __m512h __B) {
return (__m512i)__builtin_ia32_vcvt2ph2hf8s_512((__v32hf)(__A),
(__v32hf)(__B));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvts2ph_hf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
+_mm512_mask_cvts_2ph_hf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
return (__m512i)__builtin_ia32_selectb_512(
- (__mmask64)__U, (__v64qi)_mm512_cvts2ph_hf8(__A, __B), (__v64qi)__W);
+ (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_hf8(__A, __B), (__v64qi)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvts2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) {
+_mm512_maskz_cvts_2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) {
return (__m512i)__builtin_ia32_selectb_512(
- (__mmask64)__U, (__v64qi)_mm512_cvts2ph_hf8(__A, __B),
+ (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_hf8(__A, __B),
(__v64qi)(__m512i)_mm512_setzero_si512());
}
@@ -247,19 +247,20 @@ _mm512_maskz_cvtph_bf8(__mmask32 __U, __m512h __A) {
(__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsph_bf8(__m512h __A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_cvts_ph_bf8(__m512h __A) {
return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask(
(__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtsph_bf8(__m256i __W, __mmask32 __U, __m512h __A) {
+_mm512_mask_cvts_ph_bf8(__m256i __W, __mmask32 __U, __m512h __A) {
return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask(
(__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtsph_bf8(__mmask32 __U, __m512h __A) {
+_mm512_maskz_cvts_ph_bf8(__mmask32 __U, __m512h __A) {
return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask(
(__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
}
@@ -281,19 +282,20 @@ _mm512_maskz_cvtph_hf8(__mmask32 __U, __m512h __A) {
(__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsph_hf8(__m512h __A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_cvts_ph_hf8(__m512h __A) {
return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask(
(__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtsph_hf8(__m256i __W, __mmask32 __U, __m512h __A) {
+_mm512_mask_cvts_ph_hf8(__m256i __W, __mmask32 __U, __m512h __A) {
return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask(
(__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtsph_hf8(__mmask32 __U, __m512h __A) {
+_mm512_maskz_cvts_ph_hf8(__mmask32 __U, __m512h __A) {
return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask(
(__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
}
diff --git a/clang/lib/Headers/avx10_2_512satcvtdsintrin.h b/clang/lib/Headers/avx10_2_512satcvtdsintrin.h
index 5970ab0331444..012a6282b5b18 100644
--- a/clang/lib/Headers/avx10_2_512satcvtdsintrin.h
+++ b/clang/lib/Headers/avx10_2_512satcvtdsintrin.h
@@ -20,20 +20,21 @@
__min_vector_width__(512)))
// 512 bit : Double -> Int
-static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi32(__m512d __A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_cvtts_pd_epi32(__m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
(__v8df)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttspd_epi32(__m256i __W, __mmask8 __U, __m512d __A) {
+_mm512_mask_cvtts_pd_epi32(__m256i __W, __mmask8 __U, __m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
(__v8df)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttspd_epi32(__mmask8 __U, __m512d __A) {
+_mm512_maskz_cvtts_pd_epi32(__mmask8 __U, __m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
(__v8df)__A, (__v8si)_mm256_setzero_si256(), __U,
_MM_FROUND_CUR_DIRECTION));
@@ -55,20 +56,21 @@ _mm512_maskz_cvttspd_epi32(__mmask8 __U, __m512d __A) {
(const int)(__R)))
// 512 bit : Double -> uInt
-static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu32(__m512d __A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_cvtts_pd_epu32(__m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
(__v8df)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttspd_epu32(__m256i __W, __mmask8 __U, __m512d __A) {
+_mm512_mask_cvtts_pd_epu32(__m256i __W, __mmask8 __U, __m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
(__v8df)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttspd_epu32(__mmask8 __U, __m512d __A) {
+_mm512_maskz_cvtts_pd_epu32(__mmask8 __U, __m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
(__v8df)__A, (__v8si)_mm256_setzero_si256(), __U,
_MM_FROUND_CUR_DIRECTION));
@@ -91,18 +93,19 @@ _mm512_maskz_cvttspd_epu32(__mmask8 __U, __m512d __A) {
// 512 bit : Double -> Long
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi64(__m512d __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtts_pd_epi64(__m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
(__v8df)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttspd_epi64(__m512i __W, __mmask8 __U, __m512d __A) {
+_mm512_mask_cvtts_pd_epi64(__m512i __W, __mmask8 __U, __m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
(__v8df)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttspd_epi64(__mmask8 __U, __m512d __A) {
+_mm512_maskz_cvtts_pd_epi64(__mmask8 __U, __m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
(__v8df)__A, (__v8di)_mm512_setzero_si512(), __U,
_MM_FROUND_CUR_DIRECTION));
@@ -125,20 +128,21 @@ _mm512_maskz_cvttspd_epi64(__mmask8 __U, __m512d __A) {
// 512 bit : Double -> ULong
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu64(__m512d __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtts_pd_epu64(__m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
(__v8df)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttspd_epu64(__m512i __W, __mmask8 __U, __m512d __A) {
+_mm512_mask_cvtts_pd_epu64(__m512i __W, __mmask8 __U, __m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
(__v8df)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttspd_epu64(__mmask8 __U, __m512d __A) {
+_mm512_maskz_cvtts_pd_epu64(__mmask8 __U, __m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
(__v8df)__A, (__v8di)_mm512_setzero_si512(), __U,
_MM_FROUND_CUR_DIRECTION));
@@ -160,20 +164,20 @@ _mm512_maskz_cvttspd_epu64(__mmask8 __U, __m512d __A) {
(const int)(__R)))
// 512 bit: Float -> int
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi32(__m512 __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epi32(__m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
(__v16sf)(__A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttsps_epi32(__m512i __W, __mmask16 __U, __m512 __A) {
+_mm512_mask_cvtts_ps_epi32(__m512i __W, __mmask16 __U, __m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
(__v16sf)(__A), (__v16si)(__W), __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttsps_epi32(__mmask16 __U, __m512 __A) {
+_mm512_maskz_cvtts_ps_epi32(__mmask16 __U, __m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
(__v16sf)(__A), (__v16si)_mm512_setzero_si512(), __U,
_MM_FROUND_CUR_DIRECTION));
@@ -195,20 +199,20 @@ _mm512_maskz_cvttsps_epi32(__mmask16 __U, __m512 __A) {
(__mmask16)(__U), (const int)(__R)))
// 512 bit: Float -> uint
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu32(__m512 __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epu32(__m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
(__v16sf)(__A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttsps_epu32(__m512i __W, __mmask16 __U, __m512 __A) {
+_mm512_mask_cvtts_ps_epu32(__m512i __W, __mmask16 __U, __m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
(__v16sf)(__A), (__v16si)(__W), __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttsps_epu32(__mmask16 __U, __m512 __A) {
+_mm512_maskz_cvtts_ps_epu32(__mmask16 __U, __m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
(__v16sf)(__A), (__v16si)_mm512_setzero_si512(), __U,
_MM_FROUND_CUR_DIRECTION));
@@ -230,20 +234,20 @@ _mm512_maskz_cvttsps_epu32(__mmask16 __U, __m512 __A) {
(__mmask16)(__U), (const int)(__R)))
// 512 bit : float -> long
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi64(__m256 __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epi64(__m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
(__v8sf)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttsps_epi64(__m512i __W, __mmask8 __U, __m256 __A) {
+_mm512_mask_cvtts_ps_epi64(__m512i __W, __mmask8 __U, __m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
(__v8sf)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttsps_epi64(__mmask8 __U, __m256 __A) {
+_mm512_maskz_cvtts_ps_epi64(__mmask8 __U, __m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
(__v8sf)__A, (__v8di)_mm512_setzero_si512(), __U,
_MM_FROUND_CUR_DIRECTION));
@@ -265,20 +269,20 @@ _mm512_maskz_cvttsps_epi64(__mmask8 __U, __m256 __A) {
(const int)(__R)))
// 512 bit : float -> ulong
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu64(__m256 __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epu64(__m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
(__v8sf)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttsps_epu64(__m512i __W, __mmask8 __U, __m256 __A) {
+_mm512_mask_cvtts_ps_epu64(__m512i __W, __mmask8 __U, __m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
(__v8sf)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttsps_epu64(__mmask8 __U, __m256 __A) {
+_mm512_maskz_cvtts_ps_epu64(__mmask8 __U, __m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
(__v8sf)__A, (__v8di)_mm512_setzero_si512(), __U,
_MM_FROUND_CUR_DIRECTION));
diff --git a/clang/lib/Headers/avx10_2_512satcvtintrin.h b/clang/lib/Headers/avx10_2_512satcvtintrin.h
index 7f41deb5212c5..b58e3db8956d6 100644
--- a/clang/lib/Headers/avx10_2_512satcvtintrin.h
+++ b/clang/lib/Headers/avx10_2_512satcvtintrin.h
@@ -14,286 +14,286 @@
#ifndef __AVX10_2_512SATCVTINTRIN_H
#define __AVX10_2_512SATCVTINTRIN_H
-#define _mm512_ipcvtbf16_epi8(A) \
+#define _mm512_ipcvts_bf16_epi8(A) \
((__m512i)__builtin_ia32_vcvtbf162ibs512((__v32bf)(__m512bh)(A)))
-#define _mm512_mask_ipcvtbf16_epi8(W, U, A) \
+#define _mm512_mask_ipcvts_bf16_epi8(W, U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvtbf16_epi8(A), \
+ (__v32hi)_mm512_ipcvts_bf16_epi8(A), \
(__v32hi)(__m512i)(W)))
-#define _mm512_maskz_ipcvtbf16_epi8(U, A) \
+#define _mm512_maskz_ipcvts_bf16_epi8(U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvtbf16_epi8(A), \
+ (__v32hi)_mm512_ipcvts_bf16_epi8(A), \
(__v32hi)_mm512_setzero_si512()))
-#define _mm512_ipcvtbf16_epu8(A) \
+#define _mm512_ipcvts_bf16_epu8(A) \
((__m512i)__builtin_ia32_vcvtbf162iubs512((__v32bf)(__m512bh)(A)))
-#define _mm512_mask_ipcvtbf16_epu8(W, U, A) \
+#define _mm512_mask_ipcvts_bf16_epu8(W, U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvtbf16_epu8(A), \
+ (__v32hi)_mm512_ipcvts_bf16_epu8(A), \
(__v32hi)(__m512i)(W)))
-#define _mm512_maskz_ipcvtbf16_epu8(U, A) \
+#define _mm512_maskz_ipcvts_bf16_epu8(U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvtbf16_epu8(A), \
+ (__v32hi)_mm512_ipcvts_bf16_epu8(A), \
(__v32hi)_mm512_setzero_si512()))
-#define _mm512_ipcvttbf16_epi8(A) \
+#define _mm512_ipcvtts_bf16_epi8(A) \
((__m512i)__builtin_ia32_vcvttbf162ibs512((__v32bf)(__m512bh)(A)))
-#define _mm512_mask_ipcvttbf16_epi8(W, U, A) \
+#define _mm512_mask_ipcvtts_bf16_epi8(W, U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvttbf16_epi8(A), \
+ (__v32hi)_mm512_ipcvtts_bf16_epi8(A), \
(__v32hi)(__m512i)(W)))
-#define _mm512_maskz_ipcvttbf16_epi8(U, A) \
+#define _mm512_maskz_ipcvtts_bf16_epi8(U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvttbf16_epi8(A), \
+ (__v32hi)_mm512_ipcvtts_bf16_epi8(A), \
(__v32hi)_mm512_setzero_si512()))
-#define _mm512_ipcvttbf16_epu8(A) \
+#define _mm512_ipcvtts_bf16_epu8(A) \
((__m512i)__builtin_ia32_vcvttbf162iubs512((__v32bf)(__m512bh)(A)))
-#define _mm512_mask_ipcvttbf16_epu8(W, U, A) \
+#define _mm512_mask_ipcvtts_bf16_epu8(W, U, A) \
((__m512i)__builtin_ia32_selectw_512((__...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/135549
More information about the llvm-branch-commits
mailing list