[clang] [X86][AVX10.2] Use 's_' for saturate-convert intrinsics (PR #131592)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Mar 18 21:07:22 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Phoebe Wang (phoebewang)
<details>
<summary>Changes</summary>
- Add '_' after cvt[t]s intrinsics when 's' is for saturation;
- Add 's_' for all ipcvt[t] intrinsics since they are all saturation ones;
This avoids potential confusion, since an 's' before a type usually stands for scalar.
Synced with the GCC folks; they will make the same change.
---
Patch is 234.90 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/131592.diff
15 Files Affected:
- (modified) clang/lib/Headers/avx10_2_512convertintrin.h (+18-16)
- (modified) clang/lib/Headers/avx10_2_512satcvtdsintrin.h (+28-24)
- (modified) clang/lib/Headers/avx10_2_512satcvtintrin.h (+84-84)
- (modified) clang/lib/Headers/avx10_2convertintrin.h (+32-32)
- (modified) clang/lib/Headers/avx10_2satcvtdsintrin.h (+52-48)
- (modified) clang/lib/Headers/avx10_2satcvtintrin.h (+136-136)
- (modified) clang/test/CodeGen/X86/avx10_2_512convert-builtins.c (+36-36)
- (modified) clang/test/CodeGen/X86/avx10_2_512satcvt-builtins-error.c (+96-96)
- (modified) clang/test/CodeGen/X86/avx10_2_512satcvt-builtins.c (+180-180)
- (modified) clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c (+48-48)
- (modified) clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c (+37-37)
- (modified) clang/test/CodeGen/X86/avx10_2convert-builtins.c (+72-72)
- (modified) clang/test/CodeGen/X86/avx10_2satcvt-builtins.c (+288-288)
- (modified) clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c (+84-84)
- (modified) clang/test/CodeGen/X86/avx10_2satcvtds-builtins.c (+72-72)
``````````diff
diff --git a/clang/lib/Headers/avx10_2_512convertintrin.h b/clang/lib/Headers/avx10_2_512convertintrin.h
index 516ccc68672d6..429faa930ecf8 100644
--- a/clang/lib/Headers/avx10_2_512convertintrin.h
+++ b/clang/lib/Headers/avx10_2_512convertintrin.h
@@ -157,21 +157,21 @@ _mm512_maskz_cvt2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) {
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_cvts2ph_bf8(__m512h __A, __m512h __B) {
+_mm512_cvts_2ph_bf8(__m512h __A, __m512h __B) {
return (__m512i)__builtin_ia32_vcvt2ph2bf8s_512((__v32hf)(__A),
(__v32hf)(__B));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvts2ph_bf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
+_mm512_mask_cvts_2ph_bf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
return (__m512i)__builtin_ia32_selectb_512(
- (__mmask64)__U, (__v64qi)_mm512_cvts2ph_bf8(__A, __B), (__v64qi)__W);
+ (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_bf8(__A, __B), (__v64qi)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvts2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) {
+_mm512_maskz_cvts_2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) {
return (__m512i)__builtin_ia32_selectb_512(
- (__mmask64)__U, (__v64qi)_mm512_cvts2ph_bf8(__A, __B),
+ (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_bf8(__A, __B),
(__v64qi)(__m512i)_mm512_setzero_si512());
}
@@ -195,21 +195,21 @@ _mm512_maskz_cvt2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) {
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_cvts2ph_hf8(__m512h __A, __m512h __B) {
+_mm512_cvts_2ph_hf8(__m512h __A, __m512h __B) {
return (__m512i)__builtin_ia32_vcvt2ph2hf8s_512((__v32hf)(__A),
(__v32hf)(__B));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvts2ph_hf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
+_mm512_mask_cvts_2ph_hf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
return (__m512i)__builtin_ia32_selectb_512(
- (__mmask64)__U, (__v64qi)_mm512_cvts2ph_hf8(__A, __B), (__v64qi)__W);
+ (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_hf8(__A, __B), (__v64qi)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvts2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) {
+_mm512_maskz_cvts_2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) {
return (__m512i)__builtin_ia32_selectb_512(
- (__mmask64)__U, (__v64qi)_mm512_cvts2ph_hf8(__A, __B),
+ (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_hf8(__A, __B),
(__v64qi)(__m512i)_mm512_setzero_si512());
}
@@ -247,19 +247,20 @@ _mm512_maskz_cvtph_bf8(__mmask32 __U, __m512h __A) {
(__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsph_bf8(__m512h __A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_cvts_ph_bf8(__m512h __A) {
return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask(
(__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtsph_bf8(__m256i __W, __mmask32 __U, __m512h __A) {
+_mm512_mask_cvts_ph_bf8(__m256i __W, __mmask32 __U, __m512h __A) {
return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask(
(__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtsph_bf8(__mmask32 __U, __m512h __A) {
+_mm512_maskz_cvts_ph_bf8(__mmask32 __U, __m512h __A) {
return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask(
(__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
}
@@ -281,19 +282,20 @@ _mm512_maskz_cvtph_hf8(__mmask32 __U, __m512h __A) {
(__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsph_hf8(__m512h __A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_cvts_ph_hf8(__m512h __A) {
return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask(
(__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtsph_hf8(__m256i __W, __mmask32 __U, __m512h __A) {
+_mm512_mask_cvts_ph_hf8(__m256i __W, __mmask32 __U, __m512h __A) {
return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask(
(__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtsph_hf8(__mmask32 __U, __m512h __A) {
+_mm512_maskz_cvts_ph_hf8(__mmask32 __U, __m512h __A) {
return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask(
(__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
}
diff --git a/clang/lib/Headers/avx10_2_512satcvtdsintrin.h b/clang/lib/Headers/avx10_2_512satcvtdsintrin.h
index 5970ab0331444..012a6282b5b18 100644
--- a/clang/lib/Headers/avx10_2_512satcvtdsintrin.h
+++ b/clang/lib/Headers/avx10_2_512satcvtdsintrin.h
@@ -20,20 +20,21 @@
__min_vector_width__(512)))
// 512 bit : Double -> Int
-static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi32(__m512d __A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_cvtts_pd_epi32(__m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
(__v8df)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttspd_epi32(__m256i __W, __mmask8 __U, __m512d __A) {
+_mm512_mask_cvtts_pd_epi32(__m256i __W, __mmask8 __U, __m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
(__v8df)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttspd_epi32(__mmask8 __U, __m512d __A) {
+_mm512_maskz_cvtts_pd_epi32(__mmask8 __U, __m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask(
(__v8df)__A, (__v8si)_mm256_setzero_si256(), __U,
_MM_FROUND_CUR_DIRECTION));
@@ -55,20 +56,21 @@ _mm512_maskz_cvttspd_epi32(__mmask8 __U, __m512d __A) {
(const int)(__R)))
// 512 bit : Double -> uInt
-static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu32(__m512d __A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_cvtts_pd_epu32(__m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
(__v8df)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttspd_epu32(__m256i __W, __mmask8 __U, __m512d __A) {
+_mm512_mask_cvtts_pd_epu32(__m256i __W, __mmask8 __U, __m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
(__v8df)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttspd_epu32(__mmask8 __U, __m512d __A) {
+_mm512_maskz_cvtts_pd_epu32(__mmask8 __U, __m512d __A) {
return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask(
(__v8df)__A, (__v8si)_mm256_setzero_si256(), __U,
_MM_FROUND_CUR_DIRECTION));
@@ -91,18 +93,19 @@ _mm512_maskz_cvttspd_epu32(__mmask8 __U, __m512d __A) {
// 512 bit : Double -> Long
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi64(__m512d __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtts_pd_epi64(__m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
(__v8df)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttspd_epi64(__m512i __W, __mmask8 __U, __m512d __A) {
+_mm512_mask_cvtts_pd_epi64(__m512i __W, __mmask8 __U, __m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
(__v8df)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttspd_epi64(__mmask8 __U, __m512d __A) {
+_mm512_maskz_cvtts_pd_epi64(__mmask8 __U, __m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask(
(__v8df)__A, (__v8di)_mm512_setzero_si512(), __U,
_MM_FROUND_CUR_DIRECTION));
@@ -125,20 +128,21 @@ _mm512_maskz_cvttspd_epi64(__mmask8 __U, __m512d __A) {
// 512 bit : Double -> ULong
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu64(__m512d __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtts_pd_epu64(__m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
(__v8df)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttspd_epu64(__m512i __W, __mmask8 __U, __m512d __A) {
+_mm512_mask_cvtts_pd_epu64(__m512i __W, __mmask8 __U, __m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
(__v8df)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttspd_epu64(__mmask8 __U, __m512d __A) {
+_mm512_maskz_cvtts_pd_epu64(__mmask8 __U, __m512d __A) {
return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask(
(__v8df)__A, (__v8di)_mm512_setzero_si512(), __U,
_MM_FROUND_CUR_DIRECTION));
@@ -160,20 +164,20 @@ _mm512_maskz_cvttspd_epu64(__mmask8 __U, __m512d __A) {
(const int)(__R)))
// 512 bit: Float -> int
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi32(__m512 __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epi32(__m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
(__v16sf)(__A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttsps_epi32(__m512i __W, __mmask16 __U, __m512 __A) {
+_mm512_mask_cvtts_ps_epi32(__m512i __W, __mmask16 __U, __m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
(__v16sf)(__A), (__v16si)(__W), __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttsps_epi32(__mmask16 __U, __m512 __A) {
+_mm512_maskz_cvtts_ps_epi32(__mmask16 __U, __m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask(
(__v16sf)(__A), (__v16si)_mm512_setzero_si512(), __U,
_MM_FROUND_CUR_DIRECTION));
@@ -195,20 +199,20 @@ _mm512_maskz_cvttsps_epi32(__mmask16 __U, __m512 __A) {
(__mmask16)(__U), (const int)(__R)))
// 512 bit: Float -> uint
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu32(__m512 __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epu32(__m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
(__v16sf)(__A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttsps_epu32(__m512i __W, __mmask16 __U, __m512 __A) {
+_mm512_mask_cvtts_ps_epu32(__m512i __W, __mmask16 __U, __m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
(__v16sf)(__A), (__v16si)(__W), __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttsps_epu32(__mmask16 __U, __m512 __A) {
+_mm512_maskz_cvtts_ps_epu32(__mmask16 __U, __m512 __A) {
return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask(
(__v16sf)(__A), (__v16si)_mm512_setzero_si512(), __U,
_MM_FROUND_CUR_DIRECTION));
@@ -230,20 +234,20 @@ _mm512_maskz_cvttsps_epu32(__mmask16 __U, __m512 __A) {
(__mmask16)(__U), (const int)(__R)))
// 512 bit : float -> long
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi64(__m256 __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epi64(__m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
(__v8sf)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttsps_epi64(__m512i __W, __mmask8 __U, __m256 __A) {
+_mm512_mask_cvtts_ps_epi64(__m512i __W, __mmask8 __U, __m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
(__v8sf)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttsps_epi64(__mmask8 __U, __m256 __A) {
+_mm512_maskz_cvtts_ps_epi64(__mmask8 __U, __m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask(
(__v8sf)__A, (__v8di)_mm512_setzero_si512(), __U,
_MM_FROUND_CUR_DIRECTION));
@@ -265,20 +269,20 @@ _mm512_maskz_cvttsps_epi64(__mmask8 __U, __m256 __A) {
(const int)(__R)))
// 512 bit : float -> ulong
-static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu64(__m256 __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epu64(__m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
(__v8sf)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_cvttsps_epu64(__m512i __W, __mmask8 __U, __m256 __A) {
+_mm512_mask_cvtts_ps_epu64(__m512i __W, __mmask8 __U, __m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
(__v8sf)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_cvttsps_epu64(__mmask8 __U, __m256 __A) {
+_mm512_maskz_cvtts_ps_epu64(__mmask8 __U, __m256 __A) {
return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask(
(__v8sf)__A, (__v8di)_mm512_setzero_si512(), __U,
_MM_FROUND_CUR_DIRECTION));
diff --git a/clang/lib/Headers/avx10_2_512satcvtintrin.h b/clang/lib/Headers/avx10_2_512satcvtintrin.h
index 7f41deb5212c5..b58e3db8956d6 100644
--- a/clang/lib/Headers/avx10_2_512satcvtintrin.h
+++ b/clang/lib/Headers/avx10_2_512satcvtintrin.h
@@ -14,286 +14,286 @@
#ifndef __AVX10_2_512SATCVTINTRIN_H
#define __AVX10_2_512SATCVTINTRIN_H
-#define _mm512_ipcvtbf16_epi8(A) \
+#define _mm512_ipcvts_bf16_epi8(A) \
((__m512i)__builtin_ia32_vcvtbf162ibs512((__v32bf)(__m512bh)(A)))
-#define _mm512_mask_ipcvtbf16_epi8(W, U, A) \
+#define _mm512_mask_ipcvts_bf16_epi8(W, U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvtbf16_epi8(A), \
+ (__v32hi)_mm512_ipcvts_bf16_epi8(A), \
(__v32hi)(__m512i)(W)))
-#define _mm512_maskz_ipcvtbf16_epi8(U, A) \
+#define _mm512_maskz_ipcvts_bf16_epi8(U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvtbf16_epi8(A), \
+ (__v32hi)_mm512_ipcvts_bf16_epi8(A), \
(__v32hi)_mm512_setzero_si512()))
-#define _mm512_ipcvtbf16_epu8(A) \
+#define _mm512_ipcvts_bf16_epu8(A) \
((__m512i)__builtin_ia32_vcvtbf162iubs512((__v32bf)(__m512bh)(A)))
-#define _mm512_mask_ipcvtbf16_epu8(W, U, A) \
+#define _mm512_mask_ipcvts_bf16_epu8(W, U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvtbf16_epu8(A), \
+ (__v32hi)_mm512_ipcvts_bf16_epu8(A), \
(__v32hi)(__m512i)(W)))
-#define _mm512_maskz_ipcvtbf16_epu8(U, A) \
+#define _mm512_maskz_ipcvts_bf16_epu8(U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvtbf16_epu8(A), \
+ (__v32hi)_mm512_ipcvts_bf16_epu8(A), \
(__v32hi)_mm512_setzero_si512()))
-#define _mm512_ipcvttbf16_epi8(A) \
+#define _mm512_ipcvtts_bf16_epi8(A) \
((__m512i)__builtin_ia32_vcvttbf162ibs512((__v32bf)(__m512bh)(A)))
-#define _mm512_mask_ipcvttbf16_epi8(W, U, A) \
+#define _mm512_mask_ipcvtts_bf16_epi8(W, U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvttbf16_epi8(A), \
+ (__v32hi)_mm512_ipcvtts_bf16_epi8(A), \
(__v32hi)(__m512i)(W)))
-#define _mm512_maskz_ipcvttbf16_epi8(U, A) \
+#define _mm512_maskz_ipcvtts_bf16_epi8(U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvttbf16_epi8(A), \
+ (__v32hi)_mm512_ipcvtts_bf16_epi8(A), \
(__v32hi)_mm512_setzero_si512()))
-#define _mm512_ipcvttbf16_epu8(A) \
+#define _mm512_ipcvtts_bf16_epu8(A) \
((__m512i)__builtin_ia32_vcvttbf162iubs512((__v32bf)(__m512bh)(A)))
-#define _mm512_mask_ipcvttbf16_epu8(W, U, A) \
+#define _mm512_mask_ipcvtts_bf16_epu8(W, U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvttbf16_epu8(A), \
+ (__v32hi)_mm512_ipcvtts_bf16_epu8(A), \
(__v32hi)(__m512i)(W)))
-#define _mm512_maskz_ipcvttbf16_epu8(U, A) \
+#define _mm512_maskz_ipcvtts_bf16_epu8(U, A) \
((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_ipcvttbf16_epu8(A), \
+ (__v32hi)_mm512_ipcvtts_bf16_epu8(A), \
(__v32hi)_mm512_setzero_si512()))
-#define _mm512_ipcvtph_epi8(A) \
+#define _mm512_ipcvts_ph_epi8(A) \
((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \
- (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \
+ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
_MM_FROUND_CUR_DIRECTION))
-#define _mm512_mask_ipcvtph_epi8(W, U, A) \
+#define _mm512_mask_ipcvts_ph_epi8(W, U, A) \
((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A), \
(__v32hu)(W), (__mmask32)(U), \
_MM_FROUND_CUR_DIRECTION))
-#define _mm512_maskz_ipcvtph_epi8(U, A) \
+#define _mm512_maskz_ipcvts_ph_epi8(U, A) \
((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \
(__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \
_MM_FROUND_CUR_DIRECTION))
-#define _mm512_ipcvt_roundph_epi8(A, R) \
+#define _mm512_ipcvts_roundph_epi8(A, R) \
((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A), \
...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/131592
More information about the cfe-commits
mailing list