[clang] 1312369 - [clang][x86] Enable SSE2/AVX/AVX512 setzero intrinsics in constant expressions
Simon Pilgrim via cfe-commits
cfe-commits at lists.llvm.org
Tue Oct 8 04:19:53 PDT 2024
Author: Simon Pilgrim
Date: 2024-10-08T12:19:27+01:00
New Revision: 1312369afbeb2083094b3d34a88c346b22e86971
URL: https://github.com/llvm/llvm-project/commit/1312369afbeb2083094b3d34a88c346b22e86971
DIFF: https://github.com/llvm/llvm-project/commit/1312369afbeb2083094b3d34a88c346b22e86971.diff
LOG: [clang][x86] Enable SSE2/AVX/AVX512 setzero intrinsics in constant expressions
Basic setup for future constant expression tests
Added:
Modified:
clang/lib/Headers/avx512fintrin.h
clang/lib/Headers/avxintrin.h
clang/lib/Headers/emmintrin.h
clang/test/CodeGen/X86/avx-builtins.c
clang/test/CodeGen/X86/avx512f-builtins.c
clang/test/CodeGen/X86/sse2-builtins.c
Removed:
################################################################################
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 4f172c74b31cbb..45e7eeb5327d0c 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -175,12 +175,21 @@ typedef enum
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512f,no-evex512")))
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
+#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#else
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS128
+#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS
+#endif
+
/* Create vectors with repeated elements */
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_setzero_si512(void)
-{
- return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_setzero_si512(void) {
+ return __extension__(__m512i)(__v8di){0, 0, 0, 0, 0, 0, 0, 0};
}
#define _mm512_setzero_epi32 _mm512_setzero_si512
@@ -256,20 +265,16 @@ _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
(__v8di) _mm512_setzero_si512());
}
-
-static __inline __m512 __DEFAULT_FN_ATTRS512
-_mm512_setzero_ps(void)
-{
- return __extension__ (__m512){ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
- 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f };
+static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_ps(void) {
+ return __extension__(__m512){0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
}
#define _mm512_setzero _mm512_setzero_ps
-static __inline __m512d __DEFAULT_FN_ATTRS512
-_mm512_setzero_pd(void)
-{
- return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
+static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_setzero_pd(void) {
+ return __extension__(__m512d){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
}
static __inline __m512 __DEFAULT_FN_ATTRS512
@@ -9775,5 +9780,8 @@ _mm512_cvtsi512_si32(__m512i __A) {
#undef __DEFAULT_FN_ATTRS512
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS512_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS_CONSTEXPR
#endif /* __AVX512FINTRIN_H */
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 73707c623065e7..bb43b292be01f6 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -66,6 +66,14 @@ typedef __bf16 __m256bh __attribute__((__vector_size__(32), __aligned__(32)));
__min_vector_width__(128)))
#endif
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#else
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS128
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS
+#endif
+
/* Arithmetic */
/// Adds two 256-bit vectors of [4 x double].
///
@@ -4331,10 +4339,8 @@ _mm256_set1_epi64x(long long __q)
/// This intrinsic corresponds to the <c> VXORPS </c> instruction.
///
/// \returns A 256-bit vector of [4 x double] with all elements set to zero.
-static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_setzero_pd(void)
-{
- return __extension__ (__m256d){ 0.0, 0.0, 0.0, 0.0 };
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_pd(void) {
+ return __extension__(__m256d){0.0, 0.0, 0.0, 0.0};
}
/// Constructs a 256-bit floating-point vector of [8 x float] with all
@@ -4345,9 +4351,7 @@ _mm256_setzero_pd(void)
/// This intrinsic corresponds to the <c> VXORPS </c> instruction.
///
/// \returns A 256-bit vector of [8 x float] with all elements set to zero.
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_setzero_ps(void)
-{
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_ps(void) {
return __extension__ (__m256){ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f };
}
@@ -4358,9 +4362,8 @@ _mm256_setzero_ps(void)
/// This intrinsic corresponds to the <c> VXORPS </c> instruction.
///
/// \returns A 256-bit integer vector initialized to zero.
-static __inline __m256i __DEFAULT_FN_ATTRS
-_mm256_setzero_si256(void)
-{
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_setzero_si256(void) {
return __extension__ (__m256i)(__v4di){ 0, 0, 0, 0 };
}
@@ -5130,6 +5133,8 @@ _mm256_storeu2_m128i(__m128i_u *__addr_hi, __m128i_u *__addr_lo, __m256i __a)
}
#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS_CONSTEXPR
#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
#endif /* __AVXINTRIN_H */
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 72a32f953e0118..d2121408c114b5 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -59,6 +59,12 @@ typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16)));
__min_vector_width__(128)))
#endif
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
+#else
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
+#endif
+
#define __trunc64(x) \
(__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0)
#define __anyext128(x) \
@@ -1863,7 +1869,7 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setr_pd(double __w,
///
/// \returns An initialized 128-bit floating-point vector of [2 x double] with
/// all elements set to zero.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_pd(void) {
return __extension__(__m128d){0.0, 0.0};
}
@@ -3862,7 +3868,7 @@ _mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
///
/// \returns An initialized 128-bit integer vector with all elements set to
/// zero.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void) {
return __extension__(__m128i)(__v2di){0LL, 0LL};
}
@@ -4900,6 +4906,7 @@ void _mm_pause(void);
#undef __anyext128
#undef __trunc64
#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS_CONSTEXPR
#define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 3e4ea6072c740d..9d6c1897f540d3 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -2097,3 +2097,19 @@ float test_mm256_cvtss_f32(__m256 __a)
// CHECK: extractelement <8 x float> %{{.*}}, i32 0
return _mm256_cvtss_f32(__a);
}
+
+// Test constexpr handling.
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+
+void test_constexpr() {
+ constexpr __m256d v_mm256_setzero_pd = _mm256_setzero_pd();
+ static_assert(v_mm256_setzero_pd[0] == +0.0 && v_mm256_setzero_pd[1] == +0.0 && v_mm256_setzero_pd[2] == +0.0 && v_mm256_setzero_pd[3] == +0.0);
+
+ constexpr __m256 v_mm256_setzero_ps = _mm256_setzero_ps();
+ static_assert(v_mm256_setzero_ps[0] == +0.0f && v_mm256_setzero_ps[1] == +0.0f && v_mm256_setzero_ps[2] == +0.0f && v_mm256_setzero_ps[3] == +0.0f && v_mm256_setzero_ps[4] == +0.0f && v_mm256_setzero_ps[5] == +0.0f && v_mm256_setzero_ps[6] == +0.0f && v_mm256_setzero_ps[7] == +0.0f);
+
+ constexpr __m256i v_mm256_setzero_si256 = _mm256_setzero_si256();
+ static_assert(v_mm256_setzero_si256[0] == 0x0000000000000000ULL && v_mm256_setzero_si256[1] == 0x0000000000000000ULL && v_mm256_setzero_si256[2] == 0x0000000000000000ULL && v_mm256_setzero_si256[3] == 0x0000000000000000ULL);
+}
+
+#endif
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index e6a062b95ce24e..0b4f778a0637ab 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -10880,3 +10880,25 @@ void test_mm512_mask_i32loscatter_epi64(void *__addr, __mmask8 __mask, __m512i _
// CHECK: @llvm.x86.avx512.mask.scatter.dpq.512
_mm512_mask_i32loscatter_epi64(__addr, __mask, __index, __v1, 2);
}
+
+// Test constexpr handling.
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+
+void test_constexpr() {
+ constexpr __m512 v_mm512_setzero = _mm512_setzero();
+ static_assert(v_mm512_setzero[0] == +0.0f && v_mm512_setzero[1] == +0.0f && v_mm512_setzero[2] == +0.0f && v_mm512_setzero[3] == +0.0f && v_mm512_setzero[4] == +0.0f && v_mm512_setzero[5] == +0.0f && v_mm512_setzero[6] == +0.0f && v_mm512_setzero[7] == +0.0f && v_mm512_setzero[8] == +0.0f && v_mm512_setzero[9] == +0.0f && v_mm512_setzero[10] == +0.0f && v_mm512_setzero[11] == +0.0f && v_mm512_setzero[12] == +0.0f && v_mm512_setzero[13] == +0.0f && v_mm512_setzero[14] == +0.0f && v_mm512_setzero[15] == +0.0f);
+
+ constexpr __m512 v_mm512_setzero_ps = _mm512_setzero_ps();
+ static_assert(v_mm512_setzero_ps[0] == +0.0f && v_mm512_setzero_ps[1] == +0.0f && v_mm512_setzero_ps[2] == +0.0f && v_mm512_setzero_ps[3] == +0.0f && v_mm512_setzero_ps[4] == +0.0f && v_mm512_setzero_ps[5] == +0.0f && v_mm512_setzero_ps[6] == +0.0f && v_mm512_setzero_ps[7] == +0.0f && v_mm512_setzero_ps[8] == +0.0f && v_mm512_setzero_ps[9] == +0.0f && v_mm512_setzero_ps[10] == +0.0f && v_mm512_setzero_ps[11] == +0.0f && v_mm512_setzero_ps[12] == +0.0f && v_mm512_setzero_ps[13] == +0.0f && v_mm512_setzero_ps[14] == +0.0f && v_mm512_setzero_ps[15] == +0.0f);
+
+ constexpr __m512d v_mm512_setzero_pd = _mm512_setzero_pd();
+ static_assert(v_mm512_setzero_pd[0] == +0.0 && v_mm512_setzero_pd[1] == +0.0 && v_mm512_setzero_pd[2] == +0.0 && v_mm512_setzero_pd[3] == +0.0 && v_mm512_setzero_pd[4] == +0.0 && v_mm512_setzero_pd[5] == +0.0 && v_mm512_setzero_pd[6] == +0.0 && v_mm512_setzero_pd[7] == +0.0);
+
+ constexpr __m512i v_mm512_setzero_si512 = _mm512_setzero_si512();
+ static_assert(v_mm512_setzero_si512[0] == 0x0000000000000000ULL && v_mm512_setzero_si512[1] == 0x0000000000000000ULL && v_mm512_setzero_si512[2] == 0x0000000000000000ULL && v_mm512_setzero_si512[3] == 0x0000000000000000ULL && v_mm512_setzero_si512[4] == 0x0000000000000000ULL && v_mm512_setzero_si512[5] == 0x0000000000000000ULL && v_mm512_setzero_si512[6] == 0x0000000000000000ULL && v_mm512_setzero_si512[7] == 0x0000000000000000ULL);
+
+ constexpr __m512i v_mm512_setzero_epi32 = _mm512_setzero_epi32();
+ static_assert(v_mm512_setzero_epi32[0] == 0x0000000000000000ULL && v_mm512_setzero_epi32[1] == 0x0000000000000000ULL && v_mm512_setzero_epi32[2] == 0x0000000000000000ULL && v_mm512_setzero_epi32[3] == 0x0000000000000000ULL && v_mm512_setzero_epi32[4] == 0x0000000000000000ULL && v_mm512_setzero_epi32[5] == 0x0000000000000000ULL && v_mm512_setzero_epi32[6] == 0x0000000000000000ULL && v_mm512_setzero_epi32[7] == 0x0000000000000000ULL);
+}
+
+#endif
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index a5348761ec84e0..b16d610cabe6bb 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -1778,3 +1778,16 @@ __m128i test_mm_xor_si128(__m128i A, __m128i B) {
// CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
return _mm_xor_si128(A, B);
}
+
+// Test constexpr handling.
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+
+void test_constexpr() {
+ constexpr __m128d v_mm_setzero_pd = _mm_setzero_pd();
+ static_assert(v_mm_setzero_pd[0] == +0.0 && v_mm_setzero_pd[1] == +0.0);
+
+ constexpr __m128i v_mm_setzero_si128 = _mm_setzero_si128();
+ static_assert(v_mm_setzero_si128[0] == 0x0000000000000000ULL && v_mm_setzero_si128[1] == 0x0000000000000000ULL);
+}
+
+#endif
More information about the cfe-commits
mailing list