[clang] e3e5517 - [clang][x86] Enable _mm_movehdup_ps, _mm_moveldup_ps and _mm_movedup_pd in constant expressions
Simon Pilgrim via cfe-commits
cfe-commits at lists.llvm.org
Tue Oct 8 03:21:39 PDT 2024
Author: Simon Pilgrim
Date: 2024-10-08T11:16:13+01:00
New Revision: e3e55173ed891da334990103c12206a4e4e8fc98
URL: https://github.com/llvm/llvm-project/commit/e3e55173ed891da334990103c12206a4e4e8fc98
DIFF: https://github.com/llvm/llvm-project/commit/e3e55173ed891da334990103c12206a4e4e8fc98.diff
LOG: [clang][x86] Enable _mm_movehdup_ps, _mm_moveldup_ps and _mm_movedup_pd in constant expressions
These intrinsics just wrap generic shuffles (__builtin_shufflevector).
Added:
Modified:
clang/lib/Headers/pmmintrin.h
clang/test/CodeGen/X86/sse3-builtins.c
Removed:
################################################################################
diff --git a/clang/lib/Headers/pmmintrin.h b/clang/lib/Headers/pmmintrin.h
index 9ad76579668b35..cd605df7fb52d8 100644
--- a/clang/lib/Headers/pmmintrin.h
+++ b/clang/lib/Headers/pmmintrin.h
@@ -27,6 +27,12 @@
__min_vector_width__(128)))
#endif
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
+#else
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
+#endif
+
/// Loads data from an unaligned memory location to elements in a 128-bit
/// vector.
///
@@ -128,7 +134,7 @@ _mm_hsub_ps(__m128 __a, __m128 __b)
/// destination.
/// \returns A 128-bit vector of [4 x float] containing the moved and duplicated
/// values.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_movehdup_ps(__m128 __a)
{
return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3);
@@ -149,7 +155,7 @@ _mm_movehdup_ps(__m128 __a)
/// destination.
/// \returns A 128-bit vector of [4 x float] containing the moved and duplicated
/// values.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_moveldup_ps(__m128 __a)
{
return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 2, 2);
@@ -250,7 +256,7 @@ _mm_hsub_pd(__m128d __a, __m128d __b)
/// [127:64] and [63:0] of the destination.
/// \returns A 128-bit vector of [2 x double] containing the moved and
/// duplicated values.
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_movedup_pd(__m128d __a)
{
return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
@@ -303,5 +309,6 @@ _mm_mwait(unsigned __extensions, unsigned __hints)
}
#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS_CONSTEXPR
#endif /* __PMMINTRIN_H */
diff --git a/clang/test/CodeGen/X86/sse3-builtins.c b/clang/test/CodeGen/X86/sse3-builtins.c
index 57a18dadefaa89..18c062f4c14a7d 100644
--- a/clang/test/CodeGen/X86/sse3-builtins.c
+++ b/clang/test/CodeGen/X86/sse3-builtins.c
@@ -75,3 +75,22 @@ __m128 test_mm_moveldup_ps(__m128 A) {
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
return _mm_moveldup_ps(A);
}
+
+// Test constexpr handling.
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+
+void test_constexpr() {
+ constexpr __m128d kd1 {+7.0,-7.0};
+ constexpr __m128 kf1 {+1.0f,-1.0f,+2.0f,+4.0f};
+
+ constexpr __m128d v_mm_movedup_pd = _mm_movedup_pd(kd1);
+ static_assert(v_mm_movedup_pd[0] == +7.0 && v_mm_movedup_pd[1] == +7.0);
+
+ constexpr __m128 v_mm_movehdup_ps = _mm_movehdup_ps(kf1);
+ static_assert(v_mm_movehdup_ps[0] == -1.0f && v_mm_movehdup_ps[1] == -1.0f && v_mm_movehdup_ps[2] == +4.0f && v_mm_movehdup_ps[3] == +4.0f);
+
+ constexpr __m128 v_mm_moveldup_ps = _mm_moveldup_ps(kf1);
+ static_assert(v_mm_moveldup_ps[0] == +1.0f && v_mm_moveldup_ps[1] == +1.0f && v_mm_moveldup_ps[2] == +2.0f && v_mm_moveldup_ps[3] == +2.0f);
+}
+
+#endif
More information about the cfe-commits mailing list