[clang] e3e5517 - [clang][x86] Enable _mm_movehdup_ps, _mm_moveldup_ps and _mm_movedup_pd in constant expressions

Simon Pilgrim via cfe-commits cfe-commits at lists.llvm.org
Tue Oct 8 03:21:39 PDT 2024


Author: Simon Pilgrim
Date: 2024-10-08T11:16:13+01:00
New Revision: e3e55173ed891da334990103c12206a4e4e8fc98

URL: https://github.com/llvm/llvm-project/commit/e3e55173ed891da334990103c12206a4e4e8fc98
DIFF: https://github.com/llvm/llvm-project/commit/e3e55173ed891da334990103c12206a4e4e8fc98.diff

LOG: [clang][x86] Enable _mm_movehdup_ps, _mm_moveldup_ps and _mm_movedup_pd in constant expressions

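These intrinsics just wrap generic __builtin_shufflevector shuffles, so they can be marked constexpr when compiling as C++11 or later.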

Added: 
    

Modified: 
    clang/lib/Headers/pmmintrin.h
    clang/test/CodeGen/X86/sse3-builtins.c

Removed: 
    


################################################################################
diff  --git a/clang/lib/Headers/pmmintrin.h b/clang/lib/Headers/pmmintrin.h
index 9ad76579668b35..cd605df7fb52d8 100644
--- a/clang/lib/Headers/pmmintrin.h
+++ b/clang/lib/Headers/pmmintrin.h
@@ -27,6 +27,12 @@
                  __min_vector_width__(128)))
 #endif
 
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
+#else
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
+#endif
+
 /// Loads data from an unaligned memory location to elements in a 128-bit
 ///    vector.
 ///
@@ -128,7 +134,7 @@ _mm_hsub_ps(__m128 __a, __m128 __b)
 ///    destination.
 /// \returns A 128-bit vector of [4 x float] containing the moved and duplicated
 ///    values.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm_movehdup_ps(__m128 __a)
 {
   return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3);
@@ -149,7 +155,7 @@ _mm_movehdup_ps(__m128 __a)
 ///    destination.
 /// \returns A 128-bit vector of [4 x float] containing the moved and duplicated
 ///    values.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm_moveldup_ps(__m128 __a)
 {
   return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 2, 2);
@@ -250,7 +256,7 @@ _mm_hsub_pd(__m128d __a, __m128d __b)
 ///    [127:64] and [63:0] of the destination.
 /// \returns A 128-bit vector of [2 x double] containing the moved and
 ///    duplicated values.
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm_movedup_pd(__m128d __a)
 {
   return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
@@ -303,5 +309,6 @@ _mm_mwait(unsigned __extensions, unsigned __hints)
 }
 
 #undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS_CONSTEXPR
 
 #endif /* __PMMINTRIN_H */

diff  --git a/clang/test/CodeGen/X86/sse3-builtins.c b/clang/test/CodeGen/X86/sse3-builtins.c
index 57a18dadefaa89..18c062f4c14a7d 100644
--- a/clang/test/CodeGen/X86/sse3-builtins.c
+++ b/clang/test/CodeGen/X86/sse3-builtins.c
@@ -75,3 +75,22 @@ __m128 test_mm_moveldup_ps(__m128 A) {
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
   return _mm_moveldup_ps(A);
 }
+
+// Test constexpr handling.
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+
+void test_constexpr() {
+  constexpr __m128d kd1 {+7.0,-7.0};
+  constexpr __m128 kf1 {+1.0f,-1.0f,+2.0f,+4.0f};
+
+  constexpr __m128d v_mm_movedup_pd = _mm_movedup_pd(kd1);
+  static_assert(v_mm_movedup_pd[0] == +7.0 && v_mm_movedup_pd[1] == +7.0);
+
+  constexpr __m128 v_mm_movehdup_ps = _mm_movehdup_ps(kf1);
+  static_assert(v_mm_movehdup_ps[0] == -1.0f && v_mm_movehdup_ps[1] == -1.0f && v_mm_movehdup_ps[2] == +4.0f && v_mm_movehdup_ps[3] == +4.0f);
+
+  constexpr __m128 v_mm_moveldup_ps = _mm_moveldup_ps(kf1);
+  static_assert(v_mm_moveldup_ps[0] == +1.0f && v_mm_moveldup_ps[1] == +1.0f && v_mm_moveldup_ps[2] == +2.0f && v_mm_moveldup_ps[3] == +2.0f);
+}
+
+#endif


        


More information about the cfe-commits mailing list