[clang] [X86][RFC] Refactor the SSE intrinsics constexpr tests to simplify future expansion (PR #112578)

via cfe-commits cfe-commits at lists.llvm.org
Wed Oct 16 09:34:02 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

<details>
<summary>Changes</summary>

I'm hoping to make a large proportion of the SSE/AVX intrinsics usable in constant expressions - eventually anything that doesn't touch memory or system settings - making it much easier to utilize SSE/AVX intrinsics in various math libraries etc.

My initial implementation placed the tests at the end of the test file, similar to how smaller files already handle their tests.

However, I'm finding that this approach doesn't scale when trying to track coverage of so many intrinsics - many keep getting missed, and it gets messy. What I'm proposing instead is to keep each intrinsic's generic IR test and its constexpr tests next to each other so they are easier to track, wrapping the static_assert inside a macro that expands to nothing for C and pre-C++11 test runs.

I'm open to alternative suggestions before I invest too much time getting this work done :)

---

Patch is 32.30 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/112578.diff


3 Files Affected:

- (modified) clang/test/CodeGen/X86/sse-builtins.c (+43-92) 
- (modified) clang/test/CodeGen/X86/sse2-builtins.c (+45-108) 
- (modified) clang/test/CodeGen/X86/sse3-builtins.c (+15-19) 


``````````diff
diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c
index 391e049a6ae3ef..06077792f770a7 100644
--- a/clang/test/CodeGen/X86/sse-builtins.c
+++ b/clang/test/CodeGen/X86/sse-builtins.c
@@ -6,6 +6,15 @@
 
 #include <immintrin.h>
 
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+constexpr bool match_m128(__m128 v, float x, float y, float z, float w) {
+  return v[0] == x && v[1] == y && v[2] == z && v[3] == w;
+}
+#define TEST_CONSTEXPR(...) static_assert(__VA_ARGS__)
+#else
+#define TEST_CONSTEXPR(...)
+#endif
+
 // NOTE: This should match the tests in llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
 
 __m128 test_mm_add_ps(__m128 A, __m128 B) {
@@ -13,6 +22,7 @@ __m128 test_mm_add_ps(__m128 A, __m128 B) {
   // CHECK: fadd <4 x float>
   return _mm_add_ps(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_add_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +9.0f, +4.0f, +4.0f, +5.0f));
 
 __m128 test_mm_add_ss(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_add_ss
@@ -22,12 +32,14 @@ __m128 test_mm_add_ss(__m128 A, __m128 B) {
   // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
   return _mm_add_ss(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_add_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +9.0f, +0.0f, +2.0f, +4.0f));
 
 __m128 test_mm_and_ps(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_and_ps
   // CHECK: and <4 x i32>
   return _mm_and_ps(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_and_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), -0.0f, -0.0f, +0.0f, +7.0f));
 
 __m128 test_mm_andnot_ps(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_andnot_ps
@@ -35,6 +47,7 @@ __m128 test_mm_andnot_ps(__m128 A, __m128 B) {
   // CHECK: and <4 x i32>
   return _mm_andnot_ps(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_andnot_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), +0.0f, +0.0f, +0.0f, +0.0f));
 
 __m128 test_mm_cmp_ps_eq_oq(__m128 a, __m128 b) {
   // CHECK-LABEL: test_mm_cmp_ps_eq_oq
@@ -322,6 +335,15 @@ __m128 test_mm_cvtsi32_ss(__m128 A, int B) {
   // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
   return _mm_cvtsi32_ss(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_cvtsi32_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, 42), +42.0f, +0.0f, +2.0f, +4.0f));
+
+__m128 test_mm_cvt_si2ss(__m128 A, int B) {
+  // CHECK-LABEL: test_mm_cvt_si2ss
+  // CHECK: sitofp i32 %{{.*}} to float
+  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
+  return _mm_cvt_si2ss(A, B);
+}
+TEST_CONSTEXPR(match_m128(_mm_cvt_si2ss((__m128){+4.0f, +2.0f, +0.0f, +4.0f}, -99), -99.0f, +2.0f, +0.0f, +4.0f));
 
 #ifdef __x86_64__
 __m128 test_mm_cvtsi64_ss(__m128 A, long long B) {
@@ -330,6 +352,7 @@ __m128 test_mm_cvtsi64_ss(__m128 A, long long B) {
   // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
   return _mm_cvtsi64_ss(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_cvtsi64_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, 555), +555.0f, +0.0f, +2.0f, +4.0f));
 #endif
 
 float test_mm_cvtss_f32(__m128 A) {
@@ -337,6 +360,7 @@ float test_mm_cvtss_f32(__m128 A) {
   // CHECK: extractelement <4 x float> %{{.*}}, i32 0
   return _mm_cvtss_f32(A);
 }
+TEST_CONSTEXPR(_mm_cvtss_f32((__m128){+8.0f, +4.0f, +2.0f, +1.0f}) == +8.0f);
 
 int test_mm_cvtss_si32(__m128 A) {
   // CHECK-LABEL: test_mm_cvtss_si32
@@ -377,6 +401,7 @@ __m128 test_mm_div_ps(__m128 A, __m128 B) {
   // CHECK: fdiv <4 x float>
   return _mm_div_ps(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_div_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +0.125f, +0.0f, +1.0f, +4.0f));
 
 __m128 test_mm_div_ss(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_div_ss
@@ -386,6 +411,7 @@ __m128 test_mm_div_ss(__m128 A, __m128 B) {
   // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
   return _mm_div_ss(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_div_ss((__m128){+1.0f, +5.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +0.125f, +5.0f, +2.0f, +4.0f));
 
 unsigned int test_MM_GET_EXCEPTION_MASK(void) {
   // CHECK-LABEL: test_MM_GET_EXCEPTION_MASK
@@ -517,18 +543,21 @@ __m128 test_mm_move_ss(__m128 A, __m128 B) {
   // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
   return _mm_move_ss(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_move_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +8.0f, +0.0f, +2.0f, +4.0f));
 
 __m128 test_mm_movehl_ps(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_movehl_ps
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
   return _mm_movehl_ps(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_movehl_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +2.0f, +1.0f, +2.0f, +4.0f));
 
 __m128 test_mm_movelh_ps(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_movelh_ps
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   return _mm_movelh_ps(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_movelh_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +1.0f, +0.0f, +8.0f, +4.0f));
 
 int test_mm_movemask_ps(__m128 A) {
   // CHECK-LABEL: test_mm_movemask_ps
@@ -541,6 +570,7 @@ __m128 test_mm_mul_ps(__m128 A, __m128 B) {
   // CHECK: fmul <4 x float>
   return _mm_mul_ps(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_mul_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +8.0f, +0.0f, +4.0f, +4.0f));
 
 __m128 test_mm_mul_ss(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_mul_ss
@@ -550,12 +580,14 @@ __m128 test_mm_mul_ss(__m128 A, __m128 B) {
   // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
   return _mm_mul_ss(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_mul_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +8.0f, +0.0f, +2.0f, +4.0f));
 
 __m128 test_mm_or_ps(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_or_ps
   // CHECK: or <4 x i32>
   return _mm_or_ps(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_or_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), -4.0f, -5.0f, -6.0f, +7.0f));
 
 void test_mm_prefetch(char const* p) {
   // CHECK-LABEL: test_mm_prefetch
@@ -628,6 +660,7 @@ __m128 test_mm_set_ps(float A, float B, float C, float D) {
   // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
   return _mm_set_ps(A, B, C, D);
 }
+TEST_CONSTEXPR(match_m128(_mm_set_ps(+0.0f, +1.0f, +2.0f, +3.0f), +3.0f, +2.0f, +1.0f, +.0f));
 
 __m128 test_mm_set_ps1(float A) {
   // CHECK-LABEL: test_mm_set_ps1
@@ -637,6 +670,7 @@ __m128 test_mm_set_ps1(float A) {
   // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
   return _mm_set_ps1(A);
 }
+TEST_CONSTEXPR(match_m128(_mm_set_ps1(-2.0f), -2.0f, -2.0f, -2.0f, -2.0f));
 
 void test_MM_SET_ROUNDING_MODE(unsigned int A) {
   // CHECK-LABEL: test_MM_SET_ROUNDING_MODE
@@ -657,6 +691,7 @@ __m128 test_mm_set_ss(float A) {
   // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 3
   return _mm_set_ss(A);
 }
+TEST_CONSTEXPR(match_m128(_mm_set_ss(1.0f), +1.0f, +0.0f, +0.0f, +0.0f));
 
 __m128 test_mm_set1_ps(float A) {
   // CHECK-LABEL: test_mm_set1_ps
@@ -666,6 +701,7 @@ __m128 test_mm_set1_ps(float A) {
   // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
   return _mm_set1_ps(A);
 }
+TEST_CONSTEXPR(match_m128(_mm_set1_ps(2.0f), +2.0f, +2.0f, +2.0f, +2.0f));
 
 void test_mm_setcsr(unsigned int A) {
   // CHECK-LABEL: test_mm_setcsr
@@ -682,12 +718,14 @@ __m128 test_mm_setr_ps(float A, float B, float C, float D) {
   // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
   return _mm_setr_ps(A, B, C, D);
 }
+TEST_CONSTEXPR(match_m128(_mm_setr_ps(+0.0f, +1.0f, +2.0f, +3.0f), +0.0f, +1.0f, +2.0f, +3.0f));
 
 __m128 test_mm_setzero_ps(void) {
   // CHECK-LABEL: test_mm_setzero_ps
   // CHECK: store <4 x float> zeroinitializer
   return _mm_setzero_ps();
 }
+TEST_CONSTEXPR(match_m128(_mm_setzero_ps(), +0.0f, +0.0f, +0.0f, +0.0f));
 
 void test_mm_sfence(void) {
   // CHECK-LABEL: test_mm_sfence
@@ -787,6 +825,7 @@ __m128 test_mm_sub_ps(__m128 A, __m128 B) {
   // CHECK: fsub <4 x float>
   return _mm_sub_ps(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_sub_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), -7.0f, -4.0f, +0.0f, +3.0f));
 
 __m128 test_mm_sub_ss(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_sub_ss
@@ -796,6 +835,7 @@ __m128 test_mm_sub_ss(__m128 A, __m128 B) {
   // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
   return _mm_sub_ss(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_sub_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), -7.0f, +0.0f, +2.0f, +4.0f));
 
 void test_MM_TRANSPOSE4_PS(__m128 *A, __m128 *B, __m128 *C, __m128 *D) {
   // CHECK-LABEL: test_MM_TRANSPOSE4_PS
@@ -857,107 +897,18 @@ __m128 test_mm_unpackhi_ps(__m128 A, __m128 B) {
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
   return _mm_unpackhi_ps(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_unpackhi_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +2.0f, +2.0f, +4.0f, +1.0f));
 
 __m128 test_mm_unpacklo_ps(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_unpacklo_ps
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
   return _mm_unpacklo_ps(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_unpacklo_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +1.0f, +8.0f, +0.0f, +4.0f));
 
 __m128 test_mm_xor_ps(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_xor_ps
   // CHECK: xor <4 x i32>
   return _mm_xor_ps(A, B);
 }
-
-// Test constexpr handling.
-#if defined(__cplusplus) && (__cplusplus >= 201103L)
-
-void test_constexpr() {
-  constexpr __m128 k1 {+1.0f,+0.0f,+2.0f,+4.0f};
-  constexpr __m128 k2 {+8.0f,+4.0f,+2.0f,+1.0f};
-  constexpr __m128 k3 {-4.0f,-5.0f,+6.0f,+7.0f};
-  constexpr __m128 k4 {+0.0f,-0.0f,-0.0f,+0.0f};
-
-  constexpr __m128 v_mm_set_ss = _mm_set_ss(1.0f);
-  static_assert(v_mm_set_ss[0] == +1.0f && v_mm_set_ss[1] == +0.0f && v_mm_set_ss[2] == +0.0f && v_mm_set_ss[3] == +0.0f);
-
-  constexpr __m128 v_mm_set1_ps = _mm_set1_ps(2.0f);
-  static_assert(v_mm_set1_ps[0] == +2.0f && v_mm_set1_ps[1] == +2.0f && v_mm_set1_ps[2] == +2.0f && v_mm_set1_ps[3] == +2.0f);
-
-  constexpr __m128 v_mm_set_ps1 = _mm_set_ps1(-2.0f);
-  static_assert(v_mm_set_ps1[0] == -2.0f && v_mm_set_ps1[1] == -2.0f && v_mm_set_ps1[2] == -2.0f && v_mm_set_ps1[3] == -2.0f);
-
-  constexpr __m128 v_mm_set_ps = _mm_set_ps(+0.0f, +1.0f, +2.0f, +3.0f);
-  static_assert(v_mm_set_ps[0] == +3.0f && v_mm_set_ps[1] == +2.0f && v_mm_set_ps[2] == +1.0f && v_mm_set_ps[3] == +0.0f);
-
-  constexpr __m128 v_mm_setr_ps = _mm_setr_ps(+0.0f, +1.0f, +2.0f, +3.0f);
-  static_assert(v_mm_setr_ps[0] == +0.0f && v_mm_setr_ps[1] == +1.0f && v_mm_setr_ps[2] == +2.0f && v_mm_setr_ps[3] == +3.0f);
-
-  constexpr __m128 v_mm_setzero_ps = _mm_setzero_ps();
-  static_assert(v_mm_setzero_ps[0] == +0.0f && v_mm_setzero_ps[1] == +0.0f && v_mm_setzero_ps[2] == +0.0f && v_mm_setzero_ps[3] == +0.0f);
-
-  constexpr __m128 v_mm_add_ss = _mm_add_ss(k1, k2);
-  static_assert(v_mm_add_ss[0] == +9.0f && v_mm_add_ss[1] == +0.0f && v_mm_add_ss[2] == +2.0f && v_mm_add_ss[3] == +4.0f);
-
-  constexpr __m128 v_mm_add_ps = _mm_add_ps(k1, k2);
-  static_assert(v_mm_add_ps[0] == +9.0f && v_mm_add_ps[1] == +4.0f && v_mm_add_ps[2] == +4.0f && v_mm_add_ps[3] == +5.0f);
-
-  constexpr __m128 v_mm_sub_ss = _mm_sub_ss(k1, k2);
-  static_assert(v_mm_sub_ss[0] == -7.0f && v_mm_sub_ss[1] == +0.0f && v_mm_sub_ss[2] == +2.0f && v_mm_sub_ss[3] == +4.0f);
-
-  constexpr __m128 v_mm_sub_ps = _mm_sub_ps(k1, k2);
-  static_assert(v_mm_sub_ps[0] == -7.0f && v_mm_sub_ps[1] == -4.0f && v_mm_sub_ps[2] == +0.0f && v_mm_sub_ps[3] == +3.0f);
-
-  constexpr __m128 v_mm_mul_ss = _mm_mul_ss(k1, k2);
-  static_assert(v_mm_mul_ss[0] == +8.0f && v_mm_mul_ss[1] == +0.0f && v_mm_mul_ss[2] == +2.0f && v_mm_mul_ss[3] == +4.0f);
-
-  constexpr __m128 v_mm_mul_ps = _mm_mul_ps(k1, k2);
-  static_assert(v_mm_mul_ps[0] == +8.0f && v_mm_mul_ps[1] == +0.0f && v_mm_mul_ps[2] == +4.0f && v_mm_mul_ps[3] == +4.0f);
-
-  constexpr __m128 v_mm_div_ss = _mm_div_ss(k1, k2);
-  static_assert(v_mm_div_ss[0] == +0.125f && v_mm_div_ss[1] == +0.0f && v_mm_div_ss[2] == +2.0f && v_mm_div_ss[3] == +4.0f);
-
-  constexpr __m128 v_mm_div_ps = _mm_div_ps(k1, k2);
-  static_assert(v_mm_div_ps[0] == +0.125f && v_mm_div_ps[1] == +0.0f && v_mm_div_ps[2] == +1.0f && v_mm_div_ps[3] == +4.0f);
-
-  constexpr __m128 v_mm_and_ps = _mm_and_ps(k3, k4);
-  static_assert(v_mm_and_ps[0] == +0.0f && v_mm_and_ps[1] == +0.0f && v_mm_and_ps[2] == +0.0f && v_mm_and_ps[3] == +0.0f);
-
-  constexpr __m128 v_mm_andnot_ps = _mm_andnot_ps(k3, k4);
-  static_assert(v_mm_andnot_ps[0] == +0.0f && v_mm_andnot_ps[1] == +0.0f && v_mm_andnot_ps[2] == +0.0f && v_mm_andnot_ps[3] == +0.0f);
-
-  constexpr __m128 v_mm_or_ps = _mm_or_ps(k3, k4);
-  static_assert(v_mm_or_ps[0] == -4.0f && v_mm_or_ps[1] == -5.0f && v_mm_or_ps[2] == -6.0f && v_mm_or_ps[3] == +7.0f);
-
-  constexpr __m128 v_mm_xor_ps = _mm_xor_ps(k3, k4);
-  static_assert(v_mm_xor_ps[0] == -4.0f && v_mm_xor_ps[1] == +5.0f && v_mm_xor_ps[2] == -6.0f && v_mm_xor_ps[3] == +7.0f);
-
-  constexpr __m128 v_mm_unpackhi_ps = _mm_unpackhi_ps(k1, k2);
-  static_assert(v_mm_unpackhi_ps[0] == +2.0f && v_mm_unpackhi_ps[1] == +2.0f && v_mm_unpackhi_ps[2] == +4.0f && v_mm_unpackhi_ps[3] == +1.0f);
-
-  constexpr __m128 v_mm_unpacklo_ps = _mm_unpacklo_ps(k1, k2);
-  static_assert(v_mm_unpacklo_ps[0] == +1.0f && v_mm_unpacklo_ps[1] == +8.0f && v_mm_unpacklo_ps[2] == +0.0f && v_mm_unpacklo_ps[3] == +4.0f);
-
-  constexpr __m128 v_mm_move_ss = _mm_move_ss(k1, k2);
-  static_assert(v_mm_move_ss[0] == +8.0f && v_mm_move_ss[1] == +0.0f && v_mm_move_ss[2] == +2.0f && v_mm_move_ss[3] == +4.0f);
-
-  constexpr __m128 v_mm_movehl_ps = _mm_movehl_ps(k1, k2);
-  static_assert(v_mm_movehl_ps[0] == +2.0f && v_mm_movehl_ps[1] == +1.0f && v_mm_movehl_ps[2] == +2.0f && v_mm_movehl_ps[3] == +4.0f);
-
-  constexpr __m128 v_mm_movelh_ps = _mm_movelh_ps(k1, k2);
-  static_assert(v_mm_movelh_ps[0] == +1.0f && v_mm_movelh_ps[1] == +0.0f && v_mm_movelh_ps[2] == +8.0f && v_mm_movelh_ps[3] == +4.0f);
-
-  constexpr __m128 v_mm_cvtsi32_ss = _mm_cvtsi32_ss(k1, 42);
-  static_assert(v_mm_cvtsi32_ss[0] == 42.0f && v_mm_cvtsi32_ss[1] == +0.0f && v_mm_cvtsi32_ss[2] == +2.0f && v_mm_cvtsi32_ss[3] == +4.0f);
-
-  constexpr __m128 v_mm_cvt_si2ss = _mm_cvt_si2ss(k2, -99);
-  static_assert(v_mm_cvt_si2ss[0] == -99.0f && v_mm_cvt_si2ss[1] == +4.0f && v_mm_cvt_si2ss[2] == +2.0f && v_mm_cvt_si2ss[3] == +1.0f);
-
-  constexpr __m128 v_mm_cvtsi64_ss = _mm_cvtsi64_ss(k3, 555);
-  static_assert(v_mm_cvtsi64_ss[0] == 555.0f && v_mm_cvtsi64_ss[1] == -5.0f && v_mm_cvtsi64_ss[2] == +6.0f && v_mm_cvtsi64_ss[3] == +7.0f);
-
-  static_assert(_mm_cvtss_f32(k2) == +8.0f);
-}
-
-#endif
\ No newline at end of file
+TEST_CONSTEXPR(match_m128(_mm_xor_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), -4.0f, +5.0f, -6.0f, +0.0f));
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index 0603ca5f78b6a1..4010894ee6e73d 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -12,6 +12,21 @@
 
 #include <immintrin.h>
 
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+constexpr bool match_m128(__m128 v, float x, float y, float z, float w) {
+  return v[0] == x && v[1] == y && v[2] == z && v[3] == w;
+}
+constexpr bool match_m128d(__m128d v, double x, double y) {
+  return v[0] == x && v[1] == y;
+}
+constexpr bool match_m128i(__m128i v, unsigned long long x, unsigned long long y) {
+  return v[0] == x && v[1] == y;
+}
+#define TEST_CONSTEXPR(...) static_assert(__VA_ARGS__)
+#else
+#define TEST_CONSTEXPR(...)
+#endif
+
 // NOTE: This should match the tests in llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
 
 __m128i test_mm_add_epi8(__m128i A, __m128i B) {
@@ -43,6 +58,7 @@ __m128d test_mm_add_pd(__m128d A, __m128d B) {
   // CHECK: fadd <2 x double>
   return _mm_add_pd(A, B);
 }
+TEST_CONSTEXPR(match_m128d(_mm_add_pd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), +6.0, -8.0));
 
 __m128d test_mm_add_sd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_add_sd
@@ -52,6 +68,7 @@ __m128d test_mm_add_sd(__m128d A, __m128d B) {
   // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
   return _mm_add_sd(A, B);
 }
+TEST_CONSTEXPR(match_m128d(_mm_add_sd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), +6.0, -3.0));
 
 __m128i test_mm_adds_epi8(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_adds_epi8
@@ -84,6 +101,7 @@ __m128d test_mm_and_pd(__m128d A, __m128d B) {
   // CHECK: and <2 x i64>
   return _mm_and_pd(A, B);
 }
+TEST_CONSTEXPR(match_m128d(_mm_and_pd((__m128d){+1.0, -3.0}, (__m128d){+0.0, -0.0}), +0.0, -0.0));
 
 __m128i test_mm_and_si128(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_and_si128
@@ -97,6 +115,7 @@ __m128d test_mm_andnot_pd(__m128d A, __m128d B) {
   // CHECK: and <2 x i64>
   return _mm_andnot_pd(A, B);
 }
+TEST_CONSTEXPR(match_m128d(_mm_andnot_pd((__m128d){+1.0, -3.0}, (__m128d){+0.0, -0.0}), +0.0, -0.0));
 
 __m128i test_mm_andnot_si128(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_andnot_si128
@@ -133,11 +152,13 @@ __m128 test_mm_castpd_ps(__m128d A) {
   // CHECK-LABEL: test_mm_castpd_ps
   return _mm_castpd_ps(A);
 }
+TEST_CONSTEXPR(match_m128(_mm_castpd_ps((__m128d){-1.0, +2.0}), +0.0f, -1.875f, +0.0f, +2.0f));
 
 __m128i test_mm_castpd_si128(__m128d A) {
   // CHECK-LABEL: test_mm_castpd_si128
   return _mm_castpd_si128(A);
 }
+TEST_CONSTEXPR(match_m128i(_mm_castpd_si128((__m128d){-1.0, +2.0}), 0xBFF0000000000000ULL, 0x4000000000000000ULL));
 
 __m128d test_mm_castps_pd(__m128 A) {
   // CHECK-LABEL: test_mm_castps_pd
@@ -499,12 +520,14 @@ __m128d test_mm_cvtepi32_pd(__m128i A) {
   // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double>
   return _mm_cvtepi32_pd(A);
 }
+TEST_CONSTEXPR(match_m128d(_mm_cvtepi32_pd((__m128i)(__v4si){-9, +8, -6, 0}), -9.0, +8.0));
 
 __m128 test_mm_cvtepi32_ps(__m128i A) {
   // CHECK-LABEL: test_mm_cvtepi32_ps
   // CHECK: sitofp <4 x i32> %{{.*}} to <4 x float>
   return _mm_cvtepi32_ps(A);
 }
+TEST_CONSTEXPR(match_m128(_mm_cvtepi32_ps((__m128i)(__v4si){-3, +2, -1, 0}), -3.0f, +2.0f, -1.0f, +0.0f));
 
 __m128i test_mm_cvtpd_epi32(__m128d A) {
   // CHECK-LABEL: test_mm_cvtpd_epi32
@@ -530,12 +553,14 @@ __m128d test_mm_cvtps_pd(__m128 A) {
   // CHECK: fpext <2 x float> %{{.*}} to <2 x double>
   return _mm_cvtps_pd(A);
 }
+TEST_CONSTEXPR(match_m128d(_mm_cvtps_pd((__m128){-1.0f, +2.0f, -3.0f, +4.0f}), -1.0, +2.0));
 
 double test_mm_cvtsd_f64(__m128d A) {
   // CHECK-LABEL: test_mm_cvtsd_f64
   // CHECK: extractelement <2 x double> %{{.*}}, i32 0
   return _mm_cvtsd_f64(A);
 }
+TEST_CONSTEXPR(_mm_cvtsd_f64((__m128d){-4.0, +8.0}) == -4.0);
 
 int test_mm_cvtsd_si32(__m128d A) {
   // CHECK-LABEL: test_mm_cvtsd_si32
@@ -575,6 +600,7 @@ __m128d test_mm_cvtsi32_sd(__m128d A, int B) {
   // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
   return _mm_cvtsi32_sd(A, B);
 }
+TEST_CONSTEXPR(match_m128d(_mm_cvtsi32_sd((__m128d){-99.0, +42.0}, 55), +55.0, +42.0));
 
 __m128i test_mm_cvtsi32_si128(int A) {
   // CHECK-LABEL: test_mm_cvtsi32_si128
@@ -608,6 +634,7 @@ __m128d test_mm_cvtss_sd(__m128d A, __m128 B) {
   // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
   return _mm_cvtss_sd(A, B);
 }
+TEST_CONSTEXPR(match_m128d(_mm_cvtss_sd((__m128d){+32.0, +8.0}, (__m128){-1.0f, +2.0f, -3.0f, +4.0f}), -1.0, +8.0));
 
 __m128i test_mm_cvttpd_epi32(__m128d A) {
   // CHECK-LABEL: test_mm_cvttpd_epi32
@@ -640,6 +667,7 @@ __m128d test_mm_div_pd(__m128d A, __m128d B) {
   // CHECK: fdiv <2 x double>
   return _mm_div_pd(A, B);
 }
+TEST_CONSTEXPR(match_m128d(_mm_div_pd((__m128d){+2.0, +8.0}, (__m128d){-4.0, +2.0}), -0.5, +4.0));
 
 __m128d test_mm_div_sd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_div_sd
@@ -649,6 +677,7 @@ __m128d test_mm_div_sd(__m128d A, __m128d B) {
   // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
   return _...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/112578


More information about the cfe-commits mailing list