[clang] [X86][RFC] Refactor the SSE intrinsics constexpr tests to simplify future expansion (PR #112578)
Simon Pilgrim via cfe-commits
cfe-commits at lists.llvm.org
Wed Oct 16 09:33:22 PDT 2024
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/112578
I'm hoping to make a large proportion of the SSE/AVX intrinsics usable in constant expressions - eventually anything that doesn't touch memory or system settings - making it much easier to utilize SSE/AVX intrinsics in various math libraries etc.
My initial implementation placed the tests at the end of the test file, similar to how smaller files already handle their tests.
However, I'm finding that this approach doesn't scale when trying to track coverage of so many intrinsics - many keep getting missed, and it gets messy. What I'm proposing instead is to keep each intrinsic's generic IR test and its constexpr tests next to each other so they are easier to track together, wrapping the static_assert inside a macro (TEST_CONSTEXPR) that expands to nothing in C and pre-C++11 test runs.
I'm open to alternative suggestions before I invest too much time getting this work done :)
From 00231ce54ca6ca539c80d3e79e5b139d5b0ee4f3 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 16 Oct 2024 17:31:31 +0100
Subject: [PATCH] [X86][RFC] Refactor the SSE intrinsics constexpr tests to
simplify future expansion
I'm hoping to make a large proportion of the SSE/AVX intrinsics usable in constant expressions - eventually anything that doesn't touch memory or system settings - making it much easier to utilise SSE/AVX intrinsics in various math libraries etc.
My initial implementation placed the tests at the end of the test file, similar to how smaller files already handle their tests.
However, I'm finding that this approach doesn't scale when trying to track coverage of so many intrinsics - many keep getting missed, and it gets messy. What I'm proposing instead is to keep each intrinsic's generic IR test and its constexpr tests next to each other so they are easier to track together, wrapping the static_assert inside a macro (TEST_CONSTEXPR) that expands to nothing in C and pre-C++11 test runs.
I'm open to alternative suggestions before I invest too much time getting this work done :)
---
clang/test/CodeGen/X86/sse-builtins.c | 135 +++++++---------------
clang/test/CodeGen/X86/sse2-builtins.c | 153 ++++++++-----------------
clang/test/CodeGen/X86/sse3-builtins.c | 34 +++---
3 files changed, 103 insertions(+), 219 deletions(-)
diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c
index 391e049a6ae3ef..06077792f770a7 100644
--- a/clang/test/CodeGen/X86/sse-builtins.c
+++ b/clang/test/CodeGen/X86/sse-builtins.c
@@ -6,6 +6,15 @@
#include <immintrin.h>
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+constexpr bool match_m128(__m128 v, float x, float y, float z, float w) {
+ return v[0] == x && v[1] == y && v[2] == z && v[3] == w;
+}
+#define TEST_CONSTEXPR(...) static_assert(__VA_ARGS__)
+#else
+#define TEST_CONSTEXPR(...)
+#endif
+
// NOTE: This should match the tests in llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
__m128 test_mm_add_ps(__m128 A, __m128 B) {
@@ -13,6 +22,7 @@ __m128 test_mm_add_ps(__m128 A, __m128 B) {
// CHECK: fadd <4 x float>
return _mm_add_ps(A, B);
}
+TEST_CONSTEXPR(match_m128(_mm_add_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +9.0f, +4.0f, +4.0f, +5.0f));
__m128 test_mm_add_ss(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_add_ss
@@ -22,12 +32,14 @@ __m128 test_mm_add_ss(__m128 A, __m128 B) {
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
return _mm_add_ss(A, B);
}
+TEST_CONSTEXPR(match_m128(_mm_add_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +9.0f, +0.0f, +2.0f, +4.0f));
__m128 test_mm_and_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_and_ps
// CHECK: and <4 x i32>
return _mm_and_ps(A, B);
}
+TEST_CONSTEXPR(match_m128(_mm_and_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), -0.0f, -0.0f, +0.0f, +7.0f));
__m128 test_mm_andnot_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_andnot_ps
@@ -35,6 +47,7 @@ __m128 test_mm_andnot_ps(__m128 A, __m128 B) {
// CHECK: and <4 x i32>
return _mm_andnot_ps(A, B);
}
+TEST_CONSTEXPR(match_m128(_mm_andnot_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), +0.0f, +0.0f, +0.0f, +0.0f));
__m128 test_mm_cmp_ps_eq_oq(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_eq_oq
@@ -322,6 +335,15 @@ __m128 test_mm_cvtsi32_ss(__m128 A, int B) {
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
return _mm_cvtsi32_ss(A, B);
}
+TEST_CONSTEXPR(match_m128(_mm_cvtsi32_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, 42), +42.0f, +0.0f, +2.0f, +4.0f));
+
+__m128 test_mm_cvt_si2ss(__m128 A, int B) {
+ // CHECK-LABEL: test_mm_cvt_si2ss
+ // CHECK: sitofp i32 %{{.*}} to float
+ // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
+ return _mm_cvt_si2ss(A, B);
+}
+TEST_CONSTEXPR(match_m128(_mm_cvt_si2ss((__m128){+4.0f, +2.0f, +0.0f, +4.0f}, -99), -99.0f, +2.0f, +0.0f, +4.0f));
#ifdef __x86_64__
__m128 test_mm_cvtsi64_ss(__m128 A, long long B) {
@@ -330,6 +352,7 @@ __m128 test_mm_cvtsi64_ss(__m128 A, long long B) {
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
return _mm_cvtsi64_ss(A, B);
}
+TEST_CONSTEXPR(match_m128(_mm_cvtsi64_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, 555), +555.0f, +0.0f, +2.0f, +4.0f));
#endif
float test_mm_cvtss_f32(__m128 A) {
@@ -337,6 +360,7 @@ float test_mm_cvtss_f32(__m128 A) {
// CHECK: extractelement <4 x float> %{{.*}}, i32 0
return _mm_cvtss_f32(A);
}
+TEST_CONSTEXPR(_mm_cvtss_f32((__m128){+8.0f, +4.0f, +2.0f, +1.0f}) == +8.0f);
int test_mm_cvtss_si32(__m128 A) {
// CHECK-LABEL: test_mm_cvtss_si32
@@ -377,6 +401,7 @@ __m128 test_mm_div_ps(__m128 A, __m128 B) {
// CHECK: fdiv <4 x float>
return _mm_div_ps(A, B);
}
+TEST_CONSTEXPR(match_m128(_mm_div_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +0.125f, +0.0f, +1.0f, +4.0f));
__m128 test_mm_div_ss(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_div_ss
@@ -386,6 +411,7 @@ __m128 test_mm_div_ss(__m128 A, __m128 B) {
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
return _mm_div_ss(A, B);
}
+TEST_CONSTEXPR(match_m128(_mm_div_ss((__m128){+1.0f, +5.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +0.125f, +5.0f, +2.0f, +4.0f));
unsigned int test_MM_GET_EXCEPTION_MASK(void) {
// CHECK-LABEL: test_MM_GET_EXCEPTION_MASK
@@ -517,18 +543,21 @@ __m128 test_mm_move_ss(__m128 A, __m128 B) {
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
return _mm_move_ss(A, B);
}
+TEST_CONSTEXPR(match_m128(_mm_move_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +8.0f, +0.0f, +2.0f, +4.0f));
__m128 test_mm_movehl_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_movehl_ps
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
return _mm_movehl_ps(A, B);
}
+TEST_CONSTEXPR(match_m128(_mm_movehl_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +2.0f, +1.0f, +2.0f, +4.0f));
__m128 test_mm_movelh_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_movelh_ps
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
return _mm_movelh_ps(A, B);
}
+TEST_CONSTEXPR(match_m128(_mm_movelh_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +1.0f, +0.0f, +8.0f, +4.0f));
int test_mm_movemask_ps(__m128 A) {
// CHECK-LABEL: test_mm_movemask_ps
@@ -541,6 +570,7 @@ __m128 test_mm_mul_ps(__m128 A, __m128 B) {
// CHECK: fmul <4 x float>
return _mm_mul_ps(A, B);
}
+TEST_CONSTEXPR(match_m128(_mm_mul_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +8.0f, +0.0f, +4.0f, +4.0f));
__m128 test_mm_mul_ss(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_mul_ss
@@ -550,12 +580,14 @@ __m128 test_mm_mul_ss(__m128 A, __m128 B) {
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
return _mm_mul_ss(A, B);
}
+TEST_CONSTEXPR(match_m128(_mm_mul_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +8.0f, +0.0f, +2.0f, +4.0f));
__m128 test_mm_or_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_or_ps
// CHECK: or <4 x i32>
return _mm_or_ps(A, B);
}
+TEST_CONSTEXPR(match_m128(_mm_or_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), -4.0f, -5.0f, -6.0f, +7.0f));
void test_mm_prefetch(char const* p) {
// CHECK-LABEL: test_mm_prefetch
@@ -628,6 +660,7 @@ __m128 test_mm_set_ps(float A, float B, float C, float D) {
// CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
return _mm_set_ps(A, B, C, D);
}
+TEST_CONSTEXPR(match_m128(_mm_set_ps(+0.0f, +1.0f, +2.0f, +3.0f), +3.0f, +2.0f, +1.0f, +0.0f));
__m128 test_mm_set_ps1(float A) {
// CHECK-LABEL: test_mm_set_ps1
@@ -637,6 +670,7 @@ __m128 test_mm_set_ps1(float A) {
// CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
return _mm_set_ps1(A);
}
+TEST_CONSTEXPR(match_m128(_mm_set_ps1(-2.0f), -2.0f, -2.0f, -2.0f, -2.0f));
void test_MM_SET_ROUNDING_MODE(unsigned int A) {
// CHECK-LABEL: test_MM_SET_ROUNDING_MODE
@@ -657,6 +691,7 @@ __m128 test_mm_set_ss(float A) {
// CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 3
return _mm_set_ss(A);
}
+TEST_CONSTEXPR(match_m128(_mm_set_ss(1.0f), +1.0f, +0.0f, +0.0f, +0.0f));
__m128 test_mm_set1_ps(float A) {
// CHECK-LABEL: test_mm_set1_ps
@@ -666,6 +701,7 @@ __m128 test_mm_set1_ps(float A) {
// CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
return _mm_set1_ps(A);
}
+TEST_CONSTEXPR(match_m128(_mm_set1_ps(2.0f), +2.0f, +2.0f, +2.0f, +2.0f));
void test_mm_setcsr(unsigned int A) {
// CHECK-LABEL: test_mm_setcsr
@@ -682,12 +718,14 @@ __m128 test_mm_setr_ps(float A, float B, float C, float D) {
// CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
return _mm_setr_ps(A, B, C, D);
}
+TEST_CONSTEXPR(match_m128(_mm_setr_ps(+0.0f, +1.0f, +2.0f, +3.0f), +0.0f, +1.0f, +2.0f, +3.0f));
__m128 test_mm_setzero_ps(void) {
// CHECK-LABEL: test_mm_setzero_ps
// CHECK: store <4 x float> zeroinitializer
return _mm_setzero_ps();
}
+TEST_CONSTEXPR(match_m128(_mm_setzero_ps(), +0.0f, +0.0f, +0.0f, +0.0f));
void test_mm_sfence(void) {
// CHECK-LABEL: test_mm_sfence
@@ -787,6 +825,7 @@ __m128 test_mm_sub_ps(__m128 A, __m128 B) {
// CHECK: fsub <4 x float>
return _mm_sub_ps(A, B);
}
+TEST_CONSTEXPR(match_m128(_mm_sub_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), -7.0f, -4.0f, +0.0f, +3.0f));
__m128 test_mm_sub_ss(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_sub_ss
@@ -796,6 +835,7 @@ __m128 test_mm_sub_ss(__m128 A, __m128 B) {
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
return _mm_sub_ss(A, B);
}
+TEST_CONSTEXPR(match_m128(_mm_sub_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), -7.0f, +0.0f, +2.0f, +4.0f));
void test_MM_TRANSPOSE4_PS(__m128 *A, __m128 *B, __m128 *C, __m128 *D) {
// CHECK-LABEL: test_MM_TRANSPOSE4_PS
@@ -857,107 +897,18 @@ __m128 test_mm_unpackhi_ps(__m128 A, __m128 B) {
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
return _mm_unpackhi_ps(A, B);
}
+TEST_CONSTEXPR(match_m128(_mm_unpackhi_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +2.0f, +2.0f, +4.0f, +1.0f));
__m128 test_mm_unpacklo_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_unpacklo_ps
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
return _mm_unpacklo_ps(A, B);
}
+TEST_CONSTEXPR(match_m128(_mm_unpacklo_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +1.0f, +8.0f, +0.0f, +4.0f));
__m128 test_mm_xor_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_xor_ps
// CHECK: xor <4 x i32>
return _mm_xor_ps(A, B);
}
-
-// Test constexpr handling.
-#if defined(__cplusplus) && (__cplusplus >= 201103L)
-
-void test_constexpr() {
- constexpr __m128 k1 {+1.0f,+0.0f,+2.0f,+4.0f};
- constexpr __m128 k2 {+8.0f,+4.0f,+2.0f,+1.0f};
- constexpr __m128 k3 {-4.0f,-5.0f,+6.0f,+7.0f};
- constexpr __m128 k4 {+0.0f,-0.0f,-0.0f,+0.0f};
-
- constexpr __m128 v_mm_set_ss = _mm_set_ss(1.0f);
- static_assert(v_mm_set_ss[0] == +1.0f && v_mm_set_ss[1] == +0.0f && v_mm_set_ss[2] == +0.0f && v_mm_set_ss[3] == +0.0f);
-
- constexpr __m128 v_mm_set1_ps = _mm_set1_ps(2.0f);
- static_assert(v_mm_set1_ps[0] == +2.0f && v_mm_set1_ps[1] == +2.0f && v_mm_set1_ps[2] == +2.0f && v_mm_set1_ps[3] == +2.0f);
-
- constexpr __m128 v_mm_set_ps1 = _mm_set_ps1(-2.0f);
- static_assert(v_mm_set_ps1[0] == -2.0f && v_mm_set_ps1[1] == -2.0f && v_mm_set_ps1[2] == -2.0f && v_mm_set_ps1[3] == -2.0f);
-
- constexpr __m128 v_mm_set_ps = _mm_set_ps(+0.0f, +1.0f, +2.0f, +3.0f);
- static_assert(v_mm_set_ps[0] == +3.0f && v_mm_set_ps[1] == +2.0f && v_mm_set_ps[2] == +1.0f && v_mm_set_ps[3] == +0.0f);
-
- constexpr __m128 v_mm_setr_ps = _mm_setr_ps(+0.0f, +1.0f, +2.0f, +3.0f);
- static_assert(v_mm_setr_ps[0] == +0.0f && v_mm_setr_ps[1] == +1.0f && v_mm_setr_ps[2] == +2.0f && v_mm_setr_ps[3] == +3.0f);
-
- constexpr __m128 v_mm_setzero_ps = _mm_setzero_ps();
- static_assert(v_mm_setzero_ps[0] == +0.0f && v_mm_setzero_ps[1] == +0.0f && v_mm_setzero_ps[2] == +0.0f && v_mm_setzero_ps[3] == +0.0f);
-
- constexpr __m128 v_mm_add_ss = _mm_add_ss(k1, k2);
- static_assert(v_mm_add_ss[0] == +9.0f && v_mm_add_ss[1] == +0.0f && v_mm_add_ss[2] == +2.0f && v_mm_add_ss[3] == +4.0f);
-
- constexpr __m128 v_mm_add_ps = _mm_add_ps(k1, k2);
- static_assert(v_mm_add_ps[0] == +9.0f && v_mm_add_ps[1] == +4.0f && v_mm_add_ps[2] == +4.0f && v_mm_add_ps[3] == +5.0f);
-
- constexpr __m128 v_mm_sub_ss = _mm_sub_ss(k1, k2);
- static_assert(v_mm_sub_ss[0] == -7.0f && v_mm_sub_ss[1] == +0.0f && v_mm_sub_ss[2] == +2.0f && v_mm_sub_ss[3] == +4.0f);
-
- constexpr __m128 v_mm_sub_ps = _mm_sub_ps(k1, k2);
- static_assert(v_mm_sub_ps[0] == -7.0f && v_mm_sub_ps[1] == -4.0f && v_mm_sub_ps[2] == +0.0f && v_mm_sub_ps[3] == +3.0f);
-
- constexpr __m128 v_mm_mul_ss = _mm_mul_ss(k1, k2);
- static_assert(v_mm_mul_ss[0] == +8.0f && v_mm_mul_ss[1] == +0.0f && v_mm_mul_ss[2] == +2.0f && v_mm_mul_ss[3] == +4.0f);
-
- constexpr __m128 v_mm_mul_ps = _mm_mul_ps(k1, k2);
- static_assert(v_mm_mul_ps[0] == +8.0f && v_mm_mul_ps[1] == +0.0f && v_mm_mul_ps[2] == +4.0f && v_mm_mul_ps[3] == +4.0f);
-
- constexpr __m128 v_mm_div_ss = _mm_div_ss(k1, k2);
- static_assert(v_mm_div_ss[0] == +0.125f && v_mm_div_ss[1] == +0.0f && v_mm_div_ss[2] == +2.0f && v_mm_div_ss[3] == +4.0f);
-
- constexpr __m128 v_mm_div_ps = _mm_div_ps(k1, k2);
- static_assert(v_mm_div_ps[0] == +0.125f && v_mm_div_ps[1] == +0.0f && v_mm_div_ps[2] == +1.0f && v_mm_div_ps[3] == +4.0f);
-
- constexpr __m128 v_mm_and_ps = _mm_and_ps(k3, k4);
- static_assert(v_mm_and_ps[0] == +0.0f && v_mm_and_ps[1] == +0.0f && v_mm_and_ps[2] == +0.0f && v_mm_and_ps[3] == +0.0f);
-
- constexpr __m128 v_mm_andnot_ps = _mm_andnot_ps(k3, k4);
- static_assert(v_mm_andnot_ps[0] == +0.0f && v_mm_andnot_ps[1] == +0.0f && v_mm_andnot_ps[2] == +0.0f && v_mm_andnot_ps[3] == +0.0f);
-
- constexpr __m128 v_mm_or_ps = _mm_or_ps(k3, k4);
- static_assert(v_mm_or_ps[0] == -4.0f && v_mm_or_ps[1] == -5.0f && v_mm_or_ps[2] == -6.0f && v_mm_or_ps[3] == +7.0f);
-
- constexpr __m128 v_mm_xor_ps = _mm_xor_ps(k3, k4);
- static_assert(v_mm_xor_ps[0] == -4.0f && v_mm_xor_ps[1] == +5.0f && v_mm_xor_ps[2] == -6.0f && v_mm_xor_ps[3] == +7.0f);
-
- constexpr __m128 v_mm_unpackhi_ps = _mm_unpackhi_ps(k1, k2);
- static_assert(v_mm_unpackhi_ps[0] == +2.0f && v_mm_unpackhi_ps[1] == +2.0f && v_mm_unpackhi_ps[2] == +4.0f && v_mm_unpackhi_ps[3] == +1.0f);
-
- constexpr __m128 v_mm_unpacklo_ps = _mm_unpacklo_ps(k1, k2);
- static_assert(v_mm_unpacklo_ps[0] == +1.0f && v_mm_unpacklo_ps[1] == +8.0f && v_mm_unpacklo_ps[2] == +0.0f && v_mm_unpacklo_ps[3] == +4.0f);
-
- constexpr __m128 v_mm_move_ss = _mm_move_ss(k1, k2);
- static_assert(v_mm_move_ss[0] == +8.0f && v_mm_move_ss[1] == +0.0f && v_mm_move_ss[2] == +2.0f && v_mm_move_ss[3] == +4.0f);
-
- constexpr __m128 v_mm_movehl_ps = _mm_movehl_ps(k1, k2);
- static_assert(v_mm_movehl_ps[0] == +2.0f && v_mm_movehl_ps[1] == +1.0f && v_mm_movehl_ps[2] == +2.0f && v_mm_movehl_ps[3] == +4.0f);
-
- constexpr __m128 v_mm_movelh_ps = _mm_movelh_ps(k1, k2);
- static_assert(v_mm_movelh_ps[0] == +1.0f && v_mm_movelh_ps[1] == +0.0f && v_mm_movelh_ps[2] == +8.0f && v_mm_movelh_ps[3] == +4.0f);
-
- constexpr __m128 v_mm_cvtsi32_ss = _mm_cvtsi32_ss(k1, 42);
- static_assert(v_mm_cvtsi32_ss[0] == 42.0f && v_mm_cvtsi32_ss[1] == +0.0f && v_mm_cvtsi32_ss[2] == +2.0f && v_mm_cvtsi32_ss[3] == +4.0f);
-
- constexpr __m128 v_mm_cvt_si2ss = _mm_cvt_si2ss(k2, -99);
- static_assert(v_mm_cvt_si2ss[0] == -99.0f && v_mm_cvt_si2ss[1] == +4.0f && v_mm_cvt_si2ss[2] == +2.0f && v_mm_cvt_si2ss[3] == +1.0f);
-
- constexpr __m128 v_mm_cvtsi64_ss = _mm_cvtsi64_ss(k3, 555);
- static_assert(v_mm_cvtsi64_ss[0] == 555.0f && v_mm_cvtsi64_ss[1] == -5.0f && v_mm_cvtsi64_ss[2] == +6.0f && v_mm_cvtsi64_ss[3] == +7.0f);
-
- static_assert(_mm_cvtss_f32(k2) == +8.0f);
-}
-
-#endif
\ No newline at end of file
+TEST_CONSTEXPR(match_m128(_mm_xor_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), -4.0f, +5.0f, -6.0f, +0.0f));
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index 0603ca5f78b6a1..4010894ee6e73d 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -12,6 +12,21 @@
#include <immintrin.h>
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+constexpr bool match_m128(__m128 v, float x, float y, float z, float w) {
+ return v[0] == x && v[1] == y && v[2] == z && v[3] == w;
+}
+constexpr bool match_m128d(__m128d v, double x, double y) {
+ return v[0] == x && v[1] == y;
+}
+constexpr bool match_m128i(__m128i v, unsigned long long x, unsigned long long y) {
+ return v[0] == x && v[1] == y;
+}
+#define TEST_CONSTEXPR(...) static_assert(__VA_ARGS__)
+#else
+#define TEST_CONSTEXPR(...)
+#endif
+
// NOTE: This should match the tests in llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
__m128i test_mm_add_epi8(__m128i A, __m128i B) {
@@ -43,6 +58,7 @@ __m128d test_mm_add_pd(__m128d A, __m128d B) {
// CHECK: fadd <2 x double>
return _mm_add_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_add_pd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), +6.0, -8.0));
__m128d test_mm_add_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_add_sd
@@ -52,6 +68,7 @@ __m128d test_mm_add_sd(__m128d A, __m128d B) {
// CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
return _mm_add_sd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_add_sd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), +6.0, -3.0));
__m128i test_mm_adds_epi8(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_adds_epi8
@@ -84,6 +101,7 @@ __m128d test_mm_and_pd(__m128d A, __m128d B) {
// CHECK: and <2 x i64>
return _mm_and_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_and_pd((__m128d){+1.0, -3.0}, (__m128d){+0.0, -0.0}), +0.0, -0.0));
__m128i test_mm_and_si128(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_and_si128
@@ -97,6 +115,7 @@ __m128d test_mm_andnot_pd(__m128d A, __m128d B) {
// CHECK: and <2 x i64>
return _mm_andnot_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_andnot_pd((__m128d){+1.0, -3.0}, (__m128d){+0.0, -0.0}), +0.0, -0.0));
__m128i test_mm_andnot_si128(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_andnot_si128
@@ -133,11 +152,13 @@ __m128 test_mm_castpd_ps(__m128d A) {
// CHECK-LABEL: test_mm_castpd_ps
return _mm_castpd_ps(A);
}
+TEST_CONSTEXPR(match_m128(_mm_castpd_ps((__m128d){-1.0, +2.0}), +0.0f, -1.875f, +0.0f, +2.0f));
__m128i test_mm_castpd_si128(__m128d A) {
// CHECK-LABEL: test_mm_castpd_si128
return _mm_castpd_si128(A);
}
+TEST_CONSTEXPR(match_m128i(_mm_castpd_si128((__m128d){-1.0, +2.0}), 0xBFF0000000000000ULL, 0x4000000000000000ULL));
__m128d test_mm_castps_pd(__m128 A) {
// CHECK-LABEL: test_mm_castps_pd
@@ -499,12 +520,14 @@ __m128d test_mm_cvtepi32_pd(__m128i A) {
// CHECK: sitofp <2 x i32> %{{.*}} to <2 x double>
return _mm_cvtepi32_pd(A);
}
+TEST_CONSTEXPR(match_m128d(_mm_cvtepi32_pd((__m128i)(__v4si){-9, +8, -6, 0}), -9.0, +8.0));
__m128 test_mm_cvtepi32_ps(__m128i A) {
// CHECK-LABEL: test_mm_cvtepi32_ps
// CHECK: sitofp <4 x i32> %{{.*}} to <4 x float>
return _mm_cvtepi32_ps(A);
}
+TEST_CONSTEXPR(match_m128(_mm_cvtepi32_ps((__m128i)(__v4si){-3, +2, -1, 0}), -3.0f, +2.0f, -1.0f, +0.0f));
__m128i test_mm_cvtpd_epi32(__m128d A) {
// CHECK-LABEL: test_mm_cvtpd_epi32
@@ -530,12 +553,14 @@ __m128d test_mm_cvtps_pd(__m128 A) {
// CHECK: fpext <2 x float> %{{.*}} to <2 x double>
return _mm_cvtps_pd(A);
}
+TEST_CONSTEXPR(match_m128d(_mm_cvtps_pd((__m128){-1.0f, +2.0f, -3.0f, +4.0f}), -1.0, +2.0));
double test_mm_cvtsd_f64(__m128d A) {
// CHECK-LABEL: test_mm_cvtsd_f64
// CHECK: extractelement <2 x double> %{{.*}}, i32 0
return _mm_cvtsd_f64(A);
}
+TEST_CONSTEXPR(_mm_cvtsd_f64((__m128d){-4.0, +8.0}) == -4.0);
int test_mm_cvtsd_si32(__m128d A) {
// CHECK-LABEL: test_mm_cvtsd_si32
@@ -575,6 +600,7 @@ __m128d test_mm_cvtsi32_sd(__m128d A, int B) {
// CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
return _mm_cvtsi32_sd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_cvtsi32_sd((__m128d){-99.0, +42.0}, 55), +55.0, +42.0));
__m128i test_mm_cvtsi32_si128(int A) {
// CHECK-LABEL: test_mm_cvtsi32_si128
@@ -608,6 +634,7 @@ __m128d test_mm_cvtss_sd(__m128d A, __m128 B) {
// CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
return _mm_cvtss_sd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_cvtss_sd((__m128d){+32.0, +8.0}, (__m128){-1.0f, +2.0f, -3.0f, +4.0f}), -1.0, +8.0));
__m128i test_mm_cvttpd_epi32(__m128d A) {
// CHECK-LABEL: test_mm_cvttpd_epi32
@@ -640,6 +667,7 @@ __m128d test_mm_div_pd(__m128d A, __m128d B) {
// CHECK: fdiv <2 x double>
return _mm_div_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_div_pd((__m128d){+2.0, +8.0}, (__m128d){-4.0, +2.0}), -0.5, +4.0));
__m128d test_mm_div_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_div_sd
@@ -649,6 +677,7 @@ __m128d test_mm_div_sd(__m128d A, __m128d B) {
// CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
return _mm_div_sd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_div_sd((__m128d){+2.0, +8.0}, (__m128d){-4.0, +2.0}), -0.5, +8.0));
// Lowering to pextrw requires optimization.
int test_mm_extract_epi16(__m128i A) {
@@ -873,6 +902,7 @@ __m128d test_mm_move_sd(__m128d A, __m128d B) {
// CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
return _mm_move_sd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_move_sd((__m128d){+2.0, +8.0}, (__m128d){-4.0, +2.0}), -4.0, +8.0));
int test_mm_movemask_epi8(__m128i A) {
// CHECK-LABEL: test_mm_movemask_epi8
@@ -899,6 +929,7 @@ __m128d test_mm_mul_pd(__m128d A, __m128d B) {
// CHECK: fmul <2 x double> %{{.*}}, %{{.*}}
return _mm_mul_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_mul_pd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), +5.0, +15.0));
__m128d test_mm_mul_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_mul_sd
@@ -908,6 +939,7 @@ __m128d test_mm_mul_sd(__m128d A, __m128d B) {
// CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
return _mm_mul_sd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_mul_sd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), +5.0, -3.0));
__m128i test_mm_mulhi_epi16(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_mulhi_epi16
@@ -932,6 +964,7 @@ __m128d test_mm_or_pd(__m128d A, __m128d B) {
// CHECK: or <2 x i64> %{{.*}}, %{{.*}}
return _mm_or_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_or_pd((__m128d){+1.0, -3.0}, (__m128d){-0.0, +0.0}), -1.0, -3.0));
__m128i test_mm_or_si128(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_or_si128
@@ -1036,6 +1069,7 @@ __m128d test_mm_set_pd(double A, double B) {
// CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
return _mm_set_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_set_pd(-9.0, +3.0), +3.0, -9.0));
__m128d test_mm_set_pd1(double A) {
// CHECK-LABEL: test_mm_set_pd1
@@ -1043,6 +1077,7 @@ __m128d test_mm_set_pd1(double A) {
// CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
return _mm_set_pd1(A);
}
+TEST_CONSTEXPR(match_m128d(_mm_set_pd1(+5.0), +5.0, +5.0));
__m128d test_mm_set_sd(double A) {
// CHECK-LABEL: test_mm_set_sd
@@ -1050,6 +1085,7 @@ __m128d test_mm_set_sd(double A) {
// CHECK: insertelement <2 x double> %{{.*}}, double 0.000000e+00, i32 1
return _mm_set_sd(A);
}
+TEST_CONSTEXPR(match_m128d(_mm_set_sd(+1.0), +1.0, +0.0));
__m128i test_mm_set1_epi8(char A) {
// CHECK-LABEL: test_mm_set1_epi8
@@ -1114,6 +1150,7 @@ __m128d test_mm_set1_pd(double A) {
// CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
return _mm_set1_pd(A);
}
+TEST_CONSTEXPR(match_m128d(_mm_set1_pd(-42.0), -42.0, -42.0));
__m128i test_mm_setr_epi8(char A, char B, char C, char D,
char E, char F, char G, char H,
@@ -1175,18 +1212,21 @@ __m128d test_mm_setr_pd(double A, double B) {
// CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
return _mm_setr_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_setr_pd(-9.0, +3.0), -9.0, +3.0));
__m128d test_mm_setzero_pd(void) {
// CHECK-LABEL: test_mm_setzero_pd
// CHECK: store <2 x double> zeroinitializer
return _mm_setzero_pd();
}
+TEST_CONSTEXPR(match_m128d(_mm_setzero_pd(), +0.0, +0.0));
__m128i test_mm_setzero_si128(void) {
// CHECK-LABEL: test_mm_setzero_si128
// CHECK: store <2 x i64> zeroinitializer
return _mm_setzero_si128();
}
+TEST_CONSTEXPR(match_m128i(_mm_setzero_si128(), 0, 0));
__m128i test_mm_shuffle_epi32(__m128i A) {
// CHECK-LABEL: test_mm_shuffle_epi32
@@ -1620,6 +1660,7 @@ __m128d test_mm_sub_pd(__m128d A, __m128d B) {
// CHECK: fsub <2 x double>
return _mm_sub_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_sub_pd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), -4.0, +2.0));
__m128d test_mm_sub_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_sub_sd
@@ -1629,6 +1670,7 @@ __m128d test_mm_sub_sd(__m128d A, __m128d B) {
// CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
return _mm_sub_sd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_sub_sd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), -4.0, -3.0));
__m128i test_mm_subs_epi8(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_subs_epi8
@@ -1736,6 +1778,7 @@ __m128d test_mm_unpackhi_pd(__m128d A, __m128d B) {
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 3>
return _mm_unpackhi_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_unpackhi_pd((__m128d){+2.0, +8.0}, (__m128d){-4.0, -2.0}), +8.0, -2.0));
__m128i test_mm_unpacklo_epi8(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_unpacklo_epi8
@@ -1766,123 +1809,17 @@ __m128d test_mm_unpacklo_pd(__m128d A, __m128d B) {
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 2>
return _mm_unpacklo_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_unpacklo_pd((__m128d){+2.0, +8.0}, (__m128d){-4.0, -2.0}), +2.0, -4.0));
__m128d test_mm_xor_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_xor_pd
// CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
return _mm_xor_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_xor_pd((__m128d){+1.0, -3.0}, (__m128d){+0.0, -0.0}), +1.0, +3.0));
__m128i test_mm_xor_si128(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_xor_si128
// CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
return _mm_xor_si128(A, B);
}
-
-// Test constexpr handling.
-#if defined(__cplusplus) && (__cplusplus >= 201103L)
-
-void test_constexpr() {
- constexpr __m128d kd1 {+2.0,-1.0};
- constexpr __m128d kd2 {-4.0,-2.0};
- constexpr __m128d kd3 {-0.0,+0.0};
-
- constexpr __m128 kf1 {-1.0f,+2.0f,-3.0f,+4.0f};
-
- constexpr __m64 km1 {0x00000080FFFFFFF0ULL}; // -16,+128
- constexpr __m128i ki1 {0x00000010FFFFFFF8ULL, 0x00000001FFFFFFFFULL}; // -8,+16,-1,1
-
- constexpr __m128d v_mm_set_sd = _mm_set_sd(1.0);
- static_assert(v_mm_set_sd[0] == +1.0 && v_mm_set_sd[1] == +0.0);
-
- constexpr __m128d v_mm_set1_pd = _mm_set1_pd(2.0);
- static_assert(v_mm_set1_pd[0] == +2.0 && v_mm_set1_pd[1] == +2.0);
-
- constexpr __m128d v_mm_set_pd1 = _mm_set_pd1(-2.0);
- static_assert(v_mm_set_pd1[0] == -2.0 && v_mm_set_pd1[1] == -2.0);
-
- constexpr __m128d v_mm_set_pd = _mm_set_pd(+2.0, +3.0);
- static_assert(v_mm_set_pd[0] == +3.0 && v_mm_set_pd[1] == +2.0);
-
- constexpr __m128d v_mm_setr_pd = _mm_setr_pd(+2.0, +3.0);
- static_assert(v_mm_setr_pd[0] == +2.0 && v_mm_setr_pd[1] == +3.0);
-
- constexpr __m128d v_mm_setzero_pd = _mm_setzero_pd();
- static_assert(v_mm_setzero_pd[0] == +0.0 && v_mm_setzero_pd[1] == +0.0);
-
- constexpr __m128i v_mm_setzero_si128 = _mm_setzero_si128();
- static_assert(v_mm_setzero_si128[0] == 0x0000000000000000ULL && v_mm_setzero_si128[1] == 0x0000000000000000ULL);
-
- constexpr __m128d v_mm_add_sd = _mm_add_sd(kd1, kd2);
- static_assert(v_mm_add_sd[0] == -2.0 && v_mm_add_sd[1] == -1.0);
-
- constexpr __m128d v_mm_add_pd = _mm_add_pd(kd1, kd2);
- static_assert(v_mm_add_pd[0] == -2.0 && v_mm_add_pd[1] == -3.0);
-
- constexpr __m128d v_mm_sub_sd = _mm_sub_sd(kd1, kd2);
- static_assert(v_mm_sub_sd[0] == +6.0 && v_mm_sub_sd[1] == -1.0);
-
- constexpr __m128d v_mm_sub_pd = _mm_sub_pd(kd1, kd2);
- static_assert(v_mm_sub_pd[0] == +6.0 && v_mm_sub_pd[1] == +1.0);
-
- constexpr __m128d v_mm_mul_sd = _mm_mul_sd(kd1, kd2);
- static_assert(v_mm_mul_sd[0] == -8.0 && v_mm_mul_sd[1] == -1.0);
-
- constexpr __m128d v_mm_mul_pd = _mm_mul_pd(kd1, kd2);
- static_assert(v_mm_mul_pd[0] == -8.0 && v_mm_mul_pd[1] == +2.0);
-
- constexpr __m128d v_mm_div_sd = _mm_div_sd(kd1, kd2);
- static_assert(v_mm_div_sd[0] == -0.5 && v_mm_div_sd[1] == -1.0);
-
- constexpr __m128d v_mm_div_pd = _mm_div_pd(kd1, kd2);
- static_assert(v_mm_div_pd[0] == -0.5 && v_mm_div_pd[1] == +0.5);
-
- constexpr __m128d v_mm_and_pd = _mm_and_pd(kd1, kd3);
- static_assert(v_mm_and_pd[0] == +0.0 && v_mm_and_pd[1] == +0.0);
-
- constexpr __m128d v_mm_andnot_pd = _mm_andnot_pd(kd1, kd3);
- static_assert(v_mm_andnot_pd[0] == -0.0 && v_mm_andnot_pd[1] == +0.0);
-
- constexpr __m128d v_mm_or_pd = _mm_or_pd(kd1, kd3);
- static_assert(v_mm_or_pd[0] == -2.0 && v_mm_or_pd[1] == -1.0);
-
- constexpr __m128d v_mm_xor_pd = _mm_xor_pd(kd2, kd3);
- static_assert(v_mm_xor_pd[0] == +4.0 && v_mm_xor_pd[1] == -2.0);
-
- constexpr __m128d v_mm_cvtps_pd = _mm_cvtps_pd(kf1);
- static_assert(v_mm_cvtps_pd[0] == -1.0 && v_mm_cvtps_pd[1] == +2.0);
-
- constexpr __m128d v_mm_cvtepi32_pd = _mm_cvtepi32_pd(ki1);
- static_assert(v_mm_cvtepi32_pd[0] == -8.0 && v_mm_cvtepi32_pd[1] == +16.0);
-
- constexpr __m128 v_mm_cvtepi32_ps = _mm_cvtepi32_ps(ki1);
- static_assert(v_mm_cvtepi32_ps[0] == -8.0f && v_mm_cvtepi32_ps[1] == +16.0f && v_mm_cvtepi32_ps[2] == -1.0f && v_mm_cvtepi32_ps[3] == +1.0f);
-
- constexpr __m128d v_mm_cvtsi32_sd = _mm_cvtsi32_sd(kd1, 8);
- static_assert(v_mm_cvtsi32_sd[0] == +8.0 && v_mm_cvtsi32_sd[1] == -1.0);
-
- constexpr __m128d v_mm_cvtss_sd = _mm_cvtss_sd(kd2, kf1);
- static_assert(v_mm_cvtss_sd[0] == -1.0 && v_mm_cvtss_sd[1] == -2.0);
-
- constexpr __m128d v_mm_cvtpi32_pd = _mm_cvtpi32_pd(km1);
- static_assert(v_mm_cvtpi32_pd[0] == -16.0 && v_mm_cvtpi32_pd[1] == 128.0);
-
- static_assert(_mm_cvtsd_f64(kd2) == -4.0);
-
- constexpr __m128d v_mm_move_sd = _mm_move_sd(kd1, kd2);
- static_assert(v_mm_move_sd[0] == -4.0 && v_mm_move_sd[1] == -1.0);
-
- constexpr __m128d v_mm_unpackhi_pd = _mm_unpackhi_pd(kd1, kd2);
- static_assert(v_mm_unpackhi_pd[0] == -1.0f && v_mm_unpackhi_pd[1] == -2.0f);
-
- constexpr __m128d v_mm_unpacklo_pd = _mm_unpacklo_pd(kd1, kd2);
- static_assert(v_mm_unpacklo_pd[0] == +2.0f && v_mm_unpacklo_pd[1] == -4.0f);
-
- constexpr __m128 v_mm_castpd_ps = _mm_castpd_ps(kd3);
- static_assert(v_mm_castpd_ps[0] == -0.0f && v_mm_castpd_ps[1] == +0.0f && v_mm_castpd_ps[2] == +0.0f && v_mm_castpd_ps[3] == +0.0f);
-
- constexpr __m128i v_mm_castpd_si128 = _mm_castpd_si128(kd3);
- static_assert(v_mm_castpd_si128[0] == 0x8000000000000000ULL && v_mm_castpd_si128[1] == 0x0000000000000000ULL);
-}
-
-#endif
diff --git a/clang/test/CodeGen/X86/sse3-builtins.c b/clang/test/CodeGen/X86/sse3-builtins.c
index 18c062f4c14a7d..27327aa64a9107 100644
--- a/clang/test/CodeGen/X86/sse3-builtins.c
+++ b/clang/test/CodeGen/X86/sse3-builtins.c
@@ -6,6 +6,18 @@
#include <immintrin.h>
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+constexpr bool match_m128(__m128 v, float x, float y, float z, float w) {
+ return v[0] == x && v[1] == y && v[2] == z && v[3] == w;
+}
+constexpr bool match_m128d(__m128d v, double x, double y) {
+ return v[0] == x && v[1] == y;
+}
+#define TEST_CONSTEXPR(...) static_assert(__VA_ARGS__)
+#else
+#define TEST_CONSTEXPR(...)
+#endif
+
// NOTE: This should match the tests in llvm/test/CodeGen/X86/sse3-intrinsics-fast-isel.ll
__m128d test_mm_addsub_pd(__m128d A, __m128d B) {
@@ -63,34 +75,18 @@ __m128d test_mm_movedup_pd(__m128d A) {
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
return _mm_movedup_pd(A);
}
+TEST_CONSTEXPR(match_m128d(_mm_movedup_pd((__m128d){+7.0, -7.0}), +7.0, +7.0));
__m128 test_mm_movehdup_ps(__m128 A) {
// CHECK-LABEL: test_mm_movehdup_ps
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
return _mm_movehdup_ps(A);
}
+TEST_CONSTEXPR(match_m128(_mm_movehdup_ps((__m128){+1.0f,-1.0f,+2.0f,+4.0f}), -1.0f, -1.0f, +4.0f, +4.0f));
__m128 test_mm_moveldup_ps(__m128 A) {
// CHECK-LABEL: test_mm_moveldup_ps
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
return _mm_moveldup_ps(A);
}
-
-// Test constexpr handling.
-#if defined(__cplusplus) && (__cplusplus >= 201103L)
-
-void test_constexpr() {
- constexpr __m128d kd1 {+7.0,-7.0};
- constexpr __m128 kf1 {+1.0f,-1.0f,+2.0f,+4.0f};
-
- constexpr __m128d v_mm_movedup_pd = _mm_movedup_pd(kd1);
- static_assert(v_mm_movedup_pd[0] == +7.0 && v_mm_movedup_pd[1] == +7.0);
-
- constexpr __m128 v_mm_movehdup_ps = _mm_movehdup_ps(kf1);
- static_assert(v_mm_movehdup_ps[0] == -1.0f && v_mm_movehdup_ps[1] == -1.0f && v_mm_movehdup_ps[2] == +4.0f && v_mm_movehdup_ps[3] == +4.0f);
-
- constexpr __m128 v_mm_moveldup_ps = _mm_moveldup_ps(kf1);
- static_assert(v_mm_moveldup_ps[0] == +1.0f && v_mm_moveldup_ps[1] == +1.0f && v_mm_moveldup_ps[2] == +2.0f && v_mm_moveldup_ps[3] == +2.0f);
-}
-
-#endif
+TEST_CONSTEXPR(match_m128(_mm_moveldup_ps((__m128){+1.0f,-1.0f,+2.0f,+4.0f}), +1.0f, +1.0f, +2.0f, +2.0f));
More information about the cfe-commits
mailing list