[clang] bf6c483 - [clang][x86] Add constexpr support for SSE2 _mm_set*_epi* intrinsics
Simon Pilgrim via cfe-commits
cfe-commits at lists.llvm.org
Tue Oct 29 08:39:38 PDT 2024
Author: Simon Pilgrim
Date: 2024-10-29T15:39:15Z
New Revision: bf6c483e4714841b1511ea3666f05a468bd988fe
URL: https://github.com/llvm/llvm-project/commit/bf6c483e4714841b1511ea3666f05a468bd988fe
DIFF: https://github.com/llvm/llvm-project/commit/bf6c483e4714841b1511ea3666f05a468bd988fe.diff
LOG: [clang][x86] Add constexpr support for SSE2 _mm_set*_epi* intrinsics
Added:
Modified:
clang/lib/Headers/emmintrin.h
clang/test/CodeGen/X86/builtin_test_helpers.h
clang/test/CodeGen/X86/sse2-builtins.c
Removed:
################################################################################
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 778cdf99a12964..4f00b7f1a8d9d4 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -3512,8 +3512,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void) {
/// destination vector of [2 x i64].
/// \returns An initialized 128-bit vector of [2 x i64] containing the values
/// provided in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64x(long long __q1,
- long long __q0) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_set_epi64x(long long __q1, long long __q0) {
return __extension__(__m128i)(__v2di){__q0, __q1};
}
@@ -3533,9 +3533,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64x(long long __q1,
/// destination vector of [2 x i64].
/// \returns An initialized 128-bit vector of [2 x i64] containing the values
/// provided in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64(__m64 __q1,
- __m64 __q0) {
- return _mm_set_epi64x((long long)__q1, (long long)__q0);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_set_epi64(__m64 __q1, __m64 __q0) {
+ return _mm_set_epi64x((long long)__q1[0], (long long)__q0[0]);
}
/// Initializes the 32-bit values in a 128-bit vector of [4 x i32] with
@@ -3560,8 +3560,10 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64(__m64 __q1,
/// vector.
/// \returns An initialized 128-bit vector of [4 x i32] containing the values
/// provided in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi32(int __i3, int __i2,
- int __i1, int __i0) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set_epi32(int __i3,
+ int __i2,
+ int __i1,
+ int __i0) {
return __extension__(__m128i)(__v4si){__i0, __i1, __i2, __i3};
}
@@ -3599,7 +3601,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi32(int __i3, int __i2,
/// vector.
/// \returns An initialized 128-bit vector of [8 x i16] containing the values
/// provided in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3,
short __w2, short __w1, short __w0) {
return __extension__(__m128i)(__v8hi){__w0, __w1, __w2, __w3,
@@ -3648,7 +3650,7 @@ _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3,
/// Initializes bits [7:0] of the destination vector.
/// \returns An initialized 128-bit vector of [16 x i8] containing the values
/// provided in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11,
char __b10, char __b9, char __b8, char __b7, char __b6, char __b5,
char __b4, char __b3, char __b2, char __b1, char __b0) {
@@ -3670,7 +3672,8 @@ _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11,
/// vector.
/// \returns An initialized 128-bit integer vector of [2 x i64] with both
/// elements containing the value provided in the operand.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_set1_epi64x(long long __q) {
return _mm_set_epi64x(__q, __q);
}
@@ -3687,7 +3690,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q) {
/// vector.
/// \returns An initialized 128-bit vector of [2 x i64] with all elements
/// containing the value provided in the operand.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64(__m64 __q) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_set1_epi64(__m64 __q) {
return _mm_set_epi64(__q, __q);
}
@@ -3704,7 +3708,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64(__m64 __q) {
/// vector.
/// \returns An initialized 128-bit vector of [4 x i32] with all elements
/// containing the value provided in the operand.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi32(int __i) {
return _mm_set_epi32(__i, __i, __i, __i);
}
@@ -3721,7 +3725,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i) {
/// vector.
/// \returns An initialized 128-bit vector of [8 x i16] with all elements
/// containing the value provided in the operand.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_set1_epi16(short __w) {
return _mm_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w);
}
@@ -3738,7 +3743,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w) {
/// vector.
/// \returns An initialized 128-bit vector of [16 x i8] with all elements
/// containing the value provided in the operand.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi8(char __b) {
return _mm_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b,
__b, __b, __b, __b, __b);
}
@@ -3757,8 +3762,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b) {
/// A 64-bit integral value used to initialize the upper 64 bits of the
/// result.
/// \returns An initialized 128-bit integer vector.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi64(__m64 __q0,
- __m64 __q1) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_setr_epi64(__m64 __q0, __m64 __q1) {
return _mm_set_epi64(__q1, __q0);
}
@@ -3779,9 +3784,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi64(__m64 __q0,
/// \param __i3
/// A 32-bit integral value used to initialize bits [127:96] of the result.
/// \returns An initialized 128-bit integer vector.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi32(int __i0, int __i1,
- int __i2,
- int __i3) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_setr_epi32(int __i0, int __i1, int __i2, int __i3) {
return _mm_set_epi32(__i3, __i2, __i1, __i0);
}
@@ -3810,7 +3814,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi32(int __i0, int __i1,
/// \param __w7
/// A 16-bit integral value used to initialize bits [127:112] of the result.
/// \returns An initialized 128-bit integer vector.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4,
short __w5, short __w6, short __w7) {
return _mm_set_epi16(__w7, __w6, __w5, __w4, __w3, __w2, __w1, __w0);
@@ -3857,7 +3861,7 @@ _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4,
/// \param __b15
/// An 8-bit integral value used to initialize bits [127:120] of the result.
/// \returns An initialized 128-bit integer vector.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
char __b6, char __b7, char __b8, char __b9, char __b10,
char __b11, char __b12, char __b13, char __b14, char __b15) {
diff --git a/clang/test/CodeGen/X86/builtin_test_helpers.h b/clang/test/CodeGen/X86/builtin_test_helpers.h
index f6547d4cb29ca3..01800db33afbb9 100644
--- a/clang/test/CodeGen/X86/builtin_test_helpers.h
+++ b/clang/test/CodeGen/X86/builtin_test_helpers.h
@@ -25,6 +25,16 @@ constexpr bool match_v4si(__m128i _v, int a, int b, int c, int d) {
return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
}
+constexpr bool match_v8hi(__m128i _v, short a, short b, short c, short d, short e, short f, short g, short h) {
+ __v8hi v = (__v8hi)_v;
+ return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h;
+}
+
+constexpr bool match_v16qi(__m128i _v, char a, char b, char c, char d, char e, char f, char g, char h, char i, char j, char k, char l, char m, char n, char o, char p) {
+ __v16qi v = (__v16qi)_v;
+ return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] == l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
+}
+
constexpr bool match_m256(__m256 v, float a, float b, float c, float d, float e, float f, float g, float h) {
return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h;
}
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index 82aa7a2d2b49d1..c4493a49120543 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -1013,6 +1013,7 @@ __m128i test_mm_set_epi8(char A, char B, char C, char D,
// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
return _mm_set_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P);
}
+TEST_CONSTEXPR(match_v16qi(_mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0));
__m128i test_mm_set_epi16(short A, short B, short C, short D,
short E, short F, short G, short H) {
@@ -1027,6 +1028,7 @@ __m128i test_mm_set_epi16(short A, short B, short C, short D,
// CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
return _mm_set_epi16(A, B, C, D, E, F, G, H);
}
+TEST_CONSTEXPR(match_v8hi(_mm_set_epi16(0, -1, -2, -3, -4, -5, -6, -7), -7, -6, -5, -4, -3, -2, -1, 0));
__m128i test_mm_set_epi32(int A, int B, int C, int D) {
// CHECK-LABEL: test_mm_set_epi32
@@ -1036,6 +1038,7 @@ __m128i test_mm_set_epi32(int A, int B, int C, int D) {
// CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
return _mm_set_epi32(A, B, C, D);
}
+TEST_CONSTEXPR(match_v4si(_mm_set_epi32(1, -3, 5, -7), -7, 5, -3, 1));
__m128i test_mm_set_epi64(__m64 A, __m64 B) {
// CHECK-LABEL: test_mm_set_epi64
@@ -1043,6 +1046,7 @@ __m128i test_mm_set_epi64(__m64 A, __m64 B) {
// CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
return _mm_set_epi64(A, B);
}
+TEST_CONSTEXPR(match_v2di(_mm_set_epi64((__m64){-1}, (__m64){42}), 42, -1));
__m128i test_mm_set_epi64x(long long A, long long B) {
// CHECK-LABEL: test_mm_set_epi64x
@@ -1050,6 +1054,7 @@ __m128i test_mm_set_epi64x(long long A, long long B) {
// CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
return _mm_set_epi64x(A, B);
}
+TEST_CONSTEXPR(match_v2di(_mm_set_epi64x(100, -1000), -1000, 100));
__m128d test_mm_set_pd(double A, double B) {
// CHECK-LABEL: test_mm_set_pd
@@ -1095,6 +1100,7 @@ __m128i test_mm_set1_epi8(char A) {
// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
return _mm_set1_epi8(A);
}
+TEST_CONSTEXPR(match_v16qi(_mm_set1_epi8(99), 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99));
__m128i test_mm_set1_epi16(short A) {
// CHECK-LABEL: test_mm_set1_epi16
@@ -1108,6 +1114,7 @@ __m128i test_mm_set1_epi16(short A) {
// CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
return _mm_set1_epi16(A);
}
+TEST_CONSTEXPR(match_v8hi(_mm_set1_epi16(-128), -128, -128, -128, -128, -128, -128, -128, -128));
__m128i test_mm_set1_epi32(int A) {
// CHECK-LABEL: test_mm_set1_epi32
@@ -1117,6 +1124,7 @@ __m128i test_mm_set1_epi32(int A) {
// CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
return _mm_set1_epi32(A);
}
+TEST_CONSTEXPR(match_v4si(_mm_set1_epi32(55), 55, 55, 55, 55));
__m128i test_mm_set1_epi64(__m64 A) {
// CHECK-LABEL: test_mm_set1_epi64
@@ -1124,6 +1132,7 @@ __m128i test_mm_set1_epi64(__m64 A) {
// CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
return _mm_set1_epi64(A);
}
+TEST_CONSTEXPR(match_v2di(_mm_set1_epi64((__m64){-65535}), -65535, -65535));
__m128i test_mm_set1_epi64x(long long A) {
// CHECK-LABEL: test_mm_set1_epi64x
@@ -1131,6 +1140,7 @@ __m128i test_mm_set1_epi64x(long long A) {
// CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
return _mm_set1_epi64x(A);
}
+TEST_CONSTEXPR(match_v2di(_mm_set1_epi64x(65536), 65536, 65536));
__m128d test_mm_set1_pd(double A) {
// CHECK-LABEL: test_mm_set1_pd
@@ -1163,6 +1173,7 @@ __m128i test_mm_setr_epi8(char A, char B, char C, char D,
// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
return _mm_setr_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P);
}
+TEST_CONSTEXPR(match_v16qi(_mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
__m128i test_mm_setr_epi16(short A, short B, short C, short D,
short E, short F, short G, short H) {
@@ -1177,6 +1188,7 @@ __m128i test_mm_setr_epi16(short A, short B, short C, short D,
// CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
return _mm_setr_epi16(A, B, C, D, E, F, G, H);
}
+TEST_CONSTEXPR(match_v8hi(_mm_setr_epi16(0, -1, -2, -3, -4, -5, -6, -7), 0, -1, -2, -3, -4, -5, -6, -7));
__m128i test_mm_setr_epi32(int A, int B, int C, int D) {
// CHECK-LABEL: test_mm_setr_epi32
@@ -1186,6 +1198,7 @@ __m128i test_mm_setr_epi32(int A, int B, int C, int D) {
// CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
return _mm_setr_epi32(A, B, C, D);
}
+TEST_CONSTEXPR(match_v4si(_mm_setr_epi32(1, -3, 5, -7), 1, -3, 5, -7));
__m128i test_mm_setr_epi64(__m64 A, __m64 B) {
// CHECK-LABEL: test_mm_setr_epi64
@@ -1193,6 +1206,7 @@ __m128i test_mm_setr_epi64(__m64 A, __m64 B) {
// CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
return _mm_setr_epi64(A, B);
}
+TEST_CONSTEXPR(match_v2di(_mm_setr_epi64((__m64){-1}, (__m64){42}), -1, 42));
__m128d test_mm_setr_pd(double A, double B) {
// CHECK-LABEL: test_mm_setr_pd
More information about the cfe-commits mailing list