[clang] e281d96 - [clang][x86] Add constexpr support for _mm_add_epi32/64 and _mm_sub_epi32/64
Simon Pilgrim via cfe-commits
cfe-commits at lists.llvm.org
Tue Oct 29 07:34:37 PDT 2024
Author: Simon Pilgrim
Date: 2024-10-29T14:34:19Z
New Revision: e281d96a81bca896692da4a07ca1423ee6dc1f53
URL: https://github.com/llvm/llvm-project/commit/e281d96a81bca896692da4a07ca1423ee6dc1f53
DIFF: https://github.com/llvm/llvm-project/commit/e281d96a81bca896692da4a07ca1423ee6dc1f53.diff
LOG: [clang][x86] Add constexpr support for _mm_add_epi32/64 and _mm_sub_epi32/64
Added:
Modified:
clang/lib/Headers/emmintrin.h
clang/test/CodeGen/X86/builtin_test_helpers.h
clang/test/CodeGen/X86/sse2-builtins.c
Removed:
################################################################################
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index d6494762169b25..778cdf99a12964 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -2110,8 +2110,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a,
/// A 128-bit vector of [4 x i32].
/// \returns A 128-bit vector of [4 x i32] containing the sums of both
/// parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi32(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_add_epi32(__m128i __a, __m128i __b) {
return (__m128i)((__v4su)__a + (__v4su)__b);
}
@@ -2147,8 +2147,8 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_si64(__m64 __a, __m64 __b) {
/// A 128-bit vector of [2 x i64].
/// \returns A 128-bit vector of [2 x i64] containing the sums of both
/// parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi64(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_add_epi64(__m128i __a, __m128i __b) {
return (__m128i)((__v2du)__a + (__v2du)__b);
}
@@ -2539,8 +2539,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a,
/// A 128-bit integer vector containing the subtrahends.
/// \returns A 128-bit integer vector containing the differences of the values
/// in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi32(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sub_epi32(__m128i __a, __m128i __b) {
return (__m128i)((__v4su)__a - (__v4su)__b);
}
@@ -2573,8 +2573,8 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_si64(__m64 __a, __m64 __b) {
/// A 128-bit integer vector containing the subtrahends.
/// \returns A 128-bit integer vector containing the differences of the values
/// in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi64(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sub_epi64(__m128i __a, __m128i __b) {
return (__m128i)((__v2du)__a - (__v2du)__b);
}
diff --git a/clang/test/CodeGen/X86/builtin_test_helpers.h b/clang/test/CodeGen/X86/builtin_test_helpers.h
index 5e77ff3a7ca45e..f6547d4cb29ca3 100644
--- a/clang/test/CodeGen/X86/builtin_test_helpers.h
+++ b/clang/test/CodeGen/X86/builtin_test_helpers.h
@@ -16,6 +16,15 @@ constexpr bool match_m128i(__m128i v, unsigned long long a, unsigned long long b
return v[0] == a && v[1] == b;
}
+constexpr bool match_v2di(__m128i v, long long a, long long b) {
+ return v[0] == a && v[1] == b;
+}
+
+constexpr bool match_v4si(__m128i _v, int a, int b, int c, int d) {
+ __v4si v = (__v4si)_v;
+ return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
+}
+
constexpr bool match_m256(__m256 v, float a, float b, float c, float d, float e, float f, float g, float h) {
return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h;
}
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index 4287d3d4b5ec4e..82aa7a2d2b49d1 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -32,12 +32,14 @@ __m128i test_mm_add_epi32(__m128i A, __m128i B) {
// CHECK: add <4 x i32>
return _mm_add_epi32(A, B);
}
+TEST_CONSTEXPR(match_v4si(_mm_add_epi32((__m128i)(__v4si){+1, -2, +3, -4}, (__m128i)(__v4si){-10, +8, +6, -4}), -9, +6, +9, -8));
__m128i test_mm_add_epi64(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_add_epi64
// CHECK: add <2 x i64>
return _mm_add_epi64(A, B);
}
+TEST_CONSTEXPR(match_v2di(_mm_add_epi64((__m128i)(__v2di){+5, -3}, (__m128i)(__v2di){-9, +8}), -4, +5));
__m128d test_mm_add_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_add_pd
@@ -1634,12 +1636,14 @@ __m128i test_mm_sub_epi32(__m128i A, __m128i B) {
// CHECK: sub <4 x i32>
return _mm_sub_epi32(A, B);
}
+TEST_CONSTEXPR(match_v4si(_mm_sub_epi32((__m128i)(__v4si){+1, -2, +3, -4}, (__m128i)(__v4si){-10, +8, +6, -4}), +11, -10, -3, 0));
__m128i test_mm_sub_epi64(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_sub_epi64
// CHECK: sub <2 x i64>
return _mm_sub_epi64(A, B);
}
+TEST_CONSTEXPR(match_v2di(_mm_sub_epi64((__m128i)(__v2di){+5, -3}, (__m128i)(__v2di){-9, +8}), +14, -11));
__m128d test_mm_sub_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_sub_pd
More information about the cfe-commits mailing list