[clang] e281d96 - [clang][x86] Add constexpr support for _mm_add_epi32/64 and _mm_sub_epi32/64

Tue Oct 29 07:34:37 PDT 2024

Author: Simon Pilgrim
Date: 2024-10-29T14:34:19Z
New Revision: e281d96a81bca896692da4a07ca1423ee6dc1f53

URL: https://github.com/llvm/llvm-project/commit/e281d96a81bca896692da4a07ca1423ee6dc1f53
DIFF: https://github.com/llvm/llvm-project/commit/e281d96a81bca896692da4a07ca1423ee6dc1f53.diff

LOG: [clang][x86] Add constexpr support for _mm_add_epi32/64 and _mm_sub_epi32/64

Added: 
    

Modified: 
    clang/lib/Headers/emmintrin.h
    clang/test/CodeGen/X86/builtin_test_helpers.h
    clang/test/CodeGen/X86/sse2-builtins.c

Removed: 
    


################################################################################
diff  --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index d6494762169b25..778cdf99a12964 100644

--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -2110,8 +2110,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a,
 ///    A 128-bit vector of [4 x i32].
 /// \returns A 128-bit vector of [4 x i32] containing the sums of both
 ///    parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi32(__m128i __a,
-                                                           __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_add_epi32(__m128i __a, __m128i __b) {
   return (__m128i)((__v4su)__a + (__v4su)__b);
 }
 
@@ -2147,8 +2147,8 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_si64(__m64 __a, __m64 __b) {
 ///    A 128-bit vector of [2 x i64].
 /// \returns A 128-bit vector of [2 x i64] containing the sums of both
 ///    parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi64(__m128i __a,
-                                                           __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_add_epi64(__m128i __a, __m128i __b) {
   return (__m128i)((__v2du)__a + (__v2du)__b);
 }
 
@@ -2539,8 +2539,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a,
 ///    A 128-bit integer vector containing the subtrahends.
 /// \returns A 128-bit integer vector containing the 
diff erences of the values
 ///    in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi32(__m128i __a,
-                                                           __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sub_epi32(__m128i __a, __m128i __b) {
   return (__m128i)((__v4su)__a - (__v4su)__b);
 }
 
@@ -2573,8 +2573,8 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_si64(__m64 __a, __m64 __b) {
 ///    A 128-bit integer vector containing the subtrahends.
 /// \returns A 128-bit integer vector containing the 
diff erences of the values
 ///    in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi64(__m128i __a,
-                                                           __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sub_epi64(__m128i __a, __m128i __b) {
   return (__m128i)((__v2du)__a - (__v2du)__b);
 }
 

diff  --git a/clang/test/CodeGen/X86/builtin_test_helpers.h b/clang/test/CodeGen/X86/builtin_test_helpers.h
index 5e77ff3a7ca45e..f6547d4cb29ca3 100644
--- a/clang/test/CodeGen/X86/builtin_test_helpers.h
+++ b/clang/test/CodeGen/X86/builtin_test_helpers.h
@@ -16,6 +16,15 @@ constexpr bool match_m128i(__m128i v, unsigned long long a, unsigned long long b
   return v[0] == a && v[1] == b;
 }
 
+constexpr bool match_v2di(__m128i v, long long a, long long b) {
+  return v[0] == a && v[1] == b;
+}
+
+constexpr bool match_v4si(__m128i _v, int a, int b, int c, int d) {
+  __v4si v = (__v4si)_v;
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
+}
+
 constexpr bool match_m256(__m256 v, float a, float b, float c, float d, float e, float f, float g, float h) {
   return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h;
 }

diff  --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index 4287d3d4b5ec4e..82aa7a2d2b49d1 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -32,12 +32,14 @@ __m128i test_mm_add_epi32(__m128i A, __m128i B) {
   // CHECK: add <4 x i32>
   return _mm_add_epi32(A, B);
 }
+TEST_CONSTEXPR(match_v4si(_mm_add_epi32((__m128i)(__v4si){+1, -2, +3, -4}, (__m128i)(__v4si){-10, +8, +6, -4}), -9, +6, +9, -8));
 
 __m128i test_mm_add_epi64(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_add_epi64
   // CHECK: add <2 x i64>
   return _mm_add_epi64(A, B);
 }
+TEST_CONSTEXPR(match_v2di(_mm_add_epi64((__m128i)(__v2di){+5, -3}, (__m128i)(__v2di){-9, +8}), -4, +5));
 
 __m128d test_mm_add_pd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_add_pd
@@ -1634,12 +1636,14 @@ __m128i test_mm_sub_epi32(__m128i A, __m128i B) {
   // CHECK: sub <4 x i32>
   return _mm_sub_epi32(A, B);
 }
+TEST_CONSTEXPR(match_v4si(_mm_sub_epi32((__m128i)(__v4si){+1, -2, +3, -4}, (__m128i)(__v4si){-10, +8, +6, -4}), +11, -10, -3, 0));
 
 __m128i test_mm_sub_epi64(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_sub_epi64
   // CHECK: sub <2 x i64>
   return _mm_sub_epi64(A, B);
 }
+TEST_CONSTEXPR(match_v2di(_mm_sub_epi64((__m128i)(__v2di){+5, -3}, (__m128i)(__v2di){-9, +8}), +14, -11));
 
 __m128d test_mm_sub_pd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_sub_pd