[clang] [X86] Add MMX/SSE/AVX PHADD/SUB & HADDPS/D intrinsics to be used in constexpr (PR #156822)
via cfe-commits
cfe-commits at lists.llvm.org
Thu Sep 25 22:52:26 PDT 2025
https://github.com/whytolearn updated https://github.com/llvm/llvm-project/pull/156822
>From a81c4068096b960de65c3517f18d2d31004afbce Mon Sep 17 00:00:00 2001
From: whyuuwang <whyuuwang at tencent.com>
Date: Thu, 4 Sep 2025 15:52:57 +0800
Subject: [PATCH 1/6] Address issue #155395
VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - add MMX/SSE/AVX
PHADD/SUB & HADDPS/D intrinsics to be used in constexpr #155395
---
clang/lib/Headers/avx2intrin.h | 27 ++++-----
clang/lib/Headers/avxintrin.h | 11 ++--
clang/lib/Headers/pmmintrin.h | 20 +++----
clang/lib/Headers/tmmintrin.h | 80 +++++++++++--------------
clang/test/CodeGen/X86/avx-builtins.c | 29 +++++++++
clang/test/CodeGen/X86/avx2-builtins.c | 63 +++++++++++++++++++
clang/test/CodeGen/X86/mmx-builtins.c | 48 +++++++++++++++
clang/test/CodeGen/X86/ssse3-builtins.c | 49 +++++++++++++++
8 files changed, 250 insertions(+), 77 deletions(-)
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 384faa35d246f..f8fb808f7f29c 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -854,10 +854,9 @@ _mm256_cmpgt_epi64(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_hadd_epi16(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_hadd_epi16(__m256i __a, __m256i __b) {
+ return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b);
}
/// Horizontally adds the adjacent pairs of 32-bit integers from two 256-bit
@@ -886,7 +885,7 @@ _mm256_hadd_epi16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [8 x i32] containing one of the source operands.
/// \returns A 256-bit vector of [8 x i32] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_hadd_epi32(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b);
@@ -921,10 +920,9 @@ _mm256_hadd_epi32(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_hadds_epi16(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_hadds_epi16(__m256i __a, __m256i __b) {
+ return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b);
}
/// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit
@@ -957,10 +955,9 @@ _mm256_hadds_epi16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_hsub_epi16(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_hsub_epi16(__m256i __a, __m256i __b) {
+ return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b);
}
/// Horizontally subtracts adjacent pairs of 32-bit integers from two 256-bit
@@ -989,7 +986,7 @@ _mm256_hsub_epi16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [8 x i32] containing one of the source operands.
/// \returns A 256-bit vector of [8 x i32] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_hsub_epi32(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b);
@@ -1025,7 +1022,7 @@ _mm256_hsub_epi32(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_hsubs_epi16(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b);
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 26096da949447..976710a64e80e 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -703,7 +703,7 @@ _mm256_xor_ps(__m256 __a, __m256 __b)
/// elements of a vector of [4 x double].
/// \returns A 256-bit vector of [4 x double] containing the horizontal sums of
/// both operands.
-static __inline __m256d __DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_hadd_pd(__m256d __a, __m256d __b)
{
return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b);
@@ -726,9 +726,8 @@ _mm256_hadd_pd(__m256d __a, __m256d __b)
/// index 2, 3, 6, 7 of a vector of [8 x float].
/// \returns A 256-bit vector of [8 x float] containing the horizontal sums of
/// both operands.
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_hadd_ps(__m256 __a, __m256 __b)
-{
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_hadd_ps(__m256 __a, __m256 __b) {
return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b);
}
@@ -749,7 +748,7 @@ _mm256_hadd_ps(__m256 __a, __m256 __b)
/// odd-indexed elements of a vector of [4 x double].
/// \returns A 256-bit vector of [4 x double] containing the horizontal
/// differences of both operands.
-static __inline __m256d __DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_hsub_pd(__m256d __a, __m256d __b)
{
return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b);
@@ -772,7 +771,7 @@ _mm256_hsub_pd(__m256d __a, __m256d __b)
/// elements with index 2, 3, 6, 7 of a vector of [8 x float].
/// \returns A 256-bit vector of [8 x float] containing the horizontal
/// differences of both operands.
-static __inline __m256 __DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_hsub_ps(__m256 __a, __m256 __b)
{
return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b);
diff --git a/clang/lib/Headers/pmmintrin.h b/clang/lib/Headers/pmmintrin.h
index cd605df7fb52d..400b28bb877a1 100644
--- a/clang/lib/Headers/pmmintrin.h
+++ b/clang/lib/Headers/pmmintrin.h
@@ -89,9 +89,8 @@ _mm_addsub_ps(__m128 __a, __m128 __b)
/// destination.
/// \returns A 128-bit vector of [4 x float] containing the horizontal sums of
/// both operands.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_hadd_ps(__m128 __a, __m128 __b)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_hadd_ps(__m128 __a, __m128 __b) {
return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b);
}
@@ -174,9 +173,8 @@ _mm_moveldup_ps(__m128 __a)
/// A 128-bit vector of [2 x double] containing the right source operand.
/// \returns A 128-bit vector of [2 x double] containing the alternating sums
/// and differences of both operands.
-static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_addsub_pd(__m128d __a, __m128d __b)
-{
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_addsub_pd(__m128d __a, __m128d __b) {
return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b);
}
@@ -197,9 +195,8 @@ _mm_addsub_pd(__m128d __a, __m128d __b)
/// destination.
/// \returns A 128-bit vector of [2 x double] containing the horizontal sums of
/// both operands.
-static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_hadd_pd(__m128d __a, __m128d __b)
-{
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_hadd_pd(__m128d __a, __m128d __b) {
return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b);
}
@@ -220,9 +217,8 @@ _mm_hadd_pd(__m128d __a, __m128d __b)
/// the destination.
/// \returns A 128-bit vector of [2 x double] containing the horizontal
/// differences of both operands.
-static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_hsub_pd(__m128d __a, __m128d __b)
-{
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_hsub_pd(__m128d __a, __m128d __b) {
return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b);
}
diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h
index f01c61afa8ea2..d79f7f6ea4091 100644
--- a/clang/lib/Headers/tmmintrin.h
+++ b/clang/lib/Headers/tmmintrin.h
@@ -204,10 +204,10 @@ _mm_abs_epi32(__m128i __a) {
/// destination.
/// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
/// both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_hadd_epi16(__m128i __a, __m128i __b)
-{
- return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_hadd_epi16(__m128i __a, __m128i __b) {
+ return (__m128i)__builtin_ia32_phaddw128(
+ (__v8hi)__a, (__v8hi)__b);
}
/// Horizontally adds the adjacent pairs of values contained in 2 packed
@@ -227,10 +227,9 @@ _mm_hadd_epi16(__m128i __a, __m128i __b)
/// destination.
/// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
/// both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_hadd_epi32(__m128i __a, __m128i __b)
-{
- return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_hadd_epi32(__m128i __a, __m128i __b) {
+ return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
}
/// Horizontally adds the adjacent pairs of values contained in 2 packed
@@ -250,11 +249,10 @@ _mm_hadd_epi32(__m128i __a, __m128i __b)
/// destination.
/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
/// operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_hadd_pi16(__m64 __a, __m64 __b)
-{
- return __trunc64(__builtin_ia32_phaddw128(
- (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
+ _mm_hadd_pi16(__m64 __a, __m64 __b) {
+ return __trunc64(__builtin_ia32_phaddw128(
+ (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
}
/// Horizontally adds the adjacent pairs of values contained in 2 packed
@@ -274,7 +272,7 @@ _mm_hadd_pi16(__m64 __a, __m64 __b)
/// destination.
/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
/// operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_hadd_pi32(__m64 __a, __m64 __b)
{
return __trunc64(__builtin_ia32_phaddd128(
@@ -301,10 +299,9 @@ _mm_hadd_pi32(__m64 __a, __m64 __b)
/// destination.
/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
/// sums of both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_hadds_epi16(__m128i __a, __m128i __b)
-{
- return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_hadds_epi16(__m128i __a, __m128i __b) {
+ return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
}
/// Horizontally adds, with saturation, the adjacent pairs of values contained
@@ -327,7 +324,7 @@ _mm_hadds_epi16(__m128i __a, __m128i __b)
/// destination.
/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
/// sums of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_hadds_pi16(__m64 __a, __m64 __b)
{
return __trunc64(__builtin_ia32_phaddsw128(
@@ -351,10 +348,9 @@ _mm_hadds_pi16(__m64 __a, __m64 __b)
/// the destination.
/// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
/// of both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_hsub_epi16(__m128i __a, __m128i __b)
-{
- return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_hsub_epi16(__m128i __a, __m128i __b) {
+ return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
}
/// Horizontally subtracts the adjacent pairs of values contained in 2
@@ -374,10 +370,9 @@ _mm_hsub_epi16(__m128i __a, __m128i __b)
/// the destination.
/// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
/// of both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_hsub_epi32(__m128i __a, __m128i __b)
-{
- return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_hsub_epi32(__m128i __a, __m128i __b) {
+ return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
}
/// Horizontally subtracts the adjacent pairs of values contained in 2
@@ -397,7 +392,7 @@ _mm_hsub_epi32(__m128i __a, __m128i __b)
/// the destination.
/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
/// of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_hsub_pi16(__m64 __a, __m64 __b)
{
return __trunc64(__builtin_ia32_phsubw128(
@@ -421,7 +416,7 @@ _mm_hsub_pi16(__m64 __a, __m64 __b)
/// the destination.
/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
/// of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_hsub_pi32(__m64 __a, __m64 __b)
{
return __trunc64(__builtin_ia32_phsubd128(
@@ -448,10 +443,9 @@ _mm_hsub_pi32(__m64 __a, __m64 __b)
/// the destination.
/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
/// differences of both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_hsubs_epi16(__m128i __a, __m128i __b)
-{
- return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_hsubs_epi16(__m128i __a, __m128i __b) {
+ return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
}
/// Horizontally subtracts, with saturation, the adjacent pairs of values
@@ -474,7 +468,7 @@ _mm_hsubs_epi16(__m128i __a, __m128i __b)
/// the destination.
/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
/// differences of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_hsubs_pi16(__m64 __a, __m64 __b)
{
return __trunc64(__builtin_ia32_phsubsw128(
@@ -509,10 +503,9 @@ _mm_hsubs_pi16(__m64 __a, __m64 __b)
/// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n
/// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n
/// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15)
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_maddubs_epi16(__m128i __a, __m128i __b)
-{
- return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_maddubs_epi16(__m128i __a, __m128i __b) {
+ return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
}
/// Multiplies corresponding pairs of packed 8-bit unsigned integer
@@ -539,11 +532,10 @@ _mm_maddubs_epi16(__m128i __a, __m128i __b)
/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_maddubs_pi16(__m64 __a, __m64 __b)
-{
- return __trunc64(__builtin_ia32_pmaddubsw128((__v16qi)__anyext128(__a),
- (__v16qi)__anyext128(__b)));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_maddubs_pi16(__m64 __a, __m64 __b) {
+ return __trunc64(__builtin_ia32_pmaddubsw128((__v16qi)__anyext128(__a),
+ (__v16qi)__anyext128(__b)));
}
/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
@@ -560,7 +552,7 @@ _mm_maddubs_pi16(__m64 __a, __m64 __b)
/// A 128-bit vector of [8 x i16] containing one of the source operands.
/// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
/// products of both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_mulhrs_epi16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 4a048744faa61..f381faebededf 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -1083,24 +1083,53 @@ __m256d test_mm256_hadd_pd(__m256d A, __m256d B) {
// CHECK: call {{.*}}<4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
return _mm256_hadd_pd(A, B);
}
+constexpr bool test_mm256_hadd_epi32_constexpr() {
+ constexpr __m256d a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
+ constexpr __m256d b = _mm256_set_pd(5.0, 6.0, 7.0, 8.0);
+ constexpr __m256d result = _mm256_hadd_pd(a, b);
+ return match_m256d(result,1.0+2.0,3.0+4.0,5.0+6.0,7.0+8.0);
+}
+TEST_CONSTEXPR(test_mm256_hadd_epi32_constexpr())
__m256 test_mm256_hadd_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_hadd_ps
// CHECK: call {{.*}}<8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
return _mm256_hadd_ps(A, B);
}
+constexpr bool test_mm256_hadd_ps_constexpr() {
+ constexpr __m256 a = _mm256_set_ps(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
+ constexpr __m256 b = _mm256_set_ps(9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f);
+ constexpr __m256 result = _mm256_hadd_ps(a, b);
+ return match_m256(result,1.0f+2.0f,3.0f+4.0f,5.0f+6.0f,7.0f+8.0f,
+ 9.0f+10.0f,11.0f+12.0f,13.0f+14.0f,15.0f+16.0f);
+}
+TEST_CONSTEXPR(test_mm256_hadd_ps_constexpr())
__m256d test_mm256_hsub_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_hsub_pd
// CHECK: call {{.*}}<4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
return _mm256_hsub_pd(A, B);
}
+constexpr bool test_mm256_hsub_pd_constexpr() {
+ constexpr __m256d a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
+ constexpr __m256d b = _mm256_set_pd(5.0, 6.0, 7.0, 8.0);
+ constexpr __m256d result = _mm256_hsub_pd(a, b);
+ return match_m256d(result,1.0-2.0,3.0-4.0,5.0-6.0,7.0-8.0);
+}
+TEST_CONSTEXPR(test_mm256_hsub_pd_constexpr())
__m256 test_mm256_hsub_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_hsub_ps
// CHECK: call {{.*}}<8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
return _mm256_hsub_ps(A, B);
}
+constexpr bool test_mm256_hsub_ps_constexpr() {
+ constexpr __m256 a = _mm256_set_ps(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
+ constexpr __m256 b = _mm256_set_ps(9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f);
+ constexpr __m256 result = _mm256_hsub_ps(a, b);
+ return match_m256(result,1.0f-2.0f,3.0f-4.0f,5.0f-6.0f,7.0f-8.0f,
+ 9.0f-10.0f,11.0f-12.0f,13.0f-14.0f,15.0f-16.0f);
+}
__m256i test_mm256_insert_epi8(__m256i x, char b) {
// CHECK-LABEL: test_mm256_insert_epi8
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index a39ce513837ea..02845b9417a1f 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -462,17 +462,48 @@ __m256i test_mm256_hadd_epi16(__m256i a, __m256i b) {
return _mm256_hadd_epi16(a, b);
}
+constexpr bool test_mm256_hadd_epi16_constexpr() {
+ constexpr __m256i a = _mm256_setr_epi16(1, 2, 3, 4, 5, 6, 7,
+ 8,9,10,11,12,13,14,15,16);
+ constexpr __m256i b = _mm256_setr_epi16(17,18,19,20,21,22,23,
+ 24,25,26,27,28,29,30,31,32);
+
+ constexpr __m256i result = _mm256_hadd_epi16(a, b);
+ return match_v16si(result,1+2,3+4,5+6,7+8,9+10,11+12,13+14,15+16,17+18,19+20,21+22,23+24,25+26,27+28,29+30,31+32);
+}
+TEST_CONSTEXPR(test_mm256_hadd_epi16_constexpr())
+
__m256i test_mm256_hadd_epi32(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_hadd_epi32
// CHECK: call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
return _mm256_hadd_epi32(a, b);
}
+constexpr bool test_mm256_hadd_epi32_constexpr() {
+ constexpr __m256i a = _mm256_setr_epi32(10, 20, 30, 40, 50, 60, 70, 80);
+ constexpr __m256i b = _mm256_setr_epi32(5, 15, 25, 35, 45, 55, 65, 75);
+
+ constexpr __m256i result = _mm256_hadd_epi32(a, b);
+ return match_v8si(result,10+20,30+40,50+60,70+80,5+15,25+35, 45+55,65+75);
+}
+TEST_CONSTEXPR(test_mm256_hadd_epi32_constexpr())
+
__m256i test_mm256_hadds_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_hadds_epi16
// CHECK:call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_hadds_epi16(a, b);
}
+constexpr bool test_mm256_hadds_epi16_constexpr() {
+ constexpr __m256i a = _mm256_setr_epi16(32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767);
+ constexpr __m256i b = _mm256_setr_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1);
+ constexpr __m256i result = _mm256_hadds_epi16(a, b);
+
+ return match_v16si(result, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767);
+}
+TEST_CONSTEXPR(test_mm256_hadds_epi16_constexpr())
__m256i test_mm256_hsub_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_hsub_epi16
@@ -480,18 +511,50 @@ __m256i test_mm256_hsub_epi16(__m256i a, __m256i b) {
return _mm256_hsub_epi16(a, b);
}
+constexpr bool test_mm256_hsub_epi16_constexpr() {
+ constexpr __m256i a = _mm256_setr_epi16(1, 2, 3, 4, 5, 6, 7,
+ 8,9,10,11,12,13,14,15,16);
+ constexpr __m256i b = _mm256_setr_epi16(17,18,19,20,21,22,23,
+ 24,25,26,27,28,29,30,31,32);
+
+ constexpr __m256i result = _mm256_hsub_epi16(a, b);
+ return match_v16si(result,1-2,3-4,5-6,7-8,9-10,11-12,13-14,15-16,17-18,19-20,21-22,23-24,25-26,27-28,29-30,31-32);
+}
+TEST_CONSTEXPR(test_mm256_hsub_epi16_constexpr())
+
__m256i test_mm256_hsub_epi32(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_hsub_epi32
// CHECK: call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
return _mm256_hsub_epi32(a, b);
}
+constexpr bool test_mm256_hsub_epi32_constexpr() {
+ constexpr __m256i a = _mm256_setr_epi32(10, 20, 30, 40, 50, 60, 70, 80);
+ constexpr __m256i b = _mm256_setr_epi32(5, 15, 25, 35, 45, 55, 65, 75);
+
+ constexpr __m256i result = _mm256_hsub_epi32(a, b);
+ return match_v8si(result,10-20,30-40,50-60,70-80,5-15,25-35, 45-55,65-75);
+}
+TEST_CONSTEXPR(test_mm256_hsub_epi32_constexpr())
+
__m256i test_mm256_hsubs_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_hsubs_epi16
// CHECK:call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_hsubs_epi16(a, b);
}
+constexpr bool test_mm256_hsubs_epi16_constexpr() {
+ constexpr __m256i a = _mm256_setr_epi16(32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767);
+ constexpr __m256i b = _mm256_setr_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+ constexpr __m256i result3 = _mm256_hsubs_epi16(a, b);
+
+ return match_v16si(result3, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767);
+}
+TEST_CONSTEXPR(test_mm256_hsubs_epi16_constexpr())
+
+
__m128i test_mm_i32gather_epi32(int const *b, __m128i c) {
// CHECK-LABEL: test_mm_i32gather_epi32
// CHECK: call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %{{.*}}, ptr %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 2)
diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c
index 7bd2475399bf9..8da0e8c814879 100644
--- a/clang/test/CodeGen/X86/mmx-builtins.c
+++ b/clang/test/CodeGen/X86/mmx-builtins.c
@@ -309,36 +309,84 @@ __m64 test_mm_hadd_pi16(__m64 a, __m64 b) {
// CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.w.128(
return _mm_hadd_pi16(a, b);
}
+constexpr bool test_mm_hadd_pi16_constexpr() {
+ constexpr __m64 a = _mm_setr_pi16(1, 2, 3, 4);
+ constexpr __m64 b = _mm_setr_pi16(5,6,7,8);
+
+ constexpr __m64 result = _mm_hadd_pi16(a, b);
+ return match_v4si(result,1+2,3+4,5+6,7+8);
+}
+TEST_CONSTEXPR(test_mm_hadd_pi16_constexpr())
__m64 test_mm_hadd_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hadd_pi32
// CHECK: call <4 x i32> @llvm.x86.ssse3.phadd.d.128(
return _mm_hadd_pi32(a, b);
}
+constexpr bool test_mm_hadd_pi32_constexpr() {
+ constexpr __m64 a = _mm_setr_pi32(1, 2);
+ constexpr __m64 b = _mm_setr_pi32(3, 4);
+
+ constexpr __m64 result = _mm_hadd_pi32(a, b);
+ return match_v2si(result,1+2,3+4);
+}
+TEST_CONSTEXPR(test_mm_hadd_pi32_constexpr())
__m64 test_mm_hadds_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hadds_pi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(
return _mm_hadds_pi16(a, b);
}
+constexpr bool test_mm_hadds_pi16_constexpr() {
+ constexpr __m64 a = _mm_setr_pi16(32767, 32767, 32767, 32767);
+ constexpr __m64 b = _mm_setr_pi16(1,1,1,1);
+
+ constexpr __m64 result = _mm_hadds_pi16(a, b);
+ return match_v4si(result,32767, 32767, 32767, 32767);
+}
+TEST_CONSTEXPR(test_mm_hadds_pi16_constexpr())
__m64 test_mm_hsub_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hsub_pi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.w.128(
return _mm_hsub_pi16(a, b);
}
+constexpr bool test_mm_hsub_pi16_constexpr() {
+ constexpr __m64 a = _mm_setr_pi16(1, 2, 3, 4);
+ constexpr __m64 b = _mm_setr_pi16(5,6,7,8);
+
+ constexpr __m64 result = _mm_hsub_pi16(a, b);
+ return match_v4si(result,1-2,3-4,5-6,7-8);
+}
+TEST_CONSTEXPR(test_mm_hsub_pi16_constexpr())
__m64 test_mm_hsub_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hsub_pi32
// CHECK: call <4 x i32> @llvm.x86.ssse3.phsub.d.128(
return _mm_hsub_pi32(a, b);
}
+constexpr bool test_mm_hsub_pi32_constexpr() {
+ constexpr __m64 a = _mm_setr_pi32(1, 2);
+ constexpr __m64 b = _mm_setr_pi32(3, 4);
+
+ constexpr __m64 result = _mm_hsub_pi32(a, b);
+ return match_v2si(result,1-2,3-4);
+}
+TEST_CONSTEXPR(test_mm_hsub_pi32_constexpr())
__m64 test_mm_hsubs_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hsubs_pi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(
return _mm_hsubs_pi16(a, b);
}
+constexpr bool test_mm_hsubs_pi16_constexpr() {
+ constexpr __m64 a = _mm_setr_pi16(32767, 32767, 32767, 32767);
+ constexpr __m64 b = _mm_setr_pi16(-1,-1,-1,-1);
+
+ constexpr __m64 result = _mm_hsubs_pi16(a, b);
+ return match_v4si(result,32767, 32767, 32767, 32767);
+}
+TEST_CONSTEXPR(test_mm_hsubs_pi16_constexpr())
__m64 test_mm_insert_pi16(__m64 a, int d) {
// CHECK-LABEL: test_mm_insert_pi16
diff --git a/clang/test/CodeGen/X86/ssse3-builtins.c b/clang/test/CodeGen/X86/ssse3-builtins.c
index 56ff73f08ab32..bd0ef43278217 100644
--- a/clang/test/CodeGen/X86/ssse3-builtins.c
+++ b/clang/test/CodeGen/X86/ssse3-builtins.c
@@ -60,36 +60,85 @@ __m128i test_mm_hadd_epi16(__m128i a, __m128i b) {
// CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_hadd_epi16(a, b);
}
+constexpr bool test_mm_hadd_epi16_constexpr() {
+ constexpr __m128i a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
+ constexpr __m128i b = _mm_setr_epi16(17,18,19,20,21,22,23,24);
+
+ constexpr __m128i result = _mm_hadd_epi16(a, b);
+ return match_v8si(result,1+2,3+4,5+6,7+8,17+18,19+20,21+22,23+24);
+}
+TEST_CONSTEXPR(test_mm_hadd_epi16_constexpr())
__m128i test_mm_hadd_epi32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_hadd_epi32
// CHECK: call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm_hadd_epi32(a, b);
}
+constexpr bool test_mm_hadd_epi32_constexpr() {
+ constexpr __m128i a = _mm_setr_epi32(1, 2, 3, 4);
+ constexpr __m128i b = _mm_setr_epi32(5,6,7,8);
+
+ constexpr __m128i result = _mm_hadd_epi32(a, b);
+ return match_v4si(result,1+2,3+4,5+6,7+8);
+}
+TEST_CONSTEXPR(test_mm_hadd_epi32_constexpr())
__m128i test_mm_hadds_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_hadds_epi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_hadds_epi16(a, b);
}
+constexpr bool test_mm_hadds_epi16_constexpr() {
+ constexpr __m128i a = _mm_setr_epi16(32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767);
+ constexpr __m128i b = _mm_setr_epi16(1, 1, 1, 1, 1, 1, 1, 1);
+ constexpr __m128i result = _mm_hadds_epi16(a, b);
+
+ return match_v8si(result, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767);
+}
+TEST_CONSTEXPR(test_mm_hadds_epi16_constexpr())
+
__m128i test_mm_hsub_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_hsub_epi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_hsub_epi16(a, b);
}
+constexpr bool test_mm_hsub_epi16_constexpr() {
+ constexpr __m128i a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
+ constexpr __m128i b = _mm_setr_epi16(9,10,11,12,13,14,15,16);
+
+ constexpr __m128i result = _mm_hsub_epi16(a, b);
+ return match_v8si(result,1-2,3-4,5-6,7-8,9-10,11-12,13-14,15-16);
+}
+TEST_CONSTEXPR(test_mm_hsub_epi16_constexpr())
__m128i test_mm_hsub_epi32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_hsub_epi32
// CHECK: call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm_hsub_epi32(a, b);
}
+constexpr bool test_mm_hsub_epi32_constexpr() {
+ constexpr __m128i a = _mm_setr_epi32(1, 2, 3, 4);
+ constexpr __m128i b = _mm_setr_epi32(5,6,7,8);
+
+ constexpr __m128i result = _mm_hsub_epi32(a, b);
+ return match_v4si(result,1-2,3-4,5-6,7-8);
+}
+TEST_CONSTEXPR(test_mm_hsub_epi32_constexpr())
__m128i test_mm_hsubs_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_hsubs_epi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_hsubs_epi16(a, b);
}
+constexpr bool test_mm_hsubs_epi16_constexpr() {
+ constexpr __m128i a = _mm_setr_epi16(32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767);
+ constexpr __m128i b = _mm_setr_epi16(-1, -1, -1, -1, -1, -1, -1, -1);
+ constexpr __m128i result3 = _mm_hsubs_epi16(a, b);
+
+ return match_v8si(result3, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767);
+}
+TEST_CONSTEXPR(test_mm_hsubs_epi16_constexpr())
__m128i test_mm_maddubs_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_maddubs_epi16
>From 2fadf3fd261935e25adff5b26ad8ee0734746a26 Mon Sep 17 00:00:00 2001
From: whyuuwang <whyuuwang at tencent.com>
Date: Thu, 4 Sep 2025 15:55:44 +0800
Subject: [PATCH 2/6] Address issue #155395 [Clang]
VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - add MMX/SSE/AVX
PHADD/SUB & HADDPS/D intrinsics to be used in constexpr #155395
---
clang/lib/Headers/avx2intrin.h | 15 ++++-----
clang/lib/Headers/avxintrin.h | 15 ++++-----
clang/lib/Headers/pmmintrin.h | 4 +--
clang/lib/Headers/tmmintrin.h | 57 +++++++++++++++-------------------
4 files changed, 39 insertions(+), 52 deletions(-)
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index f8fb808f7f29c..c39f94c7fc16b 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -886,9 +886,8 @@ _mm256_hadd_epi16(__m256i __a, __m256i __b) {
/// A 256-bit vector of [8 x i32] containing one of the source operands.
/// \returns A 256-bit vector of [8 x i32] containing the sums.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_hadd_epi32(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b);
+_mm256_hadd_epi32(__m256i __a, __m256i __b) {
+ return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b);
}
/// Horizontally adds the adjacent pairs of 16-bit integers from two 256-bit
@@ -987,9 +986,8 @@ _mm256_hsub_epi16(__m256i __a, __m256i __b) {
/// A 256-bit vector of [8 x i32] containing one of the source operands.
/// \returns A 256-bit vector of [8 x i32] containing the differences.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_hsub_epi32(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b);
+_mm256_hsub_epi32(__m256i __a, __m256i __b) {
+ return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b);
}
/// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit
@@ -1023,9 +1021,8 @@ _mm256_hsub_epi32(__m256i __a, __m256i __b)
/// A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the differences.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_hsubs_epi16(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b);
+_mm256_hsubs_epi16(__m256i __a, __m256i __b) {
+ return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b);
}
/// Multiplies each unsigned byte from the 256-bit integer vector in \a __a
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 976710a64e80e..48d79063f9b61 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -704,8 +704,7 @@ _mm256_xor_ps(__m256 __a, __m256 __b)
/// \returns A 256-bit vector of [4 x double] containing the horizontal sums of
/// both operands.
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm256_hadd_pd(__m256d __a, __m256d __b)
-{
+_mm256_hadd_pd(__m256d __a, __m256d __b) {
return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b);
}
@@ -726,8 +725,8 @@ _mm256_hadd_pd(__m256d __a, __m256d __b)
/// index 2, 3, 6, 7 of a vector of [8 x float].
/// \returns A 256-bit vector of [8 x float] containing the horizontal sums of
/// both operands.
-static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm256_hadd_ps(__m256 __a, __m256 __b) {
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_hadd_ps(__m256 __a,
+ __m256 __b) {
return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b);
}
@@ -749,8 +748,7 @@ _mm256_hadd_ps(__m256 __a, __m256 __b) {
/// \returns A 256-bit vector of [4 x double] containing the horizontal
/// differences of both operands.
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm256_hsub_pd(__m256d __a, __m256d __b)
-{
+_mm256_hsub_pd(__m256d __a, __m256d __b) {
return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b);
}
@@ -771,9 +769,8 @@ _mm256_hsub_pd(__m256d __a, __m256d __b)
/// elements with index 2, 3, 6, 7 of a vector of [8 x float].
/// \returns A 256-bit vector of [8 x float] containing the horizontal
/// differences of both operands.
-static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm256_hsub_ps(__m256 __a, __m256 __b)
-{
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_hsub_ps(__m256 __a,
+ __m256 __b) {
return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b);
}
diff --git a/clang/lib/Headers/pmmintrin.h b/clang/lib/Headers/pmmintrin.h
index 400b28bb877a1..67f2a7ffd1f56 100644
--- a/clang/lib/Headers/pmmintrin.h
+++ b/clang/lib/Headers/pmmintrin.h
@@ -89,8 +89,8 @@ _mm_addsub_ps(__m128 __a, __m128 __b)
/// destination.
/// \returns A 128-bit vector of [4 x float] containing the horizontal sums of
/// both operands.
-static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_hadd_ps(__m128 __a, __m128 __b) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_ps(__m128 __a,
+ __m128 __b) {
return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b);
}
diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h
index d79f7f6ea4091..b408c6a3404ec 100644
--- a/clang/lib/Headers/tmmintrin.h
+++ b/clang/lib/Headers/tmmintrin.h
@@ -206,8 +206,7 @@ _mm_abs_epi32(__m128i __a) {
/// both operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_hadd_epi16(__m128i __a, __m128i __b) {
- return (__m128i)__builtin_ia32_phaddw128(
- (__v8hi)__a, (__v8hi)__b);
+ return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
}
/// Horizontally adds the adjacent pairs of values contained in 2 packed
@@ -249,8 +248,8 @@ _mm_hadd_epi32(__m128i __a, __m128i __b) {
/// destination.
/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
/// operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
- _mm_hadd_pi16(__m64 __a, __m64 __b) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_pi16(__m64 __a,
+ __m64 __b) {
return __trunc64(__builtin_ia32_phaddw128(
(__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
}
@@ -272,11 +271,10 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
/// destination.
/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
/// operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_hadd_pi32(__m64 __a, __m64 __b)
-{
- return __trunc64(__builtin_ia32_phaddd128(
- (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){}));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_pi32(__m64 __a,
+ __m64 __b) {
+ return __trunc64(__builtin_ia32_phaddd128(
+ (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){}));
}
/// Horizontally adds, with saturation, the adjacent pairs of values contained
@@ -324,11 +322,10 @@ _mm_hadds_epi16(__m128i __a, __m128i __b) {
/// destination.
/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
/// sums of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_hadds_pi16(__m64 __a, __m64 __b)
-{
- return __trunc64(__builtin_ia32_phaddsw128(
- (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadds_pi16(__m64 __a,
+ __m64 __b) {
+ return __trunc64(__builtin_ia32_phaddsw128(
+ (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
}
/// Horizontally subtracts the adjacent pairs of values contained in 2
@@ -392,11 +389,10 @@ _mm_hsub_epi32(__m128i __a, __m128i __b) {
/// the destination.
/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
/// of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_hsub_pi16(__m64 __a, __m64 __b)
-{
- return __trunc64(__builtin_ia32_phsubw128(
- (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_pi16(__m64 __a,
+ __m64 __b) {
+ return __trunc64(__builtin_ia32_phsubw128(
+ (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
}
/// Horizontally subtracts the adjacent pairs of values contained in 2
@@ -416,11 +412,10 @@ _mm_hsub_pi16(__m64 __a, __m64 __b)
/// the destination.
/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
/// of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_hsub_pi32(__m64 __a, __m64 __b)
-{
- return __trunc64(__builtin_ia32_phsubd128(
- (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){}));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_pi32(__m64 __a,
+ __m64 __b) {
+ return __trunc64(__builtin_ia32_phsubd128(
+ (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){}));
}
/// Horizontally subtracts, with saturation, the adjacent pairs of values
@@ -468,11 +463,10 @@ _mm_hsubs_epi16(__m128i __a, __m128i __b) {
/// the destination.
/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
/// differences of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_hsubs_pi16(__m64 __a, __m64 __b)
-{
- return __trunc64(__builtin_ia32_phsubsw128(
- (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsubs_pi16(__m64 __a,
+ __m64 __b) {
+ return __trunc64(__builtin_ia32_phsubsw128(
+ (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
}
/// Multiplies corresponding pairs of packed 8-bit unsigned integer
@@ -553,9 +547,8 @@ _mm_maddubs_pi16(__m64 __a, __m64 __b) {
/// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
/// products of both operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_mulhrs_epi16(__m128i __a, __m128i __b)
-{
- return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
+_mm_mulhrs_epi16(__m128i __a, __m128i __b) {
+ return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
}
/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
>From ed4a09fb51ab347b4778b81d1f8c511d31d106a7 Mon Sep 17 00:00:00 2001
From: whyuuwang <whyuuwang at tencent.com>
Date: Thu, 11 Sep 2025 13:48:13 +0800
Subject: [PATCH 3/6] constexpr handling
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 243 +++++++++++++++++----
clang/lib/AST/ExprConstant.cpp | 266 +++++++++++++++++------
2 files changed, 407 insertions(+), 102 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 8c2b71160f7f3..f6027c78935c3 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -8,6 +8,7 @@
#include "../ExprConstShared.h"
#include "Boolean.h"
#include "EvalEmitter.h"
+#include "Floating.h"
#include "Interp.h"
#include "InterpBuiltinBitCast.h"
#include "PrimType.h"
@@ -19,6 +20,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SipHash.h"
+#include <cassert>
namespace clang {
namespace interp {
@@ -2736,6 +2738,141 @@ static bool interp__builtin_ia32_pmul(InterpState &S, CodePtr OpPC,
return true;
}
+static bool interp_builtin_ia32ph_add_sub(InterpState &S, CodePtr OpPC,
+ const InterpFrame *Frame,
+ const CallExpr *Call,
+ uint32_t BuiltinID) {
+ assert(Call->getArg(0)->getType()->isVectorType() &&
+ Call->getArg(1)->getType()->isVectorType());
+ const Pointer &RHS = S.Stk.pop<Pointer>();
+ const Pointer &LHS = S.Stk.pop<Pointer>();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+
+ const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
+ PrimType ElemT = *S.getContext().classify(VT->getElementType());
+ unsigned SourceLen = VT->getNumElements();
+ assert(SourceLen % 2 == 0 &&
+ Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements() == SourceLen);
+ PrimType DstElemT = *S.getContext().classify(
+ Call->getType()->castAs<VectorType>()->getElementType());
+ unsigned DstElem = 0;
+
+ bool IsAdd = (BuiltinID == clang::X86::BI__builtin_ia32_phaddw128 ||
+ BuiltinID == clang::X86::BI__builtin_ia32_phaddw256 ||
+ BuiltinID == clang::X86::BI__builtin_ia32_phaddd128 ||
+ BuiltinID == clang::X86::BI__builtin_ia32_phaddd256 ||
+ BuiltinID == clang::X86::BI__builtin_ia32_phaddsw128 ||
+ BuiltinID == clang::X86::BI__builtin_ia32_phaddsw256);
+
+ bool IsSaturating = (BuiltinID == clang::X86::BI__builtin_ia32_phaddsw128 ||
+ BuiltinID == clang::X86::BI__builtin_ia32_phaddsw256 ||
+ BuiltinID == clang::X86::BI__builtin_ia32_phsubsw128 ||
+ BuiltinID == clang::X86::BI__builtin_ia32_phsubsw256);
+
+ for (unsigned I = 0; I != SourceLen; I += 2) {
+ APSInt Elem1;
+ APSInt Elem2;
+ INT_TYPE_SWITCH_NO_BOOL(ElemT, {
+ Elem1 = LHS.elem<T>(I).toAPSInt();
+ Elem2 = LHS.elem<T>(I+1).toAPSInt();
+ });
+ APSInt Result;
+ if (IsAdd) {
+ if (IsSaturating) {
+ Result = APSInt(Elem1.sadd_sat(Elem2), /*IsUnsigned=*/Elem1.isUnsigned());
+ }else{
+ Result = APSInt(Elem1 + Elem2, /*IsUnsigned=*/Elem1.isUnsigned());
+ }
+ }else{
+ if (IsSaturating) {
+ Result =
+ APSInt(Elem1.ssub_sat(Elem2), /*IsUnsigned=*/Elem1.isUnsigned());
+ } else {
+ Result = APSInt(Elem1 - Elem2, /*IsUnsigned=*/Elem1.isUnsigned());
+ }
+ }
+ INT_TYPE_SWITCH_NO_BOOL(DstElemT,
+ { Dst.elem<T>(DstElem) = static_cast<T>(Result); });
+ ++DstElem;
+ }
+ for (unsigned I = 0; I != SourceLen; I += 2) {
+ APSInt Elem1;
+ APSInt Elem2;
+ INT_TYPE_SWITCH_NO_BOOL(ElemT, {
+ Elem1 = RHS.elem<T>(I).toAPSInt();
+ Elem2 = RHS.elem<T>(I + 1).toAPSInt();
+ });
+ APSInt Result;
+ if (IsAdd) {
+ if (IsSaturating) {
+ Result =
+ APSInt(Elem1.sadd_sat(Elem2), /*IsUnsigned=*/Elem1.isUnsigned());
+ } else {
+ Result = APSInt(Elem1 + Elem2, /*IsUnsigned=*/Elem1.isUnsigned());
+ }
+ } else {
+ if (IsSaturating) {
+ Result = APSInt(Elem1.ssub_sat(Elem2), /*IsUnsigned=*/Elem1.isUnsigned());
+ } else {
+ Result = APSInt(Elem1 - Elem2, /*IsUnsigned=*/Elem1.isUnsigned());
+ }
+ }
+ INT_TYPE_SWITCH_NO_BOOL(DstElemT,
+ { Dst.elem<T>(DstElem) = static_cast<T>(Result); });
+ ++DstElem;
+ }
+ Dst.initializeAllElements();
+ return true;
+}
+
+static bool interp_builtin_floatph_add_sub(InterpState &S, CodePtr OpPC,
+ const InterpFrame *Frame,
+ const CallExpr *Call,
+ uint32_t BuiltinID) {
+ assert(Call->getArg(0)->getType()->isVectorType() &&
+ Call->getArg(1)->getType()->isVectorType());
+ const Pointer &RHS = S.Stk.pop<Pointer>();
+ const Pointer &LHS = S.Stk.pop<Pointer>();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+
+ FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts());
+ llvm::RoundingMode RM = getRoundingMode(FPO);
+ const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
+ unsigned SourceLen = VT->getNumElements();
+ assert(SourceLen % 2 == 0 &&
+ Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements() ==
+ SourceLen);
+ unsigned DstElem = 0;
+ bool IsAdd = (BuiltinID == clang::X86::BI__builtin_ia32_haddpd ||
+ BuiltinID == clang::X86::BI__builtin_ia32_haddpd256 ||
+ BuiltinID == clang::X86::BI__builtin_ia32_haddps ||
+ BuiltinID == clang::X86::BI__builtin_ia32_haddps256);
+ using T = Floating;
+ for (unsigned I = 0; I != SourceLen; I += 2) {
+ APFloat Elem1 = LHS.elem<T>(I).getAPFloat();
+ APFloat Elem2 = LHS.elem<T>(I + 1).getAPFloat();
+
+ if (IsAdd) {
+ Elem1.add(Elem2, RM);
+ } else {
+ Elem1.subtract(Elem2, RM);
+ }
+ Dst.elem<T>(DstElem++) = Elem1;
+ }
+ for (unsigned I = 0; I != SourceLen; I += 2) {
+ APFloat Elem1 = RHS.elem<T>(I).getAPFloat();
+ APFloat Elem2 = RHS.elem<T>(I + 1).getAPFloat();
+ if (IsAdd) {
+ Elem1.add(Elem2, RM);
+ } else {
+ Elem1.subtract(Elem2, RM);
+ }
+ Dst.elem<T>(DstElem++) = Elem1;
+ }
+ Dst.initializeAllElements();
+ return true;
+}
+
static bool interp__builtin_elementwise_fma(InterpState &S, CodePtr OpPC,
const CallExpr *Call) {
assert(Call->getNumArgs() == 3);
@@ -3356,49 +3493,73 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case Builtin::BI__builtin_elementwise_min:
return interp__builtin_elementwise_maxmin(S, OpPC, Call, BuiltinID);
- case clang::X86::BI__builtin_ia32_pmuldq128:
- case clang::X86::BI__builtin_ia32_pmuldq256:
- case clang::X86::BI__builtin_ia32_pmuldq512:
- case clang::X86::BI__builtin_ia32_pmuludq128:
- case clang::X86::BI__builtin_ia32_pmuludq256:
- case clang::X86::BI__builtin_ia32_pmuludq512:
- return interp__builtin_ia32_pmul(S, OpPC, Call, BuiltinID);
-
- case Builtin::BI__builtin_elementwise_fma:
- return interp__builtin_elementwise_fma(S, OpPC, Call);
-
- case X86::BI__builtin_ia32_selectb_128:
- case X86::BI__builtin_ia32_selectb_256:
- case X86::BI__builtin_ia32_selectb_512:
- case X86::BI__builtin_ia32_selectw_128:
- case X86::BI__builtin_ia32_selectw_256:
- case X86::BI__builtin_ia32_selectw_512:
- case X86::BI__builtin_ia32_selectd_128:
- case X86::BI__builtin_ia32_selectd_256:
- case X86::BI__builtin_ia32_selectd_512:
- case X86::BI__builtin_ia32_selectq_128:
- case X86::BI__builtin_ia32_selectq_256:
- case X86::BI__builtin_ia32_selectq_512:
- case X86::BI__builtin_ia32_selectph_128:
- case X86::BI__builtin_ia32_selectph_256:
- case X86::BI__builtin_ia32_selectph_512:
- case X86::BI__builtin_ia32_selectpbf_128:
- case X86::BI__builtin_ia32_selectpbf_256:
- case X86::BI__builtin_ia32_selectpbf_512:
- case X86::BI__builtin_ia32_selectps_128:
- case X86::BI__builtin_ia32_selectps_256:
- case X86::BI__builtin_ia32_selectps_512:
- case X86::BI__builtin_ia32_selectpd_128:
- case X86::BI__builtin_ia32_selectpd_256:
- case X86::BI__builtin_ia32_selectpd_512:
- return interp__builtin_select(S, OpPC, Call);
+ case clang::X86::BI__builtin_ia32_phaddw128:
+ case clang::X86::BI__builtin_ia32_phaddw256:
+ case clang::X86::BI__builtin_ia32_phaddd128:
+ case clang::X86::BI__builtin_ia32_phaddd256:
+ case clang::X86::BI__builtin_ia32_phaddsw128:
+ case clang::X86::BI__builtin_ia32_phaddsw256:
+ case clang::X86::BI__builtin_ia32_phsubw128:
+ case clang::X86::BI__builtin_ia32_phsubw256:
+ case clang::X86::BI__builtin_ia32_phsubd128:
+ case clang::X86::BI__builtin_ia32_phsubd256:
+ case clang::X86::BI__builtin_ia32_phsubsw128:
+ case clang::X86::BI__builtin_ia32_phsubsw256:
+
+ return interp_builtin_ia32ph_add_sub(S, OpPC, Frame, Call, BuiltinID);
+ case clang::X86::BI__builtin_ia32_haddpd:
+ case clang::X86::BI__builtin_ia32_haddpd256:
+ case clang::X86::BI__builtin_ia32_haddps:
+ case clang::X86::BI__builtin_ia32_haddps256:
+ case clang::X86::BI__builtin_ia32_hsubpd:
+ case clang::X86::BI__builtin_ia32_hsubpd256:
+ case clang::X86::BI__builtin_ia32_hsubps:
+ case clang::X86::BI__builtin_ia32_hsubps256:
+ return interp_builtin_floatph_add_sub(S, OpPC, Frame, Call, BuiltinID);
- default:
- S.FFDiag(S.Current->getLocation(OpPC),
- diag::note_invalid_subexpr_in_const_expr)
- << S.Current->getRange(OpPC);
+ case clang::X86::BI__builtin_ia32_pmuldq128:
+ case clang::X86::BI__builtin_ia32_pmuldq256:
+ case clang::X86::BI__builtin_ia32_pmuldq512:
+ case clang::X86::BI__builtin_ia32_pmuludq128:
+ case clang::X86::BI__builtin_ia32_pmuludq256:
+ case clang::X86::BI__builtin_ia32_pmuludq512:
+ return interp__builtin_ia32_pmul(S, OpPC, Call, BuiltinID);
+
+ case Builtin::BI__builtin_elementwise_fma:
+ return interp__builtin_elementwise_fma(S, OpPC, Call);
+
+ case X86::BI__builtin_ia32_selectb_128:
+ case X86::BI__builtin_ia32_selectb_256:
+ case X86::BI__builtin_ia32_selectb_512:
+ case X86::BI__builtin_ia32_selectw_128:
+ case X86::BI__builtin_ia32_selectw_256:
+ case X86::BI__builtin_ia32_selectw_512:
+ case X86::BI__builtin_ia32_selectd_128:
+ case X86::BI__builtin_ia32_selectd_256:
+ case X86::BI__builtin_ia32_selectd_512:
+ case X86::BI__builtin_ia32_selectq_128:
+ case X86::BI__builtin_ia32_selectq_256:
+ case X86::BI__builtin_ia32_selectq_512:
+ case X86::BI__builtin_ia32_selectph_128:
+ case X86::BI__builtin_ia32_selectph_256:
+ case X86::BI__builtin_ia32_selectph_512:
+ case X86::BI__builtin_ia32_selectpbf_128:
+ case X86::BI__builtin_ia32_selectpbf_256:
+ case X86::BI__builtin_ia32_selectpbf_512:
+ case X86::BI__builtin_ia32_selectps_128:
+ case X86::BI__builtin_ia32_selectps_256:
+ case X86::BI__builtin_ia32_selectps_512:
+ case X86::BI__builtin_ia32_selectpd_128:
+ case X86::BI__builtin_ia32_selectpd_256:
+ case X86::BI__builtin_ia32_selectpd_512:
+ return interp__builtin_select(S, OpPC, Call);
- return false;
+ default:
+ S.FFDiag(S.Current->getLocation(OpPC),
+ diag::note_invalid_subexpr_in_const_expr)
+ << S.Current->getRange(OpPC);
+
+ return false;
}
llvm_unreachable("Unhandled builtin ID");
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 66362d44976c4..774a3adf1a7ca 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -55,6 +55,7 @@
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "llvm/ADT/APFixedPoint.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
@@ -12105,6 +12106,145 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
}
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+
+ case clang::X86::BI__builtin_ia32_phaddw128:
+ case clang::X86::BI__builtin_ia32_phaddw256:
+ case clang::X86::BI__builtin_ia32_phaddd128:
+ case clang::X86::BI__builtin_ia32_phaddd256:
+ case clang::X86::BI__builtin_ia32_phaddsw128:
+ case clang::X86::BI__builtin_ia32_phaddsw256:
+
+ case clang::X86::BI__builtin_ia32_phsubw128:
+ case clang::X86::BI__builtin_ia32_phsubw256:
+ case clang::X86::BI__builtin_ia32_phsubd128:
+ case clang::X86::BI__builtin_ia32_phsubd256:
+ case clang::X86::BI__builtin_ia32_phsubsw128:
+ case clang::X86::BI__builtin_ia32_phsubsw256:{
+ APValue SourceLHS, SourceRHS;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+ return false;
+ QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+ bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
+
+ unsigned SourceLen = SourceLHS.getVectorLength();
+ SmallVector<APValue, 4> ResultElements;
+ ResultElements.reserve(SourceLen);
+ for (unsigned EltNum = 0; EltNum < SourceLen; EltNum += 2) {
+ APSInt LHSA = SourceLHS.getVectorElt(EltNum).getInt();
+ APSInt LHSB = SourceLHS.getVectorElt(EltNum + 1).getInt();
+
+ switch (E->getBuiltinCallee()) {
+ case clang::X86::BI__builtin_ia32_phaddw128:
+ case clang::X86::BI__builtin_ia32_phaddw256:
+ case clang::X86::BI__builtin_ia32_phaddd128:
+ case clang::X86::BI__builtin_ia32_phaddd256:
+ ResultElements.push_back(
+ APValue(APSInt(LHSA+LHSB, DestUnsigned)));
+ break;
+ case clang::X86::BI__builtin_ia32_phaddsw128:
+ case clang::X86::BI__builtin_ia32_phaddsw256:
+ ResultElements.push_back(APValue(APSInt(
+ LHSA.isSigned() ? LHSA.sadd_sat(LHSB) : LHSA.uadd_sat(LHSB),
+ DestUnsigned)));
+ break;
+ case clang::X86::BI__builtin_ia32_phsubw128:
+ case clang::X86::BI__builtin_ia32_phsubw256:
+ case clang::X86::BI__builtin_ia32_phsubd128:
+ case clang::X86::BI__builtin_ia32_phsubd256:
+ ResultElements.push_back(APValue(APSInt(LHSA - LHSB, DestUnsigned)));
+ break;
+ case clang::X86::BI__builtin_ia32_phsubsw128:
+ case clang::X86::BI__builtin_ia32_phsubsw256:
+ ResultElements.push_back(APValue(APSInt(
+ LHSA.isSigned() ? LHSA.ssub_sat(LHSB) : LHSA.usub_sat(LHSB),
+ DestUnsigned)));
+ break;
+ }
+ }
+ for (unsigned EltNum = 0; EltNum < SourceLen; EltNum += 2) {
+ APSInt RHSA = SourceRHS.getVectorElt(EltNum).getInt();
+ APSInt RHSB = SourceRHS.getVectorElt(EltNum + 1).getInt();
+
+ switch (E->getBuiltinCallee()) {
+ case clang::X86::BI__builtin_ia32_phaddw128:
+ case clang::X86::BI__builtin_ia32_phaddw256:
+ case clang::X86::BI__builtin_ia32_phaddd128:
+ case clang::X86::BI__builtin_ia32_phaddd256:
+ ResultElements.push_back(APValue(APSInt(RHSA + RHSB, DestUnsigned)));
+ break;
+ case clang::X86::BI__builtin_ia32_phaddsw128:
+ case clang::X86::BI__builtin_ia32_phaddsw256:
+ ResultElements.push_back(APValue(
+ APSInt(RHSA.isSigned() ? RHSA.sadd_sat(RHSB) : RHSA.uadd_sat(RHSB),
+ DestUnsigned)));
+ break;
+ case clang::X86::BI__builtin_ia32_phsubw128:
+ case clang::X86::BI__builtin_ia32_phsubw256:
+ case clang::X86::BI__builtin_ia32_phsubd128:
+ case clang::X86::BI__builtin_ia32_phsubd256:
+ ResultElements.push_back(APValue(APSInt(RHSA - RHSB, DestUnsigned)));
+ break;
+ case clang::X86::BI__builtin_ia32_phsubsw128:
+ case clang::X86::BI__builtin_ia32_phsubsw256:
+ ResultElements.push_back(APValue(
+ APSInt(RHSA.isSigned() ? RHSA.ssub_sat(RHSB) : RHSA.usub_sat(RHSB),
+ DestUnsigned)));
+ break;
+ }
+ }
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
+ case clang::X86::BI__builtin_ia32_haddpd:
+ case clang::X86::BI__builtin_ia32_haddpd256:
+ case clang::X86::BI__builtin_ia32_haddps:
+ case clang::X86::BI__builtin_ia32_haddps256: {
+ APValue SourceLHS, SourceRHS;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+ return false;
+ unsigned SourceLen = SourceLHS.getVectorLength();
+ SmallVector<APValue, 4> ResultElements;
+ ResultElements.reserve(SourceLen);
+ for (unsigned EltNum = 0; EltNum < SourceLen; EltNum += 2) {
+ APFloat LHSA = SourceLHS.getVectorElt(EltNum).getFloat();
+ APFloat LHSB = SourceLHS.getVectorElt(EltNum + 1).getFloat();
+ LHSA.add(LHSB, APFloat::rmNearestTiesToEven);
+ ResultElements.push_back(APValue(LHSA));
+ }
+ for (unsigned EltNum = 0; EltNum < SourceLen; EltNum += 2) {
+ APFloat RHSA = SourceRHS.getVectorElt(EltNum).getFloat();
+ APFloat RHSB = SourceRHS.getVectorElt(EltNum + 1).getFloat();
+ RHSA.add(RHSB, APFloat::rmNearestTiesToEven);
+ ResultElements.push_back(APValue(RHSA));
+ }
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
+ case clang::X86::BI__builtin_ia32_hsubpd:
+ case clang::X86::BI__builtin_ia32_hsubpd256:
+ case clang::X86::BI__builtin_ia32_hsubps:
+ case clang::X86::BI__builtin_ia32_hsubps256: {
+ APValue SourceLHS, SourceRHS;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+ return false;
+ unsigned SourceLen = SourceLHS.getVectorLength();
+ SmallVector<APValue, 4> ResultElements;
+ ResultElements.reserve(SourceLen);
+ for (unsigned EltNum = 0; EltNum < SourceLen; EltNum += 2) {
+ APFloat LHSA = SourceLHS.getVectorElt(EltNum).getFloat();
+ APFloat LHSB = SourceLHS.getVectorElt(EltNum + 1).getFloat();
+ LHSA.subtract(LHSB, APFloat::rmNearestTiesToEven);
+ ResultElements.push_back(APValue(LHSA));
+ }
+ for (unsigned EltNum = 0; EltNum < SourceLen; EltNum += 2) {
+ APFloat RHSA = SourceRHS.getVectorElt(EltNum).getFloat();
+ APFloat RHSB = SourceRHS.getVectorElt(EltNum + 1).getFloat();
+ RHSA.subtract(RHSB, APFloat::rmNearestTiesToEven);
+ ResultElements.push_back(APValue(RHSA));
+ }
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
}
}
@@ -12197,67 +12337,71 @@ bool VectorExprEvaluator::VisitShuffleVectorExpr(const ShuffleVectorExpr *E) {
namespace {
class ArrayExprEvaluator
- : public ExprEvaluatorBase<ArrayExprEvaluator> {
- const LValue &This;
- APValue &Result;
- public:
-
- ArrayExprEvaluator(EvalInfo &Info, const LValue &This, APValue &Result)
- : ExprEvaluatorBaseTy(Info), This(This), Result(Result) {}
-
- bool Success(const APValue &V, const Expr *E) {
- assert(V.isArray() && "expected array");
- Result = V;
- return true;
- }
-
- bool ZeroInitialization(const Expr *E) {
- const ConstantArrayType *CAT =
- Info.Ctx.getAsConstantArrayType(E->getType());
- if (!CAT) {
- if (E->getType()->isIncompleteArrayType()) {
- // We can be asked to zero-initialize a flexible array member; this
- // is represented as an ImplicitValueInitExpr of incomplete array
- // type. In this case, the array has zero elements.
- Result = APValue(APValue::UninitArray(), 0, 0);
- return true;
- }
- // FIXME: We could handle VLAs here.
- return Error(E);
- }
-
- Result = APValue(APValue::UninitArray(), 0, CAT->getZExtSize());
- if (!Result.hasArrayFiller())
- return true;
-
- // Zero-initialize all elements.
- LValue Subobject = This;
- Subobject.addArray(Info, E, CAT);
- ImplicitValueInitExpr VIE(CAT->getElementType());
- return EvaluateInPlace(Result.getArrayFiller(), Info, Subobject, &VIE);
- }
-
- bool VisitCallExpr(const CallExpr *E) {
- return handleCallExpr(E, Result, &This);
- }
- bool VisitInitListExpr(const InitListExpr *E,
- QualType AllocType = QualType());
- bool VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E);
- bool VisitCXXConstructExpr(const CXXConstructExpr *E);
- bool VisitCXXConstructExpr(const CXXConstructExpr *E,
- const LValue &Subobject,
- APValue *Value, QualType Type);
- bool VisitStringLiteral(const StringLiteral *E,
- QualType AllocType = QualType()) {
- expandStringLiteral(Info, E, Result, AllocType);
- return true;
- }
- bool VisitCXXParenListInitExpr(const CXXParenListInitExpr *E);
- bool VisitCXXParenListOrInitListExpr(const Expr *ExprToVisit,
- ArrayRef<Expr *> Args,
- const Expr *ArrayFiller,
- QualType AllocType = QualType());
- };
+ :
+ public
+ ExprEvaluatorBase<ArrayExprEvaluator> {
+ const LValue &This;
+ APValue & Result;
+
+ public:
+ ArrayExprEvaluator(EvalInfo & Info, const LValue &This,
+ APValue &Result)
+ : ExprEvaluatorBaseTy(Info), This(This), Result(Result) {}
+
+ bool Success(const APValue &V, const Expr *E) {
+ assert(V.isArray() && "expected array");
+ Result = V;
+ return true;
+ }
+
+ bool ZeroInitialization(const Expr *E) {
+ const ConstantArrayType *CAT =
+ Info.Ctx.getAsConstantArrayType(E->getType());
+ if (!CAT) {
+ if (E->getType()->isIncompleteArrayType()) {
+ // We can be asked to zero-initialize a flexible array member;
+ // this is represented as an ImplicitValueInitExpr of
+ // incomplete array type. In this case, the array has zero
+ // elements.
+ Result = APValue(APValue::UninitArray(), 0, 0);
+ return true;
+ }
+ // FIXME: We could handle VLAs here.
+ return Error(E);
+ }
+
+ Result = APValue(APValue::UninitArray(), 0, CAT->getZExtSize());
+ if (!Result.hasArrayFiller())
+ return true;
+
+ // Zero-initialize all elements.
+ LValue Subobject = This;
+ Subobject.addArray(Info, E, CAT);
+ ImplicitValueInitExpr VIE(CAT->getElementType());
+ return EvaluateInPlace(Result.getArrayFiller(), Info, Subobject,
+ &VIE);
+ }
+
+ bool VisitCallExpr(const CallExpr *E) {
+ return handleCallExpr(E, Result, &This);
+ }
+ bool VisitInitListExpr(const InitListExpr *E,
+ QualType AllocType = QualType());
+ bool VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E);
+ bool VisitCXXConstructExpr(const CXXConstructExpr *E);
+ bool VisitCXXConstructExpr(const CXXConstructExpr *E,
+ const LValue &Subobject, APValue *Value,
+ QualType Type);
+ bool VisitStringLiteral(const StringLiteral *E,
+ QualType AllocType = QualType()) {
+ expandStringLiteral(Info, E, Result, AllocType);
+ return true;
+ }
+ bool VisitCXXParenListInitExpr(const CXXParenListInitExpr *E);
+ bool VisitCXXParenListOrInitListExpr(
+ const Expr *ExprToVisit, ArrayRef<Expr *> Args,
+ const Expr *ArrayFiller, QualType AllocType = QualType());
+ };
} // end anonymous namespace
static bool EvaluateArray(const Expr *E, const LValue &This,
>From df6242e4b74e8170cd28a2f9663aa974a4b0b12b Mon Sep 17 00:00:00 2001
From: whyuuwang <whyuuwang at tencent.com>
Date: Sat, 13 Sep 2025 20:58:15 +0800
Subject: [PATCH 4/6] adjust unit test #146940
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 162 ++++++++++++-----------
clang/test/CodeGen/X86/avx-builtins.c | 39 ++----
clang/test/CodeGen/X86/avx2-builtins.c | 87 ++++--------
clang/test/CodeGen/X86/mmx-builtins.c | 54 +-------
clang/test/CodeGen/X86/ssse3-builtins.c | 54 +-------
5 files changed, 128 insertions(+), 268 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index f6027c78935c3..9d5d70698b8d3 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2739,9 +2739,9 @@ static bool interp__builtin_ia32_pmul(InterpState &S, CodePtr OpPC,
}
static bool interp_builtin_ia32ph_add_sub(InterpState &S, CodePtr OpPC,
- const InterpFrame *Frame,
- const CallExpr *Call,
- uint32_t BuiltinID) {
+ const InterpFrame *Frame,
+ const CallExpr *Call,
+ uint32_t BuiltinID) {
assert(Call->getArg(0)->getType()->isVectorType() &&
Call->getArg(1)->getType()->isVectorType());
const Pointer &RHS = S.Stk.pop<Pointer>();
@@ -2752,7 +2752,8 @@ static bool interp_builtin_ia32ph_add_sub(InterpState &S, CodePtr OpPC,
PrimType ElemT = *S.getContext().classify(VT->getElementType());
unsigned SourceLen = VT->getNumElements();
assert(SourceLen % 2 == 0 &&
- Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements() == SourceLen);
+ Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements() ==
+ SourceLen);
PrimType DstElemT = *S.getContext().classify(
Call->getType()->castAs<VectorType>()->getElementType());
unsigned DstElem = 0;
@@ -2774,16 +2775,17 @@ static bool interp_builtin_ia32ph_add_sub(InterpState &S, CodePtr OpPC,
APSInt Elem2;
INT_TYPE_SWITCH_NO_BOOL(ElemT, {
Elem1 = LHS.elem<T>(I).toAPSInt();
- Elem2 = LHS.elem<T>(I+1).toAPSInt();
+ Elem2 = LHS.elem<T>(I + 1).toAPSInt();
});
APSInt Result;
if (IsAdd) {
- if (IsSaturating) {
- Result = APSInt(Elem1.sadd_sat(Elem2), /*IsUnsigned=*/Elem1.isUnsigned());
- }else{
- Result = APSInt(Elem1 + Elem2, /*IsUnsigned=*/Elem1.isUnsigned());
- }
- }else{
+ if (IsSaturating) {
+ Result =
+ APSInt(Elem1.sadd_sat(Elem2), /*IsUnsigned=*/Elem1.isUnsigned());
+ } else {
+ Result = APSInt(Elem1 + Elem2, /*IsUnsigned=*/Elem1.isUnsigned());
+ }
+ } else {
if (IsSaturating) {
Result =
APSInt(Elem1.ssub_sat(Elem2), /*IsUnsigned=*/Elem1.isUnsigned());
@@ -2812,7 +2814,8 @@ static bool interp_builtin_ia32ph_add_sub(InterpState &S, CodePtr OpPC,
}
} else {
if (IsSaturating) {
- Result = APSInt(Elem1.ssub_sat(Elem2), /*IsUnsigned=*/Elem1.isUnsigned());
+ Result =
+ APSInt(Elem1.ssub_sat(Elem2), /*IsUnsigned=*/Elem1.isUnsigned());
} else {
Result = APSInt(Elem1 - Elem2, /*IsUnsigned=*/Elem1.isUnsigned());
}
@@ -2826,15 +2829,15 @@ static bool interp_builtin_ia32ph_add_sub(InterpState &S, CodePtr OpPC,
}
static bool interp_builtin_floatph_add_sub(InterpState &S, CodePtr OpPC,
- const InterpFrame *Frame,
- const CallExpr *Call,
- uint32_t BuiltinID) {
+ const InterpFrame *Frame,
+ const CallExpr *Call,
+ uint32_t BuiltinID) {
assert(Call->getArg(0)->getType()->isVectorType() &&
Call->getArg(1)->getType()->isVectorType());
const Pointer &RHS = S.Stk.pop<Pointer>();
const Pointer &LHS = S.Stk.pop<Pointer>();
const Pointer &Dst = S.Stk.peek<Pointer>();
-
+
FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts());
llvm::RoundingMode RM = getRoundingMode(FPO);
const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
@@ -2851,7 +2854,6 @@ static bool interp_builtin_floatph_add_sub(InterpState &S, CodePtr OpPC,
for (unsigned I = 0; I != SourceLen; I += 2) {
APFloat Elem1 = LHS.elem<T>(I).getAPFloat();
APFloat Elem2 = LHS.elem<T>(I + 1).getAPFloat();
-
if (IsAdd) {
Elem1.add(Elem2, RM);
} else {
@@ -3495,71 +3497,71 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case clang::X86::BI__builtin_ia32_phaddw128:
case clang::X86::BI__builtin_ia32_phaddw256:
- case clang::X86::BI__builtin_ia32_phaddd128:
- case clang::X86::BI__builtin_ia32_phaddd256:
- case clang::X86::BI__builtin_ia32_phaddsw128:
- case clang::X86::BI__builtin_ia32_phaddsw256:
- case clang::X86::BI__builtin_ia32_phsubw128:
- case clang::X86::BI__builtin_ia32_phsubw256:
- case clang::X86::BI__builtin_ia32_phsubd128:
- case clang::X86::BI__builtin_ia32_phsubd256:
- case clang::X86::BI__builtin_ia32_phsubsw128:
- case clang::X86::BI__builtin_ia32_phsubsw256:
-
- return interp_builtin_ia32ph_add_sub(S, OpPC, Frame, Call, BuiltinID);
- case clang::X86::BI__builtin_ia32_haddpd:
- case clang::X86::BI__builtin_ia32_haddpd256:
- case clang::X86::BI__builtin_ia32_haddps:
- case clang::X86::BI__builtin_ia32_haddps256:
- case clang::X86::BI__builtin_ia32_hsubpd:
- case clang::X86::BI__builtin_ia32_hsubpd256:
- case clang::X86::BI__builtin_ia32_hsubps:
- case clang::X86::BI__builtin_ia32_hsubps256:
- return interp_builtin_floatph_add_sub(S, OpPC, Frame, Call, BuiltinID);
-
- case clang::X86::BI__builtin_ia32_pmuldq128:
- case clang::X86::BI__builtin_ia32_pmuldq256:
- case clang::X86::BI__builtin_ia32_pmuldq512:
- case clang::X86::BI__builtin_ia32_pmuludq128:
- case clang::X86::BI__builtin_ia32_pmuludq256:
- case clang::X86::BI__builtin_ia32_pmuludq512:
- return interp__builtin_ia32_pmul(S, OpPC, Call, BuiltinID);
-
- case Builtin::BI__builtin_elementwise_fma:
- return interp__builtin_elementwise_fma(S, OpPC, Call);
-
- case X86::BI__builtin_ia32_selectb_128:
- case X86::BI__builtin_ia32_selectb_256:
- case X86::BI__builtin_ia32_selectb_512:
- case X86::BI__builtin_ia32_selectw_128:
- case X86::BI__builtin_ia32_selectw_256:
- case X86::BI__builtin_ia32_selectw_512:
- case X86::BI__builtin_ia32_selectd_128:
- case X86::BI__builtin_ia32_selectd_256:
- case X86::BI__builtin_ia32_selectd_512:
- case X86::BI__builtin_ia32_selectq_128:
- case X86::BI__builtin_ia32_selectq_256:
- case X86::BI__builtin_ia32_selectq_512:
- case X86::BI__builtin_ia32_selectph_128:
- case X86::BI__builtin_ia32_selectph_256:
- case X86::BI__builtin_ia32_selectph_512:
- case X86::BI__builtin_ia32_selectpbf_128:
- case X86::BI__builtin_ia32_selectpbf_256:
- case X86::BI__builtin_ia32_selectpbf_512:
- case X86::BI__builtin_ia32_selectps_128:
- case X86::BI__builtin_ia32_selectps_256:
- case X86::BI__builtin_ia32_selectps_512:
- case X86::BI__builtin_ia32_selectpd_128:
- case X86::BI__builtin_ia32_selectpd_256:
- case X86::BI__builtin_ia32_selectpd_512:
- return interp__builtin_select(S, OpPC, Call);
+ case clang::X86::BI__builtin_ia32_phaddd128:
+ case clang::X86::BI__builtin_ia32_phaddd256:
+ case clang::X86::BI__builtin_ia32_phaddsw128:
+ case clang::X86::BI__builtin_ia32_phaddsw256:
+ case clang::X86::BI__builtin_ia32_phsubw128:
+ case clang::X86::BI__builtin_ia32_phsubw256:
+ case clang::X86::BI__builtin_ia32_phsubd128:
+ case clang::X86::BI__builtin_ia32_phsubd256:
+ case clang::X86::BI__builtin_ia32_phsubsw128:
+ case clang::X86::BI__builtin_ia32_phsubsw256:
+ return interp_builtin_ia32ph_add_sub(S, OpPC, Frame, Call, BuiltinID);
+
+ case clang::X86::BI__builtin_ia32_haddpd:
+ case clang::X86::BI__builtin_ia32_haddpd256:
+ case clang::X86::BI__builtin_ia32_haddps:
+ case clang::X86::BI__builtin_ia32_haddps256:
+ case clang::X86::BI__builtin_ia32_hsubpd:
+ case clang::X86::BI__builtin_ia32_hsubpd256:
+ case clang::X86::BI__builtin_ia32_hsubps:
+ case clang::X86::BI__builtin_ia32_hsubps256:
+ return interp_builtin_floatph_add_sub(S, OpPC, Frame, Call, BuiltinID);
+
+ case clang::X86::BI__builtin_ia32_pmuldq128:
+ case clang::X86::BI__builtin_ia32_pmuldq256:
+ case clang::X86::BI__builtin_ia32_pmuldq512:
+ case clang::X86::BI__builtin_ia32_pmuludq128:
+ case clang::X86::BI__builtin_ia32_pmuludq256:
+ case clang::X86::BI__builtin_ia32_pmuludq512:
+ return interp__builtin_ia32_pmul(S, OpPC, Call, BuiltinID);
+
+ case Builtin::BI__builtin_elementwise_fma:
+ return interp__builtin_elementwise_fma(S, OpPC, Call);
+
+ case X86::BI__builtin_ia32_selectb_128:
+ case X86::BI__builtin_ia32_selectb_256:
+ case X86::BI__builtin_ia32_selectb_512:
+ case X86::BI__builtin_ia32_selectw_128:
+ case X86::BI__builtin_ia32_selectw_256:
+ case X86::BI__builtin_ia32_selectw_512:
+ case X86::BI__builtin_ia32_selectd_128:
+ case X86::BI__builtin_ia32_selectd_256:
+ case X86::BI__builtin_ia32_selectd_512:
+ case X86::BI__builtin_ia32_selectq_128:
+ case X86::BI__builtin_ia32_selectq_256:
+ case X86::BI__builtin_ia32_selectq_512:
+ case X86::BI__builtin_ia32_selectph_128:
+ case X86::BI__builtin_ia32_selectph_256:
+ case X86::BI__builtin_ia32_selectph_512:
+ case X86::BI__builtin_ia32_selectpbf_128:
+ case X86::BI__builtin_ia32_selectpbf_256:
+ case X86::BI__builtin_ia32_selectpbf_512:
+ case X86::BI__builtin_ia32_selectps_128:
+ case X86::BI__builtin_ia32_selectps_256:
+ case X86::BI__builtin_ia32_selectps_512:
+ case X86::BI__builtin_ia32_selectpd_128:
+ case X86::BI__builtin_ia32_selectpd_256:
+ case X86::BI__builtin_ia32_selectpd_512:
+ return interp__builtin_select(S, OpPC, Call);
- default:
- S.FFDiag(S.Current->getLocation(OpPC),
- diag::note_invalid_subexpr_in_const_expr)
- << S.Current->getRange(OpPC);
+ default:
+ S.FFDiag(S.Current->getLocation(OpPC),
+ diag::note_invalid_subexpr_in_const_expr)
+ << S.Current->getRange(OpPC);
- return false;
+ return false;
}
llvm_unreachable("Unhandled builtin ID");
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 9857b84c94112..4e21cfea41553 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -1083,53 +1083,34 @@ __m256d test_mm256_hadd_pd(__m256d A, __m256d B) {
// CHECK: call {{.*}}<4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
return _mm256_hadd_pd(A, B);
}
-constexpr bool test_mm256_hadd_epi32_constexpr() {
- constexpr __m256d a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
- constexpr __m256d b = _mm256_set_pd(5.0, 6.0, 7.0, 8.0);
- constexpr __m256d result = _mm256_hadd_pd(a, b);
- return match_m256d(result,1.0+2.0,3.0+4.0,5.0+6.0,7.0+8.0);
-}
-TEST_CONSTEXPR(test_mm256_hadd_epi32_constexpr())
+TEST_CONSTEXPR(match_m256d(_mm256_hadd_pd((__m256d){1.0, 2.0, 3.0, 4.0}, (__m256d){5.0, 6.0, 7.0, 8.0}), 3.0, 11.0, 7.0, 15.0));
__m256 test_mm256_hadd_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_hadd_ps
// CHECK: call {{.*}}<8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
return _mm256_hadd_ps(A, B);
}
-constexpr bool test_mm256_hadd_ps_constexpr() {
- constexpr __m256 a = _mm256_set_ps(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
- constexpr __m256 b = _mm256_set_ps(9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f);
- constexpr __m256 result = _mm256_hadd_ps(a, b);
- return match_m256(result,1.0f+2.0f,3.0f+4.0f,5.0f+6.0f,7.0f+8.0f,
- 9.0f+10.0f,11.0f+12.0f,13.0f+14.0f,15.0f+16.0f);
-}
-TEST_CONSTEXPR(test_mm256_hadd_ps_constexpr())
+TEST_CONSTEXPR(match_m256(_mm256_hadd_ps(
+    (__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f},
+    (__m256){9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}),
+    3.0f, 7.0f, 19.0f, 23.0f, 11.0f, 15.0f, 27.0f, 31.0f));
__m256d test_mm256_hsub_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_hsub_pd
// CHECK: call {{.*}}<4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
return _mm256_hsub_pd(A, B);
}
-constexpr bool test_mm256_hsub_pd_constexpr() {
- constexpr __m256d a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
- constexpr __m256d b = _mm256_set_pd(5.0, 6.0, 7.0, 8.0);
- constexpr __m256d result = _mm256_hsub_pd(a, b);
- return match_m256d(result,1.0-2.0,3.0-4.0,5.0-6.0,7.0-8.0);
-}
-TEST_CONSTEXPR(test_mm256_hsub_pd_constexpr())
+TEST_CONSTEXPR(match_m256d(_mm256_hsub_pd((__m256d){1.0, 2.0, 3.0, 4.0}, (__m256d){5.0, 6.0, 7.0, 8.0}), -1.0,-1.0,-1.0,-1.0));
__m256 test_mm256_hsub_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_hsub_ps
// CHECK: call {{.*}}<8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
return _mm256_hsub_ps(A, B);
}
-constexpr bool test_mm256_hsub_ps_constexpr() {
- constexpr __m256 a = _mm256_set_ps(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
- constexpr __m256 b = _mm256_set_ps(9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f);
- constexpr __m256 result = _mm256_hsub_ps(a, b);
- return match_m256(result,1.0f-2.0f,3.0f-4.0f,5.0f-6.0f,7.0f-8.0f,
- 9.0f-10.0f,11.0f-12.0f,13.0f-14.0f,15.0f-16.0f);
-}
+TEST_CONSTEXPR(match_m256(_mm256_hsub_ps(
+    (__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f},
+    (__m256){9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}),
+    -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f));
__m256i test_mm256_insert_epi8(__m256i x, char b) {
// CHECK-LABEL: test_mm256_insert_epi8
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index c34594cf78a8e..a9095de4fe373 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -461,99 +461,60 @@ __m256i test_mm256_hadd_epi16(__m256i a, __m256i b) {
// CHECK: call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_hadd_epi16(a, b);
}
-
-constexpr bool test_mm256_hadd_epi16_constexpr() {
- constexpr __m256i a = _mm256_setr_epi16(1, 2, 3, 4, 5, 6, 7,
- 8,9,10,11,12,13,14,15,16);
- constexpr __m256i b = _mm256_setr_epi16(17,18,19,20,21,22,23,
- 24,25,26,27,28,29,30,31,32);
-
- constexpr __m256i result = _mm256_hadd_epi16(a, b);
- return match_v16si(result,1+2,3+4,5+6,7+8,9+10,11+12,13+14,15+16,17+18,19+20,21+22,23+24,25+26,27+28,29+30,31+32);
-}
-TEST_CONSTEXPR(test_mm256_hadd_epi16_constexpr())
+TEST_CONSTEXPR(match_v16hi(_mm256_hadd_epi16(
+    (__m256i)(__v16hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16},
+    (__m256i)(__v16hi){17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32}),
+    3,7,11,15,35,39,43,47,19,23,27,31,51,55,59,63));
__m256i test_mm256_hadd_epi32(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_hadd_epi32
// CHECK: call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
return _mm256_hadd_epi32(a, b);
}
-
-constexpr bool test_mm256_hadd_epi32_constexpr() {
- constexpr __m256i a = _mm256_setr_epi32(10, 20, 30, 40, 50, 60, 70, 80);
- constexpr __m256i b = _mm256_setr_epi32(5, 15, 25, 35, 45, 55, 65, 75);
-
- constexpr __m256i result = _mm256_hadd_epi32(a, b);
- return match_v8si(result,10+20,30+40,50+60,70+80,5+15,25+35, 45+55,65+75);
-}
-TEST_CONSTEXPR(test_mm256_hadd_epi32_constexpr())
+TEST_CONSTEXPR(match_v8si(_mm256_hadd_epi32(
+    (__m256i)(__v8si){10, 20, 30, 40, 50, 60, 70, 80},
+    (__m256i)(__v8si){5, 15, 25, 35, 45, 55, 65, 75}),
+    30, 70, 20, 60, 110, 150, 100, 140));
__m256i test_mm256_hadds_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_hadds_epi16
// CHECK:call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_hadds_epi16(a, b);
}
-constexpr bool test_mm256_hadds_epi16_constexpr() {
- constexpr __m256i a = _mm256_setr_epi16(32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767);
- constexpr __m256i b = _mm256_setr_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1);
- constexpr __m256i result = _mm256_hadds_epi16(a, b);
-
- return match_v16si(result, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767);
-}
-TEST_CONSTEXPR(test_mm256_hadds_epi16_constexpr())
+TEST_CONSTEXPR(match_v16hi(_mm256_hadds_epi16(
+    (__m256i)(__v16hi){32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767},
+    (__m256i)(__v16hi){32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767}),
+    32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767));
__m256i test_mm256_hsub_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_hsub_epi16
// CHECK: call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_hsub_epi16(a, b);
}
-
-constexpr bool test_mm256_hsub_epi16_constexpr() {
- constexpr __m256i a = _mm256_setr_epi16(1, 2, 3, 4, 5, 6, 7,
- 8,9,10,11,12,13,14,15,16);
- constexpr __m256i b = _mm256_setr_epi16(17,18,19,20,21,22,23,
- 24,25,26,27,28,29,30,31,32);
-
- constexpr __m256i result = _mm256_hsub_epi16(a, b);
- return match_v16si(result,1-2,3-4,5-6,7-8,9-10,11-12,13-14,15-16,17-18,19-20,21-22,23-24,25-26,27-28,29-30,31-32);
-}
-TEST_CONSTEXPR(test_mm256_hsub_epi16_constexpr())
+TEST_CONSTEXPR(match_v16hi(_mm256_hsub_epi16(
+ (__m256i)(__v16hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16},
+ (__m256i)(__v16hi){17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32}),
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1));
__m256i test_mm256_hsub_epi32(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_hsub_epi32
// CHECK: call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
return _mm256_hsub_epi32(a, b);
}
-
-constexpr bool test_mm256_hsub_epi32_constexpr() {
- constexpr __m256i a = _mm256_setr_epi32(10, 20, 30, 40, 50, 60, 70, 80);
- constexpr __m256i b = _mm256_setr_epi32(5, 15, 25, 35, 45, 55, 65, 75);
-
- constexpr __m256i result = _mm256_hsub_epi32(a, b);
- return match_v8si(result,10-20,30-40,50-60,70-80,5-15,25-35, 45-55,65-75);
-}
-TEST_CONSTEXPR(test_mm256_hsub_epi32_constexpr())
+TEST_CONSTEXPR(match_v8si(_mm256_hsub_epi32(
+    (__m256i)(__v8si){10, 20, 30, 40, 50, 60, 70, 80},
+    (__m256i)(__v8si){5, 15, 25, 35, 45, 55, 65, 75}),
+    -10, -10, -10, -10, -10, -10, -10, -10));
__m256i test_mm256_hsubs_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_hsubs_epi16
// CHECK:call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_hsubs_epi16(a, b);
}
-
-constexpr bool test_mm256_hsubs_epi16_constexpr() {
- constexpr __m256i a = _mm256_setr_epi16(32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767);
- constexpr __m256i b = _mm256_setr_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
- constexpr __m256i result3 = _mm256_hsubs_epi16(a, b);
-
- return match_v16si(result3, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767);
-}
-TEST_CONSTEXPR(test_mm256_hsubs_epi16_constexpr())
-
+TEST_CONSTEXPR(match_v16hi(_mm256_hsubs_epi16(
+    (__m256i)(__v16hi){32767, -1, 32767, -1, 32767, -1, 32767, -1, 32767, -1, 32767, -1, 32767, -1, 32767, -1},
+    (__m256i)(__v16hi){32767, -1, 32767, -1, 32767, -1, 32767, -1, 32767, -1, 32767, -1, 32767, -1, 32767, -1}),
+    32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767));
__m128i test_mm_i32gather_epi32(int const *b, __m128i c) {
// CHECK-LABEL: test_mm_i32gather_epi32
diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c
index 70f521e380dd4..944af98ffcadc 100644
--- a/clang/test/CodeGen/X86/mmx-builtins.c
+++ b/clang/test/CodeGen/X86/mmx-builtins.c
@@ -309,84 +309,42 @@ __m64 test_mm_hadd_pi16(__m64 a, __m64 b) {
// CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.w.128(
return _mm_hadd_pi16(a, b);
}
-constexpr bool test_mm_hadd_pi16_constexpr() {
- constexpr __m64 a = _mm_setr_pi16(1, 2, 3, 4);
- constexpr __m64 b = _mm_setr_pi16(5,6,7,8);
-
- constexpr __m64 result = _mm_hadd_pi16(a, b);
- return match_v4si(result,1+2,3+4,5+6,7+8);
-}
-TEST_CONSTEXPR(test_mm_hadd_pi16_constexpr())
+TEST_CONSTEXPR(match_v4hi(_mm_hadd_pi16((__m64)(__v4hi){1,2,3,4},(__m64)(__v4hi){5,6,7,8}),3,7,11,15));
__m64 test_mm_hadd_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hadd_pi32
// CHECK: call <4 x i32> @llvm.x86.ssse3.phadd.d.128(
return _mm_hadd_pi32(a, b);
}
-constexpr bool test_mm_hadd_pi32_constexpr() {
- constexpr __m64 a = _mm_setr_pi32(1, 2);
- constexpr __m64 b = _mm_setr_pi32(3, 4);
-
- constexpr __m64 result = _mm_hadd_pi32(a, b);
- return match_v2si(result,1+2,3+4);
-}
-TEST_CONSTEXPR(test_mm_hadd_pi32_constexpr())
+TEST_CONSTEXPR(match_v2si(_mm_hadd_pi32((__m64)(__v2si){1,2},(__m64)(__v2si){3,4}),3,7));
__m64 test_mm_hadds_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hadds_pi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(
return _mm_hadds_pi16(a, b);
}
-constexpr bool test_mm_hadds_pi16_constexpr() {
- constexpr __m64 a = _mm_setr_pi16(32767, 32767, 32767, 32767);
- constexpr __m64 b = _mm_setr_pi16(1,1,1,1);
-
- constexpr __m64 result = _mm_hadds_pi16(a, b);
- return match_v4si(result,32767, 32767, 32767, 32767);
-}
-TEST_CONSTEXPR(test_mm_hadds_pi16_constexpr())
+TEST_CONSTEXPR(match_v4hi(_mm_hadds_pi16((__m64)(__v4hi){32767, 32767, 32767, 32767},(__m64)(__v4hi){32767, 32767, 32767, 32767}),32767, 32767, 32767, 32767));
__m64 test_mm_hsub_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hsub_pi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.w.128(
return _mm_hsub_pi16(a, b);
}
-constexpr bool test_mm_hsub_pi16_constexpr() {
- constexpr __m64 a = _mm_setr_pi16(1, 2, 3, 4);
- constexpr __m64 b = _mm_setr_pi16(5,6,7,8);
-
- constexpr __m64 result = _mm_hsub_pi16(a, b);
- return match_v4si(result,1-2,3-4,5-6,7-8);
-}
-TEST_CONSTEXPR(test_mm_hsub_pi16_constexpr())
+TEST_CONSTEXPR(match_v4hi(_mm_hsub_pi16((__m64)(__v4hi){1,2,3,4},(__m64)(__v4hi){5,6,7,8}),-1,-1,-1,-1));
__m64 test_mm_hsub_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hsub_pi32
// CHECK: call <4 x i32> @llvm.x86.ssse3.phsub.d.128(
return _mm_hsub_pi32(a, b);
}
-constexpr bool test_mm_hsub_pi32_constexpr() {
- constexpr __m64 a = _mm_setr_pi32(1, 2);
- constexpr __m64 b = _mm_setr_pi32(3, 4);
-
- constexpr __m64 result = _mm_hsub_pi32(a, b);
- return match_v2si(result,1-2,3-4);
-}
-TEST_CONSTEXPR(test_mm_hsub_pi32_constexpr())
+TEST_CONSTEXPR(match_v2si(_mm_hsub_pi32((__m64)(__v2si){1,2},(__m64)(__v2si){3,4}),-1,-1));
__m64 test_mm_hsubs_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hsubs_pi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(
return _mm_hsubs_pi16(a, b);
}
-constexpr bool test_mm_hsubs_pi16_constexpr() {
- constexpr __m64 a = _mm_setr_pi16(32767, 32767, 32767, 32767);
- constexpr __m64 b = _mm_setr_pi16(-1,-1,-1,-1);
-
- constexpr __m64 result = _mm_hsubs_pi16(a, b);
- return match_v4si(result,32767, 32767, 32767, 32767);
-}
-TEST_CONSTEXPR(test_mm_hsubs_pi16_constexpr())
+TEST_CONSTEXPR(match_v4hi(_mm_hsubs_pi16((__m64)(__v4hi){32767, -1, 32767, -1},(__m64)(__v4hi){32767, -1, 32767, -1}), 32767, 32767, 32767, 32767));
__m64 test_mm_insert_pi16(__m64 a, int d) {
// CHECK-LABEL: test_mm_insert_pi16
diff --git a/clang/test/CodeGen/X86/ssse3-builtins.c b/clang/test/CodeGen/X86/ssse3-builtins.c
index bd0ef43278217..61c7ee31af96f 100644
--- a/clang/test/CodeGen/X86/ssse3-builtins.c
+++ b/clang/test/CodeGen/X86/ssse3-builtins.c
@@ -60,42 +60,21 @@ __m128i test_mm_hadd_epi16(__m128i a, __m128i b) {
// CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_hadd_epi16(a, b);
}
-constexpr bool test_mm_hadd_epi16_constexpr() {
- constexpr __m128i a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
- constexpr __m128i b = _mm_setr_epi16(17,18,19,20,21,22,23,24);
-
- constexpr __m128i result = _mm_hadd_epi16(a, b);
- return match_v8si(result,1+2,3+4,5+6,7+8,17+18,19+20,21+22,23+24);
-}
-TEST_CONSTEXPR(test_mm_hadd_epi16_constexpr())
+TEST_CONSTEXPR(match_v8hi(_mm_hadd_epi16((__m128i)(__v8hi){1,2,3,4,5,6,7,8}, (__m128i)(__v8hi){17,18,19,20,21,22,23,24}), 3,7,11,15,35,39,43,47));
__m128i test_mm_hadd_epi32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_hadd_epi32
// CHECK: call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm_hadd_epi32(a, b);
}
-constexpr bool test_mm_hadd_epi32_constexpr() {
- constexpr __m128i a = _mm_setr_epi32(1, 2, 3, 4);
- constexpr __m128i b = _mm_setr_epi32(5,6,7,8);
-
- constexpr __m128i result = _mm_hadd_epi32(a, b);
- return match_v4si(result,1+2,3+4,5+6,7+8);
-}
-TEST_CONSTEXPR(test_mm_hadd_epi32_constexpr())
+TEST_CONSTEXPR(match_v4si(_mm_hadd_epi32((__m128i)(__v4si){1,2,3,4}, (__m128i)(__v4si){5,6,7,8}), 3,7,11,15));
__m128i test_mm_hadds_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_hadds_epi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_hadds_epi16(a, b);
}
-constexpr bool test_mm_hadds_epi16_constexpr() {
- constexpr __m128i a = _mm_setr_epi16(32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767);
- constexpr __m128i b = _mm_setr_epi16(1, 1, 1, 1, 1, 1, 1, 1);
- constexpr __m128i result = _mm_hadds_epi16(a, b);
-
- return match_v8si(result, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767);
-}
-TEST_CONSTEXPR(test_mm_hadds_epi16_constexpr())
+TEST_CONSTEXPR(match_v8hi(_mm_hadds_epi16((__m128i)(__v8hi){30000,30000,30000,30000,30000,30000,30000,30000}, (__m128i)(__v8hi){30000,30000,30000,30000,30000,30000,30000,30000}), 32767,32767,32767,32767,32767,32767,32767,32767));
__m128i test_mm_hsub_epi16(__m128i a, __m128i b) {
@@ -103,42 +82,21 @@ __m128i test_mm_hsub_epi16(__m128i a, __m128i b) {
// CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_hsub_epi16(a, b);
}
-constexpr bool test_mm_hsub_epi16_constexpr() {
- constexpr __m128i a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
- constexpr __m128i b = _mm_setr_epi16(9,10,11,12,13,14,15,16);
-
- constexpr __m128i result = _mm_hsub_epi16(a, b);
- return match_v8si(result,1-2,3-4,5-6,7-8,9-10,11-12,13-14,15-16);
-}
-TEST_CONSTEXPR(test_mm_hsub_epi16_constexpr())
+TEST_CONSTEXPR(match_v8hi(_mm_hsub_epi16((__m128i)(__v8hi){1,2,3,4,5,6,7,8}, (__m128i)(__v8hi){9,10,11,12,13,14,15,16}), -1,-1,-1,-1,-1,-1,-1,-1));
__m128i test_mm_hsub_epi32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_hsub_epi32
// CHECK: call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm_hsub_epi32(a, b);
}
-constexpr bool test_mm_hsub_epi32_constexpr() {
- constexpr __m128i a = _mm_setr_epi32(1, 2, 3, 4);
- constexpr __m128i b = _mm_setr_epi32(5,6,7,8);
-
- constexpr __m128i result = _mm_hsub_epi32(a, b);
- return match_v4si(result,1-2,3-4,5-6,7-8);
-}
-TEST_CONSTEXPR(test_mm_hsub_epi32_constexpr())
+TEST_CONSTEXPR(match_v4si(_mm_hsub_epi32((__m128i)(__v4si){4,3,2,1}, (__m128i)(__v4si){8,7,6,5}), 1,1,1,1));
__m128i test_mm_hsubs_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_hsubs_epi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_hsubs_epi16(a, b);
}
-constexpr bool test_mm_hsubs_epi16_constexpr() {
- constexpr __m128i a = _mm_setr_epi16(32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767);
- constexpr __m128i b = _mm_setr_epi16(-1, -1, -1, -1, -1, -1, -1, -1);
- constexpr __m128i result3 = _mm_hsubs_epi16(a, b);
-
- return match_v8si(result3, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767);
-}
-TEST_CONSTEXPR(test_mm_hsubs_epi16_constexpr())
+TEST_CONSTEXPR(match_v8hi(_mm_hsubs_epi16((__m128i)(__v8hi){32767, -1, 32767, -1, 32767, -1, 32767, -1},(__m128i)(__v8hi){32767, -1, 32767, -1, 32767, -1, 32767, -1}), 32767,32767,32767,32767,32767,32767,32767,32767));
__m128i test_mm_maddubs_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_maddubs_epi16
>From f91aa214f83bc2d459d43daa1c1563cf0c86b76a Mon Sep 17 00:00:00 2001
From: whyuuwang <whyuuwang at tencent.com>
Date: Fri, 26 Sep 2025 13:42:41 +0800
Subject: [PATCH 5/6] adjust test case and function
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 145 +++++++++--------------
clang/lib/AST/ExprConstant.cpp | 126 ++++++++++----------
clang/test/CodeGen/X86/avx-builtins.c | 8 +-
clang/test/CodeGen/X86/avx2-builtins.c | 30 ++---
clang/test/CodeGen/X86/mmx-builtins.c | 8 +-
clang/test/CodeGen/X86/ssse3-builtins.c | 8 +-
6 files changed, 143 insertions(+), 182 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index a0ea1404db182..04c62bcc238bb 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -8,7 +8,6 @@
#include "../ExprConstShared.h"
#include "Boolean.h"
#include "EvalEmitter.h"
-#include "Floating.h"
#include "Interp.h"
#include "InterpBuiltinBitCast.h"
#include "PrimType.h"
@@ -20,7 +19,6 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SipHash.h"
-#include <cassert>
namespace clang {
namespace interp {
@@ -2744,100 +2742,56 @@ static bool interp__builtin_ia32_pmul(InterpState &S, CodePtr OpPC,
return true;
}
-static bool interp_builtin_ia32ph_add_sub(InterpState &S, CodePtr OpPC,
- const InterpFrame *Frame,
- const CallExpr *Call,
- uint32_t BuiltinID) {
+static bool interp_builtin_horizontal_int_binop(
+ InterpState &S, CodePtr OpPC, const CallExpr *Call,
+ llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) {
+ assert(Call->getNumArgs() == 2);
+
assert(Call->getArg(0)->getType()->isVectorType() &&
Call->getArg(1)->getType()->isVectorType());
+ const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
+ assert(VT->getElementType()->isIntegralOrEnumerationType());
+ PrimType ElemT = *S.getContext().classify(VT->getElementType());
+ bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
+
const Pointer &RHS = S.Stk.pop<Pointer>();
const Pointer &LHS = S.Stk.pop<Pointer>();
const Pointer &Dst = S.Stk.peek<Pointer>();
- const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
- PrimType ElemT = *S.getContext().classify(VT->getElementType());
unsigned SourceLen = VT->getNumElements();
assert(SourceLen % 2 == 0 &&
Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements() ==
SourceLen);
- PrimType DstElemT = *S.getContext().classify(
- Call->getType()->castAs<VectorType>()->getElementType());
unsigned DstElem = 0;
- bool IsAdd = (BuiltinID == clang::X86::BI__builtin_ia32_phaddw128 ||
- BuiltinID == clang::X86::BI__builtin_ia32_phaddw256 ||
- BuiltinID == clang::X86::BI__builtin_ia32_phaddd128 ||
- BuiltinID == clang::X86::BI__builtin_ia32_phaddd256 ||
- BuiltinID == clang::X86::BI__builtin_ia32_phaddsw128 ||
- BuiltinID == clang::X86::BI__builtin_ia32_phaddsw256);
-
- bool IsSaturating = (BuiltinID == clang::X86::BI__builtin_ia32_phaddsw128 ||
- BuiltinID == clang::X86::BI__builtin_ia32_phaddsw256 ||
- BuiltinID == clang::X86::BI__builtin_ia32_phsubsw128 ||
- BuiltinID == clang::X86::BI__builtin_ia32_phsubsw256);
-
for (unsigned I = 0; I != SourceLen; I += 2) {
- APSInt Elem1;
- APSInt Elem2;
INT_TYPE_SWITCH_NO_BOOL(ElemT, {
- Elem1 = LHS.elem<T>(I).toAPSInt();
- Elem2 = LHS.elem<T>(I + 1).toAPSInt();
+ APSInt Elem1 = LHS.elem<T>(I).toAPSInt();
+ APSInt Elem2 = LHS.elem<T>(I + 1).toAPSInt();
+ Dst.elem<T>(DstElem) =
+ static_cast<T>(APSInt(Fn(Elem1, Elem2), DestUnsigned));
});
- APSInt Result;
- if (IsAdd) {
- if (IsSaturating) {
- Result =
- APSInt(Elem1.sadd_sat(Elem2), /*IsUnsigned=*/Elem1.isUnsigned());
- } else {
- Result = APSInt(Elem1 + Elem2, /*IsUnsigned=*/Elem1.isUnsigned());
- }
- } else {
- if (IsSaturating) {
- Result =
- APSInt(Elem1.ssub_sat(Elem2), /*IsUnsigned=*/Elem1.isUnsigned());
- } else {
- Result = APSInt(Elem1 - Elem2, /*IsUnsigned=*/Elem1.isUnsigned());
- }
- }
- INT_TYPE_SWITCH_NO_BOOL(DstElemT,
- { Dst.elem<T>(DstElem) = static_cast<T>(Result); });
++DstElem;
}
for (unsigned I = 0; I != SourceLen; I += 2) {
- APSInt Elem1;
- APSInt Elem2;
INT_TYPE_SWITCH_NO_BOOL(ElemT, {
- Elem1 = RHS.elem<T>(I).toAPSInt();
- Elem2 = RHS.elem<T>(I + 1).toAPSInt();
+ APSInt Elem1 = RHS.elem<T>(I).toAPSInt();
+ APSInt Elem2 = RHS.elem<T>(I + 1).toAPSInt();
+ Dst.elem<T>(DstElem) =
+ static_cast<T>(APSInt(Fn(Elem1, Elem2), DestUnsigned));
});
- APSInt Result;
- if (IsAdd) {
- if (IsSaturating) {
- Result =
- APSInt(Elem1.sadd_sat(Elem2), /*IsUnsigned=*/Elem1.isUnsigned());
- } else {
- Result = APSInt(Elem1 + Elem2, /*IsUnsigned=*/Elem1.isUnsigned());
- }
- } else {
- if (IsSaturating) {
- Result =
- APSInt(Elem1.ssub_sat(Elem2), /*IsUnsigned=*/Elem1.isUnsigned());
- } else {
- Result = APSInt(Elem1 - Elem2, /*IsUnsigned=*/Elem1.isUnsigned());
- }
- }
- INT_TYPE_SWITCH_NO_BOOL(DstElemT,
- { Dst.elem<T>(DstElem) = static_cast<T>(Result); });
++DstElem;
}
Dst.initializeAllElements();
return true;
}
-static bool interp_builtin_floatph_add_sub(InterpState &S, CodePtr OpPC,
- const InterpFrame *Frame,
- const CallExpr *Call,
- uint32_t BuiltinID) {
+static bool interp_builtin_horizontal_fp_binop(
+ InterpState &S, CodePtr OpPC, const CallExpr *Call,
+ llvm::function_ref<APFloat(const APFloat &, const APFloat &,
+ llvm::RoundingMode)>
+ Fn) {
+ assert(Call->getNumArgs() == 2);
assert(Call->getArg(0)->getType()->isVectorType() &&
Call->getArg(1)->getType()->isVectorType());
const Pointer &RHS = S.Stk.pop<Pointer>();
@@ -2852,30 +2806,18 @@ static bool interp_builtin_floatph_add_sub(InterpState &S, CodePtr OpPC,
Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements() ==
SourceLen);
unsigned DstElem = 0;
- bool IsAdd = (BuiltinID == clang::X86::BI__builtin_ia32_haddpd ||
- BuiltinID == clang::X86::BI__builtin_ia32_haddpd256 ||
- BuiltinID == clang::X86::BI__builtin_ia32_haddps ||
- BuiltinID == clang::X86::BI__builtin_ia32_haddps256);
- using T = Floating;
for (unsigned I = 0; I != SourceLen; I += 2) {
+ using T = PrimConv<PT_Float>::T;
APFloat Elem1 = LHS.elem<T>(I).getAPFloat();
APFloat Elem2 = LHS.elem<T>(I + 1).getAPFloat();
- if (IsAdd) {
- Elem1.add(Elem2, RM);
- } else {
- Elem1.subtract(Elem2, RM);
- }
- Dst.elem<T>(DstElem++) = Elem1;
+ Dst.elem<T>(DstElem++) =
+ static_cast<T>(APFloat(Fn(Elem1, Elem2, RM)));
}
for (unsigned I = 0; I != SourceLen; I += 2) {
+ using T = PrimConv<PT_Float>::T;
APFloat Elem1 = RHS.elem<T>(I).getAPFloat();
APFloat Elem2 = RHS.elem<T>(I + 1).getAPFloat();
- if (IsAdd) {
- Elem1.add(Elem2, RM);
- } else {
- Elem1.subtract(Elem2, RM);
- }
- Dst.elem<T>(DstElem++) = Elem1;
+ Dst.elem<T>(DstElem++) = static_cast<T>(APFloat(Fn(Elem1, Elem2, RM)));
}
Dst.initializeAllElements();
return true;
@@ -3596,25 +3538,48 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case clang::X86::BI__builtin_ia32_phaddw256:
case clang::X86::BI__builtin_ia32_phaddd128:
case clang::X86::BI__builtin_ia32_phaddd256:
+ return interp_builtin_horizontal_int_binop(
+ S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) {return LHS + RHS;});
case clang::X86::BI__builtin_ia32_phaddsw128:
case clang::X86::BI__builtin_ia32_phaddsw256:
+ return interp_builtin_horizontal_int_binop(
+ S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) {
+ return LHS.isSigned() ? LHS.sadd_sat(RHS) : LHS.uadd_sat(RHS);
+ });
case clang::X86::BI__builtin_ia32_phsubw128:
case clang::X86::BI__builtin_ia32_phsubw256:
case clang::X86::BI__builtin_ia32_phsubd128:
case clang::X86::BI__builtin_ia32_phsubd256:
+ return interp_builtin_horizontal_int_binop(
+ S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { return LHS - RHS; });
case clang::X86::BI__builtin_ia32_phsubsw128:
case clang::X86::BI__builtin_ia32_phsubsw256:
- return interp_builtin_ia32ph_add_sub(S, OpPC, Frame, Call, BuiltinID);
-
+ return interp_builtin_horizontal_int_binop(
+ S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) {
+ return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
+ });
case clang::X86::BI__builtin_ia32_haddpd:
case clang::X86::BI__builtin_ia32_haddpd256:
case clang::X86::BI__builtin_ia32_haddps:
case clang::X86::BI__builtin_ia32_haddps256:
+ return interp_builtin_horizontal_fp_binop(
+ S, OpPC, Call,
+ [](const APFloat &LHS, const APFloat &RHS, llvm::RoundingMode RM) {
+ APFloat F = LHS;
+ F.add(RHS, RM);
+ return F;
+ });
case clang::X86::BI__builtin_ia32_hsubpd:
case clang::X86::BI__builtin_ia32_hsubpd256:
case clang::X86::BI__builtin_ia32_hsubps:
case clang::X86::BI__builtin_ia32_hsubps256:
- return interp_builtin_floatph_add_sub(S, OpPC, Frame, Call, BuiltinID);
+ return interp_builtin_horizontal_fp_binop(
+ S, OpPC, Call,
+ [](const APFloat &LHS, const APFloat &RHS, llvm::RoundingMode RM) {
+ APFloat F = LHS;
+ F.subtract(RHS, RM);
+ return F;
+ });
case clang::X86::BI__builtin_ia32_pmuldq128:
case clang::X86::BI__builtin_ia32_pmuldq256:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 026894381e778..01e80a22fbb8d 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12332,71 +12332,67 @@ bool VectorExprEvaluator::VisitShuffleVectorExpr(const ShuffleVectorExpr *E) {
namespace {
class ArrayExprEvaluator
- :
- public
- ExprEvaluatorBase<ArrayExprEvaluator> {
- const LValue &This;
- APValue & Result;
-
- public:
- ArrayExprEvaluator(EvalInfo & Info, const LValue &This,
- APValue &Result)
- : ExprEvaluatorBaseTy(Info), This(This), Result(Result) {}
-
- bool Success(const APValue &V, const Expr *E) {
- assert(V.isArray() && "expected array");
- Result = V;
- return true;
- }
-
- bool ZeroInitialization(const Expr *E) {
- const ConstantArrayType *CAT =
- Info.Ctx.getAsConstantArrayType(E->getType());
- if (!CAT) {
- if (E->getType()->isIncompleteArrayType()) {
- // We can be asked to zero-initialize a flexible array member;
- // this is represented as an ImplicitValueInitExpr of
- // incomplete array type. In this case, the array has zero
- // elements.
- Result = APValue(APValue::UninitArray(), 0, 0);
- return true;
- }
- // FIXME: We could handle VLAs here.
- return Error(E);
- }
-
- Result = APValue(APValue::UninitArray(), 0, CAT->getZExtSize());
- if (!Result.hasArrayFiller())
- return true;
-
- // Zero-initialize all elements.
- LValue Subobject = This;
- Subobject.addArray(Info, E, CAT);
- ImplicitValueInitExpr VIE(CAT->getElementType());
- return EvaluateInPlace(Result.getArrayFiller(), Info, Subobject,
- &VIE);
- }
-
- bool VisitCallExpr(const CallExpr *E) {
- return handleCallExpr(E, Result, &This);
- }
- bool VisitInitListExpr(const InitListExpr *E,
- QualType AllocType = QualType());
- bool VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E);
- bool VisitCXXConstructExpr(const CXXConstructExpr *E);
- bool VisitCXXConstructExpr(const CXXConstructExpr *E,
- const LValue &Subobject, APValue *Value,
- QualType Type);
- bool VisitStringLiteral(const StringLiteral *E,
- QualType AllocType = QualType()) {
- expandStringLiteral(Info, E, Result, AllocType);
- return true;
- }
- bool VisitCXXParenListInitExpr(const CXXParenListInitExpr *E);
- bool VisitCXXParenListOrInitListExpr(
- const Expr *ExprToVisit, ArrayRef<Expr *> Args,
- const Expr *ArrayFiller, QualType AllocType = QualType());
- };
+ : public ExprEvaluatorBase<ArrayExprEvaluator> {
+ const LValue &This;
+ APValue &Result;
+ public:
+
+ ArrayExprEvaluator(EvalInfo &Info, const LValue &This, APValue &Result)
+ : ExprEvaluatorBaseTy(Info), This(This), Result(Result) {}
+
+ bool Success(const APValue &V, const Expr *E) {
+ assert(V.isArray() && "expected array");
+ Result = V;
+ return true;
+ }
+
+ bool ZeroInitialization(const Expr *E) {
+ const ConstantArrayType *CAT =
+ Info.Ctx.getAsConstantArrayType(E->getType());
+ if (!CAT) {
+ if (E->getType()->isIncompleteArrayType()) {
+ // We can be asked to zero-initialize a flexible array member; this
+ // is represented as an ImplicitValueInitExpr of incomplete array
+ // type. In this case, the array has zero elements.
+ Result = APValue(APValue::UninitArray(), 0, 0);
+ return true;
+ }
+ // FIXME: We could handle VLAs here.
+ return Error(E);
+ }
+
+ Result = APValue(APValue::UninitArray(), 0, CAT->getZExtSize());
+ if (!Result.hasArrayFiller())
+ return true;
+
+ // Zero-initialize all elements.
+ LValue Subobject = This;
+ Subobject.addArray(Info, E, CAT);
+ ImplicitValueInitExpr VIE(CAT->getElementType());
+ return EvaluateInPlace(Result.getArrayFiller(), Info, Subobject, &VIE);
+ }
+
+ bool VisitCallExpr(const CallExpr *E) {
+ return handleCallExpr(E, Result, &This);
+ }
+ bool VisitInitListExpr(const InitListExpr *E,
+ QualType AllocType = QualType());
+ bool VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E);
+ bool VisitCXXConstructExpr(const CXXConstructExpr *E);
+ bool VisitCXXConstructExpr(const CXXConstructExpr *E,
+ const LValue &Subobject,
+ APValue *Value, QualType Type);
+ bool VisitStringLiteral(const StringLiteral *E,
+ QualType AllocType = QualType()) {
+ expandStringLiteral(Info, E, Result, AllocType);
+ return true;
+ }
+ bool VisitCXXParenListInitExpr(const CXXParenListInitExpr *E);
+ bool VisitCXXParenListOrInitListExpr(const Expr *ExprToVisit,
+ ArrayRef<Expr *> Args,
+ const Expr *ArrayFiller,
+ QualType AllocType = QualType());
+ };
} // end anonymous namespace
static bool EvaluateArray(const Expr *E, const LValue &This,
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 3c9e184b8ba3f..0ff5a83505607 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -1102,7 +1102,7 @@ __m256d test_mm256_hsub_pd(__m256d A, __m256d B) {
// CHECK: call {{.*}}<4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
return _mm256_hsub_pd(A, B);
}
-TEST_CONSTEXPR(match_m256d(_mm256_hsub_pd((__m256d){1.0, 2.0, 3.0, 4.0}, (__m256d){5.0, 6.0, 7.0, 8.0}), -1.0,-1.0,-1.0,-1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_hsub_pd((__m256d){1.0, 2.0, 4.0, 3.0}, (__m256d){10.0, 6.0, 16.0, 8.0}), -1.0,4.0,1.0,8.0));
__m256 test_mm256_hsub_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_hsub_ps
@@ -1110,9 +1110,9 @@ __m256 test_mm256_hsub_ps(__m256 A, __m256 B) {
return _mm256_hsub_ps(A, B);
}
-TEST_CONSTEXPR(_mm256_hsub_ps(
- (__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f},
- (__m256){9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f},
- -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))
+TEST_CONSTEXPR(match_m256(_mm256_hsub_ps(
+ (__m256){1.0f, 2.0f, 4.0f, 3.0f, 5.0f, 7.0f, 7.0f, 5.0f},
+ (__m256){9.0f, 6.0f, 11.0f, 8.0f, 13.0f, 17.0f, 15.0f, 11.0f}),
+ -1.0f, 1.0f, 3.0f, 3.0f, -2.0f, 2.0f, -4.0f, 4.0f));
__m256i test_mm256_insert_epi8(__m256i x, char b) {
// CHECK-LABEL: test_mm256_insert_epi8
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index a993e2546b165..8837a8c0e52fa 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -476,8 +476,8 @@ __m256i test_mm256_hadd_epi32(__m256i a, __m256i b) {
}
TEST_CONSTEXPR(match_v8si(_mm256_hadd_epi32(
(__m256i)(__v8si){10, 20, 30, 40, 50, 60, 70, 80},
- (__m256i)(__v8si){5, 15, 25, 35, 45, 55, 65, 75})
- 30,70,110,150,20,60,100,140))
+ (__m256i)(__v8si){5, 15, 25, 35, 45, 55, 65, 75}),
+ 30,70,20,60,110,150,100,140));
__m256i test_mm256_hadds_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_hadds_epi16
@@ -485,9 +485,9 @@ __m256i test_mm256_hadds_epi16(__m256i a, __m256i b) {
return _mm256_hadds_epi16(a, b);
}
TEST_CONSTEXPR(match_v16hi( _mm256_hadds_epi16(
- (__m256i)(__v16hi){32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767},
- (__m256i)(__v16hi){32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767},
- 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767)))
+ (__m256i)(__v16hi){32767, 32767, 1,2,3,4,5,6,7,8,9,10,11,12,13,14},
+ (__m256i)(__v16hi){19,20,21,22,23,24,25,26,27,28,29,30,31,32, 32767, 5}),
+ 32767, 3,7,11, 39,43,47,51, 15,19,23,27, 55,59,63, 32767));
__m256i test_mm256_hsub_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_hsub_epi16
@@ -495,9 +495,9 @@ __m256i test_mm256_hsub_epi16(__m256i a, __m256i b) {
return _mm256_hsub_epi16(a, b);
}
TEST_CONSTEXPR(match_v16hi(_mm256_hsub_epi16(
- (__m256i)(__v16hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16},
- (__m256i)(__v16hi){17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32}),
- -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1));
+ (__m256i)(__v16hi){2,1,1,2,5,3,3,5,7,4,4,7,9,5,5,9},
+ (__m256i)(__v16hi){10,5,5,10,12,6,6,12,21,14,14,21,24,16,16,24}),
+ 1,-1,2,-2,5,-5,6,-6,3,-3,4,-4,7,-7,8,-8));
__m256i test_mm256_hsub_epi32(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_hsub_epi32
@@ -505,19 +505,19 @@ __m256i test_mm256_hsub_epi32(__m256i a, __m256i b) {
return _mm256_hsub_epi32(a, b);
}
TEST_CONSTEXPR(match_v8si(_mm256_hsub_epi32(
- (__m256i)(__v8si){10, 20, 30, 40, 50, 60, 70, 80},
- (__m256i)(__v8si){5, 15, 25, 35, 45, 55, 65, 75})
- -10,-10,-10,-10,-10,-10,-10,-10))
+ (__m256i)(__v8si){10, 20, 30,50,60,90,100,140},
+ (__m256i)(__v8si){200,150,260,200,420,350,800,740}),
+ -10,-20,50,60,-30,-40,70,60));
__m256i test_mm256_hsubs_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_hsubs_epi16
// CHECK:call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_hsubs_epi16(a, b);
}
-TEST_CONSTEXPR(match_v16hi( _mm256_hsubs_epi16(
- (__m256i)(__v16hi){32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767},
- (__m256i)(__v16hi){-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
- 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767)))
+TEST_CONSTEXPR(match_v16hi(_mm256_hsubs_epi16(
+ (__m256i)(__v16hi){32726, -100, 3, 2, 6, 4, 8, 5,15,10 ,21, 14, 27, 18, 100, 90},
+ (__m256i)(__v16hi){40, 20, 100, 70, 200,150, 100,40, 1000,900,300,150, 500,300, 1, 1}),
+ 32767, 1, 2, 3, 20, 30, 50, 60, 5, 7, 9, 10, 100, 150, 200, 0));
__m128i test_mm_i32gather_epi32(int const *b, __m128i c) {
// CHECK-LABEL: test_mm_i32gather_epi32
diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c
index b4c86b29e5260..7c1709f81f665 100644
--- a/clang/test/CodeGen/X86/mmx-builtins.c
+++ b/clang/test/CodeGen/X86/mmx-builtins.c
@@ -325,28 +325,28 @@ __m64 test_mm_hadds_pi16(__m64 a, __m64 b) {
// CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(
return _mm_hadds_pi16(a, b);
}
-TEST_CONSTEXPR(match_v4hi(_mm_hadds_pi16((__m64)(__v4hi){32767, 32767, 32767, 32767},(__m64)(__v4hi){32767, 32767, 32767, 32767}),32767, 32767, 32767, 32767));
+TEST_CONSTEXPR(match_v4hi(_mm_hadds_pi16((__m64)(__v4hi){32767, 32767, 1,3},(__m64)(__v4hi){-1,3, 40, 60}),32767, 4, 2,100));
__m64 test_mm_hsub_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hsub_pi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.w.128(
return _mm_hsub_pi16(a, b);
}
-TEST_CONSTEXPR(match_v4hi(_mm_hsub_pi16((__m64)(__v4hi){1,2,3,4},(__m64)(__v4hi){5,6,7,8}),-1,-1,-1,-1));
+TEST_CONSTEXPR(match_v4hi(_mm_hsub_pi16((__m64)(__v4hi){1,2,4,3},(__m64)(__v4hi){10,5,0,-10}),-1,1,5,10));
__m64 test_mm_hsub_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hsub_pi32
// CHECK: call <4 x i32> @llvm.x86.ssse3.phsub.d.128(
return _mm_hsub_pi32(a, b);
}
-TEST_CONSTEXPR(match_v2si(_mm_hsub_pi32((__m64)(__v2si){1,2},(__m64)(__v2si){3,4}),-1,-1));
+TEST_CONSTEXPR(match_v2si(_mm_hsub_pi32((__m64)(__v2si){1,2},(__m64)(__v2si){4,3}),-1,1));
__m64 test_mm_hsubs_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hsubs_pi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(
return _mm_hsubs_pi16(a, b);
}
-TEST_CONSTEXPR(match_v4hi(_mm_hsubs_pi16((__m64)(__v4hi){32767, 32767, 32767, 32767},(__m64)(__v4hi){-4,-5,-6,-7}),32767, 32767, 32767, 32767));
+TEST_CONSTEXPR(match_v4hi(_mm_hsubs_pi16((__m64)(__v4hi){32767, 32767, 5, -32767},(__m64)(__v4hi){4,5,10,5}),0,32767,-1,5));
__m64 test_mm_insert_pi16(__m64 a, int d) {
// CHECK-LABEL: test_mm_insert_pi16
diff --git a/clang/test/CodeGen/X86/ssse3-builtins.c b/clang/test/CodeGen/X86/ssse3-builtins.c
index 61c7ee31af96f..d5b64df3f57a9 100644
--- a/clang/test/CodeGen/X86/ssse3-builtins.c
+++ b/clang/test/CodeGen/X86/ssse3-builtins.c
@@ -74,7 +74,7 @@ __m128i test_mm_hadds_epi16(__m128i a, __m128i b) {
// CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_hadds_epi16(a, b);
}
-TEST_CONSTEXPR(match_v8hi(_mm_hadds_epi16((__m128i)(__v8hi){30000,30000,30000,30000,30000,30000,30000,30000}, (__m128i)(__v8hi){30000,30000,30000,30000,30000,30000,30000,30000}), 32767,32767,32767,32767,32767,32767,32767,32767));
+TEST_CONSTEXPR(match_v8hi(_mm_hadds_epi16((__m128i)(__v8hi){30000,30000,-1,2,-3,3,1,4}, (__m128i)(__v8hi){2,6,1,9,-4,16,7,8}), 32767, 1,0,5,8,10,12,15));
__m128i test_mm_hsub_epi16(__m128i a, __m128i b) {
@@ -82,21 +82,21 @@ __m128i test_mm_hsub_epi16(__m128i a, __m128i b) {
// CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_hsub_epi16(a, b);
}
-TEST_CONSTEXPR(match_v8hi(_mm_hsub_epi16((__m128i)(__v8hi){1,2,3,4,5,6,7,8}, (__m128i)(__v8hi){9,10,11,12,13,14,15,16}), -1,-1,-1,-1,-1,-1,-1,-1));
+TEST_CONSTEXPR(match_v8hi(_mm_hsub_epi16((__m128i)(__v8hi){20,15,16,12,9,6,4,2}, (__m128i)(__v8hi){3,2,1,1,4,5,0,2}), 5,4,3,2,1,0,-1,-2));
__m128i test_mm_hsub_epi32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_hsub_epi32
// CHECK: call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm_hsub_epi32(a, b);
}
-TEST_CONSTEXPR(match_v4si(_mm_hsub_epi32((__m128i)(__v4si){4,3,2,1}, (__m128i)(__v4si){8,7,6,5}), 1,1,1,1))
+TEST_CONSTEXPR(match_v4si(_mm_hsub_epi32((__m128i)(__v4si){4,3,1,1}, (__m128i)(__v4si){7,5,10,5}), 1,0,2,5))
__m128i test_mm_hsubs_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_hsubs_epi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_hsubs_epi16(a, b);
}
-TEST_CONSTEXPR(match_v8hi(_mm_hsubs_epi16((__m128i)(__v8hi){32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767},(__m128i)(__v8hi){-1,-1,-1,-1,-1,-1,-1,-1}), 32767,32767,32767,32767,32767,32767,32767,32767));
+TEST_CONSTEXPR(match_v8hi(_mm_hsubs_epi16((__m128i)(__v8hi){32767, -15,16,12,9,6,4,2},(__m128i)(__v8hi){3,2,1,1,4,5,0,2}), 32767,4,3,2,1,0,-1,-2));
__m128i test_mm_maddubs_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_maddubs_epi16
>From 4f5fb878426471f92c3b488495ae73bd87897690 Mon Sep 17 00:00:00 2001
From: whyuuwang <whyuuwang at tencent.com>
Date: Fri, 26 Sep 2025 13:48:02 +0800
Subject: [PATCH 6/6] undo the unintentional formatting of the code
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 04c62bcc238bb..5a097e5a4f7a0 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2746,7 +2746,7 @@ static bool interp_builtin_horizontal_int_binop(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) {
assert(Call->getNumArgs() == 2);
-
+
assert(Call->getArg(0)->getType()->isVectorType() &&
Call->getArg(1)->getType()->isVectorType());
const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
@@ -2810,8 +2810,7 @@ static bool interp_builtin_horizontal_fp_binop(
using T = PrimConv<PT_Float>::T;
APFloat Elem1 = LHS.elem<T>(I).getAPFloat();
APFloat Elem2 = LHS.elem<T>(I + 1).getAPFloat();
- Dst.elem<T>(DstElem++) =
- static_cast<T>(APFloat(Fn(Elem1, Elem2, RM)));
+ Dst.elem<T>(DstElem++) = static_cast<T>(APFloat(Fn(Elem1, Elem2, RM)));
}
for (unsigned I = 0; I != SourceLen; I += 2) {
using T = PrimConv<PT_Float>::T;
@@ -3539,7 +3538,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case clang::X86::BI__builtin_ia32_phaddd128:
case clang::X86::BI__builtin_ia32_phaddd256:
return interp_builtin_horizontal_int_binop(
- S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) {return LHS + RHS;});
+ S, OpPC, Call,
+ [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
case clang::X86::BI__builtin_ia32_phaddsw128:
case clang::X86::BI__builtin_ia32_phaddsw256:
return interp_builtin_horizontal_int_binop(
@@ -3551,7 +3551,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case clang::X86::BI__builtin_ia32_phsubd128:
case clang::X86::BI__builtin_ia32_phsubd256:
return interp_builtin_horizontal_int_binop(
- S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { return LHS - RHS; });
+ S, OpPC, Call,
+ [](const APSInt &LHS, const APSInt &RHS) { return LHS - RHS; });
case clang::X86::BI__builtin_ia32_phsubsw128:
case clang::X86::BI__builtin_ia32_phsubsw256:
return interp_builtin_horizontal_int_binop(
More information about the cfe-commits
mailing list