[clang] add MMX/SSE/AVX PHADD/SUB & HADDPS/D intrinsics to be used in constexpr #155395 (PR #156822)
via cfe-commits
cfe-commits at lists.llvm.org
Thu Sep 4 01:16:52 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: why (whytolearn)
<details>
<summary>Changes</summary>
[Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - allow MMX/SSE/AVX PHADD/SUB & HADDPS/D intrinsics to be used in constexpr (#155395)
Intrinsics covered:
_mm_hadd_pi16 _mm_hadd_epi16 _mm256_hadd_epi16
_mm_hadd_pi32 _mm_hadd_epi32 _mm256_hadd_epi32
_mm_hadds_pi16 _mm_hadds_epi16 _mm256_hadds_epi16
_mm_hsub_pi16 _mm_hsub_epi16 _mm256_hsub_epi16
_mm_hsub_pi32 _mm_hsub_epi32 _mm256_hsub_epi32
_mm_hsubs_pi16 _mm_hsubs_epi16 _mm256_hsubs_epi16
_mm_hadd_pd _mm256_hadd_pd
_mm_hadd_ps _mm256_hadd_ps
_mm_hsub_pd _mm256_hsub_pd
_mm_hsub_ps _mm256_hsub_ps
---
Patch is 32.77 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/156822.diff
8 Files Affected:
- (modified) clang/lib/Headers/avx2intrin.h (+18-24)
- (modified) clang/lib/Headers/avxintrin.h (+8-12)
- (modified) clang/lib/Headers/pmmintrin.h (+8-12)
- (modified) clang/lib/Headers/tmmintrin.h (+52-67)
- (modified) clang/test/CodeGen/X86/avx-builtins.c (+29)
- (modified) clang/test/CodeGen/X86/avx2-builtins.c (+63)
- (modified) clang/test/CodeGen/X86/mmx-builtins.c (+48)
- (modified) clang/test/CodeGen/X86/ssse3-builtins.c (+49)
``````````diff
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 4d03103ac1e08..54fe41cad6a46 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -854,10 +854,9 @@ _mm256_cmpgt_epi64(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_hadd_epi16(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_hadd_epi16(__m256i __a, __m256i __b) {
+ return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b);
}
/// Horizontally adds the adjacent pairs of 32-bit integers from two 256-bit
@@ -886,10 +885,9 @@ _mm256_hadd_epi16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [8 x i32] containing one of the source operands.
/// \returns A 256-bit vector of [8 x i32] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_hadd_epi32(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_hadd_epi32(__m256i __a, __m256i __b) {
+ return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b);
}
/// Horizontally adds the adjacent pairs of 16-bit integers from two 256-bit
@@ -921,10 +919,9 @@ _mm256_hadd_epi32(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_hadds_epi16(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_hadds_epi16(__m256i __a, __m256i __b) {
+ return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b);
}
/// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit
@@ -957,10 +954,9 @@ _mm256_hadds_epi16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_hsub_epi16(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_hsub_epi16(__m256i __a, __m256i __b) {
+ return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b);
}
/// Horizontally subtracts adjacent pairs of 32-bit integers from two 256-bit
@@ -989,10 +985,9 @@ _mm256_hsub_epi16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [8 x i32] containing one of the source operands.
/// \returns A 256-bit vector of [8 x i32] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_hsub_epi32(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_hsub_epi32(__m256i __a, __m256i __b) {
+ return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b);
}
/// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit
@@ -1025,10 +1020,9 @@ _mm256_hsub_epi32(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_hsubs_epi16(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_hsubs_epi16(__m256i __a, __m256i __b) {
+ return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b);
}
/// Multiplies each unsigned byte from the 256-bit integer vector in \a __a
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 03c034f64cbea..fb7110f145c38 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -703,9 +703,8 @@ _mm256_xor_ps(__m256 __a, __m256 __b)
/// elements of a vector of [4 x double].
/// \returns A 256-bit vector of [4 x double] containing the horizontal sums of
/// both operands.
-static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_hadd_pd(__m256d __a, __m256d __b)
-{
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_hadd_pd(__m256d __a, __m256d __b) {
return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b);
}
@@ -726,9 +725,8 @@ _mm256_hadd_pd(__m256d __a, __m256d __b)
/// index 2, 3, 6, 7 of a vector of [8 x float].
/// \returns A 256-bit vector of [8 x float] containing the horizontal sums of
/// both operands.
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_hadd_ps(__m256 __a, __m256 __b)
-{
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_hadd_ps(__m256 __a,
+ __m256 __b) {
return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b);
}
@@ -749,9 +747,8 @@ _mm256_hadd_ps(__m256 __a, __m256 __b)
/// odd-indexed elements of a vector of [4 x double].
/// \returns A 256-bit vector of [4 x double] containing the horizontal
/// differences of both operands.
-static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_hsub_pd(__m256d __a, __m256d __b)
-{
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_hsub_pd(__m256d __a, __m256d __b) {
return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b);
}
@@ -772,9 +769,8 @@ _mm256_hsub_pd(__m256d __a, __m256d __b)
/// elements with index 2, 3, 6, 7 of a vector of [8 x float].
/// \returns A 256-bit vector of [8 x float] containing the horizontal
/// differences of both operands.
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_hsub_ps(__m256 __a, __m256 __b)
-{
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_hsub_ps(__m256 __a,
+ __m256 __b) {
return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b);
}
diff --git a/clang/lib/Headers/pmmintrin.h b/clang/lib/Headers/pmmintrin.h
index cd605df7fb52d..67f2a7ffd1f56 100644
--- a/clang/lib/Headers/pmmintrin.h
+++ b/clang/lib/Headers/pmmintrin.h
@@ -89,9 +89,8 @@ _mm_addsub_ps(__m128 __a, __m128 __b)
/// destination.
/// \returns A 128-bit vector of [4 x float] containing the horizontal sums of
/// both operands.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_hadd_ps(__m128 __a, __m128 __b)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_ps(__m128 __a,
+ __m128 __b) {
return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b);
}
@@ -174,9 +173,8 @@ _mm_moveldup_ps(__m128 __a)
/// A 128-bit vector of [2 x double] containing the right source operand.
/// \returns A 128-bit vector of [2 x double] containing the alternating sums
/// and differences of both operands.
-static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_addsub_pd(__m128d __a, __m128d __b)
-{
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_addsub_pd(__m128d __a, __m128d __b) {
return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b);
}
@@ -197,9 +195,8 @@ _mm_addsub_pd(__m128d __a, __m128d __b)
/// destination.
/// \returns A 128-bit vector of [2 x double] containing the horizontal sums of
/// both operands.
-static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_hadd_pd(__m128d __a, __m128d __b)
-{
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_hadd_pd(__m128d __a, __m128d __b) {
return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b);
}
@@ -220,9 +217,8 @@ _mm_hadd_pd(__m128d __a, __m128d __b)
/// the destination.
/// \returns A 128-bit vector of [2 x double] containing the horizontal
/// differences of both operands.
-static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_hsub_pd(__m128d __a, __m128d __b)
-{
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_hsub_pd(__m128d __a, __m128d __b) {
return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b);
}
diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h
index f01c61afa8ea2..b408c6a3404ec 100644
--- a/clang/lib/Headers/tmmintrin.h
+++ b/clang/lib/Headers/tmmintrin.h
@@ -204,10 +204,9 @@ _mm_abs_epi32(__m128i __a) {
/// destination.
/// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
/// both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_hadd_epi16(__m128i __a, __m128i __b)
-{
- return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_hadd_epi16(__m128i __a, __m128i __b) {
+ return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
}
/// Horizontally adds the adjacent pairs of values contained in 2 packed
@@ -227,10 +226,9 @@ _mm_hadd_epi16(__m128i __a, __m128i __b)
/// destination.
/// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
/// both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_hadd_epi32(__m128i __a, __m128i __b)
-{
- return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_hadd_epi32(__m128i __a, __m128i __b) {
+ return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
}
/// Horizontally adds the adjacent pairs of values contained in 2 packed
@@ -250,11 +248,10 @@ _mm_hadd_epi32(__m128i __a, __m128i __b)
/// destination.
/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
/// operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_hadd_pi16(__m64 __a, __m64 __b)
-{
- return __trunc64(__builtin_ia32_phaddw128(
- (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_pi16(__m64 __a,
+ __m64 __b) {
+ return __trunc64(__builtin_ia32_phaddw128(
+ (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
}
/// Horizontally adds the adjacent pairs of values contained in 2 packed
@@ -274,11 +271,10 @@ _mm_hadd_pi16(__m64 __a, __m64 __b)
/// destination.
/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
/// operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_hadd_pi32(__m64 __a, __m64 __b)
-{
- return __trunc64(__builtin_ia32_phaddd128(
- (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){}));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_pi32(__m64 __a,
+ __m64 __b) {
+ return __trunc64(__builtin_ia32_phaddd128(
+ (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){}));
}
/// Horizontally adds, with saturation, the adjacent pairs of values contained
@@ -301,10 +297,9 @@ _mm_hadd_pi32(__m64 __a, __m64 __b)
/// destination.
/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
/// sums of both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_hadds_epi16(__m128i __a, __m128i __b)
-{
- return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_hadds_epi16(__m128i __a, __m128i __b) {
+ return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
}
/// Horizontally adds, with saturation, the adjacent pairs of values contained
@@ -327,11 +322,10 @@ _mm_hadds_epi16(__m128i __a, __m128i __b)
/// destination.
/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
/// sums of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_hadds_pi16(__m64 __a, __m64 __b)
-{
- return __trunc64(__builtin_ia32_phaddsw128(
- (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadds_pi16(__m64 __a,
+ __m64 __b) {
+ return __trunc64(__builtin_ia32_phaddsw128(
+ (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
}
/// Horizontally subtracts the adjacent pairs of values contained in 2
@@ -351,10 +345,9 @@ _mm_hadds_pi16(__m64 __a, __m64 __b)
/// the destination.
/// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
/// of both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_hsub_epi16(__m128i __a, __m128i __b)
-{
- return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_hsub_epi16(__m128i __a, __m128i __b) {
+ return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
}
/// Horizontally subtracts the adjacent pairs of values contained in 2
@@ -374,10 +367,9 @@ _mm_hsub_epi16(__m128i __a, __m128i __b)
/// the destination.
/// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
/// of both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_hsub_epi32(__m128i __a, __m128i __b)
-{
- return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_hsub_epi32(__m128i __a, __m128i __b) {
+ return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
}
/// Horizontally subtracts the adjacent pairs of values contained in 2
@@ -397,11 +389,10 @@ _mm_hsub_epi32(__m128i __a, __m128i __b)
/// the destination.
/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
/// of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_hsub_pi16(__m64 __a, __m64 __b)
-{
- return __trunc64(__builtin_ia32_phsubw128(
- (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_pi16(__m64 __a,
+ __m64 __b) {
+ return __trunc64(__builtin_ia32_phsubw128(
+ (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
}
/// Horizontally subtracts the adjacent pairs of values contained in 2
@@ -421,11 +412,10 @@ _mm_hsub_pi16(__m64 __a, __m64 __b)
/// the destination.
/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
/// of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_hsub_pi32(__m64 __a, __m64 __b)
-{
- return __trunc64(__builtin_ia32_phsubd128(
- (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){}));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_pi32(__m64 __a,
+ __m64 __b) {
+ return __trunc64(__builtin_ia32_phsubd128(
+ (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){}));
}
/// Horizontally subtracts, with saturation, the adjacent pairs of values
@@ -448,10 +438,9 @@ _mm_hsub_pi32(__m64 __a, __m64 __b)
/// the destination.
/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
/// differences of both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_hsubs_epi16(__m128i __a, __m128i __b)
-{
- return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_hsubs_epi16(__m128i __a, __m128i __b) {
+ return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
}
/// Horizontally subtracts, with saturation, the adjacent pairs of values
@@ -474,11 +463,10 @@ _mm_hsubs_epi16(__m128i __a, __m128i __b)
/// the destination.
/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
/// differences of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_hsubs_pi16(__m64 __a, __m64 __b)
-{
- return __trunc64(__builtin_ia32_phsubsw128(
- (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsubs_pi16(__m64 __a,
+ __m64 __b) {
+ return __trunc64(__builtin_ia32_phsubsw128(
+ (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
}
/// Multiplies corresponding pairs of packed 8-bit unsigned integer
@@ -509,10 +497,9 @@ _mm_hsubs_pi16(__m64 __a, __m64 __b)
/// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n
/// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n
/// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15)
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_maddubs_epi16(__m128i __a, __m128i __b)
-{
- return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_maddubs_epi16(__m128i __a, __m128i __b) {
+ return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
}
/// Multiplies corresponding pairs of packed 8-bit unsigned integer
@@ -539,11 +526,10 @@ _mm_maddubs_epi16(__m128i __a, __m128i __b)
/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_maddubs_pi16(__m64 __a, __m64 __b)
-{
- return __trunc64(__builtin_ia32_pmaddubsw128((__v16qi)__anyext128(__a),
- (__v16qi)__anyext128(__b)));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_maddubs_pi16(__m64 __a, __m64 __b) {
+ return __trunc64(__builtin_ia32_pmaddubsw128((__v16qi)__anyext128(__a),
+ (__v16qi)__anyext128(__b)));
}
/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
@@ -560,10 +546,9 @@ _mm_maddubs_pi16(__m64 __a, __m64 __b)
/// A 128-bit vector of [8 x i16] containing one of the source operands.
/// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
/// products of both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_mulhrs_epi16(__m128i __a, __m128i __b)
-{
- return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_mulhrs_epi16(__m128i __a, __m128i __b) {
+ return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
}
/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 622ac5d50aaf0..9857b84c94112 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -1083,24 +1083,53 @@ __m256d test_mm256_hadd_pd(__m256d A, __m256d B) {
// CHECK: call {{.*}}<4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
return _mm256_hadd_pd(A, B);
}
+constexpr bool test_mm256_hadd_epi32_constexpr() {
+ constexpr __m256d a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
+ constexpr __m256d b = _mm256_set_pd(5.0, 6.0, 7.0, 8.0);
+ constexpr __m256d result = _mm256_hadd_pd(a, b);
+ return match_m256d(result,1.0+2.0,3.0+4.0,5.0+6.0,7.0+8.0);
+}
+TEST_CONSTEXPR(test_mm256_hadd_epi32_constexpr())
__m256 test_mm256_hadd_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_hadd_ps
// CHECK: call {{.*}}<8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
return _mm256_hadd_ps(A, B);
}
+constexpr bool test_mm256_hadd_ps_constexpr() {
+ constexpr __m256 a = _mm256_set_ps(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
+ constexpr __m256 b = _mm256_set_ps(9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f);
+ constexpr __m256 result = _mm256_hadd_ps(a, b);
+ return match_m256(result,1.0f+2.0f,3.0f+4.0f,5.0f+6.0f,7.0f+8.0f,
+ 9.0f+10.0f,11.0f+12.0f,13.0f+14.0f,15.0f+16.0f);
+}
+TEST_CONSTEXPR(test_mm256_hadd_ps_constexpr())
...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/156822
More information about the cfe-commits
mailing list