[clang] [Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - allow MMX/SSE/AVX2 PSIGN intrinsics to be used in constexpr (PR #163685)
Shawn K via cfe-commits
cfe-commits at lists.llvm.org
Mon Oct 20 11:30:29 PDT 2025
https://github.com/kimsh02 updated https://github.com/llvm/llvm-project/pull/163685
>From b1b44153a4fd24b13e1d4d468ff1667de2a51147 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Mon, 20 Oct 2025 09:52:51 -0700
Subject: [PATCH 1/2] [Clang] VectorExprEvaluator::VisitCallExpr /
InterpretBuiltin - allow MMX/SSE/AVX2 PSIGN intrinsics to be used in
constexpr
---
clang/include/clang/Basic/BuiltinsX86.td | 29 ++++++++------
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 37 ++++++++++++++++++
clang/lib/AST/ExprConstant.cpp | 30 +++++++++++++++
clang/lib/Headers/avx2intrin.h | 21 +++++------
clang/lib/Headers/tmmintrin.h | 48 +++++++++++-------------
clang/test/CodeGen/X86/avx2-builtins.c | 7 ++++
clang/test/CodeGen/X86/mmx-builtins.c | 5 +++
clang/test/CodeGen/X86/ssse3-builtins.c | 3 ++
8 files changed, 130 insertions(+), 50 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index d03c778740ad3..825534436704c 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -123,13 +123,13 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
}
}
- let Features = "ssse3" in {
- def psignb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
- def psignw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
- def psignd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
- }
-
let Features = "ssse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+ def psignb128
+ : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
+ def psignw128
+ : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
+ def psignd128
+ : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
def pmulhrsw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def pmaddubsw128 : X86Builtin<"_Vector<8, short>(_Vector<16, char>, _Vector<16, char>)">;
def pshufb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
@@ -608,10 +608,9 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">;
def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
- def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
- def psignb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
- def psignw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
- def psignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
+ def psadbw256
+ : X86Builtin<
+ "_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
def psllw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
def pslldqi256_byteshift : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Constant int)">;
def pslld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
@@ -682,7 +681,15 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
def phsubw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def phsubd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
def phsubsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
-
+
+ def psignb256
+ : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
+ def psignw256
+ : X86Builtin<
+ "_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
+ def psignd256
+ : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
+
def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 0cb491063057c..cbbc17cd7c3a5 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3003,6 +3003,35 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
return true;
}
+static bool interp__builtin_ia32_psign_op(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call) {
+ assert(Call->getNumArgs() == 2);
+
+ const Pointer &B = S.Stk.pop<Pointer>();
+ const Pointer &A = S.Stk.pop<Pointer>();
+ const Pointer &Result = S.Stk.peek<Pointer>();
+
+ unsigned ResultLen = A.getNumElems();
+ QualType ElemQT = getElemType(A);
+ OptPrimType ElemT = S.getContext().classify(ElemQT);
+ unsigned ElemBitWidth = S.getASTContext().getTypeSize(ElemQT);
+ bool ResultElemUnsigned = ElemQT->isUnsignedIntegerOrEnumerationType();
+
+ INT_TYPE_SWITCH_NO_BOOL(*ElemT, {
+ for (unsigned I = 0; I != ResultLen; ++I) {
+ APSInt AElem = A.elem<T>(I).toAPSInt();
+ APSInt BElem = B.elem<T>(I).toAPSInt();
+ APSInt ResultElem =
+ (BElem.isNegative() ? -AElem
+ : BElem.isZero() ? APSInt(ElemBitWidth, ResultElemUnsigned)
+ : AElem);
+ Result.elem<T>(I) = static_cast<T>(ResultElem);
+ }
+ });
+ Result.initializeAllElements();
+ return true;
+}
+
static bool interp__builtin_ia32_pternlog(InterpState &S, CodePtr OpPC,
const CallExpr *Call, bool MaskZ) {
assert(Call->getNumArgs() == 5);
@@ -3630,6 +3659,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
.extractBits(16, 1);
});
+ case X86::BI__builtin_ia32_psignb128:
+ case X86::BI__builtin_ia32_psignb256:
+ case X86::BI__builtin_ia32_psignw128:
+ case X86::BI__builtin_ia32_psignw256:
+ case X86::BI__builtin_ia32_psignd128:
+ case X86::BI__builtin_ia32_psignd256:
+ return interp__builtin_ia32_psign_op(S, OpPC, Call);
+
case clang::X86::BI__builtin_ia32_pavgb128:
case clang::X86::BI__builtin_ia32_pavgw128:
case clang::X86::BI__builtin_ia32_pavgb256:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index e308c171ed551..b9216cd89d7b7 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12245,6 +12245,36 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+ case X86::BI__builtin_ia32_psignb128:
+ case X86::BI__builtin_ia32_psignb256:
+ case X86::BI__builtin_ia32_psignw128:
+ case X86::BI__builtin_ia32_psignw256:
+ case X86::BI__builtin_ia32_psignd128:
+ case X86::BI__builtin_ia32_psignd256: {
+ APValue ASource, BSource;
+ if (!EvaluateAsRValue(Info, E->getArg(0), ASource) ||
+ !EvaluateAsRValue(Info, E->getArg(1), BSource))
+ return false;
+ unsigned SourceLen = ASource.getVectorLength();
+ const VectorType *VT = E->getArg(0)->getType()->castAs<VectorType>();
+ QualType ElemQT = VT->getElementType();
+ unsigned ElemBitWidth = Info.Ctx.getTypeSize(ElemQT);
+ bool ResultElemUnsigned = ElemQT->isUnsignedIntegerOrEnumerationType();
+
+ SmallVector<APValue, 16> Result;
+ Result.reserve(SourceLen);
+ for (unsigned I = 0; I != SourceLen; ++I) {
+ APSInt &AElem = ASource.getVectorElt(I).getInt();
+ APSInt &BElem = BSource.getVectorElt(I).getInt();
+ APSInt ResultElem =
+ (BElem.isNegative() ? -AElem
+ : BElem.isZero() ? APSInt(ElemBitWidth, ResultElemUnsigned)
+ : AElem);
+ Result.emplace_back(ResultElem);
+ }
+ return Success(APValue(Result.data(), Result.size()), E);
+ }
+
case X86::BI__builtin_ia32_blendvpd:
case X86::BI__builtin_ia32_blendvpd256:
case X86::BI__builtin_ia32_blendvps:
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index d35bc0e84a7a1..757085a930122 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -1976,10 +1976,9 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b) {
/// \param __b
/// A 256-bit integer vector].
/// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sign_epi8(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sign_epi8(__m256i __a, __m256i __b) {
+ return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b);
}
/// Sets each element of the result to the corresponding element of the
@@ -1997,10 +1996,9 @@ _mm256_sign_epi8(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16].
/// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sign_epi16(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sign_epi16(__m256i __a, __m256i __b) {
+ return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b);
}
/// Sets each element of the result to the corresponding element of the
@@ -2018,10 +2016,9 @@ _mm256_sign_epi16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [8 x i32].
/// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sign_epi32(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sign_epi32(__m256i __a, __m256i __b) {
+ return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b);
}
/// Shifts each 128-bit half of the 256-bit integer vector \a a left by
diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h
index 5d0f20f4d527d..95b32c0087ecb 100644
--- a/clang/lib/Headers/tmmintrin.h
+++ b/clang/lib/Headers/tmmintrin.h
@@ -641,10 +641,9 @@ _mm_shuffle_pi8(__m64 __a, __m64 __b) {
/// A 128-bit integer vector containing control bytes corresponding to
/// positions in the destination.
/// \returns A 128-bit integer vector containing the resultant values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_sign_epi8(__m128i __a, __m128i __b)
-{
- return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sign_epi8(__m128i __a, __m128i __b) {
+ return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
}
/// For each 16-bit integer in the first source operand, perform one of
@@ -667,10 +666,9 @@ _mm_sign_epi8(__m128i __a, __m128i __b)
/// A 128-bit integer vector containing control words corresponding to
/// positions in the destination.
/// \returns A 128-bit integer vector containing the resultant values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_sign_epi16(__m128i __a, __m128i __b)
-{
- return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sign_epi16(__m128i __a, __m128i __b) {
+ return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
}
/// For each 32-bit integer in the first source operand, perform one of
@@ -693,10 +691,9 @@ _mm_sign_epi16(__m128i __a, __m128i __b)
/// A 128-bit integer vector containing control doublewords corresponding to
/// positions in the destination.
/// \returns A 128-bit integer vector containing the resultant values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_sign_epi32(__m128i __a, __m128i __b)
-{
- return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sign_epi32(__m128i __a, __m128i __b) {
+ return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
}
/// For each 8-bit integer in the first source operand, perform one of
@@ -719,11 +716,10 @@ _mm_sign_epi32(__m128i __a, __m128i __b)
/// A 64-bit integer vector containing control bytes corresponding to
/// positions in the destination.
/// \returns A 64-bit integer vector containing the resultant values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_sign_pi8(__m64 __a, __m64 __b)
-{
- return __trunc64(__builtin_ia32_psignb128((__v16qi)__anyext128(__a),
- (__v16qi)__anyext128(__b)));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi8(__m64 __a,
+ __m64 __b) {
+ return __trunc64(__builtin_ia32_psignb128((__v16qi)__zext128(__a),
+ (__v16qi)__zext128(__b)));
}
/// For each 16-bit integer in the first source operand, perform one of
@@ -746,11 +742,10 @@ _mm_sign_pi8(__m64 __a, __m64 __b)
/// A 64-bit integer vector containing control words corresponding to
/// positions in the destination.
/// \returns A 64-bit integer vector containing the resultant values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_sign_pi16(__m64 __a, __m64 __b)
-{
- return __trunc64(__builtin_ia32_psignw128((__v8hi)__anyext128(__a),
- (__v8hi)__anyext128(__b)));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi16(__m64 __a,
+ __m64 __b) {
+ return __trunc64(
+ __builtin_ia32_psignw128((__v8hi)__zext128(__a), (__v8hi)__zext128(__b)));
}
/// For each 32-bit integer in the first source operand, perform one of
@@ -773,11 +768,10 @@ _mm_sign_pi16(__m64 __a, __m64 __b)
/// A 64-bit integer vector containing two control doublewords corresponding
/// to positions in the destination.
/// \returns A 64-bit integer vector containing the resultant values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_sign_pi32(__m64 __a, __m64 __b)
-{
- return __trunc64(__builtin_ia32_psignd128((__v4si)__anyext128(__a),
- (__v4si)__anyext128(__b)));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi32(__m64 __a,
+ __m64 __b) {
+ return __trunc64(
+ __builtin_ia32_psignd128((__v4si)__zext128(__a), (__v4si)__zext128(__b)));
}
#undef __anyext128
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index a505d70a98203..c785363e07f53 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -1151,23 +1151,30 @@ __m256i test_mm256_shufflelo_epi16(__m256i a) {
return _mm256_shufflelo_epi16(a, 83);
}
TEST_CONSTEXPR(match_v16hi(_mm256_shufflelo_epi16(((__m256i)(__v16hi){ 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}), 83), 3,0,1,1, 4,5,6,7, 11,8,9,9, 12,13,14,15) );
+
__m256i test_mm256_sign_epi8(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_sign_epi8
// CHECK: call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
return _mm256_sign_epi8(a, b);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_sign_epi8(
+ (__m256i)(__v32qi){'B','r','i','g','h','t','n','e','o','n','f','o','x','j','u','m','p','s','o','v','e','r','p','r','o','g','r','a','m','m','e','r'},
+ (__m256i)(__v32qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,'t','h','i','s'}),
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,'m','m','e','r'));
__m256i test_mm256_sign_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_sign_epi16
// CHECK: call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_sign_epi16(a, b);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_sign_epi16((__m256i)(__v16hi){0x77,0x77,0xbe,0xbe, -0x9,-0x9,-0x8,-0x8, 0,0,0,0, 0,0,0,0}, (__m256i)(__v16hi){-1,-256,1,256, -512,-1028,512,1028, -2048,-4096,'h','i', 'b','y','e','!'}), -0x77,-0x77,0xbe,0xbe, 0x9,0x9,-0x8,-0x8, 0,0,0,0, 0,0,0,0));
__m256i test_mm256_sign_epi32(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_sign_epi32
// CHECK: call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
return _mm256_sign_epi32(a, b);
}
+TEST_CONSTEXPR(match_v8si(_mm256_sign_epi32((__m256i)(__v8si){0xbeef,0xfeed,0xbead,0xdeed,'o','o','p','s'}, (__m256i)(__v8si){0,0,0,0,-1,-1,-1,-1}), 0,0,0,0, -'o',-'o',-'p',-'s'));
__m256i test_mm256_slli_epi16(__m256i a) {
// CHECK-LABEL: test_mm256_slli_epi16
diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c
index c1ac57b44e58d..0065b87108914 100644
--- a/clang/test/CodeGen/X86/mmx-builtins.c
+++ b/clang/test/CodeGen/X86/mmx-builtins.c
@@ -598,23 +598,28 @@ __m64 test_mm_shuffle_pi16(__m64 a) {
return _mm_shuffle_pi16(a, 3);
}
TEST_CONSTEXPR(match_v4hi(_mm_shuffle_pi16(((__m64)(__v4hi){0,1,2,3}), 3), 3,0,0,0));
+
__m64 test_mm_sign_pi8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sign_pi8
// CHECK: call <16 x i8> @llvm.x86.ssse3.psign.b.128(
return _mm_sign_pi8(a, b);
}
+TEST_CONSTEXPR(match_v8qi(_mm_sign_pi8((__m64)(__v8qi){0,0,0,0, 0,0,0,0}, (__m64)(__v8qi){0,0,0,0, 0,0,0,0}), 0,0,0,0, 0,0,0,0));
+TEST_CONSTEXPR(match_v8qi(_mm_sign_pi8((__m64)(__v8qi){6,7,6,7, 6,7,6,7}, (__m64)(__v8qi){1,1,1,1, 0,0,0,0}), 6,7,6,7, 0,0,0,0));
__m64 test_mm_sign_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sign_pi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.psign.w.128(
return _mm_sign_pi16(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_sign_pi16((__m64)(__v4hi){-1,0,1,0}, (__m64)(__v4hi){1,0,-1,0}), -1,0,-1,0));
__m64 test_mm_sign_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sign_pi32
// CHECK: call <4 x i32> @llvm.x86.ssse3.psign.d.128(
return _mm_sign_pi32(a, b);
}
+TEST_CONSTEXPR(match_v2si(_mm_sign_pi32((__m64)(__v2si){0x7FFF, -1}, (__m64)(__v2si){-1, 0x7FFF}), -0x7FFF, -1));
__m64 test_mm_sll_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sll_pi16
diff --git a/clang/test/CodeGen/X86/ssse3-builtins.c b/clang/test/CodeGen/X86/ssse3-builtins.c
index f70afc01a1963..d3fde1e2de653 100644
--- a/clang/test/CodeGen/X86/ssse3-builtins.c
+++ b/clang/test/CodeGen/X86/ssse3-builtins.c
@@ -125,15 +125,18 @@ __m128i test_mm_sign_epi8(__m128i a, __m128i b) {
// CHECK: call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
return _mm_sign_epi8(a, b);
}
+TEST_CONSTEXPR(match_v16qi(_mm_sign_epi8((__m128i)(__v16qi){'g','r','i','n','d','i','n','g', 'l','e','e','t','c','o','d','e'}, (__m128i)(__v16qi){0,1,0,1, 1,1,0,0, 0,0,1,1, 1,0,1,0}), 0,'r',0,'n', 'd','i',0,0, 0,0,'e','t', 'c',0,'d',0));
__m128i test_mm_sign_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_sign_epi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_sign_epi16(a, b);
}
+TEST_CONSTEXPR(match_v8hi(_mm_sign_epi16((__m128i)(__v8hi){0,-2,0,-4,0,-6,0,-8}, (__m128i)(__v8hi){-1,-2,-3,-4,-5,-6,7,-8}), 0,2,0,4,0,6,0,8));
__m128i test_mm_sign_epi32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_sign_epi32
// CHECK: call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm_sign_epi32(a, b);
}
+TEST_CONSTEXPR(match_v4si(_mm_sign_epi32((__m128i)(__v4si){-1,-2,-3,-4}, (__m128i)(__v4si){-4,-3,-2,-1}), 1,2,3,4));
>From 7f68ad82666d4f9a14b8bb9d62f829e6f6aa652d Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Mon, 20 Oct 2025 11:30:16 -0700
Subject: [PATCH 2/2] Apply feedback
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 36 ++++--------------------
clang/lib/AST/ExprConstant.cpp | 31 +++++---------------
2 files changed, 13 insertions(+), 54 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index cbbc17cd7c3a5..dc16bb1144078 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3003,35 +3003,6 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
return true;
}
-static bool interp__builtin_ia32_psign_op(InterpState &S, CodePtr OpPC,
- const CallExpr *Call) {
- assert(Call->getNumArgs() == 2);
-
- const Pointer &B = S.Stk.pop<Pointer>();
- const Pointer &A = S.Stk.pop<Pointer>();
- const Pointer &Result = S.Stk.peek<Pointer>();
-
- unsigned ResultLen = A.getNumElems();
- QualType ElemQT = getElemType(A);
- OptPrimType ElemT = S.getContext().classify(ElemQT);
- unsigned ElemBitWidth = S.getASTContext().getTypeSize(ElemQT);
- bool ResultElemUnsigned = ElemQT->isUnsignedIntegerOrEnumerationType();
-
- INT_TYPE_SWITCH_NO_BOOL(*ElemT, {
- for (unsigned I = 0; I != ResultLen; ++I) {
- APSInt AElem = A.elem<T>(I).toAPSInt();
- APSInt BElem = B.elem<T>(I).toAPSInt();
- APSInt ResultElem =
- (BElem.isNegative() ? -AElem
- : BElem.isZero() ? APSInt(ElemBitWidth, ResultElemUnsigned)
- : AElem);
- Result.elem<T>(I) = static_cast<T>(ResultElem);
- }
- });
- Result.initializeAllElements();
- return true;
-}
-
static bool interp__builtin_ia32_pternlog(InterpState &S, CodePtr OpPC,
const CallExpr *Call, bool MaskZ) {
assert(Call->getNumArgs() == 5);
@@ -3665,7 +3636,12 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_psignw256:
case X86::BI__builtin_ia32_psignd128:
case X86::BI__builtin_ia32_psignd256:
- return interp__builtin_ia32_psign_op(S, OpPC, Call);
+ return interp__builtin_elementwise_int_binop(
+ S, OpPC, Call, [](const APSInt &AElem, const APSInt &BElem) -> APInt {
+ return BElem.isNegative() ? static_cast<const APInt &>(-AElem)
+ : BElem.isZero() ? APInt(AElem.getBitWidth(), 0)
+ : static_cast<const APInt &>(AElem);
+ });
case clang::X86::BI__builtin_ia32_pavgb128:
case clang::X86::BI__builtin_ia32_pavgw128:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index b9216cd89d7b7..e995e40273a71 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12250,30 +12250,13 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
case X86::BI__builtin_ia32_psignw128:
case X86::BI__builtin_ia32_psignw256:
case X86::BI__builtin_ia32_psignd128:
- case X86::BI__builtin_ia32_psignd256: {
- APValue ASource, BSource;
- if (!EvaluateAsRValue(Info, E->getArg(0), ASource) ||
- !EvaluateAsRValue(Info, E->getArg(1), BSource))
- return false;
- unsigned SourceLen = ASource.getVectorLength();
- const VectorType *VT = E->getArg(0)->getType()->castAs<VectorType>();
- QualType ElemQT = VT->getElementType();
- unsigned ElemBitWidth = Info.Ctx.getTypeSize(ElemQT);
- bool ResultElemUnsigned = ElemQT->isUnsignedIntegerOrEnumerationType();
-
- SmallVector<APValue, 16> Result;
- Result.reserve(SourceLen);
- for (unsigned I = 0; I != SourceLen; ++I) {
- APSInt &AElem = ASource.getVectorElt(I).getInt();
- APSInt &BElem = BSource.getVectorElt(I).getInt();
- APSInt ResultElem =
- (BElem.isNegative() ? -AElem
- : BElem.isZero() ? APSInt(ElemBitWidth, ResultElemUnsigned)
- : AElem);
- Result.emplace_back(ResultElem);
- }
- return Success(APValue(Result.data(), Result.size()), E);
- }
+ case X86::BI__builtin_ia32_psignd256:
+ return EvaluateBinOpExpr(
+ [](const APSInt &AElem, const APSInt &BElem) -> APInt {
+ return BElem.isNegative() ? static_cast<const APInt &>(-AElem)
+ : BElem.isZero() ? APInt(AElem.getBitWidth(), 0)
+ : static_cast<const APInt &>(AElem);
+ });
case X86::BI__builtin_ia32_blendvpd:
case X86::BI__builtin_ia32_blendvpd256:
More information about the cfe-commits mailing list