[clang] [X86][Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - add SSE/AVX VPTEST/VTESTPD/VTESTPS intrinsics to be used in constexpr (PR #160428)
Shawn K via cfe-commits
cfe-commits at lists.llvm.org
Thu Oct 2 09:54:14 PDT 2025
https://github.com/kimsh02 updated https://github.com/llvm/llvm-project/pull/160428
>From 68df3b7879fe7a167cba1108b64bfdd9283bb02d Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Thu, 2 Oct 2025 05:44:38 -0700
Subject: [PATCH 1/2] Squash
---
clang/include/clang/Basic/BuiltinsX86.td | 27 +++++--
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 89 +++++++++++++++++++++++-
clang/lib/AST/ExprConstant.cpp | 76 +++++++++++++++++++-
clang/lib/Headers/avxintrin.h | 75 ++++++++------------
clang/lib/Headers/smmintrin.h | 12 ++--
clang/test/CodeGen/X86/avx-builtins.c | 59 ++++++++++++++++
clang/test/CodeGen/X86/sse41-builtins.c | 20 ++++++
7 files changed, 297 insertions(+), 61 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index e98bee28c15be..877782c0dfdeb 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -317,10 +317,8 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
def roundpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant int)">;
def dpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
- def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">;
- def ptestz128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
- def ptestc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
- def ptestnzc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
+ def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, "
+ "_Vector<2,double>, _Constant char)">;
def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">;
def vec_ext_v16qi : X86Builtin<"char(_Vector<16, char>, _Constant int)">;
@@ -328,6 +326,16 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
def vec_set_v4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int, _Constant int)">;
}
+let Features = "sse4.1",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+ def ptestz128
+ : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
+ def ptestc128
+ : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
+ def ptestnzc128
+ : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
+}
+
let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def pblendw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">;
def blendpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
@@ -513,8 +521,8 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
def roundps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">;
}
-
-let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vtestzpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
def vtestcpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
def vtestnzcpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
@@ -523,7 +531,8 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
def vtestnzcps : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>)">;
}
-let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vtestzpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
def vtestcpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
def vtestnzcpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
@@ -533,6 +542,10 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
def ptestz256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
def ptestc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
def ptestnzc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
+}
+
+let Features = "avx",
+ Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def movmskpd256 : X86Builtin<"int(_Vector<4, double>)">;
def movmskps256 : X86Builtin<"int(_Vector<8, float>)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index a2e97fcafdfef..382e7c8d4a95d 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2773,6 +2773,66 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
return true;
}
+static bool interp__builtin_ia32_test_op(
+ InterpState &S, CodePtr OpPC, const CallExpr *Call,
+ llvm::function_ref<bool(const APInt &A, const APInt &B)> Fn) {
+ const Pointer &RHS = S.Stk.pop<Pointer>();
+ const Pointer &LHS = S.Stk.pop<Pointer>();
+
+ assert(LHS.getNumElems() == RHS.getNumElems());
+ assert(LHS.getFieldDesc()->isPrimitiveArray() &&
+ RHS.getFieldDesc()->isPrimitiveArray());
+
+ if (!S.getASTContext().hasSameUnqualifiedType(getElemType(LHS),
+ getElemType(RHS)))
+ return false;
+
+ const unsigned SourceLen = LHS.getNumElems();
+ const QualType ElemQT = getElemType(LHS);
+ const OptPrimType ElemPT = S.getContext().classify(ElemQT);
+
+ if (ElemQT->isIntegerType()) {
+ APInt FirstElem;
+ INT_TYPE_SWITCH_NO_BOOL(*ElemPT,
+ { FirstElem = LHS.elem<T>(0).toAPSInt(); });
+ const unsigned LaneWidth = FirstElem.getBitWidth();
+
+ APInt AWide(LaneWidth * SourceLen, 0);
+ APInt BWide(LaneWidth * SourceLen, 0);
+
+ for (unsigned I = 0; I != SourceLen; ++I) {
+ APInt ALane;
+ APInt BLane;
+ INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
+ ALane = LHS.elem<T>(I).toAPSInt();
+ BLane = RHS.elem<T>(I).toAPSInt();
+ });
+ AWide.insertBits(ALane, I * LaneWidth);
+ BWide.insertBits(BLane, I * LaneWidth);
+ }
+ pushInteger(S, Fn(AWide, BWide) ? 1 : 0, Call->getType());
+ return true;
+ } else if (ElemQT->isFloatingType()) {
+ APInt ASignBits(SourceLen, 0);
+ APInt BSignBits(SourceLen, 0);
+
+ for (unsigned I = 0; I != SourceLen; ++I) {
+ using T = PrimConv<PT_Float>::T;
+ APInt ALane = LHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+ APInt BLane = RHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+ const unsigned SignBit = ALane.getBitWidth() - 1;
+ const bool ALaneSign = ALane[SignBit];
+ const bool BLaneSign = BLane[SignBit];
+ ASignBits.setBitVal(I, ALaneSign);
+ BSignBits.setBitVal(I, BLaneSign);
+ }
+ pushInteger(S, Fn(ASignBits, BSignBits) ? 1 : 0, Call->getType());
+ return true;
+ } else { // Must be integer or float type
+ return false;
+ }
+}
+
static bool interp__builtin_elementwise_triop(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
@@ -3579,7 +3639,34 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
S, OpPC, Call, [](const APSInt &F, const APSInt &T, const APSInt &C) {
return ((APInt)C).isNegative() ? T : F;
});
-
+ case X86::BI__builtin_ia32_ptestz128:
+ case X86::BI__builtin_ia32_ptestz256:
+ case X86::BI__builtin_ia32_vtestzps:
+ case X86::BI__builtin_ia32_vtestzps256:
+ case X86::BI__builtin_ia32_vtestzpd:
+ case X86::BI__builtin_ia32_vtestzpd256:
+ return interp__builtin_ia32_test_op(
+ S, OpPC, Call,
+ [](const APInt &A, const APInt &B) { return (A & B) == 0; });
+ case X86::BI__builtin_ia32_ptestc128:
+ case X86::BI__builtin_ia32_ptestc256:
+ case X86::BI__builtin_ia32_vtestcps:
+ case X86::BI__builtin_ia32_vtestcps256:
+ case X86::BI__builtin_ia32_vtestcpd:
+ case X86::BI__builtin_ia32_vtestcpd256:
+ return interp__builtin_ia32_test_op(
+ S, OpPC, Call,
+ [](const APInt &A, const APInt &B) { return (~A & B) == 0; });
+ case X86::BI__builtin_ia32_ptestnzc128:
+ case X86::BI__builtin_ia32_ptestnzc256:
+ case X86::BI__builtin_ia32_vtestnzcps:
+ case X86::BI__builtin_ia32_vtestnzcps256:
+ case X86::BI__builtin_ia32_vtestnzcpd:
+ case X86::BI__builtin_ia32_vtestnzcpd256:
+ return interp__builtin_ia32_test_op(
+ S, OpPC, Call, [](const APInt &A, const APInt &B) {
+ return ((A & B) != 0) && ((~A & B) != 0);
+ });
case X86::BI__builtin_ia32_selectb_128:
case X86::BI__builtin_ia32_selectb_256:
case X86::BI__builtin_ia32_selectb_512:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index b706b14945b6d..f295f2aff9385 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13650,6 +13650,51 @@ static bool getBuiltinAlignArguments(const CallExpr *E, EvalInfo &Info,
bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
unsigned BuiltinOp) {
+ auto EvalTestOp =
+ [&](llvm::function_ref<bool(const APInt &, const APInt &)> Fn) {
+ APValue SourceLHS, SourceRHS;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+ return false;
+
+ unsigned SourceLen = SourceLHS.getVectorLength();
+
+ const VectorType *VT = E->getArg(0)->getType()->castAs<VectorType>();
+ QualType ElemQT = VT->getElementType();
+
+ if (ElemQT->isIntegerType()) {
+ const unsigned LaneWidth =
+ SourceLHS.getVectorElt(0).getInt().getBitWidth();
+ APInt AWide(LaneWidth * SourceLen, 0);
+ APInt BWide(LaneWidth * SourceLen, 0);
+
+ for (unsigned I = 0; I != SourceLen; ++I) {
+ APInt ALane = SourceLHS.getVectorElt(I).getInt();
+ APInt BLane = SourceRHS.getVectorElt(I).getInt();
+ AWide.insertBits(ALane, I * LaneWidth);
+ BWide.insertBits(BLane, I * LaneWidth);
+ }
+ return Success(Fn(AWide, BWide), E);
+
+ } else if (ElemQT->isFloatingType()) {
+ APInt ASignBits(SourceLen, 0);
+ APInt BSignBits(SourceLen, 0);
+
+ for (unsigned I = 0; I != SourceLen; ++I) {
+ APInt ALane = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt();
+ APInt BLane = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt();
+ const unsigned SignBit = ALane.getBitWidth() - 1;
+ const bool ALaneSign = ALane[SignBit];
+ const bool BLaneSign = BLane[SignBit];
+ ASignBits.setBitVal(I, ALaneSign);
+ BSignBits.setBitVal(I, BLaneSign);
+ }
+ return Success(Fn(ASignBits, BSignBits), E);
+
+ } else { // Must be integer or float type
+ return false;
+ }
+ };
auto HandleMaskBinOp =
[&](llvm::function_ref<APSInt(const APSInt &, const APSInt &)> Fn)
@@ -14763,7 +14808,34 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
Result.setBitVal(P++, Val[I]);
return Success(Result, E);
}
-
+ case X86::BI__builtin_ia32_ptestz128:
+ case X86::BI__builtin_ia32_ptestz256:
+ case X86::BI__builtin_ia32_vtestzps:
+ case X86::BI__builtin_ia32_vtestzps256:
+ case X86::BI__builtin_ia32_vtestzpd:
+ case X86::BI__builtin_ia32_vtestzpd256: {
+ return EvalTestOp(
+ [](const APInt &A, const APInt &B) { return (A & B) == 0; });
+ }
+ case X86::BI__builtin_ia32_ptestc128:
+ case X86::BI__builtin_ia32_ptestc256:
+ case X86::BI__builtin_ia32_vtestcps:
+ case X86::BI__builtin_ia32_vtestcps256:
+ case X86::BI__builtin_ia32_vtestcpd:
+ case X86::BI__builtin_ia32_vtestcpd256: {
+ return EvalTestOp(
+ [](const APInt &A, const APInt &B) { return (~A & B) == 0; });
+ }
+ case X86::BI__builtin_ia32_ptestnzc128:
+ case X86::BI__builtin_ia32_ptestnzc256:
+ case X86::BI__builtin_ia32_vtestnzcps:
+ case X86::BI__builtin_ia32_vtestnzcps256:
+ case X86::BI__builtin_ia32_vtestnzcpd:
+ case X86::BI__builtin_ia32_vtestnzcpd256: {
+ return EvalTestOp([](const APInt &A, const APInt &B) {
+ return ((A & B) != 0) && ((~A & B) != 0);
+ });
+ }
case X86::BI__builtin_ia32_kandqi:
case X86::BI__builtin_ia32_kandhi:
case X86::BI__builtin_ia32_kandsi:
@@ -14822,7 +14894,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
return HandleMaskBinOp(
[](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
}
- }
+}
}
/// Determine whether this is a pointer past the end of the complete
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index d6ba19a6c78af..123fa7933c4f8 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -2539,9 +2539,8 @@ _mm256_unpacklo_ps(__m256 __a, __m256 __b) {
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns the ZF flag in the EFLAGS register.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testz_pd(__m128d __a, __m128d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testz_pd(__m128d __a,
+ __m128d __b) {
return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b);
}
@@ -2568,9 +2567,8 @@ _mm_testz_pd(__m128d __a, __m128d __b)
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns the CF flag in the EFLAGS register.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testc_pd(__m128d __a, __m128d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testc_pd(__m128d __a,
+ __m128d __b) {
return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b);
}
@@ -2598,9 +2596,8 @@ _mm_testc_pd(__m128d __a, __m128d __b)
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testnzc_pd(__m128d __a, __m128d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_testnzc_pd(__m128d __a, __m128d __b) {
return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b);
}
@@ -2627,9 +2624,8 @@ _mm_testnzc_pd(__m128d __a, __m128d __b)
/// \param __b
/// A 128-bit vector of [4 x float].
/// \returns the ZF flag.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testz_ps(__m128 __a, __m128 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testz_ps(__m128 __a,
+ __m128 __b) {
return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b);
}
@@ -2656,9 +2652,8 @@ _mm_testz_ps(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float].
/// \returns the CF flag.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testc_ps(__m128 __a, __m128 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testc_ps(__m128 __a,
+ __m128 __b) {
return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b);
}
@@ -2686,9 +2681,8 @@ _mm_testc_ps(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float].
/// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testnzc_ps(__m128 __a, __m128 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testnzc_ps(__m128 __a,
+ __m128 __b) {
return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b);
}
@@ -2715,9 +2709,8 @@ _mm_testnzc_ps(__m128 __a, __m128 __b)
/// \param __b
/// A 256-bit vector of [4 x double].
/// \returns the ZF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testz_pd(__m256d __a, __m256d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testz_pd(__m256d __a,
+ __m256d __b) {
return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b);
}
@@ -2744,9 +2737,8 @@ _mm256_testz_pd(__m256d __a, __m256d __b)
/// \param __b
/// A 256-bit vector of [4 x double].
/// \returns the CF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testc_pd(__m256d __a, __m256d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testc_pd(__m256d __a,
+ __m256d __b) {
return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b);
}
@@ -2774,9 +2766,8 @@ _mm256_testc_pd(__m256d __a, __m256d __b)
/// \param __b
/// A 256-bit vector of [4 x double].
/// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testnzc_pd(__m256d __a, __m256d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_testnzc_pd(__m256d __a, __m256d __b) {
return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b);
}
@@ -2803,9 +2794,8 @@ _mm256_testnzc_pd(__m256d __a, __m256d __b)
/// \param __b
/// A 256-bit vector of [8 x float].
/// \returns the ZF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testz_ps(__m256 __a, __m256 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testz_ps(__m256 __a,
+ __m256 __b) {
return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b);
}
@@ -2832,9 +2822,8 @@ _mm256_testz_ps(__m256 __a, __m256 __b)
/// \param __b
/// A 256-bit vector of [8 x float].
/// \returns the CF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testc_ps(__m256 __a, __m256 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testc_ps(__m256 __a,
+ __m256 __b) {
return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b);
}
@@ -2862,9 +2851,8 @@ _mm256_testc_ps(__m256 __a, __m256 __b)
/// \param __b
/// A 256-bit vector of [8 x float].
/// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testnzc_ps(__m256 __a, __m256 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testnzc_ps(__m256 __a,
+ __m256 __b) {
return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b);
}
@@ -2888,9 +2876,8 @@ _mm256_testnzc_ps(__m256 __a, __m256 __b)
/// \param __b
/// A 256-bit integer vector.
/// \returns the ZF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testz_si256(__m256i __a, __m256i __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_testz_si256(__m256i __a, __m256i __b) {
return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b);
}
@@ -2914,9 +2901,8 @@ _mm256_testz_si256(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit integer vector.
/// \returns the CF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testc_si256(__m256i __a, __m256i __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_testc_si256(__m256i __a, __m256i __b) {
return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b);
}
@@ -2941,9 +2927,8 @@ _mm256_testc_si256(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit integer vector.
/// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testnzc_si256(__m256i __a, __m256i __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_testnzc_si256(__m256i __a, __m256i __b) {
return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b);
}
diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h
index 5e63a1ae321bc..4f197d5ecaff9 100644
--- a/clang/lib/Headers/smmintrin.h
+++ b/clang/lib/Headers/smmintrin.h
@@ -1093,8 +1093,8 @@ _mm_max_epu32(__m128i __V1, __m128i __V2) {
/// \param __V
/// A 128-bit integer vector selecting which bits to test in operand \a __M.
/// \returns TRUE if the specified bits are all zeros; FALSE otherwise.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_testz_si128(__m128i __M,
- __m128i __V) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_testz_si128(__m128i __M, __m128i __V) {
return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V);
}
@@ -1110,8 +1110,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_testz_si128(__m128i __M,
/// \param __V
/// A 128-bit integer vector selecting which bits to test in operand \a __M.
/// \returns TRUE if the specified bits are all ones; FALSE otherwise.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_testc_si128(__m128i __M,
- __m128i __V) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_testc_si128(__m128i __M, __m128i __V) {
return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V);
}
@@ -1128,8 +1128,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_testc_si128(__m128i __M,
/// A 128-bit integer vector selecting which bits to test in operand \a __M.
/// \returns TRUE if the specified bits are neither all zeros nor all ones;
/// FALSE otherwise.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_testnzc_si128(__m128i __M,
- __m128i __V) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_testnzc_si128(__m128i __M, __m128i __V) {
return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V);
}
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 3018bb9719b89..40a3bd68fbae1 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -2001,90 +2001,149 @@ int test_mm_testc_pd(__m128d A, __m128d B) {
// CHECK: call {{.*}}i32 @llvm.x86.avx.vtestc.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_testc_pd(A, B);
}
+TEST_CONSTEXPR(_mm_testc_pd((__m128d)(__v2df){-1.0, -2.0},
+ (__m128d)(__v2df){-3.0, 4.0}) == 1);
+TEST_CONSTEXPR(_mm_testc_pd((__m128d)(__v2df){ 1.0, -2.0},
+ (__m128d)(__v2df){-3.0, 4.0}) == 0);
+TEST_CONSTEXPR(_mm_testc_pd((__m128d)(__v2df){ 1.0, -2.0},
+ (__m128d)(__v2df){ 0.0, 5.0}) == 1);
int test_mm256_testc_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_testc_pd
// CHECK: call {{.*}}i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
return _mm256_testc_pd(A, B);
}
+TEST_CONSTEXPR(_mm256_testc_pd(
+ (__m256d)(__v4df){-1.0, 2.0, -3.0, 4.0},
+ (__m256d)(__v4df){-5.0, 6.0, 7.0, 8.0}) == 1);
+TEST_CONSTEXPR(_mm256_testc_pd(
+ (__m256d)(__v4df){ 1.0, 2.0, -3.0, 4.0},
+ (__m256d)(__v4df){-5.0, 6.0, 7.0, 8.0}) == 0);
+TEST_CONSTEXPR(_mm256_testc_pd(
+ (__m256d)(__v4df){-1.0, -2.0, -3.0, -4.0},
+ (__m256d)(__v4df){ 5.0, 6.0, 7.0, 8.0}) == 1);
int test_mm_testc_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_testc_ps
// CHECK: call {{.*}}i32 @llvm.x86.avx.vtestc.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_testc_ps(A, B);
}
+TEST_CONSTEXPR(_mm_testc_ps((__m128)(__v4sf){-1,-1,-9001.1009,}, (__m128)(__v4sf){-1.0,-9001,9001,9000}) == 1);
+TEST_CONSTEXPR(_mm_testc_ps((__m128)(__v4sf){-1,2384.23,-9001.1009,}, (__m128)(__v4sf){-1.0,-9001,9001,9000}) == 0);
+TEST_CONSTEXPR(_mm_testc_ps((__m128)(__v4sf){-1,-2,-9001.1009,-93}, (__m128)(__v4sf){-1.0,-9001,-0.9001,-1000}) == 1);
int test_mm256_testc_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_testc_ps
// CHECK: call {{.*}}i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
return _mm256_testc_ps(A, B);
}
+TEST_CONSTEXPR(_mm256_testc_ps((__m256)(__v8sf){-1, -2, -3, -4, -5, -6, -7, 8},(__m256)(__v8sf){1, 2, 3, 4, 5, 6, 7, -8}) == 0);
+TEST_CONSTEXPR(_mm256_testc_ps((__m256)(__v8sf){0,0,0,0,0,0,0,-1.00001},(__m256)(__v8sf){1, 2, 3, 4, 5, 6, 7, -8}) == 1);
+TEST_CONSTEXPR(_mm256_testc_ps((__m256)(__v8sf){0,-0.00002,0,0,0,0,0,-1.00001},(__m256)(__v8sf){1, 2, 3, -4, 5, -6, -7, 8}) == 0);
int test_mm256_testc_si256(__m256i A, __m256i B) {
// CHECK-LABEL: test_mm256_testc_si256
// CHECK: call {{.*}}i32 @llvm.x86.avx.ptestc.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}})
return _mm256_testc_si256(A, B);
}
+TEST_CONSTEXPR(_mm256_testc_si256((__m256i)(__v4di){0,0,0,0}, (__m256i)(__v4di){0,0,0,0}) == 1);
+TEST_CONSTEXPR(_mm256_testc_si256((__m256i)(__v4di){0,0,-1,0}, (__m256i)(__v4di){0,0,1,0}) == 1);
+TEST_CONSTEXPR(_mm256_testc_si256((__m256i)(__v4di){-1,-2,1,3}, (__m256i)(__v4di){0,-1,1,1}) == 0);
int test_mm_testnzc_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_testnzc_pd
// CHECK: call {{.*}}i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_testnzc_pd(A, B);
}
+TEST_CONSTEXPR(_mm_testnzc_pd((__m128d)(__v2df){-1.0, +2.0},
+ (__m128d)(__v2df){-3.0, -4.0}) == 1);
+TEST_CONSTEXPR(_mm_testnzc_pd((__m128d)(__v2df){+1.0, +2.0},
+ (__m128d)(__v2df){+3.0, -4.0}) == 0);
+TEST_CONSTEXPR(_mm_testnzc_pd((__m128d)(__v2df){-1.0, -2.0},
+ (__m128d)(__v2df){-3.0, +4.0}) == 0);
int test_mm256_testnzc_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_testnzc_pd
// CHECK: call {{.*}}i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
return _mm256_testnzc_pd(A, B);
}
+TEST_CONSTEXPR(_mm256_testnzc_pd((__m256d)(__v4df){-1.0, 2.0, 3.0, -4.0}, (__m256d)(__v4df){-5.0, -6.0, 7.0, 8.0}) == 1);
+TEST_CONSTEXPR(_mm256_testnzc_pd((__m256d)(__v4df){1.0, 2.0, 3.0, 4.0}, (__m256d)(__v4df){-1.0, 6.0, 7.0, 8.0}) == 0);
+TEST_CONSTEXPR(_mm256_testnzc_pd((__m256d)(__v4df){-1.0, -2.0, -3.0, -4.0}, (__m256d)(__v4df){-5.0, 6.0, 7.0, 8.0}) == 0);
int test_mm_testnzc_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_testnzc_ps
// CHECK: call {{.*}}i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_testnzc_ps(A, B);
}
+TEST_CONSTEXPR(_mm_testnzc_ps((__m128)(__v4sf){-9.9,987,-67,0}, (__m128)(__v4sf){10.0,-1.12,-29.29,0}) == 1);
+TEST_CONSTEXPR(_mm_testnzc_ps((__m128)(__v4sf){-810.0,-1.0,-1.0,-3.0}, (__m128)(__v4sf){-10.0,-1.0,-1.0,-2.0}) == 0);
+TEST_CONSTEXPR(_mm_testnzc_ps((__m128)(__v4sf){0,0,0,0}, (__m128)(__v4sf){0,-1,0,-1}) == 0);
int test_mm256_testnzc_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_testnzc_ps
// CHECK: call {{.*}}i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
return _mm256_testnzc_ps(A, B);
}
+TEST_CONSTEXPR(_mm256_testnzc_ps((__m256)(__v8sf){-1, -2, -3, -4, -5, -6, -7, 8},(__m256)(__v8sf){1, -2, 3, 4, 5, 6, 7, -8}) == 1);
+TEST_CONSTEXPR(_mm256_testnzc_ps((__m256)(__v8sf){0,0,0,0,0,0,0,-1.00001},(__m256)(__v8sf){1, 2, 3, 4, 5, 6, 7, -8}) == 0);
+TEST_CONSTEXPR(_mm256_testnzc_ps((__m256)(__v8sf){0,-0.00002,0,0,0,0,0,-1.00001},(__m256)(__v8sf){1, 2, 3, -4, 5, -6, -7, 8}) == 0);
int test_mm256_testnzc_si256(__m256i A, __m256i B) {
// CHECK-LABEL: test_mm256_testnzc_si256
// CHECK: call {{.*}}i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}})
return _mm256_testnzc_si256(A, B);
}
+TEST_CONSTEXPR(_mm256_testnzc_si256((__m256i)(__v4di){-1,0,0,0}, (__m256i)(__v4di){478329848,23438,2343,-3483}) == 1);
+TEST_CONSTEXPR(_mm256_testnzc_si256((__m256i)(__v4di){1,0,0,0}, (__m256i)(__v4di){3,0,0,0}) == 1);
+TEST_CONSTEXPR(_mm256_testnzc_si256((__m256i)(__v4di){-1,239483,-1,0}, (__m256i)(__v4di){3849234,0,-2,0}) == 0);
+TEST_CONSTEXPR(_mm256_testnzc_si256((__m256i)(__v4di){-1,0,-1,3}, (__m256i)(__v4di){1,0,9999999,1}) == 0);
int test_mm_testz_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_testz_pd
// CHECK: call {{.*}}i32 @llvm.x86.avx.vtestz.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_testz_pd(A, B);
}
+TEST_CONSTEXPR(_mm_testz_pd((__m128d)(__v2df){-1,0}, (__m128d)(__v2df){0,-1}) == 1);
+TEST_CONSTEXPR(_mm_testz_pd((__m128d)(__v2df){0,-13.13}, (__m128d)(__v2df){0,-11.1}) == 0);
+TEST_CONSTEXPR(_mm_testz_pd((__m128d)(__v2df){0,5.13}, (__m128d)(__v2df){0,-113.1324823}) == 1);
+
int test_mm256_testz_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_testz_pd
// CHECK: call {{.*}}i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
return _mm256_testz_pd(A, B);
}
+TEST_CONSTEXPR(_mm256_testz_pd((__m256d)(__v4df){-1,0,-47.47,0.00002}, (__m256d)(__v4df){0,-1,74.0101,-1}) == 1);
+TEST_CONSTEXPR(_mm256_testz_pd((__m256d)(__v4df){-1,3249.9,-47.47,-0.00002}, (__m256d)(__v4df){0,-1,74.0101,-9999900}) == 0);
+TEST_CONSTEXPR(_mm256_testz_pd((__m256d)(__v4df){0,0,-8,0}, (__m256d)(__v4df){0,-1,-101,-123}) == 0);
int test_mm_testz_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_testz_ps
// CHECK: call {{.*}}i32 @llvm.x86.avx.vtestz.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_testz_ps(A, B);
}
+TEST_CONSTEXPR(_mm_testz_ps((__m128)(__v4sf){-9.9,987,67,0}, (__m128)(__v4sf){10.0,-1.12,-29.29,0}) == 1);
+TEST_CONSTEXPR(_mm_testz_ps((__m128)(__v4sf){10.0,1.0,-1.0,-3.0}, (__m128)(__v4sf){-10.0,-1.0,-1.0,-2.0}) == 0);
+TEST_CONSTEXPR(_mm_testz_ps((__m128)(__v4sf){0,0,0,0}, (__m128)(__v4sf){0,-1,0,-1}) == 1);
int test_mm256_testz_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_testz_ps
// CHECK: call {{.*}}i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
return _mm256_testz_ps(A, B);
}
+TEST_CONSTEXPR(_mm256_testz_ps((__m256)(__v8sf){-1, -2, -3, -4, -5, -6, -7, 8},(__m256)(__v8sf){1, 2, 3, 4, 5, 6, 7, -8}) == 1);
+TEST_CONSTEXPR(_mm256_testz_ps((__m256)(__v8sf){0,0,0,0,0,0,0,-1.00001},(__m256)(__v8sf){1, 2, 3, 4, 5, 6, 7, -8}) == 0);
+TEST_CONSTEXPR(_mm256_testz_ps((__m256)(__v8sf){0,-0.00002,0,0,0,0,0,-1.00001},(__m256)(__v8sf){1, 2, 3, -4, 5, -6, -7, 8}) == 1);
int test_mm256_testz_si256(__m256i A, __m256i B) {
// CHECK-LABEL: test_mm256_testz_si256
// CHECK: call {{.*}}i32 @llvm.x86.avx.ptestz.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}})
return _mm256_testz_si256(A, B);
}
+TEST_CONSTEXPR(_mm256_testz_si256((__m256i)(__v4di){0,0,0,0}, (__m256i)(__v4di){0,0,0,0}) == 1);
+TEST_CONSTEXPR(_mm256_testz_si256((__m256i)(__v4di){0,0,-1,0}, (__m256i)(__v4di){0,0,-1,0}) == 0);
+TEST_CONSTEXPR(_mm256_testz_si256((__m256i)(__v4di){-1,0,1,0}, (__m256i)(__v4di){0,-1,0,1}) == 1);
__m256 test_mm256_undefined_ps(void) {
// X64-LABEL: test_mm256_undefined_ps
diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c
index 3c3724643870e..a3e1fcc8426fa 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -442,33 +442,53 @@ int test_mm_test_all_ones(__m128i x) {
// CHECK: call {{.*}}i32 @llvm.x86.sse41.ptestc(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
return _mm_test_all_ones(x);
}
+TEST_CONSTEXPR(_mm_test_all_ones(((__m128i)(__v2di){-1, -1})) == 1);
+TEST_CONSTEXPR(_mm_test_all_ones(((__m128i)(__v2di){-1, 0})) == 0);
+TEST_CONSTEXPR(_mm_test_all_ones(((__m128i)(__v4si){-1, -1, -1, 0x7FFFFFFF})) == 0);
int test_mm_test_all_zeros(__m128i x, __m128i y) {
// CHECK-LABEL: test_mm_test_all_zeros
// CHECK: call {{.*}}i32 @llvm.x86.sse41.ptestz(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
return _mm_test_all_zeros(x, y);
}
+TEST_CONSTEXPR(_mm_test_all_zeros(((__m128i)(__v2di){0,0}), ((__m128i)(__v2di){0,0})) == 1);
+TEST_CONSTEXPR(_mm_test_all_zeros(((__m128i)(__v2di){0xFF00,0}), ((__m128i)(__v2di){0x00FF,0})) == 1);
+TEST_CONSTEXPR(_mm_test_all_zeros(((__m128i)(__v2di){1,0}), ((__m128i)(__v2di){-1,0})) == 0);
+TEST_CONSTEXPR(_mm_test_all_zeros(((__m128i)(__v2di){0,1}), ((__m128i)(__v2di){0,-1})) == 0);
int test_mm_test_mix_ones_zeros(__m128i x, __m128i y) {
// CHECK-LABEL: test_mm_test_mix_ones_zeros
// CHECK: call {{.*}}i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
return _mm_test_mix_ones_zeros(x, y);
}
+TEST_CONSTEXPR(_mm_test_mix_ones_zeros(((__m128i)(__v2di){0xFF, 0}), ((__m128i)(__v2di){0xF0, 1})) == 1);
+TEST_CONSTEXPR(_mm_test_mix_ones_zeros(((__m128i)(__v2di){0xF0, 0}), ((__m128i)(__v2di){0x0F, 0})) == 0);
+TEST_CONSTEXPR(_mm_test_mix_ones_zeros(((__m128i)(__v2di){-1, -1}), ((__m128i)(__v2di){1, 0})) == 0);
+TEST_CONSTEXPR(_mm_test_mix_ones_zeros(((__m128i)(__v2di){0, 0}), ((__m128i)(__v2di){0, 0})) == 0);
int test_mm_testc_si128(__m128i x, __m128i y) {
// CHECK-LABEL: test_mm_testc_si128
// CHECK: call {{.*}}i32 @llvm.x86.sse41.ptestc(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
return _mm_testc_si128(x, y);
}
+TEST_CONSTEXPR(_mm_testc_si128((__m128i)(__v2di){0,0}, (__m128i)(__v2di){0,0}) == 1);
+TEST_CONSTEXPR(_mm_testc_si128((__m128i)(__v2di){1,0}, (__m128i)(__v2di){-1,0}) == 0);
+TEST_CONSTEXPR(_mm_testc_si128((__m128i)(__v2di){0,-1}, (__m128i)(__v2di){0,1}) == 1);
int test_mm_testnzc_si128(__m128i x, __m128i y) {
// CHECK-LABEL: test_mm_testnzc_si128
// CHECK: call {{.*}}i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
return _mm_testnzc_si128(x, y);
}
+TEST_CONSTEXPR(_mm_testnzc_si128((__m128i)(__v2di){3,0}, (__m128i)(__v2di){1,1}) == 1);
+TEST_CONSTEXPR(_mm_testnzc_si128((__m128i)(__v2di){32,-1}, (__m128i)(__v2di){15,0}) == 0);
+TEST_CONSTEXPR(_mm_testnzc_si128((__m128i)(__v2di){0,999}, (__m128i)(__v2di){0,999}) == 0);
int test_mm_testz_si128(__m128i x, __m128i y) {
// CHECK-LABEL: test_mm_testz_si128
// CHECK: call {{.*}}i32 @llvm.x86.sse41.ptestz(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
return _mm_testz_si128(x, y);
}
+TEST_CONSTEXPR(_mm_testz_si128((__m128i)(__v2di){0,0}, (__m128i)(__v2di){0,0}) == 1);
+TEST_CONSTEXPR(_mm_testz_si128((__m128i)(__v2di){1,0}, (__m128i)(__v2di){-1,0}) == 0);
+TEST_CONSTEXPR(_mm_testz_si128((__m128i)(__v2di){1,0}, (__m128i)(__v2di){0,1}) == 1);
>From 8f4d93e8a75f1c096e383ed06ebb637c25c91372 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Thu, 2 Oct 2025 09:53:47 -0700
Subject: [PATCH 2/2] Apply feedback from Simon and Timm
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 51 ++++++++------------
clang/lib/AST/ExprConstant.cpp | 59 ++++++++++--------------
2 files changed, 44 insertions(+), 66 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 382e7c8d4a95d..b27441f8b73b5 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2780,6 +2780,7 @@ static bool interp__builtin_ia32_test_op(
const Pointer &LHS = S.Stk.pop<Pointer>();
assert(LHS.getNumElems() == RHS.getNumElems());
+
assert(LHS.getFieldDesc()->isPrimitiveArray() &&
RHS.getFieldDesc()->isPrimitiveArray());
@@ -2787,50 +2788,36 @@ static bool interp__builtin_ia32_test_op(
getElemType(RHS)))
return false;
- const unsigned SourceLen = LHS.getNumElems();
+ unsigned SourceLen = LHS.getNumElems();
const QualType ElemQT = getElemType(LHS);
const OptPrimType ElemPT = S.getContext().classify(ElemQT);
+ unsigned LaneWidth = S.getASTContext().getTypeSize(ElemQT);
+ APInt SignMask = APInt::getSignMask(LaneWidth);
- if (ElemQT->isIntegerType()) {
- APInt FirstElem;
- INT_TYPE_SWITCH_NO_BOOL(*ElemPT,
- { FirstElem = LHS.elem<T>(0).toAPSInt(); });
- const unsigned LaneWidth = FirstElem.getBitWidth();
+ APInt AWide(LaneWidth * SourceLen, 0);
+ APInt BWide(LaneWidth * SourceLen, 0);
- APInt AWide(LaneWidth * SourceLen, 0);
- APInt BWide(LaneWidth * SourceLen, 0);
+ for (unsigned I = 0; I != SourceLen; ++I) {
+ APInt ALane;
+ APInt BLane;
- for (unsigned I = 0; I != SourceLen; ++I) {
- APInt ALane;
- APInt BLane;
+ if (ElemQT->isIntegerType()) { // Get value
INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
ALane = LHS.elem<T>(I).toAPSInt();
BLane = RHS.elem<T>(I).toAPSInt();
});
- AWide.insertBits(ALane, I * LaneWidth);
- BWide.insertBits(BLane, I * LaneWidth);
- }
- pushInteger(S, Fn(AWide, BWide) ? 1 : 0, Call->getType());
- return true;
- } else if (ElemQT->isFloatingType()) {
- APInt ASignBits(SourceLen, 0);
- APInt BSignBits(SourceLen, 0);
-
- for (unsigned I = 0; I != SourceLen; ++I) {
+ } else if (ElemQT->isFloatingType()) { // Get only sign bit
using T = PrimConv<PT_Float>::T;
- APInt ALane = LHS.elem<T>(I).getAPFloat().bitcastToAPInt();
- APInt BLane = RHS.elem<T>(I).getAPFloat().bitcastToAPInt();
- const unsigned SignBit = ALane.getBitWidth() - 1;
- const bool ALaneSign = ALane[SignBit];
- const bool BLaneSign = BLane[SignBit];
- ASignBits.setBitVal(I, ALaneSign);
- BSignBits.setBitVal(I, BLaneSign);
+ ALane = LHS.elem<T>(I).getAPFloat().bitcastToAPInt() & SignMask;
+ BLane = RHS.elem<T>(I).getAPFloat().bitcastToAPInt() & SignMask;
+ } else { // Must be integer or floating type
+ return false;
}
- pushInteger(S, Fn(ASignBits, BSignBits) ? 1 : 0, Call->getType());
- return true;
- } else { // Must be integer or float type
- return false;
+ AWide.insertBits(ALane, I * LaneWidth);
+ BWide.insertBits(BLane, I * LaneWidth);
}
+ pushInteger(S, Fn(AWide, BWide), Call->getType());
+ return true;
}
static bool interp__builtin_elementwise_triop(
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index f295f2aff9385..4c9136f6003c0 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13658,42 +13658,33 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
return false;
unsigned SourceLen = SourceLHS.getVectorLength();
-
const VectorType *VT = E->getArg(0)->getType()->castAs<VectorType>();
- QualType ElemQT = VT->getElementType();
-
- if (ElemQT->isIntegerType()) {
- const unsigned LaneWidth =
- SourceLHS.getVectorElt(0).getInt().getBitWidth();
- APInt AWide(LaneWidth * SourceLen, 0);
- APInt BWide(LaneWidth * SourceLen, 0);
-
- for (unsigned I = 0; I != SourceLen; ++I) {
- APInt ALane = SourceLHS.getVectorElt(I).getInt();
- APInt BLane = SourceRHS.getVectorElt(I).getInt();
- AWide.insertBits(ALane, I * LaneWidth);
- BWide.insertBits(BLane, I * LaneWidth);
- }
- return Success(Fn(AWide, BWide), E);
-
- } else if (ElemQT->isFloatingType()) {
- APInt ASignBits(SourceLen, 0);
- APInt BSignBits(SourceLen, 0);
-
- for (unsigned I = 0; I != SourceLen; ++I) {
- APInt ALane = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt();
- APInt BLane = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt();
- const unsigned SignBit = ALane.getBitWidth() - 1;
- const bool ALaneSign = ALane[SignBit];
- const bool BLaneSign = BLane[SignBit];
- ASignBits.setBitVal(I, ALaneSign);
- BSignBits.setBitVal(I, BLaneSign);
+ const QualType ElemQT = VT->getElementType();
+ unsigned LaneWidth = Info.Ctx.getTypeSize(ElemQT);
+ APInt SignMask = APInt::getSignMask(LaneWidth);
+
+ APInt AWide(LaneWidth * SourceLen, 0);
+ APInt BWide(LaneWidth * SourceLen, 0);
+
+ for (unsigned I = 0; I != SourceLen; ++I) {
+ APInt ALane;
+ APInt BLane;
+
+ if (ElemQT->isIntegerType()) { // Get value
+ ALane = SourceLHS.getVectorElt(I).getInt();
+ BLane = SourceRHS.getVectorElt(I).getInt();
+ } else if (ElemQT->isFloatingType()) { // Get only sign bit
+ ALane = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt() &
+ SignMask;
+ BLane = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt() &
+ SignMask;
+ } else { // Must be integer or floating type
+ return false;
}
- return Success(Fn(ASignBits, BSignBits), E);
-
- } else { // Must be integer or float type
- return false;
+ AWide.insertBits(ALane, I * LaneWidth);
+ BWide.insertBits(BLane, I * LaneWidth);
}
+ return Success(Fn(AWide, BWide), E);
};
auto HandleMaskBinOp =
@@ -14894,7 +14885,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
return HandleMaskBinOp(
[](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
}
-}
+ }
}
/// Determine whether this is a pointer past the end of the complete
More information about the cfe-commits
mailing list