[clang] [X86][Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - add SSE/AVX VPTEST/VTESTPD/VTESTPS intrinsics to be used in constexpr (PR #160428)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Sep 26 01:14:07 PDT 2025
https://github.com/kimsh02 updated https://github.com/llvm/llvm-project/pull/160428
>From 00ec5f957ce4c63ba9ba377974ba1f8db588822b Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Tue, 16 Sep 2025 14:21:55 -0700
Subject: [PATCH 01/21] Mark vector test intrinsics constexpr
---
clang/include/clang/Basic/BuiltinsX86.td | 26 ++++++--
clang/lib/Headers/avxintrin.h | 75 ++++++++++--------------
clang/lib/Headers/smmintrin.h | 12 ++--
3 files changed, 56 insertions(+), 57 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 044c755d4d7cf..956770126c62c 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -318,10 +318,8 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
def roundpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant int)">;
def dpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
- def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">;
- def ptestz128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
- def ptestc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
- def ptestnzc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
+ def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, "
+ "double>, _Constant char)">;
def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">;
def vec_ext_v16qi : X86Builtin<"char(_Vector<16, char>, _Constant int)">;
@@ -329,6 +327,16 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
def vec_set_v4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int, _Constant int)">;
}
+let Features = "sse4.1",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+ def ptestz128
+ : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
+ def ptestc128
+ : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
+ def ptestnzc128
+ : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
+}
+
let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def pblendw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">;
def blendpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
@@ -514,7 +522,8 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
}
-let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vtestzpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
def vtestcpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
def vtestnzcpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
@@ -523,7 +532,8 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
def vtestnzcps : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>)">;
}
-let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vtestzpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
def vtestcpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
def vtestnzcpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
@@ -533,6 +543,10 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
def ptestz256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
def ptestc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
def ptestnzc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
+}
+
+let Features = "avx",
+ Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def movmskpd256 : X86Builtin<"int(_Vector<4, double>)">;
def movmskps256 : X86Builtin<"int(_Vector<8, float>)">;
}
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index a7f70994be9db..b37149709c962 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -2542,9 +2542,8 @@ _mm256_unpacklo_ps(__m256 __a, __m256 __b) {
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns the ZF flag in the EFLAGS register.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testz_pd(__m128d __a, __m128d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testz_pd(__m128d __a,
+ __m128d __b) {
return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b);
}
@@ -2571,9 +2570,8 @@ _mm_testz_pd(__m128d __a, __m128d __b)
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns the CF flag in the EFLAGS register.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testc_pd(__m128d __a, __m128d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testc_pd(__m128d __a,
+ __m128d __b) {
return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b);
}
@@ -2601,9 +2599,8 @@ _mm_testc_pd(__m128d __a, __m128d __b)
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testnzc_pd(__m128d __a, __m128d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_testnzc_pd(__m128d __a, __m128d __b) {
return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b);
}
@@ -2630,9 +2627,8 @@ _mm_testnzc_pd(__m128d __a, __m128d __b)
/// \param __b
/// A 128-bit vector of [4 x float].
/// \returns the ZF flag.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testz_ps(__m128 __a, __m128 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testz_ps(__m128 __a,
+ __m128 __b) {
return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b);
}
@@ -2659,9 +2655,8 @@ _mm_testz_ps(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float].
/// \returns the CF flag.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testc_ps(__m128 __a, __m128 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testc_ps(__m128 __a,
+ __m128 __b) {
return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b);
}
@@ -2689,9 +2684,8 @@ _mm_testc_ps(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float].
/// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testnzc_ps(__m128 __a, __m128 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testnzc_ps(__m128 __a,
+ __m128 __b) {
return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b);
}
@@ -2718,9 +2712,8 @@ _mm_testnzc_ps(__m128 __a, __m128 __b)
/// \param __b
/// A 256-bit vector of [4 x double].
/// \returns the ZF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testz_pd(__m256d __a, __m256d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testz_pd(__m256d __a,
+ __m256d __b) {
return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b);
}
@@ -2747,9 +2740,8 @@ _mm256_testz_pd(__m256d __a, __m256d __b)
/// \param __b
/// A 256-bit vector of [4 x double].
/// \returns the CF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testc_pd(__m256d __a, __m256d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testc_pd(__m256d __a,
+ __m256d __b) {
return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b);
}
@@ -2777,9 +2769,8 @@ _mm256_testc_pd(__m256d __a, __m256d __b)
/// \param __b
/// A 256-bit vector of [4 x double].
/// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testnzc_pd(__m256d __a, __m256d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_testnzc_pd(__m256d __a, __m256d __b) {
return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b);
}
@@ -2806,9 +2797,8 @@ _mm256_testnzc_pd(__m256d __a, __m256d __b)
/// \param __b
/// A 256-bit vector of [8 x float].
/// \returns the ZF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testz_ps(__m256 __a, __m256 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testz_ps(__m256 __a,
+ __m256 __b) {
return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b);
}
@@ -2835,9 +2825,8 @@ _mm256_testz_ps(__m256 __a, __m256 __b)
/// \param __b
/// A 256-bit vector of [8 x float].
/// \returns the CF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testc_ps(__m256 __a, __m256 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testc_ps(__m256 __a,
+ __m256 __b) {
return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b);
}
@@ -2865,9 +2854,8 @@ _mm256_testc_ps(__m256 __a, __m256 __b)
/// \param __b
/// A 256-bit vector of [8 x float].
/// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testnzc_ps(__m256 __a, __m256 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testnzc_ps(__m256 __a,
+ __m256 __b) {
return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b);
}
@@ -2891,9 +2879,8 @@ _mm256_testnzc_ps(__m256 __a, __m256 __b)
/// \param __b
/// A 256-bit integer vector.
/// \returns the ZF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testz_si256(__m256i __a, __m256i __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_testz_si256(__m256i __a, __m256i __b) {
return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b);
}
@@ -2917,9 +2904,8 @@ _mm256_testz_si256(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit integer vector.
/// \returns the CF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testc_si256(__m256i __a, __m256i __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_testc_si256(__m256i __a, __m256i __b) {
return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b);
}
@@ -2944,9 +2930,8 @@ _mm256_testc_si256(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit integer vector.
/// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testnzc_si256(__m256i __a, __m256i __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_testnzc_si256(__m256i __a, __m256i __b) {
return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b);
}
diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h
index 6319fdbbeb8f0..062e831259c7f 100644
--- a/clang/lib/Headers/smmintrin.h
+++ b/clang/lib/Headers/smmintrin.h
@@ -1093,8 +1093,8 @@ _mm_max_epu32(__m128i __V1, __m128i __V2) {
/// \param __V
/// A 128-bit integer vector selecting which bits to test in operand \a __M.
/// \returns TRUE if the specified bits are all zeros; FALSE otherwise.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_testz_si128(__m128i __M,
- __m128i __V) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_testz_si128(__m128i __M, __m128i __V) {
return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V);
}
@@ -1110,8 +1110,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_testz_si128(__m128i __M,
/// \param __V
/// A 128-bit integer vector selecting which bits to test in operand \a __M.
/// \returns TRUE if the specified bits are all ones; FALSE otherwise.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_testc_si128(__m128i __M,
- __m128i __V) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_testc_si128(__m128i __M, __m128i __V) {
return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V);
}
@@ -1128,8 +1128,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_testc_si128(__m128i __M,
/// A 128-bit integer vector selecting which bits to test in operand \a __M.
/// \returns TRUE if the specified bits are neither all zeros nor all ones;
/// FALSE otherwise.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_testnzc_si128(__m128i __M,
- __m128i __V) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_testnzc_si128(__m128i __M, __m128i __V) {
return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V);
}
>From 99b5ecf761a0637e44569f57cd0cb049b4485ba3 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Tue, 16 Sep 2025 17:48:07 -0700
Subject: [PATCH 02/21] Add builtin case statements in
VectorExprEvaluator::VisitCallExpr
---
clang/lib/AST/ExprConstant.cpp | 27 +++++++++++++++++++++++++++
1 file changed, 27 insertions(+)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 30ae3f8802f14..ac5994e7e5e61 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12025,6 +12025,33 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+ case X86::BI__builtin_ia32_ptestz128:
+ case X86::BI__builtin_ia32_ptestz256:
+
+ // case X86::BI__builtin_ia32_ptestc128:
+ // case X86::BI__builtin_ia32_ptestc256:
+
+ // case X86::BI__builtin_ia32_ptestnzc128:
+ // case X86::BI__builtin_ia32_ptestnzc256:
+
+ // case X86::BI__builtin_ia32_vtestzps:
+ // case X86::BI__builtin_ia32_vtestzps256:
+
+ // case X86::BI__builtin_ia32_vtestcps:
+ // case X86::BI__builtin_ia32_vtestcps256:
+
+ // case X86::BI__builtin_ia32_vtestnzcps:
+ // case X86::BI__builtin_ia32_vtestnzcps256:
+
+ // case X86::BI__builtin_ia32_vtestzpd:
+ // case X86::BI__builtin_ia32_vtestzpd256:
+
+ // case X86::BI__builtin_ia32_vtestcpd:
+ // case X86::BI__builtin_ia32_vtestcpd256:
+
+ // case X86::BI__builtin_ia32_vtestnzcpd:
+ // case X86::BI__builtin_ia32_vtestnzcpd256:
+
case Builtin::BI__builtin_elementwise_ctlz:
case Builtin::BI__builtin_elementwise_cttz: {
APValue SourceLHS;
>From bcca2d1e8c27ec84598b6de76672113b3579db6c Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Wed, 17 Sep 2025 16:09:14 -0700
Subject: [PATCH 03/21] Add builtin case statements in InterpBuiltin.cpp
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 26 ++++++++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 64962ee13d6b0..9171938ed89ae 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3580,6 +3580,32 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
S, OpPC, Call, [](const APSInt &F, const APSInt &T, const APSInt &C) {
return ((APInt)C).isNegative() ? T : F;
});
+ case X86::BI__builtin_ia32_ptestz128:
+ case X86::BI__builtin_ia32_ptestz256:
+
+ // case X86::BI__builtin_ia32_ptestc128:
+ // case X86::BI__builtin_ia32_ptestc256:
+
+ // case X86::BI__builtin_ia32_ptestnzc128:
+ // case X86::BI__builtin_ia32_ptestnzc256:
+
+ // case X86::BI__builtin_ia32_vtestzps:
+ // case X86::BI__builtin_ia32_vtestzps256:
+
+ // case X86::BI__builtin_ia32_vtestcps:
+ // case X86::BI__builtin_ia32_vtestcps256:
+
+ // case X86::BI__builtin_ia32_vtestnzcps:
+ // case X86::BI__builtin_ia32_vtestnzcps256:
+
+ // case X86::BI__builtin_ia32_vtestzpd:
+ // case X86::BI__builtin_ia32_vtestzpd256:
+
+ // case X86::BI__builtin_ia32_vtestcpd:
+ // case X86::BI__builtin_ia32_vtestcpd256:
+
+ // case X86::BI__builtin_ia32_vtestnzcpd:
+ // case X86::BI__builtin_ia32_vtestnzcpd256:
case X86::BI__builtin_ia32_selectb_128:
case X86::BI__builtin_ia32_selectb_256:
>From d6537dea629c063e6e7c17ab0d2d4c453966b2f1 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Wed, 17 Sep 2025 21:45:27 -0700
Subject: [PATCH 04/21] Stash
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 4 +-
clang/lib/AST/ExprConstant.cpp | 54 +++++++++++++++++++++++-
clang/test/CodeGen/X86/sse41-builtins.c | 10 +----
3 files changed, 56 insertions(+), 12 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 9171938ed89ae..f77e6ec1eca7b 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3580,8 +3580,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
S, OpPC, Call, [](const APSInt &F, const APSInt &T, const APSInt &C) {
return ((APInt)C).isNegative() ? T : F;
});
- case X86::BI__builtin_ia32_ptestz128:
- case X86::BI__builtin_ia32_ptestz256:
+ // case X86::BI__builtin_ia32_ptestz128:
+ // case X86::BI__builtin_ia32_ptestz256:
// case X86::BI__builtin_ia32_ptestc128:
// case X86::BI__builtin_ia32_ptestc256:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index ac5994e7e5e61..76788a2c41315 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12025,8 +12025,58 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
- case X86::BI__builtin_ia32_ptestz128:
- case X86::BI__builtin_ia32_ptestz256:
+ case X86::BI__builtin_ia32_ptestz128: {
+ APValue SourceLHS, SourceRHS;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+ return false;
+
+ unsigned SourceLen = SourceLHS.getVectorLength();
+ bool Flag = true;
+ for (unsigned I = 0; I < SourceLen; ++I) {
+ const APInt &A = SourceLHS.getVectorElt(I).getInt();
+ const APInt &B = SourceRHS.getVectorElt(I).getInt();
+ if ((A & B) != 0) {
+ Flag = false;
+ break;
+ }
+ }
+
+ QualType ResultType = E->getType();
+ unsigned BitWidth = Info.Ctx.getIntWidth(ResultType);
+ bool ResultSigned = ResultType->isUnsignedIntegerOrEnumerationType();
+ APSInt Result(APInt(BitWidth, Flag), ResultSigned);
+ return Success(APValue(Result), E);
+
+ // auto *DestTy = E->getType()->castAs<VectorType>();
+ // QualType DestEltTy = DestTy->getElementType();
+ // bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
+ // const unsigned SourceLen = SourceLHS.getVectorLength();
+ // SmallVector<APValue, 4> ResultElements;
+ // ResultElements.reserve(SourceLen);
+
+ // unsigned BitWidth = SourceLHS.getVectorElt(0).getInt().getBitWidth();
+
+ // auto PopulateResultElements = [&](bool Flag) {
+ // for (unsigned I = 0; I < SourceLen - 1; ++I) {
+ // ResultElements.emplace_back(APSInt(APInt::getZero(BitWidth), DestUnsigned));
+ // }
+ // ResultElements.emplace_back(APSInt(APInt(BitWidth, Flag), DestUnsigned));
+ // };
+
+ // for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
+ // const APInt &A = SourceLHS.getVectorElt(EltNum).getInt();
+ // const APInt &B = SourceRHS.getVectorElt(EltNum).getInt();
+ // if ((A & B) != 0) {
+ // PopulateResultElements(false);
+ // return Success(APValue(ResultElements.data(), SourceLen), E);
+ // }
+ // }
+ // PopulateResultElements(true);
+ // return Success(APValue(ResultElements.data(), SourceLen), E);
+ }
+
+ // case clang::X86::BI__builtin_ia32_ptestz256:
// case X86::BI__builtin_ia32_ptestc128:
// case X86::BI__builtin_ia32_ptestc256:
diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c
index c7265b188d572..18cc8d582d408 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -7,14 +7,7 @@
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK
+
#include <immintrin.h>
@@ -471,3 +464,4 @@ int test_mm_testz_si128(__m128i x, __m128i y) {
// CHECK: call {{.*}}i32 @llvm.x86.sse41.ptestz(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
return _mm_testz_si128(x, y);
}
+// TEST_CONSTEXPR(_mm_testz_si128((__m128i)(__v2di){0,0}, (__m128i)(__v2di){0,0}) == 1);
>From dc03a377c9402cd0faf9f567c861a9dc99061dab Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Thu, 18 Sep 2025 17:03:52 -0700
Subject: [PATCH 05/21] Commit changes for rebase
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 6 ++++-
clang/lib/AST/ExprConstant.cpp | 31 ++----------------------
2 files changed, 7 insertions(+), 30 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index f77e6ec1eca7b..a255c5f52cb3c 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3580,7 +3580,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
S, OpPC, Call, [](const APSInt &F, const APSInt &T, const APSInt &C) {
return ((APInt)C).isNegative() ? T : F;
});
- // case X86::BI__builtin_ia32_ptestz128:
+
+ case X86::BI__builtin_ia32_ptestz128: {
+
+ }
+
// case X86::BI__builtin_ia32_ptestz256:
// case X86::BI__builtin_ia32_ptestc128:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 76788a2c41315..4be9e6caf49c0 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12047,36 +12047,9 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
bool ResultSigned = ResultType->isUnsignedIntegerOrEnumerationType();
APSInt Result(APInt(BitWidth, Flag), ResultSigned);
return Success(APValue(Result), E);
+ }
- // auto *DestTy = E->getType()->castAs<VectorType>();
- // QualType DestEltTy = DestTy->getElementType();
- // bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
- // const unsigned SourceLen = SourceLHS.getVectorLength();
- // SmallVector<APValue, 4> ResultElements;
- // ResultElements.reserve(SourceLen);
-
- // unsigned BitWidth = SourceLHS.getVectorElt(0).getInt().getBitWidth();
-
- // auto PopulateResultElements = [&](bool Flag) {
- // for (unsigned I = 0; I < SourceLen - 1; ++I) {
- // ResultElements.emplace_back(APSInt(APInt::getZero(BitWidth), DestUnsigned));
- // }
- // ResultElements.emplace_back(APSInt(APInt(BitWidth, Flag), DestUnsigned));
- // };
-
- // for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
- // const APInt &A = SourceLHS.getVectorElt(EltNum).getInt();
- // const APInt &B = SourceRHS.getVectorElt(EltNum).getInt();
- // if ((A & B) != 0) {
- // PopulateResultElements(false);
- // return Success(APValue(ResultElements.data(), SourceLen), E);
- // }
- // }
- // PopulateResultElements(true);
- // return Success(APValue(ResultElements.data(), SourceLen), E);
- }
-
- // case clang::X86::BI__builtin_ia32_ptestz256:
+ // case X86::BI__builtin_ia32_ptestz256:
// case X86::BI__builtin_ia32_ptestc128:
// case X86::BI__builtin_ia32_ptestc256:
>From f04932c1327846d1f5660d2a9d6ea38eb86f9d38 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Thu, 18 Sep 2025 17:31:11 -0700
Subject: [PATCH 06/21] Commit changes for clang-format
---
clang/include/clang/Basic/BuiltinsX86.td | 1 -
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 37 ++++++++++++------------
clang/lib/AST/ExprConstant.cpp | 34 +++++++++++-----------
3 files changed, 35 insertions(+), 37 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 956770126c62c..ff6e3cabe5f6e 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -521,7 +521,6 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
def roundps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">;
}
-
let Features = "avx",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vtestzpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index a255c5f52cb3c..d7f3e5dff8126 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3580,36 +3580,35 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
S, OpPC, Call, [](const APSInt &F, const APSInt &T, const APSInt &C) {
return ((APInt)C).isNegative() ? T : F;
});
-
- case X86::BI__builtin_ia32_ptestz128: {
+ case X86::BI__builtin_ia32_ptestz128: {
}
- // case X86::BI__builtin_ia32_ptestz256:
+ // case X86::BI__builtin_ia32_ptestz256:
- // case X86::BI__builtin_ia32_ptestc128:
- // case X86::BI__builtin_ia32_ptestc256:
+ // case X86::BI__builtin_ia32_ptestc128:
+ // case X86::BI__builtin_ia32_ptestc256:
- // case X86::BI__builtin_ia32_ptestnzc128:
- // case X86::BI__builtin_ia32_ptestnzc256:
+ // case X86::BI__builtin_ia32_ptestnzc128:
+ // case X86::BI__builtin_ia32_ptestnzc256:
- // case X86::BI__builtin_ia32_vtestzps:
- // case X86::BI__builtin_ia32_vtestzps256:
+ // case X86::BI__builtin_ia32_vtestzps:
+ // case X86::BI__builtin_ia32_vtestzps256:
- // case X86::BI__builtin_ia32_vtestcps:
- // case X86::BI__builtin_ia32_vtestcps256:
+ // case X86::BI__builtin_ia32_vtestcps:
+ // case X86::BI__builtin_ia32_vtestcps256:
- // case X86::BI__builtin_ia32_vtestnzcps:
- // case X86::BI__builtin_ia32_vtestnzcps256:
+ // case X86::BI__builtin_ia32_vtestnzcps:
+ // case X86::BI__builtin_ia32_vtestnzcps256:
- // case X86::BI__builtin_ia32_vtestzpd:
- // case X86::BI__builtin_ia32_vtestzpd256:
+ // case X86::BI__builtin_ia32_vtestzpd:
+ // case X86::BI__builtin_ia32_vtestzpd256:
- // case X86::BI__builtin_ia32_vtestcpd:
- // case X86::BI__builtin_ia32_vtestcpd256:
+ // case X86::BI__builtin_ia32_vtestcpd:
+ // case X86::BI__builtin_ia32_vtestcpd256:
- // case X86::BI__builtin_ia32_vtestnzcpd:
- // case X86::BI__builtin_ia32_vtestnzcpd256:
+ // case X86::BI__builtin_ia32_vtestnzcpd:
+ // case X86::BI__builtin_ia32_vtestnzcpd256:
case X86::BI__builtin_ia32_selectb_128:
case X86::BI__builtin_ia32_selectb_256:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 4be9e6caf49c0..66280b65d7578 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12049,31 +12049,31 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(Result), E);
}
- // case X86::BI__builtin_ia32_ptestz256:
+ // case X86::BI__builtin_ia32_ptestz256:
- // case X86::BI__builtin_ia32_ptestc128:
- // case X86::BI__builtin_ia32_ptestc256:
+ // case X86::BI__builtin_ia32_ptestc128:
+ // case X86::BI__builtin_ia32_ptestc256:
- // case X86::BI__builtin_ia32_ptestnzc128:
- // case X86::BI__builtin_ia32_ptestnzc256:
+ // case X86::BI__builtin_ia32_ptestnzc128:
+ // case X86::BI__builtin_ia32_ptestnzc256:
- // case X86::BI__builtin_ia32_vtestzps:
- // case X86::BI__builtin_ia32_vtestzps256:
+ // case X86::BI__builtin_ia32_vtestzps:
+ // case X86::BI__builtin_ia32_vtestzps256:
- // case X86::BI__builtin_ia32_vtestcps:
- // case X86::BI__builtin_ia32_vtestcps256:
+ // case X86::BI__builtin_ia32_vtestcps:
+ // case X86::BI__builtin_ia32_vtestcps256:
- // case X86::BI__builtin_ia32_vtestnzcps:
- // case X86::BI__builtin_ia32_vtestnzcps256:
+ // case X86::BI__builtin_ia32_vtestnzcps:
+ // case X86::BI__builtin_ia32_vtestnzcps256:
- // case X86::BI__builtin_ia32_vtestzpd:
- // case X86::BI__builtin_ia32_vtestzpd256:
+ // case X86::BI__builtin_ia32_vtestzpd:
+ // case X86::BI__builtin_ia32_vtestzpd256:
- // case X86::BI__builtin_ia32_vtestcpd:
- // case X86::BI__builtin_ia32_vtestcpd256:
+ // case X86::BI__builtin_ia32_vtestcpd:
+ // case X86::BI__builtin_ia32_vtestcpd256:
- // case X86::BI__builtin_ia32_vtestnzcpd:
- // case X86::BI__builtin_ia32_vtestnzcpd256:
+ // case X86::BI__builtin_ia32_vtestnzcpd:
+ // case X86::BI__builtin_ia32_vtestnzcpd256:
case Builtin::BI__builtin_elementwise_ctlz:
case Builtin::BI__builtin_elementwise_cttz: {
>From a2e9fa142d45206e35d35052ba1dcd4e868d19e5 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Thu, 18 Sep 2025 17:32:58 -0700
Subject: [PATCH 07/21] Commit changes for clang-format
---
clang/include/clang/Basic/BuiltinsX86.td | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index ff6e3cabe5f6e..bf2beadf42677 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -318,8 +318,7 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
def roundpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant int)">;
def dpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
- def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, "
- "double>, _Constant char)">;
+ def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">;
def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">;
def vec_ext_v16qi : X86Builtin<"char(_Vector<16, char>, _Constant int)">;
>From 6587f3eb87ef112e9f8e61a8958adca87584832e Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Thu, 18 Sep 2025 17:34:03 -0700
Subject: [PATCH 08/21] Commit changes for clang-format
---
clang/include/clang/Basic/BuiltinsX86.td | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index bf2beadf42677..ff6e3cabe5f6e 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -318,7 +318,8 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
def roundpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant int)">;
def dpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
- def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">;
+ def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, "
+ "double>, _Constant char)">;
def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">;
def vec_ext_v16qi : X86Builtin<"char(_Vector<16, char>, _Constant int)">;
>From 20fcfaafdb310dd50ac3170484ed8e54d0fe380a Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Thu, 18 Sep 2025 17:58:54 -0700
Subject: [PATCH 09/21] Commit changes for clang-format
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 39 ++++++++++++++++++++++--
clang/test/CodeGen/X86/sse41-builtins.c | 10 ++++--
2 files changed, 44 insertions(+), 5 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index d7f3e5dff8126..d14ae83df410c 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2851,6 +2851,39 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
return true;
}
+static bool interp__builtin_ptestz(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call) {
+ const Pointer &LHS = S.Stk.pop<Pointer>();
+ const Pointer &RHS = S.Stk.pop<Pointer>();
+
+ assert(LHS.getNumElems() == RHS.getNumElems());
+ assert(LHS.getFieldDesc()->isPrimitiveArray() &&
+ RHS.getFieldDesc()->isPrimitiveArray());
+
+ if (!S.getASTContext().hasSameUnqualifiedType(getElemType(LHS),
+ getElemType(RHS)))
+ return false;
+
+ unsigned SourceLen = LHS.getNumElems();
+ const QualType ElemQT = getElemType(LHS);
+ const OptPrimType ElemPT = S.getContext().classify(ElemQT);
+
+ bool Flag = true;
+ INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
+ for (unsigned I = 0; I < SourceLen; ++I) {
+ const APSInt A = LHS.elem<T>(I).toAPSInt();
+ const APSInt B = RHS.elem<T>(I).toAPSInt();
+ if ( (A & B) != 0 ) {
+ Flag = false;
+ break;
+ }
+ }
+ });
+
+ pushInteger(S, Flag ? 1 : 0, Call->getType());
+ return true;
+}
+
static bool interp__builtin_elementwise_triop(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
@@ -3581,9 +3614,9 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return ((APInt)C).isNegative() ? T : F;
});
- case X86::BI__builtin_ia32_ptestz128: {
- }
-
+ case X86::BI__builtin_ia32_ptestz128:
+ return interp__builtin_ptestz(S, OpPC, Call);
+
// case X86::BI__builtin_ia32_ptestz256:
// case X86::BI__builtin_ia32_ptestc128:
diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c
index 18cc8d582d408..bbd1053910dee 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -7,8 +7,14 @@
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK
-
-
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK
#include <immintrin.h>
#include "builtin_test_helpers.h"
>From 1df0602c59d72e6dd142802c97f387380e459166 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Thu, 18 Sep 2025 18:29:36 -0700
Subject: [PATCH 10/21] Commit changes for clang-format
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 20 ++++++++++----------
clang/test/CodeGen/X86/sse41-builtins.c | 2 +-
2 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index d14ae83df410c..8de8e935fea70 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2852,7 +2852,7 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
}
static bool interp__builtin_ptestz(InterpState &S, CodePtr OpPC,
- const CallExpr *Call) {
+ const CallExpr *Call) {
const Pointer &LHS = S.Stk.pop<Pointer>();
const Pointer &RHS = S.Stk.pop<Pointer>();
@@ -2870,15 +2870,15 @@ static bool interp__builtin_ptestz(InterpState &S, CodePtr OpPC,
bool Flag = true;
INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
- for (unsigned I = 0; I < SourceLen; ++I) {
- const APSInt A = LHS.elem<T>(I).toAPSInt();
- const APSInt B = RHS.elem<T>(I).toAPSInt();
- if ( (A & B) != 0 ) {
- Flag = false;
- break;
- }
+ for (unsigned I = 0; I < SourceLen; ++I) {
+ const APSInt A = LHS.elem<T>(I).toAPSInt();
+ const APSInt B = RHS.elem<T>(I).toAPSInt();
+ if ((A & B) != 0) {
+ Flag = false;
+ break;
}
- });
+ }
+ });
pushInteger(S, Flag ? 1 : 0, Call->getType());
return true;
@@ -3616,7 +3616,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_ptestz128:
return interp__builtin_ptestz(S, OpPC, Call);
-
+
// case X86::BI__builtin_ia32_ptestz256:
// case X86::BI__builtin_ia32_ptestc128:
diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c
index bbd1053910dee..36a1309feab34 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -470,4 +470,4 @@ int test_mm_testz_si128(__m128i x, __m128i y) {
// CHECK: call {{.*}}i32 @llvm.x86.sse41.ptestz(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
return _mm_testz_si128(x, y);
}
-// TEST_CONSTEXPR(_mm_testz_si128((__m128i)(__v2di){0,0}, (__m128i)(__v2di){0,0}) == 1);
+TEST_CONSTEXPR(_mm_testz_si128((__m128i)(__v2di){0,0}, (__m128i)(__v2di){0,0}) == 1);
>From 8d256da3eabd737a9865d5b55bd08aa784763883 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Thu, 18 Sep 2025 18:36:18 -0700
Subject: [PATCH 11/21] Commit changes for clang-format
---
clang/include/clang/Basic/BuiltinsX86.td | 3 +--
clang/test/CodeGen/X86/sse41-builtins.c | 1 -
2 files changed, 1 insertion(+), 3 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index ff6e3cabe5f6e..f2c66427c980b 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -318,8 +318,7 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
def roundpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant int)">;
def dpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
- def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, "
- "double>, _Constant char)">;
+ def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2,double>, _Constant char)">;
def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">;
def vec_ext_v16qi : X86Builtin<"char(_Vector<16, char>, _Constant int)">;
diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c
index 36a1309feab34..fff642b964062 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -15,7 +15,6 @@
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK
-
#include <immintrin.h>
#include "builtin_test_helpers.h"
>From ccf457b24b33ec3ea95d4e68f815dd0a23036957 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Thu, 18 Sep 2025 18:36:29 -0700
Subject: [PATCH 12/21] Commit changes for clang-format
---
clang/include/clang/Basic/BuiltinsX86.td | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index f2c66427c980b..29bb5e89ee516 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -318,7 +318,8 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
def roundpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant int)">;
def dpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
- def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2,double>, _Constant char)">;
+ def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, "
+ "_Vector<2,double>, _Constant char)">;
def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">;
def vec_ext_v16qi : X86Builtin<"char(_Vector<16, char>, _Constant int)">;
>From d7d4ff48825978f8871cac8ab655c027ecb169df Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Thu, 18 Sep 2025 18:37:18 -0700
Subject: [PATCH 13/21] Commit changes for clang-format
---
clang/test/CodeGen/X86/sse41-builtins.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c
index fff642b964062..0073a474c9b85 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -15,6 +15,8 @@
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK
+
+
#include <immintrin.h>
#include "builtin_test_helpers.h"
>From 8b0bf719d2c13ac786ac4d3d5aa39c289dc6b552 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Sat, 20 Sep 2025 16:25:15 -0700
Subject: [PATCH 14/21] Move handling from VectorExprEvaluator to
IntExprEvaluator
---
clang/lib/AST/ExprConstant.cpp | 69 ++++++++++++++++++++++------------
1 file changed, 46 insertions(+), 23 deletions(-)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 66280b65d7578..94b096e53e7ad 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12025,29 +12025,29 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
- case X86::BI__builtin_ia32_ptestz128: {
- APValue SourceLHS, SourceRHS;
- if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
- !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
- return false;
-
- unsigned SourceLen = SourceLHS.getVectorLength();
- bool Flag = true;
- for (unsigned I = 0; I < SourceLen; ++I) {
- const APInt &A = SourceLHS.getVectorElt(I).getInt();
- const APInt &B = SourceRHS.getVectorElt(I).getInt();
- if ((A & B) != 0) {
- Flag = false;
- break;
- }
- }
-
- QualType ResultType = E->getType();
- unsigned BitWidth = Info.Ctx.getIntWidth(ResultType);
- bool ResultSigned = ResultType->isUnsignedIntegerOrEnumerationType();
- APSInt Result(APInt(BitWidth, Flag), ResultSigned);
- return Success(APValue(Result), E);
- }
+ // case X86::BI__builtin_ia32_ptestz128: {
+ // APValue SourceLHS, SourceRHS;
+ // if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+ // !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+ // return false;
+
+ // unsigned SourceLen = SourceLHS.getVectorLength();
+ // bool Flag = true;
+ // for (unsigned I = 0; I < SourceLen; ++I) {
+ // const APInt &A = SourceLHS.getVectorElt(I).getInt();
+ // const APInt &B = SourceRHS.getVectorElt(I).getInt();
+ // if ((A & B) != 0) {
+ // Flag = false;
+ // break;
+ // }
+ // }
+
+ // QualType ResultType = E->getType();
+ // unsigned BitWidth = Info.Ctx.getIntWidth(ResultType);
+ // bool ResultSigned = ResultType->isUnsignedIntegerOrEnumerationType();
+ // APSInt Result(APInt(BitWidth, Flag), ResultSigned);
+ // return Success(APValue(Result), E);
+ // }
// case X86::BI__builtin_ia32_ptestz256:
@@ -14737,6 +14737,29 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
Result.setBitVal(P++, Val[I]);
return Success(Result, E);
}
+ case X86::BI__builtin_ia32_ptestz128: {
+ APValue SourceLHS, SourceRHS;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+ return false;
+
+ unsigned SourceLen = SourceLHS.getVectorLength();
+ bool Flag = true;
+ for (unsigned I = 0; I < SourceLen; ++I) {
+ const APInt &A = SourceLHS.getVectorElt(I).getInt();
+ const APInt &B = SourceRHS.getVectorElt(I).getInt();
+ if ((A & B) != 0) {
+ Flag = false;
+ break;
+ }
+ }
+
+ QualType ResultType = E->getType();
+ unsigned BitWidth = Info.Ctx.getIntWidth(ResultType);
+ bool ResultSigned = ResultType->isUnsignedIntegerOrEnumerationType();
+ APSInt Result(APInt(BitWidth, Flag), ResultSigned);
+ return Success(Result, E);
+ }
}
}
>From b1bbaf923af42d951829d27114b15c727aa2c598 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Sun, 21 Sep 2025 18:33:20 -0700
Subject: [PATCH 15/21] Refactor test builtin handling to reduce boilerplate
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 18 ++--
clang/lib/AST/ExprConstant.cpp | 123 ++++++++++-------------
clang/test/CodeGen/X86/avx-builtins.c | 2 +
clang/test/CodeGen/X86/sse41-builtins.c | 1 +
4 files changed, 65 insertions(+), 79 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 8de8e935fea70..ac1e7f0a0d186 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2851,8 +2851,9 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
return true;
}
-static bool interp__builtin_ptestz(InterpState &S, CodePtr OpPC,
- const CallExpr *Call) {
+static bool interp__builtin_test_op(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call,
+llvm::function_ref<bool(const APSInt &A, const APSInt &B)> Fn) {
const Pointer &LHS = S.Stk.pop<Pointer>();
const Pointer &RHS = S.Stk.pop<Pointer>();
@@ -2873,7 +2874,7 @@ static bool interp__builtin_ptestz(InterpState &S, CodePtr OpPC,
for (unsigned I = 0; I < SourceLen; ++I) {
const APSInt A = LHS.elem<T>(I).toAPSInt();
const APSInt B = RHS.elem<T>(I).toAPSInt();
- if ((A & B) != 0) {
+ if (!Fn(A,B)) {
Flag = false;
break;
}
@@ -3615,12 +3616,13 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
});
case X86::BI__builtin_ia32_ptestz128:
- return interp__builtin_ptestz(S, OpPC, Call);
-
- // case X86::BI__builtin_ia32_ptestz256:
+ case X86::BI__builtin_ia32_ptestz256:
+ return interp__builtin_test_op(S, OpPC, Call, [](const APSInt &A, const APSInt &B) {
+ return (A & B) == 0;
+ });
- // case X86::BI__builtin_ia32_ptestc128:
- // case X86::BI__builtin_ia32_ptestc256:
+ // case X86::BI__builtin_ia32_ptestc128:
+ // case X86::BI__builtin_ia32_ptestc256:
// case X86::BI__builtin_ia32_ptestnzc128:
// case X86::BI__builtin_ia32_ptestnzc256:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 94b096e53e7ad..9b3116ce07d6e 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12025,56 +12025,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
- // case X86::BI__builtin_ia32_ptestz128: {
- // APValue SourceLHS, SourceRHS;
- // if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
- // !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
- // return false;
-
- // unsigned SourceLen = SourceLHS.getVectorLength();
- // bool Flag = true;
- // for (unsigned I = 0; I < SourceLen; ++I) {
- // const APInt &A = SourceLHS.getVectorElt(I).getInt();
- // const APInt &B = SourceRHS.getVectorElt(I).getInt();
- // if ((A & B) != 0) {
- // Flag = false;
- // break;
- // }
- // }
-
- // QualType ResultType = E->getType();
- // unsigned BitWidth = Info.Ctx.getIntWidth(ResultType);
- // bool ResultSigned = ResultType->isUnsignedIntegerOrEnumerationType();
- // APSInt Result(APInt(BitWidth, Flag), ResultSigned);
- // return Success(APValue(Result), E);
- // }
-
- // case X86::BI__builtin_ia32_ptestz256:
-
- // case X86::BI__builtin_ia32_ptestc128:
- // case X86::BI__builtin_ia32_ptestc256:
-
- // case X86::BI__builtin_ia32_ptestnzc128:
- // case X86::BI__builtin_ia32_ptestnzc256:
-
- // case X86::BI__builtin_ia32_vtestzps:
- // case X86::BI__builtin_ia32_vtestzps256:
-
- // case X86::BI__builtin_ia32_vtestcps:
- // case X86::BI__builtin_ia32_vtestcps256:
-
- // case X86::BI__builtin_ia32_vtestnzcps:
- // case X86::BI__builtin_ia32_vtestnzcps256:
-
- // case X86::BI__builtin_ia32_vtestzpd:
- // case X86::BI__builtin_ia32_vtestzpd256:
-
- // case X86::BI__builtin_ia32_vtestcpd:
- // case X86::BI__builtin_ia32_vtestcpd256:
-
- // case X86::BI__builtin_ia32_vtestnzcpd:
- // case X86::BI__builtin_ia32_vtestnzcpd256:
-
case Builtin::BI__builtin_elementwise_ctlz:
case Builtin::BI__builtin_elementwise_cttz: {
APValue SourceLHS;
@@ -13638,6 +13588,30 @@ static bool getBuiltinAlignArguments(const CallExpr *E, EvalInfo &Info,
bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
unsigned BuiltinOp) {
+
+ auto EvalTestOp = [&](llvm::function_ref<bool(const APInt &, const APInt &)> Fn) {
+ APValue SourceLHS, SourceRHS;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+ return false;
+
+ unsigned SourceLen = SourceLHS.getVectorLength();
+ bool Flag = true;
+ for (unsigned I = 0; I < SourceLen; ++I) {
+ const APInt &A = SourceLHS.getVectorElt(I).getInt();
+ const APInt &B = SourceRHS.getVectorElt(I).getInt();
+ if (!Fn(A, B)) {
+ Flag = false;
+ break;
+ }
+ }
+
+ QualType ResultType = E->getType();
+ unsigned BitWidth = Info.Ctx.getIntWidth(ResultType);
+ bool ResultSigned = ResultType->isUnsignedIntegerOrEnumerationType();
+ APSInt Result(APInt(BitWidth, Flag), ResultSigned);
+ return Success(Result, E);
+ };
switch (BuiltinOp) {
default:
return false;
@@ -14737,29 +14711,36 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
Result.setBitVal(P++, Val[I]);
return Success(Result, E);
}
- case X86::BI__builtin_ia32_ptestz128: {
- APValue SourceLHS, SourceRHS;
- if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
- !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
- return false;
+ case X86::BI__builtin_ia32_ptestz128:
+ case X86::BI__builtin_ia32_ptestz256: {
+ return EvalTestOp([](const APInt& A, const APInt& B){
+ return (A & B) == 0;
+ });
+ }
- unsigned SourceLen = SourceLHS.getVectorLength();
- bool Flag = true;
- for (unsigned I = 0; I < SourceLen; ++I) {
- const APInt &A = SourceLHS.getVectorElt(I).getInt();
- const APInt &B = SourceRHS.getVectorElt(I).getInt();
- if ((A & B) != 0) {
- Flag = false;
- break;
- }
- }
+ // case X86::BI__builtin_ia32_ptestc128:
+ // case X86::BI__builtin_ia32_ptestc256:
- QualType ResultType = E->getType();
- unsigned BitWidth = Info.Ctx.getIntWidth(ResultType);
- bool ResultSigned = ResultType->isUnsignedIntegerOrEnumerationType();
- APSInt Result(APInt(BitWidth, Flag), ResultSigned);
- return Success(Result, E);
- }
+ // case X86::BI__builtin_ia32_ptestnzc128:
+ // case X86::BI__builtin_ia32_ptestnzc256:
+
+ // case X86::BI__builtin_ia32_vtestzps:
+ // case X86::BI__builtin_ia32_vtestzps256:
+
+ // case X86::BI__builtin_ia32_vtestcps:
+ // case X86::BI__builtin_ia32_vtestcps256:
+
+ // case X86::BI__builtin_ia32_vtestnzcps:
+ // case X86::BI__builtin_ia32_vtestnzcps256:
+
+ // case X86::BI__builtin_ia32_vtestzpd:
+ // case X86::BI__builtin_ia32_vtestzpd256:
+
+ // case X86::BI__builtin_ia32_vtestcpd:
+ // case X86::BI__builtin_ia32_vtestcpd256:
+
+ // case X86::BI__builtin_ia32_vtestnzcpd:
+ // case X86::BI__builtin_ia32_vtestnzcpd256:
}
}
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 347cd9ee6a667..a67da6614f91d 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -2082,6 +2082,8 @@ int test_mm256_testz_si256(__m256i A, __m256i B) {
// CHECK: call {{.*}}i32 @llvm.x86.avx.ptestz.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}})
return _mm256_testz_si256(A, B);
}
+TEST_CONSTEXPR(_mm256_testz_si256((__m256i)(__v4du){0,0,0,0}, (__m256i)(__v4du){0,0,0,0}) == 1);
+TEST_CONSTEXPR(_mm256_testz_si256((__m256i)(__v4du){0,0,0,1}, (__m256i)(__v4du){0,0,0,1}) == 0);
__m256 test_mm256_undefined_ps(void) {
// X64-LABEL: test_mm256_undefined_ps
diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c
index 0073a474c9b85..64295145d1d76 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -472,3 +472,4 @@ int test_mm_testz_si128(__m128i x, __m128i y) {
return _mm_testz_si128(x, y);
}
TEST_CONSTEXPR(_mm_testz_si128((__m128i)(__v2di){0,0}, (__m128i)(__v2di){0,0}) == 1);
+TEST_CONSTEXPR(_mm_testz_si128((__m128i)(__v2di){0,1}, (__m128i)(__v2di){0,1}) == 0);
>From 10eae4b173d277b5d54bf48c9981e71c05712cc2 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Sun, 21 Sep 2025 18:34:27 -0700
Subject: [PATCH 16/21] Clang-format
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 18 ++++-----
clang/lib/AST/ExprConstant.cpp | 50 ++++++++++++------------
2 files changed, 34 insertions(+), 34 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index ac1e7f0a0d186..063187f0d8674 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2851,9 +2851,9 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
return true;
}
-static bool interp__builtin_test_op(InterpState &S, CodePtr OpPC,
- const CallExpr *Call,
-llvm::function_ref<bool(const APSInt &A, const APSInt &B)> Fn) {
+static bool interp__builtin_test_op(
+ InterpState &S, CodePtr OpPC, const CallExpr *Call,
+ llvm::function_ref<bool(const APSInt &A, const APSInt &B)> Fn) {
const Pointer &LHS = S.Stk.pop<Pointer>();
const Pointer &RHS = S.Stk.pop<Pointer>();
@@ -2874,7 +2874,7 @@ llvm::function_ref<bool(const APSInt &A, const APSInt &B)> Fn) {
for (unsigned I = 0; I < SourceLen; ++I) {
const APSInt A = LHS.elem<T>(I).toAPSInt();
const APSInt B = RHS.elem<T>(I).toAPSInt();
- if (!Fn(A,B)) {
+ if (!Fn(A, B)) {
Flag = false;
break;
}
@@ -3617,12 +3617,12 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_ptestz128:
case X86::BI__builtin_ia32_ptestz256:
- return interp__builtin_test_op(S, OpPC, Call, [](const APSInt &A, const APSInt &B) {
- return (A & B) == 0;
- });
+ return interp__builtin_test_op(
+ S, OpPC, Call,
+ [](const APSInt &A, const APSInt &B) { return (A & B) == 0; });
- // case X86::BI__builtin_ia32_ptestc128:
- // case X86::BI__builtin_ia32_ptestc256:
+ // case X86::BI__builtin_ia32_ptestc128:
+ // case X86::BI__builtin_ia32_ptestc256:
// case X86::BI__builtin_ia32_ptestnzc128:
// case X86::BI__builtin_ia32_ptestnzc256:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 9b3116ce07d6e..3db6225223a75 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13589,29 +13589,30 @@ static bool getBuiltinAlignArguments(const CallExpr *E, EvalInfo &Info,
bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
unsigned BuiltinOp) {
- auto EvalTestOp = [&](llvm::function_ref<bool(const APInt &, const APInt &)> Fn) {
- APValue SourceLHS, SourceRHS;
- if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
- !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
- return false;
+ auto EvalTestOp =
+ [&](llvm::function_ref<bool(const APInt &, const APInt &)> Fn) {
+ APValue SourceLHS, SourceRHS;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+ return false;
- unsigned SourceLen = SourceLHS.getVectorLength();
- bool Flag = true;
- for (unsigned I = 0; I < SourceLen; ++I) {
- const APInt &A = SourceLHS.getVectorElt(I).getInt();
- const APInt &B = SourceRHS.getVectorElt(I).getInt();
- if (!Fn(A, B)) {
- Flag = false;
- break;
- }
- }
+ unsigned SourceLen = SourceLHS.getVectorLength();
+ bool Flag = true;
+ for (unsigned I = 0; I < SourceLen; ++I) {
+ const APInt &A = SourceLHS.getVectorElt(I).getInt();
+ const APInt &B = SourceRHS.getVectorElt(I).getInt();
+ if (!Fn(A, B)) {
+ Flag = false;
+ break;
+ }
+ }
- QualType ResultType = E->getType();
- unsigned BitWidth = Info.Ctx.getIntWidth(ResultType);
- bool ResultSigned = ResultType->isUnsignedIntegerOrEnumerationType();
- APSInt Result(APInt(BitWidth, Flag), ResultSigned);
- return Success(Result, E);
- };
+ QualType ResultType = E->getType();
+ unsigned BitWidth = Info.Ctx.getIntWidth(ResultType);
+ bool ResultSigned = ResultType->isUnsignedIntegerOrEnumerationType();
+ APSInt Result(APInt(BitWidth, Flag), ResultSigned);
+ return Success(Result, E);
+ };
switch (BuiltinOp) {
default:
return false;
@@ -14711,11 +14712,10 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
Result.setBitVal(P++, Val[I]);
return Success(Result, E);
}
- case X86::BI__builtin_ia32_ptestz128:
+ case X86::BI__builtin_ia32_ptestz128:
case X86::BI__builtin_ia32_ptestz256: {
- return EvalTestOp([](const APInt& A, const APInt& B){
- return (A & B) == 0;
- });
+ return EvalTestOp(
+ [](const APInt &A, const APInt &B) { return (A & B) == 0; });
}
// case X86::BI__builtin_ia32_ptestc128:
>From 7c3301b8b6790cb014427558f914a4e0c697b4b5 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Tue, 23 Sep 2025 18:00:04 -0700
Subject: [PATCH 17/21] [X86][Clang] VectorExprEvaluator::VisitCallExpr /
InterpretBuiltin - add SSE/AVX VPTEST/VTESTPD/VTESTPS intrinsics to be used
in constexpr
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 157 +++++++++++++++++------
clang/lib/AST/ExprConstant.cpp | 145 +++++++++++++++------
clang/test/CodeGen/X86/avx-builtins.c | 58 ++++++++-
clang/test/CodeGen/X86/sse41-builtins.c | 20 ++-
4 files changed, 300 insertions(+), 80 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 063187f0d8674..97e69868771de 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2853,9 +2853,9 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
static bool interp__builtin_test_op(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
- llvm::function_ref<bool(const APSInt &A, const APSInt &B)> Fn) {
- const Pointer &LHS = S.Stk.pop<Pointer>();
+ llvm::function_ref<bool(const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT, const unsigned SourceLen)> Fn) {
const Pointer &RHS = S.Stk.pop<Pointer>();
+ const Pointer &LHS = S.Stk.pop<Pointer>();
assert(LHS.getNumElems() == RHS.getNumElems());
assert(LHS.getFieldDesc()->isPrimitiveArray() &&
@@ -2865,23 +2865,11 @@ static bool interp__builtin_test_op(
getElemType(RHS)))
return false;
- unsigned SourceLen = LHS.getNumElems();
+ const unsigned SourceLen = LHS.getNumElems();
const QualType ElemQT = getElemType(LHS);
const OptPrimType ElemPT = S.getContext().classify(ElemQT);
- bool Flag = true;
- INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
- for (unsigned I = 0; I < SourceLen; ++I) {
- const APSInt A = LHS.elem<T>(I).toAPSInt();
- const APSInt B = RHS.elem<T>(I).toAPSInt();
- if (!Fn(A, B)) {
- Flag = false;
- break;
- }
- }
- });
-
- pushInteger(S, Flag ? 1 : 0, Call->getType());
+ pushInteger(S, Fn(LHS, RHS, ElemPT, SourceLen) ? 1 : 0, Call->getType());
return true;
}
@@ -3619,32 +3607,123 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_ptestz256:
return interp__builtin_test_op(
S, OpPC, Call,
- [](const APSInt &A, const APSInt &B) { return (A & B) == 0; });
-
- // case X86::BI__builtin_ia32_ptestc128:
- // case X86::BI__builtin_ia32_ptestc256:
-
- // case X86::BI__builtin_ia32_ptestnzc128:
- // case X86::BI__builtin_ia32_ptestnzc256:
-
- // case X86::BI__builtin_ia32_vtestzps:
- // case X86::BI__builtin_ia32_vtestzps256:
-
- // case X86::BI__builtin_ia32_vtestcps:
- // case X86::BI__builtin_ia32_vtestcps256:
-
- // case X86::BI__builtin_ia32_vtestnzcps:
- // case X86::BI__builtin_ia32_vtestnzcps256:
-
- // case X86::BI__builtin_ia32_vtestzpd:
- // case X86::BI__builtin_ia32_vtestzpd256:
+ [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT, const unsigned SourceLen) {
+ INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
+ for (unsigned I = 0; I < SourceLen; ++I) {
+ const APSInt A = LHS.elem<T>(I).toAPSInt();
+ const APSInt B = RHS.elem<T>(I).toAPSInt();
+ if (!((A & B) == 0)) {
+ return false;
+ }
+ }
+ });
+ return true;
+ });
- // case X86::BI__builtin_ia32_vtestcpd:
- // case X86::BI__builtin_ia32_vtestcpd256:
+ case X86::BI__builtin_ia32_ptestc128:
+ case X86::BI__builtin_ia32_ptestc256:
+ return interp__builtin_test_op(
+ S, OpPC, Call,
+ [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT, const unsigned SourceLen) {
+ INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
+ for (unsigned I = 0; I < SourceLen; ++I) {
+ const APSInt A = LHS.elem<T>(I).toAPSInt();
+ const APSInt B = RHS.elem<T>(I).toAPSInt();
+ if (!((~A & B) == 0)) {
+ return false;
+ }
+ }
+ });
+ return true;
+ });
- // case X86::BI__builtin_ia32_vtestnzcpd:
- // case X86::BI__builtin_ia32_vtestnzcpd256:
+ case X86::BI__builtin_ia32_ptestnzc128:
+ case X86::BI__builtin_ia32_ptestnzc256:
+ return interp__builtin_test_op(
+ S, OpPC, Call,
+ [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT, const unsigned SourceLen) {
+ INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
+ bool Flag1 = false;
+ bool Flag2 = false;
+ for (unsigned I = 0; I < SourceLen; ++I) {
+ const APSInt A = LHS.elem<T>(I).toAPSInt();
+ const APSInt B = RHS.elem<T>(I).toAPSInt();
+ if ((A & B) != 0) {
+ Flag1 = true;
+ }
+ if ((~A & B) != 0) {
+ Flag2 = true;
+ }
+ }
+ return Flag1 && Flag2;
+ });
+ });
+ case X86::BI__builtin_ia32_vtestzps:
+ case X86::BI__builtin_ia32_vtestzps256:
+ case X86::BI__builtin_ia32_vtestzpd:
+ case X86::BI__builtin_ia32_vtestzpd256:
+ return interp__builtin_test_op(
+ S, OpPC, Call,
+ [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT, const unsigned SourceLen) {
+ for (unsigned I = 0; I < SourceLen; ++I) {
+ using T = PrimConv<PT_Float>::T;
+ const APInt A = LHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+ const APInt B = RHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+ const unsigned SignBit = A.getBitWidth() - 1;
+ const bool ASigned = A[SignBit];
+ const bool BSigned = B[SignBit];
+ if (!((ASigned && BSigned) == 0)) {
+ return false;
+ }
+ }
+ return true;
+ });
+ case X86::BI__builtin_ia32_vtestcps:
+ case X86::BI__builtin_ia32_vtestcps256:
+ case X86::BI__builtin_ia32_vtestcpd:
+ case X86::BI__builtin_ia32_vtestcpd256:
+ return interp__builtin_test_op(
+ S, OpPC, Call,
+ [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT, const unsigned SourceLen) {
+ for (unsigned I = 0; I < SourceLen; ++I) {
+ using T = PrimConv<PT_Float>::T;
+ const APInt A = LHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+ const APInt B = RHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+ const unsigned SignBit = A.getBitWidth() - 1;
+ const bool ASigned = A[SignBit];
+ const bool BSigned = B[SignBit];
+ if (!((!ASigned && BSigned) == 0)) {
+ return false;
+ }
+ }
+ return true;
+ });
+ case X86::BI__builtin_ia32_vtestnzcps:
+ case X86::BI__builtin_ia32_vtestnzcps256:
+ case X86::BI__builtin_ia32_vtestnzcpd:
+ case X86::BI__builtin_ia32_vtestnzcpd256:
+ return interp__builtin_test_op(
+ S, OpPC, Call,
+ [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT, const unsigned SourceLen) {
+ bool Flag1 = false;
+ bool Flag2 = false;
+ for (unsigned I = 0; I < SourceLen; ++I) {
+ using T = PrimConv<PT_Float>::T;
+ const APInt A = LHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+ const APInt B = RHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+ const unsigned SignBit = A.getBitWidth() - 1;
+ const bool ASigned = A[SignBit];
+ const bool BSigned = B[SignBit];
+ if ((ASigned && BSigned) != 0) {
+ Flag1 = true;
+ }
+ if ((!ASigned && BSigned) != 0) {
+ Flag2 = true;
+ }
+ }
+ return Flag1 && Flag2;
+ });
case X86::BI__builtin_ia32_selectb_128:
case X86::BI__builtin_ia32_selectb_256:
case X86::BI__builtin_ia32_selectb_512:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 3db6225223a75..cc3e19230a4b2 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13590,27 +13590,17 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
unsigned BuiltinOp) {
auto EvalTestOp =
- [&](llvm::function_ref<bool(const APInt &, const APInt &)> Fn) {
+ [&](llvm::function_ref<bool(const APValue &, const APValue &, const unsigned SourceLen)> Fn) {
APValue SourceLHS, SourceRHS;
if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
!EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
return false;
- unsigned SourceLen = SourceLHS.getVectorLength();
- bool Flag = true;
- for (unsigned I = 0; I < SourceLen; ++I) {
- const APInt &A = SourceLHS.getVectorElt(I).getInt();
- const APInt &B = SourceRHS.getVectorElt(I).getInt();
- if (!Fn(A, B)) {
- Flag = false;
- break;
- }
- }
-
QualType ResultType = E->getType();
unsigned BitWidth = Info.Ctx.getIntWidth(ResultType);
bool ResultSigned = ResultType->isUnsignedIntegerOrEnumerationType();
- APSInt Result(APInt(BitWidth, Flag), ResultSigned);
+ unsigned SourceLen = SourceLHS.getVectorLength();
+ APSInt Result(APInt(BitWidth, Fn(SourceLHS, SourceRHS, SourceLen)), ResultSigned);
return Success(Result, E);
};
switch (BuiltinOp) {
@@ -14715,32 +14705,111 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
case X86::BI__builtin_ia32_ptestz128:
case X86::BI__builtin_ia32_ptestz256: {
return EvalTestOp(
- [](const APInt &A, const APInt &B) { return (A & B) == 0; });
+ [](const APValue &SourceLHS, const APValue &SourceRHS, const unsigned SourceLen) {
+ for (unsigned I = 0; I < SourceLen; ++I) {
+ const APInt &A = SourceLHS.getVectorElt(I).getInt();
+ const APInt &B = SourceRHS.getVectorElt(I).getInt();
+ if (!((A & B) == 0)) {
+ return false;
+ }
+ }
+ return true;
+ });
+ }
+ case X86::BI__builtin_ia32_ptestc128:
+ case X86::BI__builtin_ia32_ptestc256: {
+ return EvalTestOp(
+ [](const APValue &SourceLHS, const APValue &SourceRHS, const unsigned SourceLen) {
+ for (unsigned I = 0; I < SourceLen; ++I) {
+ const APInt &A = SourceLHS.getVectorElt(I).getInt();
+ const APInt &B = SourceRHS.getVectorElt(I).getInt();
+ if (!((~A & B) == 0)) {
+ return false;
+ }
+ }
+ return true;
+ });
+ }
+ case X86::BI__builtin_ia32_ptestnzc128:
+ case X86::BI__builtin_ia32_ptestnzc256: {
+ return EvalTestOp(
+ [](const APValue &SourceLHS, const APValue &SourceRHS, const unsigned SourceLen) {
+ int Flag1 = false, Flag2 = false;
+ for (unsigned I = 0; I < SourceLen; ++I) {
+ const APInt &A = SourceLHS.getVectorElt(I).getInt();
+ const APInt &B = SourceRHS.getVectorElt(I).getInt();
+ if ((A & B) != 0) {
+ Flag1 = true;
+ }
+ if ((~A & B) != 0) {
+ Flag2 = true;
+ }
+ }
+ return Flag1 && Flag2;
+ });
+ }
+ case X86::BI__builtin_ia32_vtestzps:
+ case X86::BI__builtin_ia32_vtestzps256:
+ case X86::BI__builtin_ia32_vtestzpd:
+ case X86::BI__builtin_ia32_vtestzpd256: {
+ return EvalTestOp(
+ [](const APValue &SourceLHS, const APValue &SourceRHS, const unsigned SourceLen) {
+ for (unsigned I = 0; I < SourceLen; ++I) {
+ const APInt &A = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt();
+ const APInt &B = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt();
+ const unsigned SignBit = A.getBitWidth() - 1;
+ const bool ASigned = A[SignBit];
+ const bool BSigned = B[SignBit];
+ if (!((ASigned && BSigned) == 0)) {
+ return false;
+ }
+ }
+ return true;
+ });
+ }
+ case X86::BI__builtin_ia32_vtestcps:
+ case X86::BI__builtin_ia32_vtestcps256:
+ case X86::BI__builtin_ia32_vtestcpd:
+ case X86::BI__builtin_ia32_vtestcpd256:{
+ return EvalTestOp(
+ [](const APValue &SourceLHS, const APValue &SourceRHS, const unsigned SourceLen) {
+ for (unsigned I = 0; I < SourceLen; ++I) {
+ const APInt &A = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt();
+ const APInt &B = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt();
+ const unsigned SignBit = A.getBitWidth() - 1;
+ const bool ASigned = A[SignBit];
+ const bool BSigned = B[SignBit];
+ if (!((!ASigned && BSigned) == 0)) {
+ return false;
+ }
+ }
+ return true;
+ });
+ }
+ case X86::BI__builtin_ia32_vtestnzcps:
+ case X86::BI__builtin_ia32_vtestnzcps256:
+ case X86::BI__builtin_ia32_vtestnzcpd:
+ case X86::BI__builtin_ia32_vtestnzcpd256: {
+ return EvalTestOp(
+ [](const APValue &SourceLHS, const APValue &SourceRHS, const unsigned SourceLen) {
+ bool Flag1 = false;
+ bool Flag2 = false;
+ for (unsigned I = 0; I < SourceLen; ++I) {
+ const APInt &A = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt();
+ const APInt &B = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt();
+ const unsigned SignBit = A.getBitWidth() - 1;
+ const bool ASigned = A[SignBit];
+ const bool BSigned = B[SignBit];
+ if ((ASigned && BSigned) != 0) {
+ Flag1 = true;
+ }
+ if ((!ASigned && BSigned) != 0) {
+ Flag2 = true;
+ }
+ }
+ return Flag1 && Flag2;
+ });
}
-
- // case X86::BI__builtin_ia32_ptestc128:
- // case X86::BI__builtin_ia32_ptestc256:
-
- // case X86::BI__builtin_ia32_ptestnzc128:
- // case X86::BI__builtin_ia32_ptestnzc256:
-
- // case X86::BI__builtin_ia32_vtestzps:
- // case X86::BI__builtin_ia32_vtestzps256:
-
- // case X86::BI__builtin_ia32_vtestcps:
- // case X86::BI__builtin_ia32_vtestcps256:
-
- // case X86::BI__builtin_ia32_vtestnzcps:
- // case X86::BI__builtin_ia32_vtestnzcps256:
-
- // case X86::BI__builtin_ia32_vtestzpd:
- // case X86::BI__builtin_ia32_vtestzpd256:
-
- // case X86::BI__builtin_ia32_vtestcpd:
- // case X86::BI__builtin_ia32_vtestcpd256:
-
- // case X86::BI__builtin_ia32_vtestnzcpd:
- // case X86::BI__builtin_ia32_vtestnzcpd256:
}
}
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index a67da6614f91d..b7007245d7e9f 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -1998,92 +1998,146 @@ int test_mm_testc_pd(__m128d A, __m128d B) {
// CHECK: call {{.*}}i32 @llvm.x86.avx.vtestc.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_testc_pd(A, B);
}
+TEST_CONSTEXPR(_mm_testc_pd((__m128d)(__v2df){-1.0, -2.0},
+ (__m128d)(__v2df){-3.0, 4.0}) == 1);
+TEST_CONSTEXPR(_mm_testc_pd((__m128d)(__v2df){ 1.0, -2.0},
+ (__m128d)(__v2df){-3.0, 4.0}) == 0);
+TEST_CONSTEXPR(_mm_testc_pd((__m128d)(__v2df){ 1.0, -2.0},
+ (__m128d)(__v2df){ 0.0, 5.0}) == 1);
int test_mm256_testc_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_testc_pd
// CHECK: call {{.*}}i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
return _mm256_testc_pd(A, B);
}
+TEST_CONSTEXPR(_mm256_testc_pd(
+ (__m256d)(__v4df){-1.0, 2.0, -3.0, 4.0},
+ (__m256d)(__v4df){-5.0, 6.0, 7.0, 8.0}) == 1);
+TEST_CONSTEXPR(_mm256_testc_pd(
+ (__m256d)(__v4df){ 1.0, 2.0, -3.0, 4.0},
+ (__m256d)(__v4df){-5.0, 6.0, 7.0, 8.0}) == 0);
+TEST_CONSTEXPR(_mm256_testc_pd(
+ (__m256d)(__v4df){-1.0, -2.0, -3.0, -4.0},
+ (__m256d)(__v4df){ 5.0, 6.0, 7.0, 8.0}) == 1);
int test_mm_testc_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_testc_ps
// CHECK: call {{.*}}i32 @llvm.x86.avx.vtestc.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_testc_ps(A, B);
}
+TEST_CONSTEXPR(_mm_testc_ps((__m128)(__v4sf){-1,-1,-9001.1009,}, (__m128)(__v4sf){-1.0,-9001,9001,9000}) == 1);
+TEST_CONSTEXPR(_mm_testc_ps((__m128)(__v4sf){-1,2384.23,-9001.1009,}, (__m128)(__v4sf){-1.0,-9001,9001,9000}) == 0);
+TEST_CONSTEXPR(_mm_testc_ps((__m128)(__v4sf){-1,-2,-9001.1009,-93}, (__m128)(__v4sf){-1.0,-9001,-0.9001,-1000}) == 1);
int test_mm256_testc_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_testc_ps
// CHECK: call {{.*}}i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
return _mm256_testc_ps(A, B);
}
+TEST_CONSTEXPR(_mm256_testc_ps((__m256)(__v8sf){-1, -2, -3, -4, -5, -6, -7, 8},(__m256)(__v8sf){1, 2, 3, 4, 5, 6, 7, -8}) == 0);
+TEST_CONSTEXPR(_mm256_testc_ps((__m256)(__v8sf){0,0,0,0,0,0,0,-1.00001},(__m256)(__v8sf){1, 2, 3, 4, 5, 6, 7, -8}) == 1);
+TEST_CONSTEXPR(_mm256_testc_ps((__m256)(__v8sf){0,-0.00002,0,0,0,0,0,-1.00001},(__m256)(__v8sf){1, 2, 3, -4, 5, -6, -7, 8}) == 0);
int test_mm256_testc_si256(__m256i A, __m256i B) {
// CHECK-LABEL: test_mm256_testc_si256
// CHECK: call {{.*}}i32 @llvm.x86.avx.ptestc.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}})
return _mm256_testc_si256(A, B);
}
+TEST_CONSTEXPR(_mm256_testc_si256((__m256i)(__v4di){0,0,0,0}, (__m256i)(__v4di){0,0,0,0}) == 1);
+TEST_CONSTEXPR(_mm256_testc_si256((__m256i)(__v4di){0,0,-1,0}, (__m256i)(__v4di){0,0,1,0}) == 1);
+TEST_CONSTEXPR(_mm256_testc_si256((__m256i)(__v4di){-1,-2,1,3}, (__m256i)(__v4di){0,-1,1,1}) == 0);
int test_mm_testnzc_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_testnzc_pd
// CHECK: call {{.*}}i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_testnzc_pd(A, B);
}
+TEST_CONSTEXPR(_mm_testnzc_pd((__m128d)(__v2df){-1.0, +2.0},
+ (__m128d)(__v2df){-3.0, -4.0}) == 1);
+TEST_CONSTEXPR(_mm_testnzc_pd((__m128d)(__v2df){+1.0, +2.0},
+ (__m128d)(__v2df){+3.0, -4.0}) == 0);
+TEST_CONSTEXPR(_mm_testnzc_pd((__m128d)(__v2df){-1.0, -2.0},
+ (__m128d)(__v2df){-3.0, +4.0}) == 0);
int test_mm256_testnzc_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_testnzc_pd
// CHECK: call {{.*}}i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
return _mm256_testnzc_pd(A, B);
}
+TEST_CONSTEXPR(_mm256_testnzc_pd((__m256d)(__v4df){-1.0, 2.0, 3.0, -4.0}, (__m256d)(__v4df){-5.0, -6.0, 7.0, 8.0}) == 1); TEST_CONSTEXPR(_mm256_testnzc_pd((__m256d)(__v4df){1.0, 2.0, 3.0, 4.0}, (__m256d)(__v4df){-1.0, 6.0, 7.0, 8.0}) == 0); TEST_CONSTEXPR(_mm256_testnzc_pd((__m256d)(__v4df){-1.0, -2.0, -3.0, -4.0}, (__m256d)(__v4df){-5.0, 6.0, 7.0, 8.0}) == 0);
int test_mm_testnzc_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_testnzc_ps
// CHECK: call {{.*}}i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_testnzc_ps(A, B);
}
+TEST_CONSTEXPR(_mm_testnzc_ps((__m128)(__v4sf){-9.9,987,-67,0}, (__m128)(__v4sf){10.0,-1.12,-29.29,0}) == 1);
+TEST_CONSTEXPR(_mm_testnzc_ps((__m128)(__v4sf){-810.0,-1.0,-1.0,-3.0}, (__m128)(__v4sf){-10.0,-1.0,-1.0,-2.0}) == 0);
+TEST_CONSTEXPR(_mm_testnzc_ps((__m128)(__v4sf){0,0,0,0}, (__m128)(__v4sf){0,-1,0,-1}) == 0);
int test_mm256_testnzc_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_testnzc_ps
// CHECK: call {{.*}}i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
return _mm256_testnzc_ps(A, B);
}
+TEST_CONSTEXPR(_mm256_testnzc_ps((__m256)(__v8sf){-1, -2, -3, -4, -5, -6, -7, 8},(__m256)(__v8sf){1, -2, 3, 4, 5, 6, 7, -8}) == 1);
+TEST_CONSTEXPR(_mm256_testnzc_ps((__m256)(__v8sf){0,0,0,0,0,0,0,-1.00001},(__m256)(__v8sf){1, 2, 3, 4, 5, 6, 7, -8}) == 0);
+TEST_CONSTEXPR(_mm256_testnzc_ps((__m256)(__v8sf){0,-0.00002,0,0,0,0,0,-1.00001},(__m256)(__v8sf){1, 2, 3, -4, 5, -6, -7, 8}) == 0);
int test_mm256_testnzc_si256(__m256i A, __m256i B) {
// CHECK-LABEL: test_mm256_testnzc_si256
// CHECK: call {{.*}}i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}})
return _mm256_testnzc_si256(A, B);
}
+TEST_CONSTEXPR(_mm256_testnzc_si256((__m256i)(__v4di){-1,0,0,0}, (__m256i)(__v4di){478329848,23438,2343,-3483}) == 1);
+TEST_CONSTEXPR(_mm256_testnzc_si256((__m256i)(__v4di){-1,239483,-1,0}, (__m256i)(__v4di){3849234,0,-2,0}) == 0);
+TEST_CONSTEXPR(_mm256_testnzc_si256((__m256i)(__v4di){-1,0,-1,3}, (__m256i)(__v4di){1,0,9999999,1}) == 0);
int test_mm_testz_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_testz_pd
// CHECK: call {{.*}}i32 @llvm.x86.avx.vtestz.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_testz_pd(A, B);
}
+TEST_CONSTEXPR(_mm_testz_pd((__m128d)(__v2df){-1,0}, (__m128d)(__v2df){0,-1}) == 1);
+TEST_CONSTEXPR(_mm_testz_pd((__m128d)(__v2df){0,-13.13}, (__m128d)(__v2df){0,-11.1}) == 0);
+TEST_CONSTEXPR(_mm_testz_pd((__m128d)(__v2df){0,5.13}, (__m128d)(__v2df){0,-113.1324823}) == 1);
+
int test_mm256_testz_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_testz_pd
// CHECK: call {{.*}}i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
return _mm256_testz_pd(A, B);
}
+TEST_CONSTEXPR(_mm256_testz_pd((__m256d)(__v4df){-1,0,-47.47,0.00002}, (__m256d)(__v4df){0,-1,74.0101,-1}) == 1);
+TEST_CONSTEXPR(_mm256_testz_pd((__m256d)(__v4df){-1,3249.9,-47.47,-0.00002}, (__m256d)(__v4df){0,-1,74.0101,-9999900}) == 0);
+TEST_CONSTEXPR(_mm256_testz_pd((__m256d)(__v4df){0,0,-8,0}, (__m256d)(__v4df){0,-1,-101,-123}) == 0);
int test_mm_testz_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_testz_ps
// CHECK: call {{.*}}i32 @llvm.x86.avx.vtestz.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_testz_ps(A, B);
}
+TEST_CONSTEXPR(_mm_testz_ps((__m128)(__v4sf){-9.9,987,67,0}, (__m128)(__v4sf){10.0,-1.12,-29.29,0}) == 1);
+TEST_CONSTEXPR(_mm_testz_ps((__m128)(__v4sf){10.0,1.0,-1.0,-3.0}, (__m128)(__v4sf){-10.0,-1.0,-1.0,-2.0}) == 0);
+TEST_CONSTEXPR(_mm_testz_ps((__m128)(__v4sf){0,0,0,0}, (__m128)(__v4sf){0,-1,0,-1}) == 1);
int test_mm256_testz_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_testz_ps
// CHECK: call {{.*}}i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
return _mm256_testz_ps(A, B);
}
+TEST_CONSTEXPR(_mm256_testz_ps((__m256)(__v8sf){-1, -2, -3, -4, -5, -6, -7, 8},(__m256)(__v8sf){1, 2, 3, 4, 5, 6, 7, -8}) == 1);
+TEST_CONSTEXPR(_mm256_testz_ps((__m256)(__v8sf){0,0,0,0,0,0,0,-1.00001},(__m256)(__v8sf){1, 2, 3, 4, 5, 6, 7, -8}) == 0);
+TEST_CONSTEXPR(_mm256_testz_ps((__m256)(__v8sf){0,-0.00002,0,0,0,0,0,-1.00001},(__m256)(__v8sf){1, 2, 3, -4, 5, -6, -7, 8}) == 1);
int test_mm256_testz_si256(__m256i A, __m256i B) {
// CHECK-LABEL: test_mm256_testz_si256
// CHECK: call {{.*}}i32 @llvm.x86.avx.ptestz.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}})
return _mm256_testz_si256(A, B);
}
-TEST_CONSTEXPR(_mm256_testz_si256((__m256i)(__v4du){0,0,0,0}, (__m256i)(__v4du){0,0,0,0}) == 1);
-TEST_CONSTEXPR(_mm256_testz_si256((__m256i)(__v4du){0,0,0,1}, (__m256i)(__v4du){0,0,0,1}) == 0);
+TEST_CONSTEXPR(_mm256_testz_si256((__m256i)(__v4di){0,0,0,0}, (__m256i)(__v4di){0,0,0,0}) == 1);
+TEST_CONSTEXPR(_mm256_testz_si256((__m256i)(__v4di){0,0,-1,0}, (__m256i)(__v4di){0,0,-1,0}) == 0);
+TEST_CONSTEXPR(_mm256_testz_si256((__m256i)(__v4di){-1,0,1,0}, (__m256i)(__v4di){0,-1,0,1}) == 1);
__m256 test_mm256_undefined_ps(void) {
// X64-LABEL: test_mm256_undefined_ps
diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c
index 64295145d1d76..f379c572b122d 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -441,30 +441,47 @@ int test_mm_test_all_ones(__m128i x) {
// CHECK: call {{.*}}i32 @llvm.x86.sse41.ptestc(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
return _mm_test_all_ones(x);
}
+TEST_CONSTEXPR(_mm_test_all_ones(((__m128i)(__v2di){-1, -1})) == 1);
+TEST_CONSTEXPR(_mm_test_all_ones(((__m128i)(__v2di){-1, 0})) == 0);
+TEST_CONSTEXPR(_mm_test_all_ones(((__m128i)(__v4si){-1, -1, -1, 0x7FFFFFFF})) == 0);
int test_mm_test_all_zeros(__m128i x, __m128i y) {
// CHECK-LABEL: test_mm_test_all_zeros
// CHECK: call {{.*}}i32 @llvm.x86.sse41.ptestz(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
return _mm_test_all_zeros(x, y);
}
+TEST_CONSTEXPR(_mm_test_all_zeros(((__m128i)(__v2di){0,0}), ((__m128i)(__v2di){0,0})) == 1);
+TEST_CONSTEXPR(_mm_test_all_zeros(((__m128i)(__v2di){0xFF00,0}), ((__m128i)(__v2di){0x00FF,0})) == 1);
+TEST_CONSTEXPR(_mm_test_all_zeros(((__m128i)(__v2di){1,0}), ((__m128i)(__v2di){-1,0})) == 0);
+TEST_CONSTEXPR(_mm_test_all_zeros(((__m128i)(__v2di){0,1}), ((__m128i)(__v2di){0,-1})) == 0);
int test_mm_test_mix_ones_zeros(__m128i x, __m128i y) {
// CHECK-LABEL: test_mm_test_mix_ones_zeros
// CHECK: call {{.*}}i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
return _mm_test_mix_ones_zeros(x, y);
}
+TEST_CONSTEXPR(_mm_test_mix_ones_zeros(((__m128i)(__v2di){0xFF, 0}), ((__m128i)(__v2di){0xF0, 1})) == 1);
+TEST_CONSTEXPR(_mm_test_mix_ones_zeros(((__m128i)(__v2di){0xF0, 0}), ((__m128i)(__v2di){0x0F, 0})) == 0);
+TEST_CONSTEXPR(_mm_test_mix_ones_zeros(((__m128i)(__v2di){-1, -1}), ((__m128i)(__v2di){1, 0})) == 0);
+TEST_CONSTEXPR(_mm_test_mix_ones_zeros(((__m128i)(__v2di){0, 0}), ((__m128i)(__v2di){0, 0})) == 0);
int test_mm_testc_si128(__m128i x, __m128i y) {
// CHECK-LABEL: test_mm_testc_si128
// CHECK: call {{.*}}i32 @llvm.x86.sse41.ptestc(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
return _mm_testc_si128(x, y);
}
+TEST_CONSTEXPR(_mm_testc_si128((__m128i)(__v2di){0,0}, (__m128i)(__v2di){0,0}) == 1);
+TEST_CONSTEXPR(_mm_testc_si128((__m128i)(__v2di){1,0}, (__m128i)(__v2di){-1,0}) == 0);
+TEST_CONSTEXPR(_mm_testc_si128((__m128i)(__v2di){0,-1}, (__m128i)(__v2di){0,1}) == 1);
int test_mm_testnzc_si128(__m128i x, __m128i y) {
// CHECK-LABEL: test_mm_testnzc_si128
// CHECK: call {{.*}}i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
return _mm_testnzc_si128(x, y);
}
+TEST_CONSTEXPR(_mm_testnzc_si128((__m128i)(__v2di){3,0}, (__m128i)(__v2di){1,1}) == 1);
+TEST_CONSTEXPR(_mm_testnzc_si128((__m128i)(__v2di){32,-1}, (__m128i)(__v2di){15,0}) == 0);
+TEST_CONSTEXPR(_mm_testnzc_si128((__m128i)(__v2di){0,999}, (__m128i)(__v2di){0,999}) == 0);
int test_mm_testz_si128(__m128i x, __m128i y) {
// CHECK-LABEL: test_mm_testz_si128
@@ -472,4 +489,5 @@ int test_mm_testz_si128(__m128i x, __m128i y) {
return _mm_testz_si128(x, y);
}
TEST_CONSTEXPR(_mm_testz_si128((__m128i)(__v2di){0,0}, (__m128i)(__v2di){0,0}) == 1);
-TEST_CONSTEXPR(_mm_testz_si128((__m128i)(__v2di){0,1}, (__m128i)(__v2di){0,1}) == 0);
+TEST_CONSTEXPR(_mm_testz_si128((__m128i)(__v2di){1,0}, (__m128i)(__v2di){-1,0}) == 0);
+TEST_CONSTEXPR(_mm_testz_si128((__m128i)(__v2di){1,0}, (__m128i)(__v2di){0,1}) == 1);
>From 6f140099c9058a0546b7703ae6c4ae0e2508199b Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Tue, 23 Sep 2025 18:00:32 -0700
Subject: [PATCH 18/21] Clang-format
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 104 +++++++------
clang/lib/AST/ExprConstant.cpp | 189 ++++++++++++-----------
2 files changed, 152 insertions(+), 141 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 97e69868771de..14fe009a6f281 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2853,7 +2853,9 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
static bool interp__builtin_test_op(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
- llvm::function_ref<bool(const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT, const unsigned SourceLen)> Fn) {
+ llvm::function_ref<bool(const Pointer &LHS, const Pointer &RHS,
+ const OptPrimType ElemPT, const unsigned SourceLen)>
+ Fn) {
const Pointer &RHS = S.Stk.pop<Pointer>();
const Pointer &LHS = S.Stk.pop<Pointer>();
@@ -3607,7 +3609,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_ptestz256:
return interp__builtin_test_op(
S, OpPC, Call,
- [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT, const unsigned SourceLen) {
+ [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT,
+ const unsigned SourceLen) {
INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
for (unsigned I = 0; I < SourceLen; ++I) {
const APSInt A = LHS.elem<T>(I).toAPSInt();
@@ -3622,9 +3625,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_ptestc128:
case X86::BI__builtin_ia32_ptestc256:
- return interp__builtin_test_op(
+ return interp__builtin_test_op(
S, OpPC, Call,
- [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT, const unsigned SourceLen) {
+ [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT,
+ const unsigned SourceLen) {
INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
for (unsigned I = 0; I < SourceLen; ++I) {
const APSInt A = LHS.elem<T>(I).toAPSInt();
@@ -3639,9 +3643,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_ptestnzc128:
case X86::BI__builtin_ia32_ptestnzc256:
- return interp__builtin_test_op(
+ return interp__builtin_test_op(
S, OpPC, Call,
- [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT, const unsigned SourceLen) {
+ [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT,
+ const unsigned SourceLen) {
INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
bool Flag1 = false;
bool Flag2 = false;
@@ -3661,68 +3666,71 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_vtestzps:
case X86::BI__builtin_ia32_vtestzps256:
- case X86::BI__builtin_ia32_vtestzpd:
+ case X86::BI__builtin_ia32_vtestzpd:
case X86::BI__builtin_ia32_vtestzpd256:
return interp__builtin_test_op(
S, OpPC, Call,
- [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT, const unsigned SourceLen) {
- for (unsigned I = 0; I < SourceLen; ++I) {
- using T = PrimConv<PT_Float>::T;
- const APInt A = LHS.elem<T>(I).getAPFloat().bitcastToAPInt();
- const APInt B = RHS.elem<T>(I).getAPFloat().bitcastToAPInt();
- const unsigned SignBit = A.getBitWidth() - 1;
- const bool ASigned = A[SignBit];
- const bool BSigned = B[SignBit];
- if (!((ASigned && BSigned) == 0)) {
- return false;
- }
+ [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT,
+ const unsigned SourceLen) {
+ for (unsigned I = 0; I < SourceLen; ++I) {
+ using T = PrimConv<PT_Float>::T;
+ const APInt A = LHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+ const APInt B = RHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+ const unsigned SignBit = A.getBitWidth() - 1;
+ const bool ASigned = A[SignBit];
+ const bool BSigned = B[SignBit];
+ if (!((ASigned && BSigned) == 0)) {
+ return false;
}
- return true;
+ }
+ return true;
});
case X86::BI__builtin_ia32_vtestcps:
case X86::BI__builtin_ia32_vtestcps256:
- case X86::BI__builtin_ia32_vtestcpd:
+ case X86::BI__builtin_ia32_vtestcpd:
case X86::BI__builtin_ia32_vtestcpd256:
return interp__builtin_test_op(
S, OpPC, Call,
- [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT, const unsigned SourceLen) {
- for (unsigned I = 0; I < SourceLen; ++I) {
- using T = PrimConv<PT_Float>::T;
- const APInt A = LHS.elem<T>(I).getAPFloat().bitcastToAPInt();
- const APInt B = RHS.elem<T>(I).getAPFloat().bitcastToAPInt();
- const unsigned SignBit = A.getBitWidth() - 1;
- const bool ASigned = A[SignBit];
- const bool BSigned = B[SignBit];
- if (!((!ASigned && BSigned) == 0)) {
- return false;
- }
+ [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT,
+ const unsigned SourceLen) {
+ for (unsigned I = 0; I < SourceLen; ++I) {
+ using T = PrimConv<PT_Float>::T;
+ const APInt A = LHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+ const APInt B = RHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+ const unsigned SignBit = A.getBitWidth() - 1;
+ const bool ASigned = A[SignBit];
+ const bool BSigned = B[SignBit];
+ if (!((!ASigned && BSigned) == 0)) {
+ return false;
}
- return true;
+ }
+ return true;
});
case X86::BI__builtin_ia32_vtestnzcps:
case X86::BI__builtin_ia32_vtestnzcps256:
- case X86::BI__builtin_ia32_vtestnzcpd:
+ case X86::BI__builtin_ia32_vtestnzcpd:
case X86::BI__builtin_ia32_vtestnzcpd256:
return interp__builtin_test_op(
S, OpPC, Call,
- [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT, const unsigned SourceLen) {
+ [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT,
+ const unsigned SourceLen) {
bool Flag1 = false;
bool Flag2 = false;
- for (unsigned I = 0; I < SourceLen; ++I) {
- using T = PrimConv<PT_Float>::T;
- const APInt A = LHS.elem<T>(I).getAPFloat().bitcastToAPInt();
- const APInt B = RHS.elem<T>(I).getAPFloat().bitcastToAPInt();
- const unsigned SignBit = A.getBitWidth() - 1;
- const bool ASigned = A[SignBit];
- const bool BSigned = B[SignBit];
- if ((ASigned && BSigned) != 0) {
- Flag1 = true;
- }
- if ((!ASigned && BSigned) != 0) {
- Flag2 = true;
- }
+ for (unsigned I = 0; I < SourceLen; ++I) {
+ using T = PrimConv<PT_Float>::T;
+ const APInt A = LHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+ const APInt B = RHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+ const unsigned SignBit = A.getBitWidth() - 1;
+ const bool ASigned = A[SignBit];
+ const bool BSigned = B[SignBit];
+ if ((ASigned && BSigned) != 0) {
+ Flag1 = true;
}
- return Flag1 && Flag2;
+ if ((!ASigned && BSigned) != 0) {
+ Flag2 = true;
+ }
+ }
+ return Flag1 && Flag2;
});
case X86::BI__builtin_ia32_selectb_128:
case X86::BI__builtin_ia32_selectb_256:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index cc3e19230a4b2..41823b8159ee9 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13590,7 +13590,9 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
unsigned BuiltinOp) {
auto EvalTestOp =
- [&](llvm::function_ref<bool(const APValue &, const APValue &, const unsigned SourceLen)> Fn) {
+ [&](llvm::function_ref<bool(const APValue &, const APValue &,
+ const unsigned SourceLen)>
+ Fn) {
APValue SourceLHS, SourceRHS;
if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
!EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
@@ -13600,7 +13602,8 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
unsigned BitWidth = Info.Ctx.getIntWidth(ResultType);
bool ResultSigned = ResultType->isUnsignedIntegerOrEnumerationType();
unsigned SourceLen = SourceLHS.getVectorLength();
- APSInt Result(APInt(BitWidth, Fn(SourceLHS, SourceRHS, SourceLen)), ResultSigned);
+ APSInt Result(APInt(BitWidth, Fn(SourceLHS, SourceRHS, SourceLen)),
+ ResultSigned);
return Success(Result, E);
};
switch (BuiltinOp) {
@@ -14704,111 +14707,111 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
}
case X86::BI__builtin_ia32_ptestz128:
case X86::BI__builtin_ia32_ptestz256: {
- return EvalTestOp(
- [](const APValue &SourceLHS, const APValue &SourceRHS, const unsigned SourceLen) {
- for (unsigned I = 0; I < SourceLen; ++I) {
- const APInt &A = SourceLHS.getVectorElt(I).getInt();
- const APInt &B = SourceRHS.getVectorElt(I).getInt();
- if (!((A & B) == 0)) {
- return false;
- }
- }
- return true;
- });
+ return EvalTestOp([](const APValue &SourceLHS, const APValue &SourceRHS,
+ const unsigned SourceLen) {
+ for (unsigned I = 0; I < SourceLen; ++I) {
+ const APInt &A = SourceLHS.getVectorElt(I).getInt();
+ const APInt &B = SourceRHS.getVectorElt(I).getInt();
+ if (!((A & B) == 0)) {
+ return false;
+ }
+ }
+ return true;
+ });
}
case X86::BI__builtin_ia32_ptestc128:
case X86::BI__builtin_ia32_ptestc256: {
- return EvalTestOp(
- [](const APValue &SourceLHS, const APValue &SourceRHS, const unsigned SourceLen) {
- for (unsigned I = 0; I < SourceLen; ++I) {
- const APInt &A = SourceLHS.getVectorElt(I).getInt();
- const APInt &B = SourceRHS.getVectorElt(I).getInt();
- if (!((~A & B) == 0)) {
- return false;
- }
- }
- return true;
- });
+ return EvalTestOp([](const APValue &SourceLHS, const APValue &SourceRHS,
+ const unsigned SourceLen) {
+ for (unsigned I = 0; I < SourceLen; ++I) {
+ const APInt &A = SourceLHS.getVectorElt(I).getInt();
+ const APInt &B = SourceRHS.getVectorElt(I).getInt();
+ if (!((~A & B) == 0)) {
+ return false;
+ }
+ }
+ return true;
+ });
}
case X86::BI__builtin_ia32_ptestnzc128:
case X86::BI__builtin_ia32_ptestnzc256: {
- return EvalTestOp(
- [](const APValue &SourceLHS, const APValue &SourceRHS, const unsigned SourceLen) {
- int Flag1 = false, Flag2 = false;
- for (unsigned I = 0; I < SourceLen; ++I) {
- const APInt &A = SourceLHS.getVectorElt(I).getInt();
- const APInt &B = SourceRHS.getVectorElt(I).getInt();
- if ((A & B) != 0) {
- Flag1 = true;
- }
- if ((~A & B) != 0) {
- Flag2 = true;
- }
- }
- return Flag1 && Flag2;
- });
+ return EvalTestOp([](const APValue &SourceLHS, const APValue &SourceRHS,
+ const unsigned SourceLen) {
+ int Flag1 = false, Flag2 = false;
+ for (unsigned I = 0; I < SourceLen; ++I) {
+ const APInt &A = SourceLHS.getVectorElt(I).getInt();
+ const APInt &B = SourceRHS.getVectorElt(I).getInt();
+ if ((A & B) != 0) {
+ Flag1 = true;
+ }
+ if ((~A & B) != 0) {
+ Flag2 = true;
+ }
+ }
+ return Flag1 && Flag2;
+ });
}
case X86::BI__builtin_ia32_vtestzps:
case X86::BI__builtin_ia32_vtestzps256:
- case X86::BI__builtin_ia32_vtestzpd:
- case X86::BI__builtin_ia32_vtestzpd256: {
- return EvalTestOp(
- [](const APValue &SourceLHS, const APValue &SourceRHS, const unsigned SourceLen) {
- for (unsigned I = 0; I < SourceLen; ++I) {
- const APInt &A = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt();
- const APInt &B = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt();
- const unsigned SignBit = A.getBitWidth() - 1;
- const bool ASigned = A[SignBit];
- const bool BSigned = B[SignBit];
- if (!((ASigned && BSigned) == 0)) {
- return false;
- }
- }
- return true;
- });
+ case X86::BI__builtin_ia32_vtestzpd:
+ case X86::BI__builtin_ia32_vtestzpd256: {
+ return EvalTestOp([](const APValue &SourceLHS, const APValue &SourceRHS,
+ const unsigned SourceLen) {
+ for (unsigned I = 0; I < SourceLen; ++I) {
+ const APInt &A = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt();
+ const APInt &B = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt();
+ const unsigned SignBit = A.getBitWidth() - 1;
+ const bool ASigned = A[SignBit];
+ const bool BSigned = B[SignBit];
+ if (!((ASigned && BSigned) == 0)) {
+ return false;
+ }
+ }
+ return true;
+ });
}
case X86::BI__builtin_ia32_vtestcps:
- case X86::BI__builtin_ia32_vtestcps256:
- case X86::BI__builtin_ia32_vtestcpd:
- case X86::BI__builtin_ia32_vtestcpd256:{
- return EvalTestOp(
- [](const APValue &SourceLHS, const APValue &SourceRHS, const unsigned SourceLen) {
- for (unsigned I = 0; I < SourceLen; ++I) {
- const APInt &A = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt();
- const APInt &B = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt();
- const unsigned SignBit = A.getBitWidth() - 1;
- const bool ASigned = A[SignBit];
- const bool BSigned = B[SignBit];
- if (!((!ASigned && BSigned) == 0)) {
- return false;
- }
- }
- return true;
- });
+ case X86::BI__builtin_ia32_vtestcps256:
+ case X86::BI__builtin_ia32_vtestcpd:
+ case X86::BI__builtin_ia32_vtestcpd256: {
+ return EvalTestOp([](const APValue &SourceLHS, const APValue &SourceRHS,
+ const unsigned SourceLen) {
+ for (unsigned I = 0; I < SourceLen; ++I) {
+ const APInt &A = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt();
+ const APInt &B = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt();
+ const unsigned SignBit = A.getBitWidth() - 1;
+ const bool ASigned = A[SignBit];
+ const bool BSigned = B[SignBit];
+ if (!((!ASigned && BSigned) == 0)) {
+ return false;
+ }
+ }
+ return true;
+ });
}
case X86::BI__builtin_ia32_vtestnzcps:
case X86::BI__builtin_ia32_vtestnzcps256:
- case X86::BI__builtin_ia32_vtestnzcpd:
- case X86::BI__builtin_ia32_vtestnzcpd256: {
- return EvalTestOp(
- [](const APValue &SourceLHS, const APValue &SourceRHS, const unsigned SourceLen) {
- bool Flag1 = false;
- bool Flag2 = false;
- for (unsigned I = 0; I < SourceLen; ++I) {
- const APInt &A = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt();
- const APInt &B = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt();
- const unsigned SignBit = A.getBitWidth() - 1;
- const bool ASigned = A[SignBit];
- const bool BSigned = B[SignBit];
- if ((ASigned && BSigned) != 0) {
- Flag1 = true;
- }
- if ((!ASigned && BSigned) != 0) {
- Flag2 = true;
- }
- }
- return Flag1 && Flag2;
- });
+ case X86::BI__builtin_ia32_vtestnzcpd:
+ case X86::BI__builtin_ia32_vtestnzcpd256: {
+ return EvalTestOp([](const APValue &SourceLHS, const APValue &SourceRHS,
+ const unsigned SourceLen) {
+ bool Flag1 = false;
+ bool Flag2 = false;
+ for (unsigned I = 0; I < SourceLen; ++I) {
+ const APInt &A = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt();
+ const APInt &B = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt();
+ const unsigned SignBit = A.getBitWidth() - 1;
+ const bool ASigned = A[SignBit];
+ const bool BSigned = B[SignBit];
+ if ((ASigned && BSigned) != 0) {
+ Flag1 = true;
+ }
+ if ((!ASigned && BSigned) != 0) {
+ Flag2 = true;
+ }
+ }
+ return Flag1 && Flag2;
+ });
}
}
}
>From 3be64135e2795539f65fc940faea67482c7b8ef2 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Tue, 23 Sep 2025 19:07:27 -0700
Subject: [PATCH 19/21] Rebase
---
clang/lib/AST/ExprConstant.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index b38203c355719..ffc4b600f9b00 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -14889,6 +14889,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
[](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
}
}
+}
/// Determine whether this is a pointer past the end of the complete
/// object referred to by the lvalue.
>From a820f7978e5f87b8c6123911ef3682c8c6896e86 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Tue, 23 Sep 2025 19:12:52 -0700
Subject: [PATCH 20/21] Remove extra newlines
---
clang/lib/AST/ExprConstant.cpp | 2 --
1 file changed, 2 deletions(-)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index ffc4b600f9b00..4c05f09a4025e 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13588,8 +13588,6 @@ static bool getBuiltinAlignArguments(const CallExpr *E, EvalInfo &Info,
bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
unsigned BuiltinOp) {
-
-
auto EvalTestOp =
[&](llvm::function_ref<bool(const APValue &, const APValue &,
const unsigned SourceLen)>
>From ef27cd0828e972abaa99b1bbf1a92096c7482631 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Fri, 26 Sep 2025 01:12:44 -0700
Subject: [PATCH 21/21] Refactor out boilerplate
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 170 ++++++++---------------
clang/lib/AST/ExprConstant.cpp | 154 +++++++-------------
clang/test/CodeGen/X86/avx-builtins.c | 5 +-
3 files changed, 114 insertions(+), 215 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index aed1615aed75f..e7484dc7d662d 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2843,11 +2843,9 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
return true;
}
-static bool interp__builtin_test_op(
+static bool interp__builtin_ia32_test_op(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
- llvm::function_ref<bool(const Pointer &LHS, const Pointer &RHS,
- const OptPrimType ElemPT, const unsigned SourceLen)>
- Fn) {
+ llvm::function_ref<bool(const APInt &A, const APInt &B)> Fn) {
const Pointer &RHS = S.Stk.pop<Pointer>();
const Pointer &LHS = S.Stk.pop<Pointer>();
@@ -2863,8 +2861,47 @@ static bool interp__builtin_test_op(
const QualType ElemQT = getElemType(LHS);
const OptPrimType ElemPT = S.getContext().classify(ElemQT);
- pushInteger(S, Fn(LHS, RHS, ElemPT, SourceLen) ? 1 : 0, Call->getType());
- return true;
+ if (ElemQT->isIntegerType()) {
+ APInt FirstElem;
+ INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
+ FirstElem = LHS.elem<T>(0).toAPSInt();
+ });
+ const unsigned LaneWidth = FirstElem.getBitWidth();
+
+ APInt AWide(LaneWidth * SourceLen, 0);
+ APInt BWide(LaneWidth * SourceLen, 0);
+
+ for (unsigned I = 0; I != SourceLen; ++I) {
+ APInt ALane;
+ APInt BLane;
+ INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
+ ALane = LHS.elem<T>(I).toAPSInt();
+ BLane = RHS.elem<T>(I).toAPSInt();
+ });
+ AWide.insertBits(ALane, I * LaneWidth);
+ BWide.insertBits(BLane, I * LaneWidth);
+ }
+ pushInteger(S, Fn(AWide, BWide) ? 1 : 0, Call->getType());
+ return true;
+ } else if (ElemQT->isFloatingType()) {
+ APInt ASignBits(SourceLen, 0);
+ APInt BSignBits(SourceLen, 0);
+
+ for (unsigned I = 0; I != SourceLen; ++I) {
+ using T = PrimConv<PT_Float>::T;
+ APInt ALane = LHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+ APInt BLane = RHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+ const unsigned SignBit = ALane.getBitWidth() - 1;
+ const bool ALaneSign = ALane[SignBit];
+ const bool BLaneSign = BLane[SignBit];
+ ASignBits.setBitVal(I, ALaneSign);
+ BSignBits.setBitVal(I, BLaneSign);
+ }
+ pushInteger(S, Fn(ASignBits, BSignBits) ? 1 : 0, Call->getType());
+ return true;
+ } else { // Must be integer or float type
+ return false;
+ }
}
static bool interp__builtin_elementwise_triop(
@@ -3602,133 +3639,38 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
S, OpPC, Call, [](const APSInt &F, const APSInt &T, const APSInt &C) {
return ((APInt)C).isNegative() ? T : F;
});
-
case X86::BI__builtin_ia32_ptestz128:
case X86::BI__builtin_ia32_ptestz256:
- return interp__builtin_test_op(
- S, OpPC, Call,
- [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT,
- const unsigned SourceLen) {
- INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
- for (unsigned I = 0; I < SourceLen; ++I) {
- const APSInt A = LHS.elem<T>(I).toAPSInt();
- const APSInt B = RHS.elem<T>(I).toAPSInt();
- if (!((A & B) == 0)) {
- return false;
- }
- }
- });
- return true;
- });
-
- case X86::BI__builtin_ia32_ptestc128:
- case X86::BI__builtin_ia32_ptestc256:
- return interp__builtin_test_op(
- S, OpPC, Call,
- [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT,
- const unsigned SourceLen) {
- INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
- for (unsigned I = 0; I < SourceLen; ++I) {
- const APSInt A = LHS.elem<T>(I).toAPSInt();
- const APSInt B = RHS.elem<T>(I).toAPSInt();
- if (!((~A & B) == 0)) {
- return false;
- }
- }
- });
- return true;
- });
-
- case X86::BI__builtin_ia32_ptestnzc128:
- case X86::BI__builtin_ia32_ptestnzc256:
- return interp__builtin_test_op(
- S, OpPC, Call,
- [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT,
- const unsigned SourceLen) {
- INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
- bool Flag1 = false;
- bool Flag2 = false;
- for (unsigned I = 0; I < SourceLen; ++I) {
- const APSInt A = LHS.elem<T>(I).toAPSInt();
- const APSInt B = RHS.elem<T>(I).toAPSInt();
- if ((A & B) != 0) {
- Flag1 = true;
- }
- if ((~A & B) != 0) {
- Flag2 = true;
- }
- }
- return Flag1 && Flag2;
- });
- });
-
case X86::BI__builtin_ia32_vtestzps:
case X86::BI__builtin_ia32_vtestzps256:
case X86::BI__builtin_ia32_vtestzpd:
case X86::BI__builtin_ia32_vtestzpd256:
- return interp__builtin_test_op(
+ return interp__builtin_ia32_test_op(
S, OpPC, Call,
- [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT,
- const unsigned SourceLen) {
- for (unsigned I = 0; I < SourceLen; ++I) {
- using T = PrimConv<PT_Float>::T;
- const APInt A = LHS.elem<T>(I).getAPFloat().bitcastToAPInt();
- const APInt B = RHS.elem<T>(I).getAPFloat().bitcastToAPInt();
- const unsigned SignBit = A.getBitWidth() - 1;
- const bool ASigned = A[SignBit];
- const bool BSigned = B[SignBit];
- if (!((ASigned && BSigned) == 0)) {
- return false;
- }
- }
- return true;
+ [](const APInt &A, const APInt &B) {
+ return (A & B) == 0;
});
+ case X86::BI__builtin_ia32_ptestc128:
+ case X86::BI__builtin_ia32_ptestc256:
case X86::BI__builtin_ia32_vtestcps:
case X86::BI__builtin_ia32_vtestcps256:
case X86::BI__builtin_ia32_vtestcpd:
case X86::BI__builtin_ia32_vtestcpd256:
- return interp__builtin_test_op(
+ return interp__builtin_ia32_test_op(
S, OpPC, Call,
- [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT,
- const unsigned SourceLen) {
- for (unsigned I = 0; I < SourceLen; ++I) {
- using T = PrimConv<PT_Float>::T;
- const APInt A = LHS.elem<T>(I).getAPFloat().bitcastToAPInt();
- const APInt B = RHS.elem<T>(I).getAPFloat().bitcastToAPInt();
- const unsigned SignBit = A.getBitWidth() - 1;
- const bool ASigned = A[SignBit];
- const bool BSigned = B[SignBit];
- if (!((!ASigned && BSigned) == 0)) {
- return false;
- }
- }
- return true;
+ [](const APInt &A, const APInt &B) {
+ return (~A & B) == 0;
});
+ case X86::BI__builtin_ia32_ptestnzc128:
+ case X86::BI__builtin_ia32_ptestnzc256:
case X86::BI__builtin_ia32_vtestnzcps:
case X86::BI__builtin_ia32_vtestnzcps256:
case X86::BI__builtin_ia32_vtestnzcpd:
case X86::BI__builtin_ia32_vtestnzcpd256:
- return interp__builtin_test_op(
+ return interp__builtin_ia32_test_op(
S, OpPC, Call,
- [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT,
- const unsigned SourceLen) {
- bool Flag1 = false;
- bool Flag2 = false;
- for (unsigned I = 0; I < SourceLen; ++I) {
- using T = PrimConv<PT_Float>::T;
- const APInt A = LHS.elem<T>(I).getAPFloat().bitcastToAPInt();
- const APInt B = RHS.elem<T>(I).getAPFloat().bitcastToAPInt();
- const unsigned SignBit = A.getBitWidth() - 1;
- const bool ASigned = A[SignBit];
- const bool BSigned = B[SignBit];
- if ((ASigned && BSigned) != 0) {
- Flag1 = true;
- }
- if ((!ASigned && BSigned) != 0) {
- Flag2 = true;
- }
- }
- return Flag1 && Flag2;
+ [](const APInt &A, const APInt &B) {
+ return ((A & B) != 0) && ((~A & B) != 0);
});
case X86::BI__builtin_ia32_selectb_128:
case X86::BI__builtin_ia32_selectb_256:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 4c05f09a4025e..232665dff7a77 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13589,21 +13589,54 @@ static bool getBuiltinAlignArguments(const CallExpr *E, EvalInfo &Info,
bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
unsigned BuiltinOp) {
auto EvalTestOp =
- [&](llvm::function_ref<bool(const APValue &, const APValue &,
- const unsigned SourceLen)>
- Fn) {
+ [&](llvm::function_ref<bool(const APInt &, const APInt &)>Fn) {
APValue SourceLHS, SourceRHS;
if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
!EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
return false;
- QualType ResultType = E->getType();
- unsigned BitWidth = Info.Ctx.getIntWidth(ResultType);
- bool ResultSigned = ResultType->isUnsignedIntegerOrEnumerationType();
+ // QualType ResultType = E->getType();
+ // unsigned BitWidth = Info.Ctx.getIntWidth(ResultType);
+ // bool ResultSigned = ResultType->isUnsignedIntegerOrEnumerationType();
unsigned SourceLen = SourceLHS.getVectorLength();
- APSInt Result(APInt(BitWidth, Fn(SourceLHS, SourceRHS, SourceLen)),
- ResultSigned);
- return Success(Result, E);
+
+ const VectorType *VT = E->getArg(0)->getType()->castAs<VectorType>();
+ QualType ElemQT = VT->getElementType();
+
+ if (ElemQT->isIntegerType()) {
+ const unsigned LaneWidth = SourceLHS.getVectorElt(0).getInt().getBitWidth();
+ APInt AWide(LaneWidth * SourceLen, 0);
+ APInt BWide(LaneWidth * SourceLen, 0);
+
+ for (unsigned I = 0; I != SourceLen; ++I) {
+ APInt ALane = SourceLHS.getVectorElt(I).getInt();
+ APInt BLane = SourceRHS.getVectorElt(I).getInt();
+ AWide.insertBits(ALane, I * LaneWidth);
+ BWide.insertBits(BLane, I * LaneWidth);
+ }
+ return Success(Fn(AWide, BWide), E);
+
+ } else if (ElemQT->isFloatingType()) {
+ APInt ASignBits(SourceLen, 0);
+ APInt BSignBits(SourceLen, 0);
+
+ for (unsigned I = 0; I != SourceLen; ++I) {
+ APInt ALane = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt();
+ APInt BLane = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt();
+ const unsigned SignBit = ALane.getBitWidth() - 1;
+ const bool ALaneSign = ALane[SignBit];
+ const bool BLaneSign = BLane[SignBit];
+ ASignBits.setBitVal(I, ALaneSign);
+ BSignBits.setBitVal(I, BLaneSign);
+ }
+ return Success(Fn(ASignBits, BSignBits), E);
+
+ } else { // Must be integer or float type
+ return false;
+ }
+ // APSInt Result(APInt(BitWidth, Fn(SourceLHS, SourceRHS, SourceLen)),
+ // ResultSigned);
+ // return Success(Result, E);
};
auto HandleMaskBinOp =
@@ -14720,114 +14753,35 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
return Success(Result, E);
}
case X86::BI__builtin_ia32_ptestz128:
- case X86::BI__builtin_ia32_ptestz256: {
- return EvalTestOp([](const APValue &SourceLHS, const APValue &SourceRHS,
- const unsigned SourceLen) {
- for (unsigned I = 0; I < SourceLen; ++I) {
- const APInt &A = SourceLHS.getVectorElt(I).getInt();
- const APInt &B = SourceRHS.getVectorElt(I).getInt();
- if (!((A & B) == 0)) {
- return false;
- }
- }
- return true;
- });
- }
- case X86::BI__builtin_ia32_ptestc128:
- case X86::BI__builtin_ia32_ptestc256: {
- return EvalTestOp([](const APValue &SourceLHS, const APValue &SourceRHS,
- const unsigned SourceLen) {
- for (unsigned I = 0; I < SourceLen; ++I) {
- const APInt &A = SourceLHS.getVectorElt(I).getInt();
- const APInt &B = SourceRHS.getVectorElt(I).getInt();
- if (!((~A & B) == 0)) {
- return false;
- }
- }
- return true;
- });
- }
- case X86::BI__builtin_ia32_ptestnzc128:
- case X86::BI__builtin_ia32_ptestnzc256: {
- return EvalTestOp([](const APValue &SourceLHS, const APValue &SourceRHS,
- const unsigned SourceLen) {
- int Flag1 = false, Flag2 = false;
- for (unsigned I = 0; I < SourceLen; ++I) {
- const APInt &A = SourceLHS.getVectorElt(I).getInt();
- const APInt &B = SourceRHS.getVectorElt(I).getInt();
- if ((A & B) != 0) {
- Flag1 = true;
- }
- if ((~A & B) != 0) {
- Flag2 = true;
- }
- }
- return Flag1 && Flag2;
- });
- }
+ case X86::BI__builtin_ia32_ptestz256:
case X86::BI__builtin_ia32_vtestzps:
case X86::BI__builtin_ia32_vtestzps256:
case X86::BI__builtin_ia32_vtestzpd:
case X86::BI__builtin_ia32_vtestzpd256: {
- return EvalTestOp([](const APValue &SourceLHS, const APValue &SourceRHS,
- const unsigned SourceLen) {
- for (unsigned I = 0; I < SourceLen; ++I) {
- const APInt &A = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt();
- const APInt &B = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt();
- const unsigned SignBit = A.getBitWidth() - 1;
- const bool ASigned = A[SignBit];
- const bool BSigned = B[SignBit];
- if (!((ASigned && BSigned) == 0)) {
- return false;
- }
- }
- return true;
+ return EvalTestOp([](const APInt &A, const APInt &B) {
+ return (A & B) == 0;
});
}
+ case X86::BI__builtin_ia32_ptestc128:
+ case X86::BI__builtin_ia32_ptestc256:
case X86::BI__builtin_ia32_vtestcps:
case X86::BI__builtin_ia32_vtestcps256:
case X86::BI__builtin_ia32_vtestcpd:
case X86::BI__builtin_ia32_vtestcpd256: {
- return EvalTestOp([](const APValue &SourceLHS, const APValue &SourceRHS,
- const unsigned SourceLen) {
- for (unsigned I = 0; I < SourceLen; ++I) {
- const APInt &A = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt();
- const APInt &B = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt();
- const unsigned SignBit = A.getBitWidth() - 1;
- const bool ASigned = A[SignBit];
- const bool BSigned = B[SignBit];
- if (!((!ASigned && BSigned) == 0)) {
- return false;
- }
- }
- return true;
+ return EvalTestOp([](const APInt &A, const APInt &B) {
+ return (~A & B) == 0;
});
}
+ case X86::BI__builtin_ia32_ptestnzc128:
+ case X86::BI__builtin_ia32_ptestnzc256:
case X86::BI__builtin_ia32_vtestnzcps:
case X86::BI__builtin_ia32_vtestnzcps256:
case X86::BI__builtin_ia32_vtestnzcpd:
case X86::BI__builtin_ia32_vtestnzcpd256: {
- return EvalTestOp([](const APValue &SourceLHS, const APValue &SourceRHS,
- const unsigned SourceLen) {
- bool Flag1 = false;
- bool Flag2 = false;
- for (unsigned I = 0; I < SourceLen; ++I) {
- const APInt &A = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt();
- const APInt &B = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt();
- const unsigned SignBit = A.getBitWidth() - 1;
- const bool ASigned = A[SignBit];
- const bool BSigned = B[SignBit];
- if ((ASigned && BSigned) != 0) {
- Flag1 = true;
- }
- if ((!ASigned && BSigned) != 0) {
- Flag2 = true;
- }
- }
- return Flag1 && Flag2;
- });
+ return EvalTestOp([](const APInt &A, const APInt &B) {
+ return ((A & B) != 0) && ((~A & B) != 0);
+ });
}
-
case X86::BI__builtin_ia32_kandqi:
case X86::BI__builtin_ia32_kandhi:
case X86::BI__builtin_ia32_kandsi:
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index b7007245d7e9f..fabbebc69c876 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -2064,7 +2064,9 @@ int test_mm256_testnzc_pd(__m256d A, __m256d B) {
// CHECK: call {{.*}}i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
return _mm256_testnzc_pd(A, B);
}
-TEST_CONSTEXPR(_mm256_testnzc_pd((__m256d)(__v4df){-1.0, 2.0, 3.0, -4.0}, (__m256d)(__v4df){-5.0, -6.0, 7.0, 8.0}) == 1); TEST_CONSTEXPR(_mm256_testnzc_pd((__m256d)(__v4df){1.0, 2.0, 3.0, 4.0}, (__m256d)(__v4df){-1.0, 6.0, 7.0, 8.0}) == 0); TEST_CONSTEXPR(_mm256_testnzc_pd((__m256d)(__v4df){-1.0, -2.0, -3.0, -4.0}, (__m256d)(__v4df){-5.0, 6.0, 7.0, 8.0}) == 0);
+TEST_CONSTEXPR(_mm256_testnzc_pd((__m256d)(__v4df){-1.0, 2.0, 3.0, -4.0}, (__m256d)(__v4df){-5.0, -6.0, 7.0, 8.0}) == 1);
+TEST_CONSTEXPR(_mm256_testnzc_pd((__m256d)(__v4df){1.0, 2.0, 3.0, 4.0}, (__m256d)(__v4df){-1.0, 6.0, 7.0, 8.0}) == 0);
+TEST_CONSTEXPR(_mm256_testnzc_pd((__m256d)(__v4df){-1.0, -2.0, -3.0, -4.0}, (__m256d)(__v4df){-5.0, 6.0, 7.0, 8.0}) == 0);
int test_mm_testnzc_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_testnzc_ps
@@ -2090,6 +2092,7 @@ int test_mm256_testnzc_si256(__m256i A, __m256i B) {
return _mm256_testnzc_si256(A, B);
}
TEST_CONSTEXPR(_mm256_testnzc_si256((__m256i)(__v4di){-1,0,0,0}, (__m256i)(__v4di){478329848,23438,2343,-3483}) == 1);
+TEST_CONSTEXPR(_mm256_testnzc_si256((__m256i)(__v4di){1,0,0,0}, (__m256i)(__v4di){3,0,0,0}) == 1);
TEST_CONSTEXPR(_mm256_testnzc_si256((__m256i)(__v4di){-1,239483,-1,0}, (__m256i)(__v4di){3849234,0,-2,0}) == 0);
TEST_CONSTEXPR(_mm256_testnzc_si256((__m256i)(__v4di){-1,0,-1,3}, (__m256i)(__v4di){1,0,9999999,1}) == 0);
More information about the cfe-commits mailing list