[clang] [X86][Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - add SSE/AVX VPTEST/VTESTPD/VTESTPS intrinsics to be used in constexpr (PR #160428)

via cfe-commits cfe-commits at lists.llvm.org
Fri Sep 26 01:14:07 PDT 2025


https://github.com/kimsh02 updated https://github.com/llvm/llvm-project/pull/160428

>From 00ec5f957ce4c63ba9ba377974ba1f8db588822b Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Tue, 16 Sep 2025 14:21:55 -0700
Subject: [PATCH 01/21] Mark vector test intrinsics constexpr

---
 clang/include/clang/Basic/BuiltinsX86.td | 26 ++++++--
 clang/lib/Headers/avxintrin.h            | 75 ++++++++++--------------
 clang/lib/Headers/smmintrin.h            | 12 ++--
 3 files changed, 56 insertions(+), 57 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 044c755d4d7cf..956770126c62c 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -318,10 +318,8 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
   def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
   def roundpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant int)">;
   def dpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
-  def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">;
-  def ptestz128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
-  def ptestc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
-  def ptestnzc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
+  def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, "
+                        "double>, _Constant char)">;
   def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
   def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">;
   def vec_ext_v16qi : X86Builtin<"char(_Vector<16, char>, _Constant int)">;
@@ -329,6 +327,16 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
   def vec_set_v4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int, _Constant int)">;
 }
 
+let Features = "sse4.1",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+  def ptestz128
+      : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
+  def ptestc128
+      : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
+  def ptestnzc128
+      : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
+}
+
 let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def pblendw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">;
   def blendpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
@@ -514,7 +522,8 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
 }
 
 
-let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def vtestzpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
   def vtestcpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
   def vtestnzcpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
@@ -523,7 +532,8 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
   def vtestnzcps : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>)">;
 }
 
-let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def vtestzpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
   def vtestcpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
   def vtestnzcpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
@@ -533,6 +543,10 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
   def ptestz256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
   def ptestc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
   def ptestnzc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
+}
+
+let Features = "avx",
+    Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
   def movmskpd256 : X86Builtin<"int(_Vector<4, double>)">;
   def movmskps256 : X86Builtin<"int(_Vector<8, float>)">;
 }
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index a7f70994be9db..b37149709c962 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -2542,9 +2542,8 @@ _mm256_unpacklo_ps(__m256 __a, __m256 __b) {
 /// \param __b
 ///    A 128-bit vector of [2 x double].
 /// \returns the ZF flag in the EFLAGS register.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testz_pd(__m128d __a, __m128d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testz_pd(__m128d __a,
+                                                                 __m128d __b) {
   return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b);
 }
 
@@ -2571,9 +2570,8 @@ _mm_testz_pd(__m128d __a, __m128d __b)
 /// \param __b
 ///    A 128-bit vector of [2 x double].
 /// \returns the CF flag in the EFLAGS register.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testc_pd(__m128d __a, __m128d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testc_pd(__m128d __a,
+                                                                 __m128d __b) {
   return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b);
 }
 
@@ -2601,9 +2599,8 @@ _mm_testc_pd(__m128d __a, __m128d __b)
 /// \param __b
 ///    A 128-bit vector of [2 x double].
 /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testnzc_pd(__m128d __a, __m128d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_testnzc_pd(__m128d __a, __m128d __b) {
   return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b);
 }
 
@@ -2630,9 +2627,8 @@ _mm_testnzc_pd(__m128d __a, __m128d __b)
 /// \param __b
 ///    A 128-bit vector of [4 x float].
 /// \returns the ZF flag.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testz_ps(__m128 __a, __m128 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testz_ps(__m128 __a,
+                                                                 __m128 __b) {
   return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b);
 }
 
@@ -2659,9 +2655,8 @@ _mm_testz_ps(__m128 __a, __m128 __b)
 /// \param __b
 ///    A 128-bit vector of [4 x float].
 /// \returns the CF flag.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testc_ps(__m128 __a, __m128 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testc_ps(__m128 __a,
+                                                                 __m128 __b) {
   return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b);
 }
 
@@ -2689,9 +2684,8 @@ _mm_testc_ps(__m128 __a, __m128 __b)
 /// \param __b
 ///    A 128-bit vector of [4 x float].
 /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testnzc_ps(__m128 __a, __m128 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testnzc_ps(__m128 __a,
+                                                                   __m128 __b) {
   return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b);
 }
 
@@ -2718,9 +2712,8 @@ _mm_testnzc_ps(__m128 __a, __m128 __b)
 /// \param __b
 ///    A 256-bit vector of [4 x double].
 /// \returns the ZF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testz_pd(__m256d __a, __m256d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testz_pd(__m256d __a,
+                                                                 __m256d __b) {
   return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b);
 }
 
@@ -2747,9 +2740,8 @@ _mm256_testz_pd(__m256d __a, __m256d __b)
 /// \param __b
 ///    A 256-bit vector of [4 x double].
 /// \returns the CF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testc_pd(__m256d __a, __m256d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testc_pd(__m256d __a,
+                                                                 __m256d __b) {
   return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b);
 }
 
@@ -2777,9 +2769,8 @@ _mm256_testc_pd(__m256d __a, __m256d __b)
 /// \param __b
 ///    A 256-bit vector of [4 x double].
 /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testnzc_pd(__m256d __a, __m256d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_testnzc_pd(__m256d __a, __m256d __b) {
   return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b);
 }
 
@@ -2806,9 +2797,8 @@ _mm256_testnzc_pd(__m256d __a, __m256d __b)
 /// \param __b
 ///    A 256-bit vector of [8 x float].
 /// \returns the ZF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testz_ps(__m256 __a, __m256 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testz_ps(__m256 __a,
+                                                                 __m256 __b) {
   return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b);
 }
 
@@ -2835,9 +2825,8 @@ _mm256_testz_ps(__m256 __a, __m256 __b)
 /// \param __b
 ///    A 256-bit vector of [8 x float].
 /// \returns the CF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testc_ps(__m256 __a, __m256 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testc_ps(__m256 __a,
+                                                                 __m256 __b) {
   return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b);
 }
 
@@ -2865,9 +2854,8 @@ _mm256_testc_ps(__m256 __a, __m256 __b)
 /// \param __b
 ///    A 256-bit vector of [8 x float].
 /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testnzc_ps(__m256 __a, __m256 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testnzc_ps(__m256 __a,
+                                                                   __m256 __b) {
   return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b);
 }
 
@@ -2891,9 +2879,8 @@ _mm256_testnzc_ps(__m256 __a, __m256 __b)
 /// \param __b
 ///    A 256-bit integer vector.
 /// \returns the ZF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testz_si256(__m256i __a, __m256i __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_testz_si256(__m256i __a, __m256i __b) {
   return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b);
 }
 
@@ -2917,9 +2904,8 @@ _mm256_testz_si256(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit integer vector.
 /// \returns the CF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testc_si256(__m256i __a, __m256i __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_testc_si256(__m256i __a, __m256i __b) {
   return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b);
 }
 
@@ -2944,9 +2930,8 @@ _mm256_testc_si256(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit integer vector.
 /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testnzc_si256(__m256i __a, __m256i __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_testnzc_si256(__m256i __a, __m256i __b) {
   return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b);
 }
 
diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h
index 6319fdbbeb8f0..062e831259c7f 100644
--- a/clang/lib/Headers/smmintrin.h
+++ b/clang/lib/Headers/smmintrin.h
@@ -1093,8 +1093,8 @@ _mm_max_epu32(__m128i __V1, __m128i __V2) {
 /// \param __V
 ///    A 128-bit integer vector selecting which bits to test in operand \a __M.
 /// \returns TRUE if the specified bits are all zeros; FALSE otherwise.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_testz_si128(__m128i __M,
-                                                         __m128i __V) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_testz_si128(__m128i __M, __m128i __V) {
   return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V);
 }
 
@@ -1110,8 +1110,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_testz_si128(__m128i __M,
 /// \param __V
 ///    A 128-bit integer vector selecting which bits to test in operand \a __M.
 /// \returns TRUE if the specified bits are all ones; FALSE otherwise.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_testc_si128(__m128i __M,
-                                                         __m128i __V) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_testc_si128(__m128i __M, __m128i __V) {
   return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V);
 }
 
@@ -1128,8 +1128,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_testc_si128(__m128i __M,
 ///    A 128-bit integer vector selecting which bits to test in operand \a __M.
 /// \returns TRUE if the specified bits are neither all zeros nor all ones;
 ///    FALSE otherwise.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_testnzc_si128(__m128i __M,
-                                                           __m128i __V) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_testnzc_si128(__m128i __M, __m128i __V) {
   return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V);
 }
 

>From 99b5ecf761a0637e44569f57cd0cb049b4485ba3 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Tue, 16 Sep 2025 17:48:07 -0700
Subject: [PATCH 02/21] Add builtin case statements in
 VectorExprEvaluator::VisitCallExpr

---
 clang/lib/AST/ExprConstant.cpp | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 30ae3f8802f14..ac5994e7e5e61 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12025,6 +12025,33 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
 
     return Success(APValue(ResultElements.data(), ResultElements.size()), E);
   }
+  case X86::BI__builtin_ia32_ptestz128:
+  case X86::BI__builtin_ia32_ptestz256:
+
+  // case X86::BI__builtin_ia32_ptestc128:
+  // case X86::BI__builtin_ia32_ptestc256:
+
+  // case X86::BI__builtin_ia32_ptestnzc128:
+  // case X86::BI__builtin_ia32_ptestnzc256:
+
+  // case X86::BI__builtin_ia32_vtestzps:
+  // case X86::BI__builtin_ia32_vtestzps256:
+
+  // case X86::BI__builtin_ia32_vtestcps:
+  // case X86::BI__builtin_ia32_vtestcps256:
+
+  // case X86::BI__builtin_ia32_vtestnzcps:
+  // case X86::BI__builtin_ia32_vtestnzcps256:
+
+  // case X86::BI__builtin_ia32_vtestzpd:
+  // case X86::BI__builtin_ia32_vtestzpd256:
+
+  // case X86::BI__builtin_ia32_vtestcpd:
+  // case X86::BI__builtin_ia32_vtestcpd256:
+
+  // case X86::BI__builtin_ia32_vtestnzcpd:
+  // case X86::BI__builtin_ia32_vtestnzcpd256:
+
   case Builtin::BI__builtin_elementwise_ctlz:
   case Builtin::BI__builtin_elementwise_cttz: {
     APValue SourceLHS;

>From bcca2d1e8c27ec84598b6de76672113b3579db6c Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Wed, 17 Sep 2025 16:09:14 -0700
Subject: [PATCH 03/21] Add builtin case statements in InterpBuiltin.cpp

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 26 ++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 64962ee13d6b0..9171938ed89ae 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3580,6 +3580,32 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
         S, OpPC, Call, [](const APSInt &F, const APSInt &T, const APSInt &C) {
           return ((APInt)C).isNegative() ? T : F;
         });
+  case X86::BI__builtin_ia32_ptestz128:
+  case X86::BI__builtin_ia32_ptestz256:
+
+  // case X86::BI__builtin_ia32_ptestc128:
+  // case X86::BI__builtin_ia32_ptestc256:
+
+  // case X86::BI__builtin_ia32_ptestnzc128:
+  // case X86::BI__builtin_ia32_ptestnzc256:
+
+  // case X86::BI__builtin_ia32_vtestzps:
+  // case X86::BI__builtin_ia32_vtestzps256:
+
+  // case X86::BI__builtin_ia32_vtestcps:
+  // case X86::BI__builtin_ia32_vtestcps256:
+
+  // case X86::BI__builtin_ia32_vtestnzcps:
+  // case X86::BI__builtin_ia32_vtestnzcps256:
+
+  // case X86::BI__builtin_ia32_vtestzpd:
+  // case X86::BI__builtin_ia32_vtestzpd256:
+
+  // case X86::BI__builtin_ia32_vtestcpd:
+  // case X86::BI__builtin_ia32_vtestcpd256:
+
+  // case X86::BI__builtin_ia32_vtestnzcpd:
+  // case X86::BI__builtin_ia32_vtestnzcpd256:
 
   case X86::BI__builtin_ia32_selectb_128:
   case X86::BI__builtin_ia32_selectb_256:

>From d6537dea629c063e6e7c17ab0d2d4c453966b2f1 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Wed, 17 Sep 2025 21:45:27 -0700
Subject: [PATCH 04/21] Stash

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp |  4 +-
 clang/lib/AST/ExprConstant.cpp           | 54 +++++++++++++++++++++++-
 clang/test/CodeGen/X86/sse41-builtins.c  | 10 +----
 3 files changed, 56 insertions(+), 12 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 9171938ed89ae..f77e6ec1eca7b 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3580,8 +3580,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
         S, OpPC, Call, [](const APSInt &F, const APSInt &T, const APSInt &C) {
           return ((APInt)C).isNegative() ? T : F;
         });
-  case X86::BI__builtin_ia32_ptestz128:
-  case X86::BI__builtin_ia32_ptestz256:
+  // case X86::BI__builtin_ia32_ptestz128:
+  // case X86::BI__builtin_ia32_ptestz256:
 
   // case X86::BI__builtin_ia32_ptestc128:
   // case X86::BI__builtin_ia32_ptestc256:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index ac5994e7e5e61..76788a2c41315 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12025,8 +12025,58 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
 
     return Success(APValue(ResultElements.data(), ResultElements.size()), E);
   }
-  case X86::BI__builtin_ia32_ptestz128:
-  case X86::BI__builtin_ia32_ptestz256:
+  case X86::BI__builtin_ia32_ptestz128: {
+    APValue SourceLHS, SourceRHS;
+    if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+        !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+      return false;
+
+    unsigned SourceLen = SourceLHS.getVectorLength();
+    bool Flag = true;
+    for (unsigned I = 0; I < SourceLen; ++I) {
+      const APInt &A = SourceLHS.getVectorElt(I).getInt();
+      const APInt &B = SourceRHS.getVectorElt(I).getInt();
+      if ((A & B) != 0) {
+        Flag = false;
+        break;
+      }
+    }
+
+    QualType ResultType = E->getType();
+    unsigned BitWidth = Info.Ctx.getIntWidth(ResultType);
+    bool ResultSigned = ResultType->isUnsignedIntegerOrEnumerationType();
+    APSInt Result(APInt(BitWidth, Flag), ResultSigned);
+    return Success(APValue(Result), E);
+
+    // auto *DestTy = E->getType()->castAs<VectorType>();
+    // QualType DestEltTy = DestTy->getElementType();
+    // bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
+    // const unsigned SourceLen = SourceLHS.getVectorLength();
+    // SmallVector<APValue, 4> ResultElements;
+    // ResultElements.reserve(SourceLen);
+    
+    // unsigned BitWidth = SourceLHS.getVectorElt(0).getInt().getBitWidth();
+
+    // auto PopulateResultElements = [&](bool Flag) {
+    //   for (unsigned I = 0; I < SourceLen - 1; ++I) {
+    //     ResultElements.emplace_back(APSInt(APInt::getZero(BitWidth), DestUnsigned));
+    //   }
+    //   ResultElements.emplace_back(APSInt(APInt(BitWidth, Flag), DestUnsigned));
+    // };
+
+    // for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
+    //   const APInt &A = SourceLHS.getVectorElt(EltNum).getInt();
+    //   const APInt &B = SourceRHS.getVectorElt(EltNum).getInt();
+    //   if ((A & B) != 0) {
+    //     PopulateResultElements(false);
+    //     return Success(APValue(ResultElements.data(), SourceLen), E);
+    //   }
+    // }
+    // PopulateResultElements(true);
+    // return Success(APValue(ResultElements.data(), SourceLen), E);
+  }
+
+  // case clang::X86::BI__builtin_ia32_ptestz256:
 
   // case X86::BI__builtin_ia32_ptestc128:
   // case X86::BI__builtin_ia32_ptestc256:
diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c
index c7265b188d572..18cc8d582d408 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -7,14 +7,7 @@
 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK
 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK
 
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror  -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror  -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror  -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror  -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror  -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror  -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror  -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror  -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK
+
 
 
 #include <immintrin.h>
@@ -471,3 +464,4 @@ int test_mm_testz_si128(__m128i x, __m128i y) {
   // CHECK: call {{.*}}i32 @llvm.x86.sse41.ptestz(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
   return _mm_testz_si128(x, y);
 }
+// TEST_CONSTEXPR(_mm_testz_si128((__m128i)(__v2di){0,0}, (__m128i)(__v2di){0,0}) == 1);

>From dc03a377c9402cd0faf9f567c861a9dc99061dab Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Thu, 18 Sep 2025 17:03:52 -0700
Subject: [PATCH 05/21] Commit changes for rebase

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp |  6 ++++-
 clang/lib/AST/ExprConstant.cpp           | 31 ++----------------------
 2 files changed, 7 insertions(+), 30 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index f77e6ec1eca7b..a255c5f52cb3c 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3580,7 +3580,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
         S, OpPC, Call, [](const APSInt &F, const APSInt &T, const APSInt &C) {
           return ((APInt)C).isNegative() ? T : F;
         });
-  // case X86::BI__builtin_ia32_ptestz128:
+  
+  case X86::BI__builtin_ia32_ptestz128: {
+
+  }
+
   // case X86::BI__builtin_ia32_ptestz256:
 
   // case X86::BI__builtin_ia32_ptestc128:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 76788a2c41315..4be9e6caf49c0 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12047,36 +12047,9 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
     bool ResultSigned = ResultType->isUnsignedIntegerOrEnumerationType();
     APSInt Result(APInt(BitWidth, Flag), ResultSigned);
     return Success(APValue(Result), E);
+  }
 
-    // auto *DestTy = E->getType()->castAs<VectorType>();
-    // QualType DestEltTy = DestTy->getElementType();
-    // bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
-    // const unsigned SourceLen = SourceLHS.getVectorLength();
-    // SmallVector<APValue, 4> ResultElements;
-    // ResultElements.reserve(SourceLen);
-    
-    // unsigned BitWidth = SourceLHS.getVectorElt(0).getInt().getBitWidth();
-
-    // auto PopulateResultElements = [&](bool Flag) {
-    //   for (unsigned I = 0; I < SourceLen - 1; ++I) {
-    //     ResultElements.emplace_back(APSInt(APInt::getZero(BitWidth), DestUnsigned));
-    //   }
-    //   ResultElements.emplace_back(APSInt(APInt(BitWidth, Flag), DestUnsigned));
-    // };
-
-    // for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
-    //   const APInt &A = SourceLHS.getVectorElt(EltNum).getInt();
-    //   const APInt &B = SourceRHS.getVectorElt(EltNum).getInt();
-    //   if ((A & B) != 0) {
-    //     PopulateResultElements(false);
-    //     return Success(APValue(ResultElements.data(), SourceLen), E);
-    //   }
-    // }
-    // PopulateResultElements(true);
-    // return Success(APValue(ResultElements.data(), SourceLen), E);
-  }
-
-  // case clang::X86::BI__builtin_ia32_ptestz256:
+  // case X86::BI__builtin_ia32_ptestz256:
 
   // case X86::BI__builtin_ia32_ptestc128:
   // case X86::BI__builtin_ia32_ptestc256:

>From f04932c1327846d1f5660d2a9d6ea38eb86f9d38 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Thu, 18 Sep 2025 17:31:11 -0700
Subject: [PATCH 06/21] Commit changes for clang-format

---
 clang/include/clang/Basic/BuiltinsX86.td |  1 -
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 37 ++++++++++++------------
 clang/lib/AST/ExprConstant.cpp           | 34 +++++++++++-----------
 3 files changed, 35 insertions(+), 37 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 956770126c62c..ff6e3cabe5f6e 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -521,7 +521,6 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
   def roundps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">;
 }
 
-
 let Features = "avx",
     Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def vtestzpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index a255c5f52cb3c..d7f3e5dff8126 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3580,36 +3580,35 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
         S, OpPC, Call, [](const APSInt &F, const APSInt &T, const APSInt &C) {
           return ((APInt)C).isNegative() ? T : F;
         });
-  
-  case X86::BI__builtin_ia32_ptestz128: {
 
+  case X86::BI__builtin_ia32_ptestz128: {
   }
 
-  // case X86::BI__builtin_ia32_ptestz256:
+    // case X86::BI__builtin_ia32_ptestz256:
 
-  // case X86::BI__builtin_ia32_ptestc128:
-  // case X86::BI__builtin_ia32_ptestc256:
+    // case X86::BI__builtin_ia32_ptestc128:
+    // case X86::BI__builtin_ia32_ptestc256:
 
-  // case X86::BI__builtin_ia32_ptestnzc128:
-  // case X86::BI__builtin_ia32_ptestnzc256:
+    // case X86::BI__builtin_ia32_ptestnzc128:
+    // case X86::BI__builtin_ia32_ptestnzc256:
 
-  // case X86::BI__builtin_ia32_vtestzps:
-  // case X86::BI__builtin_ia32_vtestzps256:
+    // case X86::BI__builtin_ia32_vtestzps:
+    // case X86::BI__builtin_ia32_vtestzps256:
 
-  // case X86::BI__builtin_ia32_vtestcps:
-  // case X86::BI__builtin_ia32_vtestcps256:
+    // case X86::BI__builtin_ia32_vtestcps:
+    // case X86::BI__builtin_ia32_vtestcps256:
 
-  // case X86::BI__builtin_ia32_vtestnzcps:
-  // case X86::BI__builtin_ia32_vtestnzcps256:
+    // case X86::BI__builtin_ia32_vtestnzcps:
+    // case X86::BI__builtin_ia32_vtestnzcps256:
 
-  // case X86::BI__builtin_ia32_vtestzpd:
-  // case X86::BI__builtin_ia32_vtestzpd256:
+    // case X86::BI__builtin_ia32_vtestzpd:
+    // case X86::BI__builtin_ia32_vtestzpd256:
 
-  // case X86::BI__builtin_ia32_vtestcpd:
-  // case X86::BI__builtin_ia32_vtestcpd256:
+    // case X86::BI__builtin_ia32_vtestcpd:
+    // case X86::BI__builtin_ia32_vtestcpd256:
 
-  // case X86::BI__builtin_ia32_vtestnzcpd:
-  // case X86::BI__builtin_ia32_vtestnzcpd256:
+    // case X86::BI__builtin_ia32_vtestnzcpd:
+    // case X86::BI__builtin_ia32_vtestnzcpd256:
 
   case X86::BI__builtin_ia32_selectb_128:
   case X86::BI__builtin_ia32_selectb_256:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 4be9e6caf49c0..66280b65d7578 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12049,31 +12049,31 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
     return Success(APValue(Result), E);
   }
 
-  // case X86::BI__builtin_ia32_ptestz256:
+    // case X86::BI__builtin_ia32_ptestz256:
 
-  // case X86::BI__builtin_ia32_ptestc128:
-  // case X86::BI__builtin_ia32_ptestc256:
+    // case X86::BI__builtin_ia32_ptestc128:
+    // case X86::BI__builtin_ia32_ptestc256:
 
-  // case X86::BI__builtin_ia32_ptestnzc128:
-  // case X86::BI__builtin_ia32_ptestnzc256:
+    // case X86::BI__builtin_ia32_ptestnzc128:
+    // case X86::BI__builtin_ia32_ptestnzc256:
 
-  // case X86::BI__builtin_ia32_vtestzps:
-  // case X86::BI__builtin_ia32_vtestzps256:
+    // case X86::BI__builtin_ia32_vtestzps:
+    // case X86::BI__builtin_ia32_vtestzps256:
 
-  // case X86::BI__builtin_ia32_vtestcps:
-  // case X86::BI__builtin_ia32_vtestcps256:
+    // case X86::BI__builtin_ia32_vtestcps:
+    // case X86::BI__builtin_ia32_vtestcps256:
 
-  // case X86::BI__builtin_ia32_vtestnzcps:
-  // case X86::BI__builtin_ia32_vtestnzcps256:
+    // case X86::BI__builtin_ia32_vtestnzcps:
+    // case X86::BI__builtin_ia32_vtestnzcps256:
 
-  // case X86::BI__builtin_ia32_vtestzpd:
-  // case X86::BI__builtin_ia32_vtestzpd256:
+    // case X86::BI__builtin_ia32_vtestzpd:
+    // case X86::BI__builtin_ia32_vtestzpd256:
 
-  // case X86::BI__builtin_ia32_vtestcpd:
-  // case X86::BI__builtin_ia32_vtestcpd256:
+    // case X86::BI__builtin_ia32_vtestcpd:
+    // case X86::BI__builtin_ia32_vtestcpd256:
 
-  // case X86::BI__builtin_ia32_vtestnzcpd:
-  // case X86::BI__builtin_ia32_vtestnzcpd256:
+    // case X86::BI__builtin_ia32_vtestnzcpd:
+    // case X86::BI__builtin_ia32_vtestnzcpd256:
 
   case Builtin::BI__builtin_elementwise_ctlz:
   case Builtin::BI__builtin_elementwise_cttz: {

>From a2e9fa142d45206e35d35052ba1dcd4e868d19e5 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Thu, 18 Sep 2025 17:32:58 -0700
Subject: [PATCH 07/21] Commit changes for clang-format

---
 clang/include/clang/Basic/BuiltinsX86.td | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index ff6e3cabe5f6e..bf2beadf42677 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -318,8 +318,7 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
   def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
   def roundpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant int)">;
   def dpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
-  def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, "
-                        "double>, _Constant char)">;
+  def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">;
   def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
   def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">;
   def vec_ext_v16qi : X86Builtin<"char(_Vector<16, char>, _Constant int)">;

>From 6587f3eb87ef112e9f8e61a8958adca87584832e Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Thu, 18 Sep 2025 17:34:03 -0700
Subject: [PATCH 08/21] Commit changes for clang-format

---
 clang/include/clang/Basic/BuiltinsX86.td | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index bf2beadf42677..ff6e3cabe5f6e 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -318,7 +318,8 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
   def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
   def roundpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant int)">;
   def dpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
-  def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">;
+  def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, "
+                        "double>, _Constant char)">;
   def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
   def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">;
   def vec_ext_v16qi : X86Builtin<"char(_Vector<16, char>, _Constant int)">;

>From 20fcfaafdb310dd50ac3170484ed8e54d0fe380a Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Thu, 18 Sep 2025 17:58:54 -0700
Subject: [PATCH 09/21] Commit changes for clang-format

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 39 ++++++++++++++++++++++--
 clang/test/CodeGen/X86/sse41-builtins.c  | 10 ++++--
 2 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index d7f3e5dff8126..d14ae83df410c 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2851,6 +2851,39 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
   return true;
 }
 
+static bool interp__builtin_ptestz(InterpState &S, CodePtr OpPC,
+                                  const CallExpr *Call) {
+  const Pointer &LHS = S.Stk.pop<Pointer>();
+  const Pointer &RHS = S.Stk.pop<Pointer>();
+
+  assert(LHS.getNumElems() == RHS.getNumElems());
+  assert(LHS.getFieldDesc()->isPrimitiveArray() &&
+         RHS.getFieldDesc()->isPrimitiveArray());
+
+  if (!S.getASTContext().hasSameUnqualifiedType(getElemType(LHS),
+                                                getElemType(RHS)))
+    return false;
+
+  unsigned SourceLen = LHS.getNumElems();
+  const QualType ElemQT = getElemType(LHS);
+  const OptPrimType ElemPT = S.getContext().classify(ElemQT);
+
+  bool Flag = true;
+  INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
+      for (unsigned I = 0; I < SourceLen; ++I) {
+        const APSInt A = LHS.elem<T>(I).toAPSInt();
+        const APSInt B = RHS.elem<T>(I).toAPSInt();
+        if ( (A & B) != 0 ) { 
+          Flag = false; 
+          break; 
+        }
+      }
+    });
+
+  pushInteger(S, Flag ? 1 : 0, Call->getType());
+  return true;
+}
+
 static bool interp__builtin_elementwise_triop(
     InterpState &S, CodePtr OpPC, const CallExpr *Call,
     llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
@@ -3581,9 +3614,9 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
           return ((APInt)C).isNegative() ? T : F;
         });
 
-  case X86::BI__builtin_ia32_ptestz128: {
-  }
-
+  case X86::BI__builtin_ia32_ptestz128:
+    return interp__builtin_ptestz(S, OpPC, Call);
+  
     // case X86::BI__builtin_ia32_ptestz256:
 
     // case X86::BI__builtin_ia32_ptestc128:
diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c
index 18cc8d582d408..bbd1053910dee 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -7,8 +7,14 @@
 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK
 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK
 
-
-
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror  -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror  -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror  -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror  -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror  -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror  -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror  -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror  -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK
 
 #include <immintrin.h>
 #include "builtin_test_helpers.h"

>From 1df0602c59d72e6dd142802c97f387380e459166 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Thu, 18 Sep 2025 18:29:36 -0700
Subject: [PATCH 10/21] Commit changes for clang-format

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 20 ++++++++++----------
 clang/test/CodeGen/X86/sse41-builtins.c  |  2 +-
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index d14ae83df410c..8de8e935fea70 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2852,7 +2852,7 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
 }
 
 static bool interp__builtin_ptestz(InterpState &S, CodePtr OpPC,
-                                  const CallExpr *Call) {
+                                   const CallExpr *Call) {
   const Pointer &LHS = S.Stk.pop<Pointer>();
   const Pointer &RHS = S.Stk.pop<Pointer>();
 
@@ -2870,15 +2870,15 @@ static bool interp__builtin_ptestz(InterpState &S, CodePtr OpPC,
 
   bool Flag = true;
   INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
-      for (unsigned I = 0; I < SourceLen; ++I) {
-        const APSInt A = LHS.elem<T>(I).toAPSInt();
-        const APSInt B = RHS.elem<T>(I).toAPSInt();
-        if ( (A & B) != 0 ) { 
-          Flag = false; 
-          break; 
-        }
+    for (unsigned I = 0; I < SourceLen; ++I) {
+      const APSInt A = LHS.elem<T>(I).toAPSInt();
+      const APSInt B = RHS.elem<T>(I).toAPSInt();
+      if ((A & B) != 0) {
+        Flag = false;
+        break;
       }
-    });
+    }
+  });
 
   pushInteger(S, Flag ? 1 : 0, Call->getType());
   return true;
@@ -3616,7 +3616,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
 
   case X86::BI__builtin_ia32_ptestz128:
     return interp__builtin_ptestz(S, OpPC, Call);
-  
+
     // case X86::BI__builtin_ia32_ptestz256:
 
     // case X86::BI__builtin_ia32_ptestc128:
diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c
index bbd1053910dee..36a1309feab34 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -470,4 +470,4 @@ int test_mm_testz_si128(__m128i x, __m128i y) {
   // CHECK: call {{.*}}i32 @llvm.x86.sse41.ptestz(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
   return _mm_testz_si128(x, y);
 }
-// TEST_CONSTEXPR(_mm_testz_si128((__m128i)(__v2di){0,0}, (__m128i)(__v2di){0,0}) == 1);
+TEST_CONSTEXPR(_mm_testz_si128((__m128i)(__v2di){0,0}, (__m128i)(__v2di){0,0}) == 1);

>From 8d256da3eabd737a9865d5b55bd08aa784763883 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Thu, 18 Sep 2025 18:36:18 -0700
Subject: [PATCH 11/21] Commit changes for clang-format

---
 clang/include/clang/Basic/BuiltinsX86.td | 3 +--
 clang/test/CodeGen/X86/sse41-builtins.c  | 1 -
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index ff6e3cabe5f6e..f2c66427c980b 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -318,8 +318,7 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
   def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
   def roundpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant int)">;
   def dpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
-  def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, "
-                        "double>, _Constant char)">;
+  def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2,double>, _Constant char)">;
   def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
   def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">;
   def vec_ext_v16qi : X86Builtin<"char(_Vector<16, char>, _Constant int)">;
diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c
index 36a1309feab34..fff642b964062 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -15,7 +15,6 @@
 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror  -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64
 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror  -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK
 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror  -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK
-
 #include <immintrin.h>
 #include "builtin_test_helpers.h"
 

>From ccf457b24b33ec3ea95d4e68f815dd0a23036957 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Thu, 18 Sep 2025 18:36:29 -0700
Subject: [PATCH 12/21] Commit changes for clang-format

---
 clang/include/clang/Basic/BuiltinsX86.td | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index f2c66427c980b..29bb5e89ee516 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -318,7 +318,8 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
   def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
   def roundpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant int)">;
   def dpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
-  def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2,double>, _Constant char)">;
+  def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, "
+                        "_Vector<2,double>, _Constant char)">;
   def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
   def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">;
   def vec_ext_v16qi : X86Builtin<"char(_Vector<16, char>, _Constant int)">;

>From d7d4ff48825978f8871cac8ab655c027ecb169df Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Thu, 18 Sep 2025 18:37:18 -0700
Subject: [PATCH 13/21] Commit changes for clang-format

---
 clang/test/CodeGen/X86/sse41-builtins.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c
index fff642b964062..0073a474c9b85 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -15,6 +15,8 @@
 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror  -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64
 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror  -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK
 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror  -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK
+
+
 #include <immintrin.h>
 #include "builtin_test_helpers.h"
 

>From 8b0bf719d2c13ac786ac4d3d5aa39c289dc6b552 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Sat, 20 Sep 2025 16:25:15 -0700
Subject: [PATCH 14/21] Move handling from VectorExprEvaluator to
 IntExprEvaluator

---
 clang/lib/AST/ExprConstant.cpp | 69 ++++++++++++++++++++++------------
 1 file changed, 46 insertions(+), 23 deletions(-)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 66280b65d7578..94b096e53e7ad 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12025,29 +12025,29 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
 
     return Success(APValue(ResultElements.data(), ResultElements.size()), E);
   }
-  case X86::BI__builtin_ia32_ptestz128: {
-    APValue SourceLHS, SourceRHS;
-    if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
-        !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
-      return false;
-
-    unsigned SourceLen = SourceLHS.getVectorLength();
-    bool Flag = true;
-    for (unsigned I = 0; I < SourceLen; ++I) {
-      const APInt &A = SourceLHS.getVectorElt(I).getInt();
-      const APInt &B = SourceRHS.getVectorElt(I).getInt();
-      if ((A & B) != 0) {
-        Flag = false;
-        break;
-      }
-    }
-
-    QualType ResultType = E->getType();
-    unsigned BitWidth = Info.Ctx.getIntWidth(ResultType);
-    bool ResultSigned = ResultType->isUnsignedIntegerOrEnumerationType();
-    APSInt Result(APInt(BitWidth, Flag), ResultSigned);
-    return Success(APValue(Result), E);
-  }
+  // case X86::BI__builtin_ia32_ptestz128: {
+  //   APValue SourceLHS, SourceRHS;
+  //   if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+  //       !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+  //     return false;
+
+  //   unsigned SourceLen = SourceLHS.getVectorLength();
+  //   bool Flag = true;
+  //   for (unsigned I = 0; I < SourceLen; ++I) {
+  //     const APInt &A = SourceLHS.getVectorElt(I).getInt();
+  //     const APInt &B = SourceRHS.getVectorElt(I).getInt();
+  //     if ((A & B) != 0) {
+  //       Flag = false;
+  //       break;
+  //     }
+  //   }
+
+  //   QualType ResultType = E->getType();
+  //   unsigned BitWidth = Info.Ctx.getIntWidth(ResultType);
+  //   bool ResultSigned = ResultType->isUnsignedIntegerOrEnumerationType();
+  //   APSInt Result(APInt(BitWidth, Flag), ResultSigned);
+  //   return Success(APValue(Result), E);
+  // }
 
     // case X86::BI__builtin_ia32_ptestz256:
 
@@ -14737,6 +14737,29 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
         Result.setBitVal(P++, Val[I]);
     return Success(Result, E);
   }
+  case X86::BI__builtin_ia32_ptestz128: {
+    APValue SourceLHS, SourceRHS;
+    if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+        !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+      return false;
+
+    unsigned SourceLen = SourceLHS.getVectorLength();
+    bool Flag = true;
+    for (unsigned I = 0; I < SourceLen; ++I) {
+      const APInt &A = SourceLHS.getVectorElt(I).getInt();
+      const APInt &B = SourceRHS.getVectorElt(I).getInt();
+      if ((A & B) != 0) {
+        Flag = false;
+        break;
+      }
+    }
+
+    QualType ResultType = E->getType();
+    unsigned BitWidth = Info.Ctx.getIntWidth(ResultType);
+    bool ResultSigned = ResultType->isUnsignedIntegerOrEnumerationType();
+    APSInt Result(APInt(BitWidth, Flag), ResultSigned);
+    return Success(Result, E);
+  }
   }
 }
 

>From b1bbaf923af42d951829d27114b15c727aa2c598 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Sun, 21 Sep 2025 18:33:20 -0700
Subject: [PATCH 15/21] Refactor test builtin handling to reduce boilerplate

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp |  18 ++--
 clang/lib/AST/ExprConstant.cpp           | 123 ++++++++++-------------
 clang/test/CodeGen/X86/avx-builtins.c    |   2 +
 clang/test/CodeGen/X86/sse41-builtins.c  |   1 +
 4 files changed, 65 insertions(+), 79 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 8de8e935fea70..ac1e7f0a0d186 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2851,8 +2851,9 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
   return true;
 }
 
-static bool interp__builtin_ptestz(InterpState &S, CodePtr OpPC,
-                                   const CallExpr *Call) {
+static bool interp__builtin_test_op(InterpState &S, CodePtr OpPC,
+                                   const CallExpr *Call,
+llvm::function_ref<bool(const APSInt &A, const APSInt &B)> Fn) {
   const Pointer &LHS = S.Stk.pop<Pointer>();
   const Pointer &RHS = S.Stk.pop<Pointer>();
 
@@ -2873,7 +2874,7 @@ static bool interp__builtin_ptestz(InterpState &S, CodePtr OpPC,
     for (unsigned I = 0; I < SourceLen; ++I) {
       const APSInt A = LHS.elem<T>(I).toAPSInt();
       const APSInt B = RHS.elem<T>(I).toAPSInt();
-      if ((A & B) != 0) {
+      if (!Fn(A,B)) {
         Flag = false;
         break;
       }
@@ -3615,12 +3616,13 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
         });
 
   case X86::BI__builtin_ia32_ptestz128:
-    return interp__builtin_ptestz(S, OpPC, Call);
-
-    // case X86::BI__builtin_ia32_ptestz256:
+  case X86::BI__builtin_ia32_ptestz256:
+    return interp__builtin_test_op(S, OpPC, Call, [](const APSInt &A, const APSInt &B) {
+      return (A & B) == 0;
+    });
 
-    // case X86::BI__builtin_ia32_ptestc128:
-    // case X86::BI__builtin_ia32_ptestc256:
+  // case X86::BI__builtin_ia32_ptestc128:
+  // case X86::BI__builtin_ia32_ptestc256:
 
     // case X86::BI__builtin_ia32_ptestnzc128:
     // case X86::BI__builtin_ia32_ptestnzc256:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 94b096e53e7ad..9b3116ce07d6e 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12025,56 +12025,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
 
     return Success(APValue(ResultElements.data(), ResultElements.size()), E);
   }
-  // case X86::BI__builtin_ia32_ptestz128: {
-  //   APValue SourceLHS, SourceRHS;
-  //   if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
-  //       !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
-  //     return false;
-
-  //   unsigned SourceLen = SourceLHS.getVectorLength();
-  //   bool Flag = true;
-  //   for (unsigned I = 0; I < SourceLen; ++I) {
-  //     const APInt &A = SourceLHS.getVectorElt(I).getInt();
-  //     const APInt &B = SourceRHS.getVectorElt(I).getInt();
-  //     if ((A & B) != 0) {
-  //       Flag = false;
-  //       break;
-  //     }
-  //   }
-
-  //   QualType ResultType = E->getType();
-  //   unsigned BitWidth = Info.Ctx.getIntWidth(ResultType);
-  //   bool ResultSigned = ResultType->isUnsignedIntegerOrEnumerationType();
-  //   APSInt Result(APInt(BitWidth, Flag), ResultSigned);
-  //   return Success(APValue(Result), E);
-  // }
-
-    // case X86::BI__builtin_ia32_ptestz256:
-
-    // case X86::BI__builtin_ia32_ptestc128:
-    // case X86::BI__builtin_ia32_ptestc256:
-
-    // case X86::BI__builtin_ia32_ptestnzc128:
-    // case X86::BI__builtin_ia32_ptestnzc256:
-
-    // case X86::BI__builtin_ia32_vtestzps:
-    // case X86::BI__builtin_ia32_vtestzps256:
-
-    // case X86::BI__builtin_ia32_vtestcps:
-    // case X86::BI__builtin_ia32_vtestcps256:
-
-    // case X86::BI__builtin_ia32_vtestnzcps:
-    // case X86::BI__builtin_ia32_vtestnzcps256:
-
-    // case X86::BI__builtin_ia32_vtestzpd:
-    // case X86::BI__builtin_ia32_vtestzpd256:
-
-    // case X86::BI__builtin_ia32_vtestcpd:
-    // case X86::BI__builtin_ia32_vtestcpd256:
-
-    // case X86::BI__builtin_ia32_vtestnzcpd:
-    // case X86::BI__builtin_ia32_vtestnzcpd256:
-
   case Builtin::BI__builtin_elementwise_ctlz:
   case Builtin::BI__builtin_elementwise_cttz: {
     APValue SourceLHS;
@@ -13638,6 +13588,30 @@ static bool getBuiltinAlignArguments(const CallExpr *E, EvalInfo &Info,
 
 bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
                                             unsigned BuiltinOp) {
+
+  auto EvalTestOp = [&](llvm::function_ref<bool(const APInt &, const APInt &)> Fn) {
+    APValue SourceLHS, SourceRHS;
+    if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+        !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+      return false;
+
+    unsigned SourceLen = SourceLHS.getVectorLength();
+    bool Flag = true;
+    for (unsigned I = 0; I < SourceLen; ++I) {
+      const APInt &A = SourceLHS.getVectorElt(I).getInt();
+      const APInt &B = SourceRHS.getVectorElt(I).getInt();
+      if (!Fn(A, B)) {
+        Flag = false;
+        break;
+      }
+    }
+
+    QualType ResultType = E->getType();
+    unsigned BitWidth = Info.Ctx.getIntWidth(ResultType);
+    bool ResultSigned = ResultType->isUnsignedIntegerOrEnumerationType();
+    APSInt Result(APInt(BitWidth, Flag), ResultSigned);
+    return Success(Result, E);
+  };                                              
   switch (BuiltinOp) {
   default:
     return false;
@@ -14737,29 +14711,36 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
         Result.setBitVal(P++, Val[I]);
     return Success(Result, E);
   }
-  case X86::BI__builtin_ia32_ptestz128: {
-    APValue SourceLHS, SourceRHS;
-    if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
-        !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
-      return false;
+  case X86::BI__builtin_ia32_ptestz128: 
+  case X86::BI__builtin_ia32_ptestz256: {
+    return EvalTestOp([](const APInt& A, const APInt& B){
+        return (A & B) == 0;
+    });
+  }
 
-    unsigned SourceLen = SourceLHS.getVectorLength();
-    bool Flag = true;
-    for (unsigned I = 0; I < SourceLen; ++I) {
-      const APInt &A = SourceLHS.getVectorElt(I).getInt();
-      const APInt &B = SourceRHS.getVectorElt(I).getInt();
-      if ((A & B) != 0) {
-        Flag = false;
-        break;
-      }
-    }
+    // case X86::BI__builtin_ia32_ptestc128:
+    // case X86::BI__builtin_ia32_ptestc256:
 
-    QualType ResultType = E->getType();
-    unsigned BitWidth = Info.Ctx.getIntWidth(ResultType);
-    bool ResultSigned = ResultType->isUnsignedIntegerOrEnumerationType();
-    APSInt Result(APInt(BitWidth, Flag), ResultSigned);
-    return Success(Result, E);
-  }
+    // case X86::BI__builtin_ia32_ptestnzc128:
+    // case X86::BI__builtin_ia32_ptestnzc256:
+
+    // case X86::BI__builtin_ia32_vtestzps:
+    // case X86::BI__builtin_ia32_vtestzps256:
+
+    // case X86::BI__builtin_ia32_vtestcps:
+    // case X86::BI__builtin_ia32_vtestcps256:
+
+    // case X86::BI__builtin_ia32_vtestnzcps:
+    // case X86::BI__builtin_ia32_vtestnzcps256:
+
+    // case X86::BI__builtin_ia32_vtestzpd:
+    // case X86::BI__builtin_ia32_vtestzpd256:
+
+    // case X86::BI__builtin_ia32_vtestcpd:
+    // case X86::BI__builtin_ia32_vtestcpd256:
+
+    // case X86::BI__builtin_ia32_vtestnzcpd:
+    // case X86::BI__builtin_ia32_vtestnzcpd256:
   }
 }
 
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 347cd9ee6a667..a67da6614f91d 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -2082,6 +2082,8 @@ int test_mm256_testz_si256(__m256i A, __m256i B) {
   // CHECK: call {{.*}}i32 @llvm.x86.avx.ptestz.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}})
   return _mm256_testz_si256(A, B);
 }
+TEST_CONSTEXPR(_mm256_testz_si256((__m256i)(__v4du){0,0,0,0}, (__m256i)(__v4du){0,0,0,0}) == 1);
+TEST_CONSTEXPR(_mm256_testz_si256((__m256i)(__v4du){0,0,0,1}, (__m256i)(__v4du){0,0,0,1}) == 0);
 
 __m256 test_mm256_undefined_ps(void) {
   // X64-LABEL: test_mm256_undefined_ps
diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c
index 0073a474c9b85..64295145d1d76 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -472,3 +472,4 @@ int test_mm_testz_si128(__m128i x, __m128i y) {
   return _mm_testz_si128(x, y);
 }
 TEST_CONSTEXPR(_mm_testz_si128((__m128i)(__v2di){0,0}, (__m128i)(__v2di){0,0}) == 1);
+TEST_CONSTEXPR(_mm_testz_si128((__m128i)(__v2di){0,1}, (__m128i)(__v2di){0,1}) == 0);

>From 10eae4b173d277b5d54bf48c9981e71c05712cc2 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Sun, 21 Sep 2025 18:34:27 -0700
Subject: [PATCH 16/21] Clang-format

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 18 ++++-----
 clang/lib/AST/ExprConstant.cpp           | 50 ++++++++++++------------
 2 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index ac1e7f0a0d186..063187f0d8674 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2851,9 +2851,9 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
   return true;
 }
 
-static bool interp__builtin_test_op(InterpState &S, CodePtr OpPC,
-                                   const CallExpr *Call,
-llvm::function_ref<bool(const APSInt &A, const APSInt &B)> Fn) {
+static bool interp__builtin_test_op(
+    InterpState &S, CodePtr OpPC, const CallExpr *Call,
+    llvm::function_ref<bool(const APSInt &A, const APSInt &B)> Fn) {
   const Pointer &LHS = S.Stk.pop<Pointer>();
   const Pointer &RHS = S.Stk.pop<Pointer>();
 
@@ -2874,7 +2874,7 @@ llvm::function_ref<bool(const APSInt &A, const APSInt &B)> Fn) {
     for (unsigned I = 0; I < SourceLen; ++I) {
       const APSInt A = LHS.elem<T>(I).toAPSInt();
       const APSInt B = RHS.elem<T>(I).toAPSInt();
-      if (!Fn(A,B)) {
+      if (!Fn(A, B)) {
         Flag = false;
         break;
       }
@@ -3617,12 +3617,12 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
 
   case X86::BI__builtin_ia32_ptestz128:
   case X86::BI__builtin_ia32_ptestz256:
-    return interp__builtin_test_op(S, OpPC, Call, [](const APSInt &A, const APSInt &B) {
-      return (A & B) == 0;
-    });
+    return interp__builtin_test_op(
+        S, OpPC, Call,
+        [](const APSInt &A, const APSInt &B) { return (A & B) == 0; });
 
-  // case X86::BI__builtin_ia32_ptestc128:
-  // case X86::BI__builtin_ia32_ptestc256:
+    // case X86::BI__builtin_ia32_ptestc128:
+    // case X86::BI__builtin_ia32_ptestc256:
 
     // case X86::BI__builtin_ia32_ptestnzc128:
     // case X86::BI__builtin_ia32_ptestnzc256:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 9b3116ce07d6e..3db6225223a75 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13589,29 +13589,30 @@ static bool getBuiltinAlignArguments(const CallExpr *E, EvalInfo &Info,
 bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
                                             unsigned BuiltinOp) {
 
-  auto EvalTestOp = [&](llvm::function_ref<bool(const APInt &, const APInt &)> Fn) {
-    APValue SourceLHS, SourceRHS;
-    if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
-        !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
-      return false;
+  auto EvalTestOp =
+      [&](llvm::function_ref<bool(const APInt &, const APInt &)> Fn) {
+        APValue SourceLHS, SourceRHS;
+        if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+            !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+          return false;
 
-    unsigned SourceLen = SourceLHS.getVectorLength();
-    bool Flag = true;
-    for (unsigned I = 0; I < SourceLen; ++I) {
-      const APInt &A = SourceLHS.getVectorElt(I).getInt();
-      const APInt &B = SourceRHS.getVectorElt(I).getInt();
-      if (!Fn(A, B)) {
-        Flag = false;
-        break;
-      }
-    }
+        unsigned SourceLen = SourceLHS.getVectorLength();
+        bool Flag = true;
+        for (unsigned I = 0; I < SourceLen; ++I) {
+          const APInt &A = SourceLHS.getVectorElt(I).getInt();
+          const APInt &B = SourceRHS.getVectorElt(I).getInt();
+          if (!Fn(A, B)) {
+            Flag = false;
+            break;
+          }
+        }
 
-    QualType ResultType = E->getType();
-    unsigned BitWidth = Info.Ctx.getIntWidth(ResultType);
-    bool ResultSigned = ResultType->isUnsignedIntegerOrEnumerationType();
-    APSInt Result(APInt(BitWidth, Flag), ResultSigned);
-    return Success(Result, E);
-  };                                              
+        QualType ResultType = E->getType();
+        unsigned BitWidth = Info.Ctx.getIntWidth(ResultType);
+        bool ResultSigned = ResultType->isUnsignedIntegerOrEnumerationType();
+        APSInt Result(APInt(BitWidth, Flag), ResultSigned);
+        return Success(Result, E);
+      };
   switch (BuiltinOp) {
   default:
     return false;
@@ -14711,11 +14712,10 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
         Result.setBitVal(P++, Val[I]);
     return Success(Result, E);
   }
-  case X86::BI__builtin_ia32_ptestz128: 
+  case X86::BI__builtin_ia32_ptestz128:
   case X86::BI__builtin_ia32_ptestz256: {
-    return EvalTestOp([](const APInt& A, const APInt& B){
-        return (A & B) == 0;
-    });
+    return EvalTestOp(
+        [](const APInt &A, const APInt &B) { return (A & B) == 0; });
   }
 
     // case X86::BI__builtin_ia32_ptestc128:

>From 7c3301b8b6790cb014427558f914a4e0c697b4b5 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Tue, 23 Sep 2025 18:00:04 -0700
Subject: [PATCH 17/21] [X86][Clang] VectorExprEvaluator::VisitCallExpr /
 InterpretBuiltin - add SSE/AVX VPTEST/VTESTPD/VTESTPS intrinsics to be used
 in constexpr

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 157 +++++++++++++++++------
 clang/lib/AST/ExprConstant.cpp           | 145 +++++++++++++++------
 clang/test/CodeGen/X86/avx-builtins.c    |  58 ++++++++-
 clang/test/CodeGen/X86/sse41-builtins.c  |  20 ++-
 4 files changed, 300 insertions(+), 80 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 063187f0d8674..97e69868771de 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2853,9 +2853,9 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
 
 static bool interp__builtin_test_op(
     InterpState &S, CodePtr OpPC, const CallExpr *Call,
-    llvm::function_ref<bool(const APSInt &A, const APSInt &B)> Fn) {
-  const Pointer &LHS = S.Stk.pop<Pointer>();
+    llvm::function_ref<bool(const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT, const unsigned SourceLen)> Fn) {
   const Pointer &RHS = S.Stk.pop<Pointer>();
+  const Pointer &LHS = S.Stk.pop<Pointer>();
 
   assert(LHS.getNumElems() == RHS.getNumElems());
   assert(LHS.getFieldDesc()->isPrimitiveArray() &&
@@ -2865,23 +2865,11 @@ static bool interp__builtin_test_op(
                                                 getElemType(RHS)))
     return false;
 
-  unsigned SourceLen = LHS.getNumElems();
+  const unsigned SourceLen = LHS.getNumElems();
   const QualType ElemQT = getElemType(LHS);
   const OptPrimType ElemPT = S.getContext().classify(ElemQT);
 
-  bool Flag = true;
-  INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
-    for (unsigned I = 0; I < SourceLen; ++I) {
-      const APSInt A = LHS.elem<T>(I).toAPSInt();
-      const APSInt B = RHS.elem<T>(I).toAPSInt();
-      if (!Fn(A, B)) {
-        Flag = false;
-        break;
-      }
-    }
-  });
-
-  pushInteger(S, Flag ? 1 : 0, Call->getType());
+  pushInteger(S, Fn(LHS, RHS, ElemPT, SourceLen) ? 1 : 0, Call->getType());
   return true;
 }
 
@@ -3619,32 +3607,123 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
   case X86::BI__builtin_ia32_ptestz256:
     return interp__builtin_test_op(
         S, OpPC, Call,
-        [](const APSInt &A, const APSInt &B) { return (A & B) == 0; });
-
-    // case X86::BI__builtin_ia32_ptestc128:
-    // case X86::BI__builtin_ia32_ptestc256:
-
-    // case X86::BI__builtin_ia32_ptestnzc128:
-    // case X86::BI__builtin_ia32_ptestnzc256:
-
-    // case X86::BI__builtin_ia32_vtestzps:
-    // case X86::BI__builtin_ia32_vtestzps256:
-
-    // case X86::BI__builtin_ia32_vtestcps:
-    // case X86::BI__builtin_ia32_vtestcps256:
-
-    // case X86::BI__builtin_ia32_vtestnzcps:
-    // case X86::BI__builtin_ia32_vtestnzcps256:
-
-    // case X86::BI__builtin_ia32_vtestzpd:
-    // case X86::BI__builtin_ia32_vtestzpd256:
+        [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT, const unsigned SourceLen) { 
+          INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
+            for (unsigned I = 0; I < SourceLen; ++I) {
+              const APSInt A = LHS.elem<T>(I).toAPSInt();
+              const APSInt B = RHS.elem<T>(I).toAPSInt();
+              if (!((A & B) == 0)) {
+                return false;
+              }
+            }
+          });
+          return true;
+        });
 
-    // case X86::BI__builtin_ia32_vtestcpd:
-    // case X86::BI__builtin_ia32_vtestcpd256:
+  case X86::BI__builtin_ia32_ptestc128:
+  case X86::BI__builtin_ia32_ptestc256:
+        return interp__builtin_test_op(
+        S, OpPC, Call,
+        [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT, const unsigned SourceLen) { 
+          INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
+            for (unsigned I = 0; I < SourceLen; ++I) {
+              const APSInt A = LHS.elem<T>(I).toAPSInt();
+              const APSInt B = RHS.elem<T>(I).toAPSInt();
+              if (!((~A & B) == 0)) {
+                return false;
+              }
+            }
+          });
+          return true;
+        });
 
-    // case X86::BI__builtin_ia32_vtestnzcpd:
-    // case X86::BI__builtin_ia32_vtestnzcpd256:
+  case X86::BI__builtin_ia32_ptestnzc128:
+  case X86::BI__builtin_ia32_ptestnzc256:
+        return interp__builtin_test_op(
+        S, OpPC, Call,
+        [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT, const unsigned SourceLen) { 
+          INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
+            bool Flag1 = false;
+            bool Flag2 = false;
+            for (unsigned I = 0; I < SourceLen; ++I) {
+              const APSInt A = LHS.elem<T>(I).toAPSInt();
+              const APSInt B = RHS.elem<T>(I).toAPSInt();
+              if ((A & B) != 0) {
+                Flag1 = true;
+              }
+              if ((~A & B) != 0) {
+                Flag2 = true;
+              }
+            }
+            return Flag1 && Flag2;
+          });
+        });
 
+  case X86::BI__builtin_ia32_vtestzps:
+  case X86::BI__builtin_ia32_vtestzps256:
+    case X86::BI__builtin_ia32_vtestzpd:
+  case X86::BI__builtin_ia32_vtestzpd256:
+    return interp__builtin_test_op(
+        S, OpPC, Call,
+        [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT, const unsigned SourceLen) { 
+            for (unsigned I = 0; I < SourceLen; ++I) {
+              using T = PrimConv<PT_Float>::T;
+              const APInt A = LHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+              const APInt B = RHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+              const unsigned SignBit = A.getBitWidth() - 1;
+              const bool ASigned = A[SignBit];
+              const bool BSigned = B[SignBit];
+              if (!((ASigned && BSigned) == 0)) {
+                return false;
+              }
+            }
+            return true;
+        });
+  case X86::BI__builtin_ia32_vtestcps:
+  case X86::BI__builtin_ia32_vtestcps256:
+    case X86::BI__builtin_ia32_vtestcpd:
+  case X86::BI__builtin_ia32_vtestcpd256:
+    return interp__builtin_test_op(
+        S, OpPC, Call,
+        [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT, const unsigned SourceLen) { 
+            for (unsigned I = 0; I < SourceLen; ++I) {
+              using T = PrimConv<PT_Float>::T;
+              const APInt A = LHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+              const APInt B = RHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+              const unsigned SignBit = A.getBitWidth() - 1;
+              const bool ASigned = A[SignBit];
+              const bool BSigned = B[SignBit];
+              if (!((!ASigned && BSigned) == 0)) {
+                return false;
+              }
+            }
+            return true;
+        });
+  case X86::BI__builtin_ia32_vtestnzcps:
+  case X86::BI__builtin_ia32_vtestnzcps256:
+    case X86::BI__builtin_ia32_vtestnzcpd:
+  case X86::BI__builtin_ia32_vtestnzcpd256:
+    return interp__builtin_test_op(
+        S, OpPC, Call,
+        [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT, const unsigned SourceLen) { 
+          bool Flag1 = false;
+          bool Flag2 = false;
+            for (unsigned I = 0; I < SourceLen; ++I) {
+              using T = PrimConv<PT_Float>::T;
+              const APInt A = LHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+              const APInt B = RHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+              const unsigned SignBit = A.getBitWidth() - 1;
+              const bool ASigned = A[SignBit];
+              const bool BSigned = B[SignBit];
+              if ((ASigned && BSigned) != 0) {
+                Flag1 = true;
+              }
+              if ((!ASigned && BSigned) != 0) {
+                Flag2 = true;
+              }
+            }
+            return Flag1 && Flag2;
+        });
   case X86::BI__builtin_ia32_selectb_128:
   case X86::BI__builtin_ia32_selectb_256:
   case X86::BI__builtin_ia32_selectb_512:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 3db6225223a75..cc3e19230a4b2 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13590,27 +13590,17 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
                                             unsigned BuiltinOp) {
 
   auto EvalTestOp =
-      [&](llvm::function_ref<bool(const APInt &, const APInt &)> Fn) {
+      [&](llvm::function_ref<bool(const APValue &, const APValue &, const unsigned SourceLen)> Fn) {
         APValue SourceLHS, SourceRHS;
         if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
             !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
           return false;
 
-        unsigned SourceLen = SourceLHS.getVectorLength();
-        bool Flag = true;
-        for (unsigned I = 0; I < SourceLen; ++I) {
-          const APInt &A = SourceLHS.getVectorElt(I).getInt();
-          const APInt &B = SourceRHS.getVectorElt(I).getInt();
-          if (!Fn(A, B)) {
-            Flag = false;
-            break;
-          }
-        }
-
         QualType ResultType = E->getType();
         unsigned BitWidth = Info.Ctx.getIntWidth(ResultType);
         bool ResultSigned = ResultType->isUnsignedIntegerOrEnumerationType();
-        APSInt Result(APInt(BitWidth, Flag), ResultSigned);
+        unsigned SourceLen = SourceLHS.getVectorLength();
+        APSInt Result(APInt(BitWidth, Fn(SourceLHS, SourceRHS, SourceLen)), ResultSigned);
         return Success(Result, E);
       };
   switch (BuiltinOp) {
@@ -14715,32 +14705,111 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
   case X86::BI__builtin_ia32_ptestz128:
   case X86::BI__builtin_ia32_ptestz256: {
     return EvalTestOp(
-        [](const APInt &A, const APInt &B) { return (A & B) == 0; });
+        [](const APValue &SourceLHS, const APValue &SourceRHS, const unsigned SourceLen) {
+          for (unsigned I = 0; I < SourceLen; ++I) {
+            const APInt &A = SourceLHS.getVectorElt(I).getInt();
+            const APInt &B = SourceRHS.getVectorElt(I).getInt();
+            if (!((A & B) == 0)) {
+              return false;
+            }
+          }
+          return true;
+       });
+  }
+  case X86::BI__builtin_ia32_ptestc128:
+  case X86::BI__builtin_ia32_ptestc256: {
+    return EvalTestOp(
+        [](const APValue &SourceLHS, const APValue &SourceRHS, const unsigned SourceLen) {
+          for (unsigned I = 0; I < SourceLen; ++I) {
+            const APInt &A = SourceLHS.getVectorElt(I).getInt();
+            const APInt &B = SourceRHS.getVectorElt(I).getInt();
+            if (!((~A & B) == 0)) {
+              return false;
+            }
+          }
+          return true;
+       });
+  }
+  case X86::BI__builtin_ia32_ptestnzc128:
+  case X86::BI__builtin_ia32_ptestnzc256: {
+        return EvalTestOp(
+        [](const APValue &SourceLHS, const APValue &SourceRHS, const unsigned SourceLen) {
+          int Flag1 = false, Flag2 = false;
+          for (unsigned I = 0; I < SourceLen; ++I) {
+            const APInt &A = SourceLHS.getVectorElt(I).getInt();
+            const APInt &B = SourceRHS.getVectorElt(I).getInt();
+            if ((A & B) != 0) {
+              Flag1 = true;
+            }
+            if ((~A & B) != 0) {
+              Flag2 = true;
+            }
+          }
+          return Flag1 && Flag2;
+       });
+  }
+  case X86::BI__builtin_ia32_vtestzps:
+  case X86::BI__builtin_ia32_vtestzps256:
+      case X86::BI__builtin_ia32_vtestzpd:
+    case X86::BI__builtin_ia32_vtestzpd256: {
+    return EvalTestOp(
+        [](const APValue &SourceLHS, const APValue &SourceRHS, const unsigned SourceLen) {
+          for (unsigned I = 0; I < SourceLen; ++I) {
+            const APInt &A = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt();
+            const APInt &B = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt();
+            const unsigned SignBit = A.getBitWidth() - 1;
+            const bool ASigned = A[SignBit];
+            const bool BSigned = B[SignBit];
+            if (!((ASigned && BSigned) == 0)) {
+              return false;
+            }
+          }
+          return true;
+       });
+  }
+  case X86::BI__builtin_ia32_vtestcps:
+  case X86::BI__builtin_ia32_vtestcps256: 
+      case X86::BI__builtin_ia32_vtestcpd:
+    case X86::BI__builtin_ia32_vtestcpd256:{
+        return EvalTestOp(
+        [](const APValue &SourceLHS, const APValue &SourceRHS, const unsigned SourceLen) {
+          for (unsigned I = 0; I < SourceLen; ++I) {
+            const APInt &A = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt();
+            const APInt &B = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt();
+            const unsigned SignBit = A.getBitWidth() - 1;
+            const bool ASigned = A[SignBit];
+            const bool BSigned = B[SignBit];
+            if (!((!ASigned && BSigned) == 0)) {
+              return false;
+            }
+          }
+          return true;
+       });
+  }
+  case X86::BI__builtin_ia32_vtestnzcps:
+  case X86::BI__builtin_ia32_vtestnzcps256:
+      case X86::BI__builtin_ia32_vtestnzcpd:
+    case X86::BI__builtin_ia32_vtestnzcpd256: {
+            return EvalTestOp(
+        [](const APValue &SourceLHS, const APValue &SourceRHS, const unsigned SourceLen) {
+          bool Flag1 = false;
+          bool Flag2 = false;
+          for (unsigned I = 0; I < SourceLen; ++I) {
+            const APInt &A = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt();
+            const APInt &B = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt();
+            const unsigned SignBit = A.getBitWidth() - 1;
+            const bool ASigned = A[SignBit];
+            const bool BSigned = B[SignBit];
+            if ((ASigned && BSigned) != 0) {
+              Flag1 = true;
+            }
+            if ((!ASigned && BSigned) != 0) {
+              Flag2 = true;
+            }
+          }
+          return Flag1 && Flag2;
+       });
   }
-
-    // case X86::BI__builtin_ia32_ptestc128:
-    // case X86::BI__builtin_ia32_ptestc256:
-
-    // case X86::BI__builtin_ia32_ptestnzc128:
-    // case X86::BI__builtin_ia32_ptestnzc256:
-
-    // case X86::BI__builtin_ia32_vtestzps:
-    // case X86::BI__builtin_ia32_vtestzps256:
-
-    // case X86::BI__builtin_ia32_vtestcps:
-    // case X86::BI__builtin_ia32_vtestcps256:
-
-    // case X86::BI__builtin_ia32_vtestnzcps:
-    // case X86::BI__builtin_ia32_vtestnzcps256:
-
-    // case X86::BI__builtin_ia32_vtestzpd:
-    // case X86::BI__builtin_ia32_vtestzpd256:
-
-    // case X86::BI__builtin_ia32_vtestcpd:
-    // case X86::BI__builtin_ia32_vtestcpd256:
-
-    // case X86::BI__builtin_ia32_vtestnzcpd:
-    // case X86::BI__builtin_ia32_vtestnzcpd256:
   }
 }
 
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index a67da6614f91d..b7007245d7e9f 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -1998,92 +1998,146 @@ int test_mm_testc_pd(__m128d A, __m128d B) {
   // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestc.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
   return _mm_testc_pd(A, B);
 }
+TEST_CONSTEXPR(_mm_testc_pd((__m128d)(__v2df){-1.0, -2.0},
+                            (__m128d)(__v2df){-3.0,  4.0}) == 1);
+TEST_CONSTEXPR(_mm_testc_pd((__m128d)(__v2df){ 1.0, -2.0},
+                            (__m128d)(__v2df){-3.0,  4.0}) == 0);
+TEST_CONSTEXPR(_mm_testc_pd((__m128d)(__v2df){ 1.0, -2.0},
+                            (__m128d)(__v2df){ 0.0,  5.0}) == 1);
 
 int test_mm256_testc_pd(__m256d A, __m256d B) {
   // CHECK-LABEL: test_mm256_testc_pd
   // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
   return _mm256_testc_pd(A, B);
 }
+TEST_CONSTEXPR(_mm256_testc_pd(
+  (__m256d)(__v4df){-1.0,  2.0, -3.0,  4.0},
+  (__m256d)(__v4df){-5.0,  6.0,  7.0,  8.0}) == 1);
+TEST_CONSTEXPR(_mm256_testc_pd(
+  (__m256d)(__v4df){ 1.0,  2.0, -3.0,  4.0},
+  (__m256d)(__v4df){-5.0,  6.0,  7.0,  8.0}) == 0);
+TEST_CONSTEXPR(_mm256_testc_pd(
+  (__m256d)(__v4df){-1.0, -2.0, -3.0, -4.0},
+  (__m256d)(__v4df){ 5.0,  6.0,  7.0,  8.0}) == 1);
 
 int test_mm_testc_ps(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_testc_ps
   // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestc.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
   return _mm_testc_ps(A, B);
 }
+TEST_CONSTEXPR(_mm_testc_ps((__m128)(__v4sf){-1,-1,-9001.1009,}, (__m128)(__v4sf){-1.0,-9001,9001,9000}) == 1);
+TEST_CONSTEXPR(_mm_testc_ps((__m128)(__v4sf){-1,2384.23,-9001.1009,}, (__m128)(__v4sf){-1.0,-9001,9001,9000}) == 0);
+TEST_CONSTEXPR(_mm_testc_ps((__m128)(__v4sf){-1,-2,-9001.1009,-93}, (__m128)(__v4sf){-1.0,-9001,-0.9001,-1000}) == 1);
 
 int test_mm256_testc_ps(__m256 A, __m256 B) {
   // CHECK-LABEL: test_mm256_testc_ps
   // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
   return _mm256_testc_ps(A, B);
 }
+TEST_CONSTEXPR(_mm256_testc_ps((__m256)(__v8sf){-1, -2, -3, -4, -5, -6, -7, 8},(__m256)(__v8sf){1, 2, 3, 4, 5, 6, 7, -8}) == 0);
+TEST_CONSTEXPR(_mm256_testc_ps((__m256)(__v8sf){0,0,0,0,0,0,0,-1.00001},(__m256)(__v8sf){1, 2, 3, 4, 5, 6, 7, -8}) == 1);
+TEST_CONSTEXPR(_mm256_testc_ps((__m256)(__v8sf){0,-0.00002,0,0,0,0,0,-1.00001},(__m256)(__v8sf){1, 2, 3, -4, 5, -6, -7, 8}) == 0);
 
 int test_mm256_testc_si256(__m256i A, __m256i B) {
   // CHECK-LABEL: test_mm256_testc_si256
   // CHECK: call {{.*}}i32 @llvm.x86.avx.ptestc.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}})
   return _mm256_testc_si256(A, B);
 }
+TEST_CONSTEXPR(_mm256_testc_si256((__m256i)(__v4di){0,0,0,0}, (__m256i)(__v4di){0,0,0,0}) == 1);
+TEST_CONSTEXPR(_mm256_testc_si256((__m256i)(__v4di){0,0,-1,0}, (__m256i)(__v4di){0,0,1,0}) == 1);
+TEST_CONSTEXPR(_mm256_testc_si256((__m256i)(__v4di){-1,-2,1,3}, (__m256i)(__v4di){0,-1,1,1}) == 0);
 
 int test_mm_testnzc_pd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_testnzc_pd
   // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
   return _mm_testnzc_pd(A, B);
 }
+TEST_CONSTEXPR(_mm_testnzc_pd((__m128d)(__v2df){-1.0, +2.0},
+                              (__m128d)(__v2df){-3.0, -4.0}) == 1);
+TEST_CONSTEXPR(_mm_testnzc_pd((__m128d)(__v2df){+1.0, +2.0},
+                              (__m128d)(__v2df){+3.0, -4.0}) == 0);
+TEST_CONSTEXPR(_mm_testnzc_pd((__m128d)(__v2df){-1.0, -2.0},
+                              (__m128d)(__v2df){-3.0, +4.0}) == 0);
 
 int test_mm256_testnzc_pd(__m256d A, __m256d B) {
   // CHECK-LABEL: test_mm256_testnzc_pd
   // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
   return _mm256_testnzc_pd(A, B);
 }
+TEST_CONSTEXPR(_mm256_testnzc_pd((__m256d)(__v4df){-1.0, 2.0, 3.0, -4.0}, (__m256d)(__v4df){-5.0, -6.0, 7.0, 8.0}) == 1); TEST_CONSTEXPR(_mm256_testnzc_pd((__m256d)(__v4df){1.0, 2.0, 3.0, 4.0}, (__m256d)(__v4df){-1.0, 6.0, 7.0, 8.0}) == 0); TEST_CONSTEXPR(_mm256_testnzc_pd((__m256d)(__v4df){-1.0, -2.0, -3.0, -4.0}, (__m256d)(__v4df){-5.0, 6.0, 7.0, 8.0}) == 0);  
 
 int test_mm_testnzc_ps(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_testnzc_ps
   // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
   return _mm_testnzc_ps(A, B);
 }
+TEST_CONSTEXPR(_mm_testnzc_ps((__m128)(__v4sf){-9.9,987,-67,0}, (__m128)(__v4sf){10.0,-1.12,-29.29,0}) == 1);
+TEST_CONSTEXPR(_mm_testnzc_ps((__m128)(__v4sf){-810.0,-1.0,-1.0,-3.0}, (__m128)(__v4sf){-10.0,-1.0,-1.0,-2.0}) == 0);
+TEST_CONSTEXPR(_mm_testnzc_ps((__m128)(__v4sf){0,0,0,0}, (__m128)(__v4sf){0,-1,0,-1}) == 0);
 
 int test_mm256_testnzc_ps(__m256 A, __m256 B) {
   // CHECK-LABEL: test_mm256_testnzc_ps
   // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
   return _mm256_testnzc_ps(A, B);
 }
+TEST_CONSTEXPR(_mm256_testnzc_ps((__m256)(__v8sf){-1, -2, -3, -4, -5, -6, -7, 8},(__m256)(__v8sf){1, -2, 3, 4, 5, 6, 7, -8}) == 1);
+TEST_CONSTEXPR(_mm256_testnzc_ps((__m256)(__v8sf){0,0,0,0,0,0,0,-1.00001},(__m256)(__v8sf){1, 2, 3, 4, 5, 6, 7, -8}) == 0);
+TEST_CONSTEXPR(_mm256_testnzc_ps((__m256)(__v8sf){0,-0.00002,0,0,0,0,0,-1.00001},(__m256)(__v8sf){1, 2, 3, -4, 5, -6, -7, 8}) == 0);
 
 int test_mm256_testnzc_si256(__m256i A, __m256i B) {
   // CHECK-LABEL: test_mm256_testnzc_si256
   // CHECK: call {{.*}}i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}})
   return _mm256_testnzc_si256(A, B);
 }
+TEST_CONSTEXPR(_mm256_testnzc_si256((__m256i)(__v4di){-1,0,0,0}, (__m256i)(__v4di){478329848,23438,2343,-3483}) == 1);
+TEST_CONSTEXPR(_mm256_testnzc_si256((__m256i)(__v4di){-1,239483,-1,0}, (__m256i)(__v4di){3849234,0,-2,0}) == 0);
+TEST_CONSTEXPR(_mm256_testnzc_si256((__m256i)(__v4di){-1,0,-1,3}, (__m256i)(__v4di){1,0,9999999,1}) == 0);
 
 int test_mm_testz_pd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_testz_pd
   // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestz.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
   return _mm_testz_pd(A, B);
 }
+TEST_CONSTEXPR(_mm_testz_pd((__m128d)(__v2df){-1,0}, (__m128d)(__v2df){0,-1}) == 1);
+TEST_CONSTEXPR(_mm_testz_pd((__m128d)(__v2df){0,-13.13}, (__m128d)(__v2df){0,-11.1}) == 0);
+TEST_CONSTEXPR(_mm_testz_pd((__m128d)(__v2df){0,5.13}, (__m128d)(__v2df){0,-113.1324823}) == 1);
+
 
 int test_mm256_testz_pd(__m256d A, __m256d B) {
   // CHECK-LABEL: test_mm256_testz_pd
   // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
   return _mm256_testz_pd(A, B);
 }
+TEST_CONSTEXPR(_mm256_testz_pd((__m256d)(__v4df){-1,0,-47.47,0.00002}, (__m256d)(__v4df){0,-1,74.0101,-1}) == 1);
+TEST_CONSTEXPR(_mm256_testz_pd((__m256d)(__v4df){-1,3249.9,-47.47,-0.00002}, (__m256d)(__v4df){0,-1,74.0101,-9999900}) == 0);
+TEST_CONSTEXPR(_mm256_testz_pd((__m256d)(__v4df){0,0,-8,0}, (__m256d)(__v4df){0,-1,-101,-123}) == 0);
 
 int test_mm_testz_ps(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_testz_ps
   // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestz.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
   return _mm_testz_ps(A, B);
 }
+TEST_CONSTEXPR(_mm_testz_ps((__m128)(__v4sf){-9.9,987,67,0}, (__m128)(__v4sf){10.0,-1.12,-29.29,0}) == 1);
+TEST_CONSTEXPR(_mm_testz_ps((__m128)(__v4sf){10.0,1.0,-1.0,-3.0}, (__m128)(__v4sf){-10.0,-1.0,-1.0,-2.0}) == 0);
+TEST_CONSTEXPR(_mm_testz_ps((__m128)(__v4sf){0,0,0,0}, (__m128)(__v4sf){0,-1,0,-1}) == 1);
 
 int test_mm256_testz_ps(__m256 A, __m256 B) {
   // CHECK-LABEL: test_mm256_testz_ps
   // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
   return _mm256_testz_ps(A, B);
 }
+TEST_CONSTEXPR(_mm256_testz_ps((__m256)(__v8sf){-1, -2, -3, -4, -5, -6, -7, 8},(__m256)(__v8sf){1, 2, 3, 4, 5, 6, 7, -8}) == 1);
+TEST_CONSTEXPR(_mm256_testz_ps((__m256)(__v8sf){0,0,0,0,0,0,0,-1.00001},(__m256)(__v8sf){1, 2, 3, 4, 5, 6, 7, -8}) == 0);
+TEST_CONSTEXPR(_mm256_testz_ps((__m256)(__v8sf){0,-0.00002,0,0,0,0,0,-1.00001},(__m256)(__v8sf){1, 2, 3, -4, 5, -6, -7, 8}) == 1);
 
 int test_mm256_testz_si256(__m256i A, __m256i B) {
   // CHECK-LABEL: test_mm256_testz_si256
   // CHECK: call {{.*}}i32 @llvm.x86.avx.ptestz.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}})
   return _mm256_testz_si256(A, B);
 }
-TEST_CONSTEXPR(_mm256_testz_si256((__m256i)(__v4du){0,0,0,0}, (__m256i)(__v4du){0,0,0,0}) == 1);
-TEST_CONSTEXPR(_mm256_testz_si256((__m256i)(__v4du){0,0,0,1}, (__m256i)(__v4du){0,0,0,1}) == 0);
+TEST_CONSTEXPR(_mm256_testz_si256((__m256i)(__v4di){0,0,0,0}, (__m256i)(__v4di){0,0,0,0}) == 1);
+TEST_CONSTEXPR(_mm256_testz_si256((__m256i)(__v4di){0,0,-1,0}, (__m256i)(__v4di){0,0,-1,0}) == 0);
+TEST_CONSTEXPR(_mm256_testz_si256((__m256i)(__v4di){-1,0,1,0}, (__m256i)(__v4di){0,-1,0,1}) == 1);
 
 __m256 test_mm256_undefined_ps(void) {
   // X64-LABEL: test_mm256_undefined_ps
diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c
index 64295145d1d76..f379c572b122d 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -441,30 +441,47 @@ int test_mm_test_all_ones(__m128i x) {
   // CHECK: call {{.*}}i32 @llvm.x86.sse41.ptestc(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
   return _mm_test_all_ones(x);
 }
+TEST_CONSTEXPR(_mm_test_all_ones(((__m128i)(__v2di){-1, -1})) == 1);
+TEST_CONSTEXPR(_mm_test_all_ones(((__m128i)(__v2di){-1,  0})) == 0);
+TEST_CONSTEXPR(_mm_test_all_ones(((__m128i)(__v4si){-1, -1, -1, 0x7FFFFFFF})) == 0);
 
 int test_mm_test_all_zeros(__m128i x, __m128i y) {
   // CHECK-LABEL: test_mm_test_all_zeros
   // CHECK: call {{.*}}i32 @llvm.x86.sse41.ptestz(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
   return _mm_test_all_zeros(x, y);
 }
+TEST_CONSTEXPR(_mm_test_all_zeros(((__m128i)(__v2di){0,0}), ((__m128i)(__v2di){0,0})) == 1);
+TEST_CONSTEXPR(_mm_test_all_zeros(((__m128i)(__v2di){0xFF00,0}), ((__m128i)(__v2di){0x00FF,0})) == 1);
+TEST_CONSTEXPR(_mm_test_all_zeros(((__m128i)(__v2di){1,0}), ((__m128i)(__v2di){-1,0})) == 0);
+TEST_CONSTEXPR(_mm_test_all_zeros(((__m128i)(__v2di){0,1}), ((__m128i)(__v2di){0,-1})) == 0);
 
 int test_mm_test_mix_ones_zeros(__m128i x, __m128i y) {
   // CHECK-LABEL: test_mm_test_mix_ones_zeros
   // CHECK: call {{.*}}i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
   return _mm_test_mix_ones_zeros(x, y);
 }
+TEST_CONSTEXPR(_mm_test_mix_ones_zeros(((__m128i)(__v2di){0xFF, 0}), ((__m128i)(__v2di){0xF0, 1})) == 1);
+TEST_CONSTEXPR(_mm_test_mix_ones_zeros(((__m128i)(__v2di){0xF0, 0}), ((__m128i)(__v2di){0x0F, 0})) == 0);
+TEST_CONSTEXPR(_mm_test_mix_ones_zeros(((__m128i)(__v2di){-1, -1}), ((__m128i)(__v2di){1, 0})) == 0);
+TEST_CONSTEXPR(_mm_test_mix_ones_zeros(((__m128i)(__v2di){0, 0}), ((__m128i)(__v2di){0, 0})) == 0);
 
 int test_mm_testc_si128(__m128i x, __m128i y) {
   // CHECK-LABEL: test_mm_testc_si128
   // CHECK: call {{.*}}i32 @llvm.x86.sse41.ptestc(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
   return _mm_testc_si128(x, y);
 }
+TEST_CONSTEXPR(_mm_testc_si128((__m128i)(__v2di){0,0}, (__m128i)(__v2di){0,0}) == 1);
+TEST_CONSTEXPR(_mm_testc_si128((__m128i)(__v2di){1,0}, (__m128i)(__v2di){-1,0}) == 0);
+TEST_CONSTEXPR(_mm_testc_si128((__m128i)(__v2di){0,-1}, (__m128i)(__v2di){0,1}) == 1);
 
 int test_mm_testnzc_si128(__m128i x, __m128i y) {
   // CHECK-LABEL: test_mm_testnzc_si128
   // CHECK: call {{.*}}i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
   return _mm_testnzc_si128(x, y);
 }
+TEST_CONSTEXPR(_mm_testnzc_si128((__m128i)(__v2di){3,0}, (__m128i)(__v2di){1,1}) == 1);
+TEST_CONSTEXPR(_mm_testnzc_si128((__m128i)(__v2di){32,-1}, (__m128i)(__v2di){15,0}) == 0);
+TEST_CONSTEXPR(_mm_testnzc_si128((__m128i)(__v2di){0,999}, (__m128i)(__v2di){0,999}) == 0);
 
 int test_mm_testz_si128(__m128i x, __m128i y) {
   // CHECK-LABEL: test_mm_testz_si128
@@ -472,4 +489,5 @@ int test_mm_testz_si128(__m128i x, __m128i y) {
   return _mm_testz_si128(x, y);
 }
 TEST_CONSTEXPR(_mm_testz_si128((__m128i)(__v2di){0,0}, (__m128i)(__v2di){0,0}) == 1);
-TEST_CONSTEXPR(_mm_testz_si128((__m128i)(__v2di){0,1}, (__m128i)(__v2di){0,1}) == 0);
+TEST_CONSTEXPR(_mm_testz_si128((__m128i)(__v2di){1,0}, (__m128i)(__v2di){-1,0}) == 0);
+TEST_CONSTEXPR(_mm_testz_si128((__m128i)(__v2di){1,0}, (__m128i)(__v2di){0,1}) == 1);

>From 6f140099c9058a0546b7703ae6c4ae0e2508199b Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Tue, 23 Sep 2025 18:00:32 -0700
Subject: [PATCH 18/21] Clang-format

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 104 +++++++------
 clang/lib/AST/ExprConstant.cpp           | 189 ++++++++++++-----------
 2 files changed, 152 insertions(+), 141 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 97e69868771de..14fe009a6f281 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2853,7 +2853,9 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
 
 static bool interp__builtin_test_op(
     InterpState &S, CodePtr OpPC, const CallExpr *Call,
-    llvm::function_ref<bool(const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT, const unsigned SourceLen)> Fn) {
+    llvm::function_ref<bool(const Pointer &LHS, const Pointer &RHS,
+                            const OptPrimType ElemPT, const unsigned SourceLen)>
+        Fn) {
   const Pointer &RHS = S.Stk.pop<Pointer>();
   const Pointer &LHS = S.Stk.pop<Pointer>();
 
@@ -3607,7 +3609,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
   case X86::BI__builtin_ia32_ptestz256:
     return interp__builtin_test_op(
         S, OpPC, Call,
-        [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT, const unsigned SourceLen) { 
+        [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT,
+           const unsigned SourceLen) {
           INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
             for (unsigned I = 0; I < SourceLen; ++I) {
               const APSInt A = LHS.elem<T>(I).toAPSInt();
@@ -3622,9 +3625,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
 
   case X86::BI__builtin_ia32_ptestc128:
   case X86::BI__builtin_ia32_ptestc256:
-        return interp__builtin_test_op(
+    return interp__builtin_test_op(
         S, OpPC, Call,
-        [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT, const unsigned SourceLen) { 
+        [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT,
+           const unsigned SourceLen) {
           INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
             for (unsigned I = 0; I < SourceLen; ++I) {
               const APSInt A = LHS.elem<T>(I).toAPSInt();
@@ -3639,9 +3643,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
 
   case X86::BI__builtin_ia32_ptestnzc128:
   case X86::BI__builtin_ia32_ptestnzc256:
-        return interp__builtin_test_op(
+    return interp__builtin_test_op(
         S, OpPC, Call,
-        [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT, const unsigned SourceLen) { 
+        [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT,
+           const unsigned SourceLen) {
           INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
             bool Flag1 = false;
             bool Flag2 = false;
@@ -3661,68 +3666,71 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
 
   case X86::BI__builtin_ia32_vtestzps:
   case X86::BI__builtin_ia32_vtestzps256:
-    case X86::BI__builtin_ia32_vtestzpd:
+  case X86::BI__builtin_ia32_vtestzpd:
   case X86::BI__builtin_ia32_vtestzpd256:
     return interp__builtin_test_op(
         S, OpPC, Call,
-        [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT, const unsigned SourceLen) { 
-            for (unsigned I = 0; I < SourceLen; ++I) {
-              using T = PrimConv<PT_Float>::T;
-              const APInt A = LHS.elem<T>(I).getAPFloat().bitcastToAPInt();
-              const APInt B = RHS.elem<T>(I).getAPFloat().bitcastToAPInt();
-              const unsigned SignBit = A.getBitWidth() - 1;
-              const bool ASigned = A[SignBit];
-              const bool BSigned = B[SignBit];
-              if (!((ASigned && BSigned) == 0)) {
-                return false;
-              }
+        [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT,
+           const unsigned SourceLen) {
+          for (unsigned I = 0; I < SourceLen; ++I) {
+            using T = PrimConv<PT_Float>::T;
+            const APInt A = LHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+            const APInt B = RHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+            const unsigned SignBit = A.getBitWidth() - 1;
+            const bool ASigned = A[SignBit];
+            const bool BSigned = B[SignBit];
+            if (!((ASigned && BSigned) == 0)) {
+              return false;
             }
-            return true;
+          }
+          return true;
         });
   case X86::BI__builtin_ia32_vtestcps:
   case X86::BI__builtin_ia32_vtestcps256:
-    case X86::BI__builtin_ia32_vtestcpd:
+  case X86::BI__builtin_ia32_vtestcpd:
   case X86::BI__builtin_ia32_vtestcpd256:
     return interp__builtin_test_op(
         S, OpPC, Call,
-        [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT, const unsigned SourceLen) { 
-            for (unsigned I = 0; I < SourceLen; ++I) {
-              using T = PrimConv<PT_Float>::T;
-              const APInt A = LHS.elem<T>(I).getAPFloat().bitcastToAPInt();
-              const APInt B = RHS.elem<T>(I).getAPFloat().bitcastToAPInt();
-              const unsigned SignBit = A.getBitWidth() - 1;
-              const bool ASigned = A[SignBit];
-              const bool BSigned = B[SignBit];
-              if (!((!ASigned && BSigned) == 0)) {
-                return false;
-              }
+        [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT,
+           const unsigned SourceLen) {
+          for (unsigned I = 0; I < SourceLen; ++I) {
+            using T = PrimConv<PT_Float>::T;
+            const APInt A = LHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+            const APInt B = RHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+            const unsigned SignBit = A.getBitWidth() - 1;
+            const bool ASigned = A[SignBit];
+            const bool BSigned = B[SignBit];
+            if (!((!ASigned && BSigned) == 0)) {
+              return false;
             }
-            return true;
+          }
+          return true;
         });
   case X86::BI__builtin_ia32_vtestnzcps:
   case X86::BI__builtin_ia32_vtestnzcps256:
-    case X86::BI__builtin_ia32_vtestnzcpd:
+  case X86::BI__builtin_ia32_vtestnzcpd:
   case X86::BI__builtin_ia32_vtestnzcpd256:
     return interp__builtin_test_op(
         S, OpPC, Call,
-        [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT, const unsigned SourceLen) { 
+        [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT,
+           const unsigned SourceLen) {
           bool Flag1 = false;
           bool Flag2 = false;
-            for (unsigned I = 0; I < SourceLen; ++I) {
-              using T = PrimConv<PT_Float>::T;
-              const APInt A = LHS.elem<T>(I).getAPFloat().bitcastToAPInt();
-              const APInt B = RHS.elem<T>(I).getAPFloat().bitcastToAPInt();
-              const unsigned SignBit = A.getBitWidth() - 1;
-              const bool ASigned = A[SignBit];
-              const bool BSigned = B[SignBit];
-              if ((ASigned && BSigned) != 0) {
-                Flag1 = true;
-              }
-              if ((!ASigned && BSigned) != 0) {
-                Flag2 = true;
-              }
+          for (unsigned I = 0; I < SourceLen; ++I) {
+            using T = PrimConv<PT_Float>::T;
+            const APInt A = LHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+            const APInt B = RHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+            const unsigned SignBit = A.getBitWidth() - 1;
+            const bool ASigned = A[SignBit];
+            const bool BSigned = B[SignBit];
+            if ((ASigned && BSigned) != 0) {
+              Flag1 = true;
             }
-            return Flag1 && Flag2;
+            if ((!ASigned && BSigned) != 0) {
+              Flag2 = true;
+            }
+          }
+          return Flag1 && Flag2;
         });
   case X86::BI__builtin_ia32_selectb_128:
   case X86::BI__builtin_ia32_selectb_256:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index cc3e19230a4b2..41823b8159ee9 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13590,7 +13590,9 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
                                             unsigned BuiltinOp) {
 
   auto EvalTestOp =
-      [&](llvm::function_ref<bool(const APValue &, const APValue &, const unsigned SourceLen)> Fn) {
+      [&](llvm::function_ref<bool(const APValue &, const APValue &,
+                                  const unsigned SourceLen)>
+              Fn) {
         APValue SourceLHS, SourceRHS;
         if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
             !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
@@ -13600,7 +13602,8 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
         unsigned BitWidth = Info.Ctx.getIntWidth(ResultType);
         bool ResultSigned = ResultType->isUnsignedIntegerOrEnumerationType();
         unsigned SourceLen = SourceLHS.getVectorLength();
-        APSInt Result(APInt(BitWidth, Fn(SourceLHS, SourceRHS, SourceLen)), ResultSigned);
+        APSInt Result(APInt(BitWidth, Fn(SourceLHS, SourceRHS, SourceLen)),
+                      ResultSigned);
         return Success(Result, E);
       };
   switch (BuiltinOp) {
@@ -14704,111 +14707,111 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
   }
   case X86::BI__builtin_ia32_ptestz128:
   case X86::BI__builtin_ia32_ptestz256: {
-    return EvalTestOp(
-        [](const APValue &SourceLHS, const APValue &SourceRHS, const unsigned SourceLen) {
-          for (unsigned I = 0; I < SourceLen; ++I) {
-            const APInt &A = SourceLHS.getVectorElt(I).getInt();
-            const APInt &B = SourceRHS.getVectorElt(I).getInt();
-            if (!((A & B) == 0)) {
-              return false;
-            }
-          }
-          return true;
-       });
+    return EvalTestOp([](const APValue &SourceLHS, const APValue &SourceRHS,
+                         const unsigned SourceLen) {
+      for (unsigned I = 0; I < SourceLen; ++I) {
+        const APInt &A = SourceLHS.getVectorElt(I).getInt();
+        const APInt &B = SourceRHS.getVectorElt(I).getInt();
+        if (!((A & B) == 0)) {
+          return false;
+        }
+      }
+      return true;
+    });
   }
   case X86::BI__builtin_ia32_ptestc128:
   case X86::BI__builtin_ia32_ptestc256: {
-    return EvalTestOp(
-        [](const APValue &SourceLHS, const APValue &SourceRHS, const unsigned SourceLen) {
-          for (unsigned I = 0; I < SourceLen; ++I) {
-            const APInt &A = SourceLHS.getVectorElt(I).getInt();
-            const APInt &B = SourceRHS.getVectorElt(I).getInt();
-            if (!((~A & B) == 0)) {
-              return false;
-            }
-          }
-          return true;
-       });
+    return EvalTestOp([](const APValue &SourceLHS, const APValue &SourceRHS,
+                         const unsigned SourceLen) {
+      for (unsigned I = 0; I < SourceLen; ++I) {
+        const APInt &A = SourceLHS.getVectorElt(I).getInt();
+        const APInt &B = SourceRHS.getVectorElt(I).getInt();
+        if (!((~A & B) == 0)) {
+          return false;
+        }
+      }
+      return true;
+    });
   }
   case X86::BI__builtin_ia32_ptestnzc128:
   case X86::BI__builtin_ia32_ptestnzc256: {
-        return EvalTestOp(
-        [](const APValue &SourceLHS, const APValue &SourceRHS, const unsigned SourceLen) {
-          int Flag1 = false, Flag2 = false;
-          for (unsigned I = 0; I < SourceLen; ++I) {
-            const APInt &A = SourceLHS.getVectorElt(I).getInt();
-            const APInt &B = SourceRHS.getVectorElt(I).getInt();
-            if ((A & B) != 0) {
-              Flag1 = true;
-            }
-            if ((~A & B) != 0) {
-              Flag2 = true;
-            }
-          }
-          return Flag1 && Flag2;
-       });
+    return EvalTestOp([](const APValue &SourceLHS, const APValue &SourceRHS,
+                         const unsigned SourceLen) {
+      int Flag1 = false, Flag2 = false;
+      for (unsigned I = 0; I < SourceLen; ++I) {
+        const APInt &A = SourceLHS.getVectorElt(I).getInt();
+        const APInt &B = SourceRHS.getVectorElt(I).getInt();
+        if ((A & B) != 0) {
+          Flag1 = true;
+        }
+        if ((~A & B) != 0) {
+          Flag2 = true;
+        }
+      }
+      return Flag1 && Flag2;
+    });
   }
   case X86::BI__builtin_ia32_vtestzps:
   case X86::BI__builtin_ia32_vtestzps256:
-      case X86::BI__builtin_ia32_vtestzpd:
-    case X86::BI__builtin_ia32_vtestzpd256: {
-    return EvalTestOp(
-        [](const APValue &SourceLHS, const APValue &SourceRHS, const unsigned SourceLen) {
-          for (unsigned I = 0; I < SourceLen; ++I) {
-            const APInt &A = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt();
-            const APInt &B = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt();
-            const unsigned SignBit = A.getBitWidth() - 1;
-            const bool ASigned = A[SignBit];
-            const bool BSigned = B[SignBit];
-            if (!((ASigned && BSigned) == 0)) {
-              return false;
-            }
-          }
-          return true;
-       });
+  case X86::BI__builtin_ia32_vtestzpd:
+  case X86::BI__builtin_ia32_vtestzpd256: {
+    return EvalTestOp([](const APValue &SourceLHS, const APValue &SourceRHS,
+                         const unsigned SourceLen) {
+      for (unsigned I = 0; I < SourceLen; ++I) {
+        const APInt &A = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt();
+        const APInt &B = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt();
+        const unsigned SignBit = A.getBitWidth() - 1;
+        const bool ASigned = A[SignBit];
+        const bool BSigned = B[SignBit];
+        if (!((ASigned && BSigned) == 0)) {
+          return false;
+        }
+      }
+      return true;
+    });
   }
   case X86::BI__builtin_ia32_vtestcps:
-  case X86::BI__builtin_ia32_vtestcps256: 
-      case X86::BI__builtin_ia32_vtestcpd:
-    case X86::BI__builtin_ia32_vtestcpd256:{
-        return EvalTestOp(
-        [](const APValue &SourceLHS, const APValue &SourceRHS, const unsigned SourceLen) {
-          for (unsigned I = 0; I < SourceLen; ++I) {
-            const APInt &A = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt();
-            const APInt &B = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt();
-            const unsigned SignBit = A.getBitWidth() - 1;
-            const bool ASigned = A[SignBit];
-            const bool BSigned = B[SignBit];
-            if (!((!ASigned && BSigned) == 0)) {
-              return false;
-            }
-          }
-          return true;
-       });
+  case X86::BI__builtin_ia32_vtestcps256:
+  case X86::BI__builtin_ia32_vtestcpd:
+  case X86::BI__builtin_ia32_vtestcpd256: {
+    return EvalTestOp([](const APValue &SourceLHS, const APValue &SourceRHS,
+                         const unsigned SourceLen) {
+      for (unsigned I = 0; I < SourceLen; ++I) {
+        const APInt &A = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt();
+        const APInt &B = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt();
+        const unsigned SignBit = A.getBitWidth() - 1;
+        const bool ASigned = A[SignBit];
+        const bool BSigned = B[SignBit];
+        if (!((!ASigned && BSigned) == 0)) {
+          return false;
+        }
+      }
+      return true;
+    });
   }
   case X86::BI__builtin_ia32_vtestnzcps:
   case X86::BI__builtin_ia32_vtestnzcps256:
-      case X86::BI__builtin_ia32_vtestnzcpd:
-    case X86::BI__builtin_ia32_vtestnzcpd256: {
-            return EvalTestOp(
-        [](const APValue &SourceLHS, const APValue &SourceRHS, const unsigned SourceLen) {
-          bool Flag1 = false;
-          bool Flag2 = false;
-          for (unsigned I = 0; I < SourceLen; ++I) {
-            const APInt &A = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt();
-            const APInt &B = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt();
-            const unsigned SignBit = A.getBitWidth() - 1;
-            const bool ASigned = A[SignBit];
-            const bool BSigned = B[SignBit];
-            if ((ASigned && BSigned) != 0) {
-              Flag1 = true;
-            }
-            if ((!ASigned && BSigned) != 0) {
-              Flag2 = true;
-            }
-          }
-          return Flag1 && Flag2;
-       });
+  case X86::BI__builtin_ia32_vtestnzcpd:
+  case X86::BI__builtin_ia32_vtestnzcpd256: {
+    return EvalTestOp([](const APValue &SourceLHS, const APValue &SourceRHS,
+                         const unsigned SourceLen) {
+      bool Flag1 = false;
+      bool Flag2 = false;
+      for (unsigned I = 0; I < SourceLen; ++I) {
+        const APInt &A = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt();
+        const APInt &B = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt();
+        const unsigned SignBit = A.getBitWidth() - 1;
+        const bool ASigned = A[SignBit];
+        const bool BSigned = B[SignBit];
+        if ((ASigned && BSigned) != 0) {
+          Flag1 = true;
+        }
+        if ((!ASigned && BSigned) != 0) {
+          Flag2 = true;
+        }
+      }
+      return Flag1 && Flag2;
+    });
   }
   }
 }

>From 3be64135e2795539f65fc940faea67482c7b8ef2 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Tue, 23 Sep 2025 19:07:27 -0700
Subject: [PATCH 19/21] Rebase

---
 clang/lib/AST/ExprConstant.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index b38203c355719..ffc4b600f9b00 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -14889,6 +14889,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
         [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
   }
 }
+}
 
 /// Determine whether this is a pointer past the end of the complete
 /// object referred to by the lvalue.

>From a820f7978e5f87b8c6123911ef3682c8c6896e86 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Tue, 23 Sep 2025 19:12:52 -0700
Subject: [PATCH 20/21] Remove extra newlines

---
 clang/lib/AST/ExprConstant.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index ffc4b600f9b00..4c05f09a4025e 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13588,8 +13588,6 @@ static bool getBuiltinAlignArguments(const CallExpr *E, EvalInfo &Info,
 
 bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
                                             unsigned BuiltinOp) {
-
-
   auto EvalTestOp =
       [&](llvm::function_ref<bool(const APValue &, const APValue &,
                                   const unsigned SourceLen)>

>From ef27cd0828e972abaa99b1bbf1a92096c7482631 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Fri, 26 Sep 2025 01:12:44 -0700
Subject: [PATCH 21/21] Refactor out boilerplate

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 170 ++++++++---------------
 clang/lib/AST/ExprConstant.cpp           | 154 +++++++-------------
 clang/test/CodeGen/X86/avx-builtins.c    |   5 +-
 3 files changed, 114 insertions(+), 215 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index aed1615aed75f..e7484dc7d662d 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2843,11 +2843,9 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
   return true;
 }
 
-static bool interp__builtin_test_op(
+static bool interp__builtin_ia32_test_op(
     InterpState &S, CodePtr OpPC, const CallExpr *Call,
-    llvm::function_ref<bool(const Pointer &LHS, const Pointer &RHS,
-                            const OptPrimType ElemPT, const unsigned SourceLen)>
-        Fn) {
+    llvm::function_ref<bool(const APInt &A, const APInt &B)> Fn) {
   const Pointer &RHS = S.Stk.pop<Pointer>();
   const Pointer &LHS = S.Stk.pop<Pointer>();
 
@@ -2863,8 +2861,47 @@ static bool interp__builtin_test_op(
   const QualType ElemQT = getElemType(LHS);
   const OptPrimType ElemPT = S.getContext().classify(ElemQT);
 
-  pushInteger(S, Fn(LHS, RHS, ElemPT, SourceLen) ? 1 : 0, Call->getType());
-  return true;
+  if (ElemQT->isIntegerType()) {
+    APInt FirstElem;
+    INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
+      FirstElem = LHS.elem<T>(0).toAPSInt();
+    });
+    const unsigned LaneWidth = FirstElem.getBitWidth();
+
+    APInt AWide(LaneWidth * SourceLen, 0);
+    APInt BWide(LaneWidth * SourceLen, 0);
+
+    for (unsigned I = 0; I != SourceLen; ++I) {
+      APInt ALane;
+      APInt BLane;
+      INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
+        ALane = LHS.elem<T>(I).toAPSInt();
+        BLane = RHS.elem<T>(I).toAPSInt();
+      });
+      AWide.insertBits(ALane, I * LaneWidth);
+      BWide.insertBits(BLane, I * LaneWidth);
+    }
+    pushInteger(S, Fn(AWide, BWide) ? 1 : 0, Call->getType());
+    return true;
+  } else if (ElemQT->isFloatingType()) {
+    APInt ASignBits(SourceLen, 0);
+    APInt BSignBits(SourceLen, 0);
+
+    for (unsigned I = 0; I != SourceLen; ++I) {
+      using T = PrimConv<PT_Float>::T;
+      APInt ALane = LHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+      APInt BLane = RHS.elem<T>(I).getAPFloat().bitcastToAPInt();
+      const unsigned SignBit = ALane.getBitWidth() - 1;
+      const bool ALaneSign = ALane[SignBit];
+      const bool BLaneSign = BLane[SignBit];
+      ASignBits.setBitVal(I, ALaneSign);
+      BSignBits.setBitVal(I, BLaneSign);
+    }
+    pushInteger(S, Fn(ASignBits, BSignBits) ? 1 : 0, Call->getType());
+    return true;
+  } else { // Must be integer or float type
+    return false;
+  }
 }
 
 static bool interp__builtin_elementwise_triop(
@@ -3602,133 +3639,38 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
         S, OpPC, Call, [](const APSInt &F, const APSInt &T, const APSInt &C) {
           return ((APInt)C).isNegative() ? T : F;
         });
-
   case X86::BI__builtin_ia32_ptestz128:
   case X86::BI__builtin_ia32_ptestz256:
-    return interp__builtin_test_op(
-        S, OpPC, Call,
-        [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT,
-           const unsigned SourceLen) {
-          INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
-            for (unsigned I = 0; I < SourceLen; ++I) {
-              const APSInt A = LHS.elem<T>(I).toAPSInt();
-              const APSInt B = RHS.elem<T>(I).toAPSInt();
-              if (!((A & B) == 0)) {
-                return false;
-              }
-            }
-          });
-          return true;
-        });
-
-  case X86::BI__builtin_ia32_ptestc128:
-  case X86::BI__builtin_ia32_ptestc256:
-    return interp__builtin_test_op(
-        S, OpPC, Call,
-        [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT,
-           const unsigned SourceLen) {
-          INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
-            for (unsigned I = 0; I < SourceLen; ++I) {
-              const APSInt A = LHS.elem<T>(I).toAPSInt();
-              const APSInt B = RHS.elem<T>(I).toAPSInt();
-              if (!((~A & B) == 0)) {
-                return false;
-              }
-            }
-          });
-          return true;
-        });
-
-  case X86::BI__builtin_ia32_ptestnzc128:
-  case X86::BI__builtin_ia32_ptestnzc256:
-    return interp__builtin_test_op(
-        S, OpPC, Call,
-        [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT,
-           const unsigned SourceLen) {
-          INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
-            bool Flag1 = false;
-            bool Flag2 = false;
-            for (unsigned I = 0; I < SourceLen; ++I) {
-              const APSInt A = LHS.elem<T>(I).toAPSInt();
-              const APSInt B = RHS.elem<T>(I).toAPSInt();
-              if ((A & B) != 0) {
-                Flag1 = true;
-              }
-              if ((~A & B) != 0) {
-                Flag2 = true;
-              }
-            }
-            return Flag1 && Flag2;
-          });
-        });
-
   case X86::BI__builtin_ia32_vtestzps:
   case X86::BI__builtin_ia32_vtestzps256:
   case X86::BI__builtin_ia32_vtestzpd:
   case X86::BI__builtin_ia32_vtestzpd256:
-    return interp__builtin_test_op(
+    return interp__builtin_ia32_test_op(
         S, OpPC, Call,
-        [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT,
-           const unsigned SourceLen) {
-          for (unsigned I = 0; I < SourceLen; ++I) {
-            using T = PrimConv<PT_Float>::T;
-            const APInt A = LHS.elem<T>(I).getAPFloat().bitcastToAPInt();
-            const APInt B = RHS.elem<T>(I).getAPFloat().bitcastToAPInt();
-            const unsigned SignBit = A.getBitWidth() - 1;
-            const bool ASigned = A[SignBit];
-            const bool BSigned = B[SignBit];
-            if (!((ASigned && BSigned) == 0)) {
-              return false;
-            }
-          }
-          return true;
+        [](const APInt &A, const APInt &B) {
+          return (A & B) == 0;
         });
+  case X86::BI__builtin_ia32_ptestc128:
+  case X86::BI__builtin_ia32_ptestc256:
   case X86::BI__builtin_ia32_vtestcps:
   case X86::BI__builtin_ia32_vtestcps256:
   case X86::BI__builtin_ia32_vtestcpd:
   case X86::BI__builtin_ia32_vtestcpd256:
-    return interp__builtin_test_op(
+    return interp__builtin_ia32_test_op(
         S, OpPC, Call,
-        [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT,
-           const unsigned SourceLen) {
-          for (unsigned I = 0; I < SourceLen; ++I) {
-            using T = PrimConv<PT_Float>::T;
-            const APInt A = LHS.elem<T>(I).getAPFloat().bitcastToAPInt();
-            const APInt B = RHS.elem<T>(I).getAPFloat().bitcastToAPInt();
-            const unsigned SignBit = A.getBitWidth() - 1;
-            const bool ASigned = A[SignBit];
-            const bool BSigned = B[SignBit];
-            if (!((!ASigned && BSigned) == 0)) {
-              return false;
-            }
-          }
-          return true;
+        [](const APInt &A, const APInt &B) {
+          return (~A & B) == 0;
         });
+  case X86::BI__builtin_ia32_ptestnzc128:
+  case X86::BI__builtin_ia32_ptestnzc256:
   case X86::BI__builtin_ia32_vtestnzcps:
   case X86::BI__builtin_ia32_vtestnzcps256:
   case X86::BI__builtin_ia32_vtestnzcpd:
   case X86::BI__builtin_ia32_vtestnzcpd256:
-    return interp__builtin_test_op(
+    return interp__builtin_ia32_test_op(
         S, OpPC, Call,
-        [](const Pointer &LHS, const Pointer &RHS, const OptPrimType ElemPT,
-           const unsigned SourceLen) {
-          bool Flag1 = false;
-          bool Flag2 = false;
-          for (unsigned I = 0; I < SourceLen; ++I) {
-            using T = PrimConv<PT_Float>::T;
-            const APInt A = LHS.elem<T>(I).getAPFloat().bitcastToAPInt();
-            const APInt B = RHS.elem<T>(I).getAPFloat().bitcastToAPInt();
-            const unsigned SignBit = A.getBitWidth() - 1;
-            const bool ASigned = A[SignBit];
-            const bool BSigned = B[SignBit];
-            if ((ASigned && BSigned) != 0) {
-              Flag1 = true;
-            }
-            if ((!ASigned && BSigned) != 0) {
-              Flag2 = true;
-            }
-          }
-          return Flag1 && Flag2;
+        [](const APInt &A, const APInt &B) {
+          return ((A & B) != 0) && ((~A & B) != 0);
         });
   case X86::BI__builtin_ia32_selectb_128:
   case X86::BI__builtin_ia32_selectb_256:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 4c05f09a4025e..232665dff7a77 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13589,21 +13589,54 @@ static bool getBuiltinAlignArguments(const CallExpr *E, EvalInfo &Info,
 bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
                                             unsigned BuiltinOp) {
   auto EvalTestOp =
-      [&](llvm::function_ref<bool(const APValue &, const APValue &,
-                                  const unsigned SourceLen)>
-              Fn) {
+      [&](llvm::function_ref<bool(const APInt &, const APInt &)>Fn) {
         APValue SourceLHS, SourceRHS;
         if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
             !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
           return false;
 
-        QualType ResultType = E->getType();
-        unsigned BitWidth = Info.Ctx.getIntWidth(ResultType);
-        bool ResultSigned = ResultType->isUnsignedIntegerOrEnumerationType();
+        // QualType ResultType = E->getType();
+        // unsigned BitWidth = Info.Ctx.getIntWidth(ResultType);
+        // bool ResultSigned = ResultType->isUnsignedIntegerOrEnumerationType();
         unsigned SourceLen = SourceLHS.getVectorLength();
-        APSInt Result(APInt(BitWidth, Fn(SourceLHS, SourceRHS, SourceLen)),
-                      ResultSigned);
-        return Success(Result, E);
+
+        const VectorType *VT = E->getArg(0)->getType()->castAs<VectorType>();
+        QualType ElemQT = VT->getElementType();
+
+        if (ElemQT->isIntegerType()) {
+          const unsigned LaneWidth = SourceLHS.getVectorElt(0).getInt().getBitWidth();
+          APInt AWide(LaneWidth * SourceLen, 0);
+          APInt BWide(LaneWidth * SourceLen, 0);
+
+          for (unsigned I = 0; I != SourceLen; ++I) {
+            APInt ALane = SourceLHS.getVectorElt(I).getInt();
+            APInt BLane = SourceRHS.getVectorElt(I).getInt();
+            AWide.insertBits(ALane, I * LaneWidth);
+            BWide.insertBits(BLane, I * LaneWidth);
+          }
+          return Success(Fn(AWide, BWide), E);
+
+        } else if (ElemQT->isFloatingType()) {
+          APInt ASignBits(SourceLen, 0);
+          APInt BSignBits(SourceLen, 0);
+
+          for (unsigned I = 0; I != SourceLen; ++I) {
+            APInt ALane = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt();
+            APInt BLane = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt();
+            const unsigned SignBit = ALane.getBitWidth() - 1;
+            const bool ALaneSign = ALane[SignBit];
+            const bool BLaneSign = BLane[SignBit];
+            ASignBits.setBitVal(I, ALaneSign);
+            BSignBits.setBitVal(I, BLaneSign);
+          }
+          return Success(Fn(ASignBits, BSignBits), E);
+
+        } else { // Must be integer or float type
+          return false;
+        }
+        // APSInt Result(APInt(BitWidth, Fn(SourceLHS, SourceRHS, SourceLen)),
+        //               ResultSigned);
+        // return Success(Result, E);
       };
 
   auto HandleMaskBinOp =
@@ -14720,114 +14753,35 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
     return Success(Result, E);
   }
   case X86::BI__builtin_ia32_ptestz128:
-  case X86::BI__builtin_ia32_ptestz256: {
-    return EvalTestOp([](const APValue &SourceLHS, const APValue &SourceRHS,
-                         const unsigned SourceLen) {
-      for (unsigned I = 0; I < SourceLen; ++I) {
-        const APInt &A = SourceLHS.getVectorElt(I).getInt();
-        const APInt &B = SourceRHS.getVectorElt(I).getInt();
-        if (!((A & B) == 0)) {
-          return false;
-        }
-      }
-      return true;
-    });
-  }
-  case X86::BI__builtin_ia32_ptestc128:
-  case X86::BI__builtin_ia32_ptestc256: {
-    return EvalTestOp([](const APValue &SourceLHS, const APValue &SourceRHS,
-                         const unsigned SourceLen) {
-      for (unsigned I = 0; I < SourceLen; ++I) {
-        const APInt &A = SourceLHS.getVectorElt(I).getInt();
-        const APInt &B = SourceRHS.getVectorElt(I).getInt();
-        if (!((~A & B) == 0)) {
-          return false;
-        }
-      }
-      return true;
-    });
-  }
-  case X86::BI__builtin_ia32_ptestnzc128:
-  case X86::BI__builtin_ia32_ptestnzc256: {
-    return EvalTestOp([](const APValue &SourceLHS, const APValue &SourceRHS,
-                         const unsigned SourceLen) {
-      int Flag1 = false, Flag2 = false;
-      for (unsigned I = 0; I < SourceLen; ++I) {
-        const APInt &A = SourceLHS.getVectorElt(I).getInt();
-        const APInt &B = SourceRHS.getVectorElt(I).getInt();
-        if ((A & B) != 0) {
-          Flag1 = true;
-        }
-        if ((~A & B) != 0) {
-          Flag2 = true;
-        }
-      }
-      return Flag1 && Flag2;
-    });
-  }
+  case X86::BI__builtin_ia32_ptestz256: 
   case X86::BI__builtin_ia32_vtestzps:
   case X86::BI__builtin_ia32_vtestzps256:
   case X86::BI__builtin_ia32_vtestzpd:
   case X86::BI__builtin_ia32_vtestzpd256: {
-    return EvalTestOp([](const APValue &SourceLHS, const APValue &SourceRHS,
-                         const unsigned SourceLen) {
-      for (unsigned I = 0; I < SourceLen; ++I) {
-        const APInt &A = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt();
-        const APInt &B = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt();
-        const unsigned SignBit = A.getBitWidth() - 1;
-        const bool ASigned = A[SignBit];
-        const bool BSigned = B[SignBit];
-        if (!((ASigned && BSigned) == 0)) {
-          return false;
-        }
-      }
-      return true;
+    return EvalTestOp([](const APInt &A, const APInt &B) {
+      return (A & B) == 0;
     });
   }
+  case X86::BI__builtin_ia32_ptestc128:
+  case X86::BI__builtin_ia32_ptestc256: 
   case X86::BI__builtin_ia32_vtestcps:
   case X86::BI__builtin_ia32_vtestcps256:
   case X86::BI__builtin_ia32_vtestcpd:
   case X86::BI__builtin_ia32_vtestcpd256: {
-    return EvalTestOp([](const APValue &SourceLHS, const APValue &SourceRHS,
-                         const unsigned SourceLen) {
-      for (unsigned I = 0; I < SourceLen; ++I) {
-        const APInt &A = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt();
-        const APInt &B = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt();
-        const unsigned SignBit = A.getBitWidth() - 1;
-        const bool ASigned = A[SignBit];
-        const bool BSigned = B[SignBit];
-        if (!((!ASigned && BSigned) == 0)) {
-          return false;
-        }
-      }
-      return true;
+    return EvalTestOp([](const APInt &A, const APInt &B) {
+      return (~A & B) == 0;
     });
   }
+  case X86::BI__builtin_ia32_ptestnzc128:
+  case X86::BI__builtin_ia32_ptestnzc256:
   case X86::BI__builtin_ia32_vtestnzcps:
   case X86::BI__builtin_ia32_vtestnzcps256:
   case X86::BI__builtin_ia32_vtestnzcpd:
   case X86::BI__builtin_ia32_vtestnzcpd256: {
-    return EvalTestOp([](const APValue &SourceLHS, const APValue &SourceRHS,
-                         const unsigned SourceLen) {
-      bool Flag1 = false;
-      bool Flag2 = false;
-      for (unsigned I = 0; I < SourceLen; ++I) {
-        const APInt &A = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt();
-        const APInt &B = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt();
-        const unsigned SignBit = A.getBitWidth() - 1;
-        const bool ASigned = A[SignBit];
-        const bool BSigned = B[SignBit];
-        if ((ASigned && BSigned) != 0) {
-          Flag1 = true;
-        }
-        if ((!ASigned && BSigned) != 0) {
-          Flag2 = true;
-        }
-      }
-      return Flag1 && Flag2;
-    });
+       return EvalTestOp([](const APInt &A, const APInt &B) {
+    return ((A & B) != 0) && ((~A & B) != 0);
+       });
   }
-
   case X86::BI__builtin_ia32_kandqi:
   case X86::BI__builtin_ia32_kandhi:
   case X86::BI__builtin_ia32_kandsi:
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index b7007245d7e9f..fabbebc69c876 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -2064,7 +2064,9 @@ int test_mm256_testnzc_pd(__m256d A, __m256d B) {
   // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
   return _mm256_testnzc_pd(A, B);
 }
-TEST_CONSTEXPR(_mm256_testnzc_pd((__m256d)(__v4df){-1.0, 2.0, 3.0, -4.0}, (__m256d)(__v4df){-5.0, -6.0, 7.0, 8.0}) == 1); TEST_CONSTEXPR(_mm256_testnzc_pd((__m256d)(__v4df){1.0, 2.0, 3.0, 4.0}, (__m256d)(__v4df){-1.0, 6.0, 7.0, 8.0}) == 0); TEST_CONSTEXPR(_mm256_testnzc_pd((__m256d)(__v4df){-1.0, -2.0, -3.0, -4.0}, (__m256d)(__v4df){-5.0, 6.0, 7.0, 8.0}) == 0);  
+TEST_CONSTEXPR(_mm256_testnzc_pd((__m256d)(__v4df){-1.0, 2.0, 3.0, -4.0}, (__m256d)(__v4df){-5.0, -6.0, 7.0, 8.0}) == 1);
+TEST_CONSTEXPR(_mm256_testnzc_pd((__m256d)(__v4df){1.0, 2.0, 3.0, 4.0}, (__m256d)(__v4df){-1.0, 6.0, 7.0, 8.0}) == 0);
+TEST_CONSTEXPR(_mm256_testnzc_pd((__m256d)(__v4df){-1.0, -2.0, -3.0, -4.0}, (__m256d)(__v4df){-5.0, 6.0, 7.0, 8.0}) == 0);
 
 int test_mm_testnzc_ps(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_testnzc_ps
@@ -2090,6 +2092,7 @@ int test_mm256_testnzc_si256(__m256i A, __m256i B) {
   return _mm256_testnzc_si256(A, B);
 }
 TEST_CONSTEXPR(_mm256_testnzc_si256((__m256i)(__v4di){-1,0,0,0}, (__m256i)(__v4di){478329848,23438,2343,-3483}) == 1);
+TEST_CONSTEXPR(_mm256_testnzc_si256((__m256i)(__v4di){1,0,0,0}, (__m256i)(__v4di){3,0,0,0}) == 1);
 TEST_CONSTEXPR(_mm256_testnzc_si256((__m256i)(__v4di){-1,239483,-1,0}, (__m256i)(__v4di){3849234,0,-2,0}) == 0);
 TEST_CONSTEXPR(_mm256_testnzc_si256((__m256i)(__v4di){-1,0,-1,3}, (__m256i)(__v4di){1,0,9999999,1}) == 0);
 



More information about the cfe-commits mailing list