[clang] [X86][Clang] Add AVX512 Integer Comparison Intrinsics for constexpr Evaluation (PR #164026)

via cfe-commits cfe-commits at lists.llvm.org
Sat Oct 18 13:23:16 PDT 2025


https://github.com/sskzakaria updated https://github.com/llvm/llvm-project/pull/164026

>From dbcb924813e9d657e7aba3b7a9cd79b7635f63ab Mon Sep 17 00:00:00 2001
From: sskzakaria <ssskzakaria at proton.me>
Date: Fri, 17 Oct 2025 18:27:24 -0400
Subject: [PATCH 1/6] [X86][Clang] VectorExprEvaluator::VisitCallExpr /
 InterpretBuiltin - add AVX512 integer comparison intrinsics to be used in
 constexpr

---
 clang/include/clang/Basic/BuiltinsX86.td     | 36 ++++-----
 clang/lib/AST/ByteCode/InterpBuiltin.cpp     | 84 ++++++++++++++++++++
 clang/lib/AST/ExprConstant.cpp               | 81 +++++++++++++++++++
 clang/lib/Headers/avx512vlbwintrin.h         |  8 +-
 clang/test/CodeGen/X86/avx512vlbw-builtins.c | 23 ++++++
 5 files changed, 210 insertions(+), 22 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 62c70fba946be..e409042c5818e 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -1272,81 +1272,81 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in {
   def knotdi : X86Builtin<"unsigned long long int(unsigned long long int)">;
 }
 
-let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def cmpb128_mask : X86Builtin<"unsigned short(_Vector<16, char>, _Vector<16, char>, _Constant int, unsigned short)">;
 }
 
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def cmpd128_mask : X86Builtin<"unsigned char(_Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">;
   def cmpq128_mask : X86Builtin<"unsigned char(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">;
 }
 
-let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def cmpw128_mask : X86Builtin<"unsigned char(_Vector<8, short>, _Vector<8, short>, _Constant int, unsigned char)">;
 }
 
-let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def cmpb256_mask : X86Builtin<"unsigned int(_Vector<32, char>, _Vector<32, char>, _Constant int, unsigned int)">;
 }
 
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def cmpd256_mask : X86Builtin<"unsigned char(_Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">;
   def cmpq256_mask : X86Builtin<"unsigned char(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">;
 }
 
-let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def cmpw256_mask : X86Builtin<"unsigned short(_Vector<16, short>, _Vector<16, short>, _Constant int, unsigned short)">;
 }
 
-let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
   def cmpb512_mask : X86Builtin<"unsigned long long int(_Vector<64, char>, _Vector<64, char>, _Constant int, unsigned long long int)">;
 }
 
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
   def cmpd512_mask : X86Builtin<"unsigned short(_Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">;
   def cmpq512_mask : X86Builtin<"unsigned char(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">;
 }
 
-let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
   def cmpw512_mask : X86Builtin<"unsigned int(_Vector<32, short>, _Vector<32, short>, _Constant int, unsigned int)">;
 }
 
-let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def ucmpb128_mask : X86Builtin<"unsigned short(_Vector<16, char>, _Vector<16, char>, _Constant int, unsigned short)">;
 }
 
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def ucmpd128_mask : X86Builtin<"unsigned char(_Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">;
   def ucmpq128_mask : X86Builtin<"unsigned char(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">;
 }
 
-let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def ucmpw128_mask : X86Builtin<"unsigned char(_Vector<8, short>, _Vector<8, short>, _Constant int, unsigned char)">;
 }
 
-let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def ucmpb256_mask : X86Builtin<"unsigned int(_Vector<32, char>, _Vector<32, char>, _Constant int, unsigned int)">;
 }
 
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def ucmpd256_mask : X86Builtin<"unsigned char(_Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">;
   def ucmpq256_mask : X86Builtin<"unsigned char(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">;
 }
 
-let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def ucmpw256_mask : X86Builtin<"unsigned short(_Vector<16, short>, _Vector<16, short>, _Constant int, unsigned short)">;
 }
 
-let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
   def ucmpb512_mask : X86Builtin<"unsigned long long int(_Vector<64, char>, _Vector<64, char>, _Constant int, unsigned long long int)">;
 }
 
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
   def ucmpd512_mask : X86Builtin<"unsigned short(_Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">;
   def ucmpq512_mask : X86Builtin<"unsigned char(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">;
 }
 
-let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
   def ucmpw512_mask : X86Builtin<"unsigned int(_Vector<32, short>, _Vector<32, short>, _Constant int, unsigned int)">;
 }
 
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 0ee18be166845..0ba8fc0cbc203 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3101,6 +3101,62 @@ static bool interp__builtin_vec_set(InterpState &S, CodePtr OpPC,
   return true;
 }
 
+  static bool interp__builtin_cmp_mask(InterpState &S, CodePtr OpPC,
+                                        const CallExpr *Call, unsigned ID,
+                                        bool IsUnsigned) {
+    assert(Call->getNumArgs() == 4);
+
+    APSInt Mask = popToAPSInt(S, Call->getArg(3));
+    APSInt Opcode = popToAPSInt(S, Call->getArg(2));
+    const Pointer &LHS = S.Stk.pop<Pointer>();
+    const Pointer &RHS = S.Stk.pop<Pointer>();
+
+    assert(LHS.getNumElems() == RHS.getNumElems());
+
+    APInt RetMask = APInt::getZero(LHS.getNumElems());
+    unsigned VectorLen = LHS.getNumElems();
+    PrimType ElemT = LHS.getFieldDesc()->getPrimType();
+
+    INT_TYPE_SWITCH_NO_BOOL(ElemT, {
+      for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
+        APSInt A = LHS.elem<T>(ElemNum).toAPSInt();
+        APSInt B = RHS.elem<T>(ElemNum).toAPSInt();
+        bool result = false;
+        switch (Opcode.getExtValue() & 0x7) {
+          case 0x00: // _MM_CMPINT_EQ
+            result = (A == B);
+            break;
+          case 0x01: // _MM_CMPINT_LT
+            result = IsUnsigned ? A.ult(B) : A.slt(B);
+            break;
+          case 0x02: // _MM_CMPINT_LE
+            result = IsUnsigned ? A.ule(B) : A.sle(B);
+            break;
+          case 0x03: // _MM_CMPINT_FALSE
+            result = false;
+            break;
+          case 0x04: // _MM_CMPINT_NE
+            result = (A != B);
+            break;
+          case 0x05: // _MM_CMPINT_NLT (>=)
+            result = IsUnsigned ? A.uge(B) : A.sge(B);
+            break;
+          case 0x06: // _MM_CMPINT_NLE (>)
+            result = IsUnsigned ? A.ugt(B) : A.sgt(B);
+            break;
+          case 0x07: // _MM_CMPINT_TRUE
+            result = true;
+            break;
+        }
+
+        RetMask.setBitVal(ElemNum, Mask[ElemNum] && result);
+      }
+    });
+
+    pushInteger(S, RetMask, Call->getType());
+    return true;
+  }
+
 static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC,
                                             const CallExpr *Call) {
   assert(Call->getNumArgs() == 1);
@@ -4141,6 +4197,34 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
   case X86::BI__builtin_ia32_vec_set_v4di:
     return interp__builtin_vec_set(S, OpPC, Call, BuiltinID);
 
+  case X86::BI__builtin_ia32_cmpb128_mask:
+  case X86::BI__builtin_ia32_cmpw128_mask:
+  case X86::BI__builtin_ia32_cmpd128_mask:
+  case X86::BI__builtin_ia32_cmpq128_mask:
+  case X86::BI__builtin_ia32_cmpb256_mask:
+  case X86::BI__builtin_ia32_cmpw256_mask:
+  case X86::BI__builtin_ia32_cmpd256_mask:
+  case X86::BI__builtin_ia32_cmpq256_mask:
+  case X86::BI__builtin_ia32_cmpb512_mask:
+  case X86::BI__builtin_ia32_cmpw512_mask:
+  case X86::BI__builtin_ia32_cmpd512_mask:
+  case X86::BI__builtin_ia32_cmpq512_mask:
+    return interp__builtin_cmp_mask(S, OpPC, Call, BuiltinID, /*IsUnsigned*/false);
+
+  case X86::BI__builtin_ia32_ucmpb128_mask:
+  case X86::BI__builtin_ia32_ucmpw128_mask:
+  case X86::BI__builtin_ia32_ucmpd128_mask:
+  case X86::BI__builtin_ia32_ucmpq128_mask:
+  case X86::BI__builtin_ia32_ucmpb256_mask:
+  case X86::BI__builtin_ia32_ucmpw256_mask:
+  case X86::BI__builtin_ia32_ucmpd256_mask:
+  case X86::BI__builtin_ia32_ucmpq256_mask:
+  case X86::BI__builtin_ia32_ucmpb512_mask:
+  case X86::BI__builtin_ia32_ucmpw512_mask:
+  case X86::BI__builtin_ia32_ucmpd512_mask:
+  case X86::BI__builtin_ia32_ucmpq512_mask:
+    return interp__builtin_cmp_mask(S, OpPC, Call, BuiltinID, /*IsUnsigned*/true);
+
   default:
     S.FFDiag(S.Current->getLocation(OpPC),
              diag::note_invalid_subexpr_in_const_expr)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 16141b27f4ce8..ef17e16388fd8 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -15449,6 +15449,87 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
     unsigned Idx = static_cast<unsigned>(IdxAPS.getZExtValue() & (N - 1));
     return Success(Vec.getVectorElt(Idx).getInt(), E);
   }
+
+  case clang::X86::BI__builtin_ia32_cmpb128_mask:
+  case clang::X86::BI__builtin_ia32_cmpw128_mask:
+  case clang::X86::BI__builtin_ia32_cmpd128_mask:
+  case clang::X86::BI__builtin_ia32_cmpq128_mask:
+  case clang::X86::BI__builtin_ia32_cmpb256_mask:
+  case clang::X86::BI__builtin_ia32_cmpw256_mask:
+  case clang::X86::BI__builtin_ia32_cmpd256_mask:
+  case clang::X86::BI__builtin_ia32_cmpq256_mask:
+  case clang::X86::BI__builtin_ia32_cmpb512_mask:
+  case clang::X86::BI__builtin_ia32_cmpw512_mask:
+  case clang::X86::BI__builtin_ia32_cmpd512_mask:
+  case clang::X86::BI__builtin_ia32_cmpq512_mask:
+  case clang::X86::BI__builtin_ia32_ucmpb128_mask:
+  case clang::X86::BI__builtin_ia32_ucmpw128_mask:
+  case clang::X86::BI__builtin_ia32_ucmpd128_mask:
+  case clang::X86::BI__builtin_ia32_ucmpq128_mask:
+  case clang::X86::BI__builtin_ia32_ucmpb256_mask:
+  case clang::X86::BI__builtin_ia32_ucmpw256_mask:
+  case clang::X86::BI__builtin_ia32_ucmpd256_mask:
+  case clang::X86::BI__builtin_ia32_ucmpq256_mask:
+  case clang::X86::BI__builtin_ia32_ucmpb512_mask:
+  case clang::X86::BI__builtin_ia32_ucmpw512_mask:
+  case clang::X86::BI__builtin_ia32_ucmpd512_mask:
+  case clang::X86::BI__builtin_ia32_ucmpq512_mask: {
+    assert(E->getNumArgs() == 4);
+
+    bool IsUnsigned = (BuiltinOp >= clang::X86::BI__builtin_ia32_ucmpb128_mask &&
+                      BuiltinOp <= clang::X86::BI__builtin_ia32_ucmpq512_mask);
+
+    APValue LHS, RHS;
+    APSInt Mask, Opcode;
+    if (!EvaluateVector(E->getArg(0), LHS, Info) ||
+        !EvaluateVector(E->getArg(1), RHS, Info) ||
+        !EvaluateInteger(E->getArg(2), Opcode, Info) ||
+        !EvaluateInteger(E->getArg(3), Mask, Info))
+      return false;
+
+      assert(LHS.getVectorLength() == RHS.getVectorLength());
+
+    APSInt RetMask = APSInt::getUnsigned(0);
+    unsigned VectorLen = LHS.getVectorLength();
+
+    for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
+      APSInt A = LHS.getVectorElt(ElemNum).getInt();
+      APSInt B = RHS.getVectorElt(ElemNum).getInt();
+      bool result = false;
+
+      switch (Opcode.getExtValue() & 0x7) {
+        case 0: // _MM_CMPINT_EQ
+          result = (A == B);
+          break;
+        case 1: // _MM_CMPINT_LT
+          result = IsUnsigned ? A.ult(B) : A.slt(B);
+          break;
+        case 2: // _MM_CMPINT_LE
+          result = IsUnsigned ? A.ule(B) : A.sle(B);
+          break;
+        case 3: // _MM_CMPINT_FALSE
+          result = false;
+          break;
+        case 4: // _MM_CMPINT_NE
+          result = (A != B);
+          break;
+        case 5: // _MM_CMPINT_NLT (>=)
+          result = IsUnsigned ? A.uge(B) : A.sge(B);
+          break;
+        case 6: // _MM_CMPINT_NLE (>)
+          result = IsUnsigned ? A.ugt(B) : A.sgt(B);
+          break;
+        case 7: // _MM_CMPINT_TRUE
+          result = true;
+          break;
+      }
+
+      RetMask.setBitVal(ElemNum, Mask[ElemNum] && result);
+    }
+
+    RetMask.setIsUnsigned(true);
+    return Success(APValue(RetMask), E);
+  }
   }
 }
 
diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h
index 639fb60f476c6..ff7ee777ea82a 100644
--- a/clang/lib/Headers/avx512vlbwintrin.h
+++ b/clang/lib/Headers/avx512vlbwintrin.h
@@ -2385,20 +2385,20 @@ _mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A)
              (__mmask32) __U);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_test_epi8_mask (__m128i __A, __m128i __B)
 {
   return _mm_cmpneq_epi8_mask (_mm_and_si128(__A, __B), _mm_setzero_si128());
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
 {
   return _mm_mask_cmpneq_epi8_mask (__U, _mm_and_si128 (__A, __B),
                                     _mm_setzero_si128());
 }
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_test_epi8_mask (__m256i __A, __m256i __B)
 {
   return _mm256_cmpneq_epi8_mask (_mm256_and_si256(__A, __B),
@@ -2439,7 +2439,7 @@ _mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
                                         _mm256_setzero_si256());
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_testn_epi8_mask (__m128i __A, __m128i __B)
 {
   return _mm_cmpeq_epi8_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
index d569283928a0a..1cc4518484c19 100644
--- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
@@ -2890,6 +2890,12 @@ __mmask16 test_mm_test_epi8_mask(__m128i __A, __m128i __B) {
   return _mm_test_epi8_mask(__A, __B); 
 }
 
+TEST_CONSTEXPR(_mm_test_epi8_mask(
+    (__m128i)(__v16qi){1, 2, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+    (__m128i)(__v16qi){1, 2, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
+)
+== (__mmask16)0xfffb);
+
 __mmask16 test_mm_mask_test_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_mask_test_epi8_mask
   // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
@@ -2897,6 +2903,12 @@ __mmask16 test_mm_mask_test_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B) {
   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
   return _mm_mask_test_epi8_mask(__U, __A, __B); 
 }
+TEST_CONSTEXPR(_mm_mask_test_epi8_mask(
+    0xFFFF,
+    (__m128i)(__v16qi){1, 2, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+    (__m128i)(__v16qi){1, 2, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
+)
+== (__mmask16)0xfffb);
 
 __mmask32 test_mm256_test_epi8_mask(__m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_test_epi8_mask
@@ -2904,6 +2916,11 @@ __mmask32 test_mm256_test_epi8_mask(__m256i __A, __m256i __B) {
   // CHECK: icmp ne <32 x i8> %{{.*}}, %{{.*}}
   return _mm256_test_epi8_mask(__A, __B); 
 }
+TEST_CONSTEXPR(_mm256_test_epi8_mask(
+    (__m256i)(__v32qi){1, 2, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+    (__m256i)(__v32qi){1, 2, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
+)
+== (__mmask32)0xfffbfffb);
 
 __mmask32 test_mm256_mask_test_epi8_mask(__mmask32 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_mask_test_epi8_mask
@@ -2950,6 +2967,12 @@ __mmask16 test_mm_testn_epi8_mask(__m128i __A, __m128i __B) {
   return _mm_testn_epi8_mask(__A, __B); 
 }
 
+TEST_CONSTEXPR(_mm_testn_epi8_mask(
+    (__m128i)(__v16qi){1, 2, 77, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 1, 16, 16},
+    (__m128i)(__v16qi){2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15}
+)
+== (__mmask16)0xe001);
+
 __mmask16 test_mm_mask_testn_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_mask_testn_epi8_mask
   // CHECK: and <2 x i64> %{{.*}}, %{{.*}}

>From fec68e4aecd482f216dcf520b94fb60f44075e22 Mon Sep 17 00:00:00 2001
From: sskzakaria <ssskzakaria at proton.me>
Date: Fri, 17 Oct 2025 21:32:39 -0400
Subject: [PATCH 2/6] build error

---
 clang/include/clang/Basic/BuiltinsX86.td |  54 ++++++++----
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 104 ++++++++++++-----------
 clang/lib/AST/ExprConstant.cpp           |  60 +++++++------
 clang/lib/Headers/avx512vlbwintrin.h     |  12 +--
 4 files changed, 125 insertions(+), 105 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index e409042c5818e..a2a1746fd1cae 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -1272,81 +1272,99 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in {
   def knotdi : X86Builtin<"unsigned long long int(unsigned long long int)">;
 }
 
-let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+let Features = "avx512vl,avx512bw",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def cmpb128_mask : X86Builtin<"unsigned short(_Vector<16, char>, _Vector<16, char>, _Constant int, unsigned short)">;
 }
 
-let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+let Features = "avx512vl",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def cmpd128_mask : X86Builtin<"unsigned char(_Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">;
   def cmpq128_mask : X86Builtin<"unsigned char(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">;
 }
 
-let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+let Features = "avx512vl,avx512bw",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def cmpw128_mask : X86Builtin<"unsigned char(_Vector<8, short>, _Vector<8, short>, _Constant int, unsigned char)">;
 }
 
-let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+let Features = "avx512vl,avx512bw",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def cmpb256_mask : X86Builtin<"unsigned int(_Vector<32, char>, _Vector<32, char>, _Constant int, unsigned int)">;
 }
 
-let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+let Features = "avx512vl",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def cmpd256_mask : X86Builtin<"unsigned char(_Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">;
   def cmpq256_mask : X86Builtin<"unsigned char(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">;
 }
 
-let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+let Features = "avx512vl,avx512bw",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def cmpw256_mask : X86Builtin<"unsigned short(_Vector<16, short>, _Vector<16, short>, _Constant int, unsigned short)">;
 }
 
-let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
+let Features = "avx512bw",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
   def cmpb512_mask : X86Builtin<"unsigned long long int(_Vector<64, char>, _Vector<64, char>, _Constant int, unsigned long long int)">;
 }
 
-let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
+let Features = "avx512f",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
   def cmpd512_mask : X86Builtin<"unsigned short(_Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">;
   def cmpq512_mask : X86Builtin<"unsigned char(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">;
 }
 
-let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
+let Features = "avx512bw",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
   def cmpw512_mask : X86Builtin<"unsigned int(_Vector<32, short>, _Vector<32, short>, _Constant int, unsigned int)">;
 }
 
-let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+let Features = "avx512vl,avx512bw",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def ucmpb128_mask : X86Builtin<"unsigned short(_Vector<16, char>, _Vector<16, char>, _Constant int, unsigned short)">;
 }
 
-let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+let Features = "avx512vl",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def ucmpd128_mask : X86Builtin<"unsigned char(_Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">;
   def ucmpq128_mask : X86Builtin<"unsigned char(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">;
 }
 
-let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+let Features = "avx512vl,avx512bw",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def ucmpw128_mask : X86Builtin<"unsigned char(_Vector<8, short>, _Vector<8, short>, _Constant int, unsigned char)">;
 }
 
-let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+let Features = "avx512vl,avx512bw",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def ucmpb256_mask : X86Builtin<"unsigned int(_Vector<32, char>, _Vector<32, char>, _Constant int, unsigned int)">;
 }
 
-let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+let Features = "avx512vl",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def ucmpd256_mask : X86Builtin<"unsigned char(_Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">;
   def ucmpq256_mask : X86Builtin<"unsigned char(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">;
 }
 
-let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+let Features = "avx512vl,avx512bw",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def ucmpw256_mask : X86Builtin<"unsigned short(_Vector<16, short>, _Vector<16, short>, _Constant int, unsigned short)">;
 }
 
-let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
+let Features = "avx512bw",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
   def ucmpb512_mask : X86Builtin<"unsigned long long int(_Vector<64, char>, _Vector<64, char>, _Constant int, unsigned long long int)">;
 }
 
-let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
+let Features = "avx512f",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
   def ucmpd512_mask : X86Builtin<"unsigned short(_Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">;
   def ucmpq512_mask : X86Builtin<"unsigned char(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">;
 }
 
-let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
+let Features = "avx512bw",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
   def ucmpw512_mask : X86Builtin<"unsigned int(_Vector<32, short>, _Vector<32, short>, _Constant int, unsigned int)">;
 }
 
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 0ba8fc0cbc203..2526c13b529a7 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3101,61 +3101,61 @@ static bool interp__builtin_vec_set(InterpState &S, CodePtr OpPC,
   return true;
 }
 
-  static bool interp__builtin_cmp_mask(InterpState &S, CodePtr OpPC,
-                                        const CallExpr *Call, unsigned ID,
-                                        bool IsUnsigned) {
-    assert(Call->getNumArgs() == 4);
+static bool interp__builtin_cmp_mask(InterpState &S, CodePtr OpPC,
+                                     const CallExpr *Call, unsigned ID,
+                                     bool IsUnsigned) {
+  assert(Call->getNumArgs() == 4);
 
-    APSInt Mask = popToAPSInt(S, Call->getArg(3));
-    APSInt Opcode = popToAPSInt(S, Call->getArg(2));
-    const Pointer &LHS = S.Stk.pop<Pointer>();
-    const Pointer &RHS = S.Stk.pop<Pointer>();
-
-    assert(LHS.getNumElems() == RHS.getNumElems());
+  APSInt Mask = popToAPSInt(S, Call->getArg(3));
+  APSInt Opcode = popToAPSInt(S, Call->getArg(2));
+  const Pointer &LHS = S.Stk.pop<Pointer>();
+  const Pointer &RHS = S.Stk.pop<Pointer>();
 
-    APInt RetMask = APInt::getZero(LHS.getNumElems());
-    unsigned VectorLen = LHS.getNumElems();
-    PrimType ElemT = LHS.getFieldDesc()->getPrimType();
+  assert(LHS.getNumElems() == RHS.getNumElems());
 
-    INT_TYPE_SWITCH_NO_BOOL(ElemT, {
-      for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
-        APSInt A = LHS.elem<T>(ElemNum).toAPSInt();
-        APSInt B = RHS.elem<T>(ElemNum).toAPSInt();
-        bool result = false;
-        switch (Opcode.getExtValue() & 0x7) {
-          case 0x00: // _MM_CMPINT_EQ
-            result = (A == B);
-            break;
-          case 0x01: // _MM_CMPINT_LT
-            result = IsUnsigned ? A.ult(B) : A.slt(B);
-            break;
-          case 0x02: // _MM_CMPINT_LE
-            result = IsUnsigned ? A.ule(B) : A.sle(B);
-            break;
-          case 0x03: // _MM_CMPINT_FALSE
-            result = false;
-            break;
-          case 0x04: // _MM_CMPINT_NE
-            result = (A != B);
-            break;
-          case 0x05: // _MM_CMPINT_NLT (>=)
-            result = IsUnsigned ? A.uge(B) : A.sge(B);
-            break;
-          case 0x06: // _MM_CMPINT_NLE (>)
-            result = IsUnsigned ? A.ugt(B) : A.sgt(B);
-            break;
-          case 0x07: // _MM_CMPINT_TRUE
-            result = true;
-            break;
-        }
+  APInt RetMask = APInt::getZero(LHS.getNumElems());
+  unsigned VectorLen = LHS.getNumElems();
+  PrimType ElemT = LHS.getFieldDesc()->getPrimType();
 
-        RetMask.setBitVal(ElemNum, Mask[ElemNum] && result);
+  INT_TYPE_SWITCH_NO_BOOL(ElemT, {
+    for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
+      APSInt A = LHS.elem<T>(ElemNum).toAPSInt();
+      APSInt B = RHS.elem<T>(ElemNum).toAPSInt();
+      bool result = false;
+      switch (Opcode.getExtValue() & 0x7) {
+      case 0x00: // _MM_CMPINT_EQ
+        result = (A == B);
+        break;
+      case 0x01: // _MM_CMPINT_LT
+        result = IsUnsigned ? A.ult(B) : A.slt(B);
+        break;
+      case 0x02: // _MM_CMPINT_LE
+        result = IsUnsigned ? A.ule(B) : A.sle(B);
+        break;
+      case 0x03: // _MM_CMPINT_FALSE
+        result = false;
+        break;
+      case 0x04: // _MM_CMPINT_NE
+        result = (A != B);
+        break;
+      case 0x05: // _MM_CMPINT_NLT (>=)
+        result = IsUnsigned ? A.uge(B) : A.sge(B);
+        break;
+      case 0x06: // _MM_CMPINT_NLE (>)
+        result = IsUnsigned ? A.ugt(B) : A.sgt(B);
+        break;
+      case 0x07: // _MM_CMPINT_TRUE
+        result = true;
+        break;
       }
-    });
 
-    pushInteger(S, RetMask, Call->getType());
-    return true;
-  }
+      RetMask.setBitVal(ElemNum, Mask[ElemNum] && result);
+    }
+  });
+
+  pushInteger(S, RetMask, Call->getType());
+  return true;
+}
 
 static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC,
                                             const CallExpr *Call) {
@@ -4209,7 +4209,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
   case X86::BI__builtin_ia32_cmpw512_mask:
   case X86::BI__builtin_ia32_cmpd512_mask:
   case X86::BI__builtin_ia32_cmpq512_mask:
-    return interp__builtin_cmp_mask(S, OpPC, Call, BuiltinID, /*IsUnsigned*/false);
+    return interp__builtin_cmp_mask(S, OpPC, Call, BuiltinID,
+                                    /*IsUnsigned*/ false);
 
   case X86::BI__builtin_ia32_ucmpb128_mask:
   case X86::BI__builtin_ia32_ucmpw128_mask:
@@ -4223,7 +4224,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
   case X86::BI__builtin_ia32_ucmpw512_mask:
   case X86::BI__builtin_ia32_ucmpd512_mask:
   case X86::BI__builtin_ia32_ucmpq512_mask:
-    return interp__builtin_cmp_mask(S, OpPC, Call, BuiltinID, /*IsUnsigned*/true);
+    return interp__builtin_cmp_mask(S, OpPC, Call, BuiltinID,
+                                    /*IsUnsigned*/ true);
 
   default:
     S.FFDiag(S.Current->getLocation(OpPC),
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index ef17e16388fd8..ac3e3c51fb983 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -15476,8 +15476,9 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
   case clang::X86::BI__builtin_ia32_ucmpq512_mask: {
     assert(E->getNumArgs() == 4);
 
-    bool IsUnsigned = (BuiltinOp >= clang::X86::BI__builtin_ia32_ucmpb128_mask &&
-                      BuiltinOp <= clang::X86::BI__builtin_ia32_ucmpq512_mask);
+    bool IsUnsigned =
+        (BuiltinOp >= clang::X86::BI__builtin_ia32_ucmpb128_mask &&
+         BuiltinOp <= clang::X86::BI__builtin_ia32_ucmpq512_mask);
 
     APValue LHS, RHS;
     APSInt Mask, Opcode;
@@ -15487,41 +15488,44 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
         !EvaluateInteger(E->getArg(3), Mask, Info))
       return false;
 
-      assert(LHS.getVectorLength() == RHS.getVectorLength());
+    assert(LHS.getVectorLength() == RHS.getVectorLength());
 
-    APSInt RetMask = APSInt::getUnsigned(0);
     unsigned VectorLen = LHS.getVectorLength();
+    unsigned RetWidth = VectorLen ? VectorLen : 1;
+    if (Mask.getBitWidth() > RetWidth)
+      RetWidth = Mask.getBitWidth();
 
+    APSInt RetMask(llvm::APInt(RetWidth, 0), /*isUnsigned=*/true);
     for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
       APSInt A = LHS.getVectorElt(ElemNum).getInt();
       APSInt B = RHS.getVectorElt(ElemNum).getInt();
       bool result = false;
 
       switch (Opcode.getExtValue() & 0x7) {
-        case 0: // _MM_CMPINT_EQ
-          result = (A == B);
-          break;
-        case 1: // _MM_CMPINT_LT
-          result = IsUnsigned ? A.ult(B) : A.slt(B);
-          break;
-        case 2: // _MM_CMPINT_LE
-          result = IsUnsigned ? A.ule(B) : A.sle(B);
-          break;
-        case 3: // _MM_CMPINT_FALSE
-          result = false;
-          break;
-        case 4: // _MM_CMPINT_NE
-          result = (A != B);
-          break;
-        case 5: // _MM_CMPINT_NLT (>=)
-          result = IsUnsigned ? A.uge(B) : A.sge(B);
-          break;
-        case 6: // _MM_CMPINT_NLE (>)
-          result = IsUnsigned ? A.ugt(B) : A.sgt(B);
-          break;
-        case 7: // _MM_CMPINT_TRUE
-          result = true;
-          break;
+      case 0: // _MM_CMPINT_EQ
+        result = (A == B);
+        break;
+      case 1: // _MM_CMPINT_LT
+        result = IsUnsigned ? A.ult(B) : A.slt(B);
+        break;
+      case 2: // _MM_CMPINT_LE
+        result = IsUnsigned ? A.ule(B) : A.sle(B);
+        break;
+      case 3: // _MM_CMPINT_FALSE
+        result = false;
+        break;
+      case 4: // _MM_CMPINT_NE
+        result = (A != B);
+        break;
+      case 5: // _MM_CMPINT_NLT (>=)
+        result = IsUnsigned ? A.uge(B) : A.sge(B);
+        break;
+      case 6: // _MM_CMPINT_NLE (>)
+        result = IsUnsigned ? A.ugt(B) : A.sgt(B);
+        break;
+      case 7: // _MM_CMPINT_TRUE
+        result = true;
+        break;
       }
 
       RetMask.setBitVal(ElemNum, Mask[ElemNum] && result);
diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h
index ff7ee777ea82a..97e48357f3ccc 100644
--- a/clang/lib/Headers/avx512vlbwintrin.h
+++ b/clang/lib/Headers/avx512vlbwintrin.h
@@ -2386,21 +2386,18 @@ _mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A)
 }
 
 static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_test_epi8_mask (__m128i __A, __m128i __B)
-{
+_mm_test_epi8_mask(__m128i __A, __m128i __B) {
   return _mm_cmpneq_epi8_mask (_mm_and_si128(__A, __B), _mm_setzero_si128());
 }
 
 static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
-{
+_mm_mask_test_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B) {
   return _mm_mask_cmpneq_epi8_mask (__U, _mm_and_si128 (__A, __B),
                                     _mm_setzero_si128());
 }
 
 static __inline__ __mmask32 __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_test_epi8_mask (__m256i __A, __m256i __B)
-{
+_mm256_test_epi8_mask(__m256i __A, __m256i __B) {
   return _mm256_cmpneq_epi8_mask (_mm256_and_si256(__A, __B),
                                   _mm256_setzero_si256());
 }
@@ -2440,8 +2437,7 @@ _mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
 }
 
 static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_testn_epi8_mask (__m128i __A, __m128i __B)
-{
+_mm_testn_epi8_mask(__m128i __A, __m128i __B) {
   return _mm_cmpeq_epi8_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
 }
 

>From ee703e034a3e044ba85af6ab1bf3b3144422015c Mon Sep 17 00:00:00 2001
From: sskzakaria <ssskzakaria at proton.me>
Date: Fri, 17 Oct 2025 23:59:25 -0400
Subject: [PATCH 3/6] fixed argument order

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp     |  2 +-
 clang/test/CodeGen/X86/avx512vlbw-builtins.c | 15 +++++++++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 2526c13b529a7..1f428360f73aa 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3108,8 +3108,8 @@ static bool interp__builtin_cmp_mask(InterpState &S, CodePtr OpPC,
 
   APSInt Mask = popToAPSInt(S, Call->getArg(3));
   APSInt Opcode = popToAPSInt(S, Call->getArg(2));
-  const Pointer &LHS = S.Stk.pop<Pointer>();
   const Pointer &RHS = S.Stk.pop<Pointer>();
+  const Pointer &LHS = S.Stk.pop<Pointer>();
 
   assert(LHS.getNumElems() == RHS.getNumElems());
 
diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
index 1cc4518484c19..5c6a343559c52 100644
--- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
@@ -645,6 +645,21 @@ __mmask16 test_mm_cmp_epi8_mask(__m128i __a, __m128i __b) {
   return (__mmask16)_mm_cmp_epi8_mask(__a, __b, 0);
 }
 
+TEST_CONSTEXPR(_mm_cmpeq_epi8_mask(
+  ((__m128i)(__v16qi){5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5}),
+  ((__m128i)(__v16qi){3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3})
+) == (__mmask16)0x0000);
+
+TEST_CONSTEXPR(_mm_cmplt_epi8_mask(
+  ((__m128i)(__v16qi){5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5}),
+  ((__m128i)(__v16qi){3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3})
+) == (__mmask16)0x0u);
+
+TEST_CONSTEXPR(_mm_cmple_epi8_mask(
+  ((__m128i)(__v16qi){3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}),
+  ((__m128i)(__v16qi){3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3})
+) == (__mmask16)0xffff);
+
 __mmask16 test_mm_mask_cmp_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
   // CHECK-LABEL: test_mm_mask_cmp_epi8_mask
   // CHECK: icmp eq <16 x i8> %{{.*}}, %{{.*}}

>From 3b6e186448cd3c031ef903ce894431eb6a77a0a2 Mon Sep 17 00:00:00 2001
From: sskzakaria <ssskzakaria at proton.me>
Date: Sat, 18 Oct 2025 03:30:31 -0400
Subject: [PATCH 4/6] moved the type switch into the loop body

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 32 ++++++++++++------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 1f428360f73aa..bab6a45b43251 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3117,42 +3117,42 @@ static bool interp__builtin_cmp_mask(InterpState &S, CodePtr OpPC,
   unsigned VectorLen = LHS.getNumElems();
   PrimType ElemT = LHS.getFieldDesc()->getPrimType();
 
-  INT_TYPE_SWITCH_NO_BOOL(ElemT, {
     for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
-      APSInt A = LHS.elem<T>(ElemNum).toAPSInt();
+      INT_TYPE_SWITCH_NO_BOOL(ElemT, {
       APSInt B = RHS.elem<T>(ElemNum).toAPSInt();
-      bool result = false;
+      bool Result = false;
       switch (Opcode.getExtValue() & 0x7) {
       case 0x00: // _MM_CMPINT_EQ
-        result = (A == B);
+        Result = (LHS.elem<T>(ElemNum).toAPSInt() == RHS.elem<T>(ElemNum).toAPSInt());
         break;
       case 0x01: // _MM_CMPINT_LT
-        result = IsUnsigned ? A.ult(B) : A.slt(B);
+        Result = IsUnsigned ? LHS.elem<T>(ElemNum).toAPSInt().ult(RHS.elem<T>(ElemNum).toAPSInt()) : LHS.elem<T>(ElemNum).toAPSInt().slt(RHS.elem<T>(ElemNum).toAPSInt());
         break;
       case 0x02: // _MM_CMPINT_LE
-        result = IsUnsigned ? A.ule(B) : A.sle(B);
+        Result = IsUnsigned ? LHS.elem<T>(ElemNum).toAPSInt().ule(RHS.elem<T>(ElemNum).toAPSInt()) : LHS.elem<T>(ElemNum).toAPSInt().sle(RHS.elem<T>(ElemNum).toAPSInt());
         break;
       case 0x03: // _MM_CMPINT_FALSE
-        result = false;
+        Result = false;
         break;
       case 0x04: // _MM_CMPINT_NE
-        result = (A != B);
+        Result = (LHS.elem<T>(ElemNum).toAPSInt() != RHS.elem<T>(ElemNum).toAPSInt());
         break;
       case 0x05: // _MM_CMPINT_NLT (>=)
-        result = IsUnsigned ? A.uge(B) : A.sge(B);
+        Result = IsUnsigned ? LHS.elem<T>(ElemNum).toAPSInt().uge(RHS.elem<T>(ElemNum).toAPSInt()) : LHS.elem<T>(ElemNum).toAPSInt().sge(RHS.elem<T>(ElemNum).toAPSInt());
         break;
       case 0x06: // _MM_CMPINT_NLE (>)
-        result = IsUnsigned ? A.ugt(B) : A.sgt(B);
+        Result = IsUnsigned ? LHS.elem<T>(ElemNum).toAPSInt().ugt(RHS.elem<T>(ElemNum).toAPSInt()) : LHS.elem<T>(ElemNum).toAPSInt().sgt(RHS.elem<T>(ElemNum).toAPSInt());
         break;
       case 0x07: // _MM_CMPINT_TRUE
-        result = true;
+        Result = true;
         break;
       }
 
-      RetMask.setBitVal(ElemNum, Mask[ElemNum] && result);
-    }
-  });
+      RetMask.setBitVal(ElemNum, Mask[ElemNum] && Result);
+      });
 
+    }
+  
   pushInteger(S, RetMask, Call->getType());
   return true;
 }
@@ -4210,7 +4210,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
   case X86::BI__builtin_ia32_cmpd512_mask:
   case X86::BI__builtin_ia32_cmpq512_mask:
     return interp__builtin_cmp_mask(S, OpPC, Call, BuiltinID,
-                                    /*IsUnsigned*/ false);
+                                    /*IsUnsigned=*/ false);
 
   case X86::BI__builtin_ia32_ucmpb128_mask:
   case X86::BI__builtin_ia32_ucmpw128_mask:
@@ -4225,7 +4225,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
   case X86::BI__builtin_ia32_ucmpd512_mask:
   case X86::BI__builtin_ia32_ucmpq512_mask:
     return interp__builtin_cmp_mask(S, OpPC, Call, BuiltinID,
-                                    /*IsUnsigned*/ true);
+                                    /*IsUnsigned=*/ true);
 
   default:
     S.FFDiag(S.Current->getLocation(OpPC),

>From c3cdc21d060e2209c7744b86f1b622e009847e0a Mon Sep 17 00:00:00 2001
From: sskzakaria <ssskzakaria at proton.me>
Date: Sat, 18 Oct 2025 03:32:38 -0400
Subject: [PATCH 5/6] format

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 79 ++++++++++++++----------
 1 file changed, 46 insertions(+), 33 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index bab6a45b43251..9966163db4113 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3119,40 +3119,53 @@ static bool interp__builtin_cmp_mask(InterpState &S, CodePtr OpPC,
 
     for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
       INT_TYPE_SWITCH_NO_BOOL(ElemT, {
-      APSInt B = RHS.elem<T>(ElemNum).toAPSInt();
-      bool Result = false;
-      switch (Opcode.getExtValue() & 0x7) {
-      case 0x00: // _MM_CMPINT_EQ
-        Result = (LHS.elem<T>(ElemNum).toAPSInt() == RHS.elem<T>(ElemNum).toAPSInt());
-        break;
-      case 0x01: // _MM_CMPINT_LT
-        Result = IsUnsigned ? LHS.elem<T>(ElemNum).toAPSInt().ult(RHS.elem<T>(ElemNum).toAPSInt()) : LHS.elem<T>(ElemNum).toAPSInt().slt(RHS.elem<T>(ElemNum).toAPSInt());
-        break;
-      case 0x02: // _MM_CMPINT_LE
-        Result = IsUnsigned ? LHS.elem<T>(ElemNum).toAPSInt().ule(RHS.elem<T>(ElemNum).toAPSInt()) : LHS.elem<T>(ElemNum).toAPSInt().sle(RHS.elem<T>(ElemNum).toAPSInt());
-        break;
-      case 0x03: // _MM_CMPINT_FALSE
-        Result = false;
-        break;
-      case 0x04: // _MM_CMPINT_NE
-        Result = (LHS.elem<T>(ElemNum).toAPSInt() != RHS.elem<T>(ElemNum).toAPSInt());
-        break;
-      case 0x05: // _MM_CMPINT_NLT (>=)
-        Result = IsUnsigned ? LHS.elem<T>(ElemNum).toAPSInt().uge(RHS.elem<T>(ElemNum).toAPSInt()) : LHS.elem<T>(ElemNum).toAPSInt().sge(RHS.elem<T>(ElemNum).toAPSInt());
-        break;
-      case 0x06: // _MM_CMPINT_NLE (>)
-        Result = IsUnsigned ? LHS.elem<T>(ElemNum).toAPSInt().ugt(RHS.elem<T>(ElemNum).toAPSInt()) : LHS.elem<T>(ElemNum).toAPSInt().sgt(RHS.elem<T>(ElemNum).toAPSInt());
-        break;
-      case 0x07: // _MM_CMPINT_TRUE
-        Result = true;
-        break;
-      }
+        APSInt B = RHS.elem<T>(ElemNum).toAPSInt();
+        bool Result = false;
+        switch (Opcode.getExtValue() & 0x7) {
+        case 0x00: // _MM_CMPINT_EQ
+          Result = (LHS.elem<T>(ElemNum).toAPSInt() ==
+                    RHS.elem<T>(ElemNum).toAPSInt());
+          break;
+        case 0x01: // _MM_CMPINT_LT
+          Result = IsUnsigned ? LHS.elem<T>(ElemNum).toAPSInt().ult(
+                                    RHS.elem<T>(ElemNum).toAPSInt())
+                              : LHS.elem<T>(ElemNum).toAPSInt().slt(
+                                    RHS.elem<T>(ElemNum).toAPSInt());
+          break;
+        case 0x02: // _MM_CMPINT_LE
+          Result = IsUnsigned ? LHS.elem<T>(ElemNum).toAPSInt().ule(
+                                    RHS.elem<T>(ElemNum).toAPSInt())
+                              : LHS.elem<T>(ElemNum).toAPSInt().sle(
+                                    RHS.elem<T>(ElemNum).toAPSInt());
+          break;
+        case 0x03: // _MM_CMPINT_FALSE
+          Result = false;
+          break;
+        case 0x04: // _MM_CMPINT_NE
+          Result = (LHS.elem<T>(ElemNum).toAPSInt() !=
+                    RHS.elem<T>(ElemNum).toAPSInt());
+          break;
+        case 0x05: // _MM_CMPINT_NLT (>=)
+          Result = IsUnsigned ? LHS.elem<T>(ElemNum).toAPSInt().uge(
+                                    RHS.elem<T>(ElemNum).toAPSInt())
+                              : LHS.elem<T>(ElemNum).toAPSInt().sge(
+                                    RHS.elem<T>(ElemNum).toAPSInt());
+          break;
+        case 0x06: // _MM_CMPINT_NLE (>)
+          Result = IsUnsigned ? LHS.elem<T>(ElemNum).toAPSInt().ugt(
+                                    RHS.elem<T>(ElemNum).toAPSInt())
+                              : LHS.elem<T>(ElemNum).toAPSInt().sgt(
+                                    RHS.elem<T>(ElemNum).toAPSInt());
+          break;
+        case 0x07: // _MM_CMPINT_TRUE
+          Result = true;
+          break;
+        }
 
-      RetMask.setBitVal(ElemNum, Mask[ElemNum] && Result);
+        RetMask.setBitVal(ElemNum, Mask[ElemNum] && Result);
       });
-
     }
-  
+
   pushInteger(S, RetMask, Call->getType());
   return true;
 }
@@ -4210,7 +4223,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
   case X86::BI__builtin_ia32_cmpd512_mask:
   case X86::BI__builtin_ia32_cmpq512_mask:
     return interp__builtin_cmp_mask(S, OpPC, Call, BuiltinID,
-                                    /*IsUnsigned=*/ false);
+                                    /*IsUnsigned=*/false);
 
   case X86::BI__builtin_ia32_ucmpb128_mask:
   case X86::BI__builtin_ia32_ucmpw128_mask:
@@ -4225,7 +4238,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
   case X86::BI__builtin_ia32_ucmpd512_mask:
   case X86::BI__builtin_ia32_ucmpq512_mask:
     return interp__builtin_cmp_mask(S, OpPC, Call, BuiltinID,
-                                    /*IsUnsigned=*/ true);
+                                    /*IsUnsigned=*/true);
 
   default:
     S.FFDiag(S.Current->getLocation(OpPC),

>From 151e23bc11d32dd2fa4759bc4d72a0cf52199a60 Mon Sep 17 00:00:00 2001
From: sskzakaria <ssskzakaria at proton.me>
Date: Sat, 18 Oct 2025 16:18:25 -0400
Subject: [PATCH 6/6] removing repeated elem(ElemNum).toAPSInt() calls to top
 of the loop

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 29 +++++++-----------------
 clang/lib/AST/ExprConstant.cpp           |  5 ++--
 2 files changed, 10 insertions(+), 24 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 9966163db4113..2a996adaedc6f 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3119,43 +3119,30 @@ static bool interp__builtin_cmp_mask(InterpState &S, CodePtr OpPC,
 
     for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
       INT_TYPE_SWITCH_NO_BOOL(ElemT, {
-        APSInt B = RHS.elem<T>(ElemNum).toAPSInt();
+        const APSInt &A = LHS.elem<T>(ElemNum).toAPSInt();
+        const APSInt &B = RHS.elem<T>(ElemNum).toAPSInt();
         bool Result = false;
         switch (Opcode.getExtValue() & 0x7) {
         case 0x00: // _MM_CMPINT_EQ
-          Result = (LHS.elem<T>(ElemNum).toAPSInt() ==
-                    RHS.elem<T>(ElemNum).toAPSInt());
+          Result = (A == B);
           break;
         case 0x01: // _MM_CMPINT_LT
-          Result = IsUnsigned ? LHS.elem<T>(ElemNum).toAPSInt().ult(
-                                    RHS.elem<T>(ElemNum).toAPSInt())
-                              : LHS.elem<T>(ElemNum).toAPSInt().slt(
-                                    RHS.elem<T>(ElemNum).toAPSInt());
+          Result = IsUnsigned ? A.ult(B) : A.slt(B);
           break;
         case 0x02: // _MM_CMPINT_LE
-          Result = IsUnsigned ? LHS.elem<T>(ElemNum).toAPSInt().ule(
-                                    RHS.elem<T>(ElemNum).toAPSInt())
-                              : LHS.elem<T>(ElemNum).toAPSInt().sle(
-                                    RHS.elem<T>(ElemNum).toAPSInt());
+          Result = IsUnsigned ? A.ule(B) : A.sle(B);
           break;
         case 0x03: // _MM_CMPINT_FALSE
           Result = false;
           break;
         case 0x04: // _MM_CMPINT_NE
-          Result = (LHS.elem<T>(ElemNum).toAPSInt() !=
-                    RHS.elem<T>(ElemNum).toAPSInt());
+          Result = (A != B);
           break;
         case 0x05: // _MM_CMPINT_NLT (>=)
-          Result = IsUnsigned ? LHS.elem<T>(ElemNum).toAPSInt().uge(
-                                    RHS.elem<T>(ElemNum).toAPSInt())
-                              : LHS.elem<T>(ElemNum).toAPSInt().sge(
-                                    RHS.elem<T>(ElemNum).toAPSInt());
+          Result = IsUnsigned ? A.uge(B) : A.sge(B);
           break;
         case 0x06: // _MM_CMPINT_NLE (>)
-          Result = IsUnsigned ? LHS.elem<T>(ElemNum).toAPSInt().ugt(
-                                    RHS.elem<T>(ElemNum).toAPSInt())
-                              : LHS.elem<T>(ElemNum).toAPSInt().sgt(
-                                    RHS.elem<T>(ElemNum).toAPSInt());
+          Result = IsUnsigned ? A.ugt(B) : A.sgt(B);
           break;
         case 0x07: // _MM_CMPINT_TRUE
           Result = true;
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index ac3e3c51fb983..4508029ed2494 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -15497,8 +15497,8 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
 
     APSInt RetMask(llvm::APInt(RetWidth, 0), /*isUnsigned=*/true);
     for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
-      APSInt A = LHS.getVectorElt(ElemNum).getInt();
-      APSInt B = RHS.getVectorElt(ElemNum).getInt();
+      const APSInt &A = LHS.getVectorElt(ElemNum).getInt();
+      const APSInt &B = RHS.getVectorElt(ElemNum).getInt();
       bool result = false;
 
       switch (Opcode.getExtValue() & 0x7) {
@@ -15531,7 +15531,6 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
       RetMask.setBitVal(ElemNum, Mask[ElemNum] && result);
     }
 
-    RetMask.setIsUnsigned(true);
     return Success(APValue(RetMask), E);
   }
   }



More information about the cfe-commits mailing list