[clang] [X86][Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - allow AVX512 mask predicate intrinsics to be used in constexpr (PR #165054)

via cfe-commits cfe-commits at lists.llvm.org
Wed Oct 29 10:45:09 PDT 2025


https://github.com/sskzakaria updated https://github.com/llvm/llvm-project/pull/165054

>From 00115fd82621a909b6ee8bf049159fd09da3cba1 Mon Sep 17 00:00:00 2001
From: sskzakaria <ssskzakaria at proton.me>
Date: Fri, 24 Oct 2025 18:01:57 -0400
Subject: [PATCH 1/3] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin -
 allow AVX512 mask predicate intrinsics

---
 clang/include/clang/Basic/BuiltinsX86.td     | 33 +++++++++++++-------
 clang/lib/AST/ByteCode/InterpBuiltin.cpp     | 33 ++++++++++++++++++++
 clang/lib/AST/ExprConstant.cpp               | 27 ++++++++++++++++
 clang/lib/Headers/avx512vlbwintrin.h         | 10 +++---
 clang/test/CodeGen/X86/avx512vlbw-builtins.c |  5 +++
 5 files changed, 91 insertions(+), 17 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 62c70fba946be..c962b28668562 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -2502,24 +2502,28 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256
   def rsqrt14ps256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, unsigned char)">;
 }
 
-let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512bw",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
   def cvtb2mask512 : X86Builtin<"unsigned long long int(_Vector<64, char>)">;
   def cvtmask2b512 : X86Builtin<"_Vector<64, char>(unsigned long long int)">;
   def cvtmask2w512 : X86Builtin<"_Vector<32, short>(unsigned int)">;
 }
 
-let Features = "avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512dq",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
   def cvtd2mask512 : X86Builtin<"unsigned short(_Vector<16, int>)">;
   def cvtmask2d512 : X86Builtin<"_Vector<16, int>(unsigned short)">;
   def cvtmask2q512 : X86Builtin<"_Vector<8, long long int>(unsigned char)">;
   def cvtq2mask512 : X86Builtin<"unsigned char(_Vector<8, long long int>)">;
 }
 
-let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512bw,avx512vl",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def cvtb2mask128 : X86Builtin<"unsigned short(_Vector<16, char>)">;
 }
 
-let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512bw,avx512vl",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def cvtb2mask256 : X86Builtin<"unsigned int(_Vector<32, char>)">;
 }
 
@@ -2539,11 +2543,13 @@ let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVector
   def cvtmask2w256 : X86Builtin<"_Vector<16, short>(unsigned short)">;
 }
 
-let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512dq,avx512vl",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def cvtd2mask128 : X86Builtin<"unsigned char(_Vector<4, int>)">;
 }
 
-let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512dq,avx512vl",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def cvtd2mask256 : X86Builtin<"unsigned char(_Vector<8, int>)">;
 }
 
@@ -2563,11 +2569,13 @@ let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVector
   def cvtmask2q256 : X86Builtin<"_Vector<4, long long int>(unsigned char)">;
 }
 
-let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512dq,avx512vl",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def cvtq2mask128 : X86Builtin<"unsigned char(_Vector<2, long long int>)">;
 }
 
-let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512dq,avx512vl",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def cvtq2mask256 : X86Builtin<"unsigned char(_Vector<4, long long int>)">;
 }
 
@@ -3361,15 +3369,18 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256
   def vcvtps2ph256_mask : X86Builtin<"_Vector<8, short>(_Vector<8, float>, _Constant int, _Vector<8, short>, unsigned char)">;
 }
 
-let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512bw",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
   def cvtw2mask512 : X86Builtin<"unsigned int(_Vector<32, short>)">;
 }
 
-let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512bw,avx512vl",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def cvtw2mask128 : X86Builtin<"unsigned char(_Vector<8, short>)">;
 }
 
-let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512bw,avx512vl",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def cvtw2mask256 : X86Builtin<"unsigned short(_Vector<16, short>)">;
 }
 
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 0ee18be166845..f4c61b0ae8d06 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3128,6 +3128,25 @@ static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC,
   return true;
 }
 
+static bool interp__builtin_ia32_cvt_mask(InterpState &S, CodePtr OpPC,
+                                          const CallExpr *Call, unsigned ID) {
+  assert(Call->getNumArgs() == 1);
+
+  const Pointer &Vec = S.Stk.pop<Pointer>();
+  APInt RetMask(Vec.getNumElems(), 0);
+  unsigned VectorLen = Vec.getNumElems();
+  PrimType ElemT = Vec.getFieldDesc()->getPrimType();
+
+  for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
+    APSInt A;
+    INT_TYPE_SWITCH_NO_BOOL(ElemT, { A = Vec.elem<T>(ElemNum).toAPSInt(); });
+    unsigned MSB = A[A.getBitWidth() - 1];
+    RetMask.setBitVal(ElemNum, MSB);
+  }
+  pushInteger(S, RetMask, Call->getType());
+  return true;
+}
+
 bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
                       uint32_t BuiltinID) {
   if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -4141,6 +4160,20 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
   case X86::BI__builtin_ia32_vec_set_v4di:
     return interp__builtin_vec_set(S, OpPC, Call, BuiltinID);
 
+  case X86::BI__builtin_ia32_cvtb2mask128:
+  case X86::BI__builtin_ia32_cvtb2mask256:
+  case X86::BI__builtin_ia32_cvtb2mask512:
+  case X86::BI__builtin_ia32_cvtw2mask128:
+  case X86::BI__builtin_ia32_cvtw2mask256:
+  case X86::BI__builtin_ia32_cvtw2mask512:
+  case X86::BI__builtin_ia32_cvtd2mask128:
+  case X86::BI__builtin_ia32_cvtd2mask256:
+  case X86::BI__builtin_ia32_cvtd2mask512:
+  case X86::BI__builtin_ia32_cvtq2mask128:
+  case X86::BI__builtin_ia32_cvtq2mask256:
+  case X86::BI__builtin_ia32_cvtq2mask512:
+    return interp__builtin_ia32_cvt_mask(S, OpPC, Call, BuiltinID);
+
   default:
     S.FFDiag(S.Current->getLocation(OpPC),
              diag::note_invalid_subexpr_in_const_expr)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 16141b27f4ce8..75a633a5c6232 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -15449,6 +15449,33 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
     unsigned Idx = static_cast<unsigned>(IdxAPS.getZExtValue() & (N - 1));
     return Success(Vec.getVectorElt(Idx).getInt(), E);
   }
+
+  case clang::X86::BI__builtin_ia32_cvtb2mask128:
+  case clang::X86::BI__builtin_ia32_cvtb2mask256:
+  case clang::X86::BI__builtin_ia32_cvtb2mask512:
+  case clang::X86::BI__builtin_ia32_cvtw2mask128:
+  case clang::X86::BI__builtin_ia32_cvtw2mask256:
+  case clang::X86::BI__builtin_ia32_cvtw2mask512:
+  case clang::X86::BI__builtin_ia32_cvtd2mask128:
+  case clang::X86::BI__builtin_ia32_cvtd2mask256:
+  case clang::X86::BI__builtin_ia32_cvtd2mask512:
+  case clang::X86::BI__builtin_ia32_cvtq2mask128:
+  case clang::X86::BI__builtin_ia32_cvtq2mask256:
+  case clang::X86::BI__builtin_ia32_cvtq2mask512: {
+    assert(E->getNumArgs() == 1);
+    APValue Vec;
+    if (!EvaluateVector(E->getArg(0), Vec, Info))
+      return false;
+
+    unsigned VectorLen = Vec.getVectorLength();
+    APSInt RetMask(llvm::APInt(VectorLen, 0), /*isUnsigned=*/true);
+    for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
+      const APSInt &A = Vec.getVectorElt(ElemNum).getInt();
+      unsigned MSB = A[A.getBitWidth() - 1];
+      RetMask.setBitVal(ElemNum, MSB);
+    }
+    return Success(APValue(RetMask), E);
+  }
   }
 }
 
diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h
index 639fb60f476c6..d15e4fdc2ffc1 100644
--- a/clang/lib/Headers/avx512vlbwintrin.h
+++ b/clang/lib/Headers/avx512vlbwintrin.h
@@ -2492,15 +2492,13 @@ _mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
                                        _mm256_setzero_si256());
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
-_mm_movepi8_mask (__m128i __A)
-{
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_movepi8_mask(__m128i __A) {
   return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A);
 }
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
-_mm256_movepi8_mask (__m256i __A)
-{
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_movepi8_mask(__m256i __A) {
   return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A);
 }
 
diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
index d569283928a0a..c38e9a26c3609 100644
--- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
@@ -3009,6 +3009,11 @@ __mmask16 test_mm_movepi8_mask(__m128i __A) {
   return _mm_movepi8_mask(__A); 
 }
 
+TEST_CONSTEXPR(_mm_movepi8_mask(
+    ((__m128i)(__v16qi){0, 1, 2, 3, 4, 5, 6, 7,
+                        8, 9, 10, 11, 12, 13, 14, 15})
+) == (__mmask16)0x0000);
+
 __mmask32 test_mm256_movepi8_mask(__m256i __A) {
   // CHECK-LABEL: test_mm256_movepi8_mask
   // CHECK: [[CMP:%.*]] = icmp slt <32 x i8> %{{.*}}, zeroinitializer

>From 6df958d73c16e75129b2137dd1f5878950a6ce24 Mon Sep 17 00:00:00 2001
From: sskzakaria <ssskzakaria at proton.me>
Date: Fri, 24 Oct 2025 18:57:35 -0400
Subject: [PATCH 2/3] _mm256_movepi8_mask

---
 clang/test/CodeGen/X86/avx512vlbw-builtins.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
index c38e9a26c3609..45ebc1d819ce7 100644
--- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
@@ -3010,9 +3010,9 @@ __mmask16 test_mm_movepi8_mask(__m128i __A) {
 }
 
 TEST_CONSTEXPR(_mm_movepi8_mask(
-    ((__m128i)(__v16qi){0, 1, 2, 3, 4, 5, 6, 7,
+    ((__m128i)(__v16qi){0, 1, char(129), 3, 4, 5, 6, 7,
                         8, 9, 10, 11, 12, 13, 14, 15})
-) == (__mmask16)0x0000);
+) == (__mmask16)0x0004); 
 
 __mmask32 test_mm256_movepi8_mask(__m256i __A) {
   // CHECK-LABEL: test_mm256_movepi8_mask
@@ -3020,6 +3020,13 @@ __mmask32 test_mm256_movepi8_mask(__m256i __A) {
   return _mm256_movepi8_mask(__A); 
 }
 
+TEST_CONSTEXPR(_mm256_movepi8_mask(
+    ((__m256i)(__v32qi){0, 1, char(255), 3, 4, 5, 6, 7,
+                        8, 9, 10, 11, 12, 13, 14, 15,
+                        16, 17, 18, 19, 20, 21, 22, 23,
+                        24, 25, 26, 27, 28, 29, 30, char(128)})
+) == (__mmask32)0x80000004);
+
 __m128i test_mm_movm_epi8(__mmask16 __A) {
   // CHECK-LABEL: test_mm_movm_epi8
   // CHECK: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>

>From 01d277598e6a660bd3c4c63d7761233c19d6d277 Mon Sep 17 00:00:00 2001
From: sskzakaria <ssskzakaria at proton.me>
Date: Fri, 24 Oct 2025 19:14:54 -0400
Subject: [PATCH 3/3] fixing merge conflict

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 7c3c2fdd7c591..3837764fab96b 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3336,6 +3336,9 @@ static bool interp__builtin_ia32_cvt_mask(InterpState &S, CodePtr OpPC,
     RetMask.setBitVal(ElemNum, MSB);
   }
   pushInteger(S, RetMask, Call->getType());
+  return true;
+}
+
 static bool interp__builtin_x86_byteshift(
     InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned ID,
     llvm::function_ref<APInt(const Pointer &, unsigned Lane, unsigned I,
@@ -4514,6 +4517,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
   case X86::BI__builtin_ia32_cvtq2mask256:
   case X86::BI__builtin_ia32_cvtq2mask512:
     return interp__builtin_ia32_cvt_mask(S, OpPC, Call, BuiltinID);
+
   case X86::BI__builtin_ia32_pslldqi128_byteshift:
   case X86::BI__builtin_ia32_pslldqi256_byteshift:
   case X86::BI__builtin_ia32_pslldqi512_byteshift:



More information about the cfe-commits mailing list