[clang] [Headers][X86] Allow basic AVX512 predicate ops to be used in constexpr (PR #159998)

via cfe-commits cfe-commits at lists.llvm.org
Sun Sep 21 12:14:13 PDT 2025


https://github.com/fennecJ updated https://github.com/llvm/llvm-project/pull/159998

>From 7b91ea75b304a0b0fb3f14945b5c2f21f035bffe Mon Sep 17 00:00:00 2001
From: fennecJ <hwahwa649 at gmail.com>
Date: Mon, 22 Sep 2025 02:40:44 +0800
Subject: [PATCH 1/2] Allow kand* to be used in constexpr

---
 clang/include/clang/Basic/BuiltinsX86.td   |  6 +++---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp   |  8 ++++++++
 clang/lib/AST/ExprConstant.cpp             | 22 ++++++++++++++++++++++
 clang/lib/Headers/avx512bwintrin.h         |  9 ++++-----
 clang/lib/Headers/avx512dqintrin.h         |  5 ++---
 clang/lib/Headers/avx512fintrin.h          |  5 ++---
 clang/test/CodeGen/X86/avx512bw-builtins.c | 12 ++++++++++++
 clang/test/CodeGen/X86/avx512dq-builtins.c |  6 ++++++
 clang/test/CodeGen/X86/avx512f-builtins.c  |  6 ++++++
 9 files changed, 65 insertions(+), 14 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index aac502091b57e..e5555236c7666 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -3083,15 +3083,15 @@ let Features = "avx512bw", Attributes = [NoThrow, Const] in {
   def kadddi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">;
 }
 
-let Features = "avx512dq", Attributes = [NoThrow, Const] in {
+let Features = "avx512dq", Attributes = [NoThrow, Const, Constexpr] in {
   def kandqi : X86Builtin<"unsigned char(unsigned char, unsigned char)">;
 }
 
-let Features = "avx512f", Attributes = [NoThrow, Const] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr] in {
   def kandhi : X86Builtin<"unsigned short(unsigned short, unsigned short)">;
 }
 
-let Features = "avx512bw", Attributes = [NoThrow, Const] in {
+let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in {
   def kandsi : X86Builtin<"unsigned int(unsigned int, unsigned int)">;
   def kanddi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">;
 }
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 40b9e04aa335c..41d5d76c8c659 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3588,6 +3588,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
   case X86::BI__builtin_ia32_selectpd_512:
     return interp__builtin_select(S, OpPC, Call);
 
+  case X86::BI__builtin_ia32_kandqi:
+  case X86::BI__builtin_ia32_kandhi:
+  case X86::BI__builtin_ia32_kandsi:
+  case X86::BI__builtin_ia32_kanddi:
+    return interp__builtin_elementwise_int_binop(
+        S, OpPC, Call,
+        [](const APSInt &LHS, const APSInt &RHS) { return LHS & RHS; });
+
   case Builtin::BI__builtin_elementwise_fshl:
     return interp__builtin_elementwise_triop(S, OpPC, Call,
                                              llvm::APIntOps::fshl);
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 12e4e25bd29c4..cd41c54087d08 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13542,6 +13542,20 @@ static bool getBuiltinAlignArguments(const CallExpr *E, EvalInfo &Info,
 
 bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
                                             unsigned BuiltinOp) {
+
+  auto HandleMaskBinOp =
+      [&](llvm::function_ref<APSInt(const APSInt &, const APSInt &)> Fn)
+      -> bool {
+    APValue LHS, RHS;
+    if (!Evaluate(LHS, Info, E->getArg(0)) ||
+        !Evaluate(RHS, Info, E->getArg(1)))
+      return false;
+
+    APSInt ResultInt = Fn(LHS.getInt(), RHS.getInt());
+
+    return Success(APValue(ResultInt), E);
+  };
+
   switch (BuiltinOp) {
   default:
     return false;
@@ -14640,6 +14654,14 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
         Result.setBitVal(P++, Val[I]);
     return Success(Result, E);
   }
+
+  case X86::BI__builtin_ia32_kandqi:
+  case X86::BI__builtin_ia32_kandhi:
+  case X86::BI__builtin_ia32_kandsi:
+  case X86::BI__builtin_ia32_kanddi: {
+    return HandleMaskBinOp(
+        [](const APSInt &LHS, const APSInt &RHS) { return LHS & RHS; });
+  }
   }
 }
 
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 599cfbe479676..cfe16062d8f65 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -42,14 +42,13 @@ static __inline __mmask64 __DEFAULT_FN_ATTRS _knot_mask64(__mmask64 __M) {
   return __builtin_ia32_knotdi(__M);
 }
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_kand_mask32(__mmask32 __A, __mmask32 __B)
-{
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR
+_kand_mask32(__mmask32 __A, __mmask32 __B) {
   return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kand_mask64(__mmask64 __A,
-                                                            __mmask64 __B) {
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR
+_kand_mask64(__mmask64 __A, __mmask64 __B) {
   return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B);
 }
 
diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h
index 95fdc2851cb9b..52b3e9d148d6e 100644
--- a/clang/lib/Headers/avx512dqintrin.h
+++ b/clang/lib/Headers/avx512dqintrin.h
@@ -35,9 +35,8 @@ _knot_mask8(__mmask8 __M)
   return __builtin_ia32_knotqi(__M);
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_kand_mask8(__mmask8 __A, __mmask8 __B)
-{
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR
+_kand_mask8(__mmask8 __A, __mmask8 __B) {
   return (__mmask8)__builtin_ia32_kandqi((__mmask8)__A, (__mmask8)__B);
 }
 
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 8ebfb75170e17..9ffde30cba915 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -8116,9 +8116,8 @@ _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
 
 #define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_kand (__mmask16 __A, __mmask16 __B)
-{
+static __inline__ __mmask16
+    __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kand(__mmask16 __A, __mmask16 __B) {
   return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
 }
 
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index 3be708aea8a4d..28e9ff611472a 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -36,6 +36,12 @@ __mmask32 test_kand_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _
                                                     __E, __F);
 }
 
+TEST_CONSTEXPR(_kand_mask32(0xCCCCCCCC, 0xAAAAAAAA) == 0x88888888); // data correctness
+TEST_CONSTEXPR(_kand_mask32(0x123456789, 0xFFFFFFFF) == 0x23456789); // should be truncated
+TEST_CONSTEXPR(_kand_mask32(0xABCDEF01, 0x00000000) == 0x00000000); // all-zero
+TEST_CONSTEXPR(_kand_mask32(0x56789ABC, 0xFFFFFFFF) == 0x56789ABC); // all-one
+TEST_CONSTEXPR(_kand_mask32(0xAAAAAAAA, 0x55555555) == 0x00000000); // disjoint
+
 __mmask64 test_kand_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
   // CHECK-LABEL: test_kand_mask64
   // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
@@ -46,6 +52,12 @@ __mmask64 test_kand_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _
                                                    __E, __F);
 }
 
+TEST_CONSTEXPR(_kand_mask64(0xCCCCCCCCCCCCCCCCull, 0xAAAAAAAAAAAAAAAAull) == 0x8888888888888888ull); // data correctness
+TEST_CONSTEXPR(_kand_mask64(0x123456789ABCDEF0ull, 0xFFFFFFFFFFFFFFFFull) == 0x123456789ABCDEF0ull); // full 64-bit value
+TEST_CONSTEXPR(_kand_mask64(0xABCDEF0123456789ull, 0x0000000000000000ull) == 0x0000000000000000ull); // all-zero
+TEST_CONSTEXPR(_kand_mask64(0x56789ABCDEF01234ull, 0xFFFFFFFFFFFFFFFFull) == 0x56789ABCDEF01234ull); // all-one
+TEST_CONSTEXPR(_kand_mask64(0xAAAAAAAAAAAAAAAAull, 0x5555555555555555ull) == 0x0000000000000000ull); // disjoint
+
 __mmask32 test_kandn_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
   // CHECK-LABEL: test_kandn_mask32
   // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c
index df096e3607f30..cc15517fe5bb5 100644
--- a/clang/test/CodeGen/X86/avx512dq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512dq-builtins.c
@@ -29,6 +29,12 @@ __mmask8 test_kand_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m
                                                    __E, __F);
 }
 
+TEST_CONSTEXPR(_kand_mask8(0x0C, 0x0A) == 0x08);        // data correctness
+TEST_CONSTEXPR(_kand_mask8(0x123, 0xFF) == 0x23);       // should be truncated
+TEST_CONSTEXPR(_kand_mask8(0xAB, 0x00) == 0x00);        // all-zero
+TEST_CONSTEXPR(_kand_mask8(0x56, 0xFF) == 0x56);        // all-one
+TEST_CONSTEXPR(_kand_mask8(0xAA, 0x55) == 0x00);        // disjoint
+
 __mmask8 test_kandn_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
   // CHECK-LABEL: test_kandn_mask8
   // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index f93216e546a63..70a7ff80f9026 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -8502,6 +8502,12 @@ __mmask16 test_mm512_kand(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __
                                                    __E, __F);
 }
 
+TEST_CONSTEXPR(_mm512_kand(0xCC, 0xAA) == 0x88);        // data correctness
+TEST_CONSTEXPR(_mm512_kand(0x12345, 0xFFFF) == 0x2345); // should be truncated
+TEST_CONSTEXPR(_mm512_kand(0xABCD, 0x0000) == 0x0000);  // all-zero
+TEST_CONSTEXPR(_mm512_kand(0x5678, 0xFFFF) == 0x5678);  // all-one
+TEST_CONSTEXPR(_mm512_kand(0xAAAA, 0x5555) == 0x0000);  // disjoint
+
 __mmask16 test_mm512_kandn(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
   // CHECK-LABEL: test_mm512_kandn
   // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>

>From a7f6dad463a3b1386412736a31f6632f2b92876a Mon Sep 17 00:00:00 2001
From: fennecJ <hwahwa649 at gmail.com>
Date: Mon, 22 Sep 2025 03:13:30 +0800
Subject: [PATCH 2/2] Allow int2mask, mask2int to be used in constexpr

---
 clang/lib/Headers/avx512fintrin.h         | 10 ++++------
 clang/test/CodeGen/X86/avx512f-builtins.c |  4 ++++
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 9ffde30cba915..5a45082f76e81 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -520,15 +520,13 @@ _mm512_castsi512_si256(__m512i __A) {
   return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_int2mask(int __a)
-{
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_int2mask(int __a) {
   return (__mmask16)__a;
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS
-_mm512_mask2int(__mmask16 __a)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_mask2int(__mmask16 __a) {
   return (int)__a;
 }
 
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 70a7ff80f9026..e7f6b5aac8279 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -11037,6 +11037,8 @@ __mmask16 test_mm512_int2mask(int __a)
   return _mm512_int2mask(__a);
 }
 
+TEST_CONSTEXPR(_mm512_int2mask((int)0xDEADBEEF) == 0xBEEF);
+
 int test_mm512_mask2int(__mmask16 __a)
 {
   // CHECK-LABEL: test_mm512_mask2int
@@ -11044,6 +11046,8 @@ int test_mm512_mask2int(__mmask16 __a)
   return _mm512_mask2int(__a);
 }
 
+TEST_CONSTEXPR(_mm512_mask2int(0x8000) == 0x00008000); // Zero-extended
+
 __m128 test_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
   // CHECK-LABEL: test_mm_mask_move_ss



More information about the cfe-commits mailing list