[clang] [Headers][X86] Allow basic AVX512 predicate ops to be used in constexpr (PR #159998)
via cfe-commits
cfe-commits at lists.llvm.org
Sun Sep 21 12:14:13 PDT 2025
https://github.com/fennecJ updated https://github.com/llvm/llvm-project/pull/159998
>From 7b91ea75b304a0b0fb3f14945b5c2f21f035bffe Mon Sep 17 00:00:00 2001
From: fennecJ <hwahwa649 at gmail.com>
Date: Mon, 22 Sep 2025 02:40:44 +0800
Subject: [PATCH 1/2] Allow kand* to be used in constexpr
---
clang/include/clang/Basic/BuiltinsX86.td | 6 +++---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 8 ++++++++
clang/lib/AST/ExprConstant.cpp | 22 ++++++++++++++++++++++
clang/lib/Headers/avx512bwintrin.h | 9 ++++-----
clang/lib/Headers/avx512dqintrin.h | 5 ++---
clang/lib/Headers/avx512fintrin.h | 5 ++---
clang/test/CodeGen/X86/avx512bw-builtins.c | 12 ++++++++++++
clang/test/CodeGen/X86/avx512dq-builtins.c | 6 ++++++
clang/test/CodeGen/X86/avx512f-builtins.c | 6 ++++++
9 files changed, 65 insertions(+), 14 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index aac502091b57e..e5555236c7666 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -3083,15 +3083,15 @@ let Features = "avx512bw", Attributes = [NoThrow, Const] in {
def kadddi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">;
}
-let Features = "avx512dq", Attributes = [NoThrow, Const] in {
+let Features = "avx512dq", Attributes = [NoThrow, Const, Constexpr] in {
def kandqi : X86Builtin<"unsigned char(unsigned char, unsigned char)">;
}
-let Features = "avx512f", Attributes = [NoThrow, Const] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr] in {
def kandhi : X86Builtin<"unsigned short(unsigned short, unsigned short)">;
}
-let Features = "avx512bw", Attributes = [NoThrow, Const] in {
+let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in {
def kandsi : X86Builtin<"unsigned int(unsigned int, unsigned int)">;
def kanddi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 40b9e04aa335c..41d5d76c8c659 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3588,6 +3588,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_selectpd_512:
return interp__builtin_select(S, OpPC, Call);
+ case X86::BI__builtin_ia32_kandqi:
+ case X86::BI__builtin_ia32_kandhi:
+ case X86::BI__builtin_ia32_kandsi:
+ case X86::BI__builtin_ia32_kanddi:
+ return interp__builtin_elementwise_int_binop(
+ S, OpPC, Call,
+ [](const APSInt &LHS, const APSInt &RHS) { return LHS & RHS; });
+
case Builtin::BI__builtin_elementwise_fshl:
return interp__builtin_elementwise_triop(S, OpPC, Call,
llvm::APIntOps::fshl);
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 12e4e25bd29c4..cd41c54087d08 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13542,6 +13542,20 @@ static bool getBuiltinAlignArguments(const CallExpr *E, EvalInfo &Info,
bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
unsigned BuiltinOp) {
+
+ auto HandleMaskBinOp =
+ [&](llvm::function_ref<APSInt(const APSInt &, const APSInt &)> Fn)
+ -> bool {
+ APValue LHS, RHS;
+ if (!Evaluate(LHS, Info, E->getArg(0)) ||
+ !Evaluate(RHS, Info, E->getArg(1)))
+ return false;
+
+ APSInt ResultInt = Fn(LHS.getInt(), RHS.getInt());
+
+ return Success(APValue(ResultInt), E);
+ };
+
switch (BuiltinOp) {
default:
return false;
@@ -14640,6 +14654,14 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
Result.setBitVal(P++, Val[I]);
return Success(Result, E);
}
+
+ case X86::BI__builtin_ia32_kandqi:
+ case X86::BI__builtin_ia32_kandhi:
+ case X86::BI__builtin_ia32_kandsi:
+ case X86::BI__builtin_ia32_kanddi: {
+ return HandleMaskBinOp(
+ [](const APSInt &LHS, const APSInt &RHS) { return LHS & RHS; });
+ }
}
}
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 599cfbe479676..cfe16062d8f65 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -42,14 +42,13 @@ static __inline __mmask64 __DEFAULT_FN_ATTRS _knot_mask64(__mmask64 __M) {
return __builtin_ia32_knotdi(__M);
}
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_kand_mask32(__mmask32 __A, __mmask32 __B)
-{
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR
+_kand_mask32(__mmask32 __A, __mmask32 __B) {
return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B);
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kand_mask64(__mmask64 __A,
- __mmask64 __B) {
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR
+_kand_mask64(__mmask64 __A, __mmask64 __B) {
return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B);
}
diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h
index 95fdc2851cb9b..52b3e9d148d6e 100644
--- a/clang/lib/Headers/avx512dqintrin.h
+++ b/clang/lib/Headers/avx512dqintrin.h
@@ -35,9 +35,8 @@ _knot_mask8(__mmask8 __M)
return __builtin_ia32_knotqi(__M);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_kand_mask8(__mmask8 __A, __mmask8 __B)
-{
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR
+_kand_mask8(__mmask8 __A, __mmask8 __B) {
return (__mmask8)__builtin_ia32_kandqi((__mmask8)__A, (__mmask8)__B);
}
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 8ebfb75170e17..9ffde30cba915 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -8116,9 +8116,8 @@ _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_kand (__mmask16 __A, __mmask16 __B)
-{
+static __inline__ __mmask16
+ __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kand(__mmask16 __A, __mmask16 __B) {
return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
}
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index 3be708aea8a4d..28e9ff611472a 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -36,6 +36,12 @@ __mmask32 test_kand_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _
__E, __F);
}
+TEST_CONSTEXPR(_kand_mask32(0xCCCCCCCC, 0xAAAAAAAA) == 0x88888888); // data correctness
+TEST_CONSTEXPR(_kand_mask32(0x123456789, 0xFFFFFFFF) == 0x23456789); // should be truncated
+TEST_CONSTEXPR(_kand_mask32(0xABCDEF01, 0x00000000) == 0x00000000); // all-zero
+TEST_CONSTEXPR(_kand_mask32(0x56789ABC, 0xFFFFFFFF) == 0x56789ABC); // all-one
+TEST_CONSTEXPR(_kand_mask32(0xAAAAAAAA, 0x55555555) == 0x00000000); // disjoint
+
__mmask64 test_kand_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
// CHECK-LABEL: test_kand_mask64
// CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
@@ -46,6 +52,12 @@ __mmask64 test_kand_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _
__E, __F);
}
+TEST_CONSTEXPR(_kand_mask64(0xCCCCCCCCCCCCCCCCull, 0xAAAAAAAAAAAAAAAAull) == 0x8888888888888888ull); // data correctness
+TEST_CONSTEXPR(_kand_mask64(0x123456789ABCDEF0ull, 0xFFFFFFFFFFFFFFFFull) == 0x123456789ABCDEF0ull); // full 64-bit value
+TEST_CONSTEXPR(_kand_mask64(0xABCDEF0123456789ull, 0x0000000000000000ull) == 0x0000000000000000ull); // all-zero
+TEST_CONSTEXPR(_kand_mask64(0x56789ABCDEF01234ull, 0xFFFFFFFFFFFFFFFFull) == 0x56789ABCDEF01234ull); // all-one
+TEST_CONSTEXPR(_kand_mask64(0xAAAAAAAAAAAAAAAAull, 0x5555555555555555ull) == 0x0000000000000000ull); // disjoint
+
__mmask32 test_kandn_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
// CHECK-LABEL: test_kandn_mask32
// CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c
index df096e3607f30..cc15517fe5bb5 100644
--- a/clang/test/CodeGen/X86/avx512dq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512dq-builtins.c
@@ -29,6 +29,12 @@ __mmask8 test_kand_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m
__E, __F);
}
+TEST_CONSTEXPR(_kand_mask8(0x0C, 0x0A) == 0x08); // data correctness
+TEST_CONSTEXPR(_kand_mask8(0x123, 0xFF) == 0x23); // should be truncated
+TEST_CONSTEXPR(_kand_mask8(0xAB, 0x00) == 0x00); // all-zero
+TEST_CONSTEXPR(_kand_mask8(0x56, 0xFF) == 0x56); // all-one
+TEST_CONSTEXPR(_kand_mask8(0xAA, 0x55) == 0x00); // disjoint
+
__mmask8 test_kandn_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
// CHECK-LABEL: test_kandn_mask8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index f93216e546a63..70a7ff80f9026 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -8502,6 +8502,12 @@ __mmask16 test_mm512_kand(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __
__E, __F);
}
+TEST_CONSTEXPR(_mm512_kand(0xCC, 0xAA) == 0x88); // data correctness
+TEST_CONSTEXPR(_mm512_kand(0x12345, 0xFFFF) == 0x2345); // should be truncated
+TEST_CONSTEXPR(_mm512_kand(0xABCD, 0x0000) == 0x0000); // all-zero
+TEST_CONSTEXPR(_mm512_kand(0x5678, 0xFFFF) == 0x5678); // all-one
+TEST_CONSTEXPR(_mm512_kand(0xAAAA, 0x5555) == 0x0000); // disjoint
+
__mmask16 test_mm512_kandn(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
// CHECK-LABEL: test_mm512_kandn
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
>From a7f6dad463a3b1386412736a31f6632f2b92876a Mon Sep 17 00:00:00 2001
From: fennecJ <hwahwa649 at gmail.com>
Date: Mon, 22 Sep 2025 03:13:30 +0800
Subject: [PATCH 2/2] Allow int2mask, mask2int to be used in constexpr
---
clang/lib/Headers/avx512fintrin.h | 10 ++++------
clang/test/CodeGen/X86/avx512f-builtins.c | 4 ++++
2 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 9ffde30cba915..5a45082f76e81 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -520,15 +520,13 @@ _mm512_castsi512_si256(__m512i __A) {
return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
}
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_int2mask(int __a)
-{
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_int2mask(int __a) {
return (__mmask16)__a;
}
-static __inline__ int __DEFAULT_FN_ATTRS
-_mm512_mask2int(__mmask16 __a)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_mask2int(__mmask16 __a) {
return (int)__a;
}
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 70a7ff80f9026..e7f6b5aac8279 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -11037,6 +11037,8 @@ __mmask16 test_mm512_int2mask(int __a)
return _mm512_int2mask(__a);
}
+TEST_CONSTEXPR(_mm512_int2mask((int)0xDEADBEEF) == 0xBEEF);
+
int test_mm512_mask2int(__mmask16 __a)
{
// CHECK-LABEL: test_mm512_mask2int
@@ -11044,6 +11046,8 @@ int test_mm512_mask2int(__mmask16 __a)
return _mm512_mask2int(__a);
}
+TEST_CONSTEXPR(_mm512_mask2int(0x8000) == 0x00008000); // Zero-extended
+
__m128 test_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
// CHECK-LABEL: test_mm_mask_move_ss
More information about the cfe-commits mailing list