[clang] [X86][Clang] Add AVX512 kunpck intrinsics to be used in constexpr (PR #167683)
Ahmed Nour via cfe-commits
cfe-commits at lists.llvm.org
Sun Nov 16 06:58:29 PST 2025
https://github.com/ahmednoursphinx updated https://github.com/llvm/llvm-project/pull/167683
>From e91be48deb3f895f88e9ab6a34ffd730e3fce47f Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Wed, 12 Nov 2025 13:01:46 +0200
Subject: [PATCH 1/5] [clang] Add _mm512_kunpackd and _mm512_kunpackw to
avx512fintrin.h
Add AVX-512 mask unpack intrinsics _mm512_kunpackd and _mm512_kunpackw
to avx512fintrin.h alongside the existing _mm512_kunpackb intrinsic.
These intrinsics extract the lower halves of two mask registers and
concatenate them (the lower half of the first operand becomes the upper
half of the result), using the existing backend support for the
__builtin_ia32_kunpckdi and __builtin_ia32_kunpcksi builtins.
Also adds __mmask32 and __mmask64 type definitions to avx512fintrin.h
for completeness.
Tests already exist in clang/test/CodeGen/X86/avx512bw-builtins.c.
---
clang/lib/Headers/avx512fintrin.h | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 997e9608e112f..e735c8a35f1ce 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -40,6 +40,8 @@ typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)))
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
+typedef unsigned int __mmask32;
+typedef unsigned long long __mmask64;
/* Rounding mode macros. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
@@ -8100,6 +8102,18 @@ _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
}
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
+_mm512_kunpackd (__mmask64 __A, __mmask64 __B)
+{
+ return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A, (__mmask64) __B);
+}
+
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
+{
+ return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, (__mmask32) __B);
+}
+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_kxnor(__mmask16 __A, __mmask16 __B) {
return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
>From c66f105a35d071001c1f126178565dfb75bd540c Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Wed, 12 Nov 2025 13:05:30 +0200
Subject: [PATCH 2/5] [clang] Add _mm512_kunpackd and _mm512_kunpackw
intrinsics with constexpr support
Add AVX-512 mask unpack intrinsics _mm512_kunpackd and _mm512_kunpackw
to avx512fintrin.h alongside the existing _mm512_kunpackb intrinsic.
These intrinsics extract the lower halves of two mask registers and
concatenate them (the lower half of the first operand becomes the upper
half of the result), using the existing backend support for the
__builtin_ia32_kunpckdi and __builtin_ia32_kunpcksi builtins.
Also adds __mmask32 and __mmask64 type definitions to avx512fintrin.h
for completeness.
This patch adds constexpr support for all three kunpack intrinsics by:
1. Using the __DEFAULT_FN_ATTRS_CONSTEXPR attribute
2. Adding builtin interpretation in ExprConstant.cpp for compile-time
evaluation in constexpr contexts
3. Adding constexpr tests to verify correct behavior
Tests already exist in clang/test/CodeGen/X86/avx512bw-builtins.c for
runtime code generation validation.
---
clang/lib/AST/ExprConstant.cpp | 51 +++++++++++++++++++++++
clang/lib/Headers/avx512bwintrin.h | 6 +--
clang/lib/Headers/avx512fintrin.h | 16 ++++---
clang/test/CodeGen/X86/avx512f-builtins.c | 18 ++++++++
4 files changed, 79 insertions(+), 12 deletions(-)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 1bfea24b228e8..62a0a26ff1087 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -16287,6 +16287,42 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
return Success((A | B) == 0, E);
}
+ case clang::X86::BI__builtin_ia32_kunpckhi: {
+ APSInt A, B;
+ if (!EvaluateInteger(E->getArg(0), A, Info) ||
+ !EvaluateInteger(E->getArg(1), B, Info))
+ return false;
+
+ // Extract lower 8 bits of each operand and concatenate
+ // Result = (A[7:0] << 8) | B[7:0]
+ APSInt Result = ((A & 0xFF) << 8) | (B & 0xFF);
+ return Success(Result, E);
+ }
+
+ case clang::X86::BI__builtin_ia32_kunpckdi: {
+ APSInt A, B;
+ if (!EvaluateInteger(E->getArg(0), A, Info) ||
+ !EvaluateInteger(E->getArg(1), B, Info))
+ return false;
+
+ // Extract lower 32 bits of each operand and concatenate
+ // Result = (A[31:0] << 32) | B[31:0]
+ APSInt Result = ((A & 0xFFFFFFFFULL) << 32) | (B & 0xFFFFFFFFULL);
+ return Success(Result, E);
+ }
+
+ case clang::X86::BI__builtin_ia32_kunpcksi: {
+ APSInt A, B;
+ if (!EvaluateInteger(E->getArg(0), A, Info) ||
+ !EvaluateInteger(E->getArg(1), B, Info))
+ return false;
+
+ // Extract lower 16 bits of each operand and concatenate
+ // Result = (A[15:0] << 16) | B[15:0]
+ APSInt Result = ((A & 0xFFFF) << 16) | (B & 0xFFFF);
+ return Success(Result, E);
+ }
+
case clang::X86::BI__builtin_ia32_lzcnt_u16:
case clang::X86::BI__builtin_ia32_lzcnt_u32:
case clang::X86::BI__builtin_ia32_lzcnt_u64: {
@@ -16413,6 +16449,21 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
return Success(APValue(Result), E);
}
+ case X86::BI__builtin_ia32_kunpckhi:
+ case X86::BI__builtin_ia32_kunpcksi:
+ case X86::BI__builtin_ia32_kunpckdi: {
+ return HandleMaskBinOp([](const APSInt &LHS, const APSInt &RHS) {
+ // Unpack: concatenate lower half of RHS with lower half of LHS
+ unsigned HalfBits = LHS.getBitWidth() / 2;
+ APSInt Mask = APSInt::getMaxValue(LHS.getBitWidth(), LHS.isUnsigned());
+ Mask = Mask.trunc(HalfBits).zext(LHS.getBitWidth());
+
+ APSInt LowerLHS = LHS & Mask;
+ APSInt LowerRHS = RHS & Mask;
+ return LowerRHS | (LowerLHS << HalfBits);
+ });
+ }
+
case X86::BI__builtin_ia32_kaddqi:
case X86::BI__builtin_ia32_kaddhi:
case X86::BI__builtin_ia32_kaddsi:
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 4a02c96620335..ff850973a1833 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -1606,13 +1606,13 @@ _mm512_maskz_set1_epi8(__mmask64 __M, char __A) {
(__v64qi) _mm512_setzero_si512());
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_kunpackd(__mmask64 __A,
- __mmask64 __B) {
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_kunpackd(__mmask64 __A, __mmask64 __B) {
return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
(__mmask64) __B);
}
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
{
return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index e735c8a35f1ce..bd8a15b5224db 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -8096,22 +8096,20 @@ _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
}
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
{
return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
-_mm512_kunpackd (__mmask64 __A, __mmask64 __B)
-{
- return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A, (__mmask64) __B);
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_kunpackd(__mmask64 __A, __mmask64 __B) {
+ return (__mmask64)__builtin_ia32_kunpckdi((__mmask64)__A, (__mmask64)__B);
}
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
-{
- return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, (__mmask32) __B);
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_kunpackw(__mmask32 __A, __mmask32 __B) {
+ return (__mmask32)__builtin_ia32_kunpcksi((__mmask32)__A, (__mmask32)__B);
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 17778b52d3671..9dcc749910175 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -9126,6 +9126,24 @@ __mmask16 test_mm512_kunpackb(__m512i __A, __m512i __B, __m512i __C, __m512i __D
__E, __F);
}
+TEST_CONSTEXPR(_mm512_kunpackb(0xFF00, 0x00FF) == 0xFF00);
+TEST_CONSTEXPR(_mm512_kunpackb(0xABCD, 0x1234) == 0xCD34);
+TEST_CONSTEXPR(_mm512_kunpackb(0x00FF, 0xFF00) == 0x0000);
+TEST_CONSTEXPR(_mm512_kunpackb(0xAAAA, 0x5555) == 0xAA55);
+TEST_CONSTEXPR(_mm512_kunpackb(0x1234, 0xABCD) == 0x34CD);
+
+TEST_CONSTEXPR(_mm512_kunpackw(0xFFFF0000u, 0x0000FFFFu) == 0x0000FFFFu);
+TEST_CONSTEXPR(_mm512_kunpackw(0xABCD1234u, 0x56789ABCu) == 0x12349ABCu);
+TEST_CONSTEXPR(_mm512_kunpackw(0x0000FFFFu, 0xFFFF0000u) == 0x00000000u);
+TEST_CONSTEXPR(_mm512_kunpackw(0xAAAA5555u, 0x5555AAAAu) == 0x5555AAAAu);
+TEST_CONSTEXPR(_mm512_kunpackw(0x12345678u, 0xABCDEF12u) == 0x5678EF12u);
+
+TEST_CONSTEXPR(_mm512_kunpackd(0xFFFFFFFF00000000ull, 0x00000000FFFFFFFFull) == 0x00000000FFFFFFFFull);
+TEST_CONSTEXPR(_mm512_kunpackd(0xABCDEF0123456789ull, 0x0123456789ABCDEFull) == 0x234567899ABCDEFull);
+TEST_CONSTEXPR(_mm512_kunpackd(0x00000000FFFFFFFFull, 0xFFFFFFFF00000000ull) == 0x0000000000000000ull);
+TEST_CONSTEXPR(_mm512_kunpackd(0xAAAA5555AAAA5555ull, 0x5555AAAA5555AAAAull) == 0xAAAA55555555AAAAull);
+TEST_CONSTEXPR(_mm512_kunpackd(0x123456789ABCDEFull, 0xFEDCBA9876543210ull) == 0x89ABCDEF76543210ull);
+
__mmask16 test_mm512_kxnor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
// CHECK-LABEL: test_mm512_kxnor
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
>From e56047a338d8e4b589adabb4a51ea0664add022a Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Wed, 12 Nov 2025 14:18:10 +0200
Subject: [PATCH 3/5] chore: update formatting
---
clang/lib/AST/ExprConstant.cpp | 2 +-
clang/lib/Headers/avx512bwintrin.h | 3 +--
clang/lib/Headers/avx512fintrin.h | 3 +--
3 files changed, 3 insertions(+), 5 deletions(-)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 62a0a26ff1087..a0a1d0ce2a94b 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -16457,7 +16457,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
unsigned HalfBits = LHS.getBitWidth() / 2;
APSInt Mask = APSInt::getMaxValue(LHS.getBitWidth(), LHS.isUnsigned());
Mask = Mask.trunc(HalfBits).zext(LHS.getBitWidth());
-
+
APSInt LowerLHS = LHS & Mask;
APSInt LowerRHS = RHS & Mask;
return LowerRHS | (LowerLHS << HalfBits);
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index ff850973a1833..d247f648b9eb5 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -1613,8 +1613,7 @@ _mm512_kunpackd(__mmask64 __A, __mmask64 __B) {
}
static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
-{
+_mm512_kunpackw(__mmask32 __A, __mmask32 __B) {
return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
(__mmask32) __B);
}
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index bd8a15b5224db..badc30a7eb26c 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -8097,8 +8097,7 @@ _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
}
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
-{
+_mm512_kunpackb(__mmask16 __A, __mmask16 __B) {
return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
}
>From e46b536cc1fe5df978f3b536f87706cce578a5af Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Sun, 16 Nov 2025 16:54:54 +0200
Subject: [PATCH 4/5] refactor: PR Feedback
---
clang/include/clang/Basic/BuiltinsX86.td | 4 +-
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 26 ++++++++++++
clang/lib/AST/ExprConstant.cpp | 49 ++++-------------------
clang/lib/Headers/avx512fintrin.h | 10 -----
clang/test/CodeGen/X86/avx512f-builtins.c | 10 ++---
5 files changed, 41 insertions(+), 58 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index cb08e2107f072..4872f350bb4ff 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -2151,7 +2151,7 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>
def vcomiss : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>, _Constant int, _Constant int)">;
}
-let Features = "avx512bw", Attributes = [NoThrow, Const] in {
+let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in {
def kunpckdi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">;
def kunpcksi : X86Builtin<"unsigned int(unsigned int, unsigned int)">;
}
@@ -3194,7 +3194,7 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in {
def ktestzdi : X86Builtin<"int(unsigned long long int, unsigned long long int)">;
}
-let Features = "avx512f", Attributes = [NoThrow, Const] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr] in {
def kunpckhi : X86Builtin<"unsigned short(unsigned short, unsigned short)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 6c7b2f502cc51..c1389424154be 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4579,6 +4579,32 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
S, OpPC, Call,
[](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
+ case X86::BI__builtin_ia32_kunpckhi:
+ case X86::BI__builtin_ia32_kunpckdi:
+ case X86::BI__builtin_ia32_kunpcksi:
+ return interp__builtin_elementwise_int_binop(S, OpPC, Call,
+ [](const APSInt &A,
+ const APSInt &B) {
+ // Generic kunpack: extract
+ // lower half of each operand
+ // and concatenate Result =
+ // (A[HalfWidth-1:0] <<
+ // HalfWidth) |
+ // B[HalfWidth-1:0]
+ unsigned HalfWidth =
+ A.getBitWidth() / 2;
+ APSInt Result(
+ A.getLoBits(HalfWidth)
+ .zext(A.getBitWidth()),
+ A.isUnsigned());
+ Result <<= HalfWidth;
+ Result |= APSInt(
+ B.getLoBits(HalfWidth)
+ .zext(B.getBitWidth()),
+ B.isUnsigned());
+ return Result;
+ });
+
case X86::BI__builtin_ia32_phminposuw128:
return interp__builtin_ia32_phminposuw(S, OpPC, Call);
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index a0a1d0ce2a94b..854d2969b6784 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -16287,39 +16287,20 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
return Success((A | B) == 0, E);
}
- case clang::X86::BI__builtin_ia32_kunpckhi: {
- APSInt A, B;
- if (!EvaluateInteger(E->getArg(0), A, Info) ||
- !EvaluateInteger(E->getArg(1), B, Info))
- return false;
-
- // Extract lower 8 bits of each operand and concatenate
- // Result = (A[7:0] << 8) | B[7:0]
- APSInt Result = ((A & 0xFF) << 8) | (B & 0xFF);
- return Success(Result, E);
- }
-
- case clang::X86::BI__builtin_ia32_kunpckdi: {
- APSInt A, B;
- if (!EvaluateInteger(E->getArg(0), A, Info) ||
- !EvaluateInteger(E->getArg(1), B, Info))
- return false;
-
- // Extract lower 32 bits of each operand and concatenate
- // Result = (A[31:0] << 32) | B[31:0]
- APSInt Result = ((A & 0xFFFFFFFFULL) << 32) | (B & 0xFFFFFFFFULL);
- return Success(Result, E);
- }
-
+ case clang::X86::BI__builtin_ia32_kunpckhi:
+ case clang::X86::BI__builtin_ia32_kunpckdi:
case clang::X86::BI__builtin_ia32_kunpcksi: {
APSInt A, B;
if (!EvaluateInteger(E->getArg(0), A, Info) ||
!EvaluateInteger(E->getArg(1), B, Info))
return false;
- // Extract lower 16 bits of each operand and concatenate
- // Result = (A[15:0] << 16) | B[15:0]
- APSInt Result = ((A & 0xFFFF) << 16) | (B & 0xFFFF);
+ // Generic kunpack: extract lower half of each operand and concatenate
+ // Result = (A[HalfWidth-1:0] << HalfWidth) | B[HalfWidth-1:0]
+ unsigned HalfWidth = A.getBitWidth() / 2;
+ APSInt Result(A.getLoBits(HalfWidth).zext(A.getBitWidth()), A.isUnsigned());
+ Result <<= HalfWidth;
+ Result |= APSInt(B.getLoBits(HalfWidth).zext(B.getBitWidth()), B.isUnsigned());
return Success(Result, E);
}
@@ -16449,20 +16430,6 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
return Success(APValue(Result), E);
}
- case X86::BI__builtin_ia32_kunpckhi:
- case X86::BI__builtin_ia32_kunpcksi:
- case X86::BI__builtin_ia32_kunpckdi: {
- return HandleMaskBinOp([](const APSInt &LHS, const APSInt &RHS) {
- // Unpack: concatenate lower half of RHS with lower half of LHS
- unsigned HalfBits = LHS.getBitWidth() / 2;
- APSInt Mask = APSInt::getMaxValue(LHS.getBitWidth(), LHS.isUnsigned());
- Mask = Mask.trunc(HalfBits).zext(LHS.getBitWidth());
-
- APSInt LowerLHS = LHS & Mask;
- APSInt LowerRHS = RHS & Mask;
- return LowerRHS | (LowerLHS << HalfBits);
- });
- }
case X86::BI__builtin_ia32_kaddqi:
case X86::BI__builtin_ia32_kaddhi:
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index badc30a7eb26c..a927de739b644 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -8101,16 +8101,6 @@ _mm512_kunpackb(__mmask16 __A, __mmask16 __B) {
return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_kunpackd(__mmask64 __A, __mmask64 __B) {
- return (__mmask64)__builtin_ia32_kunpckdi((__mmask64)__A, (__mmask64)__B);
-}
-
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_kunpackw(__mmask32 __A, __mmask32 __B) {
- return (__mmask32)__builtin_ia32_kunpcksi((__mmask32)__A, (__mmask32)__B);
-}
-
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_kxnor(__mmask16 __A, __mmask16 __B) {
return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 9dcc749910175..13c7eec76233f 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -9126,21 +9126,21 @@ __mmask16 test_mm512_kunpackb(__m512i __A, __m512i __B, __m512i __C, __m512i __D
__E, __F);
}
-TEST_CONSTEXPR(_mm512_kunpackb(0xFF00, 0x00FF) == 0xFF00);
+TEST_CONSTEXPR(_mm512_kunpackb(0xFF00, 0x00FF) == 0x00FF);
TEST_CONSTEXPR(_mm512_kunpackb(0xABCD, 0x1234) == 0xCD34);
-TEST_CONSTEXPR(_mm512_kunpackb(0x00FF, 0xFF00) == 0x0000);
+TEST_CONSTEXPR(_mm512_kunpackb(0x00FF, 0xFF00) == 0xFF00);
TEST_CONSTEXPR(_mm512_kunpackb(0xAAAA, 0x5555) == 0xAA55);
TEST_CONSTEXPR(_mm512_kunpackb(0x1234, 0xABCD) == 0x34CD);
TEST_CONSTEXPR(_mm512_kunpackw(0xFFFF0000u, 0x0000FFFFu) == 0x0000FFFFu);
TEST_CONSTEXPR(_mm512_kunpackw(0xABCD1234u, 0x56789ABCu) == 0x12349ABCu);
-TEST_CONSTEXPR(_mm512_kunpackw(0x0000FFFFu, 0xFFFF0000u) == 0x00000000u);
+TEST_CONSTEXPR(_mm512_kunpackw(0x0000FFFFu, 0xFFFF0000u) == 0xFFFF0000u);
TEST_CONSTEXPR(_mm512_kunpackw(0xAAAA5555u, 0x5555AAAAu) == 0x5555AAAAu);
TEST_CONSTEXPR(_mm512_kunpackw(0x12345678u, 0xABCDEF12u) == 0x5678EF12u);
TEST_CONSTEXPR(_mm512_kunpackd(0xFFFFFFFF00000000ull, 0x00000000FFFFFFFFull) == 0x00000000FFFFFFFFull);
-TEST_CONSTEXPR(_mm512_kunpackd(0xABCDEF0123456789ull, 0x0123456789ABCDEFull) == 0x234567899ABCDEFull);
-TEST_CONSTEXPR(_mm512_kunpackd(0x00000000FFFFFFFFull, 0xFFFFFFFF00000000ull) == 0x0000000000000000ull);
+TEST_CONSTEXPR(_mm512_kunpackd(0xABCDEF0123456789ull, 0x0123456789ABCDEFull) == 0x2345678989ABCDEFull);
+TEST_CONSTEXPR(_mm512_kunpackd(0x00000000FFFFFFFFull, 0xFFFFFFFF00000000ull) == 0xFFFFFFFF00000000ull);
TEST_CONSTEXPR(_mm512_kunpackd(0xAAAA5555AAAA5555ull, 0x5555AAAA5555AAAAull) == 0xAAAA55555555AAAAull);
TEST_CONSTEXPR(_mm512_kunpackd(0x123456789ABCDEFull, 0xFEDCBA9876543210ull) == 0x89ABCDEF76543210ull);
>From 31d0b0975bc2824ab2fd20789ba8f6ea1eb1f989 Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Sun, 16 Nov 2025 16:58:17 +0200
Subject: [PATCH 5/5] Format files
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 38 ++++++++++--------------
clang/lib/AST/ExprConstant.cpp | 4 +--
2 files changed, 18 insertions(+), 24 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index c1389424154be..ce213e52df3a0 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4582,28 +4582,22 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_kunpckhi:
case X86::BI__builtin_ia32_kunpckdi:
case X86::BI__builtin_ia32_kunpcksi:
- return interp__builtin_elementwise_int_binop(S, OpPC, Call,
- [](const APSInt &A,
- const APSInt &B) {
- // Generic kunpack: extract
- // lower half of each operand
- // and concatenate Result =
- // (A[HalfWidth-1:0] <<
- // HalfWidth) |
- // B[HalfWidth-1:0]
- unsigned HalfWidth =
- A.getBitWidth() / 2;
- APSInt Result(
- A.getLoBits(HalfWidth)
- .zext(A.getBitWidth()),
- A.isUnsigned());
- Result <<= HalfWidth;
- Result |= APSInt(
- B.getLoBits(HalfWidth)
- .zext(B.getBitWidth()),
- B.isUnsigned());
- return Result;
- });
+ return interp__builtin_elementwise_int_binop(
+ S, OpPC, Call, [](const APSInt &A, const APSInt &B) {
+ // Generic kunpack: extract
+ // lower half of each operand
+ // and concatenate Result =
+ // (A[HalfWidth-1:0] <<
+ // HalfWidth) |
+ // B[HalfWidth-1:0]
+ unsigned HalfWidth = A.getBitWidth() / 2;
+ APSInt Result(A.getLoBits(HalfWidth).zext(A.getBitWidth()),
+ A.isUnsigned());
+ Result <<= HalfWidth;
+ Result |= APSInt(B.getLoBits(HalfWidth).zext(B.getBitWidth()),
+ B.isUnsigned());
+ return Result;
+ });
case X86::BI__builtin_ia32_phminposuw128:
return interp__builtin_ia32_phminposuw(S, OpPC, Call);
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 854d2969b6784..9f3d534ddd2f4 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -16300,7 +16300,8 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
unsigned HalfWidth = A.getBitWidth() / 2;
APSInt Result(A.getLoBits(HalfWidth).zext(A.getBitWidth()), A.isUnsigned());
Result <<= HalfWidth;
- Result |= APSInt(B.getLoBits(HalfWidth).zext(B.getBitWidth()), B.isUnsigned());
+ Result |=
+ APSInt(B.getLoBits(HalfWidth).zext(B.getBitWidth()), B.isUnsigned());
return Success(Result, E);
}
@@ -16430,7 +16431,6 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
return Success(APValue(Result), E);
}
-
case X86::BI__builtin_ia32_kaddqi:
case X86::BI__builtin_ia32_kaddhi:
case X86::BI__builtin_ia32_kaddsi:
More information about the cfe-commits
mailing list