[clang] [Clang][X86] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - allow VPERMILPD/S variable mask intrinsics to be used in constexpr (PR #168861)
via cfe-commits
cfe-commits at lists.llvm.org
Thu Nov 20 03:54:23 PST 2025
https://github.com/stomfaig updated https://github.com/llvm/llvm-project/pull/168861
>From b0b258c0b1a613b43d43d25b1498df858fd7e01d Mon Sep 17 00:00:00 2001
From: stomfaig <stomfaig at gmail.com>
Date: Fri, 14 Nov 2025 20:48:04 +0000
Subject: [PATCH 1/7] adding cases for vpermilvarpd and vpermilvarps
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 22 +++++++++++++++++
clang/lib/AST/ExprConstant.cpp | 30 ++++++++++++++++++++++++
2 files changed, 52 insertions(+)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index cee3c1b8cf8f3..ee0f9deaee46e 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4563,6 +4563,28 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return std::make_pair(0, static_cast<int>(LaneBase + Sel));
});
+ case X86::BI__builtin_ia32_vpermilvarpd:
+ case X86::BI__builtin_ia32_vpermilvarpd256:
+ case X86::BI__builtin_ia32_vpermilvarpd512:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned NumElemPerLane = 2;
+ unsigned Lane = DstIdx / NumElemPerLane;
+ unsigned Offset = ShuffleMask & 0b1;
+ return std::make_pair(0, static_cast<int>(Lane + Offset));
+ });
+
+ case X86::BI__builtin_ia32_vpermilvarps:
+ case X86::BI__builtin_ia32_vpermilvarps256:
+ case X86::BI__builtin_ia32_vpermilvarps512:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned NumElemPerLane = 4;
+ unsigned Lane = DstIdx / NumElemPerLane;
+ unsigned Offset = ShuffleMask & 0b11;
+ return std::make_pair(0, static_cast<int>(Lane + Offset));
+ });
+
case X86::BI__builtin_ia32_kandqi:
case X86::BI__builtin_ia32_kandhi:
case X86::BI__builtin_ia32_kandsi:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index b7da89ab3dcf2..c3c084d67ab66 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13015,6 +13015,36 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(R, E);
}
+ case X86::BI__builtin_ia32_vpermilvarpd:
+ case X86::BI__builtin_ia32_vpermilvarpd256:
+ case X86::BI__builtin_ia32_vpermilvarpd512:
+ APValue R;
+ if (!evalShuffleGeneric(
+ Info, E, R,
+ [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> {
+ unsigned NumElemPerLane = 2;
+ unsigned Lane = DstIdx / NumElemPerLane;
+ unsigned Offset = Mask & 0b1;
+ return std::make_pair(0, static_cast<int>(Lane + Offset));
+ }))
+ return false;
+ return Success(R, E);
+
+ case X86::BI__builtin_ia32_vpermilvarps:
+ case X86::BI__builtin_ia32_vpermilvarps256:
+ case X86::BI__builtin_ia32_vpermilvarps512:
+ APValue R;
+ if (!evalShuffleGeneric(
+ Info, E, R,
+ [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> {
+ unsigned NumElemPerLane = 4;
+ unsigned Lane = DstIdx / NumElemPerLane;
+ unsigned Offset = Mask & 0b11;
+ return std::make_pair(0, static_cast<int>(Lane + Offset));
+ }))
+ return false;
+ return Success(R, E);
+
case X86::BI__builtin_ia32_phminposuw128: {
APValue Source;
if (!Evaluate(Source, Info, E->getArg(0)))
>From 18f7f8eb509a54d4ff437e3f44296c34a94fd9fa Mon Sep 17 00:00:00 2001
From: stomfaig <stomfaig at gmail.com>
Date: Fri, 14 Nov 2025 20:48:52 +0000
Subject: [PATCH 2/7] format
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +-
clang/lib/AST/ExprConstant.cpp | 22 +++++++++++-----------
2 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index ee0f9deaee46e..9972c0924826f 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4573,7 +4573,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
unsigned Offset = ShuffleMask & 0b1;
return std::make_pair(0, static_cast<int>(Lane + Offset));
});
-
+
case X86::BI__builtin_ia32_vpermilvarps:
case X86::BI__builtin_ia32_vpermilvarps256:
case X86::BI__builtin_ia32_vpermilvarps512:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index c3c084d67ab66..e6f0a5964894f 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13022,14 +13022,14 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
if (!evalShuffleGeneric(
Info, E, R,
[](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> {
- unsigned NumElemPerLane = 2;
- unsigned Lane = DstIdx / NumElemPerLane;
- unsigned Offset = Mask & 0b1;
- return std::make_pair(0, static_cast<int>(Lane + Offset));
- }))
+ unsigned NumElemPerLane = 2;
+ unsigned Lane = DstIdx / NumElemPerLane;
+ unsigned Offset = Mask & 0b1;
+ return std::make_pair(0, static_cast<int>(Lane + Offset));
+ }))
return false;
return Success(R, E);
-
+
case X86::BI__builtin_ia32_vpermilvarps:
case X86::BI__builtin_ia32_vpermilvarps256:
case X86::BI__builtin_ia32_vpermilvarps512:
@@ -13037,11 +13037,11 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
if (!evalShuffleGeneric(
Info, E, R,
[](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> {
- unsigned NumElemPerLane = 4;
- unsigned Lane = DstIdx / NumElemPerLane;
- unsigned Offset = Mask & 0b11;
- return std::make_pair(0, static_cast<int>(Lane + Offset));
- }))
+ unsigned NumElemPerLane = 4;
+ unsigned Lane = DstIdx / NumElemPerLane;
+ unsigned Offset = Mask & 0b11;
+ return std::make_pair(0, static_cast<int>(Lane + Offset));
+ }))
return false;
return Success(R, E);
>From 6926f0bd637a625d458a386d7dde957fb6b878bc Mon Sep 17 00:00:00 2001
From: stomfaig <stomfaig at gmail.com>
Date: Thu, 20 Nov 2025 09:50:40 +0000
Subject: [PATCH 3/7] correct logic
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 6 +++---
clang/lib/AST/ExprConstant.cpp | 14 ++++++++------
2 files changed, 11 insertions(+), 9 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 9972c0924826f..ce15048d6d31a 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4570,8 +4570,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
unsigned NumElemPerLane = 2;
unsigned Lane = DstIdx / NumElemPerLane;
- unsigned Offset = ShuffleMask & 0b1;
- return std::make_pair(0, static_cast<int>(Lane + Offset));
+ unsigned Offset = ShuffleMask & 0b10 ? 1 : 0;
+ return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + Offset));
});
case X86::BI__builtin_ia32_vpermilvarps:
@@ -4582,7 +4582,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
unsigned NumElemPerLane = 4;
unsigned Lane = DstIdx / NumElemPerLane;
unsigned Offset = ShuffleMask & 0b11;
- return std::make_pair(0, static_cast<int>(Lane + Offset));
+ return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + Offset));
});
case X86::BI__builtin_ia32_kandqi:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index e6f0a5964894f..e8b7f78848aeb 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13017,22 +13017,23 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
case X86::BI__builtin_ia32_vpermilvarpd:
case X86::BI__builtin_ia32_vpermilvarpd256:
- case X86::BI__builtin_ia32_vpermilvarpd512:
+ case X86::BI__builtin_ia32_vpermilvarpd512: {
APValue R;
if (!evalShuffleGeneric(
Info, E, R,
[](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> {
unsigned NumElemPerLane = 2;
unsigned Lane = DstIdx / NumElemPerLane;
- unsigned Offset = Mask & 0b1;
- return std::make_pair(0, static_cast<int>(Lane + Offset));
+ unsigned Offset = Mask & 0b10 ? 1 : 0;
+ return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + Offset));
}))
return false;
return Success(R, E);
+ }
case X86::BI__builtin_ia32_vpermilvarps:
case X86::BI__builtin_ia32_vpermilvarps256:
- case X86::BI__builtin_ia32_vpermilvarps512:
+ case X86::BI__builtin_ia32_vpermilvarps512: {
APValue R;
if (!evalShuffleGeneric(
Info, E, R,
@@ -13040,11 +13041,12 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
unsigned NumElemPerLane = 4;
unsigned Lane = DstIdx / NumElemPerLane;
unsigned Offset = Mask & 0b11;
- return std::make_pair(0, static_cast<int>(Lane + Offset));
+ return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + Offset));
}))
return false;
return Success(R, E);
-
+ }
+
case X86::BI__builtin_ia32_phminposuw128: {
APValue Source;
if (!Evaluate(Source, Info, E->getArg(0)))
>From 62653fe6ccf55b01bbf6b9ea9de80139e2fed451 Mon Sep 17 00:00:00 2001
From: stomfaig <stomfaig at gmail.com>
Date: Thu, 20 Nov 2025 09:52:01 +0000
Subject: [PATCH 4/7] make intrinsics constexpr
---
clang/include/clang/Basic/BuiltinsX86.td | 8 +++++++-
clang/lib/Headers/avx512fintrin.h | 12 ++++++------
clang/lib/Headers/avx512vlintrin.h | 16 ++++++++--------
clang/lib/Headers/avxintrin.h | 8 ++++----
4 files changed, 25 insertions(+), 19 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 69d18679fd6ec..9382a1168a294 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -493,11 +493,14 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
def shufps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
}
-let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vpermilvarpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, long long int>)">;
def vpermilvarps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, int>)">;
def vpermilvarpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>)">;
def vpermilvarps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
+}
+
+let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
def cmppd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant char)">;
def cmpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
@@ -2369,6 +2372,9 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>
let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
def vpermilpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int)">;
def vpermilps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def vpermilvarpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, long long int>)">;
def vpermilvarps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>)">;
}
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 79c37173ac838..59b58f7a0680a 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -5897,13 +5897,13 @@ _mm_cvttss_u64 (__m128 __A)
(__v16sf)_mm512_permute_ps((X), (C)), \
(__v16sf)_mm512_setzero_ps()))
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_permutevar_pd(__m512d __A, __m512i __C)
{
return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
{
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
@@ -5911,7 +5911,7 @@ _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
(__v8df)__W);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
{
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
@@ -5919,13 +5919,13 @@ _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
(__v8df)_mm512_setzero_pd());
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_permutevar_ps(__m512 __A, __m512i __C)
{
return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
{
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
@@ -5933,7 +5933,7 @@ _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
(__v16sf)__W);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
{
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 1e6e42df6b5fb..9697eacda2c7d 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -5855,7 +5855,7 @@ _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
(__v8sf)_mm256_permute_ps((X), (C)), \
(__v8sf)_mm256_setzero_ps()))
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
{
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
@@ -5863,7 +5863,7 @@ _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
(__v2df)__W);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
{
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
@@ -5871,7 +5871,7 @@ _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
(__v2df)_mm_setzero_pd());
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
{
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
@@ -5879,7 +5879,7 @@ _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
(__v4df)__W);
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
{
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
@@ -5887,7 +5887,7 @@ _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
(__v4df)_mm256_setzero_pd());
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
{
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
@@ -5895,7 +5895,7 @@ _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
(__v4sf)__W);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
{
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
@@ -5903,7 +5903,7 @@ _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
(__v4sf)_mm_setzero_ps());
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
{
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
@@ -5911,7 +5911,7 @@ _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
(__v8sf)__W);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
{
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 4aef9245323fb..247530e2f56c0 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -789,7 +789,7 @@ static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_hsub_ps(__m256 __a,
/// 1: Bits [127:64] of the source are copied to bits [127:64] of the
/// returned vector.
/// \returns A 128-bit vector of [2 x double] containing the copied values.
-static __inline __m128d __DEFAULT_FN_ATTRS128
+static __inline __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_permutevar_pd(__m128d __a, __m128i __c)
{
return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c);
@@ -828,7 +828,7 @@ _mm_permutevar_pd(__m128d __a, __m128i __c)
/// 1: Bits [255:192] of the source are copied to bits [255:192] of the
/// returned vector.
/// \returns A 256-bit vector of [4 x double] containing the copied values.
-static __inline __m256d __DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_permutevar_pd(__m256d __a, __m256i __c)
{
return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c);
@@ -883,7 +883,7 @@ _mm256_permutevar_pd(__m256d __a, __m256i __c)
/// 11: Bits [127:96] of the source are copied to bits [127:96] of the
/// returned vector.
/// \returns A 128-bit vector of [4 x float] containing the copied values.
-static __inline __m128 __DEFAULT_FN_ATTRS128
+static __inline __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_permutevar_ps(__m128 __a, __m128i __c)
{
return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c);
@@ -974,7 +974,7 @@ _mm_permutevar_ps(__m128 __a, __m128i __c)
/// 11: Bits [255:224] of the source are copied to bits [255:224] of the
/// returned vector.
/// \returns A 256-bit vector of [8 x float] containing the copied values.
-static __inline __m256 __DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_permutevar_ps(__m256 __a, __m256i __c)
{
return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c);
>From 4786836d15f01a536ad1dd9a7b898c0437d85376 Mon Sep 17 00:00:00 2001
From: stomfaig <stomfaig at gmail.com>
Date: Thu, 20 Nov 2025 09:52:17 +0000
Subject: [PATCH 5/7] add tests
---
clang/test/CodeGen/X86/avx-builtins.c | 28 +++++++++
clang/test/CodeGen/X86/avx512f-builtins.c | 48 +++++++++++++++
clang/test/CodeGen/X86/avx512vl-builtins.c | 68 ++++++++++++++++++++++
3 files changed, 144 insertions(+)
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 737febbc7fef6..ec58d646117aa 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -1447,24 +1447,52 @@ __m128d test_mm_permutevar_pd(__m128d A, __m128i B) {
// CHECK: call {{.*}}<2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %{{.*}}, <2 x i64> %{{.*}})
return _mm_permutevar_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(
+ _mm_permutevar_pd(
+ ((__m128d){0.0, 1.0}),
+ ((__m128i){0b10, 0b00})
+ ),
+ 1.0, 0.0
+));
__m256d test_mm256_permutevar_pd(__m256d A, __m256i B) {
// CHECK-LABEL: test_mm256_permutevar_pd
// CHECK: call {{.*}}<4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %{{.*}}, <4 x i64> %{{.*}})
return _mm256_permutevar_pd(A, B);
}
+TEST_CONSTEXPR(match_m256d(
+ _mm256_permutevar_pd(
+ ((__m256d){0.0, 1.0, 2.0, 3.0}),
+ ((__m256i){0b10, 0b00, 0b10, 0b00})
+ ),
+ 1.0, 0.0, 3.0, 2.0
+));
__m128 test_mm_permutevar_ps(__m128 A, __m128i B) {
// CHECK-LABEL: test_mm_permutevar_ps
// CHECK: call {{.*}}<4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %{{.*}}, <4 x i32> %{{.*}})
return _mm_permutevar_ps(A, B);
}
+TEST_CONSTEXPR(match_m128(
+ _mm_permutevar_ps(
+ ((__m128){0.0, 1.0, 2.0, 3.0}),
+ ((__m128i){0b11 + (0b10ULL << 32), 0b01})
+ ),
+ 3.0, 2.0, 1.0, 0.0
+));
__m256 test_mm256_permutevar_ps(__m256 A, __m256i B) {
// CHECK-LABEL: test_mm256_permutevar_ps
// CHECK: call {{.*}}<8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %{{.*}}, <8 x i32> %{{.*}})
return _mm256_permutevar_ps(A, B);
}
+TEST_CONSTEXPR(match_m256(
+ _mm256_permutevar_ps(
+ ((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+ ((__m256i){(0b10ULL << 32) + 0b11, 0b01, (0b10ULL << 32) + 0b11, 0b01})
+ ),
+ 3.0, 2.0, 1.0, 0.0, 7.0, 6.0, 5.0, 4.0
+));
__m256 test_mm256_rcp_ps(__m256 A) {
// CHECK-LABEL: test_mm256_rcp_ps
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 71e700af0069e..33047fd351039 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -5488,6 +5488,13 @@ __m512d test_mm512_permutevar_pd(__m512d __A, __m512i __C) {
// CHECK: @llvm.x86.avx512.vpermilvar.pd.512
return _mm512_permutevar_pd(__A, __C);
}
+TEST_CONSTEXPR(match_m512d(
+ _mm512_permutevar_pd(
+ ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+ ((__m512i){0b10, 0b00, 0b10, 0b00, 0b10, 0b00, 0b10, 0b00})
+ ),
+ 1.0, 0.0, 3.0, 2.0, 5.0, 4.0, 7.0, 6.0
+));
__m512d test_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) {
// CHECK-LABEL: test_mm512_mask_permutevar_pd
@@ -5495,6 +5502,15 @@ __m512d test_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_permutevar_pd(__W, __U, __A, __C);
}
+TEST_CONSTEXPR(match_m512d(
+ _mm512_mask_permutevar_pd(
+ ((__m512d){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
+ (__mmask8)0b01010101,
+ ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+ ((__m512i){0b10, 0b00, 0b10, 0b00, 0b10, 0b00, 0b10, 0b00})
+ ),
+ 1.0, 9.0, 3.0, 11.0, 5.0, 13.0, 7.0, 15.0
+));
__m512d test_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) {
// CHECK-LABEL: test_mm512_maskz_permutevar_pd
@@ -5502,12 +5518,27 @@ __m512d test_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) {
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_permutevar_pd(__U, __A, __C);
}
+TEST_CONSTEXPR(match_m512d(
+ _mm512_maskz_permutevar_pd(
+ (__mmask8)0b01010101,
+ ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+ ((__m512i){0b10, 0b00, 0b10, 0b00, 0b10, 0b00, 0b10, 0b00})
+ ),
+ 1.0, 0.0, 3.0, 0.0, 5.0, 0.0, 7.0, 0.0
+));
__m512 test_mm512_permutevar_ps(__m512 __A, __m512i __C) {
// CHECK-LABEL: test_mm512_permutevar_ps
// CHECK: @llvm.x86.avx512.vpermilvar.ps.512
return _mm512_permutevar_ps(__A, __C);
}
+TEST_CONSTEXPR(match_m512(
+ _mm512_permutevar_ps(
+ ((__m512){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
+ ((__m512i){0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01})
+ ),
+ 3.0, 2.0, 1.0, 0.0, 7.0, 6.0, 5.0, 4.0, 11.0, 10.0, 9.0, 8.0, 15.0, 14.0, 13.0, 12.0
+));
__m512 test_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) {
// CHECK-LABEL: test_mm512_mask_permutevar_ps
@@ -5515,6 +5546,15 @@ __m512 test_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m5
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_permutevar_ps(__W, __U, __A, __C);
}
+TEST_CONSTEXPR(match_m512(
+ _mm512_mask_permutevar_ps(
+ ((__m512){16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0}),
+ (__mmask16)0b0101010101010101,
+ ((__m512){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
+ ((__m512i){0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01})
+ ),
+ 3.0, 17.0, 1.0, 19.0, 7.0, 21.0, 5.0, 23.0, 11.0, 25.0, 9.0, 27.0, 15.0, 29.0, 13.0, 31.0
+));
__m512 test_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) {
// CHECK-LABEL: test_mm512_maskz_permutevar_ps
@@ -5522,6 +5562,14 @@ __m512 test_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) {
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_permutevar_ps(__U, __A, __C);
}
+TEST_CONSTEXPR(match_m512(
+ _mm512_maskz_permutevar_ps(
+ (__mmask16)0b0101010101010101,
+ ((__m512){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
+ ((__m512i){0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01})
+ ),
+ 3.0, 0.0, 1.0, 0.0, 7.0, 0.0, 5.0, 0.0, 11.0, 0.0, 9.0, 0.0, 15.0, 0.0, 13.0, 0.0
+));
__m512i test_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) {
// CHECK-LABEL: test_mm512_permutex2var_epi32
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index a7eee79c97539..be0d1bbd4fdbf 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -8007,6 +8007,15 @@ __m128d test_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m12
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_mask_permutevar_pd(__W, __U, __A, __C);
}
+TEST_CONSTEXPR(match_m128d(
+ _mm_mask_permutevar_pd(
+ ((__m128d){3.0, 4.0}),
+ (__mmask8)0b01,
+ ((__m128d){0.0, 1.0}),
+ ((__m128i){0b10, 0b00})
+ ),
+ 1.0, 4.0
+));
__m128d test_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) {
// CHECK-LABEL: test_mm_maskz_permutevar_pd
@@ -8014,6 +8023,14 @@ __m128d test_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) {
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_maskz_permutevar_pd(__U, __A, __C);
}
+TEST_CONSTEXPR(match_m128d(
+ _mm_maskz_permutevar_pd(
+ (__mmask8)0b01,
+ ((__m128d){0.0, 1.0}),
+ ((__m128i){0b10, 0b00})
+ ),
+ 1.0, 0.0
+));
__m256d test_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) {
// CHECK-LABEL: test_mm256_mask_permutevar_pd
@@ -8021,6 +8038,15 @@ __m256d test_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_mask_permutevar_pd(__W, __U, __A, __C);
}
+TEST_CONSTEXPR(match_m256d(
+ _mm256_mask_permutevar_pd(
+ ((__m256d){4.0, 5.0, 6.0, 7.0}),
+ (__mmask8)0b0101,
+ ((__m256d){0.0, 1.0, 2.0, 3.0}),
+ ((__m256i){0b10, 0b00, 0b10, 0b00})
+ ),
+ 1.0, 5.0, 3.0, 7.0
+));
__m256d test_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) {
// CHECK-LABEL: test_mm256_maskz_permutevar_pd
@@ -8028,6 +8054,14 @@ __m256d test_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) {
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_maskz_permutevar_pd(__U, __A, __C);
}
+TEST_CONSTEXPR(match_m256d(
+ _mm256_maskz_permutevar_pd(
+ (__mmask8)0b0101,
+ ((__m256d){0.0, 1.0, 2.0, 3.0}),
+ ((__m256i){0b10, 0b00, 0b10, 0b00})
+ ),
+ 1.0, 0.0, 3.0, 0.0
+));
__m128 test_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) {
// CHECK-LABEL: test_mm_mask_permutevar_ps
@@ -8035,6 +8069,15 @@ __m128 test_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_mask_permutevar_ps(__W, __U, __A, __C);
}
+TEST_CONSTEXPR(match_m128(
+ _mm_mask_permutevar_ps(
+ ((__m128){4.0, 5.0, 6.0, 7.0}),
+ (__mmask8)0b0101,
+ ((__m128){0.0, 1.0, 2.0, 3.0}),
+ ((__m128i){0b11 + (0b10ULL << 32), 0b01})
+ ),
+ 3.0, 5.0, 1.0, 7.0
+));
__m128 test_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) {
// CHECK-LABEL: test_mm_maskz_permutevar_ps
@@ -8042,6 +8085,14 @@ __m128 test_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) {
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_maskz_permutevar_ps(__U, __A, __C);
}
+TEST_CONSTEXPR(match_m128(
+ _mm_maskz_permutevar_ps(
+ (__mmask8)0b0101,
+ ((__m128){0.0, 1.0, 2.0, 3.0}),
+ ((__m128i){0b11 + (0b10ULL << 32), 0b01})
+ ),
+ 3.0, 0.0, 1.0, 0.0
+));
__m256 test_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) {
// CHECK-LABEL: test_mm256_mask_permutevar_ps
@@ -8049,6 +8100,15 @@ __m256 test_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m25
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask_permutevar_ps(__W, __U, __A, __C);
}
+TEST_CONSTEXPR(match_m256(
+ _mm256_mask_permutevar_ps(
+ ((__m256){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
+ (__mmask8)0b01010101,
+ ((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+ ((__m256i){0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01})
+ ),
+ 3.0, 9.0, 1.0, 11.0, 7.0, 13.0, 5.0, 15.0
+));
__m256 test_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) {
// CHECK-LABEL: test_mm256_maskz_permutevar_ps
@@ -8056,6 +8116,14 @@ __m256 test_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) {
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_maskz_permutevar_ps(__U, __A, __C);
}
+TEST_CONSTEXPR(match_m256(
+ _mm256_maskz_permutevar_ps(
+ (__mmask8)0b01010101,
+ ((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+ ((__m256i){0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01})
+ ),
+ 3.0, 0.0, 1.0, 0.0, 7.0, 0.0, 5.0, 0.0
+));
__mmask8 test_mm_test_epi32_mask(__m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_test_epi32_mask
>From 00ae3e0687859c5982057b7f71622324d7473865 Mon Sep 17 00:00:00 2001
From: stomfaig <stomfaig at gmail.com>
Date: Thu, 20 Nov 2025 11:47:28 +0000
Subject: [PATCH 6/7] format
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +-
clang/lib/AST/ExprConstant.cpp | 2 +-
clang/lib/Headers/avx512fintrin.h | 18 ++++++------------
clang/lib/Headers/avx512vlintrin.h | 24 ++++++++----------------
clang/lib/Headers/avxintrin.h | 12 ++++--------
5 files changed, 20 insertions(+), 38 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 8bccac746fb51..316595b81224c 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4652,7 +4652,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
unsigned Offset = ShuffleMask & 0b11;
return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + Offset));
});
-
+
case X86::BI__builtin_ia32_vpermilpd:
case X86::BI__builtin_ia32_vpermilpd256:
case X86::BI__builtin_ia32_vpermilpd512:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 0ee748075a6e0..36a37723d75be 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13058,7 +13058,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return false;
return Success(R, E);
}
-
+
case X86::BI__builtin_ia32_vpermilpd:
case X86::BI__builtin_ia32_vpermilpd256:
case X86::BI__builtin_ia32_vpermilpd512: {
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 3f5028f335155..e1de56069870b 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -5880,44 +5880,38 @@ _mm_cvttss_u64 (__m128 __A)
(__v16sf)_mm512_setzero_ps()))
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_permutevar_pd(__m512d __A, __m512i __C)
-{
+_mm512_permutevar_pd(__m512d __A, __m512i __C) {
return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
-{
+_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_permutevar_pd(__A, __C),
(__v8df)__W);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
-{
+_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_permutevar_pd(__A, __C),
(__v8df)_mm512_setzero_pd());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_permutevar_ps(__m512 __A, __m512i __C)
-{
+_mm512_permutevar_ps(__m512 __A, __m512i __C) {
return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
-{
+_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_permutevar_ps(__A, __C),
(__v16sf)__W);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
-{
+_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_permutevar_ps(__A, __C),
(__v16sf)_mm512_setzero_ps());
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index e7407bd1c722c..99c057030a4cc 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -5848,64 +5848,56 @@ _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
(__v8sf)_mm256_setzero_ps()))
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
-{
+_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) {
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
(__v2df)_mm_permutevar_pd(__A, __C),
(__v2df)__W);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
-{
+_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) {
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
(__v2df)_mm_permutevar_pd(__A, __C),
(__v2df)_mm_setzero_pd());
}
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
-{
+_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) {
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
(__v4df)_mm256_permutevar_pd(__A, __C),
(__v4df)__W);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
-{
+_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) {
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
(__v4df)_mm256_permutevar_pd(__A, __C),
(__v4df)_mm256_setzero_pd());
}
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
-{
+_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) {
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm_permutevar_ps(__A, __C),
(__v4sf)__W);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
-{
+_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) {
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm_permutevar_ps(__A, __C),
(__v4sf)_mm_setzero_ps());
}
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
-{
+_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) {
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
(__v8sf)_mm256_permutevar_ps(__A, __C),
(__v8sf)__W);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
-{
+_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) {
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
(__v8sf)_mm256_permutevar_ps(__A, __C),
(__v8sf)_mm256_setzero_ps());
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 1ea15b3e68811..44ef88db5cbce 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -788,8 +788,7 @@ static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_hsub_ps(__m256 __a,
/// returned vector.
/// \returns A 128-bit vector of [2 x double] containing the copied values.
static __inline __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_permutevar_pd(__m128d __a, __m128i __c)
-{
+_mm_permutevar_pd(__m128d __a, __m128i __c) {
return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c);
}
@@ -827,8 +826,7 @@ _mm_permutevar_pd(__m128d __a, __m128i __c)
/// returned vector.
/// \returns A 256-bit vector of [4 x double] containing the copied values.
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm256_permutevar_pd(__m256d __a, __m256i __c)
-{
+_mm256_permutevar_pd(__m256d __a, __m256i __c) {
return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c);
}
@@ -882,8 +880,7 @@ _mm256_permutevar_pd(__m256d __a, __m256i __c)
/// returned vector.
/// \returns A 128-bit vector of [4 x float] containing the copied values.
static __inline __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_permutevar_ps(__m128 __a, __m128i __c)
-{
+_mm_permutevar_ps(__m128 __a, __m128i __c) {
return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c);
}
@@ -973,8 +970,7 @@ _mm_permutevar_ps(__m128 __a, __m128i __c)
/// returned vector.
/// \returns A 256-bit vector of [8 x float] containing the copied values.
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm256_permutevar_ps(__m256 __a, __m256i __c)
-{
+_mm256_permutevar_ps(__m256 __a, __m256i __c) {
return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c);
}
>From 7459dc456239005e395abfe19e67bd870d09922a Mon Sep 17 00:00:00 2001
From: stomfaig <stomfaig at gmail.com>
Date: Thu, 20 Nov 2025 11:53:47 +0000
Subject: [PATCH 7/7] format again
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 6 ++++--
clang/lib/AST/ExprConstant.cpp | 8 +++++---
2 files changed, 9 insertions(+), 5 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 316595b81224c..511b8032b28e6 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4639,7 +4639,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
unsigned NumElemPerLane = 2;
unsigned Lane = DstIdx / NumElemPerLane;
unsigned Offset = ShuffleMask & 0b10 ? 1 : 0;
- return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + Offset));
+ return std::make_pair(
+ 0, static_cast<int>(Lane * NumElemPerLane + Offset));
});
case X86::BI__builtin_ia32_vpermilvarps:
@@ -4650,7 +4651,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
unsigned NumElemPerLane = 4;
unsigned Lane = DstIdx / NumElemPerLane;
unsigned Offset = ShuffleMask & 0b11;
- return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + Offset));
+ return std::make_pair(
+ 0, static_cast<int>(Lane * NumElemPerLane + Offset));
});
case X86::BI__builtin_ia32_vpermilpd:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 36a37723d75be..5c4e844ece04c 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13053,7 +13053,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
unsigned NumElemPerLane = 2;
unsigned Lane = DstIdx / NumElemPerLane;
unsigned Offset = Mask & 0b10 ? 1 : 0;
- return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + Offset));
+ return std::make_pair(
+ 0, static_cast<int>(Lane * NumElemPerLane + Offset));
}))
return false;
return Success(R, E);
@@ -13088,12 +13089,13 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
unsigned NumElemPerLane = 4;
unsigned Lane = DstIdx / NumElemPerLane;
unsigned Offset = Mask & 0b11;
- return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + Offset));
+ return std::make_pair(
+ 0, static_cast<int>(Lane * NumElemPerLane + Offset));
}))
return false;
return Success(R, E);
}
-
+
case X86::BI__builtin_ia32_phminposuw128: {
APValue Source;
if (!Evaluate(Source, Info, E->getArg(0)))
More information about the cfe-commits mailing list