[clang] [Clang] Add constexpr support for AVX512 permutex2 intrinsics (PR #165085)
via cfe-commits
cfe-commits at lists.llvm.org
Sat Oct 25 01:13:49 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: NagaChaitanya Vellanki (chaitanyav)
<details>
<summary>Changes</summary>
This patch enables compile-time evaluation of AVX512 permutex2var intrinsics in constexpr contexts.
Extend shuffle generic to handle both integer immediate and vector mask operands.
Resolves #<!-- -->161335
---
Patch is 112.81 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/165085.diff
19 Files Affected:
- (modified) clang/include/clang/Basic/BuiltinsX86.td (+17-44)
- (modified) clang/lib/AST/ByteCode/InterpBuiltin.cpp (+85-3)
- (modified) clang/lib/AST/ExprConstant.cpp (+103-8)
- (modified) clang/lib/Headers/avx10_2_512bf16intrin.h (+8-1)
- (modified) clang/lib/Headers/avx10_2bf16intrin.h (+12-3)
- (modified) clang/lib/Headers/avx512bwintrin.h (+8-12)
- (modified) clang/lib/Headers/avx512fintrin.h (+36-49)
- (modified) clang/lib/Headers/avx512fp16intrin.h (+2-2)
- (modified) clang/lib/Headers/avx512vbmiintrin.h (+22-26)
- (modified) clang/lib/Headers/avx512vbmivlintrin.h (+40-46)
- (modified) clang/lib/Headers/avx512vlbwintrin.h (+18-26)
- (modified) clang/lib/Headers/avx512vlfp16intrin.h (+4-4)
- (modified) clang/lib/Headers/avx512vlintrin.h (+40-37)
- (modified) clang/test/CodeGen/X86/avx512bw-builtins.c (+153)
- (modified) clang/test/CodeGen/X86/avx512f-builtins.c (+120)
- (modified) clang/test/CodeGen/X86/avx512vbmi-builtins.c (+154)
- (modified) clang/test/CodeGen/X86/avx512vbmivl-builtin.c (+66-4)
- (modified) clang/test/CodeGen/X86/avx512vl-builtins.c (+128)
- (modified) clang/test/CodeGen/X86/avx512vlbw-builtins.c (+80)
``````````diff
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 0c85e280e748b..72e67d7dda3bc 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -1746,75 +1746,48 @@ let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
def scattersiv8si : X86Builtin<"void(void *, unsigned char, _Vector<8, int>, _Vector<8, int>, _Constant int)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vpermi2vard128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
-}
-
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpermi2vard256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
-}
-
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
- def vpermi2vard512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
-}
-
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def vpermi2varpd128 : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, long long int>, _Vector<2, double>)">;
-}
-
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpermi2varpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>, _Vector<4, double>)">;
-}
-
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
- def vpermi2varpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, long long int>, _Vector<8, double>)">;
-}
-
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def vpermi2varps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, int>, _Vector<4, float>)">;
-}
-
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpermi2varps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>, _Vector<8, float>)">;
-}
-
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
- def vpermi2varps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>, _Vector<16, float>)">;
-}
-
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def vpermi2varq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">;
+ def vpermi2varps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, int>, _Vector<4, float>)">;
+ def vpermi2varpd128 : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, long long int>, _Vector<2, double>)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+ def vpermi2vard256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
def vpermi2varq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">;
+ def vpermi2varps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>, _Vector<8, float>)">;
+ def vpermi2varpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>, _Vector<4, double>)">;
}
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
+ def vpermi2vard512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
def vpermi2varq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">;
+ def vpermi2varps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>, _Vector<16, float>)">;
+ def vpermi2varpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, long long int>, _Vector<8, double>)">;
}
-let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vpermi2varqi128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Vector<16, char>)">;
}
-let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vpermi2varqi256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Vector<32, char>)">;
}
-let Features = "avx512vbmi", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512vbmi", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def vpermi2varqi512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>, _Vector<64, char>)">;
}
-let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vpermi2varhi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Vector<8, short>)">;
}
-let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vpermi2varhi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Vector<16, short>)">;
}
-let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def vpermi2varhi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Vector<32, short>)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index d0b97a18e1815..f249a113a95ab 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3358,18 +3358,46 @@ static bool interp__builtin_ia32_shuffle_generic(
GetSourceIndex) {
assert(Call->getNumArgs() == 3);
- unsigned ShuffleMask = popToAPSInt(S, Call->getArg(2)).getZExtValue();
+
+ unsigned ShuffleMask = 0;
+ Pointer A, MaskVector, B;
+
+ QualType Arg2Type = Call->getArg(2)->getType();
+ bool IsVectorMask = false;
+ if (Arg2Type->isVectorType()) {
+ IsVectorMask = true;
+ B = S.Stk.pop<Pointer>();
+ MaskVector = S.Stk.pop<Pointer>();
+ A = S.Stk.pop<Pointer>();
+ } else if (Arg2Type->isIntegerType()) {
+ ShuffleMask = popToAPSInt(S, Call->getArg(2)).getZExtValue();
+ B = S.Stk.pop<Pointer>();
+ A = S.Stk.pop<Pointer>();
+ } else {
+ return false;
+ }
QualType Arg0Type = Call->getArg(0)->getType();
const auto *VecT = Arg0Type->castAs<VectorType>();
PrimType ElemT = *S.getContext().classify(VecT->getElementType());
unsigned NumElems = VecT->getNumElements();
- const Pointer &B = S.Stk.pop<Pointer>();
- const Pointer &A = S.Stk.pop<Pointer>();
const Pointer &Dst = S.Stk.peek<Pointer>();
+ PrimType MaskElemT = PT_Uint32;
+ if (IsVectorMask) {
+ QualType Arg1Type = Call->getArg(1)->getType();
+ const auto *MaskVecT = Arg1Type->castAs<VectorType>();
+ QualType MaskElemType = MaskVecT->getElementType();
+ MaskElemT = *S.getContext().classify(MaskElemType);
+ }
+
for (unsigned DstIdx = 0; DstIdx != NumElems; ++DstIdx) {
+ if (IsVectorMask) {
+ INT_TYPE_SWITCH(MaskElemT, {
+ ShuffleMask = static_cast<unsigned>(MaskVector.elem<T>(DstIdx));
+ });
+ }
auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask);
const Pointer &Src = (SrcVecIdx == 0) ? A : B;
TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = Src.elem<T>(SrcIdx); });
@@ -4345,6 +4373,60 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
return std::pair<unsigned, unsigned>{SrcIdx, LaneOffset + Index};
});
+ case X86::BI__builtin_ia32_vpermi2varq128:
+ case X86::BI__builtin_ia32_vpermi2varpd128:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned offset = ShuffleMask & 0x1;
+ unsigned SrcIdx = (ShuffleMask >> 1) & 0x1 ? 1 : 0;
+ return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ });
+ case X86::BI__builtin_ia32_vpermi2vard128:
+ case X86::BI__builtin_ia32_vpermi2varps128:
+ case X86::BI__builtin_ia32_vpermi2varq256:
+ case X86::BI__builtin_ia32_vpermi2varpd256:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned offset = ShuffleMask & 0x3;
+ unsigned SrcIdx = (ShuffleMask >> 2) & 0x1 ? 1 : 0;
+ return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ });
+ case X86::BI__builtin_ia32_vpermi2varhi128:
+ case X86::BI__builtin_ia32_vpermi2vard256:
+ case X86::BI__builtin_ia32_vpermi2varps256:
+ case X86::BI__builtin_ia32_vpermi2varq512:
+ case X86::BI__builtin_ia32_vpermi2varpd512:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned offset = ShuffleMask & 0x7;
+ unsigned SrcIdx = (ShuffleMask >> 3) & 0x1 ? 1 : 0;
+ return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ });
+ case X86::BI__builtin_ia32_vpermi2varqi128:
+ case X86::BI__builtin_ia32_vpermi2varhi256:
+ case X86::BI__builtin_ia32_vpermi2vard512:
+ case X86::BI__builtin_ia32_vpermi2varps512:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned offset = ShuffleMask & 0xF;
+ unsigned SrcIdx = (ShuffleMask >> 4) & 0x1 ? 1 : 0;
+ return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ });
+ case X86::BI__builtin_ia32_vpermi2varqi256:
+ case X86::BI__builtin_ia32_vpermi2varhi512:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned offset = ShuffleMask & 0x1F;
+ unsigned SrcIdx = (ShuffleMask >> 5) & 0x1 ? 1 : 0;
+ return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ });
+ case X86::BI__builtin_ia32_vpermi2varqi512:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned offset = ShuffleMask & 0x3F;
+ unsigned SrcIdx = (ShuffleMask >> 6) & 0x1 ? 1 : 0;
+ return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ });
case X86::BI__builtin_ia32_pshufb128:
case X86::BI__builtin_ia32_pshufb256:
case X86::BI__builtin_ia32_pshufb512:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 29ee089505125..1427005b9bd79 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11628,21 +11628,38 @@ static bool evalShuffleGeneric(
if (!VT)
return false;
- APSInt MaskImm;
- if (!EvaluateInteger(Call->getArg(2), MaskImm, Info))
- return false;
- unsigned ShuffleMask = static_cast<unsigned>(MaskImm.getZExtValue());
+ unsigned ShuffleMask = 0;
+ APValue A, MaskVector, B;
+ bool IsVectorMask = false;
- APValue A, B;
- if (!EvaluateAsRValue(Info, Call->getArg(0), A) ||
- !EvaluateAsRValue(Info, Call->getArg(1), B))
+ QualType Arg2Type = Call->getArg(2)->getType();
+ if (Arg2Type->isVectorType()) {
+ IsVectorMask = true;
+ if (!EvaluateAsRValue(Info, Call->getArg(0), A) ||
+ !EvaluateAsRValue(Info, Call->getArg(1), MaskVector) ||
+ !EvaluateAsRValue(Info, Call->getArg(2), B))
+ return false;
+ } else if (Arg2Type->isIntegerType()) {
+ APSInt MaskImm;
+ if (!EvaluateInteger(Call->getArg(2), MaskImm, Info))
+ return false;
+ ShuffleMask = static_cast<unsigned>(MaskImm.getZExtValue());
+ if (!EvaluateAsRValue(Info, Call->getArg(0), A) ||
+ !EvaluateAsRValue(Info, Call->getArg(1), B))
+ return false;
+ } else {
return false;
+ }
unsigned NumElts = VT->getNumElements();
- SmallVector<APValue, 16> ResultElements;
+ SmallVector<APValue, 64> ResultElements;
ResultElements.reserve(NumElts);
for (unsigned DstIdx = 0; DstIdx != NumElts; ++DstIdx) {
+ if (IsVectorMask) {
+ ShuffleMask = static_cast<unsigned>(
+ MaskVector.getVectorElt(DstIdx).getInt().getZExtValue());
+ }
auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask);
const APValue &Src = (SrcVecIdx == 0) ? A : B;
ResultElements.push_back(Src.getVectorElt(SrcIdx));
@@ -13048,6 +13065,84 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+ case X86::BI__builtin_ia32_vpermi2varq128:
+ case X86::BI__builtin_ia32_vpermi2varpd128: {
+ APValue R;
+ if (!evalShuffleGeneric(
+ Info, E, R, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned offset = ShuffleMask & 0x1;
+ unsigned SrcIdx = (ShuffleMask >> 1) & 0x1 ? 1 : 0;
+ return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
+ case X86::BI__builtin_ia32_vpermi2vard128:
+ case X86::BI__builtin_ia32_vpermi2varps128:
+ case X86::BI__builtin_ia32_vpermi2varq256:
+ case X86::BI__builtin_ia32_vpermi2varpd256: {
+ APValue R;
+ if (!evalShuffleGeneric(
+ Info, E, R, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned offset = ShuffleMask & 0x3;
+ unsigned SrcIdx = (ShuffleMask >> 2) & 0x1 ? 1 : 0;
+ return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
+ case X86::BI__builtin_ia32_vpermi2varhi128:
+ case X86::BI__builtin_ia32_vpermi2vard256:
+ case X86::BI__builtin_ia32_vpermi2varps256:
+ case X86::BI__builtin_ia32_vpermi2varq512:
+ case X86::BI__builtin_ia32_vpermi2varpd512: {
+ APValue R;
+ if (!evalShuffleGeneric(
+ Info, E, R, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned offset = ShuffleMask & 0x7;
+ unsigned SrcIdx = (ShuffleMask >> 3) & 0x1 ? 1 : 0;
+ return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
+ case X86::BI__builtin_ia32_vpermi2varqi128:
+ case X86::BI__builtin_ia32_vpermi2varhi256:
+ case X86::BI__builtin_ia32_vpermi2vard512:
+ case X86::BI__builtin_ia32_vpermi2varps512: {
+ APValue R;
+ if (!evalShuffleGeneric(
+ Info, E, R, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned offset = ShuffleMask & 0xF;
+ unsigned SrcIdx = (ShuffleMask >> 4) & 0x1 ? 1 : 0;
+ return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
+ case X86::BI__builtin_ia32_vpermi2varqi256:
+ case X86::BI__builtin_ia32_vpermi2varhi512: {
+ APValue R;
+ if (!evalShuffleGeneric(
+ Info, E, R, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned offset = ShuffleMask & 0x1F;
+ unsigned SrcIdx = (ShuffleMask >> 5) & 0x1 ? 1 : 0;
+ return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
+ case X86::BI__builtin_ia32_vpermi2varqi512: {
+ APValue R;
+ if (!evalShuffleGeneric(
+ Info, E, R, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned offset = ShuffleMask & 0x3F;
+ unsigned SrcIdx = (ShuffleMask >> 6) & 0x1 ? 1 : 0;
+ return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
}
}
diff --git a/clang/lib/Headers/avx10_2_512bf16intrin.h b/clang/lib/Headers/avx10_2_512bf16intrin.h
index 37ebc4f46a826..46ec12a63ef9c 100644
--- a/clang/lib/Headers/avx10_2_512bf16intrin.h
+++ b/clang/lib/Headers/avx10_2_512bf16intrin.h
@@ -24,6 +24,12 @@ typedef __bf16 __m512bh_u __attribute__((__vector_size__(64), __aligned__(1)));
__attribute__((__always_inline__, __nodebug__, __target__("avx10.2"), \
__min_vector_width__(512)))
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr
+#else
+#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512
+#endif
+
static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_setzero_pbh(void) {
return __builtin_bit_cast(__m512bh, _mm512_setzero_ps());
}
@@ -167,7 +173,7 @@ _mm512_mask_blend_pbh(__mmask32 __U, __m512bh __A, __m512bh __W) {
(__v32bf)__A);
}
-static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_permutex2var_pbh(__m512bh __A, __m512i __I, __m512bh __B) {
return (__m512bh)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
(__v32hi)__B);
@@ -555,6 +561,7 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_pbh(
(__v32bf)_mm512_setzero_pbh());
}
+#undef __DEFAULT_FN_ATTRS512_CONSTEXPR
#undef __DEFAULT_FN_ATTRS512
#endif
diff --git a/clang/lib/Headers/avx10_2bf16intrin.h b/clang/lib/Headers/avx10_2bf16intrin.h
index 765cd682986b4..8fb8cd7cd0865 100644
--- a/clang/lib/Headers/avx10_2bf16intrin.h
+++ b/clang/lib/Headers/avx10_2bf16intrin.h
@@ -27,6 +27,14 @@ typedef __bf16 __m256bh_u __attribute__((__vector_size__(32), __aligned__(1)));
__attribute__((__always_inline__, __nodebug__, __target__("avx10.2"), \
__min_vector_width__(128)))
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#else
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#endif
+
static __inline __m256bh __DEFAULT_FN_ATTRS256 _mm256_setzero_pbh(void) {
return __builtin_bit_cast(__m256bh, _mm256_setzero_ps());
}
@@ -287,13 +295,13 @@ _mm256_mask_blend_pbh(__mmask16 __U, __m256bh __A, __m256bh __W) {
(__v16bf)__A);
}
-static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_permutex2var_pbh(__m128bh __A, __m128i __I, __m128bh __B) {
return (__m128bh)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I,
(__v8hi)__B);
}
-static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_permutex2var_pbh(__m256bh __A, __m256i __I, __m256bh __B) {
return (__m256bh)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I,
(__v16hi)__B);
@@ -1080,6 +1088,7 @@ _mm_maskz_fnmsub_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
-
+#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
#endif
#endif
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index ac75b6ccde735..aab1f2b61ab8a 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -969,35 +969,31 @@ _mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
(__v32hi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_permutex2var_epi16(__m512i __A, _...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/165085
More information about the cfe-commits
mailing list