[clang] [Clang] Add constexpr support for AVX512 permutex2 intrinsics (PR #165085)
NagaChaitanya Vellanki via cfe-commits
cfe-commits at lists.llvm.org
Wed Oct 29 08:59:46 PDT 2025
https://github.com/chaitanyav updated https://github.com/llvm/llvm-project/pull/165085
>From 8c3309c971773586bf558c6d218b238ad037763b Mon Sep 17 00:00:00 2001
From: NagaChaitanya Vellanki <pnagato at protonmail.com>
Date: Fri, 24 Oct 2025 11:38:31 -0700
Subject: [PATCH 1/2] [Clang] Add constexpr support for AVX512 permutex2
intrinsics
This patch enables compile-time evaluation of AVX512 permutex2var
intrinsics in constexpr contexts.
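Extend the generic shuffle helpers (interp__builtin_ia32_shuffle_generic
and evalShuffleGeneric) to handle both an integer immediate mask and a
vector mask operand (passed as the second argument, between the two
source vectors).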
Resolves #161335
---
clang/include/clang/Basic/BuiltinsX86.td | 61 ++-----
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 88 +++++++++-
clang/lib/AST/ExprConstant.cpp | 111 ++++++++++++-
clang/lib/Headers/avx10_2_512bf16intrin.h | 9 +-
clang/lib/Headers/avx10_2bf16intrin.h | 15 +-
clang/lib/Headers/avx512bwintrin.h | 20 +--
clang/lib/Headers/avx512fintrin.h | 85 ++++------
clang/lib/Headers/avx512fp16intrin.h | 4 +-
clang/lib/Headers/avx512vbmiintrin.h | 48 +++---
clang/lib/Headers/avx512vbmivlintrin.h | 86 +++++-----
clang/lib/Headers/avx512vlbwintrin.h | 44 ++---
clang/lib/Headers/avx512vlfp16intrin.h | 8 +-
clang/lib/Headers/avx512vlintrin.h | 77 ++++-----
clang/test/CodeGen/X86/avx512bw-builtins.c | 153 +++++++++++++++++
clang/test/CodeGen/X86/avx512f-builtins.c | 120 ++++++++++++++
clang/test/CodeGen/X86/avx512vbmi-builtins.c | 154 ++++++++++++++++++
clang/test/CodeGen/X86/avx512vbmivl-builtin.c | 70 +++++++-
clang/test/CodeGen/X86/avx512vl-builtins.c | 128 +++++++++++++++
clang/test/CodeGen/X86/avx512vlbw-builtins.c | 80 +++++++++
19 files changed, 1096 insertions(+), 265 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 500aa85fe5356..cbb51c107830c 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -1764,75 +1764,48 @@ let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
def scattersiv8si : X86Builtin<"void(void *, unsigned char, _Vector<8, int>, _Vector<8, int>, _Constant int)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vpermi2vard128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
-}
-
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpermi2vard256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
-}
-
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
- def vpermi2vard512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
-}
-
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def vpermi2varpd128 : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, long long int>, _Vector<2, double>)">;
-}
-
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpermi2varpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>, _Vector<4, double>)">;
-}
-
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
- def vpermi2varpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, long long int>, _Vector<8, double>)">;
-}
-
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def vpermi2varps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, int>, _Vector<4, float>)">;
-}
-
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpermi2varps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>, _Vector<8, float>)">;
-}
-
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
- def vpermi2varps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>, _Vector<16, float>)">;
-}
-
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def vpermi2varq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">;
+ def vpermi2varps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, int>, _Vector<4, float>)">;
+ def vpermi2varpd128 : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, long long int>, _Vector<2, double>)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+ def vpermi2vard256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
def vpermi2varq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">;
+ def vpermi2varps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>, _Vector<8, float>)">;
+ def vpermi2varpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>, _Vector<4, double>)">;
}
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
+ def vpermi2vard512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
def vpermi2varq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">;
+ def vpermi2varps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>, _Vector<16, float>)">;
+ def vpermi2varpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, long long int>, _Vector<8, double>)">;
}
-let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vpermi2varqi128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Vector<16, char>)">;
}
-let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vpermi2varqi256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Vector<32, char>)">;
}
-let Features = "avx512vbmi", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512vbmi", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def vpermi2varqi512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>, _Vector<64, char>)">;
}
-let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vpermi2varhi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Vector<8, short>)">;
}
-let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vpermi2varhi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Vector<16, short>)">;
}
-let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def vpermi2varhi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Vector<32, short>)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index b3ab82da5e01a..3731df0dd1699 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3415,18 +3415,46 @@ static bool interp__builtin_ia32_shuffle_generic(
GetSourceIndex) {
assert(Call->getNumArgs() == 3);
- unsigned ShuffleMask = popToAPSInt(S, Call->getArg(2)).getZExtValue();
+
+ unsigned ShuffleMask = 0;
+ Pointer A, MaskVector, B;
+
+ QualType Arg2Type = Call->getArg(2)->getType();
+ bool IsVectorMask = false;
+ if (Arg2Type->isVectorType()) {
+ IsVectorMask = true;
+ B = S.Stk.pop<Pointer>();
+ MaskVector = S.Stk.pop<Pointer>();
+ A = S.Stk.pop<Pointer>();
+ } else if (Arg2Type->isIntegerType()) {
+ ShuffleMask = popToAPSInt(S, Call->getArg(2)).getZExtValue();
+ B = S.Stk.pop<Pointer>();
+ A = S.Stk.pop<Pointer>();
+ } else {
+ return false;
+ }
QualType Arg0Type = Call->getArg(0)->getType();
const auto *VecT = Arg0Type->castAs<VectorType>();
PrimType ElemT = *S.getContext().classify(VecT->getElementType());
unsigned NumElems = VecT->getNumElements();
- const Pointer &B = S.Stk.pop<Pointer>();
- const Pointer &A = S.Stk.pop<Pointer>();
const Pointer &Dst = S.Stk.peek<Pointer>();
+ PrimType MaskElemT = PT_Uint32;
+ if (IsVectorMask) {
+ QualType Arg1Type = Call->getArg(1)->getType();
+ const auto *MaskVecT = Arg1Type->castAs<VectorType>();
+ QualType MaskElemType = MaskVecT->getElementType();
+ MaskElemT = *S.getContext().classify(MaskElemType);
+ }
+
for (unsigned DstIdx = 0; DstIdx != NumElems; ++DstIdx) {
+ if (IsVectorMask) {
+ INT_TYPE_SWITCH(MaskElemT, {
+ ShuffleMask = static_cast<unsigned>(MaskVector.elem<T>(DstIdx));
+ });
+ }
auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask);
const Pointer &Src = (SrcVecIdx == 0) ? A : B;
TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = Src.elem<T>(SrcIdx); });
@@ -4402,6 +4430,60 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
return std::pair<unsigned, unsigned>{SrcIdx, LaneOffset + Index};
});
+ case X86::BI__builtin_ia32_vpermi2varq128:
+ case X86::BI__builtin_ia32_vpermi2varpd128:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned offset = ShuffleMask & 0x1;
+ unsigned SrcIdx = (ShuffleMask >> 1) & 0x1 ? 1 : 0;
+ return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ });
+ case X86::BI__builtin_ia32_vpermi2vard128:
+ case X86::BI__builtin_ia32_vpermi2varps128:
+ case X86::BI__builtin_ia32_vpermi2varq256:
+ case X86::BI__builtin_ia32_vpermi2varpd256:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned offset = ShuffleMask & 0x3;
+ unsigned SrcIdx = (ShuffleMask >> 2) & 0x1 ? 1 : 0;
+ return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ });
+ case X86::BI__builtin_ia32_vpermi2varhi128:
+ case X86::BI__builtin_ia32_vpermi2vard256:
+ case X86::BI__builtin_ia32_vpermi2varps256:
+ case X86::BI__builtin_ia32_vpermi2varq512:
+ case X86::BI__builtin_ia32_vpermi2varpd512:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned offset = ShuffleMask & 0x7;
+ unsigned SrcIdx = (ShuffleMask >> 3) & 0x1 ? 1 : 0;
+ return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ });
+ case X86::BI__builtin_ia32_vpermi2varqi128:
+ case X86::BI__builtin_ia32_vpermi2varhi256:
+ case X86::BI__builtin_ia32_vpermi2vard512:
+ case X86::BI__builtin_ia32_vpermi2varps512:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned offset = ShuffleMask & 0xF;
+ unsigned SrcIdx = (ShuffleMask >> 4) & 0x1 ? 1 : 0;
+ return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ });
+ case X86::BI__builtin_ia32_vpermi2varqi256:
+ case X86::BI__builtin_ia32_vpermi2varhi512:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned offset = ShuffleMask & 0x1F;
+ unsigned SrcIdx = (ShuffleMask >> 5) & 0x1 ? 1 : 0;
+ return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ });
+ case X86::BI__builtin_ia32_vpermi2varqi512:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned offset = ShuffleMask & 0x3F;
+ unsigned SrcIdx = (ShuffleMask >> 6) & 0x1 ? 1 : 0;
+ return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ });
case X86::BI__builtin_ia32_pshufb128:
case X86::BI__builtin_ia32_pshufb256:
case X86::BI__builtin_ia32_pshufb512:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index d0404b957ab03..21fae8937a36a 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11628,21 +11628,38 @@ static bool evalShuffleGeneric(
if (!VT)
return false;
- APSInt MaskImm;
- if (!EvaluateInteger(Call->getArg(2), MaskImm, Info))
- return false;
- unsigned ShuffleMask = static_cast<unsigned>(MaskImm.getZExtValue());
+ unsigned ShuffleMask = 0;
+ APValue A, MaskVector, B;
+ bool IsVectorMask = false;
- APValue A, B;
- if (!EvaluateAsRValue(Info, Call->getArg(0), A) ||
- !EvaluateAsRValue(Info, Call->getArg(1), B))
+ QualType Arg2Type = Call->getArg(2)->getType();
+ if (Arg2Type->isVectorType()) {
+ IsVectorMask = true;
+ if (!EvaluateAsRValue(Info, Call->getArg(0), A) ||
+ !EvaluateAsRValue(Info, Call->getArg(1), MaskVector) ||
+ !EvaluateAsRValue(Info, Call->getArg(2), B))
+ return false;
+ } else if (Arg2Type->isIntegerType()) {
+ APSInt MaskImm;
+ if (!EvaluateInteger(Call->getArg(2), MaskImm, Info))
+ return false;
+ ShuffleMask = static_cast<unsigned>(MaskImm.getZExtValue());
+ if (!EvaluateAsRValue(Info, Call->getArg(0), A) ||
+ !EvaluateAsRValue(Info, Call->getArg(1), B))
+ return false;
+ } else {
return false;
+ }
unsigned NumElts = VT->getNumElements();
- SmallVector<APValue, 16> ResultElements;
+ SmallVector<APValue, 64> ResultElements;
ResultElements.reserve(NumElts);
for (unsigned DstIdx = 0; DstIdx != NumElts; ++DstIdx) {
+ if (IsVectorMask) {
+ ShuffleMask = static_cast<unsigned>(
+ MaskVector.getVectorElt(DstIdx).getInt().getZExtValue());
+ }
auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask);
const APValue &Src = (SrcVecIdx == 0) ? A : B;
ResultElements.push_back(Src.getVectorElt(SrcIdx));
@@ -13048,6 +13065,84 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+ case X86::BI__builtin_ia32_vpermi2varq128:
+ case X86::BI__builtin_ia32_vpermi2varpd128: {
+ APValue R;
+ if (!evalShuffleGeneric(
+ Info, E, R, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned offset = ShuffleMask & 0x1;
+ unsigned SrcIdx = (ShuffleMask >> 1) & 0x1 ? 1 : 0;
+ return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
+ case X86::BI__builtin_ia32_vpermi2vard128:
+ case X86::BI__builtin_ia32_vpermi2varps128:
+ case X86::BI__builtin_ia32_vpermi2varq256:
+ case X86::BI__builtin_ia32_vpermi2varpd256: {
+ APValue R;
+ if (!evalShuffleGeneric(
+ Info, E, R, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned offset = ShuffleMask & 0x3;
+ unsigned SrcIdx = (ShuffleMask >> 2) & 0x1 ? 1 : 0;
+ return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
+ case X86::BI__builtin_ia32_vpermi2varhi128:
+ case X86::BI__builtin_ia32_vpermi2vard256:
+ case X86::BI__builtin_ia32_vpermi2varps256:
+ case X86::BI__builtin_ia32_vpermi2varq512:
+ case X86::BI__builtin_ia32_vpermi2varpd512: {
+ APValue R;
+ if (!evalShuffleGeneric(
+ Info, E, R, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned offset = ShuffleMask & 0x7;
+ unsigned SrcIdx = (ShuffleMask >> 3) & 0x1 ? 1 : 0;
+ return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
+ case X86::BI__builtin_ia32_vpermi2varqi128:
+ case X86::BI__builtin_ia32_vpermi2varhi256:
+ case X86::BI__builtin_ia32_vpermi2vard512:
+ case X86::BI__builtin_ia32_vpermi2varps512: {
+ APValue R;
+ if (!evalShuffleGeneric(
+ Info, E, R, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned offset = ShuffleMask & 0xF;
+ unsigned SrcIdx = (ShuffleMask >> 4) & 0x1 ? 1 : 0;
+ return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
+ case X86::BI__builtin_ia32_vpermi2varqi256:
+ case X86::BI__builtin_ia32_vpermi2varhi512: {
+ APValue R;
+ if (!evalShuffleGeneric(
+ Info, E, R, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned offset = ShuffleMask & 0x1F;
+ unsigned SrcIdx = (ShuffleMask >> 5) & 0x1 ? 1 : 0;
+ return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
+ case X86::BI__builtin_ia32_vpermi2varqi512: {
+ APValue R;
+ if (!evalShuffleGeneric(
+ Info, E, R, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned offset = ShuffleMask & 0x3F;
+ unsigned SrcIdx = (ShuffleMask >> 6) & 0x1 ? 1 : 0;
+ return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ }))
+ return false;
+ return Success(R, E);
+ }
}
}
diff --git a/clang/lib/Headers/avx10_2_512bf16intrin.h b/clang/lib/Headers/avx10_2_512bf16intrin.h
index 37ebc4f46a826..46ec12a63ef9c 100644
--- a/clang/lib/Headers/avx10_2_512bf16intrin.h
+++ b/clang/lib/Headers/avx10_2_512bf16intrin.h
@@ -24,6 +24,12 @@ typedef __bf16 __m512bh_u __attribute__((__vector_size__(64), __aligned__(1)));
__attribute__((__always_inline__, __nodebug__, __target__("avx10.2"), \
__min_vector_width__(512)))
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr
+#else
+#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512
+#endif
+
static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_setzero_pbh(void) {
return __builtin_bit_cast(__m512bh, _mm512_setzero_ps());
}
@@ -167,7 +173,7 @@ _mm512_mask_blend_pbh(__mmask32 __U, __m512bh __A, __m512bh __W) {
(__v32bf)__A);
}
-static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_permutex2var_pbh(__m512bh __A, __m512i __I, __m512bh __B) {
return (__m512bh)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
(__v32hi)__B);
@@ -555,6 +561,7 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_pbh(
(__v32bf)_mm512_setzero_pbh());
}
+#undef __DEFAULT_FN_ATTRS512_CONSTEXPR
#undef __DEFAULT_FN_ATTRS512
#endif
diff --git a/clang/lib/Headers/avx10_2bf16intrin.h b/clang/lib/Headers/avx10_2bf16intrin.h
index 765cd682986b4..8fb8cd7cd0865 100644
--- a/clang/lib/Headers/avx10_2bf16intrin.h
+++ b/clang/lib/Headers/avx10_2bf16intrin.h
@@ -27,6 +27,14 @@ typedef __bf16 __m256bh_u __attribute__((__vector_size__(32), __aligned__(1)));
__attribute__((__always_inline__, __nodebug__, __target__("avx10.2"), \
__min_vector_width__(128)))
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#else
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#endif
+
static __inline __m256bh __DEFAULT_FN_ATTRS256 _mm256_setzero_pbh(void) {
return __builtin_bit_cast(__m256bh, _mm256_setzero_ps());
}
@@ -287,13 +295,13 @@ _mm256_mask_blend_pbh(__mmask16 __U, __m256bh __A, __m256bh __W) {
(__v16bf)__A);
}
-static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_permutex2var_pbh(__m128bh __A, __m128i __I, __m128bh __B) {
return (__m128bh)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I,
(__v8hi)__B);
}
-static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_permutex2var_pbh(__m256bh __A, __m256i __I, __m256bh __B) {
return (__m256bh)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I,
(__v16hi)__B);
@@ -1080,6 +1088,7 @@ _mm_maskz_fnmsub_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
-
+#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
#endif
#endif
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index ac75b6ccde735..aab1f2b61ab8a 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -969,35 +969,31 @@ _mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
(__v32hi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_permutex2var_epi16(__m512i __A, __m512i __I, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutex2var_epi16(__m512i __A, __m512i __I, __m512i __B) {
return (__m512i)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
(__v32hi)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_permutex2var_epi16(__m512i __A, __mmask32 __U, __m512i __I,
- __m512i __B)
-{
+ __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512(__U,
(__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
(__v32hi)__A);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U,
- __m512i __B)
-{
+ __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512(__U,
(__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
(__v32hi)__I);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I,
- __m512i __B)
-{
+ __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512(__U,
(__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
(__v32hi)_mm512_setzero_si512());
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 18c4a44a4c76e..5fc0afa49ce4c 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -3059,69 +3059,61 @@ _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
/* Vector permutations */
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) {
return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
(__v16si) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I,
- __m512i __B)
-{
+ __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512(__U,
(__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
(__v16si)__A);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U,
- __m512i __B)
-{
+ __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512(__U,
(__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
(__v16si)__I);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I,
- __m512i __B)
-{
+ __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512(__U,
(__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
(__v16si)_mm512_setzero_si512());
}
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B) {
return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
(__v8di) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I,
- __m512i __B)
-{
+ __m512i __B) {
return (__m512i)__builtin_ia32_selectq_512(__U,
(__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
(__v8di)__A);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U,
- __m512i __B)
-{
+ __m512i __B) {
return (__m512i)__builtin_ia32_selectq_512(__U,
(__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
(__v8di)__I);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
- __m512i __B)
-{
+ __m512i __B) {
return (__m512i)__builtin_ia32_selectq_512(__U,
(__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
(__v8di)_mm512_setzero_si512());
@@ -5949,71 +5941,66 @@ _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
(__v16sf)_mm512_setzero_ps());
}
-static __inline __m512d __DEFAULT_FN_ATTRS512
-_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
-{
+static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B) {
return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
(__v8df)__B);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I,
+ __m512d __B) {
return (__m512d)__builtin_ia32_selectpd_512(__U,
(__v8df)_mm512_permutex2var_pd(__A, __I, __B),
(__v8df)__A);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U,
- __m512d __B)
-{
+ __m512d __B) {
return (__m512d)__builtin_ia32_selectpd_512(__U,
(__v8df)_mm512_permutex2var_pd(__A, __I, __B),
(__v8df)(__m512d)__I);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I,
- __m512d __B)
-{
+ __m512d __B) {
return (__m512d)__builtin_ia32_selectpd_512(__U,
(__v8df)_mm512_permutex2var_pd(__A, __I, __B),
(__v8df)_mm512_setzero_pd());
}
-static __inline __m512 __DEFAULT_FN_ATTRS512
-_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
-{
+static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) {
return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
(__v16sf) __B);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I,
+ __m512 __B) {
return (__m512)__builtin_ia32_selectps_512(__U,
(__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
(__v16sf)__A);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U,
+ __m512 __B) {
return (__m512)__builtin_ia32_selectps_512(__U,
(__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
(__v16sf)(__m512)__I);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I,
+ __m512 __B) {
return (__m512)__builtin_ia32_selectps_512(__U,
(__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
(__v16sf)_mm512_setzero_ps());
}
-
#define _mm512_cvtt_roundpd_epu32(A, R) \
((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
(__v8si)_mm256_undefined_si256(), \
diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h
index 142cc079c2c4b..25051228f3e0a 100644
--- a/clang/lib/Headers/avx512fp16intrin.h
+++ b/clang/lib/Headers/avx512fp16intrin.h
@@ -3316,13 +3316,13 @@ _mm512_mask_blend_ph(__mmask32 __U, __m512h __A, __m512h __W) {
(__v32hf)__A);
}
-static __inline__ __m512h __DEFAULT_FN_ATTRS512
+static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_permutex2var_ph(__m512h __A, __m512i __I, __m512h __B) {
return (__m512h)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
(__v32hi)__B);
}
-static __inline__ __m512h __DEFAULT_FN_ATTRS512
+static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_permutexvar_ph(__m512i __A, __m512h __B) {
return (__m512h)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
}
diff --git a/clang/lib/Headers/avx512vbmiintrin.h b/clang/lib/Headers/avx512vbmiintrin.h
index 964535c4c4900..84fda5c5849e8 100644
--- a/clang/lib/Headers/avx512vbmiintrin.h
+++ b/clang/lib/Headers/avx512vbmiintrin.h
@@ -19,59 +19,57 @@
__attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"), \
__min_vector_width__(512)))
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_permutex2var_epi8(__m512i __A, __m512i __I, __m512i __B)
-{
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
+#else
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
+#endif
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_permutex2var_epi8(__m512i __A, __m512i __I, __m512i __B) {
return (__m512i)__builtin_ia32_vpermi2varqi512((__v64qi)__A, (__v64qi)__I,
(__v64qi) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_mask_permutex2var_epi8(__m512i __A, __mmask64 __U, __m512i __I,
- __m512i __B)
-{
+ __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512(__U,
(__v64qi)_mm512_permutex2var_epi8(__A, __I, __B),
(__v64qi)__A);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_mask2_permutex2var_epi8(__m512i __A, __m512i __I, __mmask64 __U,
- __m512i __B)
-{
+ __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512(__U,
(__v64qi)_mm512_permutex2var_epi8(__A, __I, __B),
(__v64qi)__I);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_maskz_permutex2var_epi8(__mmask64 __U, __m512i __A, __m512i __I,
- __m512i __B)
-{
+ __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512(__U,
(__v64qi)_mm512_permutex2var_epi8(__A, __I, __B),
(__v64qi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_permutexvar_epi8 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_permutexvar_epi8(__m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_permvarqi512((__v64qi) __B, (__v64qi) __A);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_permutexvar_epi8 (__mmask64 __M, __m512i __A,
- __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_maskz_permutexvar_epi8(__mmask64 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
(__v64qi)_mm512_permutexvar_epi8(__A, __B),
(__v64qi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_permutexvar_epi8 (__m512i __W, __mmask64 __M, __m512i __A,
- __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_mask_permutexvar_epi8(__m512i __W, __mmask64 __M, __m512i __A,
+ __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
(__v64qi)_mm512_permutexvar_epi8(__A, __B),
(__v64qi)__W);
@@ -99,8 +97,6 @@ _mm512_maskz_multishift_epi64_epi8(__mmask64 __M, __m512i __X, __m512i __Y)
(__v64qi)_mm512_multishift_epi64_epi8(__X, __Y),
(__v64qi)_mm512_setzero_si512());
}
-
-
+#undef __DEFAULT_FN_ATTRS_CONSTEXPR
#undef __DEFAULT_FN_ATTRS
-
#endif
diff --git a/clang/lib/Headers/avx512vbmivlintrin.h b/clang/lib/Headers/avx512vbmivlintrin.h
index 4c50be7d9e7e5..58a48dadff863 100644
--- a/clang/lib/Headers/avx512vbmivlintrin.h
+++ b/clang/lib/Headers/avx512vbmivlintrin.h
@@ -24,117 +24,110 @@
__target__("avx512vbmi,avx512vl"), \
__min_vector_width__(256)))
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B)
-{
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#else
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#endif
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B) {
return (__m128i)__builtin_ia32_vpermi2varqi128((__v16qi)__A,
(__v16qi)__I,
(__v16qi)__B);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U, __m128i __I,
- __m128i __B)
-{
+ __m128i __B) {
return (__m128i)__builtin_ia32_selectb_128(__U,
(__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
(__v16qi)__A);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I, __mmask16 __U,
- __m128i __B)
-{
+ __m128i __B) {
return (__m128i)__builtin_ia32_selectb_128(__U,
(__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
(__v16qi)__I);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __m128i __I,
- __m128i __B)
-{
+ __m128i __B) {
return (__m128i)__builtin_ia32_selectb_128(__U,
(__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
(__v16qi)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B) {
return (__m256i)__builtin_ia32_vpermi2varqi256((__v32qi)__A, (__v32qi)__I,
(__v32qi)__B);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U, __m256i __I,
- __m256i __B)
-{
+ __m256i __B) {
return (__m256i)__builtin_ia32_selectb_256(__U,
(__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
(__v32qi)__A);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I, __mmask32 __U,
- __m256i __B)
-{
+ __m256i __B) {
return (__m256i)__builtin_ia32_selectb_256(__U,
(__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
(__v32qi)__I);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i __I,
- __m256i __B)
-{
+ __m256i __B) {
return (__m256i)__builtin_ia32_selectb_256(__U,
(__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
(__v32qi)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_permutexvar_epi8 (__m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_permutexvar_epi8(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_permvarqi128((__v16qi)__B, (__v16qi)__A);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_permutexvar_epi8(__mmask16 __M, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
(__v16qi)_mm_permutexvar_epi8(__A, __B),
(__v16qi)_mm_setzero_si128());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
- __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_permutexvar_epi8(__m128i __W, __mmask16 __M, __m128i __A,
+ __m128i __B) {
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
(__v16qi)_mm_permutexvar_epi8(__A, __B),
(__v16qi)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_permutexvar_epi8 (__m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_permutexvar_epi8(__m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_permvarqi256((__v32qi) __B, (__v32qi) __A);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A,
- __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_permutexvar_epi8(__mmask32 __M, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
(__v32qi)_mm256_permutexvar_epi8(__A, __B),
(__v32qi)_mm256_setzero_si256());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
- __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_permutexvar_epi8(__m256i __W, __mmask32 __M, __m256i __A,
+ __m256i __B) {
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
(__v32qi)_mm256_permutexvar_epi8(__A, __B),
(__v32qi)__W);
@@ -186,7 +179,8 @@ _mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y)
(__v32qi)_mm256_setzero_si256());
}
-
+#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h
index 263a1079b26d5..575c0c8962662 100644
--- a/clang/lib/Headers/avx512vlbwintrin.h
+++ b/clang/lib/Headers/avx512vlbwintrin.h
@@ -1223,69 +1223,61 @@ _mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B)
(__v16hi)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_permutex2var_epi16(__m128i __A, __m128i __I, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_permutex2var_epi16(__m128i __A, __m128i __I, __m128i __B) {
return (__m128i)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I,
(__v8hi) __B);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_permutex2var_epi16(__m128i __A, __mmask8 __U, __m128i __I,
- __m128i __B)
-{
+ __m128i __B) {
return (__m128i)__builtin_ia32_selectw_128(__U,
(__v8hi)_mm_permutex2var_epi16(__A, __I, __B),
(__v8hi)__A);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask2_permutex2var_epi16(__m128i __A, __m128i __I, __mmask8 __U,
- __m128i __B)
-{
+ __m128i __B) {
return (__m128i)__builtin_ia32_selectw_128(__U,
(__v8hi)_mm_permutex2var_epi16(__A, __I, __B),
(__v8hi)__I);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_permutex2var_epi16 (__mmask8 __U, __m128i __A, __m128i __I,
- __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_permutex2var_epi16(__mmask8 __U, __m128i __A, __m128i __I,
+ __m128i __B) {
return (__m128i)__builtin_ia32_selectw_128(__U,
(__v8hi)_mm_permutex2var_epi16(__A, __I, __B),
(__v8hi)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_permutex2var_epi16(__m256i __A, __m256i __I, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_permutex2var_epi16(__m256i __A, __m256i __I, __m256i __B) {
return (__m256i)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I,
(__v16hi)__B);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_permutex2var_epi16(__m256i __A, __mmask16 __U, __m256i __I,
- __m256i __B)
-{
+ __m256i __B) {
return (__m256i)__builtin_ia32_selectw_256(__U,
(__v16hi)_mm256_permutex2var_epi16(__A, __I, __B),
(__v16hi)__A);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask2_permutex2var_epi16(__m256i __A, __m256i __I, __mmask16 __U,
- __m256i __B)
-{
+ __m256i __B) {
return (__m256i)__builtin_ia32_selectw_256(__U,
(__v16hi)_mm256_permutex2var_epi16(__A, __I, __B),
(__v16hi)__I);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A, __m256i __I,
- __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_permutex2var_epi16(__mmask16 __U, __m256i __A, __m256i __I,
+ __m256i __B) {
return (__m256i)__builtin_ia32_selectw_256(__U,
(__v16hi)_mm256_permutex2var_epi16(__A, __I, __B),
(__v16hi)_mm256_setzero_si256());
diff --git a/clang/lib/Headers/avx512vlfp16intrin.h b/clang/lib/Headers/avx512vlfp16intrin.h
index 5b2b3f0d0bbd4..885231b030b23 100644
--- a/clang/lib/Headers/avx512vlfp16intrin.h
+++ b/clang/lib/Headers/avx512vlfp16intrin.h
@@ -2010,24 +2010,24 @@ _mm256_mask_blend_ph(__mmask16 __U, __m256h __A, __m256h __W) {
(__v16hf)__A);
}
-static __inline__ __m128h __DEFAULT_FN_ATTRS128
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_permutex2var_ph(__m128h __A, __m128i __I, __m128h __B) {
return (__m128h)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I,
(__v8hi)__B);
}
-static __inline__ __m256h __DEFAULT_FN_ATTRS256
+static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_permutex2var_ph(__m256h __A, __m256i __I, __m256h __B) {
return (__m256h)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I,
(__v16hi)__B);
}
-static __inline__ __m128h __DEFAULT_FN_ATTRS128
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_permutexvar_ph(__m128i __A, __m128h __B) {
return (__m128h)__builtin_ia32_permvarhi128((__v8hi)__B, (__v8hi)__A);
}
-static __inline__ __m256h __DEFAULT_FN_ATTRS256
+static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_permutexvar_ph(__m256i __A, __m256h __B) {
return (__m256h)__builtin_ia32_permvarhi256((__v16hi)__B, (__v16hi)__A);
}
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 92bb444aeb5b8..e5249926b934e 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -3556,13 +3556,13 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
(__v8sf)_mm256_setzero_ps());
}
- static __inline__ __m128i __DEFAULT_FN_ATTRS128
+ static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) {
return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I,
(__v4si)__B);
}
- static __inline__ __m128i __DEFAULT_FN_ATTRS128
+ static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I,
__m128i __B) {
return (__m128i)__builtin_ia32_selectd_128(__U,
@@ -3570,7 +3570,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
(__v4si)__A);
}
- static __inline__ __m128i __DEFAULT_FN_ATTRS128
+ static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U,
__m128i __B) {
return (__m128i)__builtin_ia32_selectd_128(__U,
@@ -3578,7 +3578,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
(__v4si)__I);
}
- static __inline__ __m128i __DEFAULT_FN_ATTRS128
+ static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I,
__m128i __B) {
return (__m128i)__builtin_ia32_selectd_128(__U,
@@ -3586,13 +3586,13 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
(__v4si)_mm_setzero_si128());
}
- static __inline__ __m256i __DEFAULT_FN_ATTRS256
+ static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) {
return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I,
(__v8si) __B);
}
- static __inline__ __m256i __DEFAULT_FN_ATTRS256
+ static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I,
__m256i __B) {
return (__m256i)__builtin_ia32_selectd_256(__U,
@@ -3600,7 +3600,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
(__v8si)__A);
}
- static __inline__ __m256i __DEFAULT_FN_ATTRS256
+ static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U,
__m256i __B) {
return (__m256i)__builtin_ia32_selectd_256(__U,
@@ -3608,7 +3608,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
(__v8si)__I);
}
- static __inline__ __m256i __DEFAULT_FN_ATTRS256
+ static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I,
__m256i __B) {
return (__m256i)__builtin_ia32_selectd_256(__U,
@@ -3616,40 +3616,43 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
(__v8si)_mm256_setzero_si256());
}
- static __inline__ __m128d __DEFAULT_FN_ATTRS128
+ static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) {
return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I,
(__v2df)__B);
}
- static __inline__ __m128d __DEFAULT_FN_ATTRS128
- _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) {
+ static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+ _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I,
+ __m128d __B) {
return (__m128d)__builtin_ia32_selectpd_128(__U,
(__v2df)_mm_permutex2var_pd(__A, __I, __B),
(__v2df)__A);
}
- static __inline__ __m128d __DEFAULT_FN_ATTRS128
- _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) {
+ static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+ _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U,
+ __m128d __B) {
return (__m128d)__builtin_ia32_selectpd_128(__U,
(__v2df)_mm_permutex2var_pd(__A, __I, __B),
(__v2df)(__m128d)__I);
}
- static __inline__ __m128d __DEFAULT_FN_ATTRS128
- _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) {
+ static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+ _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I,
+ __m128d __B) {
return (__m128d)__builtin_ia32_selectpd_128(__U,
(__v2df)_mm_permutex2var_pd(__A, __I, __B),
(__v2df)_mm_setzero_pd());
}
- static __inline__ __m256d __DEFAULT_FN_ATTRS256
+ static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) {
return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I,
(__v4df)__B);
}
- static __inline__ __m256d __DEFAULT_FN_ATTRS256
+ static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I,
__m256d __B) {
return (__m256d)__builtin_ia32_selectpd_256(__U,
@@ -3657,7 +3660,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
(__v4df)__A);
}
- static __inline__ __m256d __DEFAULT_FN_ATTRS256
+ static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U,
__m256d __B) {
return (__m256d)__builtin_ia32_selectpd_256(__U,
@@ -3665,7 +3668,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
(__v4df)(__m256d)__I);
}
- static __inline__ __m256d __DEFAULT_FN_ATTRS256
+ static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I,
__m256d __B) {
return (__m256d)__builtin_ia32_selectpd_256(__U,
@@ -3673,47 +3676,48 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
(__v4df)_mm256_setzero_pd());
}
- static __inline__ __m128 __DEFAULT_FN_ATTRS128
+ static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) {
return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I,
(__v4sf)__B);
}
- static __inline__ __m128 __DEFAULT_FN_ATTRS128
+ static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) {
return (__m128)__builtin_ia32_selectps_128(__U,
(__v4sf)_mm_permutex2var_ps(__A, __I, __B),
(__v4sf)__A);
}
- static __inline__ __m128 __DEFAULT_FN_ATTRS128
+ static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) {
return (__m128)__builtin_ia32_selectps_128(__U,
(__v4sf)_mm_permutex2var_ps(__A, __I, __B),
(__v4sf)(__m128)__I);
}
- static __inline__ __m128 __DEFAULT_FN_ATTRS128
+ static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) {
return (__m128)__builtin_ia32_selectps_128(__U,
(__v4sf)_mm_permutex2var_ps(__A, __I, __B),
(__v4sf)_mm_setzero_ps());
}
- static __inline__ __m256 __DEFAULT_FN_ATTRS256
+ static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) {
return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I,
(__v8sf) __B);
}
- static __inline__ __m256 __DEFAULT_FN_ATTRS256
- _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) {
+ static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+ _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I,
+ __m256 __B) {
return (__m256)__builtin_ia32_selectps_256(__U,
(__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
(__v8sf)__A);
}
- static __inline__ __m256 __DEFAULT_FN_ATTRS256
+ static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U,
__m256 __B) {
return (__m256)__builtin_ia32_selectps_256(__U,
@@ -3721,7 +3725,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
(__v8sf)(__m256)__I);
}
- static __inline__ __m256 __DEFAULT_FN_ATTRS256
+ static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I,
__m256 __B) {
return (__m256)__builtin_ia32_selectps_256(__U,
@@ -3729,13 +3733,13 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
(__v8sf)_mm256_setzero_ps());
}
- static __inline__ __m128i __DEFAULT_FN_ATTRS128
+ static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) {
return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I,
(__v2di)__B);
}
- static __inline__ __m128i __DEFAULT_FN_ATTRS128
+ static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I,
__m128i __B) {
return (__m128i)__builtin_ia32_selectq_128(__U,
@@ -3743,7 +3747,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
(__v2di)__A);
}
- static __inline__ __m128i __DEFAULT_FN_ATTRS128
+ static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U,
__m128i __B) {
return (__m128i)__builtin_ia32_selectq_128(__U,
@@ -3751,7 +3755,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
(__v2di)__I);
}
- static __inline__ __m128i __DEFAULT_FN_ATTRS128
+ static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I,
__m128i __B) {
return (__m128i)__builtin_ia32_selectq_128(__U,
@@ -3759,14 +3763,13 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
(__v2di)_mm_setzero_si128());
}
-
- static __inline__ __m256i __DEFAULT_FN_ATTRS256
+ static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) {
return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I,
(__v4di) __B);
}
- static __inline__ __m256i __DEFAULT_FN_ATTRS256
+ static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I,
__m256i __B) {
return (__m256i)__builtin_ia32_selectq_256(__U,
@@ -3774,7 +3777,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
(__v4di)__A);
}
- static __inline__ __m256i __DEFAULT_FN_ATTRS256
+ static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U,
__m256i __B) {
return (__m256i)__builtin_ia32_selectq_256(__U,
@@ -3782,7 +3785,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
(__v4di)__I);
}
- static __inline__ __m256i __DEFAULT_FN_ATTRS256
+ static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I,
__m256i __B) {
return (__m256i)__builtin_ia32_selectq_256(__U,
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index be2cd480f7558..e6e2e38bcc097 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -1591,6 +1591,132 @@ __m512i test_mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i
return _mm512_maskz_permutex2var_epi16(__U,__A,__I,__B);
}
+TEST_CONSTEXPR(match_v32hi(
+ _mm512_permutex2var_epi16(
+ (__m512i)(__v32hi){
+ 0, 10, 20, 30, 40, 50, 60, 70,
+ 80, 90, 100, 110, 120, 130, 140, 150,
+ 160, 170, 180, 190, 200, 210, 220, 230,
+ 240, 250, 260, 270, 280, 290, 300, 310},
+ (__m512i)(__v32hi){
+ 0, 32, 1, 33, 2, 34, 3, 35,
+ 4, 36, 5, 37, 6, 38, 7, 39,
+ 8, 40, 9, 41, 10, 42, 11, 43,
+ 12, 44, 13, 45, 14, 46, 15, 47},
+ (__m512i)(__v32hi){
+ 400, 410, 420, 430, 440, 450, 460, 470,
+ 480, 490, 500, 510, 520, 530, 540, 550,
+ 560, 570, 580, 590, 600, 610, 620, 630,
+ 640, 650, 660, 670, 680, 690, 700, 710}),
+ 0, 400, 10, 410, 20, 420, 30, 430,
+ 40, 440, 50, 450, 60, 460, 70, 470,
+ 80, 480, 90, 490, 100, 500, 110, 510,
+ 120, 520, 130, 530, 140, 540, 150, 550));
+TEST_CONSTEXPR(match_v32hi(
+ _mm512_mask_permutex2var_epi16(
+ (__m512i)(__v32hi){
+ -1, -2, -3, -4, -5, -6, -7, -8,
+ -9, -10, -11, -12, -13, -14, -15, -16,
+ -17, -18, -19, -20, -21, -22, -23, -24,
+ -25, -26, -27, -28, -29, -30, -31, -32},
+ 0xAAAAAAAA,
+ (__m512i)(__v32hi){
+ 0, 32, 1, 33, 2, 34, 3, 35,
+ 4, 36, 5, 37, 6, 38, 7, 39,
+ 8, 40, 9, 41, 10, 42, 11, 43,
+ 12, 44, 13, 45, 14, 46, 15, 47},
+ (__m512i)(__v32hi){
+ 400, 410, 420, 430, 440, 450, 460, 470,
+ 480, 490, 500, 510, 520, 530, 540, 550,
+ 560, 570, 580, 590, 600, 610, 620, 630,
+ 640, 650, 660, 670, 680, 690, 700, 710}),
+ -1, 400, -3, 410, -5, 420, -7, 430,
+ -9, 440, -11, 450, -13, 460, -15, 470,
+ -17, 480, -19, 490, -21, 500, -23, 510,
+ -25, 520, -27, 530, -29, 540, -31, 550));
+TEST_CONSTEXPR(match_v32hi(
+ _mm512_maskz_permutex2var_epi16(
+ 0x55555555,
+ (__m512i)(__v32hi){
+ 0, 10, 20, 30, 40, 50, 60, 70,
+ 80, 90, 100, 110, 120, 130, 140, 150,
+ 160, 170, 180, 190, 200, 210, 220, 230,
+ 240, 250, 260, 270, 280, 290, 300, 310},
+ (__m512i)(__v32hi){
+ 0, 32, 1, 33, 2, 34, 3, 35,
+ 4, 36, 5, 37, 6, 38, 7, 39,
+ 8, 40, 9, 41, 10, 42, 11, 43,
+ 12, 44, 13, 45, 14, 46, 15, 47},
+ (__m512i)(__v32hi){
+ 400, 410, 420, 430, 440, 450, 460, 470,
+ 480, 490, 500, 510, 520, 530, 540, 550,
+ 560, 570, 580, 590, 600, 610, 620, 630,
+ 640, 650, 660, 670, 680, 690, 700, 710}),
+ 0, 0, 10, 0, 20, 0, 30, 0,
+ 40, 0, 50, 0, 60, 0, 70, 0,
+ 80, 0, 90, 0, 100, 0, 110, 0,
+ 120, 0, 130, 0, 140, 0, 150, 0));
+
+TEST_CONSTEXPR(match_v64qu(
+ _mm512_permutex2var_epi8(
+ (__m512i)(__v64qu){
+ 0, 10, 20, 30, 40, 50, 60, 70,
+ 80, 90, 100, 110, 120, 127, 126, 125,
+ 124, 123, 122, 121, 120, 119, 118, 117,
+ 116, 115, 114, 113, 112, 111, 110, 109,
+ 108, 107, 106, 105, 104, 103, 102, 101,
+ 100, 99, 98, 97, 96, 95, 94, 93,
+ 92, 91, 90, 89, 88, 87, 86, 85,
+ 84, 83, 82, 81, 80, 79, 78, 77},
+ (__m512i)(__v64qu){
+ 0, 64, 1, 65, 2, 66, 3, 67,
+ 4, 68, 5, 69, 6, 70, 7, 71,
+ 8, 72, 9, 73, 10, 74, 11, 75,
+ 12, 76, 13, 77, 14, 78, 15, 79,
+ 16, 80, 17, 81, 18, 82, 19, 83,
+ 20, 84, 21, 85, 22, 86, 23, 87,
+ 24, 88, 25, 89, 26, 90, 27, 91,
+ 28, 92, 29, 93, 30, 94, 31, 95},
+ (__m512i)(__v64qu){
+ 200, 210, 220, 230, 240, 250, 254, 253,
+ 252, 251, 250, 249, 248, 247, 246, 245,
+ 244, 243, 242, 241, 240, 239, 238, 237,
+ 236, 235, 234, 233, 232, 231, 230, 229,
+ 228, 227, 226, 225, 224, 223, 222, 221,
+ 220, 219, 218, 217, 216, 215, 214, 213,
+ 212, 211, 210, 209, 208, 207, 206, 205,
+ 204, 203, 202, 201, 200, 199, 198, 197}),
+ 0, 200, 10, 210, 20, 220, 30, 230,
+ 40, 240, 50, 250, 60, 254, 70, 253,
+ 80, 252, 90, 251, 100, 250, 110, 249,
+ 120, 248, 127, 247, 126, 246, 125, 245,
+ 124, 244, 123, 243, 122, 242, 121, 241,
+ 120, 240, 119, 239, 118, 238, 117, 237,
+ 116, 236, 115, 235, 114, 234, 113, 233,
+ 112, 232, 111, 231, 110, 230, 109, 229));
+TEST_CONSTEXPR(match_v32hi(
+ _mm512_mask2_permutex2var_epi16(
+ (__m512i)(__v32hi){
+ 0, 10, 20, 30, 40, 50, 60, 70,
+ 80, 90, 100, 110, 120, 130, 140, 150,
+ 160, 170, 180, 190, 200, 210, 220, 230,
+ 240, 250, 260, 270, 280, 290, 300, 310},
+ (__m512i)(__v32hi){
+ 0, 32, 1, 33, 2, 34, 3, 35,
+ 4, 36, 5, 37, 6, 38, 7, 39,
+ 8, 40, 9, 41, 10, 42, 11, 43,
+ 12, 44, 13, 45, 14, 46, 15, 47},
+ 0x55555555,
+ (__m512i)(__v32hi){
+ 400, 410, 420, 430, 440, 450, 460, 470,
+ 480, 490, 500, 510, 520, 530, 540, 550,
+ 560, 570, 580, 590, 600, 610, 620, 630,
+ 640, 650, 660, 670, 680, 690, 700, 710}),
+ 0, 32, 10, 33, 20, 34, 30, 35,
+ 40, 36, 50, 37, 60, 38, 70, 39,
+ 80, 40, 90, 41, 100, 42, 110, 43,
+ 120, 44, 130, 45, 140, 46, 150, 47));
+
__m512i test_mm512_mulhrs_epi16(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mulhrs_epi16
// CHECK: @llvm.x86.avx512.pmul.hr.sw.512
@@ -2578,6 +2704,33 @@ __m512i test_mm512_broadcastw_epi16(__m128i __A) {
return _mm512_broadcastw_epi16(__A);
}
TEST_CONSTEXPR(match_v32hi(_mm512_broadcastw_epi16((__m128i)(__v8hi){42, 3, 10, 8, 0, 256, 256, 128}), 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42));
+TEST_CONSTEXPR(match_v32hi(
+ _mm512_permutex2var_epi16((__m512i)(__v32hi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32},
+ (__m512i)(__v32hi){0, 31, 32, 63, 1, 33, 2, 34,
+ 3, 35, 4, 36, 5, 37, 6, 38,
+ 7, 39, 8, 40, 9, 41, 10, 42,
+ 11, 43, 12, 44, 13, 45, 14, 46},
+ (__m512i)(__v32hi){101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
+ 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132}),
+ 1, 32, 101, 132, 2, 102, 3, 103,
+ 4, 104, 5, 105, 6, 106, 7, 107,
+ 8, 108, 9, 109, 10, 110, 11, 111,
+ 12, 112, 13, 113, 14, 114, 15, 115));
+TEST_CONSTEXPR(match_v32hi(
+ _mm512_mask_permutex2var_epi16((__m512i)(__v32hi){-1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16,
+ -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32},
+ 0xAAAAAAAA,
+ (__m512i)(__v32hi){0, 31, 32, 63, 1, 33, 2, 34,
+ 3, 35, 4, 36, 5, 37, 6, 38,
+ 7, 39, 8, 40, 9, 41, 10, 42,
+ 11, 43, 12, 44, 13, 45, 14, 46},
+ (__m512i)(__v32hi){101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
+ 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132}),
+ -1, -32, -3, 132, -5, 102, -7, 103,
+ -9, 104, -11, 105, -13, 106, -15, 107,
+ -17, 108, -19, 109, -21, 110, -23, 111,
+ -25, 112, -27, 113, -29, 114, -31, 115));
__m512i test_mm512_mask_broadcastw_epi16(__m512i __O, __mmask32 __M, __m128i __A) {
// CHECK-LABEL: test_mm512_mask_broadcastw_epi16
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 69599379b6b3d..8e65430bd3e84 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -5607,6 +5607,56 @@ __m512i test_mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i _
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_permutex2var_epi64(__U, __A, __I, __B);
}
+
+TEST_CONSTEXPR(match_v16si(
+ _mm512_permutex2var_epi32(
+ (__m512i)(__v16si){0, 10, 20, 30, 40, 50, 60, 70,
+ 80, 90, 100, 110, 120, 130, 140, 150},
+ (__m512i)(__v16si){0, 15, 16, 31, 1, 17, 2, 18,
+ 3, 19, 4, 20, 5, 21, 6, 22},
+ (__m512i)(__v16si){200, 210, 220, 230, 240, 250, 260, 270,
+ 280, 290, 300, 310, 320, 330, 340, 350}),
+ 0, 150, 200, 350, 10, 210, 20, 220,
+ 30, 230, 40, 240, 50, 250, 60, 260));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_mask_permutex2var_epi32(
+ (__m512i)(__v16si){-1, -2, -3, -4, -5, -6, -7, -8,
+ -9, -10, -11, -12, -13, -14, -15, -16},
+ 0xAAAA,
+ (__m512i)(__v16si){0, 15, 16, 31, 1, 17, 2, 18,
+ 3, 19, 4, 20, 5, 21, 6, 22},
+ (__m512i)(__v16si){200, 210, 220, 230, 240, 250, 260, 270,
+ 280, 290, 300, 310, 320, 330, 340, 350}),
+ -1, -16, -3, 350, -5, 210, -7, 220,
+ -9, 230, -11, 240, -13, 250, -15, 260));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_maskz_permutex2var_epi32(
+ 0x5555,
+ (__m512i)(__v16si){0, 10, 20, 30, 40, 50, 60, 70,
+ 80, 90, 100, 110, 120, 130, 140, 150},
+ (__m512i)(__v16si){0, 15, 16, 31, 1, 17, 2, 18,
+ 3, 19, 4, 20, 5, 21, 6, 22},
+ (__m512i)(__v16si){200, 210, 220, 230, 240, 250, 260, 270,
+ 280, 290, 300, 310, 320, 330, 340, 350}),
+ 0, 0, 200, 0, 10, 0, 20, 0,
+ 30, 0, 40, 0, 50, 0, 60, 0));
+TEST_CONSTEXPR(match_m512(
+ _mm512_permutex2var_ps(
+ (__m512){1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f,
+ 9.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 16.f},
+ (__m512i)(__v16si){0, 15, 16, 31, 1, 17, 2, 18,
+ 3, 19, 4, 20, 5, 21, 6, 22},
+ (__m512){101.f, 102.f, 103.f, 104.f, 105.f, 106.f, 107.f, 108.f,
+ 109.f, 110.f, 111.f, 112.f, 113.f, 114.f, 115.f, 116.f}),
+ 1.f, 16.f, 101.f, 116.f, 2.f, 102.f, 3.f, 103.f,
+ 4.f, 104.f, 5.f, 105.f, 6.f, 106.f, 7.f, 107.f));
+TEST_CONSTEXPR(match_m512d(
+ _mm512_permutex2var_pd(
+ (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0},
+ (__m512i)(__v8di){0, 15, 16, 23, 1, 17, 2, 18},
+ (__m512d){101.0, 102.0, 103.0, 104.0, 105.0, 106.0, 107.0, 108.0}),
+ 1.0, 108.0, 1.0, 8.0, 2.0, 2.0, 3.0, 3.0));
+
__mmask16 test_mm512_testn_epi32_mask(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_testn_epi32_mask
// CHECK: and <16 x i32> %{{.*}}, %{{.*}}
@@ -11753,3 +11803,73 @@ void test_mm512_mask_i32loscatter_epi64(void *__addr, __mmask8 __mask, __m512i _
// CHECK: @llvm.x86.avx512.mask.scatter.dpq.512
_mm512_mask_i32loscatter_epi64(__addr, __mask, __index, __v1, 2);
}
+
+
+TEST_CONSTEXPR(match_m512d( // unmasked: indices 7..4 read the first source in descending order, indices 8..11 read the second ascending
+ _mm512_permutex2var_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0},
+ (__m512i)(__v8di){7, 8, 6, 9, 5, 10, 4, 11},
+ (__m512d){101.0, 102.0, 103.0, 104.0, 105.0, 106.0, 107.0, 108.0}),
+ 8.0, 101.0, 7.0, 102.0, 6.0, 103.0, 5.0, 104.0));
+TEST_CONSTEXPR(match_m512d( // merge-mask 0xAA: odd lanes take the permute result, even lanes keep the first operand
+ _mm512_mask_permutex2var_pd((__m512d){-1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0},
+ 0xAA,
+ (__m512i)(__v8di){0, 15, 16, 23, 1, 17, 2, 18},
+ (__m512d){101.0, 102.0, 103.0, 104.0, 105.0, 106.0, 107.0, 108.0}),
+ -1.0, 108.0, -3.0, -8.0, -5.0, -2.0, -7.0, -3.0));
+TEST_CONSTEXPR(match_m512d( // zero-mask 0x55: even lanes take the permute result, odd lanes are zeroed
+ _mm512_maskz_permutex2var_pd(0x55, (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0},
+ (__m512i)(__v8di){0, 15, 16, 23, 1, 17, 2, 18},
+ (__m512d){101.0, 102.0, 103.0, 104.0, 105.0, 106.0, 107.0, 108.0}),
+ 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 0.0));
+
+TEST_CONSTEXPR(match_m512( // unmasked: descending indices 15..8 read the first source, 31..24 read the second
+ _mm512_permutex2var_ps((__m512){1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f,
+ 9.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 16.f},
+ (__m512i)(__v16si){15, 31, 14, 30, 13, 29, 12, 28,
+ 11, 27, 10, 26, 9, 25, 8, 24},
+ (__m512){101.f, 102.f, 103.f, 104.f, 105.f, 106.f, 107.f, 108.f,
+ 109.f, 110.f, 111.f, 112.f, 113.f, 114.f, 115.f, 116.f}),
+ 16.f, 116.f, 15.f, 115.f, 14.f, 114.f, 13.f, 113.f,
+ 12.f, 112.f, 11.f, 111.f, 10.f, 110.f, 9.f, 109.f));
+TEST_CONSTEXPR(match_m512( // merge-mask 0xAAAA: odd lanes take the permute result, even lanes keep the first operand
+ _mm512_mask_permutex2var_ps((__m512){-1.f, -2.f, -3.f, -4.f, -5.f, -6.f, -7.f, -8.f,
+ -9.f, -10.f, -11.f, -12.f, -13.f, -14.f, -15.f, -16.f},
+ 0xAAAA,
+ (__m512i)(__v16si){0, 15, 16, 31, 1, 17, 2, 18,
+ 3, 19, 4, 20, 5, 21, 6, 22},
+ (__m512){101.f, 102.f, 103.f, 104.f, 105.f, 106.f, 107.f, 108.f,
+ 109.f, 110.f, 111.f, 112.f, 113.f, 114.f, 115.f, 116.f}),
+ -1.f, -16.f, -3.f, 116.f, -5.f, 102.f, -7.f, 103.f,
+ -9.f, 104.f, -11.f, 105.f, -13.f, 106.f, -15.f, 107.f));
+
+TEST_CONSTEXPR(match_v16si( // unmasked: indices 0-15 read the first source, indices 16-31 read the second
+ _mm512_permutex2var_epi32((__m512i)(__v16si){1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16},
+ (__m512i)(__v16si){0, 15, 16, 31, 1, 17, 2, 18,
+ 3, 19, 4, 20, 5, 21, 6, 22},
+ (__m512i)(__v16si){101, 102, 103, 104, 105, 106, 107, 108,
+ 109, 110, 111, 112, 113, 114, 115, 116}),
+ 1, 16, 101, 116, 2, 102, 3, 103,
+ 4, 104, 5, 105, 6, 106, 7, 107));
+TEST_CONSTEXPR(match_v16si( // zero-mask 0x5555: even lanes take the permute result, odd lanes are zeroed
+ _mm512_maskz_permutex2var_epi32(0x5555,
+ (__m512i)(__v16si){1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16},
+ (__m512i)(__v16si){0, 15, 16, 31, 1, 17, 2, 18,
+ 3, 19, 4, 20, 5, 21, 6, 22},
+ (__m512i)(__v16si){101, 102, 103, 104, 105, 106, 107, 108,
+ 109, 110, 111, 112, 113, 114, 115, 116}),
+ 1, 0, 101, 0, 2, 0, 3, 0,
+ 4, 0, 5, 0, 6, 0, 7, 0));
+
+TEST_CONSTEXPR(match_v8di( // 8 lanes: index 15 reads lane 7 of the second source; 16, 23, 17, 18 wrap to lanes 0, 7, 1, 2 of the first
+ _mm512_permutex2var_epi64((__m512i)(__v8di){1, 2, 3, 4, 5, 6, 7, 8},
+ (__m512i)(__v8di){0, 15, 16, 23, 1, 17, 2, 18},
+ (__m512i)(__v8di){101, 102, 103, 104, 105, 106, 107, 108}),
+ 1, 108, 1, 8, 2, 2, 3, 3));
+TEST_CONSTEXPR(match_v8di( // merge-mask 0xAA: odd lanes take the permute result, even lanes keep the first operand
+ _mm512_mask_permutex2var_epi64((__m512i)(__v8di){-1, -2, -3, -4, -5, -6, -7, -8},
+ 0xAA,
+ (__m512i)(__v8di){0, 15, 16, 23, 1, 17, 2, 18},
+ (__m512i)(__v8di){101, 102, 103, 104, 105, 106, 107, 108}),
+ -1, 108, -3, -8, -5, -2, -7, -3));
diff --git a/clang/test/CodeGen/X86/avx512vbmi-builtins.c b/clang/test/CodeGen/X86/avx512vbmi-builtins.c
index c3b6298a39b59..7d506db92faeb 100644
--- a/clang/test/CodeGen/X86/avx512vbmi-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vbmi-builtins.c
@@ -3,8 +3,14 @@
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vbmi -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vbmi -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vbmi -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vbmi -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vbmi -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vbmi -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+
#include <immintrin.h>
+#include "builtin_test_helpers.h"
__m512i test_mm512_mask2_permutex2var_epi8(__m512i __A, __m512i __I, __mmask64 __U, __m512i __B) {
// CHECK-LABEL: test_mm512_mask2_permutex2var_epi8
@@ -33,6 +39,154 @@ __m512i test_mm512_maskz_permutex2var_epi8(__mmask64 __U, __m512i __A, __m512i _
return _mm512_maskz_permutex2var_epi8(__U, __A, __I, __B);
}
+TEST_CONSTEXPR(match_v64qu( // unmasked: even lanes (indices 0-31) read the first source, odd lanes (indices 64-95) read bytes 0-31 of the second
+ _mm512_permutex2var_epi8((__m512i)(__v64qu){
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63},
+ (__m512i)(__v64qu){
+ 0, 64, 1, 65, 2, 66, 3, 67,
+ 4, 68, 5, 69, 6, 70, 7, 71,
+ 8, 72, 9, 73, 10, 74, 11, 75,
+ 12, 76, 13, 77, 14, 78, 15, 79,
+ 16, 80, 17, 81, 18, 82, 19, 83,
+ 20, 84, 21, 85, 22, 86, 23, 87,
+ 24, 88, 25, 89, 26, 90, 27, 91,
+ 28, 92, 29, 93, 30, 94, 31, 95},
+ (__m512i)(__v64qu){
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ 208, 209, 210, 211, 212, 213, 214, 215,
+ 216, 217, 218, 219, 220, 221, 222, 223,
+ 224, 225, 226, 227, 228, 229, 230, 231,
+ 232, 233, 234, 235, 236, 237, 238, 239,
+ 240, 241, 242, 243, 244, 245, 246, 247,
+ 248, 249, 250, 251, 252, 253, 254, 255,
+ 0, 1, 2, 3, 4, 5, 6, 7}),
+ 0, 200, 1, 201, 2, 202, 3, 203,
+ 4, 204, 5, 205, 6, 206, 7, 207,
+ 8, 208, 9, 209, 10, 210, 11, 211,
+ 12, 212, 13, 213, 14, 214, 15, 215,
+ 16, 216, 17, 217, 18, 218, 19, 219,
+ 20, 220, 21, 221, 22, 222, 23, 223,
+ 24, 224, 25, 225, 26, 226, 27, 227,
+ 28, 228, 29, 229, 30, 230, 31, 231));
+TEST_CONSTEXPR(match_v64qu( // merge-mask 0xAAAA...: odd lanes take second-source bytes via indices 64-95, even lanes keep the first operand
+ _mm512_mask_permutex2var_epi8((__m512i)(__v64qu){
+ 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20, 21, 22, 23, 24, 25,
+ 26, 27, 28, 29, 30, 31, 32, 33,
+ 34, 35, 36, 37, 38, 39, 40, 41,
+ 42, 43, 44, 45, 46, 47, 48, 49,
+ 50, 51, 52, 53, 54, 55, 56, 57,
+ 58, 59, 60, 61, 62, 63, 64, 65,
+ 66, 67, 68, 69, 70, 71, 72, 73},
+ 0xAAAAAAAAAAAAAAAAULL,
+ (__m512i)(__v64qu){
+ 0, 64, 1, 65, 2, 66, 3, 67,
+ 4, 68, 5, 69, 6, 70, 7, 71,
+ 8, 72, 9, 73, 10, 74, 11, 75,
+ 12, 76, 13, 77, 14, 78, 15, 79,
+ 16, 80, 17, 81, 18, 82, 19, 83,
+ 20, 84, 21, 85, 22, 86, 23, 87,
+ 24, 88, 25, 89, 26, 90, 27, 91,
+ 28, 92, 29, 93, 30, 94, 31, 95},
+ (__m512i)(__v64qu){
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ 208, 209, 210, 211, 212, 213, 214, 215,
+ 216, 217, 218, 219, 220, 221, 222, 223,
+ 224, 225, 226, 227, 228, 229, 230, 231,
+ 232, 233, 234, 235, 236, 237, 238, 239,
+ 240, 241, 242, 243, 244, 245, 246, 247,
+ 248, 249, 250, 251, 252, 253, 254, 255,
+ 0, 1, 2, 3, 4, 5, 6, 7}),
+ 10, 200, 12, 201, 14, 202, 16, 203,
+ 18, 204, 20, 205, 22, 206, 24, 207,
+ 26, 208, 28, 209, 30, 210, 32, 211,
+ 34, 212, 36, 213, 38, 214, 40, 215,
+ 42, 216, 44, 217, 46, 218, 48, 219,
+ 50, 220, 52, 221, 54, 222, 56, 223,
+ 58, 224, 60, 225, 62, 226, 64, 227,
+ 66, 228, 68, 229, 70, 230, 72, 231));
+TEST_CONSTEXPR(match_v64qu( // zero-mask 0x5555...: even lanes take first-source bytes via indices 0-31, odd lanes are zeroed
+ _mm512_maskz_permutex2var_epi8(0x5555555555555555ULL,
+ (__m512i)(__v64qu){
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63},
+ (__m512i)(__v64qu){
+ 0, 64, 1, 65, 2, 66, 3, 67,
+ 4, 68, 5, 69, 6, 70, 7, 71,
+ 8, 72, 9, 73, 10, 74, 11, 75,
+ 12, 76, 13, 77, 14, 78, 15, 79,
+ 16, 80, 17, 81, 18, 82, 19, 83,
+ 20, 84, 21, 85, 22, 86, 23, 87,
+ 24, 88, 25, 89, 26, 90, 27, 91,
+ 28, 92, 29, 93, 30, 94, 31, 95},
+ (__m512i)(__v64qu){
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ 208, 209, 210, 211, 212, 213, 214, 215,
+ 216, 217, 218, 219, 220, 221, 222, 223,
+ 224, 225, 226, 227, 228, 229, 230, 231,
+ 232, 233, 234, 235, 236, 237, 238, 239,
+ 240, 241, 242, 243, 244, 245, 246, 247,
+ 248, 249, 250, 251, 252, 253, 254, 255,
+ 0, 1, 2, 3, 4, 5, 6, 7}),
+ 0, 0, 1, 0, 2, 0, 3, 0,
+ 4, 0, 5, 0, 6, 0, 7, 0,
+ 8, 0, 9, 0, 10, 0, 11, 0,
+ 12, 0, 13, 0, 14, 0, 15, 0,
+ 16, 0, 17, 0, 18, 0, 19, 0,
+ 20, 0, 21, 0, 22, 0, 23, 0,
+ 24, 0, 25, 0, 26, 0, 27, 0,
+ 28, 0, 29, 0, 30, 0, 31, 0));
+TEST_CONSTEXPR(match_v64qu( // mask2 0x5555...: even lanes take the permute result, odd lanes keep the index operand's bytes
+ _mm512_mask2_permutex2var_epi8((__m512i)(__v64qu){ // first source is offset by 10 so a selected lane never equals its own index byte
+ 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20, 21, 22, 23, 24, 25,
+ 26, 27, 28, 29, 30, 31, 32, 33,
+ 34, 35, 36, 37, 38, 39, 40, 41,
+ 42, 43, 44, 45, 46, 47, 48, 49,
+ 50, 51, 52, 53, 54, 55, 56, 57,
+ 58, 59, 60, 61, 62, 63, 64, 65,
+ 66, 67, 68, 69, 70, 71, 72, 73},
+ (__m512i)(__v64qu){
+ 0, 64, 1, 65, 2, 66, 3, 67,
+ 4, 68, 5, 69, 6, 70, 7, 71,
+ 8, 72, 9, 73, 10, 74, 11, 75,
+ 12, 76, 13, 77, 14, 78, 15, 79,
+ 16, 80, 17, 81, 18, 82, 19, 83,
+ 20, 84, 21, 85, 22, 86, 23, 87,
+ 24, 88, 25, 89, 26, 90, 27, 91,
+ 28, 92, 29, 93, 30, 94, 31, 95},
+ 0x5555555555555555ULL,
+ (__m512i)(__v64qu){
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ 208, 209, 210, 211, 212, 213, 214, 215,
+ 216, 217, 218, 219, 220, 221, 222, 223,
+ 224, 225, 226, 227, 228, 229, 230, 231,
+ 232, 233, 234, 235, 236, 237, 238, 239,
+ 240, 241, 242, 243, 244, 245, 246, 247,
+ 248, 249, 250, 251, 252, 253, 254, 255,
+ 0, 1, 2, 3, 4, 5, 6, 7}),
+ 10, 64, 11, 65, 12, 66, 13, 67,
+ 14, 68, 15, 69, 16, 70, 17, 71,
+ 18, 72, 19, 73, 20, 74, 21, 75,
+ 22, 76, 23, 77, 24, 78, 25, 79,
+ 26, 80, 27, 81, 28, 82, 29, 83,
+ 30, 84, 31, 85, 32, 86, 33, 87,
+ 34, 88, 35, 89, 36, 90, 37, 91,
+ 38, 92, 39, 93, 40, 94, 41, 95));
+
__m512i test_mm512_permutexvar_epi8(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_permutexvar_epi8
// CHECK: call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %{{.*}}, <64 x i8> %{{.*}})
diff --git a/clang/test/CodeGen/X86/avx512vbmivl-builtin.c b/clang/test/CodeGen/X86/avx512vbmivl-builtin.c
index c4d5fc8fb6977..49b7a1a721195 100644
--- a/clang/test/CodeGen/X86/avx512vbmivl-builtin.c
+++ b/clang/test/CodeGen/X86/avx512vbmivl-builtin.c
@@ -3,8 +3,14 @@
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vbmi -target-feature +avx512vl -target-feature +avx512bw -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vbmi -target-feature +avx512vl -target-feature +avx512bw -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vbmi -target-feature +avx512vl -target-feature +avx512bw -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vbmi -target-feature +avx512vl -target-feature +avx512bw -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vbmi -target-feature +avx512vl -target-feature +avx512bw -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vbmi -target-feature +avx512vl -target-feature +avx512bw -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+
#include <immintrin.h>
+#include "builtin_test_helpers.h"
__m128i test_mm_permutexvar_epi8(__m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_permutexvar_epi8
@@ -77,8 +83,28 @@ __m128i test_mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __m128i __I,
// CHECK-LABEL: test_mm_maskz_permutex2var_epi8
// CHECK: call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
- return _mm_maskz_permutex2var_epi8(__U, __A, __I, __B);
-}
+ return _mm_maskz_permutex2var_epi8(__U, __A, __I, __B);
+}
+
+TEST_CONSTEXPR(match_v16qu( // unmasked: indices 0-7 read the first source, indices 16-23 read bytes 0-7 of the second
+ _mm_permutex2var_epi8((__m128i)(__v16qu){1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16},
+ (__m128i)(__v16qu){0, 16, 1, 17, 2, 18, 3, 19,
+ 4, 20, 5, 21, 6, 22, 7, 23},
+ (__m128i)(__v16qu){101, 102, 103, 104, 105, 106, 107, 108,
+ 109, 110, 111, 112, 113, 114, 115, 116}),
+ 1, 101, 2, 102, 3, 103, 4, 104,
+ 5, 105, 6, 106, 7, 107, 8, 108));
+TEST_CONSTEXPR(match_v16qu( // merge-mask 0xAAAA: odd lanes take second-source bytes, even lanes keep the first operand
+ _mm_mask_permutex2var_epi8((__m128i)(__v16qu){200, 201, 202, 203, 204, 205, 206, 207,
+ 208, 209, 210, 211, 212, 213, 214, 215},
+ 0xAAAA,
+ (__m128i)(__v16qu){0, 16, 1, 17, 2, 18, 3, 19,
+ 4, 20, 5, 21, 6, 22, 7, 23},
+ (__m128i)(__v16qu){101, 102, 103, 104, 105, 106, 107, 108,
+ 109, 110, 111, 112, 113, 114, 115, 116}),
+ 200, 101, 202, 102, 204, 103, 206, 104,
+ 208, 105, 210, 106, 212, 107, 214, 108));
__m256i test_mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B) {
// CHECK-LABEL: test_mm256_permutex2var_epi8
@@ -97,8 +123,44 @@ __m256i test_mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i _
// CHECK-LABEL: test_mm256_maskz_permutex2var_epi8
// CHECK: call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
- return _mm256_maskz_permutex2var_epi8(__U, __A, __I, __B);
-}
+ return _mm256_maskz_permutex2var_epi8(__U, __A, __I, __B);
+}
+
+TEST_CONSTEXPR(match_v32qu( // unmasked: indices 0-15 read the first source, indices 32-47 read bytes 0-15 of the second
+ _mm256_permutex2var_epi8((__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32},
+ (__m256i)(__v32qu){0, 32, 1, 33, 2, 34, 3, 35,
+ 4, 36, 5, 37, 6, 38, 7, 39,
+ 8, 40, 9, 41, 10, 42, 11, 43,
+ 12, 44, 13, 45, 14, 46, 15, 47},
+ (__m256i)(__v32qu){101, 102, 103, 104, 105, 106, 107, 108,
+ 109, 110, 111, 112, 113, 114, 115, 116,
+ 117, 118, 119, 120, 121, 122, 123, 124,
+ 125, 126, 127, 128, 129, 130, 131, 132}),
+ 1, 101, 2, 102, 3, 103, 4, 104,
+ 5, 105, 6, 106, 7, 107, 8, 108,
+ 9, 109, 10, 110, 11, 111, 12, 112,
+ 13, 113, 14, 114, 15, 115, 16, 116));
+TEST_CONSTEXPR(match_v32qu( // merge-mask 0xAAAAAAAA: odd lanes take second-source bytes, even lanes keep the first operand
+ _mm256_mask_permutex2var_epi8((__m256i)(__v32qu){200, 201, 202, 203, 204, 205, 206, 207,
+ 208, 209, 210, 211, 212, 213, 214, 215,
+ 216, 217, 218, 219, 220, 221, 222, 223,
+ 224, 225, 226, 227, 228, 229, 230, 231},
+ 0xAAAAAAAA,
+ (__m256i)(__v32qu){0, 32, 1, 33, 2, 34, 3, 35,
+ 4, 36, 5, 37, 6, 38, 7, 39,
+ 8, 40, 9, 41, 10, 42, 11, 43,
+ 12, 44, 13, 45, 14, 46, 15, 47},
+ (__m256i)(__v32qu){101, 102, 103, 104, 105, 106, 107, 108,
+ 109, 110, 111, 112, 113, 114, 115, 116,
+ 117, 118, 119, 120, 121, 122, 123, 124,
+ 125, 126, 127, 128, 129, 130, 131, 132}),
+ 200, 101, 202, 102, 204, 103, 206, 104,
+ 208, 105, 210, 106, 212, 107, 214, 108,
+ 216, 109, 218, 110, 220, 111, 222, 112,
+ 224, 113, 226, 114, 228, 115, 230, 116));
__m128i test_mm_mask_multishift_epi64_epi8(__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y) {
// CHECK-LABEL: test_mm_mask_multishift_epi64_epi8
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 33c43977f72dc..121d5bf8d4adb 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -5610,12 +5610,23 @@ __m128i test_mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U,
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask2_permutex2var_epi32(__A,__I,__U,__B);
}
+TEST_CONSTEXPR(match_v4si( // mask2 0x05: lanes 0 and 2 take the permute result, lanes 1 and 3 keep the index operand (3, 6)
+ _mm_mask2_permutex2var_epi32((__m128i)(__v4si){10, 20, 30, 40},
+ (__m128i)(__v4si){0, 3, 4, 6}, 0x05,
+ (__m128i)(__v4si){100, 200, 300, 400}),
+ 10, 3, 100, 6));
__m256i test_mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B) {
// CHECK-LABEL: test_mm256_mask2_permutex2var_epi32
// CHECK: @llvm.x86.avx512.vpermi2var.d.256
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask2_permutex2var_epi32(__A,__I,__U,__B);
}
+TEST_CONSTEXPR(match_v8si( // mask2 0xA5: lanes 0, 2, 5, 7 take the permute result, lanes 1, 3, 4, 6 keep the index operand
+ _mm256_mask2_permutex2var_epi32((__m256i)(__v8si){0, 10, 20, 30, 40, 50, 60, 70},
+ (__m256i)(__v8si){0, 7, 8, 15, 1, 9, 2, 10},
+ 0xA5,
+ (__m256i)(__v8si){100, 110, 120, 130, 140, 150, 160, 170}),
+ 0, 7, 100, 15, 1, 110, 2, 120));
__m128d test_mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) {
// CHECK-LABEL: test_mm_mask2_permutex2var_pd
// CHECK: @llvm.x86.avx512.vpermi2var.pd.128
@@ -5646,149 +5657,255 @@ __m128i test_mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U,
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_mask2_permutex2var_epi64(__A,__I,__U,__B);
}
+TEST_CONSTEXPR(match_v2di( // mask2 0x1: lane 0 takes the permute result, lane 1 keeps the index operand (5)
+ _mm_mask2_permutex2var_epi64((__m128i)(__v2di){10, 20},
+ (__m128i)(__v2di){0, 5}, 0x1,
+ (__m128i)(__v2di){100, 200}),
+ 10, 5));
__m256i test_mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B) {
// CHECK-LABEL: test_mm256_mask2_permutex2var_epi64
// CHECK: @llvm.x86.avx512.vpermi2var.q.256
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_mask2_permutex2var_epi64(__A,__I,__U,__B);
}
+TEST_CONSTEXPR(match_v4di( // mask2 0x5: lanes 0 and 2 take the permute result, lanes 1 and 3 keep the index operand (1, 5)
+ _mm256_mask2_permutex2var_epi64((__m256i)(__v4di){0, 10, 20, 30},
+ (__m256i)(__v4di){0, 1, 4, 5}, 0x5,
+ (__m256i)(__v4di){100, 110, 120, 130}),
+ 0, 1, 100, 5));
__m128i test_mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) {
// CHECK-LABEL: test_mm_permutex2var_epi32
// CHECK: @llvm.x86.avx512.vpermi2var.d.128
return _mm_permutex2var_epi32(__A,__I,__B);
}
+TEST_CONSTEXPR(match_v4si( // unmasked: indices 0 and 3 read the first source, indices 4 and 6 read lanes 0 and 2 of the second
+ _mm_permutex2var_epi32((__m128i)(__v4si){10, 20, 30, 40},
+ (__m128i)(__v4si){0, 3, 4, 6},
+ (__m128i)(__v4si){100, 200, 300, 400}),
+ 10, 40, 100, 300));
__m128i test_mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B) {
// CHECK-LABEL: test_mm_mask_permutex2var_epi32
// CHECK: @llvm.x86.avx512.vpermi2var.d.128
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_permutex2var_epi32(__A,__U,__I,__B);
}
+TEST_CONSTEXPR(match_v4si( // merge-mask 0x0A: lanes 1 and 3 take the permute result, lanes 0 and 2 keep the first operand
+ _mm_mask_permutex2var_epi32((__m128i)(__v4si){-1, -2, -3, -4}, 0x0A,
+ (__m128i)(__v4si){0, 3, 4, 6},
+ (__m128i)(__v4si){100, 200, 300, 400}),
+ -1, -4, -3, 300));
__m128i test_mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_permutex2var_epi32
// CHECK: @llvm.x86.avx512.vpermi2var.d.128
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_permutex2var_epi32(__U,__A,__I,__B);
}
+TEST_CONSTEXPR(match_v4si( // zero-mask 0x0A: lanes 1 and 3 take the permute result, lanes 0 and 2 are zeroed
+ _mm_maskz_permutex2var_epi32(0x0A, (__m128i)(__v4si){10, 20, 30, 40},
+ (__m128i)(__v4si){0, 3, 4, 6},
+ (__m128i)(__v4si){100, 200, 300, 400}),
+ 0, 40, 0, 300));
__m256i test_mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) {
// CHECK-LABEL: test_mm256_permutex2var_epi32
// CHECK: @llvm.x86.avx512.vpermi2var.d.256
return _mm256_permutex2var_epi32(__A,__I,__B);
}
+TEST_CONSTEXPR(match_v8si( // unmasked: indices 0-7 read the first source, indices 8-15 read the second
+ _mm256_permutex2var_epi32((__m256i)(__v8si){0, 10, 20, 30, 40, 50, 60, 70},
+ (__m256i)(__v8si){0, 7, 8, 15, 1, 9, 2, 10},
+ (__m256i)(__v8si){100, 110, 120, 130, 140, 150, 160, 170}),
+ 0, 70, 100, 170, 10, 110, 20, 120));
__m256i test_mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_permutex2var_epi32
// CHECK: @llvm.x86.avx512.vpermi2var.d.256
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_permutex2var_epi32(__A,__U,__I,__B);
}
+TEST_CONSTEXPR(match_v8si( // merge-mask 0xAA: odd lanes take the permute result, even lanes keep the first operand
+ _mm256_mask_permutex2var_epi32((__m256i)(__v8si){-1, -2, -3, -4, -5, -6, -7, -8}, 0xAA,
+ (__m256i)(__v8si){0, 7, 8, 15, 1, 9, 2, 10},
+ (__m256i)(__v8si){100, 110, 120, 130, 140, 150, 160, 170}),
+ -1, -8, -3, 170, -5, 110, -7, 120));
__m256i test_mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_permutex2var_epi32
// CHECK: @llvm.x86.avx512.vpermi2var.d.256
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_permutex2var_epi32(__U,__A,__I,__B);
}
+TEST_CONSTEXPR(match_v8si( // zero-mask 0xAA: odd lanes take the permute result, even lanes are zeroed
+ _mm256_maskz_permutex2var_epi32(0xAA, (__m256i)(__v8si){0, 10, 20, 30, 40, 50, 60, 70},
+ (__m256i)(__v8si){0, 7, 8, 15, 1, 9, 2, 10},
+ (__m256i)(__v8si){100, 110, 120, 130, 140, 150, 160, 170}),
+ 0, 70, 0, 170, 0, 110, 0, 120));
__m128d test_mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) {
// CHECK-LABEL: test_mm_permutex2var_pd
// CHECK: @llvm.x86.avx512.vpermi2var.pd.128
return _mm_permutex2var_pd(__A,__I,__B);
}
+TEST_CONSTEXPR(match_m128d( // unmasked: index 0 reads lane 0 of the first source, index 2 reads lane 0 of the second
+ _mm_permutex2var_pd((__m128d){1.0, 2.0}, (__m128i)(__v2di){0, 2}, (__m128d){10.0, 20.0}),
+ 1.0, 10.0));
__m128d test_mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) {
// CHECK-LABEL: test_mm_mask_permutex2var_pd
// CHECK: @llvm.x86.avx512.vpermi2var.pd.128
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_mask_permutex2var_pd(__A,__U,__I,__B);
}
+TEST_CONSTEXPR(match_m128d( // merge-mask 0x2: lane 1 takes the permute result, lane 0 keeps the first operand
+ _mm_mask_permutex2var_pd((__m128d){-1.0, -2.0}, 0x2, (__m128i)(__v2di){0, 2}, (__m128d){10.0, 20.0}),
+ -1.0, 10.0));
__m128d test_mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) {
// CHECK-LABEL: test_mm_maskz_permutex2var_pd
// CHECK: @llvm.x86.avx512.vpermi2var.pd.128
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_maskz_permutex2var_pd(__U,__A,__I,__B);
}
+TEST_CONSTEXPR(match_m128d( // zero-mask 0x2: lane 1 takes the permute result, lane 0 is zeroed
+ _mm_maskz_permutex2var_pd(0x2, (__m128d){1.0, 2.0}, (__m128i)(__v2di){0, 2}, (__m128d){10.0, 20.0}),
+ 0.0, 10.0));
__m256d test_mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) {
// CHECK-LABEL: test_mm256_permutex2var_pd
// CHECK: @llvm.x86.avx512.vpermi2var.pd.256
return _mm256_permutex2var_pd(__A,__I,__B);
}
+TEST_CONSTEXPR(match_m256d( // unmasked: indices 0 and 1 read the first source, indices 4 and 5 read lanes 0 and 1 of the second
+ _mm256_permutex2var_pd((__m256d){1.0, 2.0, 3.0, 4.0}, (__m256i)(__v4di){0, 4, 1, 5}, (__m256d){10.0, 20.0, 30.0, 40.0}),
+ 1.0, 10.0, 2.0, 20.0));
__m256d test_mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I, __m256d __B) {
// CHECK-LABEL: test_mm256_mask_permutex2var_pd
// CHECK: @llvm.x86.avx512.vpermi2var.pd.256
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_mask_permutex2var_pd(__A,__U,__I,__B);
}
+TEST_CONSTEXPR(match_m256d( // merge-mask 0x2: only lane 1 takes the permute result, the other lanes keep the first operand
+ _mm256_mask_permutex2var_pd((__m256d){-1.0, -2.0, -3.0, -4.0}, 0x2, (__m256i)(__v4di){0, 4, 1, 5}, (__m256d){10.0, 20.0, 30.0, 40.0}),
+ -1.0, 10.0, -3.0, -4.0));
__m256d test_mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I, __m256d __B) {
// CHECK-LABEL: test_mm256_maskz_permutex2var_pd
// CHECK: @llvm.x86.avx512.vpermi2var.pd.256
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_maskz_permutex2var_pd(__U,__A,__I,__B);
}
+TEST_CONSTEXPR(match_m256d( // zero-mask 0x2: only lane 1 takes the permute result, the other lanes are zeroed
+ _mm256_maskz_permutex2var_pd(0x2, (__m256d){1.0, 2.0, 3.0, 4.0}, (__m256i)(__v4di){0, 4, 1, 5}, (__m256d){10.0, 20.0, 30.0, 40.0}),
+ 0.0, 10.0, 0.0, 0.0));
__m128 test_mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) {
// CHECK-LABEL: test_mm_permutex2var_ps
// CHECK: @llvm.x86.avx512.vpermi2var.ps.128
return _mm_permutex2var_ps(__A,__I,__B);
}
+TEST_CONSTEXPR(match_m128( // unmasked: indices 0 and 3 read the first source, indices 4 and 6 read lanes 0 and 2 of the second
+ _mm_permutex2var_ps((__m128){1.f, 2.f, 3.f, 4.f}, (__m128i)(__v4si){0, 3, 4, 6}, (__m128){10.f, 20.f, 30.f, 40.f}),
+ 1.f, 4.f, 10.f, 30.f));
__m128 test_mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) {
// CHECK-LABEL: test_mm_mask_permutex2var_ps
// CHECK: @llvm.x86.avx512.vpermi2var.ps.128
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_mask_permutex2var_ps(__A,__U,__I,__B);
}
+TEST_CONSTEXPR(match_m128( // merge-mask 0x0A: lanes 1 and 3 take the permute result, lanes 0 and 2 keep the first operand
+ _mm_mask_permutex2var_ps((__m128){-1.f, -2.f, -3.f, -4.f}, 0x0A, (__m128i)(__v4si){0, 3, 4, 6}, (__m128){10.f, 20.f, 30.f, 40.f}),
+ -1.f, -4.f, -3.f, 30.f));
__m128 test_mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) {
// CHECK-LABEL: test_mm_maskz_permutex2var_ps
// CHECK: @llvm.x86.avx512.vpermi2var.ps.128
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_maskz_permutex2var_ps(__U,__A,__I,__B);
}
+TEST_CONSTEXPR(match_m128( // zero-mask 0x0A: lanes 1 and 3 take the permute result, lanes 0 and 2 are zeroed
+ _mm_maskz_permutex2var_ps(0x0A, (__m128){1.f, 2.f, 3.f, 4.f}, (__m128i)(__v4si){0, 3, 4, 6}, (__m128){10.f, 20.f, 30.f, 40.f}),
+ 0.f, 4.f, 0.f, 30.f));
__m256 test_mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) {
// CHECK-LABEL: test_mm256_permutex2var_ps
// CHECK: @llvm.x86.avx512.vpermi2var.ps.256
return _mm256_permutex2var_ps(__A,__I,__B);
}
+TEST_CONSTEXPR(match_m256( // unmasked: indices 0-7 read the first source, indices 8-15 read the second
+ _mm256_permutex2var_ps((__m256){0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f},
+ (__m256i)(__v8si){0, 7, 8, 15, 1, 9, 2, 10},
+ (__m256){10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 16.f, 17.f}),
+ 0.f, 7.f, 10.f, 17.f, 1.f, 11.f, 2.f, 12.f));
__m256 test_mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) {
// CHECK-LABEL: test_mm256_mask_permutex2var_ps
// CHECK: @llvm.x86.avx512.vpermi2var.ps.256
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask_permutex2var_ps(__A,__U,__I,__B);
}
+TEST_CONSTEXPR(match_m256( // merge-mask 0xAA: odd lanes take the permute result, even lanes keep the first operand
+ _mm256_mask_permutex2var_ps((__m256){-1.f, -2.f, -3.f, -4.f, -5.f, -6.f, -7.f, -8.f}, 0xAA, (__m256i)(__v8si){0, 7, 8, 15, 1, 9, 2, 10}, (__m256){10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 16.f, 17.f}),
+ -1.f, -8.f, -3.f, 17.f, -5.f, 11.f, -7.f, 12.f));
__m256 test_mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I, __m256 __B) {
// CHECK-LABEL: test_mm256_maskz_permutex2var_ps
// CHECK: @llvm.x86.avx512.vpermi2var.ps.256
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_maskz_permutex2var_ps(__U,__A,__I,__B);
}
+TEST_CONSTEXPR(match_m256( // zero-mask 0xAA: odd lanes take the permute result, even lanes are zeroed
+ _mm256_maskz_permutex2var_ps(0xAA, (__m256){0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}, (__m256i)(__v8si){0, 7, 8, 15, 1, 9, 2, 10}, (__m256){10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 16.f, 17.f}),
+ 0.f, 7.f, 0.f, 17.f, 0.f, 11.f, 0.f, 12.f));
__m128i test_mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) {
// CHECK-LABEL: test_mm_permutex2var_epi64
// CHECK: @llvm.x86.avx512.vpermi2var.q.128
return _mm_permutex2var_epi64(__A,__I,__B);
}
+TEST_CONSTEXPR(match_v2di( // unmasked: index 0 reads lane 0 of the first source, index 3 reads lane 1 of the second
+ _mm_permutex2var_epi64((__m128i)(__v2di){10, 20}, (__m128i)(__v2di){0, 3}, (__m128i)(__v2di){100, 200}),
+ 10, 200));
__m128i test_mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B) {
// CHECK-LABEL: test_mm_mask_permutex2var_epi64
// CHECK: @llvm.x86.avx512.vpermi2var.q.128
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_mask_permutex2var_epi64(__A,__U,__I,__B);
}
+TEST_CONSTEXPR(match_v2di( // merge-mask 0x2: lane 1 takes the permute result, lane 0 keeps the first operand
+ _mm_mask_permutex2var_epi64((__m128i)(__v2di){-1, -2}, 0x2, (__m128i)(__v2di){0, 3}, (__m128i)(__v2di){100, 200}),
+ -1, 200));
__m128i test_mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_permutex2var_epi64
// CHECK: @llvm.x86.avx512.vpermi2var.q.128
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_maskz_permutex2var_epi64(__U,__A,__I,__B);
}
+TEST_CONSTEXPR(match_v2di( // zero-mask 0x2: lane 1 takes the permute result, lane 0 is zeroed
+ _mm_maskz_permutex2var_epi64(0x2, (__m128i)(__v2di){10, 20}, (__m128i)(__v2di){0, 3}, (__m128i)(__v2di){100, 200}),
+ 0, 200));
__m256i test_mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) {
// CHECK-LABEL: test_mm256_permutex2var_epi64
// CHECK: @llvm.x86.avx512.vpermi2var.q.256
return _mm256_permutex2var_epi64(__A,__I,__B);
}
+TEST_CONSTEXPR(match_v4di( // unmasked: indices 0 and 1 read the first source, indices 4 and 5 read lanes 0 and 1 of the second
+ _mm256_permutex2var_epi64((__m256i)(__v4di){0, 10, 20, 30}, (__m256i)(__v4di){0, 1, 4, 5}, (__m256i)(__v4di){100, 110, 120, 130}),
+ 0, 10, 100, 110));
__m256i test_mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_permutex2var_epi64
// CHECK: @llvm.x86.avx512.vpermi2var.q.256
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_mask_permutex2var_epi64(__A,__U,__I,__B);
}
+TEST_CONSTEXPR(match_v4di( // merge-mask 0x5: lanes 0 and 2 take the permute result, lanes 1 and 3 keep the first operand
+ _mm256_mask_permutex2var_epi64((__m256i)(__v4di){-1, -2, -3, -4}, 0x5, (__m256i)(__v4di){0, 1, 4, 5}, (__m256i)(__v4di){100, 110, 120, 130}),
+ -1, -2, 100, -4));
__m256i test_mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_permutex2var_epi64
// CHECK: @llvm.x86.avx512.vpermi2var.q.256
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_maskz_permutex2var_epi64(__U,__A,__I,__B);
}
+TEST_CONSTEXPR(match_v4di( // zero-mask 0x5: lanes 0 and 2 take the permute result, lanes 1 and 3 are zeroed; source lane 0 is non-zero so a selected lane is distinguishable from a zeroed one
+ _mm256_maskz_permutex2var_epi64(0x5, (__m256i)(__v4di){1, 10, 20, 30}, (__m256i)(__v4di){0, 1, 4, 5}, (__m256i)(__v4di){100, 110, 120, 130}),
+ 1, 0, 100, 0));
+TEST_CONSTEXPR(match_v4si( // unmasked: indices 7 and 5 read lanes 3 and 1 of the second source, indices 2 and 1 read the first
+ _mm_permutex2var_epi32((__m128i)(__v4si){10, 20, 30, 40},
+ (__m128i)(__v4si){7, 5, 2, 1},
+ (__m128i)(__v4si){100, 200, 300, 400}),
+ 400, 200, 30, 20));
+TEST_CONSTEXPR(match_v4si( // merge-mask 0x05: lanes 0 and 2 take the permute result (second-source lane 3, first-source lane 1), lanes 1 and 3 keep the first operand
+ _mm_mask_permutex2var_epi32((__m128i)(__v4si){-1, -2, -3, -4}, 0x05,
+ (__m128i)(__v4si){7, 5, 1, 2},
+ (__m128i)(__v4si){100, 200, 300, 400}),
+ 400, -2, -2, -4));
__m128i test_mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_cvtepi8_epi32
// CHECK: sext <4 x i8> %{{.*}} to <4 x i32>
@@ -10472,6 +10589,17 @@ __m256i test_mm256_maskz_shuffle_epi32(__mmask8 __U, __m256i __A) {
TEST_CONSTEXPR(match_v8si(_mm256_maskz_shuffle_epi32(0x33u, ((__m256i)(__v8si){0,1,2,3,4,5,6,7}), 2), 2,0,0,0, 6,4,0,0));
TEST_CONSTEXPR(match_v8si(_mm256_maskz_shuffle_epi32(0xAAu, ((__m256i)(__v8si){0,1,2,3,4,5,6,7}), 2), 0,0,0,0, 0,4,0,4));
TEST_CONSTEXPR(match_v8si(_mm256_maskz_shuffle_epi32(0xFFu, ((__m256i)(__v8si){0,1,2,3,4,5,6,7}), 2), 2,0,0,0, 6,4,4,4));
+TEST_CONSTEXPR(match_v8si( // unmasked: indices 0-7 read the first source, indices 8-15 read the second
+ _mm256_permutex2var_epi32((__m256i)(__v8si){1, 2, 3, 4, 5, 6, 7, 8},
+ (__m256i)(__v8si){0, 7, 8, 15, 1, 9, 2, 10},
+ (__m256i)(__v8si){101, 102, 103, 104, 105, 106, 107, 108}),
+ 1, 8, 101, 108, 2, 102, 3, 103));
+TEST_CONSTEXPR(match_v8si( // merge-mask 0xAA: odd lanes take the permute result, even lanes keep the first operand
+ _mm256_mask_permutex2var_epi32((__m256i)(__v8si){-1, -2, -3, -4, -5, -6, -7, -8},
+ 0xAA,
+ (__m256i)(__v8si){0, 7, 8, 15, 1, 9, 2, 10},
+ (__m256i)(__v8si){101, 102, 103, 104, 105, 106, 107, 108}),
+ -1, -8, -3, 108, -5, 102, -7, 103));
__m128d test_mm_mask_mov_pd(__m128d __W, __mmask8 __U, __m128d __A) {
// CHECK-LABEL: test_mm_mask_mov_pd
diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
index febef46458ae9..172a3cb219c8a 100644
--- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
@@ -1887,6 +1887,67 @@ __m256i test_mm256_maskz_permutex2var_epi16(__mmask16 __U, __m256i __A, __m256i
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_permutex2var_epi16(__U,__A,__I,__B);
}
+
+TEST_CONSTEXPR(match_v8hi(
+ _mm_permutex2var_epi16((__m128i)(__v8hi){0, 10, 20, 30, 40, 50, 60, 70},
+ (__m128i)(__v8hi){0, 7, 8, 15, 1, 9, 2, 10},
+ (__m128i)(__v8hi){100, 110, 120, 130, 140, 150, 160,
+ 170}),
+ 0, 70, 100, 170, 10, 110, 20, 120));
+TEST_CONSTEXPR(match_v8hi(
+ _mm_mask_permutex2var_epi16((__m128i)(__v8hi){-1, -2, -3, -4, -5, -6, -7, -8},
+ 0xAA,
+ (__m128i)(__v8hi){0, 7, 8, 15, 1, 9, 2, 10},
+ (__m128i)(__v8hi){100, 110, 120, 130, 140, 150,
+ 160, 170}),
+ -1, -8, -3, 170, -5, 110, -7, 120));
+TEST_CONSTEXPR(match_v8hi(
+ _mm_maskz_permutex2var_epi16(0xAA,
+ (__m128i)(__v8hi){0, 10, 20, 30, 40, 50, 60, 70},
+ (__m128i)(__v8hi){0, 7, 8, 15, 1, 9, 2, 10},
+ (__m128i)(__v8hi){100, 110, 120, 130, 140, 150,
+ 160, 170}),
+ 0, 70, 0, 170, 0, 110, 0, 120));
+TEST_CONSTEXPR(match_v8hi(
+ _mm_mask2_permutex2var_epi16((__m128i)(__v8hi){0, 10, 20, 30, 40, 50, 60, 70},
+ (__m128i)(__v8hi){0, 7, 8, 15, 1, 9, 2, 10},
+ 0x55,
+ (__m128i)(__v8hi){100, 110, 120, 130, 140, 150,
+ 160, 170}),
+ 0, 7, 100, 15, 10, 9, 20, 10));
+TEST_CONSTEXPR(match_v16hi(
+ _mm256_permutex2var_epi16(
+ (__m256i)(__v16hi){0, 10, 20, 30, 40, 50, 60, 70,
+ 80, 90, 100, 110, 120, 130, 140, 150},
+ (__m256i)(__v16hi){0, 15, 16, 31, 1, 17, 2, 18,
+ 3, 19, 4, 20, 5, 21, 6, 22},
+ (__m256i)(__v16hi){200, 210, 220, 230, 240, 250, 260, 270,
+ 280, 290, 300, 310, 320, 330, 340, 350}),
+ 0, 150, 200, 350, 10, 210, 20, 220,
+ 30, 230, 40, 240, 50, 250, 60, 260));
+TEST_CONSTEXPR(match_v16hi(
+ _mm256_mask_permutex2var_epi16(
+ (__m256i)(__v16hi){-1, -2, -3, -4, -5, -6, -7, -8,
+ -9, -10, -11, -12, -13, -14, -15, -16},
+ 0xAAAA,
+ (__m256i)(__v16hi){0, 15, 16, 31, 1, 17, 2, 18,
+ 3, 19, 4, 20, 5, 21, 6, 22},
+ (__m256i)(__v16hi){200, 210, 220, 230, 240, 250, 260, 270,
+ 280, 290, 300, 310, 320, 330, 340, 350}),
+ -1, -16, -3, 350, -5, 210, -7, 220,
+ -9, 230, -11, 240, -13, 250, -15, 260));
+TEST_CONSTEXPR(match_v16hi(
+ _mm256_maskz_permutex2var_epi16(
+ 0x5555,
+ (__m256i)(__v16hi){0, 10, 20, 30, 40, 50, 60, 70,
+ 80, 90, 100, 110, 120, 130, 140, 150},
+ (__m256i)(__v16hi){0, 15, 16, 31, 1, 17, 2, 18,
+ 3, 19, 4, 20, 5, 21, 6, 22},
+ (__m256i)(__v16hi){200, 210, 220, 230, 240, 250, 260, 270,
+ 280, 290, 300, 310, 320, 330, 340, 350}),
+ 0, 0, 200, 0, 10, 0, 20, 0,
+ 30, 0, 40, 0, 50, 0, 60, 0));
+
__m128i test_mm_mask_maddubs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) {
// CHECK-LABEL: test_mm_mask_maddubs_epi16
// CHECK: @llvm.x86.ssse3.pmadd.ub.sw
@@ -3596,3 +3657,22 @@ void test_mm256_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i _
// CHECK: @llvm.x86.avx512.mask.pmovs.wb.mem.256
_mm256_mask_cvtsepi16_storeu_epi8 ( __P, __M, __A);
}
+
+
+TEST_CONSTEXPR(match_v16qu(
+ _mm_permutex2var_epi8((__m128i)(__v16qu){0, 10, 20, 30, 40, 50, 60, 70,
+ 80, 90, 100, 110, 120, 127, 126, 125},
+ (__m128i)(__v16qu){0, 16, 1, 17, 2, 18, 3, 19,
+ 4, 20, 5, 21, 6, 22, 7, 23},
+ (__m128i)(__v16qu){100, 110, 120, 130, 140, 150, 160, 170,
+ 180, 190, 200, 210, 220, 230, 240, 250}),
+ 0, 100, 10, 110, 20, 120, 30, 130,
+ 40, 140, 50, 150, 60, 160, 70, 170));
+TEST_CONSTEXPR(match_v32qu(
+ _mm256_permutex2var_epi8((__m256i)(__v32qu){0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 127, 126, 125, 124, 123, 122, 121, 120, 119, 118, 117, 116, 115, 114, 113, 112, 111, 110, 109},
+ (__m256i)(__v32qu){0, 32, 1, 33, 2, 34, 3, 35, 4, 36, 5, 37, 6, 38, 7, 39, 8, 40, 9, 41, 10, 42, 11, 43, 12, 44, 13, 45, 14, 46, 15, 47},
+ (__m256i)(__v32qu){200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231}),
+ 0, 200, 10, 201, 20, 202, 30, 203,
+ 40, 204, 50, 205, 60, 206, 70, 207,
+ 80, 208, 90, 209, 100, 210, 110, 211,
+ 120, 212, 127, 213, 126, 214, 125, 215));
>From 09f3ce705519d49320dcdae4ba30ac55a1d5ff7f Mon Sep 17 00:00:00 2001
From: NagaChaitanya Vellanki <pnagato at protonmail.com>
Date: Wed, 29 Oct 2025 08:55:14 -0700
Subject: [PATCH 2/2] Fix style issues
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 24 ++++++++++++------------
clang/lib/AST/ExprConstant.cpp | 24 ++++++++++++------------
2 files changed, 24 insertions(+), 24 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 3731df0dd1699..fbf395877c5dd 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4434,9 +4434,9 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_vpermi2varpd128:
return interp__builtin_ia32_shuffle_generic(
S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
- unsigned offset = ShuffleMask & 0x1;
+ unsigned Offset = ShuffleMask & 0x1;
unsigned SrcIdx = (ShuffleMask >> 1) & 0x1 ? 1 : 0;
- return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ return std::pair<unsigned, unsigned>{SrcIdx, Offset};
});
case X86::BI__builtin_ia32_vpermi2vard128:
case X86::BI__builtin_ia32_vpermi2varps128:
@@ -4444,9 +4444,9 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_vpermi2varpd256:
return interp__builtin_ia32_shuffle_generic(
S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
- unsigned offset = ShuffleMask & 0x3;
+ unsigned Offset = ShuffleMask & 0x3;
unsigned SrcIdx = (ShuffleMask >> 2) & 0x1 ? 1 : 0;
- return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ return std::pair<unsigned, unsigned>{SrcIdx, Offset};
});
case X86::BI__builtin_ia32_vpermi2varhi128:
case X86::BI__builtin_ia32_vpermi2vard256:
@@ -4455,9 +4455,9 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_vpermi2varpd512:
return interp__builtin_ia32_shuffle_generic(
S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
- unsigned offset = ShuffleMask & 0x7;
+ unsigned Offset = ShuffleMask & 0x7;
unsigned SrcIdx = (ShuffleMask >> 3) & 0x1 ? 1 : 0;
- return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ return std::pair<unsigned, unsigned>{SrcIdx, Offset};
});
case X86::BI__builtin_ia32_vpermi2varqi128:
case X86::BI__builtin_ia32_vpermi2varhi256:
@@ -4465,24 +4465,24 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_vpermi2varps512:
return interp__builtin_ia32_shuffle_generic(
S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
- unsigned offset = ShuffleMask & 0xF;
+ unsigned Offset = ShuffleMask & 0xF;
unsigned SrcIdx = (ShuffleMask >> 4) & 0x1 ? 1 : 0;
- return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ return std::pair<unsigned, unsigned>{SrcIdx, Offset};
});
case X86::BI__builtin_ia32_vpermi2varqi256:
case X86::BI__builtin_ia32_vpermi2varhi512:
return interp__builtin_ia32_shuffle_generic(
S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
- unsigned offset = ShuffleMask & 0x1F;
+ unsigned Offset = ShuffleMask & 0x1F;
unsigned SrcIdx = (ShuffleMask >> 5) & 0x1 ? 1 : 0;
- return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ return std::pair<unsigned, unsigned>{SrcIdx, Offset};
});
case X86::BI__builtin_ia32_vpermi2varqi512:
return interp__builtin_ia32_shuffle_generic(
S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
- unsigned offset = ShuffleMask & 0x3F;
+ unsigned Offset = ShuffleMask & 0x3F;
unsigned SrcIdx = (ShuffleMask >> 6) & 0x1 ? 1 : 0;
- return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ return std::pair<unsigned, unsigned>{SrcIdx, Offset};
});
case X86::BI__builtin_ia32_pshufb128:
case X86::BI__builtin_ia32_pshufb256:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 21fae8937a36a..5ebd2ead70f82 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13070,9 +13070,9 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
APValue R;
if (!evalShuffleGeneric(
Info, E, R, [](unsigned DstIdx, unsigned ShuffleMask) {
- unsigned offset = ShuffleMask & 0x1;
+ unsigned Offset = ShuffleMask & 0x1;
unsigned SrcIdx = (ShuffleMask >> 1) & 0x1 ? 1 : 0;
- return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ return std::pair<unsigned, unsigned>{SrcIdx, Offset};
}))
return false;
return Success(R, E);
@@ -13084,9 +13084,9 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
APValue R;
if (!evalShuffleGeneric(
Info, E, R, [](unsigned DstIdx, unsigned ShuffleMask) {
- unsigned offset = ShuffleMask & 0x3;
+ unsigned Offset = ShuffleMask & 0x3;
unsigned SrcIdx = (ShuffleMask >> 2) & 0x1 ? 1 : 0;
- return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ return std::pair<unsigned, unsigned>{SrcIdx, Offset};
}))
return false;
return Success(R, E);
@@ -13099,9 +13099,9 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
APValue R;
if (!evalShuffleGeneric(
Info, E, R, [](unsigned DstIdx, unsigned ShuffleMask) {
- unsigned offset = ShuffleMask & 0x7;
+ unsigned Offset = ShuffleMask & 0x7;
unsigned SrcIdx = (ShuffleMask >> 3) & 0x1 ? 1 : 0;
- return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ return std::pair<unsigned, unsigned>{SrcIdx, Offset};
}))
return false;
return Success(R, E);
@@ -13113,9 +13113,9 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
APValue R;
if (!evalShuffleGeneric(
Info, E, R, [](unsigned DstIdx, unsigned ShuffleMask) {
- unsigned offset = ShuffleMask & 0xF;
+ unsigned Offset = ShuffleMask & 0xF;
unsigned SrcIdx = (ShuffleMask >> 4) & 0x1 ? 1 : 0;
- return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ return std::pair<unsigned, unsigned>{SrcIdx, Offset};
}))
return false;
return Success(R, E);
@@ -13125,9 +13125,9 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
APValue R;
if (!evalShuffleGeneric(
Info, E, R, [](unsigned DstIdx, unsigned ShuffleMask) {
- unsigned offset = ShuffleMask & 0x1F;
+ unsigned Offset = ShuffleMask & 0x1F;
unsigned SrcIdx = (ShuffleMask >> 5) & 0x1 ? 1 : 0;
- return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ return std::pair<unsigned, unsigned>{SrcIdx, Offset};
}))
return false;
return Success(R, E);
@@ -13136,9 +13136,9 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
APValue R;
if (!evalShuffleGeneric(
Info, E, R, [](unsigned DstIdx, unsigned ShuffleMask) {
- unsigned offset = ShuffleMask & 0x3F;
+ unsigned Offset = ShuffleMask & 0x3F;
unsigned SrcIdx = (ShuffleMask >> 6) & 0x1 ? 1 : 0;
- return std::pair<unsigned, unsigned>{SrcIdx, offset};
+ return std::pair<unsigned, unsigned>{SrcIdx, Offset};
}))
return false;
return Success(R, E);
More information about the cfe-commits mailing list