[clang] [X86] Allow PSHUFD/PSHUFLW/PSHUFW intrinsics in constexpr. (PR #161210)
Nagraj Gaonkar via cfe-commits
cfe-commits at lists.llvm.org
Mon Sep 29 12:19:53 PDT 2025
https://github.com/NagrajMG updated https://github.com/llvm/llvm-project/pull/161210
>From 633a986a1e49698e05d109d69fafaa20989aa6c9 Mon Sep 17 00:00:00 2001
From: NagrajMG <nagrajgaonkarmumbai749 at gmail.com>
Date: Mon, 29 Sep 2025 20:08:07 +0530
Subject: [PATCH 1/2] Fixes #156611: Allow PSHUFD/PSHUFLW/PSHUFW intrinsics in
constexpr
---
clang/include/clang/Basic/BuiltinsX86.td | 45 ++-
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 245 ++++++++++++++++
clang/lib/AST/ExprConstant.cpp | 287 +++++++++++++++++++
clang/lib/Headers/mmintrin.h | 5 +
clang/test/CodeGen/X86/avx2-builtins.c | 6 +-
clang/test/CodeGen/X86/avx512bw-builtins.c | 11 +-
clang/test/CodeGen/X86/avx512f-builtins.c | 9 +-
clang/test/CodeGen/X86/avx512vl-builtins.c | 17 ++
clang/test/CodeGen/X86/avx512vlbw-builtins.c | 50 ++++
clang/test/CodeGen/X86/mmx-builtins.c | 2 +-
clang/test/CodeGen/X86/sse2-builtins.c | 6 +-
11 files changed, 663 insertions(+), 20 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 77e599587edc3..e70691a30627a 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -145,6 +145,10 @@ let Features = "mmx", Header = "mmintrin.h", Attributes = [NoThrow, Const] in {
def _m_prefetch : X86LibBuiltin<"void(void *)">;
}
+let Features = "mmx", Attributes = [NoThrow, Const, Constexpr] in {
+ def pshufw : X86Builtin<"_Vector<4, short>(_Vector<4, short>, _Constant int)">;
+}
+
// PRFCHW
let Features = "prfchw", Header = "intrin.h", Attributes = [NoThrow, Const] in {
def _m_prefetchw : X86LibBuiltin<"void(void volatile const *)">;
@@ -217,10 +221,13 @@ let Features = "sse2", Attributes = [NoThrow] in {
def movnti : X86Builtin<"void(int *, int)">;
}
-let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
+let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def pshuflw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">;
+ def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
def pshufhw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">;
+}
+
+let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">;
def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
def sqrtsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
@@ -584,9 +591,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
- def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
- def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
- def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
def psignb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
def psignw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def psignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
@@ -647,6 +651,10 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
def packsswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">;
def packssdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">;
def packuswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">;
+
+ def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
+ def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
+ def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
}
let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
@@ -1990,13 +1998,13 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVect
}
let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
- def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
- def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
def psllw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
}
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def psllv32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
+ def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
+ def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
}
let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
@@ -2016,21 +2024,35 @@ let Features = "avx512f",
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def psrlv32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
+ def pshuflw512_mask : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int, _Vector<32, short>, unsigned int)">;
+ def pshuflw512_maskz : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int, unsigned int)">;
+ def pshufhw512_mask : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int, _Vector<32, short>, unsigned int)">;
+ def pshufhw512_maskz : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int, unsigned int)">;
}
let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def psrlv16hi : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
+ def pshuflw256_mask : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int, _Vector<16, short>, unsigned short)">;
+ def pshuflw256_maskz : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int, unsigned short)">;
+ def pshufhw256_mask : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int, _Vector<16, short>, unsigned short)">;
+ def pshufhw256_maskz : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int, unsigned short)">;
}
let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def psrlv8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
+ def pshuflw128_mask : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int, _Vector<8, short>, unsigned char)">;
+ def pshuflw128_maskz : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int, unsigned char)">;
+ def pshufhw128_mask : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int, _Vector<8, short>, unsigned char)">;
+ def pshufhw128_maskz : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int, unsigned char)">;
}
-let Features = "avx512f",
- Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def psrlwi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">;
def psrldi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, int)">;
def psrlqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, int)">;
+ def pshufd512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int, _Vector<16, int>, unsigned short)">;
+ def pshufd512_maskz : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int, unsigned short)">;
+ def pshufd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
}
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
@@ -2047,10 +2069,14 @@ let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, Req
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def psravq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
+ def pshufd128_mask : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int, _Vector<4, int>, unsigned char)">;
+ def pshufd128_maskz : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int, unsigned char)">;
}
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def psravq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
+ def pshufd256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int, _Vector<8, int>, unsigned char)">;
+ def pshufd256_maskz : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int, unsigned char)">;
}
let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
@@ -3266,7 +3292,6 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>
}
let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
- def pshufd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
def expanddf512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, unsigned char)">;
def expanddi512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, unsigned char)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 891344d4e6ed0..e0bd5d531db34 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2862,6 +2862,218 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
return true;
}
+static bool interp__builtin_ia32_pshuflw_common(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call) {
+ const unsigned NumArgs = Call->getNumArgs();
+ assert(NumArgs == 2 || NumArgs == 3 || NumArgs == 4);
+ APSInt K;
+ Pointer SrcPT;
+ const bool HasMask = (NumArgs == 3) || (NumArgs == 4);
+ const bool IsMaskZ = (NumArgs == 3);
+ if (NumArgs == 4) {
+ K = popToAPSInt(S, Call->getArg(3));
+ SrcPT = S.Stk.pop<Pointer>();
+ } else if (NumArgs == 3) {
+ K = popToAPSInt(S, Call->getArg(2));
+ }
+
+ APSInt Imm = popToAPSInt(S, Call->getArg(1));
+ const Pointer &Src = S.Stk.pop<Pointer>();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+ const unsigned NumElems = Dst.getNumElems();
+ const PrimType ElemT = Dst.getFieldDesc()->getPrimType();
+ const unsigned ElemBits = 16;
+ const unsigned LaneElems = 128u / ElemBits;
+ const unsigned Half = 4;
+ assert(NumElems % LaneElems == 0 && "pshuflw expects 128-bit lanes");
+ const uint8_t Ctl = static_cast<uint8_t>(Imm.getZExtValue());
+
+ for (unsigned i = 0; i != NumElems; ++i) {
+ const unsigned laneBase = (i / LaneElems) * LaneElems;
+ const unsigned inLane = i % LaneElems;
+
+ unsigned srcIdx;
+ if (inLane < Half) {
+ const unsigned pos = inLane;
+ const unsigned sel = (Ctl >> (2 * pos)) & 0x3;
+ srcIdx = laneBase + sel;
+ } else {
+ srcIdx = i;
+ }
+
+ APSInt Chosen;
+ INT_TYPE_SWITCH(ElemT, { Chosen = Src.elem<T>(srcIdx).toAPSInt(); });
+
+ if (!HasMask) {
+ INT_TYPE_SWITCH_NO_BOOL(ElemT,
+ { Dst.elem<T>(i) = static_cast<T>(Chosen); });
+ continue;
+ }
+
+ const bool Keep =
+ (i < static_cast<unsigned>(K.getBitWidth())) ? K[i] : false;
+
+ if (Keep) {
+ INT_TYPE_SWITCH_NO_BOOL(ElemT,
+ { Dst.elem<T>(i) = static_cast<T>(Chosen); });
+ } else if (IsMaskZ) {
+ APSInt Zero(APInt(Chosen.getBitWidth(), 0));
+ Zero.setIsSigned(Chosen.isSigned());
+ INT_TYPE_SWITCH_NO_BOOL(ElemT,
+ { Dst.elem<T>(i) = static_cast<T>(Zero); });
+ } else {
+ APSInt PT;
+ INT_TYPE_SWITCH(ElemT, { PT = SrcPT.elem<T>(i).toAPSInt(); });
+ INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(i) = static_cast<T>(PT); });
+ }
+ }
+
+ Dst.initializeAllElements();
+ return true;
+}
+
+static bool interp__builtin_ia32_pshufhw_common(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call) {
+ (void)OpPC;
+ const unsigned NumArgs = Call->getNumArgs();
+ assert(NumArgs == 2 || NumArgs == 3 || NumArgs == 4);
+
+ APSInt K;
+ Pointer SrcPT;
+ const bool HasMask = (NumArgs == 3) || (NumArgs == 4);
+ const bool IsMaskZ = (NumArgs == 3);
+
+ if (NumArgs == 4) {
+ K = popToAPSInt(S, Call->getArg(3));
+ SrcPT = S.Stk.pop<Pointer>();
+ } else if (NumArgs == 3) {
+ K = popToAPSInt(S, Call->getArg(2));
+ }
+
+ APSInt Imm = popToAPSInt(S, Call->getArg(1));
+ const Pointer &Src = S.Stk.pop<Pointer>();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+
+ const unsigned NumElems = Dst.getNumElems();
+ const PrimType ElemT = Dst.getFieldDesc()->getPrimType();
+
+ const unsigned ElemBits = 16;
+ const unsigned LaneElems = 128u / ElemBits;
+ const unsigned HalfBase = 4;
+ assert(NumElems % LaneElems == 0);
+
+ const uint8_t Ctl = static_cast<uint8_t>(Imm.getZExtValue());
+
+ for (unsigned i = 0; i != NumElems; ++i) {
+ const unsigned laneBase = (i / LaneElems) * LaneElems;
+ const unsigned inLane = i % LaneElems;
+
+ unsigned srcIdx;
+ if (inLane >= HalfBase) {
+ const unsigned pos = inLane - HalfBase;
+ const unsigned sel = (Ctl >> (2 * pos)) & 0x3;
+ srcIdx = laneBase + HalfBase + sel;
+ } else {
+ srcIdx = i;
+ }
+
+ APSInt Chosen;
+ INT_TYPE_SWITCH(ElemT, { Chosen = Src.elem<T>(srcIdx).toAPSInt(); });
+
+ if (!HasMask) {
+ INT_TYPE_SWITCH_NO_BOOL(ElemT,
+ { Dst.elem<T>(i) = static_cast<T>(Chosen); });
+ continue;
+ }
+
+ const bool Keep =
+ (i < static_cast<unsigned>(K.getBitWidth())) ? K[i] : false;
+ if (Keep) {
+ INT_TYPE_SWITCH_NO_BOOL(ElemT,
+ { Dst.elem<T>(i) = static_cast<T>(Chosen); });
+ } else if (IsMaskZ) {
+ APSInt Zero(APInt(Chosen.getBitWidth(), 0));
+ Zero.setIsSigned(Chosen.isSigned());
+ INT_TYPE_SWITCH_NO_BOOL(ElemT,
+ { Dst.elem<T>(i) = static_cast<T>(Zero); });
+ } else {
+ APSInt PT;
+ INT_TYPE_SWITCH(ElemT, { PT = SrcPT.elem<T>(i).toAPSInt(); });
+ INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(i) = static_cast<T>(PT); });
+ }
+ }
+
+ Dst.initializeAllElements();
+ return true;
+}
+
+static bool interp__builtin_ia32_pshufd_common(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call) {
+ (void)OpPC;
+ const unsigned NumArgs = Call->getNumArgs();
+ assert(NumArgs == 2 || NumArgs == 3 || NumArgs == 4);
+
+ APSInt K;
+ Pointer SrcPT;
+ const bool HasMask = (NumArgs == 3) || (NumArgs == 4);
+ const bool IsMaskZ = (NumArgs == 3);
+
+ if (NumArgs == 4) {
+ K = popToAPSInt(S, Call->getArg(3));
+ SrcPT = S.Stk.pop<Pointer>();
+ } else if (NumArgs == 3) {
+ K = popToAPSInt(S, Call->getArg(2));
+ }
+
+ APSInt Imm = popToAPSInt(S, Call->getArg(1));
+ const Pointer &Src = S.Stk.pop<Pointer>();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+
+ const unsigned NumElems = Dst.getNumElems();
+ const PrimType ElemT = Dst.getFieldDesc()->getPrimType();
+
+ const unsigned ElemBits = 32;
+ const unsigned LaneElems = 128u / ElemBits;
+ assert(NumElems % LaneElems == 0);
+
+ const uint8_t Ctl = static_cast<uint8_t>(Imm.getZExtValue());
+
+ for (unsigned i = 0; i != NumElems; ++i) {
+ const unsigned laneBase = (i / LaneElems) * LaneElems;
+ const unsigned inLane = i % LaneElems;
+ const unsigned sel = (Ctl >> (2 * inLane)) & 0x3;
+ const unsigned srcIdx = laneBase + sel;
+
+ APSInt Chosen;
+ INT_TYPE_SWITCH(ElemT, { Chosen = Src.elem<T>(srcIdx).toAPSInt(); });
+
+ if (!HasMask) {
+ INT_TYPE_SWITCH_NO_BOOL(ElemT,
+ { Dst.elem<T>(i) = static_cast<T>(Chosen); });
+ continue;
+ }
+
+ const bool Keep =
+ (i < static_cast<unsigned>(K.getBitWidth())) ? K[i] : false;
+ if (Keep) {
+ INT_TYPE_SWITCH_NO_BOOL(ElemT,
+ { Dst.elem<T>(i) = static_cast<T>(Chosen); });
+ } else if (IsMaskZ) {
+ APSInt Zero(APInt(Chosen.getBitWidth(), 0));
+ Zero.setIsSigned(Chosen.isSigned());
+ INT_TYPE_SWITCH_NO_BOOL(ElemT,
+ { Dst.elem<T>(i) = static_cast<T>(Zero); });
+ } else {
+ APSInt PT;
+ INT_TYPE_SWITCH(ElemT, { PT = SrcPT.elem<T>(i).toAPSInt(); });
+ INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(i) = static_cast<T>(PT); });
+ }
+ }
+
+ Dst.initializeAllElements();
+ return true;
+}
+
static bool interp__builtin_elementwise_triop(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
@@ -3417,6 +3629,39 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return interp__builtin_elementwise_int_binop(S, OpPC, Call,
llvm::APIntOps::mulhs);
+ case clang::X86::BI__builtin_ia32_pshuflw:
+ case clang::X86::BI__builtin_ia32_pshuflw256:
+ case clang::X86::BI__builtin_ia32_pshuflw512:
+ case clang::X86::BI__builtin_ia32_pshuflw128_mask:
+ case clang::X86::BI__builtin_ia32_pshuflw256_mask:
+ case clang::X86::BI__builtin_ia32_pshuflw512_mask:
+ case clang::X86::BI__builtin_ia32_pshuflw128_maskz:
+ case clang::X86::BI__builtin_ia32_pshuflw256_maskz:
+ case clang::X86::BI__builtin_ia32_pshuflw512_maskz:
+ return interp__builtin_ia32_pshuflw_common(S, OpPC, Call);
+
+ case clang::X86::BI__builtin_ia32_pshufhw:
+ case clang::X86::BI__builtin_ia32_pshufhw256:
+ case clang::X86::BI__builtin_ia32_pshufhw512:
+ case clang::X86::BI__builtin_ia32_pshufhw128_mask:
+ case clang::X86::BI__builtin_ia32_pshufhw256_mask:
+ case clang::X86::BI__builtin_ia32_pshufhw512_mask:
+ case clang::X86::BI__builtin_ia32_pshufhw128_maskz:
+ case clang::X86::BI__builtin_ia32_pshufhw256_maskz:
+ case clang::X86::BI__builtin_ia32_pshufhw512_maskz:
+ return interp__builtin_ia32_pshufhw_common(S, OpPC, Call);
+
+ case clang::X86::BI__builtin_ia32_pshufd:
+ case clang::X86::BI__builtin_ia32_pshufd256:
+ case clang::X86::BI__builtin_ia32_pshufd512:
+ case clang::X86::BI__builtin_ia32_pshufd128_mask:
+ case clang::X86::BI__builtin_ia32_pshufd256_mask:
+ case clang::X86::BI__builtin_ia32_pshufd512_mask:
+ case clang::X86::BI__builtin_ia32_pshufd128_maskz:
+ case clang::X86::BI__builtin_ia32_pshufd256_maskz:
+ case clang::X86::BI__builtin_ia32_pshufd512_maskz:
+ return interp__builtin_ia32_pshufd_common(S, OpPC, Call);
+
case clang::X86::BI__builtin_ia32_psllv2di:
case clang::X86::BI__builtin_ia32_psllv4di:
case clang::X86::BI__builtin_ia32_psllv4si:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index b706b14945b6d..1ce601d37e0d6 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11869,6 +11869,293 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+ case X86::BI__builtin_ia32_pshufw: {
+ APValue Src;
+ APSInt Imm;
+ if (!EvaluateAsRValue(Info, E->getArg(0), Src)) return false;
+ if (!EvaluateInteger(E->getArg(1), Imm, Info)) return false;
+
+ unsigned N = Src.getVectorLength();
+ SmallVector<APValue, 4> ResultElements;
+ ResultElements.reserve(N);
+
+ uint8_t C = static_cast<uint8_t>(Imm.getZExtValue());
+ for (unsigned i = 0; i != N; ++i) {
+ unsigned sel = (C >> (2 * i)) & 0x3;
+ ResultElements.push_back(Src.getVectorElt(sel));
+ }
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
+
+ case clang::X86::BI__builtin_ia32_pshuflw:
+ case clang::X86::BI__builtin_ia32_pshuflw256:
+ case clang::X86::BI__builtin_ia32_pshuflw512:
+ case clang::X86::BI__builtin_ia32_pshuflw128_mask:
+ case clang::X86::BI__builtin_ia32_pshuflw256_mask:
+ case clang::X86::BI__builtin_ia32_pshuflw512_mask:
+ case clang::X86::BI__builtin_ia32_pshuflw128_maskz:
+ case clang::X86::BI__builtin_ia32_pshuflw256_maskz:
+ case clang::X86::BI__builtin_ia32_pshuflw512_maskz: {
+ const unsigned BID = E->getBuiltinCallee();
+
+ const bool IsMask =
+ BID == clang::X86::BI__builtin_ia32_pshuflw128_mask ||
+ BID == clang::X86::BI__builtin_ia32_pshuflw256_mask ||
+ BID == clang::X86::BI__builtin_ia32_pshuflw512_mask;
+
+ const bool IsMaskZ =
+ BID == clang::X86::BI__builtin_ia32_pshuflw128_maskz ||
+ BID == clang::X86::BI__builtin_ia32_pshuflw256_maskz ||
+ BID == clang::X86::BI__builtin_ia32_pshuflw512_maskz;
+
+ const unsigned AIdx = 0, ImmIdx = 1;
+ const unsigned SrcIdx = 2;
+ const unsigned KIdx = IsMaskZ ? 2 : 3;
+
+ APValue AVal, SrcVal;
+ APSInt Imm, K;
+ if (!EvaluateAsRValue(Info, E->getArg(AIdx), AVal)) return false;
+ if (!EvaluateInteger(E->getArg(ImmIdx), Imm, Info)) return false;
+
+ const APSInt *KPtr = nullptr;
+ const APValue *PassThru = nullptr;
+ bool ZeroInactive = false;
+
+ if (IsMask) {
+ if (!EvaluateAsRValue(Info, E->getArg(SrcIdx), SrcVal)) return false;
+ if (!EvaluateInteger(E->getArg(KIdx), K, Info)) return false;
+ KPtr = &K; PassThru = &SrcVal; ZeroInactive = false;
+ } else if (IsMaskZ) {
+ if (!EvaluateInteger(E->getArg(KIdx), K, Info)) return false;
+ KPtr = &K; PassThru = nullptr; ZeroInactive = true;
+ }
+
+ const auto *VT = E->getType()->getAs<VectorType>();
+ if (!VT) return false;
+ const unsigned NumElts = VT->getNumElements();
+
+ const unsigned ElemBits = 16;
+ const unsigned LaneElems = std::min(NumElts, 128u / ElemBits);
+ const unsigned Half = 4;
+ const uint8_t Ctl = static_cast<uint8_t>(Imm.getZExtValue());
+ const bool DestUnsigned =
+ VT->getElementType()->isUnsignedIntegerOrEnumerationType();
+
+ auto MakeZero = [&]() -> APValue {
+ return APValue(APSInt(APInt(ElemBits, 0), DestUnsigned));
+ };
+
+ SmallVector<APValue, 32> ResultElements;
+ ResultElements.reserve(NumElts);
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ const unsigned laneBase = (i / LaneElems) * LaneElems;
+ const unsigned inLane = i % LaneElems;
+
+ APValue Chosen;
+ if (inLane < Half) {
+ const unsigned pos = inLane;
+ const unsigned sel = (Ctl >> (2 * pos)) & 0x3;
+ const unsigned srcIdx = laneBase + sel;
+ Chosen = AVal.getVectorElt(srcIdx);
+ } else {
+ Chosen = AVal.getVectorElt(i);
+ }
+
+ if (KPtr) {
+ const bool Keep = (i < KPtr->getBitWidth()) ? (*KPtr)[i] : false;
+ if (Keep) {
+ ResultElements.push_back(Chosen);
+ } else if (ZeroInactive) {
+ ResultElements.push_back(MakeZero());
+ } else {
+ const APValue &PT = PassThru ? PassThru->getVectorElt(i)
+ : AVal.getVectorElt(i);
+ ResultElements.push_back(PT);
+ }
+ } else {
+ ResultElements.push_back(Chosen);
+ }
+ }
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
+
+ case clang::X86::BI__builtin_ia32_pshufhw:
+ case clang::X86::BI__builtin_ia32_pshufhw256:
+ case clang::X86::BI__builtin_ia32_pshufhw512:
+ case clang::X86::BI__builtin_ia32_pshufhw128_mask:
+ case clang::X86::BI__builtin_ia32_pshufhw256_mask:
+ case clang::X86::BI__builtin_ia32_pshufhw512_mask:
+ case clang::X86::BI__builtin_ia32_pshufhw128_maskz:
+ case clang::X86::BI__builtin_ia32_pshufhw256_maskz:
+ case clang::X86::BI__builtin_ia32_pshufhw512_maskz: {
+ const unsigned BID = E->getBuiltinCallee();
+
+ const bool IsMask =
+ BID == clang::X86::BI__builtin_ia32_pshufhw128_mask ||
+ BID == clang::X86::BI__builtin_ia32_pshufhw256_mask ||
+ BID == clang::X86::BI__builtin_ia32_pshufhw512_mask;
+
+ const bool IsMaskZ =
+ BID == clang::X86::BI__builtin_ia32_pshufhw128_maskz ||
+ BID == clang::X86::BI__builtin_ia32_pshufhw256_maskz ||
+ BID == clang::X86::BI__builtin_ia32_pshufhw512_maskz;
+
+ const unsigned AIdx = 0, ImmIdx = 1;
+ const unsigned SrcIdx = 2;
+ const unsigned KIdx = IsMaskZ ? 2 : 3;
+
+ APValue AVal, SrcVal;
+ APSInt Imm, K;
+ if (!EvaluateAsRValue(Info, E->getArg(AIdx), AVal)) return false;
+ if (!EvaluateInteger(E->getArg(ImmIdx), Imm, Info)) return false;
+
+ const APSInt *KPtr = nullptr;
+ const APValue *PassThru = nullptr;
+ bool ZeroInactive = false;
+ if (IsMask) {
+ if (!EvaluateAsRValue(Info, E->getArg(SrcIdx), SrcVal)) return false;
+ if (!EvaluateInteger(E->getArg(KIdx), K, Info)) return false;
+ KPtr = &K; PassThru = &SrcVal; ZeroInactive = false;
+ } else if (IsMaskZ) {
+ if (!EvaluateInteger(E->getArg(KIdx), K, Info)) return false;
+ KPtr = &K; PassThru = nullptr; ZeroInactive = true;
+ }
+
+ const auto *VT = E->getType()->getAs<VectorType>();
+ if (!VT) return false;
+ const unsigned NumElts = VT->getNumElements();
+ const unsigned ElemBits = 16;
+ const unsigned LaneElems = std::min(NumElts, 128u / ElemBits);
+ const unsigned Half = 4;
+ const uint8_t Ctl = static_cast<uint8_t>(Imm.getZExtValue());
+ const bool DestUnsigned =
+ VT->getElementType()->isUnsignedIntegerOrEnumerationType();
+
+ auto MakeZero = [&]() -> APValue {
+ return APValue(APSInt(APInt(ElemBits, 0), DestUnsigned));
+ };
+
+ SmallVector<APValue, 32> ResultElements;
+ ResultElements.reserve(NumElts);
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ const unsigned laneBase = (i / LaneElems) * LaneElems;
+ const unsigned inLane = i % LaneElems;
+
+ APValue Chosen;
+ if (inLane >= Half) {
+ const unsigned pos = inLane - Half;
+ const unsigned sel = (Ctl >> (2 * pos)) & 0x3;
+ const unsigned srcIdx = laneBase + Half + sel;
+ Chosen = AVal.getVectorElt(srcIdx);
+ } else {
+ Chosen = AVal.getVectorElt(i);
+ }
+
+ if (KPtr) {
+ const bool Keep = (i < KPtr->getBitWidth()) ? (*KPtr)[i] : false;
+ if (Keep) {
+ ResultElements.push_back(Chosen);
+ } else if (ZeroInactive) {
+ ResultElements.push_back(MakeZero());
+ } else {
+ const APValue &PT = PassThru ? PassThru->getVectorElt(i)
+ : AVal.getVectorElt(i);
+ ResultElements.push_back(PT);
+ }
+ } else {
+ ResultElements.push_back(Chosen);
+ }
+ }
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
+
+ case clang::X86::BI__builtin_ia32_pshufd:
+ case clang::X86::BI__builtin_ia32_pshufd256:
+ case clang::X86::BI__builtin_ia32_pshufd512:
+ case clang::X86::BI__builtin_ia32_pshufd128_mask:
+ case clang::X86::BI__builtin_ia32_pshufd256_mask:
+ case clang::X86::BI__builtin_ia32_pshufd512_mask:
+ case clang::X86::BI__builtin_ia32_pshufd128_maskz:
+ case clang::X86::BI__builtin_ia32_pshufd256_maskz:
+ case clang::X86::BI__builtin_ia32_pshufd512_maskz: {
+ const unsigned BID = E->getBuiltinCallee();
+
+ const bool IsMask =
+ BID == clang::X86::BI__builtin_ia32_pshufd512_mask ||
+ BID == clang::X86::BI__builtin_ia32_pshufd128_mask ||
+ BID == clang::X86::BI__builtin_ia32_pshufd256_mask;
+
+ const bool IsMaskZ =
+ BID == clang::X86::BI__builtin_ia32_pshufd512_maskz ||
+ BID == clang::X86::BI__builtin_ia32_pshufd128_maskz ||
+ BID == clang::X86::BI__builtin_ia32_pshufd256_maskz;
+
+ const unsigned AIdx = 0, ImmIdx = 1;
+ const unsigned SrcIdx = 2;
+ const unsigned KIdx = IsMaskZ ? 2 : 3;
+
+ APValue AVal, SrcVal;
+ APSInt Imm, K;
+ if (!EvaluateAsRValue(Info, E->getArg(AIdx), AVal)) return false;
+ if (!EvaluateInteger(E->getArg(ImmIdx), Imm, Info)) return false;
+
+ const APSInt *KPtr = nullptr;
+ const APValue *PassThru = nullptr;
+ bool ZeroInactive = false;
+ if (IsMask) {
+ if (!EvaluateAsRValue(Info, E->getArg(SrcIdx), SrcVal)) return false;
+ if (!EvaluateInteger(E->getArg(KIdx), K, Info)) return false;
+ KPtr = &K; PassThru = &SrcVal; ZeroInactive = false;
+ } else if (IsMaskZ) {
+ if (!EvaluateInteger(E->getArg(KIdx), K, Info)) return false;
+ KPtr = &K; PassThru = nullptr; ZeroInactive = true;
+ }
+
+ const auto *VT = E->getType()->getAs<VectorType>();
+ if (!VT) return false;
+ const unsigned NumElts = VT->getNumElements();
+ const unsigned ElemBits = 32;
+ const unsigned LaneElems = std::min(NumElts, 128u / ElemBits);
+ const uint8_t Ctl = static_cast<uint8_t>(Imm.getZExtValue());
+ const bool DestUnsigned =
+ VT->getElementType()->isUnsignedIntegerOrEnumerationType();
+
+ auto MakeZero = [&]() -> APValue {
+ return APValue(APSInt(APInt(ElemBits, 0), DestUnsigned));
+ };
+
+ SmallVector<APValue, 32> ResultElements;
+ ResultElements.reserve(NumElts);
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ const unsigned laneBase = (i / LaneElems) * LaneElems;
+ const unsigned inLane = i % LaneElems;
+
+ const unsigned pos = inLane & 3;
+ const unsigned sel = (Ctl >> (2 * pos)) & 0x3;
+ const unsigned srcIdx = laneBase + sel;
+ APValue Chosen = AVal.getVectorElt(srcIdx);
+
+ if (KPtr) {
+ const bool Keep = (i < KPtr->getBitWidth()) ? (*KPtr)[i] : false;
+ if (Keep) {
+ ResultElements.push_back(Chosen);
+ } else if (ZeroInactive) {
+ ResultElements.push_back(MakeZero());
+ } else {
+ const APValue &PT = PassThru ? PassThru->getVectorElt(i)
+ : AVal.getVectorElt(i);
+ ResultElements.push_back(PT);
+ }
+ } else {
+ ResultElements.push_back(Chosen);
+ }
+ }
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
+
case clang::X86::BI__builtin_ia32_vprotbi:
case clang::X86::BI__builtin_ia32_vprotdi:
case clang::X86::BI__builtin_ia32_vprotqi:
diff --git a/clang/lib/Headers/mmintrin.h b/clang/lib/Headers/mmintrin.h
index 5f617530b6f78..01b5cea02cb1c 100644
--- a/clang/lib/Headers/mmintrin.h
+++ b/clang/lib/Headers/mmintrin.h
@@ -39,14 +39,19 @@ typedef short __v8hi __attribute__((__vector_size__(16)));
typedef char __v16qi __attribute__((__vector_size__(16)));
/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS_MMX \
+ __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
+
#define __DEFAULT_FN_ATTRS_SSE2 \
__attribute__((__always_inline__, __nodebug__, __target__("sse2"), \
__min_vector_width__(128)))
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2 constexpr
+#define __DEFAULT_FN_ATTRS_MMX_CONSTEXPR __DEFAULT_FN_ATTRS_MMX constexpr
#else
#define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2
+#define __DEFAULT_FN_ATTRS_MMX_CONSTEXPR __DEFAULT_FN_ATTRS_MMX
#endif
#define __trunc64(x) \
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index eff2797e87c75..4299b18243f21 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -1109,19 +1109,19 @@ __m256i test_mm256_shuffle_epi32(__m256i a) {
// CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> poison, <8 x i32> <i32 3, i32 3, i32 0, i32 0, i32 7, i32 7, i32 4, i32 4>
return _mm256_shuffle_epi32(a, 15);
}
-
+TEST_CONSTEXPR(match_v8si(_mm256_shuffle_epi32((((__m256i)(__v8si){0,1,2,3,4,5,6,7})), 15), 3,3,0,0, 7,7,4,4));
__m256i test_mm256_shufflehi_epi16(__m256i a) {
// CHECK-LABEL: test_mm256_shufflehi_epi16
// CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 14, i32 14, i32 13>
return _mm256_shufflehi_epi16(a, 107);
}
-
+TEST_CONSTEXPR(match_v16hi(_mm256_shufflehi_epi16((((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15})), 107), 0,1,2,3, 7,6,6,5, 8,9,10,11, 15,14,14,13));
__m256i test_mm256_shufflelo_epi16(__m256i a) {
// CHECK-LABEL: test_mm256_shufflelo_epi16
// CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> <i32 3, i32 0, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 9, i32 9, i32 12, i32 13, i32 14, i32 15>
return _mm256_shufflelo_epi16(a, 83);
}
-
+TEST_CONSTEXPR(match_v16hi(_mm256_shufflelo_epi16(((__m256i)(__v16hi){ 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}), 83), 3,0,1,1, 4,5,6,7, 11,8,9,9, 12,13,14,15) );
__m256i test_mm256_sign_epi8(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_sign_epi8
// CHECK: call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index 3f42ac0268978..bd19363c8d948 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -1876,13 +1876,15 @@ __m512i test_mm512_shufflehi_epi16(__m512i __A) {
// CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, i32 11, i32 13, i32 13, i32 12, i32 12, i32 16, i32 17, i32 18, i32 19, i32 21, i32 21, i32 20, i32 20, i32 24, i32 25, i32 26, i32 27, i32 29, i32 29, i32 28, i32 28>
return _mm512_shufflehi_epi16(__A, 5);
}
-
+TEST_CONSTEXPR(match_v32hi(_mm512_shufflehi_epi16((((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31})), 5), 0,1,2,3, 5,5,4,4, 8,9,10,11, 13,13,12,12, 16,17,18,19, 21,21,20,20, 24,25,26,27, 29,29,28,28));
__m512i test_mm512_mask_shufflehi_epi16(__m512i __W, __mmask32 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_shufflehi_epi16
// CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, i32 11, i32 13, i32 13, i32 12, i32 12, i32 16, i32 17, i32 18, i32 19, i32 21, i32 21, i32 20, i32 20, i32 24, i32 25, i32 26, i32 27, i32 29, i32 29, i32 28, i32 28>
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_shufflehi_epi16(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_shufflehi_epi16((((__m512i)(__v32hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131})), 0xFFFF0000u, (((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31})), 5), 100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115, 16,17,18,19,21,21,20,20, 24,25,26,27,29,29,28,28));
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_shufflehi_epi16(((__m512i)(__v32hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131}), 0x0000FFFFu, ((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}), 5), 0,1,2,3,5,5,4,4, 8,9,10,11,13,13,12,12, 116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131));
__m512i test_mm512_maskz_shufflehi_epi16(__mmask32 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_shufflehi_epi16
@@ -1890,12 +1892,15 @@ __m512i test_mm512_maskz_shufflehi_epi16(__mmask32 __U, __m512i __A) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_shufflehi_epi16(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_shufflehi_epi16(0xAAAAAAAAu, (((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31})), 5), 0,1,0,3,0,5,0,4, 0,9,0,11,0,13,0,12, 0,17,0,19,0,21,0,20, 0,25,0,27,0,29,0,28));
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_shufflehi_epi16(0x0000FFFFu, ((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}), 5), 0,1,2,3,5,5,4,4, 8,9,10,11,13,13,12,12, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0));
__m512i test_mm512_shufflelo_epi16(__m512i __A) {
// CHECK-LABEL: test_mm512_shufflelo_epi16
// CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31>
return _mm512_shufflelo_epi16(__A, 5);
}
+TEST_CONSTEXPR( match_v32hi(_mm512_shufflelo_epi16(((__m512i)(__v32hi){ 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15, 16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31}), 5), 1,1,0,0, 4,5,6,7, 9,9,8,8, 12,13,14,15, 17,17,16,16, 20,21,22,23, 25,25,24,24, 28,29,30,31));
__m512i test_mm512_mask_shufflelo_epi16(__m512i __W, __mmask32 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_shufflelo_epi16
@@ -1903,6 +1908,8 @@ __m512i test_mm512_mask_shufflelo_epi16(__m512i __W, __mmask32 __U, __m512i __A)
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_shufflelo_epi16(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_shufflelo_epi16((((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31})), 0xFFFFFFFF, (((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31})), 5), 1,1,0,0, 4,5,6,7, 9,9,8,8, 12,13,14,15, 17,17,16,16, 20,21,22,23, 25,25,24,24, 28,29,30,31));
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_shufflelo_epi16(((__m512i)(__v32hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131}), 0x0000FFFFu, ((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}), 5), 1,1,0,0,4,5,6,7, 9,9,8,8,12,13,14,15, 116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131));
__m512i test_mm512_maskz_shufflelo_epi16(__mmask32 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_shufflelo_epi16
@@ -1910,6 +1917,8 @@ __m512i test_mm512_maskz_shufflelo_epi16(__mmask32 __U, __m512i __A) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_shufflelo_epi16(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_shufflelo_epi16(0xFFFFFFFF, (((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31})), 5), 1,1,0,0, 4,5,6,7, 9,9,8,8, 12,13,14,15, 17,17,16,16, 20,21,22,23, 25,25,24,24, 28,29,30,31));
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_shufflelo_epi16(0x0000FFFFu, ((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}), 5), 1,1,0,0,4,5,6,7, 9,9,8,8,12,13,14,15, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0));
__m512i test_mm512_sllv_epi16(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_sllv_epi16
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 84eaad8d99e61..47cb485a84210 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -9073,20 +9073,25 @@ __m512i test_mm512_shuffle_epi32(__m512i __A) {
// CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
return _mm512_shuffle_epi32(__A, 1);
}
-
+TEST_CONSTEXPR(match_v16si(_mm512_shuffle_epi32((((__m512i)(__v16si){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15})), 1), 1,0,0,0, 5,4,4,4, 9,8,8,8, 13,12,12,12));
__m512i test_mm512_mask_shuffle_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_shuffle_epi32
// CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_shuffle_epi32(__W, __U, __A, 1);
}
-
+TEST_CONSTEXPR(match_v16si(_mm512_mask_shuffle_epi32(((__m512i)(__v16si){100,101,102,103,104,105,106,107,200,201,202,203,204,205,206,207}), 0xFFFFu, ((__m512i)(__v16si){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}), 1), 1,0,0,0, 5,4,4,4, 9,8,8,8, 13,12,12,12));
+TEST_CONSTEXPR(match_v16si(_mm512_mask_shuffle_epi32(((__m512i)(__v16si){100,101,102,103,104,105,106,107,200,201,202,203,204,205,206,207}), 0x0000u, ((__m512i)(__v16si){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}), 1), 100,101,102,103,104,105,106,107,200,201,202,203,204,205,206,207));
+TEST_CONSTEXPR(match_v16si(_mm512_mask_shuffle_epi32(((__m512i)(__v16si){100,101,102,103,104,105,106,107,200,201,202,203,204,205,206,207}), 0x00FFu, ((__m512i)(__v16si){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}), 1), 1,0,0,0, 5,4,4,4, 200,201,202,203,204,205,206,207));
__m512i test_mm512_maskz_shuffle_epi32(__mmask16 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_shuffle_epi32
// CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_shuffle_epi32(__U, __A, 1);
}
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_shuffle_epi32(0xFFFFu, ((__m512i)(__v16si){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}), 1), 1,0,0,0, 5,4,4,4, 9,8,8,8, 13,12,12,12));
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_shuffle_epi32(0x5555u, ((__m512i)(__v16si){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}), 1), 1,0,0,0, 5,0,4,0, 9,0,8,0, 13,0,12,0));
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_shuffle_epi32(0x8001u, ((__m512i)(__v16si){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}), 1), 1,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,12));
__m512d test_mm512_mask_expand_pd(__m512d __W, __mmask8 __U, __m512d __A) {
// CHECK-LABEL: test_mm512_mask_expand_pd
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 5282c7ab06dea..88006232c5c99 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -10025,6 +10025,11 @@ __m128i test_mm_mask_shuffle_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
return _mm_mask_shuffle_epi32(__W, __U, __A, 1);
}
+TEST_CONSTEXPR(match_v4si(_mm_mask_shuffle_epi32(((__m128i)(__v4si){100,101,102,103}), 0x0Fu, ((__m128i)(__v4si){0,1,2,3}), 1), 1,0,0,0));
+TEST_CONSTEXPR(match_v4si(_mm_mask_shuffle_epi32(((__m128i)(__v4si){100,101,102,103}), 0x0Au, ((__m128i)(__v4si){0,1,2,3}), 1), 100,0,102,0));
+TEST_CONSTEXPR(match_v4si(_mm_mask_shuffle_epi32(((__m128i)(__v4si){100,101,102,103}), 0x05u, ((__m128i)(__v4si){0,1,2,3}), 1), 1,101,0,103));
+TEST_CONSTEXPR(match_v4si(_mm_mask_shuffle_epi32(((__m128i)(__v4si){100,101,102,103}), 0x00u, ((__m128i)(__v4si){0,1,2,3}), 1), 100,101,102,103));
+
__m128i test_mm_maskz_shuffle_epi32(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_maskz_shuffle_epi32
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
@@ -10032,6 +10037,10 @@ __m128i test_mm_maskz_shuffle_epi32(__mmask8 __U, __m128i __A) {
return _mm_maskz_shuffle_epi32(__U, __A, 2);
}
+TEST_CONSTEXPR(match_v4si(_mm_maskz_shuffle_epi32(0x01u, ((__m128i)(__v4si){0,1,2,3}), 2), 2,0,0,0));
+TEST_CONSTEXPR(match_v4si(_mm_maskz_shuffle_epi32(0x0Au, ((__m128i)(__v4si){0,1,2,3}), 2), 0,0,0,0));
+TEST_CONSTEXPR(match_v4si(_mm_maskz_shuffle_epi32(0x0Fu, ((__m128i)(__v4si){0,1,2,3}), 2), 2,0,0,0));
+
__m256i test_mm256_mask_shuffle_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: test_mm256_mask_shuffle_epi32
// CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> poison, <8 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4>
@@ -10039,6 +10048,10 @@ __m256i test_mm256_mask_shuffle_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
return _mm256_mask_shuffle_epi32(__W, __U, __A, 2);
}
+TEST_CONSTEXPR(match_v8si(_mm256_mask_shuffle_epi32(((__m256i)(__v8si){100,101,102,103,104,105,106,107}), 0xF0u, ((__m256i)(__v8si){0,1,2,3,4,5,6,7}), 2), 100,101,102,103, 6,4,4,4));
+TEST_CONSTEXPR(match_v8si(_mm256_mask_shuffle_epi32(((__m256i)(__v8si){100,101,102,103,104,105,106,107}), 0x33u, ((__m256i)(__v8si){0,1,2,3,4,5,6,7}), 2), 2,0,102,103, 6,4,106,107));
+TEST_CONSTEXPR(match_v8si(_mm256_mask_shuffle_epi32(((__m256i)(__v8si){100,101,102,103,104,105,106,107}), 0x00u, ((__m256i)(__v8si){0,1,2,3,4,5,6,7}), 2), 100,101,102,103,104,105,106,107));
+
__m256i test_mm256_maskz_shuffle_epi32(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: test_mm256_maskz_shuffle_epi32
// CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> poison, <8 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4>
@@ -10046,6 +10059,10 @@ __m256i test_mm256_maskz_shuffle_epi32(__mmask8 __U, __m256i __A) {
return _mm256_maskz_shuffle_epi32(__U, __A, 2);
}
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_shuffle_epi32(0x33u, ((__m256i)(__v8si){0,1,2,3,4,5,6,7}), 2), 2,0,0,0, 6,4,0,0));
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_shuffle_epi32(0xAAu, ((__m256i)(__v8si){0,1,2,3,4,5,6,7}), 2), 0,0,0,0, 0,4,0,4));
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_shuffle_epi32(0xFFu, ((__m256i)(__v8si){0,1,2,3,4,5,6,7}), 2), 2,0,0,0, 6,4,4,4));
+
__m128d test_mm_mask_mov_pd(__m128d __W, __mmask8 __U, __m128d __A) {
// CHECK-LABEL: test_mm_mask_mov_pd
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
index 6c9c80efcef9d..1fe1ec08ede88 100644
--- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
@@ -3393,6 +3393,13 @@ __m128i test_mm_mask_shufflehi_epi16(__m128i __W, __mmask8 __U, __m128i __A) {
return _mm_mask_shufflehi_epi16(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflehi_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0xF0u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),100,101,102,103,5,5,4,4));
+TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflehi_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0x00u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),100,101,102,103,104,105,106,107));
+TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflehi_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0xFFu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,1,2,3,5,5,4,4));
+TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflehi_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0x0Fu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,1,2,3,104,105,106,107));
+TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflehi_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0x55u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,101,2,103,5,105,4,107));
+TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflehi_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0xAAu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),100,1,102,3,104,5,106,4));
+
__m128i test_mm_maskz_shufflehi_epi16(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_maskz_shufflehi_epi16
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4>
@@ -3400,6 +3407,13 @@ __m128i test_mm_maskz_shufflehi_epi16(__mmask8 __U, __m128i __A) {
return _mm_maskz_shufflehi_epi16(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflehi_epi16(0xF0u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,0,0,0,5,5,4,4));
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflehi_epi16(0x00u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,0,0,0,0,0,0,0));
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflehi_epi16(0xFFu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,1,2,3,5,5,4,4));
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflehi_epi16(0x0Fu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,1,2,3,0,0,0,0));
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflehi_epi16(0x55u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,0,2,0,5,0,4,0));
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflehi_epi16(0xAAu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,1,0,3,0,5,0,4));
+
__m128i test_mm_mask_shufflelo_epi16(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_shufflelo_epi16
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
@@ -3407,6 +3421,13 @@ __m128i test_mm_mask_shufflelo_epi16(__m128i __W, __mmask8 __U, __m128i __A) {
return _mm_mask_shufflelo_epi16(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflelo_epi16(((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),0xFF,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),1,1,0,0,4,5,6,7));
+TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflelo_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0x00u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),100,101,102,103,104,105,106,107));
+TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflelo_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0x0Fu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),1,1,0,0,104,105,106,107));
+TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflelo_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0xF0u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),100,101,102,103,4,5,6,7));
+TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflelo_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0xAAu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),100,1,102,0,104,5,106,7));
+TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflelo_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0x55u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),1,101,0,103,4,105,6,107));
+
__m128i test_mm_maskz_shufflelo_epi16(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_maskz_shufflelo_epi16
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
@@ -3414,6 +3435,12 @@ __m128i test_mm_maskz_shufflelo_epi16(__mmask8 __U, __m128i __A) {
return _mm_maskz_shufflelo_epi16(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflelo_epi16(0xFF,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),1,1,0,0,4,5,6,7));
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflelo_epi16(0x0Fu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),1,1,0,0,0,0,0,0));
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflelo_epi16(0xF0u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,0,0,0,4,5,6,7));
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflelo_epi16(0xAAu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,1,0,0,0,5,0,7));
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflelo_epi16(0x55u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),1,0,0,0,4,0,6,0));
+
__m256i test_mm256_mask_shufflehi_epi16(__m256i __W, __mmask16 __U, __m256i __A) {
// CHECK-LABEL: test_mm256_mask_shufflehi_epi16
// CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, i32 11, i32 13, i32 13, i32 12, i32 12>
@@ -3421,6 +3448,12 @@ __m256i test_mm256_mask_shufflehi_epi16(__m256i __W, __mmask16 __U, __m256i __A)
return _mm256_mask_shufflehi_epi16(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_shufflehi_epi16(((__m256i)(__v16hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115}),0xFF00u,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),100,101,102,103,104,105,106,107,8,9,10,11,13,13,12,12));
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_shufflehi_epi16(((__m256i)(__v16hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115}),0x0000u,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115));
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_shufflehi_epi16(((__m256i)(__v16hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115}),0xFFFFu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,1,2,3,5,5,4,4,8,9,10,11,13,13,12,12));
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_shufflehi_epi16(((__m256i)(__v16hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115}),0x00FFu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,1,2,3,5,5,4,4,108,109,110,111,112,113,114,115));
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_shufflehi_epi16(((__m256i)(__v16hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115}),0x5555u,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,101,2,103,5,105,4,107,8,109,10,111,13,113,12,115));
+
__m256i test_mm256_maskz_shufflehi_epi16(__mmask16 __U, __m256i __A) {
// CHECK-LABEL: test_mm256_maskz_shufflehi_epi16
// CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, i32 11, i32 13, i32 13, i32 12, i32 12>
@@ -3428,6 +3461,13 @@ __m256i test_mm256_maskz_shufflehi_epi16(__mmask16 __U, __m256i __A) {
return _mm256_maskz_shufflehi_epi16(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflehi_epi16(0x0000u,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0));
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflehi_epi16(0xFFFFu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,1,2,3,5,5,4,4,8,9,10,11,13,13,12,12));
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflehi_epi16(0x00FFu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,1,2,3,5,5,4,4,0,0,0,0,0,0,0,0));
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflehi_epi16(0xFF00u,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,0,0,0,0,0,0,0,8,9,10,11,13,13,12,12));
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflehi_epi16(0x5555u,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,0,2,0,5,0,4,0,8,0,10,0,13,0,12,0));
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflehi_epi16(0xAAAAu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,1,0,3,0,5,0,4,0,9,0,11,0,13,0,12));
+
__m256i test_mm256_mask_shufflelo_epi16(__m256i __W, __mmask16 __U, __m256i __A) {
// CHECK-LABEL: test_mm256_mask_shufflelo_epi16
// CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15>
@@ -3435,6 +3475,11 @@ __m256i test_mm256_mask_shufflelo_epi16(__m256i __W, __mmask16 __U, __m256i __A)
return _mm256_mask_shufflelo_epi16(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_shufflelo_epi16(((__m256i)(__v16hi){100,101,102,103,104,105,106,107,200,201,202,203,204,205,206,207}),0xFFFF,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),1,1,0,0,4,5,6,7,9,9,8,8,12,13,14,15));
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_shufflelo_epi16(((__m256i)(__v16hi){100,101,102,103,104,105,106,107,200,201,202,203,204,205,206,207}),0x000Fu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),1,1,0,0,104,105,106,107,200,201,202,203,204,205,206,207));
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_shufflelo_epi16(((__m256i)(__v16hi){100,101,102,103,104,105,106,107,200,201,202,203,204,205,206,207}),0x00FFu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),1,1,0,0,4,5,6,7,200,201,202,203,204,205,206,207));
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_shufflelo_epi16(((__m256i)(__v16hi){100,101,102,103,104,105,106,107,200,201,202,203,204,205,206,207}),0xF00Fu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),1,1,0,0,104,105,106,107,200,201,202,203,12,13,14,15));
+
__m256i test_mm256_maskz_shufflelo_epi16(__mmask16 __U, __m256i __A) {
// CHECK-LABEL: test_mm256_maskz_shufflelo_epi16
// CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15>
@@ -3442,6 +3487,11 @@ __m256i test_mm256_maskz_shufflelo_epi16(__mmask16 __U, __m256i __A) {
return _mm256_maskz_shufflelo_epi16(__U, __A, 5);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflelo_epi16(0xFFFF,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),1,1,0,0,4,5,6,7,9,9,8,8,12,13,14,15));
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflelo_epi16(0x000Fu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0));
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflelo_epi16(0x00FFu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),1,1,0,0,4,5,6,7,0,0,0,0,0,0,0,0));
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflelo_epi16(0xF0F0u,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,0,0,0,4,5,6,7,0,0,0,0,12,13,14,15));
+
void test_mm_mask_cvtepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
{
// CHECK-LABEL: test_mm_mask_cvtepi16_storeu_epi8
diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c
index 26c5f7315457e..5156d070bcde7 100644
--- a/clang/test/CodeGen/X86/mmx-builtins.c
+++ b/clang/test/CodeGen/X86/mmx-builtins.c
@@ -584,7 +584,7 @@ __m64 test_mm_shuffle_pi16(__m64 a) {
// CHECK: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
return _mm_shuffle_pi16(a, 3);
}
-
+TEST_CONSTEXPR(match_v4hi(_mm_shuffle_pi16(((__m64)(__v4hi){0,1,2,3}), 3), 3,0,0,0));
__m64 test_mm_sign_pi8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sign_pi8
// CHECK: call <16 x i8> @llvm.x86.ssse3.psign.b.128(
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index 84b90c09444c2..83cb4a63f4e3f 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -1299,7 +1299,7 @@ __m128i test_mm_shuffle_epi32(__m128i A) {
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> zeroinitializer
return _mm_shuffle_epi32(A, 0);
}
-
+TEST_CONSTEXPR(match_v4si(_mm_shuffle_epi32(((__m128i)(__v4si){0,1,2,3}), 0), 0,0,0,0));
__m128d test_mm_shuffle_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_shuffle_pd
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 2>
@@ -1311,13 +1311,13 @@ __m128i test_mm_shufflehi_epi16(__m128i A) {
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
return _mm_shufflehi_epi16(A, 0);
}
-
+TEST_CONSTEXPR(match_v8hi(_mm_shufflehi_epi16(((__m128i)(__v8hi){0,1,2,3,4,5,6,7}), 0), 0,1,2,3, 4,4,4,4));
__m128i test_mm_shufflelo_epi16(__m128i A) {
// CHECK-LABEL: test_mm_shufflelo_epi16
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
return _mm_shufflelo_epi16(A, 0);
}
-
+TEST_CONSTEXPR(match_v8hi(_mm_shufflelo_epi16(((__m128i)(__v8hi){0,1,2,3,4,5,6,7}), 0), 0,0,0,0, 4,5,6,7));
__m128i test_mm_sll_epi16(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_sll_epi16
// CHECK: call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
>From de765b0958a52f76b3405aafedc34374af771ea4 Mon Sep 17 00:00:00 2001
From: NagrajMG <nagrajgaonkarmumbai749 at gmail.com>
Date: Tue, 30 Sep 2025 00:49:21 +0530
Subject: [PATCH 2/2] [X86] Allow PSHUFD/PSHUFLW/PSHUFW intrinsics in constexpr
---
clang/include/clang/Basic/BuiltinsX86.td | 20 +-
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 293 ++++-------------
clang/lib/AST/ExprConstant.cpp | 394 ++++++-----------------
clang/lib/Headers/mmintrin.h | 5 -
4 files changed, 171 insertions(+), 541 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index e70691a30627a..b320842c5486e 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -1025,6 +1025,7 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def pmuldq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">;
def pmuludq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">;
+ def pshufd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
}
let Features = "avx512f", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
@@ -2024,35 +2025,20 @@ let Features = "avx512f",
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def psrlv32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
- def pshuflw512_mask : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int, _Vector<32, short>, unsigned int)">;
- def pshuflw512_maskz : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int, unsigned int)">;
- def pshufhw512_mask : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int, _Vector<32, short>, unsigned int)">;
- def pshufhw512_maskz : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int, unsigned int)">;
}
let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def psrlv16hi : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
- def pshuflw256_mask : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int, _Vector<16, short>, unsigned short)">;
- def pshuflw256_maskz : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int, unsigned short)">;
- def pshufhw256_mask : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int, _Vector<16, short>, unsigned short)">;
- def pshufhw256_maskz : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int, unsigned short)">;
}
let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def psrlv8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
- def pshuflw128_mask : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int, _Vector<8, short>, unsigned char)">;
- def pshuflw128_maskz : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int, unsigned char)">;
- def pshufhw128_mask : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int, _Vector<8, short>, unsigned char)">;
- def pshufhw128_maskz : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int, unsigned char)">;
}
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def psrlwi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">;
def psrldi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, int)">;
def psrlqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, int)">;
- def pshufd512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int, _Vector<16, int>, unsigned short)">;
- def pshufd512_maskz : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int, unsigned short)">;
- def pshufd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
}
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
@@ -2069,14 +2055,10 @@ let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, Req
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def psravq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
- def pshufd128_mask : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int, _Vector<4, int>, unsigned char)">;
- def pshufd128_maskz : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int, unsigned char)">;
}
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def psravq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
- def pshufd256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int, _Vector<8, int>, unsigned char)">;
- def pshufd256_maskz : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int, unsigned char)">;
}
let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index e0bd5d531db34..e7ec8beb2ba81 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2862,214 +2862,64 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
return true;
}
-static bool interp__builtin_ia32_pshuflw_common(InterpState &S, CodePtr OpPC,
- const CallExpr *Call) {
- const unsigned NumArgs = Call->getNumArgs();
- assert(NumArgs == 2 || NumArgs == 3 || NumArgs == 4);
- APSInt K;
- Pointer SrcPT;
- const bool HasMask = (NumArgs == 3) || (NumArgs == 4);
- const bool IsMaskZ = (NumArgs == 3);
- if (NumArgs == 4) {
- K = popToAPSInt(S, Call->getArg(3));
- SrcPT = S.Stk.pop<Pointer>();
- } else if (NumArgs == 3) {
- K = popToAPSInt(S, Call->getArg(2));
- }
-
- APSInt Imm = popToAPSInt(S, Call->getArg(1));
- const Pointer &Src = S.Stk.pop<Pointer>();
- const Pointer &Dst = S.Stk.peek<Pointer>();
- const unsigned NumElems = Dst.getNumElems();
- const PrimType ElemT = Dst.getFieldDesc()->getPrimType();
- const unsigned ElemBits = 16;
- const unsigned LaneElems = 128u / ElemBits;
- const unsigned Half = 4;
- assert(NumElems % LaneElems == 0 && "pshuflw expects 128-bit lanes");
- const uint8_t Ctl = static_cast<uint8_t>(Imm.getZExtValue());
-
- for (unsigned i = 0; i != NumElems; ++i) {
- const unsigned laneBase = (i / LaneElems) * LaneElems;
- const unsigned inLane = i % LaneElems;
-
- unsigned srcIdx;
- if (inLane < Half) {
- const unsigned pos = inLane;
- const unsigned sel = (Ctl >> (2 * pos)) & 0x3;
- srcIdx = laneBase + sel;
- } else {
- srcIdx = i;
- }
-
- APSInt Chosen;
- INT_TYPE_SWITCH(ElemT, { Chosen = Src.elem<T>(srcIdx).toAPSInt(); });
-
- if (!HasMask) {
- INT_TYPE_SWITCH_NO_BOOL(ElemT,
- { Dst.elem<T>(i) = static_cast<T>(Chosen); });
- continue;
- }
-
- const bool Keep =
- (i < static_cast<unsigned>(K.getBitWidth())) ? K[i] : false;
-
- if (Keep) {
- INT_TYPE_SWITCH_NO_BOOL(ElemT,
- { Dst.elem<T>(i) = static_cast<T>(Chosen); });
- } else if (IsMaskZ) {
- APSInt Zero(APInt(Chosen.getBitWidth(), 0));
- Zero.setIsSigned(Chosen.isSigned());
- INT_TYPE_SWITCH_NO_BOOL(ElemT,
- { Dst.elem<T>(i) = static_cast<T>(Zero); });
- } else {
- APSInt PT;
- INT_TYPE_SWITCH(ElemT, { PT = SrcPT.elem<T>(i).toAPSInt(); });
- INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(i) = static_cast<T>(PT); });
- }
- }
+enum class Half { None, Low, High };
- Dst.initializeAllElements();
- return true;
-}
-
-static bool interp__builtin_ia32_pshufhw_common(InterpState &S, CodePtr OpPC,
- const CallExpr *Call) {
- (void)OpPC;
- const unsigned NumArgs = Call->getNumArgs();
- assert(NumArgs == 2 || NumArgs == 3 || NumArgs == 4);
-
- APSInt K;
- Pointer SrcPT;
- const bool HasMask = (NumArgs == 3) || (NumArgs == 4);
- const bool IsMaskZ = (NumArgs == 3);
-
- if (NumArgs == 4) {
- K = popToAPSInt(S, Call->getArg(3));
- SrcPT = S.Stk.pop<Pointer>();
- } else if (NumArgs == 3) {
- K = popToAPSInt(S, Call->getArg(2));
- }
-
- APSInt Imm = popToAPSInt(S, Call->getArg(1));
- const Pointer &Src = S.Stk.pop<Pointer>();
+static bool interp__builtin_ia32_pshuf(InterpState &S, CodePtr OpPC, const CallExpr *Call,
+ Half whichHalf) {
+ assert(Call->getNumArgs() == 2 && "masked forms handled via select*");
+ APSInt controlImm = popToAPSInt(S, Call->getArg(1));
+ const Pointer &src = S.Stk.pop<Pointer>();
const Pointer &Dst = S.Stk.peek<Pointer>();
- const unsigned NumElems = Dst.getNumElems();
- const PrimType ElemT = Dst.getFieldDesc()->getPrimType();
-
- const unsigned ElemBits = 16;
- const unsigned LaneElems = 128u / ElemBits;
- const unsigned HalfBase = 4;
- assert(NumElems % LaneElems == 0);
+ const unsigned numElts = Dst.getNumElems();
+ const PrimType elemTy = Dst.getFieldDesc()->getPrimType();
- const uint8_t Ctl = static_cast<uint8_t>(Imm.getZExtValue());
+ // Only i16/i32 supported
+ const unsigned elemBits = static_cast<unsigned>(primSize(elemTy) * 8);
+ if (elemBits != 16 && elemBits != 32) return false;
- for (unsigned i = 0; i != NumElems; ++i) {
- const unsigned laneBase = (i / LaneElems) * LaneElems;
- const unsigned inLane = i % LaneElems;
+ // Lane: 64b for MMX, 128b otherwise
+ const unsigned totalBits = numElts * elemBits;
+ const unsigned laneBits = (totalBits == 64) ? 64u : 128u;
+ const unsigned laneElts = laneBits / elemBits;
+ assert(laneElts && (numElts % laneElts == 0));
- unsigned srcIdx;
- if (inLane >= HalfBase) {
- const unsigned pos = inLane - HalfBase;
- const unsigned sel = (Ctl >> (2 * pos)) & 0x3;
- srcIdx = laneBase + HalfBase + sel;
- } else {
- srcIdx = i;
- }
+ const uint8_t ctl = static_cast<uint8_t>(controlImm.getZExtValue());
- APSInt Chosen;
- INT_TYPE_SWITCH(ElemT, { Chosen = Src.elem<T>(srcIdx).toAPSInt(); });
+ for (unsigned idx = 0; idx != numElts; idx++) {
+ const unsigned laneBase = (idx / laneElts) * laneElts;
+ const unsigned laneIdx = idx % laneElts;
- if (!HasMask) {
- INT_TYPE_SWITCH_NO_BOOL(ElemT,
- { Dst.elem<T>(i) = static_cast<T>(Chosen); });
- continue;
- }
+ unsigned srcIdx = idx;
- const bool Keep =
- (i < static_cast<unsigned>(K.getBitWidth())) ? K[i] : false;
- if (Keep) {
- INT_TYPE_SWITCH_NO_BOOL(ElemT,
- { Dst.elem<T>(i) = static_cast<T>(Chosen); });
- } else if (IsMaskZ) {
- APSInt Zero(APInt(Chosen.getBitWidth(), 0));
- Zero.setIsSigned(Chosen.isSigned());
- INT_TYPE_SWITCH_NO_BOOL(ElemT,
- { Dst.elem<T>(i) = static_cast<T>(Zero); });
- } else {
- APSInt PT;
- INT_TYPE_SWITCH(ElemT, { PT = SrcPT.elem<T>(i).toAPSInt(); });
- INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(i) = static_cast<T>(PT); });
- }
- }
-
- Dst.initializeAllElements();
- return true;
-}
-
-static bool interp__builtin_ia32_pshufd_common(InterpState &S, CodePtr OpPC,
- const CallExpr *Call) {
- (void)OpPC;
- const unsigned NumArgs = Call->getNumArgs();
- assert(NumArgs == 2 || NumArgs == 3 || NumArgs == 4);
-
- APSInt K;
- Pointer SrcPT;
- const bool HasMask = (NumArgs == 3) || (NumArgs == 4);
- const bool IsMaskZ = (NumArgs == 3);
-
- if (NumArgs == 4) {
- K = popToAPSInt(S, Call->getArg(3));
- SrcPT = S.Stk.pop<Pointer>();
- } else if (NumArgs == 3) {
- K = popToAPSInt(S, Call->getArg(2));
- }
-
- APSInt Imm = popToAPSInt(S, Call->getArg(1));
- const Pointer &Src = S.Stk.pop<Pointer>();
- const Pointer &Dst = S.Stk.peek<Pointer>();
-
- const unsigned NumElems = Dst.getNumElems();
- const PrimType ElemT = Dst.getFieldDesc()->getPrimType();
-
- const unsigned ElemBits = 32;
- const unsigned LaneElems = 128u / ElemBits;
- assert(NumElems % LaneElems == 0);
-
- const uint8_t Ctl = static_cast<uint8_t>(Imm.getZExtValue());
-
- for (unsigned i = 0; i != NumElems; ++i) {
- const unsigned laneBase = (i / LaneElems) * LaneElems;
- const unsigned inLane = i % LaneElems;
- const unsigned sel = (Ctl >> (2 * inLane)) & 0x3;
- const unsigned srcIdx = laneBase + sel;
-
- APSInt Chosen;
- INT_TYPE_SWITCH(ElemT, { Chosen = Src.elem<T>(srcIdx).toAPSInt(); });
-
- if (!HasMask) {
- INT_TYPE_SWITCH_NO_BOOL(ElemT,
- { Dst.elem<T>(i) = static_cast<T>(Chosen); });
- continue;
+ if (elemBits == 32) {
+ // PSHUFD: 4×i32 per lane
+ const unsigned sel = (ctl >> (2 * laneIdx)) & 0x3;
+ srcIdx = laneBase + sel;
+ } else { // 16-bit shuffles
+ if (laneElts == 4) {
+ // MMX: permute all 4×i16
+ const unsigned sel = (ctl >> (2 * laneIdx)) & 0x3;
+ srcIdx = laneBase + sel;
+ } else {
+ // 128b lanes: shuffle 4×i16 half
+ constexpr unsigned halfSize = 4;
+ if (whichHalf == Half::Low && laneIdx < halfSize) {
+ const unsigned sel = (ctl >> (2 * laneIdx)) & 0x3;
+ srcIdx = laneBase + sel;
+ } else if (whichHalf == Half::High && laneIdx >= halfSize) {
+ const unsigned rel = laneIdx - halfSize;
+ const unsigned sel = (ctl >> (2 * rel)) & 0x3;
+ srcIdx = laneBase + halfSize + sel;
+ } else if (whichHalf == Half::None) {
+ const unsigned sel = (ctl >> (2 * laneIdx)) & 0x3;
+ srcIdx = laneBase + sel;
+ }
+ }
}
- const bool Keep =
- (i < static_cast<unsigned>(K.getBitWidth())) ? K[i] : false;
- if (Keep) {
- INT_TYPE_SWITCH_NO_BOOL(ElemT,
- { Dst.elem<T>(i) = static_cast<T>(Chosen); });
- } else if (IsMaskZ) {
- APSInt Zero(APInt(Chosen.getBitWidth(), 0));
- Zero.setIsSigned(Chosen.isSigned());
- INT_TYPE_SWITCH_NO_BOOL(ElemT,
- { Dst.elem<T>(i) = static_cast<T>(Zero); });
- } else {
- APSInt PT;
- INT_TYPE_SWITCH(ElemT, { PT = SrcPT.elem<T>(i).toAPSInt(); });
- INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(i) = static_cast<T>(PT); });
- }
+ INT_TYPE_SWITCH_NO_BOOL(elemTy, { Dst.elem<T>(idx) = src.elem<T>(srcIdx); });
}
-
Dst.initializeAllElements();
return true;
}
@@ -3629,39 +3479,6 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return interp__builtin_elementwise_int_binop(S, OpPC, Call,
llvm::APIntOps::mulhs);
- case clang::X86::BI__builtin_ia32_pshuflw:
- case clang::X86::BI__builtin_ia32_pshuflw256:
- case clang::X86::BI__builtin_ia32_pshuflw512:
- case clang::X86::BI__builtin_ia32_pshuflw128_mask:
- case clang::X86::BI__builtin_ia32_pshuflw256_mask:
- case clang::X86::BI__builtin_ia32_pshuflw512_mask:
- case clang::X86::BI__builtin_ia32_pshuflw128_maskz:
- case clang::X86::BI__builtin_ia32_pshuflw256_maskz:
- case clang::X86::BI__builtin_ia32_pshuflw512_maskz:
- return interp__builtin_ia32_pshuflw_common(S, OpPC, Call);
-
- case clang::X86::BI__builtin_ia32_pshufhw:
- case clang::X86::BI__builtin_ia32_pshufhw256:
- case clang::X86::BI__builtin_ia32_pshufhw512:
- case clang::X86::BI__builtin_ia32_pshufhw128_mask:
- case clang::X86::BI__builtin_ia32_pshufhw256_mask:
- case clang::X86::BI__builtin_ia32_pshufhw512_mask:
- case clang::X86::BI__builtin_ia32_pshufhw128_maskz:
- case clang::X86::BI__builtin_ia32_pshufhw256_maskz:
- case clang::X86::BI__builtin_ia32_pshufhw512_maskz:
- return interp__builtin_ia32_pshufhw_common(S, OpPC, Call);
-
- case clang::X86::BI__builtin_ia32_pshufd:
- case clang::X86::BI__builtin_ia32_pshufd256:
- case clang::X86::BI__builtin_ia32_pshufd512:
- case clang::X86::BI__builtin_ia32_pshufd128_mask:
- case clang::X86::BI__builtin_ia32_pshufd256_mask:
- case clang::X86::BI__builtin_ia32_pshufd512_mask:
- case clang::X86::BI__builtin_ia32_pshufd128_maskz:
- case clang::X86::BI__builtin_ia32_pshufd256_maskz:
- case clang::X86::BI__builtin_ia32_pshufd512_maskz:
- return interp__builtin_ia32_pshufd_common(S, OpPC, Call);
-
case clang::X86::BI__builtin_ia32_psllv2di:
case clang::X86::BI__builtin_ia32_psllv4di:
case clang::X86::BI__builtin_ia32_psllv4si:
@@ -3892,6 +3709,24 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_selectpd_512:
return interp__builtin_select(S, OpPC, Call);
+ case X86::BI__builtin_ia32_pshufw:
+ return interp__builtin_ia32_pshuf(S, OpPC, Call, Half::None);
+
+ case X86::BI__builtin_ia32_pshuflw:
+ case X86::BI__builtin_ia32_pshuflw256:
+ case X86::BI__builtin_ia32_pshuflw512:
+ return interp__builtin_ia32_pshuf(S, OpPC, Call, Half::Low);
+
+ case X86::BI__builtin_ia32_pshufhw:
+ case X86::BI__builtin_ia32_pshufhw256:
+ case X86::BI__builtin_ia32_pshufhw512:
+ return interp__builtin_ia32_pshuf(S, OpPC, Call, Half::High);
+
+ case X86::BI__builtin_ia32_pshufd:
+ case X86::BI__builtin_ia32_pshufd256:
+ case X86::BI__builtin_ia32_pshufd512:
+ return interp__builtin_ia32_pshuf(S, OpPC, Call, Half::None);
+
case X86::BI__builtin_ia32_kandqi:
case X86::BI__builtin_ia32_kandhi:
case X86::BI__builtin_ia32_kandsi:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 1ce601d37e0d6..876a80446fd3c 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11615,6 +11615,78 @@ static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result,
return true;
}
+static constexpr unsigned noHalf = ~0u;
+
+static bool evalPshufBuiltin(EvalInfo &Info, const CallExpr *Call,
+ unsigned elemBits, unsigned halfBase,
+ APValue &Out) {
+ // Expect (vec, imm8)
+ APValue vec;
+ APSInt imm;
+ if (!EvaluateAsRValue(Info, Call->getArg(0), vec)) return false;
+ if (!EvaluateInteger(Call->getArg(1), imm, Info)) return false;
+
+ const auto *vt = Call->getType()->getAs<VectorType>();
+ if (!vt) return false;
+ const unsigned nElts = vt->getNumElements();
+
+ // Lane geometry: MMX pshufw is a single 64-bit lane; others use 128-bit lanes.
+ const unsigned totalBits = nElts * elemBits;
+ const unsigned laneBits = (totalBits == 64) ? 64u : 128u;
+ const unsigned laneElts = laneBits / elemBits;
+ if (!laneElts || (nElts % laneElts) != 0) return false;
+
+ const uint8_t ctl = static_cast<uint8_t>(imm.getZExtValue());
+
+ SmallVector<APValue, 32> ResultElements;
+ ResultElements.reserve(nElts);
+
+ for (unsigned idx = 0; idx != nElts; idx++) {
+ const unsigned laneBase = (idx / laneElts) * laneElts;
+ const unsigned laneIdx = idx % laneElts;
+
+ unsigned srcIdx = idx;
+
+ if (elemBits == 32) {
+ // PSHUFD: permute 4×i32 per 128-bit lane
+ const unsigned sel = (ctl >> (2 * laneIdx)) & 0x3;
+ srcIdx = laneBase + sel;
+ } else {
+ // elemBits == 16 (PSHUFLW / PSHUFHW / PSHUFW)
+ if (laneElts == 4) {
+ // MMX PSHUFW: permute entire 64-bit lane (4×i16)
+ const unsigned sel = (ctl >> (2 * laneIdx)) & 0x3;
+ srcIdx = laneBase + sel;
+ } else {
+ // SSE/AVX/AVX-512: 128-bit lane has 8×i16. Permute a 4×i16 half.
+ constexpr unsigned halfSize = 4;
+ if (halfBase == 0) {
+ // PSHUFLW: permute low half (words 0..3)
+ if (laneIdx < halfSize) {
+ const unsigned sel = (ctl >> (2 * laneIdx)) & 0x3;
+ srcIdx = laneBase + sel;
+ }
+ } else if (halfBase == halfSize) {
+ // PSHUFHW: permute high half (words 4..7)
+ if (laneIdx >= halfSize) {
+ const unsigned rel = laneIdx - halfSize;
+ const unsigned sel = (ctl >> (2 * rel)) & 0x3;
+ srcIdx = laneBase + halfBase + sel;
+ }
+ } else {
+ const unsigned sel = (ctl >> (2 * laneIdx)) & 0x3;
+ srcIdx = laneBase + sel;
+ }
+ }
+ }
+
+ ResultElements.push_back(vec.getVectorElt(srcIdx));
+ }
+
+ Out = APValue(ResultElements.data(), ResultElements.size());
+ return true;
+}
+
bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
if (!IsConstantEvaluatedBuiltinCall(E))
return ExprEvaluatorBaseTy::VisitCallExpr(E);
@@ -11868,294 +11940,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
-
- case X86::BI__builtin_ia32_pshufw: {
- APValue Src;
- APSInt Imm;
- if (!EvaluateAsRValue(Info, E->getArg(0), Src)) return false;
- if (!EvaluateInteger(E->getArg(1), Imm, Info)) return false;
-
- unsigned N = Src.getVectorLength();
- SmallVector<APValue, 4> ResultElements;
- ResultElements.reserve(N);
-
- uint8_t C = static_cast<uint8_t>(Imm.getZExtValue());
- for (unsigned i = 0; i != N; ++i) {
- unsigned sel = (C >> (2 * i)) & 0x3;
- ResultElements.push_back(Src.getVectorElt(sel));
- }
- return Success(APValue(ResultElements.data(), ResultElements.size()), E);
- }
-
- case clang::X86::BI__builtin_ia32_pshuflw:
- case clang::X86::BI__builtin_ia32_pshuflw256:
- case clang::X86::BI__builtin_ia32_pshuflw512:
- case clang::X86::BI__builtin_ia32_pshuflw128_mask:
- case clang::X86::BI__builtin_ia32_pshuflw256_mask:
- case clang::X86::BI__builtin_ia32_pshuflw512_mask:
- case clang::X86::BI__builtin_ia32_pshuflw128_maskz:
- case clang::X86::BI__builtin_ia32_pshuflw256_maskz:
- case clang::X86::BI__builtin_ia32_pshuflw512_maskz: {
- const unsigned BID = E->getBuiltinCallee();
-
- const bool IsMask =
- BID == clang::X86::BI__builtin_ia32_pshuflw128_mask ||
- BID == clang::X86::BI__builtin_ia32_pshuflw256_mask ||
- BID == clang::X86::BI__builtin_ia32_pshuflw512_mask;
-
- const bool IsMaskZ =
- BID == clang::X86::BI__builtin_ia32_pshuflw128_maskz ||
- BID == clang::X86::BI__builtin_ia32_pshuflw256_maskz ||
- BID == clang::X86::BI__builtin_ia32_pshuflw512_maskz;
-
- const unsigned AIdx = 0, ImmIdx = 1;
- const unsigned SrcIdx = 2;
- const unsigned KIdx = IsMaskZ ? 2 : 3;
-
- APValue AVal, SrcVal;
- APSInt Imm, K;
- if (!EvaluateAsRValue(Info, E->getArg(AIdx), AVal)) return false;
- if (!EvaluateInteger(E->getArg(ImmIdx), Imm, Info)) return false;
-
- const APSInt *KPtr = nullptr;
- const APValue *PassThru = nullptr;
- bool ZeroInactive = false;
-
- if (IsMask) {
- if (!EvaluateAsRValue(Info, E->getArg(SrcIdx), SrcVal)) return false;
- if (!EvaluateInteger(E->getArg(KIdx), K, Info)) return false;
- KPtr = &K; PassThru = &SrcVal; ZeroInactive = false;
- } else if (IsMaskZ) {
- if (!EvaluateInteger(E->getArg(KIdx), K, Info)) return false;
- KPtr = &K; PassThru = nullptr; ZeroInactive = true;
- }
-
- const auto *VT = E->getType()->getAs<VectorType>();
- if (!VT) return false;
- const unsigned NumElts = VT->getNumElements();
-
- const unsigned ElemBits = 16;
- const unsigned LaneElems = std::min(NumElts, 128u / ElemBits);
- const unsigned Half = 4;
- const uint8_t Ctl = static_cast<uint8_t>(Imm.getZExtValue());
- const bool DestUnsigned =
- VT->getElementType()->isUnsignedIntegerOrEnumerationType();
-
- auto MakeZero = [&]() -> APValue {
- return APValue(APSInt(APInt(ElemBits, 0), DestUnsigned));
- };
-
- SmallVector<APValue, 32> ResultElements;
- ResultElements.reserve(NumElts);
-
- for (unsigned i = 0; i < NumElts; ++i) {
- const unsigned laneBase = (i / LaneElems) * LaneElems;
- const unsigned inLane = i % LaneElems;
-
- APValue Chosen;
- if (inLane < Half) {
- const unsigned pos = inLane;
- const unsigned sel = (Ctl >> (2 * pos)) & 0x3;
- const unsigned srcIdx = laneBase + sel;
- Chosen = AVal.getVectorElt(srcIdx);
- } else {
- Chosen = AVal.getVectorElt(i);
- }
-
- if (KPtr) {
- const bool Keep = (i < KPtr->getBitWidth()) ? (*KPtr)[i] : false;
- if (Keep) {
- ResultElements.push_back(Chosen);
- } else if (ZeroInactive) {
- ResultElements.push_back(MakeZero());
- } else {
- const APValue &PT = PassThru ? PassThru->getVectorElt(i)
- : AVal.getVectorElt(i);
- ResultElements.push_back(PT);
- }
- } else {
- ResultElements.push_back(Chosen);
- }
- }
- return Success(APValue(ResultElements.data(), ResultElements.size()), E);
- }
-
- case clang::X86::BI__builtin_ia32_pshufhw:
- case clang::X86::BI__builtin_ia32_pshufhw256:
- case clang::X86::BI__builtin_ia32_pshufhw512:
- case clang::X86::BI__builtin_ia32_pshufhw128_mask:
- case clang::X86::BI__builtin_ia32_pshufhw256_mask:
- case clang::X86::BI__builtin_ia32_pshufhw512_mask:
- case clang::X86::BI__builtin_ia32_pshufhw128_maskz:
- case clang::X86::BI__builtin_ia32_pshufhw256_maskz:
- case clang::X86::BI__builtin_ia32_pshufhw512_maskz: {
- const unsigned BID = E->getBuiltinCallee();
-
- const bool IsMask =
- BID == clang::X86::BI__builtin_ia32_pshufhw128_mask ||
- BID == clang::X86::BI__builtin_ia32_pshufhw256_mask ||
- BID == clang::X86::BI__builtin_ia32_pshufhw512_mask;
-
- const bool IsMaskZ =
- BID == clang::X86::BI__builtin_ia32_pshufhw128_maskz ||
- BID == clang::X86::BI__builtin_ia32_pshufhw256_maskz ||
- BID == clang::X86::BI__builtin_ia32_pshufhw512_maskz;
-
- const unsigned AIdx = 0, ImmIdx = 1;
- const unsigned SrcIdx = 2;
- const unsigned KIdx = IsMaskZ ? 2 : 3;
-
- APValue AVal, SrcVal;
- APSInt Imm, K;
- if (!EvaluateAsRValue(Info, E->getArg(AIdx), AVal)) return false;
- if (!EvaluateInteger(E->getArg(ImmIdx), Imm, Info)) return false;
-
- const APSInt *KPtr = nullptr;
- const APValue *PassThru = nullptr;
- bool ZeroInactive = false;
- if (IsMask) {
- if (!EvaluateAsRValue(Info, E->getArg(SrcIdx), SrcVal)) return false;
- if (!EvaluateInteger(E->getArg(KIdx), K, Info)) return false;
- KPtr = &K; PassThru = &SrcVal; ZeroInactive = false;
- } else if (IsMaskZ) {
- if (!EvaluateInteger(E->getArg(KIdx), K, Info)) return false;
- KPtr = &K; PassThru = nullptr; ZeroInactive = true;
- }
-
- const auto *VT = E->getType()->getAs<VectorType>();
- if (!VT) return false;
- const unsigned NumElts = VT->getNumElements();
- const unsigned ElemBits = 16;
- const unsigned LaneElems = std::min(NumElts, 128u / ElemBits);
- const unsigned Half = 4;
- const uint8_t Ctl = static_cast<uint8_t>(Imm.getZExtValue());
- const bool DestUnsigned =
- VT->getElementType()->isUnsignedIntegerOrEnumerationType();
-
- auto MakeZero = [&]() -> APValue {
- return APValue(APSInt(APInt(ElemBits, 0), DestUnsigned));
- };
-
- SmallVector<APValue, 32> ResultElements;
- ResultElements.reserve(NumElts);
-
- for (unsigned i = 0; i < NumElts; ++i) {
- const unsigned laneBase = (i / LaneElems) * LaneElems;
- const unsigned inLane = i % LaneElems;
-
- APValue Chosen;
- if (inLane >= Half) {
- const unsigned pos = inLane - Half;
- const unsigned sel = (Ctl >> (2 * pos)) & 0x3;
- const unsigned srcIdx = laneBase + Half + sel;
- Chosen = AVal.getVectorElt(srcIdx);
- } else {
- Chosen = AVal.getVectorElt(i);
- }
-
- if (KPtr) {
- const bool Keep = (i < KPtr->getBitWidth()) ? (*KPtr)[i] : false;
- if (Keep) {
- ResultElements.push_back(Chosen);
- } else if (ZeroInactive) {
- ResultElements.push_back(MakeZero());
- } else {
- const APValue &PT = PassThru ? PassThru->getVectorElt(i)
- : AVal.getVectorElt(i);
- ResultElements.push_back(PT);
- }
- } else {
- ResultElements.push_back(Chosen);
- }
- }
- return Success(APValue(ResultElements.data(), ResultElements.size()), E);
- }
-
- case clang::X86::BI__builtin_ia32_pshufd:
- case clang::X86::BI__builtin_ia32_pshufd256:
- case clang::X86::BI__builtin_ia32_pshufd512:
- case clang::X86::BI__builtin_ia32_pshufd128_mask:
- case clang::X86::BI__builtin_ia32_pshufd256_mask:
- case clang::X86::BI__builtin_ia32_pshufd512_mask:
- case clang::X86::BI__builtin_ia32_pshufd128_maskz:
- case clang::X86::BI__builtin_ia32_pshufd256_maskz:
- case clang::X86::BI__builtin_ia32_pshufd512_maskz: {
- const unsigned BID = E->getBuiltinCallee();
-
- const bool IsMask =
- BID == clang::X86::BI__builtin_ia32_pshufd512_mask ||
- BID == clang::X86::BI__builtin_ia32_pshufd128_mask ||
- BID == clang::X86::BI__builtin_ia32_pshufd256_mask;
-
- const bool IsMaskZ =
- BID == clang::X86::BI__builtin_ia32_pshufd512_maskz ||
- BID == clang::X86::BI__builtin_ia32_pshufd128_maskz ||
- BID == clang::X86::BI__builtin_ia32_pshufd256_maskz;
-
- const unsigned AIdx = 0, ImmIdx = 1;
- const unsigned SrcIdx = 2;
- const unsigned KIdx = IsMaskZ ? 2 : 3;
-
- APValue AVal, SrcVal;
- APSInt Imm, K;
- if (!EvaluateAsRValue(Info, E->getArg(AIdx), AVal)) return false;
- if (!EvaluateInteger(E->getArg(ImmIdx), Imm, Info)) return false;
-
- const APSInt *KPtr = nullptr;
- const APValue *PassThru = nullptr;
- bool ZeroInactive = false;
- if (IsMask) {
- if (!EvaluateAsRValue(Info, E->getArg(SrcIdx), SrcVal)) return false;
- if (!EvaluateInteger(E->getArg(KIdx), K, Info)) return false;
- KPtr = &K; PassThru = &SrcVal; ZeroInactive = false;
- } else if (IsMaskZ) {
- if (!EvaluateInteger(E->getArg(KIdx), K, Info)) return false;
- KPtr = &K; PassThru = nullptr; ZeroInactive = true;
- }
-
- const auto *VT = E->getType()->getAs<VectorType>();
- if (!VT) return false;
- const unsigned NumElts = VT->getNumElements();
- const unsigned ElemBits = 32;
- const unsigned LaneElems = std::min(NumElts, 128u / ElemBits);
- const uint8_t Ctl = static_cast<uint8_t>(Imm.getZExtValue());
- const bool DestUnsigned =
- VT->getElementType()->isUnsignedIntegerOrEnumerationType();
-
- auto MakeZero = [&]() -> APValue {
- return APValue(APSInt(APInt(ElemBits, 0), DestUnsigned));
- };
-
- SmallVector<APValue, 32> ResultElements;
- ResultElements.reserve(NumElts);
-
- for (unsigned i = 0; i < NumElts; ++i) {
- const unsigned laneBase = (i / LaneElems) * LaneElems;
- const unsigned inLane = i % LaneElems;
-
- const unsigned pos = inLane & 3;
- const unsigned sel = (Ctl >> (2 * pos)) & 0x3;
- const unsigned srcIdx = laneBase + sel;
- APValue Chosen = AVal.getVectorElt(srcIdx);
-
- if (KPtr) {
- const bool Keep = (i < KPtr->getBitWidth()) ? (*KPtr)[i] : false;
- if (Keep) {
- ResultElements.push_back(Chosen);
- } else if (ZeroInactive) {
- ResultElements.push_back(MakeZero());
- } else {
- const APValue &PT = PassThru ? PassThru->getVectorElt(i)
- : AVal.getVectorElt(i);
- ResultElements.push_back(PT);
- }
- } else {
- ResultElements.push_back(Chosen);
- }
- }
- return Success(APValue(ResultElements.data(), ResultElements.size()), E);
- }
-
case clang::X86::BI__builtin_ia32_vprotbi:
case clang::X86::BI__builtin_ia32_vprotdi:
case clang::X86::BI__builtin_ia32_vprotqi:
@@ -12374,6 +12158,40 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+
+ case X86::BI__builtin_ia32_pshufw: {
+ APValue R;
+ if (!evalPshufBuiltin(Info, E, /*ElemBits=*/16, /*HalfBaseElems=*/noHalf, R)) return false;
+ return Success(R, E);
+ }
+
+ case X86::BI__builtin_ia32_pshuflw:
+ case X86::BI__builtin_ia32_pshuflw256:
+ case X86::BI__builtin_ia32_pshuflw512: {
+ APValue R;
+ if (!evalPshufBuiltin(Info, E, /*ElemBits=*/16, /*HalfBaseElems=*/0, R))
+ return false;
+ return Success(R, E);
+ }
+
+ case X86::BI__builtin_ia32_pshufhw:
+ case X86::BI__builtin_ia32_pshufhw256:
+ case X86::BI__builtin_ia32_pshufhw512: {
+ APValue R;
+ if (!evalPshufBuiltin(Info, E, /*ElemBits=*/16, /*HalfBaseElems=*/4, R))
+ return false;
+ return Success(R, E);
+ }
+
+ case X86::BI__builtin_ia32_pshufd:
+ case X86::BI__builtin_ia32_pshufd256:
+ case X86::BI__builtin_ia32_pshufd512: {
+ APValue R;
+ if (!evalPshufBuiltin(Info, E, /*ElemBits=*/32, /*HalfBaseElems=*/noHalf, R))
+ return false;
+ return Success(R, E);
+ }
+
case Builtin::BI__builtin_elementwise_clzg:
case Builtin::BI__builtin_elementwise_ctzg: {
APValue SourceLHS;
diff --git a/clang/lib/Headers/mmintrin.h b/clang/lib/Headers/mmintrin.h
index 01b5cea02cb1c..5f617530b6f78 100644
--- a/clang/lib/Headers/mmintrin.h
+++ b/clang/lib/Headers/mmintrin.h
@@ -39,19 +39,14 @@ typedef short __v8hi __attribute__((__vector_size__(16)));
typedef char __v16qi __attribute__((__vector_size__(16)));
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS_MMX \
- __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
-
#define __DEFAULT_FN_ATTRS_SSE2 \
__attribute__((__always_inline__, __nodebug__, __target__("sse2"), \
__min_vector_width__(128)))
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2 constexpr
-#define __DEFAULT_FN_ATTRS_MMX_CONSTEXPR __DEFAULT_FN_ATTRS_MMX constexpr
#else
#define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2
-#define __DEFAULT_FN_ATTRS_MMX_CONSTEXPR __DEFAULT_FN_ATTRS_MMX
#endif
#define __trunc64(x) \
More information about the cfe-commits mailing list