[llvm] r256992 - [X86] Determine if target shuffle can contain zero elements
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 6 15:24:40 PST 2016
Author: rksimon
Date: Wed Jan 6 17:24:40 2016
New Revision: 256992
URL: http://llvm.org/viewvc/llvm-project?rev=256992&view=rev
Log:
[X86] Determine if target shuffle can contain zero elements
getTargetShuffleMask may return shuffle masks with SM_SentinelZero (-2) values (currently just for PSHUFB but VPERM2X128 as well with this patch). Although some calling functions can make use of this (mainly for shuffle combining), others can not and their inclusion makes shuffle mask comparisons more difficult.
This patch adds a flag to getTargetShuffleMask to indicate if the calling function can't handle SM_SentinelZero; getTargetShuffleMask will then return false if it occurs to make handling much easier.
I've tidied up some uses of getTargetShuffleMask to better indicate what is going on - more could be done but at present I don't have test cases to demonstrate it.
Some upcoming patches will make use of this to both support more uses where SM_SentinelZero is not permitted (e.g. combineShuffleToAddSub), and also will allow us to add INSERTPS support to getTargetShuffleMask as part of better zero handling discussed in D14261.
Differential Revision: http://reviews.llvm.org/D15378
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=256992&r1=256991&r2=256992&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jan 6 17:24:40 2016
@@ -4743,8 +4743,7 @@ static SDValue getShuffleVectorZeroOrUnd
/// uses one source. Note that this will set IsUnary for shuffles which use a
/// single input multiple times, and in those cases it will
/// adjust the mask to only have indices within that single input.
-/// FIXME: Add support for Decode*Mask functions that return SM_SentinelZero.
-static bool getTargetShuffleMask(SDNode *N, MVT VT,
+static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
SmallVectorImpl<int> &Mask, bool &IsUnary) {
unsigned NumElems = VT.getVectorNumElements();
SDValue ImmN;
@@ -4870,10 +4869,7 @@ static bool getTargetShuffleMask(SDNode
case X86ISD::VPERM2X128:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
- // Mask only contains negative index if an element is zero.
- if (std::any_of(Mask.begin(), Mask.end(),
- [](int M){ return M == SM_SentinelZero; }))
- return false;
+ IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::MOVSLDUP:
DecodeMOVSLDUPMask(VT, Mask);
@@ -5008,6 +5004,12 @@ static bool getTargetShuffleMask(SDNode
if (Mask.empty())
return false;
+ // Check if we're getting a shuffle mask with zero'd elements.
+ if (!AllowSentinelZero)
+ if (std::any_of(Mask.begin(), Mask.end(),
+ [](int M){ return M == SM_SentinelZero; }))
+ return false;
+
// If we have a fake unary shuffle, the shuffle mask is spread across two
// inputs that are actually the same node. Re-map the mask to always point
// into the first input.
@@ -5046,19 +5048,19 @@ static SDValue getShuffleScalarElt(SDNod
// Recurse into target specific vector shuffles to find scalars.
if (isTargetShuffle(Opcode)) {
MVT ShufVT = V.getSimpleValueType();
- unsigned NumElems = ShufVT.getVectorNumElements();
+ int NumElems = (int)ShufVT.getVectorNumElements();
SmallVector<int, 16> ShuffleMask;
bool IsUnary;
- if (!getTargetShuffleMask(N, ShufVT, ShuffleMask, IsUnary))
+ if (!getTargetShuffleMask(N, ShufVT, false, ShuffleMask, IsUnary))
return SDValue();
int Elt = ShuffleMask[Index];
- if (Elt < 0)
+ if (Elt == SM_SentinelUndef)
return DAG.getUNDEF(ShufVT.getVectorElementType());
- SDValue NewV = (Elt < (int)NumElems) ? N->getOperand(0)
- : N->getOperand(1);
+ assert(0 <= Elt && Elt < (2*NumElems) && "Shuffle index out of range");
+ SDValue NewV = (Elt < NumElems) ? N->getOperand(0) : N->getOperand(1);
return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG,
Depth+1);
}
@@ -23188,7 +23190,7 @@ static bool combineX86ShufflesRecursivel
return false;
SmallVector<int, 16> OpMask;
bool IsUnary;
- bool HaveMask = getTargetShuffleMask(Op.getNode(), VT, OpMask, IsUnary);
+ bool HaveMask = getTargetShuffleMask(Op.getNode(), VT, true, OpMask, IsUnary);
// We only can combine unary shuffles which we can decode the mask for.
if (!HaveMask || !IsUnary)
return false;
@@ -23285,7 +23287,7 @@ static SmallVector<int, 4> getPSHUFShuff
MVT VT = N.getSimpleValueType();
SmallVector<int, 4> Mask;
bool IsUnary;
- bool HaveMask = getTargetShuffleMask(N.getNode(), VT, Mask, IsUnary);
+ bool HaveMask = getTargetShuffleMask(N.getNode(), VT, false, Mask, IsUnary);
(void)HaveMask;
assert(HaveMask);
@@ -23887,13 +23889,13 @@ static SDValue XFormVExtractWithShuffleI
SmallVector<int, 16> ShuffleMask;
bool UnaryShuffle;
if (!getTargetShuffleMask(InVec.getNode(), CurrentVT.getSimpleVT(),
- ShuffleMask, UnaryShuffle))
+ false, ShuffleMask, UnaryShuffle))
return SDValue();
// Select the input vector, guarding against out of range extract vector.
unsigned NumElems = CurrentVT.getVectorNumElements();
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
- int Idx = (Elt > (int)NumElems) ? -1 : ShuffleMask[Elt];
+ int Idx = (Elt > (int)NumElems) ? SM_SentinelUndef : ShuffleMask[Elt];
SDValue LdNode = (Idx < (int)NumElems) ? InVec.getOperand(0)
: InVec.getOperand(1);
More information about the llvm-commits
mailing list