[llvm] r307255 - [X86][SSE] combineX86ShuffleChain - merge duplicate 'Zeroable' element masks
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 6 05:40:10 PDT 2017
Author: rksimon
Date: Thu Jul 6 05:40:10 2017
New Revision: 307255
URL: http://llvm.org/viewvc/llvm-project?rev=307255&view=rev
Log:
[X86][SSE] combineX86ShuffleChain - merge duplicate 'Zeroable' element masks
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=307255&r1=307254&r2=307255&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Jul 6 05:40:10 2017
@@ -27148,6 +27148,7 @@ static bool matchUnaryVectorShuffle(MVT
// permute instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
+ const APInt &Zeroable,
bool AllowFloatDomain,
bool AllowIntDomain,
const X86Subtarget &Subtarget,
@@ -27158,14 +27159,8 @@ static bool matchUnaryPermuteVectorShuff
unsigned MaskScalarSizeInBits = InputSizeInBits / NumMaskElts;
MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits);
- bool ContainsZeros = false;
- APInt Zeroable(NumMaskElts, false);
- for (unsigned i = 0; i != NumMaskElts; ++i) {
- int M = Mask[i];
- if (isUndefOrZero(M))
- Zeroable.setBit(i);
- ContainsZeros |= (M == SM_SentinelZero);
- }
+ bool ContainsZeros =
+ llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; });
// Handle VPERMI/VPERMILPD vXi64/vXi64 patterns.
if (!ContainsZeros && MaskScalarSizeInBits == 64) {
@@ -27330,6 +27325,7 @@ static bool matchBinaryVectorShuffle(MVT
}
static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
+ const APInt &Zeroable,
bool AllowFloatDomain,
bool AllowIntDomain,
SDValue &V1, SDValue &V2, SDLoc &DL,
@@ -27415,11 +27411,6 @@ static bool matchBinaryPermuteVectorShuf
// Attempt to combine to INSERTPS.
if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() &&
MaskVT.is128BitVector()) {
- APInt Zeroable(4, 0);
- for (unsigned i = 0; i != NumMaskElts; ++i)
- if (Mask[i] < 0)
- Zeroable.setBit(i);
-
if (Zeroable.getBoolValue() &&
matchVectorShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) {
Shuffle = X86ISD::INSERTPS;
@@ -27608,6 +27599,12 @@ static bool combineX86ShuffleChain(Array
bool AllowIntDomain = (!FloatDomain || (Depth > 3)) &&
(!MaskVT.is256BitVector() || Subtarget.hasAVX2());
+ // Determine zeroable mask elements.
+ APInt Zeroable(NumMaskElts, 0);
+ for (unsigned i = 0; i != NumMaskElts; ++i)
+ if (isUndefOrZero(Mask[i]))
+ Zeroable.setBit(i);
+
if (UnaryShuffle) {
// If we are shuffling a X86ISD::VZEXT_LOAD then we can use the load
// directly if we don't shuffle the lower element and we shuffle the upper
@@ -27640,7 +27637,7 @@ static bool combineX86ShuffleChain(Array
return true;
}
- if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, AllowFloatDomain,
+ if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
AllowIntDomain, Subtarget, Shuffle,
ShuffleVT, PermuteImm)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
@@ -27676,7 +27673,7 @@ static bool combineX86ShuffleChain(Array
return true;
}
- if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, AllowFloatDomain,
+ if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
AllowIntDomain, V1, V2, DL, DAG,
Subtarget, Shuffle, ShuffleVT,
PermuteImm)) {
@@ -27701,11 +27698,6 @@ static bool combineX86ShuffleChain(Array
ShuffleVT = MVT::getIntegerVT(MaskEltSizeInBits);
ShuffleVT = MVT::getVectorVT(ShuffleVT, NumMaskElts);
- APInt Zeroable(NumMaskElts, 0);
- for (unsigned i = 0; i != NumMaskElts; ++i)
- if (isUndefOrZero(Mask[i]))
- Zeroable.setBit(i);
-
uint64_t BitLen, BitIdx;
if (matchVectorShuffleAsEXTRQ(ShuffleVT, V1, V2, Mask, BitLen, BitIdx,
Zeroable)) {
More information about the llvm-commits mailing list