[llvm] r307255 - [X86][SSE] combineX86ShuffleChain - merge duplicate 'Zeroable' element masks

Thu Jul 6 05:40:10 PDT 2017

Author: rksimon
Date: Thu Jul  6 05:40:10 2017
New Revision: 307255

URL: http://llvm.org/viewvc/llvm-project?rev=307255&view=rev
Log:
[X86][SSE] combineX86ShuffleChain - merge duplicate 'Zeroable' element masks

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=307255&r1=307254&r2=307255&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Jul  6 05:40:10 2017
@@ -27148,6 +27148,7 @@ static bool matchUnaryVectorShuffle(MVT
 // permute instructions.
 // TODO: Investigate sharing more of this with shuffle lowering.
 static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
+                                           const APInt &Zeroable,
                                            bool AllowFloatDomain,
                                            bool AllowIntDomain,
                                            const X86Subtarget &Subtarget,
@@ -27158,14 +27159,8 @@ static bool matchUnaryPermuteVectorShuff
   unsigned MaskScalarSizeInBits = InputSizeInBits / NumMaskElts;
   MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits);
 
-  bool ContainsZeros = false;
-  APInt Zeroable(NumMaskElts, false);
-  for (unsigned i = 0; i != NumMaskElts; ++i) {
-    int M = Mask[i];
-    if (isUndefOrZero(M))
-      Zeroable.setBit(i);
-    ContainsZeros |= (M == SM_SentinelZero);
-  }
+  bool ContainsZeros =
+      llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; });
 
   // Handle VPERMI/VPERMILPD vXi64/vXi64 patterns.
   if (!ContainsZeros && MaskScalarSizeInBits == 64) {
@@ -27330,6 +27325,7 @@ static bool matchBinaryVectorShuffle(MVT
 }
 
 static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
+                                            const APInt &Zeroable,
                                             bool AllowFloatDomain,
                                             bool AllowIntDomain,
                                             SDValue &V1, SDValue &V2, SDLoc &DL,
@@ -27415,11 +27411,6 @@ static bool matchBinaryPermuteVectorShuf
   // Attempt to combine to INSERTPS.
   if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() &&
       MaskVT.is128BitVector()) {
-    APInt Zeroable(4, 0);
-    for (unsigned i = 0; i != NumMaskElts; ++i)
-      if (Mask[i] < 0)
-        Zeroable.setBit(i);
-
     if (Zeroable.getBoolValue() &&
         matchVectorShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) {
       Shuffle = X86ISD::INSERTPS;
@@ -27608,6 +27599,12 @@ static bool combineX86ShuffleChain(Array
   bool AllowIntDomain = (!FloatDomain || (Depth > 3)) &&
                         (!MaskVT.is256BitVector() || Subtarget.hasAVX2());
 
+  // Determine zeroable mask elements.
+  APInt Zeroable(NumMaskElts, 0);
+  for (unsigned i = 0; i != NumMaskElts; ++i)
+    if (isUndefOrZero(Mask[i]))
+      Zeroable.setBit(i);
+
   if (UnaryShuffle) {
     // If we are shuffling a X86ISD::VZEXT_LOAD then we can use the load
     // directly if we don't shuffle the lower element and we shuffle the upper
@@ -27640,7 +27637,7 @@ static bool combineX86ShuffleChain(Array
       return true;
     }
 
-    if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, AllowFloatDomain,
+    if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
                                        AllowIntDomain, Subtarget, Shuffle,
                                        ShuffleVT, PermuteImm)) {
       if (Depth == 1 && Root.getOpcode() == Shuffle)
@@ -27676,7 +27673,7 @@ static bool combineX86ShuffleChain(Array
     return true;
   }
 
-  if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, AllowFloatDomain,
+  if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
                                       AllowIntDomain, V1, V2, DL, DAG,
                                       Subtarget, Shuffle, ShuffleVT,
                                       PermuteImm)) {
@@ -27701,11 +27698,6 @@ static bool combineX86ShuffleChain(Array
     ShuffleVT = MVT::getIntegerVT(MaskEltSizeInBits);
     ShuffleVT = MVT::getVectorVT(ShuffleVT, NumMaskElts);
 
-    APInt Zeroable(NumMaskElts, 0);
-    for (unsigned i = 0; i != NumMaskElts; ++i)
-      if (isUndefOrZero(Mask[i]))
-        Zeroable.setBit(i);
-
     uint64_t BitLen, BitIdx;
     if (matchVectorShuffleAsEXTRQ(ShuffleVT, V1, V2, Mask, BitLen, BitIdx,
                                   Zeroable)) {