[llvm] r349374 - [TargetLowering] Add DemandedElts mask to SimplifyDemandedBits (PR40000)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 17 10:43:43 PST 2018


Author: rksimon
Date: Mon Dec 17 10:43:43 2018
New Revision: 349374

URL: http://llvm.org/viewvc/llvm-project?rev=349374&view=rev
Log:
[TargetLowering] Add DemandedElts mask to SimplifyDemandedBits (PR40000)

This is an initial patch to add the necessary support for a DemandedElts argument to SimplifyDemandedBits, more closely matching computeKnownBits and to help improve vector codegen.

I've added only a small amount of the changes necessary to get at least one test to update - a lot more can be done but I'd like to add these methodically with proper test coverage, at the same time the hope is to slowly move some/all of SimplifyDemandedVectorElts into SimplifyDemandedBits as well.

Differential Revision: https://reviews.llvm.org/D55768

Modified:
    llvm/trunk/include/llvm/CodeGen/TargetLowering.h
    llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.h
    llvm/trunk/test/CodeGen/X86/combine-sdiv.ll

Modified: llvm/trunk/include/llvm/CodeGen/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/TargetLowering.h?rev=349374&r1=349373&r2=349374&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/CodeGen/TargetLowering.h Mon Dec 17 10:43:43 2018
@@ -2899,22 +2899,28 @@ public:
   bool SimplifyDemandedBits(SDNode *User, unsigned OpIdx, const APInt &Demanded,
                             DAGCombinerInfo &DCI, TargetLoweringOpt &TLO) const;
 
-  /// Look at Op.  At this point, we know that only the DemandedMask bits of the
+  /// Look at Op.  At this point, we know that only the DemandedBits bits of the
   /// result of Op are ever used downstream.  If we can use this information to
   /// simplify Op, create a new simplified DAG node and return true, returning
   /// the original and new nodes in Old and New.  Otherwise, analyze the
   /// expression and return a mask of KnownOne and KnownZero bits for the
   /// expression (used to simplify the caller).  The KnownZero/One bits may only
-  /// be accurate for those bits in the DemandedMask.
+  /// be accurate for those bits in the Demanded masks.
   /// \p AssumeSingleUse When this parameter is true, this function will
   ///    attempt to simplify \p Op even if there are multiple uses.
   ///    Callers are responsible for correctly updating the DAG based on the
   ///    results of this function, because simply replacing replacing TLO.Old
   ///    with TLO.New will be incorrect when this parameter is true and TLO.Old
   ///    has multiple uses.
-  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
-                            KnownBits &Known,
-                            TargetLoweringOpt &TLO,
+  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+                            const APInt &DemandedElts, KnownBits &Known,
+                            TargetLoweringOpt &TLO, unsigned Depth = 0,
+                            bool AssumeSingleUse = false) const;
+
+  /// Helper wrapper around SimplifyDemandedBits, demanding all elements.
+  /// Adds Op back to the worklist upon success.
+  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+                            KnownBits &Known, TargetLoweringOpt &TLO,
                             unsigned Depth = 0,
                             bool AssumeSingleUse = false) const;
 
@@ -2985,13 +2991,14 @@ public:
       SDValue Op, const APInt &DemandedElts, APInt &KnownUndef,
       APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth = 0) const;
 
-  /// Attempt to simplify any target nodes based on the demanded bits,
+  /// Attempt to simplify any target nodes based on the demanded bits/elts,
   /// returning true on success. Otherwise, analyze the
   /// expression and return a mask of KnownOne and KnownZero bits for the
   /// expression (used to simplify the caller).  The KnownZero/One bits may only
-  /// be accurate for those bits in the DemandedMask.
+  /// be accurate for those bits in the Demanded masks.
   virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                                  const APInt &DemandedBits,
+                                                 const APInt &DemandedElts,
                                                  KnownBits &Known,
                                                  TargetLoweringOpt &TLO,
                                                  unsigned Depth = 0) const;

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=349374&r1=349373&r2=349374&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp Mon Dec 17 10:43:43 2018
@@ -496,23 +496,41 @@ bool TargetLowering::SimplifyDemandedBit
   return Simplified;
 }
 
+bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+                                          KnownBits &Known,
+                                          TargetLoweringOpt &TLO,
+                                          unsigned Depth,
+                                          bool AssumeSingleUse) const {
+  EVT VT = Op.getValueType();
+  APInt DemandedElts = VT.isVector()
+                           ? APInt::getAllOnesValue(VT.getVectorNumElements())
+                           : APInt(1, 1);
+  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
+                              AssumeSingleUse);
+}
+
 /// Look at Op. At this point, we know that only the OriginalDemandedBits of the
 /// result of Op are ever used downstream. If we can use this information to
 /// simplify Op, create a new simplified DAG node and return true, returning the
 /// original and new nodes in Old and New. Otherwise, analyze the expression and
 /// return a mask of Known bits for the expression (used to simplify the
 /// caller).  The Known bits may only be accurate for those bits in the
-/// DemandedMask.
-bool TargetLowering::SimplifyDemandedBits(SDValue Op,
-                                          const APInt &OriginalDemandedBits,
-                                          KnownBits &Known,
-                                          TargetLoweringOpt &TLO,
-                                          unsigned Depth,
-                                          bool AssumeSingleUse) const {
+/// OriginalDemandedBits and OriginalDemandedElts.
+bool TargetLowering::SimplifyDemandedBits(
+    SDValue Op, const APInt &OriginalDemandedBits,
+    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
+    unsigned Depth, bool AssumeSingleUse) const {
   unsigned BitWidth = OriginalDemandedBits.getBitWidth();
   assert(Op.getScalarValueSizeInBits() == BitWidth &&
          "Mask size mismatches value type size!");
+
+  unsigned NumElts = OriginalDemandedElts.getBitWidth();
+  assert((!Op.getValueType().isVector() ||
+          NumElts == Op.getValueType().getVectorNumElements()) &&
+         "Unexpected vector size");
+
   APInt DemandedBits = OriginalDemandedBits;
+  APInt DemandedElts = OriginalDemandedElts;
   SDLoc dl(Op);
   auto &DL = TLO.DAG.getDataLayout();
 
@@ -532,18 +550,19 @@ bool TargetLowering::SimplifyDemandedBit
     if (Depth != 0) {
       // If not at the root, Just compute the Known bits to
       // simplify things downstream.
-      TLO.DAG.computeKnownBits(Op, Known, Depth);
+      TLO.DAG.computeKnownBits(Op, Known, DemandedElts, Depth);
       return false;
     }
     // If this is the root being simplified, allow it to have multiple uses,
-    // just set the DemandedBits to all bits.
+    // just set the DemandedBits/Elts to all bits.
     DemandedBits = APInt::getAllOnesValue(BitWidth);
-  } else if (OriginalDemandedBits == 0) {
-    // Not demanding any bits from Op.
+    DemandedElts = APInt::getAllOnesValue(NumElts);
+  } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
+    // Not demanding any bits/elts from Op.
     if (!Op.isUndef())
       return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
     return false;
-  } else if (Depth == 6) {        // Limit search depth.
+  } else if (Depth == 6) { // Limit search depth.
     return false;
   }
 
@@ -573,18 +592,71 @@ bool TargetLowering::SimplifyDemandedBit
       Known.One &= Known2.One;
       Known.Zero &= Known2.Zero;
     }
-    return false;   // Don't fall through, will infinitely loop.
-  case ISD::CONCAT_VECTORS:
+    return false; // Don't fall through, will infinitely loop.
+  case ISD::CONCAT_VECTORS: {
     Known.Zero.setAllBits();
     Known.One.setAllBits();
-    for (SDValue SrcOp : Op->ops()) {
-      if (SimplifyDemandedBits(SrcOp, DemandedBits, Known2, TLO, Depth + 1))
+    EVT SubVT = Op.getOperand(0).getValueType();
+    unsigned NumSubVecs = Op.getNumOperands();
+    unsigned NumSubElts = SubVT.getVectorNumElements();
+    for (unsigned i = 0; i != NumSubVecs; ++i) {
+      APInt DemandedSubElts =
+          DemandedElts.extractBits(NumSubElts, i * NumSubElts);
+      if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
+                               Known2, TLO, Depth + 1))
         return true;
-      // Known bits are the values that are shared by every subvector.
-      Known.One &= Known2.One;
-      Known.Zero &= Known2.Zero;
+      // Known bits are shared by every demanded subvector element.
+      if (!!DemandedSubElts) {
+        Known.One &= Known2.One;
+        Known.Zero &= Known2.Zero;
+      }
     }
     break;
+  }
+  case ISD::VECTOR_SHUFFLE: {
+    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
+
+    // Collect demanded elements from shuffle operands..
+    APInt DemandedLHS(NumElts, 0);
+    APInt DemandedRHS(NumElts, 0);
+    for (unsigned i = 0; i != NumElts; ++i) {
+      if (!DemandedElts[i])
+        continue;
+      int M = ShuffleMask[i];
+      if (M < 0) {
+        // For UNDEF elements, we don't know anything about the common state of
+        // the shuffle result.
+        DemandedLHS.clearAllBits();
+        DemandedRHS.clearAllBits();
+        break;
+      }
+      assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
+      if (M < (int)NumElts)
+        DemandedLHS.setBit(M);
+      else
+        DemandedRHS.setBit(M - NumElts);
+    }
+
+    if (!!DemandedLHS || !!DemandedRHS) {
+      Known.Zero.setAllBits();
+      Known.One.setAllBits();
+      if (!!DemandedLHS) {
+        if (SimplifyDemandedBits(Op.getOperand(0), DemandedBits, DemandedLHS,
+                                 Known2, TLO, Depth + 1))
+          return true;
+        Known.One &= Known2.One;
+        Known.Zero &= Known2.Zero;
+      }
+      if (!!DemandedRHS) {
+        if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedRHS,
+                                 Known2, TLO, Depth + 1))
+          return true;
+        Known.One &= Known2.One;
+        Known.Zero &= Known2.Zero;
+      }
+    }
+    break;
+  }
   case ISD::AND: {
     SDValue Op0 = Op.getOperand(0);
     SDValue Op1 = Op.getOperand(1);
@@ -596,7 +668,7 @@ bool TargetLowering::SimplifyDemandedBit
     if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
       KnownBits LHSKnown;
       // Do not increment Depth here; that can cause an infinite loop.
-      TLO.DAG.computeKnownBits(Op0, LHSKnown, Depth);
+      TLO.DAG.computeKnownBits(Op0, LHSKnown, DemandedElts, Depth);
       // If the LHS already has zeros where RHSC does, this 'and' is dead.
       if ((LHSKnown.Zero & DemandedBits) ==
           (~RHSC->getAPIntValue() & DemandedBits))
@@ -619,10 +691,10 @@ bool TargetLowering::SimplifyDemandedBit
       }
     }
 
-    if (SimplifyDemandedBits(Op1, DemandedBits, Known, TLO, Depth + 1))
+    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1))
       return true;
     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
-    if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, Known2, TLO,
+    if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts, Known2, TLO,
                              Depth + 1))
       return true;
     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
@@ -653,10 +725,11 @@ bool TargetLowering::SimplifyDemandedBit
     SDValue Op0 = Op.getOperand(0);
     SDValue Op1 = Op.getOperand(1);
 
-    if (SimplifyDemandedBits(Op1, DemandedBits, Known, TLO, Depth + 1))
+    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1))
       return true;
     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
-    if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, Known2, TLO, Depth + 1))
+    if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts, Known2, TLO,
+                             Depth + 1))
       return true;
     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
 
@@ -683,10 +756,10 @@ bool TargetLowering::SimplifyDemandedBit
     SDValue Op0 = Op.getOperand(0);
     SDValue Op1 = Op.getOperand(1);
 
-    if (SimplifyDemandedBits(Op1, DemandedBits, Known, TLO, Depth + 1))
+    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1))
       return true;
     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
-    if (SimplifyDemandedBits(Op0, DemandedBits, Known2, TLO, Depth + 1))
+    if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO, Depth + 1))
       return true;
     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
 
@@ -840,7 +913,7 @@ bool TargetLowering::SimplifyDemandedBit
         }
       }
 
-      if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), Known, TLO,
+      if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), DemandedElts, Known, TLO,
                                Depth + 1))
         return true;
 
@@ -935,7 +1008,7 @@ bool TargetLowering::SimplifyDemandedBit
       }
 
       // Compute the new bits that are at the top now.
-      if (SimplifyDemandedBits(Op0, InDemandedMask, Known, TLO, Depth + 1))
+      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, Depth + 1))
         return true;
       assert(!Known.hasConflict() && "Bits known to be one AND zero?");
       Known.Zero.lshrInPlace(ShAmt);
@@ -974,7 +1047,7 @@ bool TargetLowering::SimplifyDemandedBit
       if (DemandedBits.countLeadingZeros() < ShAmt)
         InDemandedMask.setSignBit();
 
-      if (SimplifyDemandedBits(Op0, InDemandedMask, Known, TLO, Depth + 1))
+      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, Depth + 1))
         return true;
       assert(!Known.hasConflict() && "Bits known to be one AND zero?");
       Known.Zero.lshrInPlace(ShAmt);
@@ -1221,18 +1294,26 @@ bool TargetLowering::SimplifyDemandedBit
     break;
   }
   case ISD::EXTRACT_VECTOR_ELT: {
-    // Demand the bits from every vector element.
     SDValue Src = Op.getOperand(0);
+    SDValue Idx = Op.getOperand(1);
+    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
     unsigned EltBitWidth = Src.getScalarValueSizeInBits();
 
+    // Demand the bits from every vector element without a constant index.
+    APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
+    if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
+      if (CIdx->getAPIntValue().ult(NumSrcElts))
+        DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
+
     // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
     // anything about the extended bits.
     APInt DemandedSrcBits = DemandedBits;
     if (BitWidth > EltBitWidth)
       DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
 
-    if (SimplifyDemandedBits(Src, DemandedSrcBits, Known2, TLO, Depth + 1))
-        return true;
+    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
+                             Depth + 1))
+      return true;
 
     Known = Known2;
     if (BitWidth > EltBitWidth)
@@ -1313,8 +1394,8 @@ bool TargetLowering::SimplifyDemandedBit
     SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
     unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
     APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
-    if (SimplifyDemandedBits(Op0, LoMask, Known2, TLO, Depth + 1) ||
-        SimplifyDemandedBits(Op1, LoMask, Known2, TLO, Depth + 1) ||
+    if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO, Depth + 1) ||
+        SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO, Depth + 1) ||
         // See if the operation should be performed at a smaller bit width.
         ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
       SDNodeFlags Flags = Op.getNode()->getFlags();
@@ -1354,14 +1435,14 @@ bool TargetLowering::SimplifyDemandedBit
   }
   default:
     if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
-      if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, Known, TLO,
-                                            Depth))
+      if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
+                                            Known, TLO, Depth))
         return true;
       break;
     }
 
     // Just use computeKnownBits to compute output bits.
-    TLO.DAG.computeKnownBits(Op, Known, Depth);
+    TLO.DAG.computeKnownBits(Op, Known, DemandedElts, Depth);
     break;
   }
 
@@ -1887,8 +1968,8 @@ bool TargetLowering::SimplifyDemandedVec
 }
 
 bool TargetLowering::SimplifyDemandedBitsForTargetNode(
-    SDValue Op, const APInt &DemandedBits, KnownBits &Known,
-    TargetLoweringOpt &TLO, unsigned Depth) const {
+    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+    KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
@@ -1896,9 +1977,6 @@ bool TargetLowering::SimplifyDemandedBit
          "Should use SimplifyDemandedBits if you don't know whether Op"
          " is a target node!");
   EVT VT = Op.getValueType();
-  APInt DemandedElts = VT.isVector()
-                           ? APInt::getAllOnesValue(VT.getVectorNumElements())
-                           : APInt(1, 1);
   computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
   return false;
 }

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=349374&r1=349373&r2=349374&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Dec 17 10:43:43 2018
@@ -32397,8 +32397,9 @@ bool X86TargetLowering::SimplifyDemanded
 }
 
 bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
-    SDValue Op, const APInt &OriginalDemandedBits, KnownBits &Known,
-    TargetLoweringOpt &TLO, unsigned Depth) const {
+    SDValue Op, const APInt &OriginalDemandedBits,
+    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
+    unsigned Depth) const {
   unsigned BitWidth = OriginalDemandedBits.getBitWidth();
   unsigned Opc = Op.getOpcode();
   switch(Opc) {
@@ -32424,8 +32425,8 @@ bool X86TargetLowering::SimplifyDemanded
       KnownBits KnownOp;
       unsigned ShAmt = ShiftImm->getZExtValue();
       APInt DemandedMask = OriginalDemandedBits.lshr(ShAmt);
-      if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask, KnownOp, TLO,
-                               Depth + 1))
+      if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask,
+                               OriginalDemandedElts, KnownOp, TLO, Depth + 1))
         return true;
     }
     break;
@@ -32446,8 +32447,8 @@ bool X86TargetLowering::SimplifyDemanded
           OriginalDemandedBits.countLeadingZeros() < ShAmt)
         DemandedMask.setSignBit();
 
-      if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask, KnownOp, TLO,
-                               Depth + 1))
+      if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask,
+                               OriginalDemandedElts, KnownOp, TLO, Depth + 1))
         return true;
     }
     break;
@@ -32475,8 +32476,8 @@ bool X86TargetLowering::SimplifyDemanded
 
     // MOVMSK only uses the MSB from each vector element.
     KnownBits KnownSrc;
-    if (SimplifyDemandedBits(Src, APInt::getSignMask(SrcBits), KnownSrc, TLO,
-                             Depth + 1))
+    if (SimplifyDemandedBits(Src, APInt::getSignMask(SrcBits), DemandedElts,
+                             KnownSrc, TLO, Depth + 1))
       return true;
 
     if (KnownSrc.One[SrcBits - 1])
@@ -32488,7 +32489,7 @@ bool X86TargetLowering::SimplifyDemanded
   }
 
   return TargetLowering::SimplifyDemandedBitsForTargetNode(
-      Op, OriginalDemandedBits, Known, TLO, Depth);
+      Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
 }
 
 /// Check if a vector extract from a target-specific shuffle of a load can be

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=349374&r1=349373&r2=349374&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Mon Dec 17 10:43:43 2018
@@ -871,6 +871,7 @@ namespace llvm {
 
     bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                            const APInt &DemandedBits,
+                                           const APInt &DemandedElts,
                                            KnownBits &Known,
                                            TargetLoweringOpt &TLO,
                                            unsigned Depth) const override;

Modified: llvm/trunk/test/CodeGen/X86/combine-sdiv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-sdiv.ll?rev=349374&r1=349373&r2=349374&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-sdiv.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-sdiv.ll Mon Dec 17 10:43:43 2018
@@ -3002,7 +3002,6 @@ define <16 x i8> @pr38658(<16 x i8> %x)
 ; SSE2-NEXT:    packuswb %xmm0, %xmm2
 ; SSE2-NEXT:    psrlw $7, %xmm1
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
-; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
 ; SSE2-NEXT:    paddb %xmm2, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:    retq
@@ -3033,7 +3032,6 @@ define <16 x i8> @pr38658(<16 x i8> %x)
 ; SSE41-NEXT:    packuswb %xmm1, %xmm2
 ; SSE41-NEXT:    psrlw $7, %xmm0
 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm0
-; SSE41-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE41-NEXT:    paddb %xmm2, %xmm0
 ; SSE41-NEXT:    retq
 ;
@@ -3059,7 +3057,6 @@ define <16 x i8> @pr38658(<16 x i8> %x)
 ; AVX1-NEXT:    vpackuswb %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    retq
 ;
@@ -3078,7 +3075,6 @@ define <16 x i8> @pr38658(<16 x i8> %x)
 ; AVX2-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT:    vpsrlw $7, %xmm0, %xmm0
 ; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX2-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -3093,7 +3089,6 @@ define <16 x i8> @pr38658(<16 x i8> %x)
 ; AVX512F-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
 ; AVX512F-NEXT:    vpsrlw $7, %xmm0, %xmm1
 ; AVX512F-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX512F-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; AVX512F-NEXT:    vpsravd {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
@@ -3110,7 +3105,6 @@ define <16 x i8> @pr38658(<16 x i8> %x)
 ; AVX512BW-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
 ; AVX512BW-NEXT:    vpsrlw $7, %xmm0, %xmm1
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpmovsxbw %xmm0, %ymm0
 ; AVX512BW-NEXT:    vpsravw {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512BW-NEXT:    vpmovwb %ymm0, %xmm0




More information about the llvm-commits mailing list