[llvm] r352880 - [TargetLowering] try harder to determine undef elements of vector binops

Fri Feb 1 07:35:12 PST 2019

Author: spatel
Date: Fri Feb  1 07:35:12 2019
New Revision: 352880

URL: http://llvm.org/viewvc/llvm-project?rev=352880&view=rev
Log:
[TargetLowering] try harder to determine undef elements of vector binops

This might be the start of tracking all vector element constants generally if we take it to its 
logical conclusion, but let's stop here and make sure this is correct/beneficial so far.

The affected tests require a convoluted path before they get simplified currently because we 
don't call SimplifyDemandedVectorElts() from binops directly and don't modify the binop operands 
directly in SimplifyDemandedVectorElts().

That's why the tests all have a trailing shuffle to induce a chain reaction of transforms. So 
something like this is happening:

1. Improve the knowledge of undefs in the binop via a SimplifyDemandedVectorElts() call that 
   originates from a shuffle.
2. Transfer that undef knowledge back to the shuffle mask user as more undef lanes.
3. Combine the modified shuffle by calling SimplifyDemandedVectorElts() again.
4. Translate the improved shuffle mask as undemanded lanes of build vector constants causing 
   those to become full undef constants.
5. Simplify the binop now that it has a full undef operand.

As we can see from the unchanged 'and' and 'or' tests, tracking undefs alone isn't a full solution. 
We would need to track zero and all-ones constants to improve those opcodes. We'd probably need to 
track NaN for FP ops too (assuming we don't have fast-math-flags set).

Differential Revision: https://reviews.llvm.org/D57066

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/trunk/test/CodeGen/X86/vector-partial-undef.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=352880&r1=352879&r2=352880&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp Fri Feb  1 07:35:12 2019
@@ -1433,6 +1433,53 @@ bool TargetLowering::SimplifyDemandedVec
   return Simplified;
 }
 
+/// Given a vector binary operation and known undefined elements for each input
+/// operand, compute whether each element of the output is undefined.
+static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
+                                         const APInt &UndefOp0,
+                                         const APInt &UndefOp1) {
+  EVT VT = BO.getValueType();
+  assert(ISD::isBinaryOp(BO.getNode()) && VT.isVector() && "Vector binop only");
+
+  EVT EltVT = VT.getVectorElementType();
+  unsigned NumElts = VT.getVectorNumElements();
+  assert(UndefOp0.getBitWidth() == NumElts &&
+         UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
+
+  auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
+                                   const APInt &UndefVals) {
+    if (UndefVals[Index])
+      return DAG.getUNDEF(EltVT);
+
+    if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
+      // Try hard to make sure that the getNode() call is not creating temporary
+      // nodes. Ignore opaque integers because they do not constant fold.
+      SDValue Elt = BV->getOperand(Index);
+      auto *C = dyn_cast<ConstantSDNode>(Elt);
+      if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
+        return Elt;
+    }
+
+    return SDValue();
+  };
+
+  APInt KnownUndef = APInt::getNullValue(NumElts);
+  for (unsigned i = 0; i != NumElts; ++i) {
+    // If both inputs for this element are either constant or undef and match
+    // the element type, compute the constant/undef result for this element of
+    // the vector.
+    // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
+    // not handle FP constants. The code within getNode() should be refactored
+    // to avoid the danger of creating a bogus temporary node here.
+    SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
+    SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
+    if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
+      if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
+        KnownUndef.setBit(i);
+  }
+  return KnownUndef;
+}
+
 bool TargetLowering::SimplifyDemandedVectorElts(
     SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef,
     APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
@@ -1805,6 +1852,9 @@ bool TargetLowering::SimplifyDemandedVec
     }
     break;
   }
+
+  // TODO: There are more binop opcodes that could be handled here - MUL, MIN,
+  // MAX, saturated math, etc.
   case ISD::OR:
   case ISD::XOR:
   case ISD::ADD:
@@ -1814,15 +1864,17 @@ bool TargetLowering::SimplifyDemandedVec
   case ISD::FMUL:
   case ISD::FDIV:
   case ISD::FREM: {
-    APInt SrcUndef, SrcZero;
-    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
-                                   SrcZero, TLO, Depth + 1))
+    APInt UndefRHS, ZeroRHS;
+    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
+                                   ZeroRHS, TLO, Depth + 1))
       return true;
-    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
-                                   KnownZero, TLO, Depth + 1))
+    APInt UndefLHS, ZeroLHS;
+    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
+                                   ZeroLHS, TLO, Depth + 1))
       return true;
-    KnownZero &= SrcZero;
-    KnownUndef &= SrcUndef;
+
+    KnownZero = ZeroLHS & ZeroRHS;
+    KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
     break;
   }
   case ISD::AND: {
@@ -1836,6 +1888,8 @@ bool TargetLowering::SimplifyDemandedVec
 
     // If either side has a zero element, then the result element is zero, even
     // if the other is an UNDEF.
+    // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
+    // and then handle 'and' nodes with the rest of the binop opcodes.
     KnownZero |= SrcZero;
     KnownUndef &= SrcUndef;
     KnownUndef &= ~KnownZero;

Modified: llvm/trunk/test/CodeGen/X86/vector-partial-undef.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-partial-undef.ll?rev=352880&r1=352879&r2=352880&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-partial-undef.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-partial-undef.ll Fri Feb  1 07:35:12 2019
@@ -51,10 +51,6 @@ define <8 x i32> @add_undef_elts(<4 x i3
 ;
 ; AVX-LABEL: add_undef_elts:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
-; AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [6,0,5,4,3,2,1,7]
-; AVX-NEXT:    vpermd %ymm0, %ymm1, %ymm0
 ; AVX-NEXT:    retq
   %extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
   %bogus_bo = add <8 x i32> %extend, <i32 undef, i32 undef, i32 undef, i32 undef, i32 42, i32 43, i32 44, i32 12>
@@ -71,11 +67,6 @@ define <8 x i32> @sub_undef_elts(<4 x i3
 ;
 ; AVX-LABEL: sub_undef_elts:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = <u,u,u,u,42,43,44,12>
-; AVX-NEXT:    vpsubd %ymm0, %ymm1, %ymm0
-; AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,0,5,4,3,2,6,7]
-; AVX-NEXT:    vpermd %ymm0, %ymm1, %ymm0
 ; AVX-NEXT:    retq
   %extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
   %bogus_bo = sub <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 42, i32 43, i32 44, i32 12>, %extend
@@ -130,24 +121,10 @@ define <4 x i64> @or_undef_elts(<2 x i64
 define <8 x i32> @xor_undef_elts(<4 x i32> %x) {
 ; SSE-LABEL: xor_undef_elts:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
-; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
-; SSE-NEXT:    pxor {{.*}}(%rip), %xmm2
-; SSE-NEXT:    pxor {{.*}}(%rip), %xmm1
-; SSE-NEXT:    movdqa %xmm1, %xmm0
-; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],xmm2[2,0]
-; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm2[1,0]
-; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[3,0],xmm1[0,0]
-; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,2],xmm2[2,0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: xor_undef_elts:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,3,0,2]
-; AVX-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
-; AVX-NEXT:    vxorps {{.*}}(%rip), %ymm0, %ymm0
-; AVX-NEXT:    vmovaps {{.*#+}} ymm1 = [6,1,5,4,3,2,0,7]
-; AVX-NEXT:    vpermps %ymm0, %ymm1, %ymm0
 ; AVX-NEXT:    retq
   %extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 1, i32 3, i32 0, i32 2, i32 undef, i32 undef>
   %bogus_bo = xor <8 x i32> %extend, <i32 42, i32 43, i32 undef, i32 undef, i32 undef, i32 undef, i32 44, i32 12>