[llvm] r284015 - [DAGCombiner] Update most ADD combines to support general vector combines
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 12 06:48:10 PDT 2016
Author: rksimon
Date: Wed Oct 12 08:48:10 2016
New Revision: 284015
URL: http://llvm.org/viewvc/llvm-project?rev=284015&view=rev
Log:
[DAGCombiner] Update most ADD combines to support general vector combines
Add a number of helper functions that match scalar constants or their splatted/build-vector equivalents, so that most of the ADD combine patterns can now be applied to vectors as well as scalars.
Differential Revision: https://reviews.llvm.org/D25374
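For illustration, this is the kind of pattern that now folds for vectors (function and value names here are just for the example; it mirrors the combine_vec_add_neg1 test updated below):

define <4 x i32> @add_of_negated(<4 x i32> %a, <4 x i32> %b) {
  %neg = sub <4 x i32> zeroinitializer, %b
  ; (A + (0-B)) -> (A-B) now also fires when the zero is a splat
  ; vector, so x86 emits a single psubd instead of pxor+psubd+paddd.
  %res = add <4 x i32> %a, %neg
  ret <4 x i32> %res
}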
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/X86/combine-add.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=284015&r1=284014&r2=284015&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Wed Oct 12 08:48:10 2016
@@ -824,6 +824,45 @@ static ConstantFPSDNode *isConstOrConstS
return nullptr;
}
+// Determines if it is a constant integer or a build vector of constant
+// integers (and undefs).
+// Do not permit build vector implicit truncation.
+static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
+ return !(Const->isOpaque() && NoOpaques);
+ if (N.getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ unsigned BitWidth = N.getScalarValueSizeInBits();
+ for (const SDValue &Op : N->op_values()) {
+ if (Op.isUndef())
+ continue;
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
+ if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
+ (Const->isOpaque() && NoOpaques))
+ return false;
+ }
+ return true;
+}
+
+// Determines if it is a constant null integer or a splatted vector of a
+// constant null integer (with no undefs).
+// Build vector implicit truncation is not an issue for null values.
+static bool isNullConstantOrNullSplatConstant(SDValue N) {
+ if (ConstantSDNode *Splat = isConstOrConstSplat(N))
+ return Splat->isNullValue();
+ return false;
+}
+
+// Determines if it is a constant integer of one or a splatted vector of a
+// constant integer of one (with no undefs).
+// Do not permit build vector implicit truncation.
+static bool isOneConstantOrOneSplatConstant(SDValue N) {
+ unsigned BitWidth = N.getScalarValueSizeInBits();
+ if (ConstantSDNode *Splat = isConstOrConstSplat(N))
+ return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
+ return false;
+}
+
SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
SDValue N1) {
EVT VT = N0.getValueType();
@@ -1674,13 +1713,12 @@ SDValue DAGCombiner::visitADD(SDNode *N)
if (isNullConstant(N1))
return N0;
// fold ((c1-A)+c2) -> (c1+c2)-A
- if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1)) {
+ if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
if (N0.getOpcode() == ISD::SUB)
- if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) {
+ if (isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
SDLoc DL(N);
return DAG.getNode(ISD::SUB, DL, VT,
- DAG.getConstant(N1C->getAPIntValue()+
- N0C->getAPIntValue(), DL, VT),
+ DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
N0.getOperand(1));
}
}
@@ -1688,10 +1726,12 @@ SDValue DAGCombiner::visitADD(SDNode *N)
if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1))
return RADD;
// fold ((0-A) + B) -> B-A
- if (N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0)))
+ if (N0.getOpcode() == ISD::SUB &&
+ isNullConstantOrNullSplatConstant(N0.getOperand(0)))
return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1));
// fold (A + (0-B)) -> A-B
- if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0)))
+ if (N1.getOpcode() == ISD::SUB &&
+ isNullConstantOrNullSplatConstant(N1.getOperand(0)))
return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1));
// fold (A+(B-A)) -> B
if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
@@ -1723,29 +1763,30 @@ SDValue DAGCombiner::visitADD(SDNode *N)
SDValue N10 = N1.getOperand(0);
SDValue N11 = N1.getOperand(1);
- if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
+ if (isConstantOrConstantVector(N00) ||
+ isConstantOrConstantVector(N10))
return DAG.getNode(ISD::SUB, SDLoc(N), VT,
DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
}
- if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
+ if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (a+b) -> (a|b) iff a and b share no bits.
if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
- VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1))
+ VT.isInteger() && DAG.haveNoCommonBitsSet(N0, N1))
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
// fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
- isNullConstant(N1.getOperand(0).getOperand(0)))
+ isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
DAG.getNode(ISD::SHL, SDLoc(N), VT,
N1.getOperand(0).getOperand(1),
N1.getOperand(1)));
if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB &&
- isNullConstant(N0.getOperand(0).getOperand(0)))
+ isNullConstantOrNullSplatConstant(N0.getOperand(0).getOperand(0)))
return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
DAG.getNode(ISD::SHL, SDLoc(N), VT,
N0.getOperand(0).getOperand(1),
@@ -1758,7 +1799,8 @@ SDValue DAGCombiner::visitADD(SDNode *N)
// (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
// and similar xforms where the inner op is either ~0 or 0.
- if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) {
+ if (NumSignBits == DestBits &&
+ isOneConstantOrOneSplatConstant(N1->getOperand(1))) {
SDLoc DL(N);
return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
}
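One more sketch of a newly enabled fold (hypothetical function name): with SimplifyDemandedBits and haveNoCommonBitsSet no longer restricted to scalar types, an add of values with disjoint known bits becomes an or, which is what combine_vec_add_uniquebits in the test diff below verifies:

define <4 x i32> @add_disjoint_bits(<4 x i32> %a, <4 x i32> %b) {
  %hi = and <4 x i32> %a, <i32 61680, i32 61680, i32 61680, i32 61680>
  %lo = and <4 x i32> %b, <i32 3855, i32 3855, i32 3855, i32 3855>
  ; the 0xF0F0 and 0x0F0F masks share no set bits, so (add x, y) is
  ; equivalent to (or x, y); the combiner can now prove this for
  ; vectors and selects (v)orps instead of paddd.
  %r = add <4 x i32> %hi, %lo
  ret <4 x i32> %r
}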
Modified: llvm/trunk/test/CodeGen/X86/combine-add.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-add.ll?rev=284015&r1=284014&r2=284015&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-add.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-add.ll Wed Oct 12 08:48:10 2016
@@ -19,18 +19,15 @@ define <4 x i32> @combine_vec_add_to_zer
define <4 x i32> @combine_vec_add_constant_sub(<4 x i32> %a) {
; SSE-LABEL: combine_vec_add_constant_sub:
; SSE: # BB#0:
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [0,1,2,3]
-; SSE-NEXT: movdqa %xmm2, %xmm1
+; SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,2,4,6]
; SSE-NEXT: psubd %xmm0, %xmm1
-; SSE-NEXT: paddd %xmm2, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_add_constant_sub:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3]
+; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,2,4,6]
; AVX-NEXT: vpsubd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = sub <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %a
%2 = add <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %1
@@ -41,17 +38,13 @@ define <4 x i32> @combine_vec_add_consta
define <4 x i32> @combine_vec_add_neg0(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_vec_add_neg0:
; SSE: # BB#0:
-; SSE-NEXT: pxor %xmm2, %xmm2
-; SSE-NEXT: psubd %xmm0, %xmm2
-; SSE-NEXT: paddd %xmm1, %xmm2
-; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: psubd %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_add_neg0:
; AVX: # BB#0:
-; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpsubd %xmm0, %xmm2, %xmm0
-; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsubd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%1 = sub <4 x i32> zeroinitializer, %a
%2 = add <4 x i32> %1, %b
@@ -62,16 +55,12 @@ define <4 x i32> @combine_vec_add_neg0(<
define <4 x i32> @combine_vec_add_neg1(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_vec_add_neg1:
; SSE: # BB#0:
-; SSE-NEXT: pxor %xmm2, %xmm2
-; SSE-NEXT: psubd %xmm1, %xmm2
-; SSE-NEXT: paddd %xmm2, %xmm0
+; SSE-NEXT: psubd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_add_neg1:
; AVX: # BB#0:
-; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpsubd %xmm1, %xmm2, %xmm1
-; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = sub <4 x i32> zeroinitializer, %b
%2 = add <4 x i32> %a, %1
@@ -186,18 +175,16 @@ define <4 x i32> @combine_vec_add_sub_ad
define <4 x i32> @combine_vec_add_sub_sub(<4 x i32> %a, <4 x i32> %b, <4 x i32> %d) {
; SSE-LABEL: combine_vec_add_sub_sub:
; SSE: # BB#0:
+; SSE-NEXT: paddd {{.*}}(%rip), %xmm0
+; SSE-NEXT: paddd %xmm2, %xmm1
; SSE-NEXT: psubd %xmm1, %xmm0
-; SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3]
-; SSE-NEXT: psubd %xmm2, %xmm1
-; SSE-NEXT: paddd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_add_sub_sub:
; AVX: # BB#0:
+; AVX-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3]
-; AVX-NEXT: vpsubd %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = sub <4 x i32> %a, %b
%2 = sub <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %d
@@ -209,18 +196,18 @@ define <4 x i32> @combine_vec_add_sub_su
define <4 x i32> @combine_vec_add_uniquebits(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_vec_add_uniquebits:
; SSE: # BB#0:
-; SSE-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE-NEXT: paddd %xmm1, %xmm0
+; SSE-NEXT: andps {{.*}}(%rip), %xmm0
+; SSE-NEXT: andps {{.*}}(%rip), %xmm1
+; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_add_uniquebits:
; AVX: # BB#0:
-; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
-; AVX-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
-; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
+; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
+; AVX-NEXT: vandps %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = and <4 x i32> %a, <i32 61680, i32 61680, i32 61680, i32 61680>
%2 = and <4 x i32> %b, <i32 3855, i32 3855, i32 3855, i32 3855>
@@ -232,18 +219,14 @@ define <4 x i32> @combine_vec_add_unique
define <4 x i32> @combine_vec_add_shl_neg0(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: combine_vec_add_shl_neg0:
; SSE: # BB#0:
-; SSE-NEXT: pxor %xmm2, %xmm2
-; SSE-NEXT: psubd %xmm1, %xmm2
-; SSE-NEXT: pslld $5, %xmm2
-; SSE-NEXT: paddd %xmm2, %xmm0
+; SSE-NEXT: pslld $5, %xmm1
+; SSE-NEXT: psubd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_add_shl_neg0:
; AVX: # BB#0:
-; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpsubd %xmm1, %xmm2, %xmm1
; AVX-NEXT: vpslld $5, %xmm1, %xmm1
-; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = sub <4 x i32> zeroinitializer, %y
%2 = shl <4 x i32> %1, <i32 5, i32 5, i32 5, i32 5>
@@ -255,18 +238,14 @@ define <4 x i32> @combine_vec_add_shl_ne
define <4 x i32> @combine_vec_add_shl_neg1(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: combine_vec_add_shl_neg1:
; SSE: # BB#0:
-; SSE-NEXT: pxor %xmm2, %xmm2
-; SSE-NEXT: psubd %xmm1, %xmm2
-; SSE-NEXT: pslld $5, %xmm2
-; SSE-NEXT: paddd %xmm2, %xmm0
+; SSE-NEXT: pslld $5, %xmm1
+; SSE-NEXT: psubd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_add_shl_neg1:
; AVX: # BB#0:
-; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpsubd %xmm1, %xmm2, %xmm1
; AVX-NEXT: vpslld $5, %xmm1, %xmm1
-; AVX-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = sub <4 x i32> zeroinitializer, %y
%2 = shl <4 x i32> %1, <i32 5, i32 5, i32 5, i32 5>
@@ -280,15 +259,13 @@ define <4 x i32> @combine_vec_add_and_co
; SSE-LABEL: combine_vec_add_and_compare:
; SSE: # BB#0:
; SSE-NEXT: pcmpeqd %xmm2, %xmm1
-; SSE-NEXT: psrld $31, %xmm1
-; SSE-NEXT: paddd %xmm1, %xmm0
+; SSE-NEXT: psubd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_add_and_compare:
; AVX: # BB#0:
; AVX-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpsrld $31, %xmm1, %xmm1
-; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = icmp eq <4 x i32> %a1, %a2
%2 = sext <4 x i1> %1 to <4 x i32>