[llvm] r284015 - [DAGCombiner] Update most ADD combines to support general vector combines
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 12 06:48:10 PDT 2016
Author: rksimon
Date: Wed Oct 12 08:48:10 2016
New Revision: 284015
URL: http://llvm.org/viewvc/llvm-project?rev=284015&view=rev
Log:
[DAGCombiner] Update most ADD combines to support general vector combines
Add a number of helper functions that match scalar constants or their splatted/build-vector equivalents, so that most of the ADD combine patterns can now be applied to vectors as well as scalars.
Differential Revision: https://reviews.llvm.org/D25374
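For illustration, this is the kind of pattern that now folds for vectors (function and value names here are just for the example; it mirrors the combine_vec_add_neg1 test updated below):

define <4 x i32> @add_of_negated(<4 x i32> %a, <4 x i32> %b) {
  %neg = sub <4 x i32> zeroinitializer, %b
  ; (A + (0-B)) -> (A-B) now also fires when the zero is a splat
  ; vector, so x86 emits a single psubd instead of pxor+psubd+paddd.
  %res = add <4 x i32> %a, %neg
  ret <4 x i32> %res
}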
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/X86/combine-add.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=284015&r1=284014&r2=284015&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Wed Oct 12 08:48:10 2016
@@ -824,6 +824,45 @@ static ConstantFPSDNode *isConstOrConstS
return nullptr;
}
+// Determines if it is a constant integer or a build vector of constant
+// integers (and undefs).
+// Do not permit build vector implicit truncation.
+static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
+ return !(Const->isOpaque() && NoOpaques);
+ if (N.getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ unsigned BitWidth = N.getScalarValueSizeInBits();
+ for (const SDValue &Op : N->op_values()) {
+ if (Op.isUndef())
+ continue;
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
+ if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
+ (Const->isOpaque() && NoOpaques))
+ return false;
+ }
+ return true;
+}
+
+// Determines if it is a constant null integer or a splatted vector of a
+// constant null integer (with no undefs).
+// Build vector implicit truncation is not an issue for null values.
+static bool isNullConstantOrNullSplatConstant(SDValue N) {
+ if (ConstantSDNode *Splat = isConstOrConstSplat(N))
+ return Splat->isNullValue();
+ return false;
+}
+
+// Determines if it is a constant integer of one or a splatted vector of a
+// constant integer of one (with no undefs).
+// Do not permit build vector implicit truncation.
+static bool isOneConstantOrOneSplatConstant(SDValue N) {
+ unsigned BitWidth = N.getScalarValueSizeInBits();
+ if (ConstantSDNode *Splat = isConstOrConstSplat(N))
+ return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
+ return false;
+}
+
SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
SDValue N1) {
EVT VT = N0.getValueType();
@@ -1674,13 +1713,12 @@ SDValue DAGCombiner::visitADD(SDNode *N)
if (isNullConstant(N1))
return N0;
// fold ((c1-A)+c2) -> (c1+c2)-A
- if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1)) {
+ if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
if (N0.getOpcode() == ISD::SUB)
- if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) {
+ if (isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
SDLoc DL(N);
return DAG.getNode(ISD::SUB, DL, VT,
- DAG.getConstant(N1C->getAPIntValue()+
- N0C->getAPIntValue(), DL, VT),
+ DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
N0.getOperand(1));
}
}
@@ -1688,10 +1726,12 @@ SDValue DAGCombiner::visitADD(SDNode *N)
if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1))
return RADD;
// fold ((0-A) + B) -> B-A
- if (N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0)))
+ if (N0.getOpcode() == ISD::SUB &&
+ isNullConstantOrNullSplatConstant(N0.getOperand(0)))
return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1));
// fold (A + (0-B)) -> A-B
- if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0)))
+ if (N1.getOpcode() == ISD::SUB &&
+ isNullConstantOrNullSplatConstant(N1.getOperand(0)))
return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1));
// fold (A+(B-A)) -> B
if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
@@ -1723,29 +1763,30 @@ SDValue DAGCombiner::visitADD(SDNode *N)
SDValue N10 = N1.getOperand(0);
SDValue N11 = N1.getOperand(1);
- if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
+ if (isConstantOrConstantVector(N00) ||
+ isConstantOrConstantVector(N10))
return DAG.getNode(ISD::SUB, SDLoc(N), VT,
DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
}
- if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
+ if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (a+b) -> (a|b) iff a and b share no bits.
if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
- VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1))
+ VT.isInteger() && DAG.haveNoCommonBitsSet(N0, N1))
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
// fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
- isNullConstant(N1.getOperand(0).getOperand(0)))
+ isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
DAG.getNode(ISD::SHL, SDLoc(N), VT,
N1.getOperand(0).getOperand(1),
N1.getOperand(1)));
if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB &&
- isNullConstant(N0.getOperand(0).getOperand(0)))
+ isNullConstantOrNullSplatConstant(N0.getOperand(0).getOperand(0)))
return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
DAG.getNode(ISD::SHL, SDLoc(N), VT,
N0.getOperand(0).getOperand(1),
@@ -1758,7 +1799,8 @@ SDValue DAGCombiner::visitADD(SDNode *N)
// (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
// and similar xforms where the inner op is either ~0 or 0.
- if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) {
+ if (NumSignBits == DestBits &&
+ isOneConstantOrOneSplatConstant(N1->getOperand(1))) {
SDLoc DL(N);
return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
}
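One more sketch of a newly enabled fold (hypothetical function name): with SimplifyDemandedBits and haveNoCommonBitsSet no longer restricted to scalar types, an add of values with disjoint known bits becomes an or, which is what combine_vec_add_uniquebits in the test diff below verifies:

define <4 x i32> @add_disjoint_bits(<4 x i32> %a, <4 x i32> %b) {
  %hi = and <4 x i32> %a, <i32 61680, i32 61680, i32 61680, i32 61680>
  %lo = and <4 x i32> %b, <i32 3855, i32 3855, i32 3855, i32 3855>
  ; the 0xF0F0 and 0x0F0F masks share no set bits, so (add x, y) is
  ; equivalent to (or x, y); the combiner can now prove this for
  ; vectors and selects (v)orps instead of paddd.
  %r = add <4 x i32> %hi, %lo
  ret <4 x i32> %r
}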
Modified: llvm/trunk/test/CodeGen/X86/combine-add.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-add.ll?rev=284015&r1=284014&r2=284015&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-add.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-add.ll Wed Oct 12 08:48:10 2016
@@ -19,18 +19,15 @@ define <4 x i32> @combine_vec_add_to_zer
define <4 x i32> @combine_vec_add_constant_sub(<4 x i32> %a) {
; SSE-LABEL: combine_vec_add_constant_sub:
; SSE: # BB#0:
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [0,1,2,3]
-; SSE-NEXT: movdqa %xmm2, %xmm1
+; SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,2,4,6]
; SSE-NEXT: psubd %xmm0, %xmm1
-; SSE-NEXT: paddd %xmm2, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_add_constant_sub:
; AVX: # BB#0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3]
+; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,2,4,6]
; AVX-NEXT: vpsubd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = sub <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %a
%2 = add <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %1
@@ -41,17 +38,13 @@ define <4 x i32> @combine_vec_add_consta
define <4 x i32> @combine_vec_add_neg0(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_vec_add_neg0:
; SSE: # BB#0:
-; SSE-NEXT: pxor %xmm2, %xmm2
-; SSE-NEXT: psubd %xmm0, %xmm2
-; SSE-NEXT: paddd %xmm1, %xmm2
-; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: psubd %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_add_neg0:
; AVX: # BB#0:
-; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpsubd %xmm0, %xmm2, %xmm0
-; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsubd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%1 = sub <4 x i32> zeroinitializer, %a
%2 = add <4 x i32> %1, %b
@@ -62,16 +55,12 @@ define <4 x i32> @combine_vec_add_neg0(<
define <4 x i32> @combine_vec_add_neg1(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_vec_add_neg1:
; SSE: # BB#0:
-; SSE-NEXT: pxor %xmm2, %xmm2
-; SSE-NEXT: psubd %xmm1, %xmm2
-; SSE-NEXT: paddd %xmm2, %xmm0
+; SSE-NEXT: psubd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_add_neg1:
; AVX: # BB#0:
-; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpsubd %xmm1, %xmm2, %xmm1
-; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = sub <4 x i32> zeroinitializer, %b
%2 = add <4 x i32> %a, %1
@@ -186,18 +175,16 @@ define <4 x i32> @combine_vec_add_sub_ad
define <4 x i32> @combine_vec_add_sub_sub(<4 x i32> %a, <4 x i32> %b, <4 x i32> %d) {
; SSE-LABEL: combine_vec_add_sub_sub:
; SSE: # BB#0:
+; SSE-NEXT: paddd {{.*}}(%rip), %xmm0
+; SSE-NEXT: paddd %xmm2, %xmm1
; SSE-NEXT: psubd %xmm1, %xmm0
-; SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3]
-; SSE-NEXT: psubd %xmm2, %xmm1
-; SSE-NEXT: paddd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_add_sub_sub:
; AVX: # BB#0:
+; AVX-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3]
-; AVX-NEXT: vpsubd %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = sub <4 x i32> %a, %b
%2 = sub <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %d
@@ -209,18 +196,18 @@ define <4 x i32> @combine_vec_add_sub_su
define <4 x i32> @combine_vec_add_uniquebits(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_vec_add_uniquebits:
; SSE: # BB#0:
-; SSE-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE-NEXT: paddd %xmm1, %xmm0
+; SSE-NEXT: andps {{.*}}(%rip), %xmm0
+; SSE-NEXT: andps {{.*}}(%rip), %xmm1
+; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_add_uniquebits:
; AVX: # BB#0:
-; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
-; AVX-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
-; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
+; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
+; AVX-NEXT: vandps %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = and <4 x i32> %a, <i32 61680, i32 61680, i32 61680, i32 61680>
%2 = and <4 x i32> %b, <i32 3855, i32 3855, i32 3855, i32 3855>
@@ -232,18 +219,14 @@ define <4 x i32> @combine_vec_add_unique
define <4 x i32> @combine_vec_add_shl_neg0(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: combine_vec_add_shl_neg0:
; SSE: # BB#0:
-; SSE-NEXT: pxor %xmm2, %xmm2
-; SSE-NEXT: psubd %xmm1, %xmm2
-; SSE-NEXT: pslld $5, %xmm2
-; SSE-NEXT: paddd %xmm2, %xmm0
+; SSE-NEXT: pslld $5, %xmm1
+; SSE-NEXT: psubd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_add_shl_neg0:
; AVX: # BB#0:
-; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpsubd %xmm1, %xmm2, %xmm1
; AVX-NEXT: vpslld $5, %xmm1, %xmm1
-; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = sub <4 x i32> zeroinitializer, %y
%2 = shl <4 x i32> %1, <i32 5, i32 5, i32 5, i32 5>
@@ -255,18 +238,14 @@ define <4 x i32> @combine_vec_add_shl_ne
define <4 x i32> @combine_vec_add_shl_neg1(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: combine_vec_add_shl_neg1:
; SSE: # BB#0:
-; SSE-NEXT: pxor %xmm2, %xmm2
-; SSE-NEXT: psubd %xmm1, %xmm2
-; SSE-NEXT: pslld $5, %xmm2
-; SSE-NEXT: paddd %xmm2, %xmm0
+; SSE-NEXT: pslld $5, %xmm1
+; SSE-NEXT: psubd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_add_shl_neg1:
; AVX: # BB#0:
-; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpsubd %xmm1, %xmm2, %xmm1
; AVX-NEXT: vpslld $5, %xmm1, %xmm1
-; AVX-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = sub <4 x i32> zeroinitializer, %y
%2 = shl <4 x i32> %1, <i32 5, i32 5, i32 5, i32 5>
@@ -280,15 +259,13 @@ define <4 x i32> @combine_vec_add_and_co
; SSE-LABEL: combine_vec_add_and_compare:
; SSE: # BB#0:
; SSE-NEXT: pcmpeqd %xmm2, %xmm1
-; SSE-NEXT: psrld $31, %xmm1
-; SSE-NEXT: paddd %xmm1, %xmm0
+; SSE-NEXT: psubd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_add_and_compare:
; AVX: # BB#0:
; AVX-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpsrld $31, %xmm1, %xmm1
-; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = icmp eq <4 x i32> %a1, %a2
%2 = sext <4 x i1> %1 to <4 x i32>