[llvm] r355517 - [DAGCombiner] Enable UADDO/USUBO vector combine support
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 6 08:11:03 PST 2019
Author: rksimon
Date: Wed Mar 6 08:11:03 2019
New Revision: 355517
URL: http://llvm.org/viewvc/llvm-project?rev=355517&view=rev
Log:
[DAGCombiner] Enable UADDO/USUBO vector combine support
Differential Revision: https://reviews.llvm.org/D58965
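
For context, a minimal IR sketch of a pattern this change lets the DAGCombiner fold for vectors (adapted from the combine_vec_uadd_zero test updated below; the wrapper name is illustrative): a uadd.with.overflow against a zero splat now simplifies to the original operand with a known-false carry, so no compare/blend sequence is emitted.

define <4 x i32> @uaddo_zero_sketch(<4 x i32> %a0) {
  ; With this change the combiner folds (uaddo x, 0) -> x + no carry,
  ; so this lowers to a plain return on x86 (see combine_vec_uadd_zero below).
  %r = call {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> zeroinitializer)
  %v = extractvalue {<4 x i32>, <4 x i1>} %r, 0
  ret <4 x i32> %v
}
declare {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32>, <4 x i32>)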
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/X86/combine-addo.ll
llvm/trunk/test/CodeGen/X86/combine-subo.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=355517&r1=355516&r2=355517&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Wed Mar 6 08:11:03 2019
@@ -2443,8 +2443,6 @@ SDValue DAGCombiner::visitUADDO(SDNode *
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
- if (VT.isVector())
- return SDValue();
EVT CarryVT = N->getValueType(1);
SDLoc DL(N);
@@ -2455,13 +2453,12 @@ SDValue DAGCombiner::visitUADDO(SDNode *
DAG.getUNDEF(CarryVT));
// canonicalize constant to RHS.
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- if (N0C && !N1C)
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
// fold (uaddo x, 0) -> x + no carry out
- if (isNullConstant(N1))
+ if (isNullOrNullSplat(N1))
return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
// If it cannot overflow, transform into an add.
@@ -2488,7 +2485,9 @@ SDValue DAGCombiner::visitUADDO(SDNode *
}
SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
- auto VT = N0.getValueType();
+ EVT VT = N0.getValueType();
+ if (VT.isVector())
+ return SDValue();
// (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
// If Y + 1 cannot overflow.
@@ -2952,8 +2951,6 @@ SDValue DAGCombiner::visitUSUBO(SDNode *
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
- if (VT.isVector())
- return SDValue();
EVT CarryVT = N->getValueType(1);
SDLoc DL(N);
@@ -2969,11 +2966,11 @@ SDValue DAGCombiner::visitUSUBO(SDNode *
DAG.getConstant(0, DL, CarryVT));
// fold (usubo x, 0) -> x + no borrow
- if (isNullConstant(N1))
+ if (isNullOrNullSplat(N1))
return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
// Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
- if (isAllOnesConstant(N0))
+ if (isAllOnesOrAllOnesSplat(N0))
return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
DAG.getConstant(0, DL, CarryVT));
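
In short, the early vector bail-out is dropped from visitUADDO/visitUSUBO (and reinstated only in visitUADDOLike, whose folds remain scalar-only), and the scalar-only constant predicates are swapped for splat-aware ones, so the basic folds above now also fire for vector splats. A small IR sketch of one such case (trimmed from the combine_vec_usub_negone test below; names are illustrative): (usubo -1, x) now becomes (xor x, -1) with a known-false borrow for vectors as well.

define <4 x i32> @usubo_negone_sketch(<4 x i32> %x) {
  ; Folds to ~x + no borrow; on x86 this now selects to a single pxor/vpxor
  ; (see combine_vec_usub_negone in combine-subo.ll below).
  %r = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %x)
  %v = extractvalue {<4 x i32>, <4 x i1>} %r, 0
  ret <4 x i32> %v
}
declare {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32>, <4 x i32>)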
Modified: llvm/trunk/test/CodeGen/X86/combine-addo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-addo.ll?rev=355517&r1=355516&r2=355517&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-addo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-addo.ll Wed Mar 6 08:11:03 2019
@@ -62,18 +62,10 @@ define i32 @combine_uadd_zero(i32 %a0, i
define <4 x i32> @combine_vec_uadd_zero(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: combine_vec_uadd_zero:
; SSE: # %bb.0:
-; SSE-NEXT: movdqa %xmm0, %xmm2
-; SSE-NEXT: pmaxud %xmm0, %xmm0
-; SSE-NEXT: pcmpeqd %xmm2, %xmm0
-; SSE-NEXT: blendvps %xmm0, %xmm2, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_uadd_zero:
; AVX: # %bb.0:
-; AVX-NEXT: vpmaxud %xmm0, %xmm0, %xmm2
-; AVX-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm2
-; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%1 = call {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> zeroinitializer)
%2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
@@ -108,24 +100,23 @@ define i32 @combine_uadd_not(i32 %a0, i3
define <4 x i32> @combine_vec_uadd_not(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: combine_vec_uadd_not:
; SSE: # %bb.0:
-; SSE-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE-NEXT: pxor %xmm2, %xmm0
-; SSE-NEXT: movdqa %xmm0, %xmm3
-; SSE-NEXT: psubd %xmm2, %xmm3
-; SSE-NEXT: pmaxud %xmm3, %xmm0
-; SSE-NEXT: pcmpeqd %xmm3, %xmm0
-; SSE-NEXT: blendvps %xmm0, %xmm3, %xmm1
+; SSE-NEXT: pxor %xmm2, %xmm2
+; SSE-NEXT: psubd %xmm0, %xmm2
+; SSE-NEXT: movdqa {{.*#+}} xmm0 = [1,1,1,1]
+; SSE-NEXT: pmaxud %xmm2, %xmm0
+; SSE-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE-NEXT: blendvps %xmm0, %xmm2, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_uadd_not:
; AVX: # %bb.0:
-; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpsubd %xmm2, %xmm0, %xmm2
-; AVX-NEXT: vpmaxud %xmm0, %xmm2, %xmm0
-; AVX-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0
-; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpsubd %xmm0, %xmm2, %xmm0
+; AVX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
+; AVX-NEXT: vpmaxud %xmm2, %xmm0, %xmm2
+; AVX-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm2
+; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%1 = xor <4 x i32> %a0, <i32 -1, i32 -1, i32 -1, i32 -1>
%2 = call {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
Modified: llvm/trunk/test/CodeGen/X86/combine-subo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-subo.ll?rev=355517&r1=355516&r2=355517&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-subo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-subo.ll Wed Mar 6 08:11:03 2019
@@ -62,18 +62,10 @@ define i32 @combine_usub_zero(i32 %a0, i
define <4 x i32> @combine_vec_usub_zero(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: combine_vec_usub_zero:
; SSE: # %bb.0:
-; SSE-NEXT: movdqa %xmm0, %xmm2
-; SSE-NEXT: pminud %xmm0, %xmm0
-; SSE-NEXT: pcmpeqd %xmm2, %xmm0
-; SSE-NEXT: blendvps %xmm0, %xmm2, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_usub_zero:
; AVX: # %bb.0:
-; AVX-NEXT: vpminud %xmm0, %xmm0, %xmm2
-; AVX-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm2
-; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%1 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> zeroinitializer)
%2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
@@ -138,20 +130,12 @@ define i32 @combine_usub_self(i32 %a0, i
define <4 x i32> @combine_vec_usub_self(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: combine_vec_usub_self:
; SSE: # %bb.0:
-; SSE-NEXT: movdqa %xmm0, %xmm2
-; SSE-NEXT: psubd %xmm0, %xmm2
-; SSE-NEXT: pminud %xmm2, %xmm0
-; SSE-NEXT: pcmpeqd %xmm2, %xmm0
-; SSE-NEXT: blendvps %xmm0, %xmm2, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_usub_self:
; AVX: # %bb.0:
-; AVX-NEXT: vpsubd %xmm0, %xmm0, %xmm2
-; AVX-NEXT: vpminud %xmm0, %xmm2, %xmm0
-; AVX-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0
-; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> %a0)
%2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
@@ -183,22 +167,14 @@ define i32 @combine_usub_negone(i32 %a0,
define <4 x i32> @combine_vec_usub_negone(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: combine_vec_usub_negone:
; SSE: # %bb.0:
-; SSE-NEXT: movdqa %xmm0, %xmm2
-; SSE-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE-NEXT: pxor %xmm0, %xmm2
-; SSE-NEXT: pminud %xmm2, %xmm0
-; SSE-NEXT: pcmpeqd %xmm2, %xmm0
-; SSE-NEXT: blendvps %xmm0, %xmm2, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_usub_negone:
; AVX: # %bb.0:
-; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpminud %xmm2, %xmm0, %xmm2
-; AVX-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm2
-; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a0)
%2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
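
The CHECK lines in these tests appear to be autogenerated; if they need refreshing after follow-up changes, running llvm/utils/update_llc_test_checks.py on the test files (with a built llc available; the script picks up the existing RUN lines) should regenerate both the SSE and AVX blocks.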