[llvm-branch-commits] [llvm] 969918e - [DAG] Legalize umin(x, y) -> sub(x, usubsat(x, y)) and umax(x, y) -> add(x, usubsat(y, x)) iff usubsat is legal
Simon Pilgrim via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Nov 27 03:23:46 PST 2020
Author: Simon Pilgrim
Date: 2020-11-27T11:18:58Z
New Revision: 969918e177adcfd526da7d8e21e5d76860e09c9e
URL: https://github.com/llvm/llvm-project/commit/969918e177adcfd526da7d8e21e5d76860e09c9e
DIFF: https://github.com/llvm/llvm-project/commit/969918e177adcfd526da7d8e21e5d76860e09c9e.diff
LOG: [DAG] Legalize umin(x,y) -> sub(x,usubsat(x,y)) and umax(x,y) -> add(x,usubsat(y,x)) iff usubsat is legal
If usubsat() is legal, this is likely to produce a smaller codegen expansion than the default cmp+select expansion.
This also allows us to move the x86-specific lowering into the generic expansion code.
Differential Revision: https://reviews.llvm.org/D92183
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/Analysis/CostModel/AArch64/min-max.ll
llvm/test/CodeGen/X86/midpoint-int-vec-128.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 3464fe87d99f..e45a311f84a4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -7478,10 +7478,26 @@ SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
SDValue Op0 = Node->getOperand(0);
SDValue Op1 = Node->getOperand(1);
EVT VT = Op0.getValueType();
+ unsigned Opcode = Node->getOpcode();
+ SDLoc DL(Node);
+
+ // umin(x,y) -> sub(x,usubsat(x,y))
+ if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
+ isOperationLegal(ISD::USUBSAT, VT)) {
+ return DAG.getNode(ISD::SUB, DL, VT, Op0,
+ DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
+ }
+
+ // umax(x,y) -> add(x,usubsat(y,x))
+ if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
+ isOperationLegal(ISD::USUBSAT, VT)) {
+ return DAG.getNode(ISD::ADD, DL, VT, Op0,
+ DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
+ }
// Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
ISD::CondCode CC;
- switch (Node->getOpcode()) {
+ switch (Opcode) {
default: llvm_unreachable("How did we get here?");
case ISD::SMAX: CC = ISD::SETGT; break;
case ISD::SMIN: CC = ISD::SETLT; break;
@@ -7494,7 +7510,6 @@ SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
return DAG.UnrollVectorOp(Node);
- SDLoc DL(Node);
SDValue Cond = DAG.getSetCC(DL, VT, Op0, Op1, CC);
return DAG.getSelect(DL, VT, Cond, Op0, Op1);
}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 54bb3d0c7781..37a34023b8d0 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -217,10 +217,17 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) {
auto *RetTy = ICA.getReturnType();
switch (ICA.getID()) {
- case Intrinsic::smin:
case Intrinsic::umin:
- case Intrinsic::smax:
case Intrinsic::umax: {
+ auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
+ // umin(x,y) -> sub(x,usubsat(x,y))
+ // umax(x,y) -> add(x,usubsat(y,x))
+ if (LT.second == MVT::v2i64)
+ return LT.first * 2;
+ LLVM_FALLTHROUGH;
+ }
+ case Intrinsic::smin:
+ case Intrinsic::smax: {
static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
MVT::v8i16, MVT::v2i32, MVT::v4i32};
auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index fcbe1330b546..5cbca95f45f5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26959,22 +26959,6 @@ static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) {
if (VT == MVT::v32i16 || VT == MVT::v64i8)
return splitVectorIntBinary(Op, DAG);
- SDLoc DL(Op);
- unsigned Opcode = Op.getOpcode();
- SDValue N0 = Op.getOperand(0);
- SDValue N1 = Op.getOperand(1);
-
- // For pre-SSE41, we can perform UMIN/UMAX v8i16 by using psubusw.
- if (VT == MVT::v8i16) {
- assert((Opcode == ISD::UMIN || Opcode == ISD::UMAX) &&
- "Unexpected MIN/MAX opcode");
- if (Opcode == ISD::UMIN)
- return DAG.getNode(ISD::SUB, DL, VT, N0,
- DAG.getNode(ISD::USUBSAT, DL, VT, N0, N1));
- return DAG.getNode(ISD::ADD, DL, VT,
- DAG.getNode(ISD::USUBSAT, DL, VT, N1, N0), N0);
- }
-
// Default to expand.
return SDValue();
}
diff --git a/llvm/test/Analysis/CostModel/AArch64/min-max.ll b/llvm/test/Analysis/CostModel/AArch64/min-max.ll
index f47fc1c49ef5..18bac082a66a 100644
--- a/llvm/test/Analysis/CostModel/AArch64/min-max.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/min-max.ll
@@ -115,12 +115,13 @@ define <8 x i32> @umin.v8i32(<8 x i32> %v0, <8 x i32> %v1) {
}
; COST-LABEL: umin.v2i64
-; COST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %v0, <2 x i64> %v1)
+; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %v0, <2 x i64> %v1)
; CODE-LABEL: umin.v2i64
; CODE: bb.0
-; CODE: csel
-; CODE: csel
+; CODE-NEXT: uqsub v{{.*}}.2d, v{{.*}}.2d, v{{.*}}.2d
+; CODE-NEXT: sub v{{.*}}.2d, v{{.*}}.2d, v{{.*}}.2d
+; CODE-NEXT: ret
declare <2 x i64> @llvm.umin.v2i64(<2 x i64>, <2 x i64>)
define <2 x i64> @umin.v2i64(<2 x i64> %v0, <2 x i64> %v1) {
@@ -369,12 +370,13 @@ define <8 x i32> @umax.v8i32(<8 x i32> %v0, <8 x i32> %v1) {
}
; COST-LABEL: umax.v2i64
-; COST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %v0, <2 x i64> %v1)
+; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %v0, <2 x i64> %v1)
; CODE-LABEL: umax.v2i64
; CODE: bb.0
-; CODE: csel
-; CODE: csel
+; CODE-NEXT: uqsub v{{.*}}.2d, v{{.*}}.2d, v{{.*}}.2d
+; CODE-NEXT: add v{{.*}}.2d, v{{.*}}.2d, v{{.*}}.2d
+; CODE-NEXT: ret
declare <2 x i64> @llvm.umax.v2i64(<2 x i64>, <2 x i64>)
define <2 x i64> @umax.v2i64(<2 x i64> %v0, <2 x i64> %v1) {
diff --git a/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll b/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll
index c12d90fee09e..8ef3f307d0b5 100644
--- a/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll
+++ b/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll
@@ -2179,8 +2179,8 @@ define <8 x i16> @vec128_i16_unsigned_reg_reg(<8 x i16> %a1, <8 x i16> %a2) noun
; SSE2-NEXT: psubusw %xmm1, %xmm2
; SSE2-NEXT: psubusw %xmm0, %xmm1
; SSE2-NEXT: psubw %xmm0, %xmm2
-; SSE2-NEXT: paddw %xmm0, %xmm2
; SSE2-NEXT: paddw %xmm1, %xmm2
+; SSE2-NEXT: paddw %xmm0, %xmm2
; SSE2-NEXT: psrlw $1, %xmm2
; SSE2-NEXT: pmullw %xmm3, %xmm2
; SSE2-NEXT: paddw %xmm0, %xmm2
More information about the llvm-branch-commits mailing list