[llvm] b01417d - [AArch64] Optimise min/max lowering in ISel
Irina Dobrescu via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 2 05:40:40 PDT 2021
Author: Irina Dobrescu
Date: 2021-08-02T13:40:21+01:00
New Revision: b01417d3c58d5438c8bdb0762da0e882f905ef7f
URL: https://github.com/llvm/llvm-project/commit/b01417d3c58d5438c8bdb0762da0e882f905ef7f
DIFF: https://github.com/llvm/llvm-project/commit/b01417d3c58d5438c8bdb0762da0e882f905ef7f.diff
LOG: [AArch64] Optimise min/max lowering in ISel
Differential Revision: https://reviews.llvm.org/D106561
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/test/Analysis/CostModel/AArch64/min-max.ll
llvm/test/CodeGen/AArch64/min-max.ll
llvm/test/CodeGen/AArch64/minmax.ll
llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index ca6b87a5ebb04..4a13fa2d662a7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1040,6 +1040,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITREVERSE, MVT::v4i32, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v2i64, Custom);
+ for (auto VT : {MVT::v1i64, MVT::v2i64}) {
+ setOperationAction(ISD::UMAX, VT, Custom);
+ setOperationAction(ISD::SMAX, VT, Custom);
+ setOperationAction(ISD::UMIN, VT, Custom);
+ setOperationAction(ISD::SMIN, VT, Custom);
+ }
// AArch64 doesn't have MUL.2d:
setOperationAction(ISD::MUL, MVT::v2i64, Expand);
@@ -4825,17 +4831,10 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::UDIV:
return LowerDIV(Op, DAG);
case ISD::SMIN:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
- /*OverrideNEON=*/true);
case ISD::UMIN:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
- /*OverrideNEON=*/true);
case ISD::SMAX:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
- /*OverrideNEON=*/true);
case ISD::UMAX:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,
- /*OverrideNEON=*/true);
+ return LowerMinMax(Op, DAG);
case ISD::SRA:
case ISD::SRL:
case ISD::SHL:
@@ -7131,6 +7130,56 @@ SDValue AArch64TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::CTLZ, DL, VT, RBIT);
}
+SDValue AArch64TargetLowering::LowerMinMax(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ unsigned Opcode = Op.getOpcode();
+ ISD::CondCode CC;
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Wrong instruction");
+ case ISD::SMAX:
+ CC = ISD::SETGT;
+ break;
+ case ISD::SMIN:
+ CC = ISD::SETLT;
+ break;
+ case ISD::UMAX:
+ CC = ISD::SETUGT;
+ break;
+ case ISD::UMIN:
+ CC = ISD::SETULT;
+ break;
+ }
+
+ if (VT.isScalableVector() ||
+ useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true)) {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Wrong instruction");
+ case ISD::SMAX:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
+ /*OverrideNEON=*/true);
+ case ISD::SMIN:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
+ /*OverrideNEON=*/true);
+ case ISD::UMAX:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,
+ /*OverrideNEON=*/true);
+ case ISD::UMIN:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
+ /*OverrideNEON=*/true);
+ }
+ }
+
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Cond = DAG.getSetCC(DL, VT, Op0, Op1, CC);
+ return DAG.getSelect(DL, VT, Cond, Op0, Op1);
+}
+
SDValue AArch64TargetLowering::LowerBitreverse(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 2b337255fc275..f2663e2aeb85a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -966,6 +966,7 @@ class AArch64TargetLowering : public TargetLowering {
SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 01236aa6b5276..8e0f06c24e2fc 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -220,19 +220,15 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
auto *RetTy = ICA.getReturnType();
switch (ICA.getID()) {
case Intrinsic::umin:
- case Intrinsic::umax: {
- auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
- // umin(x,y) -> sub(x,usubsat(x,y))
- // umax(x,y) -> add(x,usubsat(y,x))
- if (LT.second == MVT::v2i64)
- return LT.first * 2;
- LLVM_FALLTHROUGH;
- }
+ case Intrinsic::umax:
case Intrinsic::smin:
case Intrinsic::smax: {
static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
MVT::v8i16, MVT::v2i32, MVT::v4i32};
auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
+ // v2i64 types get converted to cmp+bif hence the cost of 2
+ if (LT.second == MVT::v2i64)
+ return LT.first * 2;
if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))
return LT.first;
break;
diff --git a/llvm/test/Analysis/CostModel/AArch64/min-max.ll b/llvm/test/Analysis/CostModel/AArch64/min-max.ll
index 7843b4b2c980e..6e4839141816e 100644
--- a/llvm/test/Analysis/CostModel/AArch64/min-max.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/min-max.ll
@@ -96,8 +96,8 @@ define void @reduce_smin() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V4i64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%V1i8 = call <1 x i8> @llvm.smin.v1i8(<1 x i8> undef, <1 x i8> undef)
@@ -135,8 +135,8 @@ define void @reduce_smax() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V4i64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%V1i8 = call <1 x i8> @llvm.smax.v1i8(<1 x i8> undef, <1 x i8> undef)
diff --git a/llvm/test/CodeGen/AArch64/min-max.ll b/llvm/test/CodeGen/AArch64/min-max.ll
index 1536a6d0cb9d9..95b3f85af0ae9 100644
--- a/llvm/test/CodeGen/AArch64/min-max.ll
+++ b/llvm/test/CodeGen/AArch64/min-max.ll
@@ -185,13 +185,8 @@ declare <1 x i64> @llvm.smax.v1i64(<1 x i64> %a, <1 x i64> %b) readnone
define <1 x i64> @smax1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-ISEL-LABEL: smax1i64:
; CHECK-ISEL: // %bb.0:
-; CHECK-ISEL-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-ISEL-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-ISEL-NEXT: fmov x8, d1
-; CHECK-ISEL-NEXT: fmov x9, d0
-; CHECK-ISEL-NEXT: cmp x9, x8
-; CHECK-ISEL-NEXT: csel x8, x9, x8, gt
-; CHECK-ISEL-NEXT: fmov d0, x8
+; CHECK-ISEL-NEXT: cmgt d2, d0, d1
+; CHECK-ISEL-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-ISEL-NEXT: ret
;
; CHECK-GLOBAL-LABEL: smax1i64:
@@ -210,16 +205,8 @@ declare <2 x i64> @llvm.smax.v2i64(<2 x i64> %a, <2 x i64> %b) readnone
define <2 x i64> @smax2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-ISEL-LABEL: smax2i64:
; CHECK-ISEL: // %bb.0:
-; CHECK-ISEL-NEXT: mov x8, v1.d[1]
-; CHECK-ISEL-NEXT: mov x9, v0.d[1]
-; CHECK-ISEL-NEXT: fmov x10, d1
-; CHECK-ISEL-NEXT: fmov x11, d0
-; CHECK-ISEL-NEXT: cmp x9, x8
-; CHECK-ISEL-NEXT: csel x8, x9, x8, gt
-; CHECK-ISEL-NEXT: cmp x11, x10
-; CHECK-ISEL-NEXT: csel x9, x11, x10, gt
-; CHECK-ISEL-NEXT: fmov d0, x9
-; CHECK-ISEL-NEXT: mov v0.d[1], x8
+; CHECK-ISEL-NEXT: cmgt v2.2d, v0.2d, v1.2d
+; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-ISEL-NEXT: ret
;
; CHECK-GLOBAL-LABEL: smax2i64:
@@ -238,26 +225,10 @@ declare <4 x i64> @llvm.smax.v4i64(<4 x i64> %a, <4 x i64> %b) readnone
define void @smax4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {
; CHECK-ISEL-LABEL: smax4i64:
; CHECK-ISEL: // %bb.0:
-; CHECK-ISEL-NEXT: mov x8, v2.d[1]
-; CHECK-ISEL-NEXT: mov x9, v0.d[1]
-; CHECK-ISEL-NEXT: fmov x10, d2
-; CHECK-ISEL-NEXT: fmov x11, d0
-; CHECK-ISEL-NEXT: cmp x9, x8
-; CHECK-ISEL-NEXT: csel x8, x9, x8, gt
-; CHECK-ISEL-NEXT: cmp x11, x10
-; CHECK-ISEL-NEXT: mov x9, v3.d[1]
-; CHECK-ISEL-NEXT: csel x10, x11, x10, gt
-; CHECK-ISEL-NEXT: mov x11, v1.d[1]
-; CHECK-ISEL-NEXT: cmp x11, x9
-; CHECK-ISEL-NEXT: fmov d0, x10
-; CHECK-ISEL-NEXT: fmov x10, d3
-; CHECK-ISEL-NEXT: csel x9, x11, x9, gt
-; CHECK-ISEL-NEXT: fmov x11, d1
-; CHECK-ISEL-NEXT: cmp x11, x10
-; CHECK-ISEL-NEXT: csel x10, x11, x10, gt
-; CHECK-ISEL-NEXT: fmov d1, x10
-; CHECK-ISEL-NEXT: mov v0.d[1], x8
-; CHECK-ISEL-NEXT: mov v1.d[1], x9
+; CHECK-ISEL-NEXT: cmgt v4.2d, v0.2d, v2.2d
+; CHECK-ISEL-NEXT: cmgt v5.2d, v1.2d, v3.2d
+; CHECK-ISEL-NEXT: bif v0.16b, v2.16b, v4.16b
+; CHECK-ISEL-NEXT: bif v1.16b, v3.16b, v5.16b
; CHECK-ISEL-NEXT: stp q0, q1, [x0]
; CHECK-ISEL-NEXT: ret
;
@@ -457,13 +428,8 @@ declare <1 x i64> @llvm.umax.v1i64(<1 x i64> %a, <1 x i64> %b) readnone
define <1 x i64> @umax1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-ISEL-LABEL: umax1i64:
; CHECK-ISEL: // %bb.0:
-; CHECK-ISEL-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-ISEL-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-ISEL-NEXT: fmov x8, d1
-; CHECK-ISEL-NEXT: fmov x9, d0
-; CHECK-ISEL-NEXT: cmp x9, x8
-; CHECK-ISEL-NEXT: csel x8, x9, x8, hi
-; CHECK-ISEL-NEXT: fmov d0, x8
+; CHECK-ISEL-NEXT: cmhi d2, d0, d1
+; CHECK-ISEL-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-ISEL-NEXT: ret
;
; CHECK-GLOBAL-LABEL: umax1i64:
@@ -482,8 +448,8 @@ declare <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b) readnone
define <2 x i64> @umax2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-ISEL-LABEL: umax2i64:
; CHECK-ISEL: // %bb.0:
-; CHECK-ISEL-NEXT: uqsub v1.2d, v1.2d, v0.2d
-; CHECK-ISEL-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-ISEL-NEXT: cmhi v2.2d, v0.2d, v1.2d
+; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-ISEL-NEXT: ret
;
; CHECK-GLOBAL-LABEL: umax2i64:
@@ -502,10 +468,10 @@ declare <4 x i64> @llvm.umax.v4i64(<4 x i64> %a, <4 x i64> %b) readnone
define void @umax4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {
; CHECK-ISEL-LABEL: umax4i64:
; CHECK-ISEL: // %bb.0:
-; CHECK-ISEL-NEXT: uqsub v2.2d, v2.2d, v0.2d
-; CHECK-ISEL-NEXT: uqsub v3.2d, v3.2d, v1.2d
-; CHECK-ISEL-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-ISEL-NEXT: add v1.2d, v1.2d, v3.2d
+; CHECK-ISEL-NEXT: cmhi v4.2d, v0.2d, v2.2d
+; CHECK-ISEL-NEXT: cmhi v5.2d, v1.2d, v3.2d
+; CHECK-ISEL-NEXT: bif v0.16b, v2.16b, v4.16b
+; CHECK-ISEL-NEXT: bif v1.16b, v3.16b, v5.16b
; CHECK-ISEL-NEXT: stp q0, q1, [x0]
; CHECK-ISEL-NEXT: ret
;
@@ -705,13 +671,8 @@ declare <1 x i64> @llvm.smin.v1i64(<1 x i64> %a, <1 x i64> %b) readnone
define <1 x i64> @smin1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-ISEL-LABEL: smin1i64:
; CHECK-ISEL: // %bb.0:
-; CHECK-ISEL-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-ISEL-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-ISEL-NEXT: fmov x8, d1
-; CHECK-ISEL-NEXT: fmov x9, d0
-; CHECK-ISEL-NEXT: cmp x9, x8
-; CHECK-ISEL-NEXT: csel x8, x9, x8, lt
-; CHECK-ISEL-NEXT: fmov d0, x8
+; CHECK-ISEL-NEXT: cmgt d2, d1, d0
+; CHECK-ISEL-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-ISEL-NEXT: ret
;
; CHECK-GLOBAL-LABEL: smin1i64:
@@ -730,16 +691,8 @@ declare <2 x i64> @llvm.smin.v2i64(<2 x i64> %a, <2 x i64> %b) readnone
define <2 x i64> @smin2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-ISEL-LABEL: smin2i64:
; CHECK-ISEL: // %bb.0:
-; CHECK-ISEL-NEXT: mov x8, v1.d[1]
-; CHECK-ISEL-NEXT: mov x9, v0.d[1]
-; CHECK-ISEL-NEXT: fmov x10, d1
-; CHECK-ISEL-NEXT: fmov x11, d0
-; CHECK-ISEL-NEXT: cmp x9, x8
-; CHECK-ISEL-NEXT: csel x8, x9, x8, lt
-; CHECK-ISEL-NEXT: cmp x11, x10
-; CHECK-ISEL-NEXT: csel x9, x11, x10, lt
-; CHECK-ISEL-NEXT: fmov d0, x9
-; CHECK-ISEL-NEXT: mov v0.d[1], x8
+; CHECK-ISEL-NEXT: cmgt v2.2d, v1.2d, v0.2d
+; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-ISEL-NEXT: ret
;
; CHECK-GLOBAL-LABEL: smin2i64:
@@ -758,26 +711,10 @@ declare <4 x i64> @llvm.smin.v4i64(<4 x i64> %a, <4 x i64> %b) readnone
define void @smin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {
; CHECK-ISEL-LABEL: smin4i64:
; CHECK-ISEL: // %bb.0:
-; CHECK-ISEL-NEXT: mov x8, v2.d[1]
-; CHECK-ISEL-NEXT: mov x9, v0.d[1]
-; CHECK-ISEL-NEXT: fmov x10, d2
-; CHECK-ISEL-NEXT: fmov x11, d0
-; CHECK-ISEL-NEXT: cmp x9, x8
-; CHECK-ISEL-NEXT: csel x8, x9, x8, lt
-; CHECK-ISEL-NEXT: cmp x11, x10
-; CHECK-ISEL-NEXT: mov x9, v3.d[1]
-; CHECK-ISEL-NEXT: csel x10, x11, x10, lt
-; CHECK-ISEL-NEXT: mov x11, v1.d[1]
-; CHECK-ISEL-NEXT: cmp x11, x9
-; CHECK-ISEL-NEXT: fmov d0, x10
-; CHECK-ISEL-NEXT: fmov x10, d3
-; CHECK-ISEL-NEXT: csel x9, x11, x9, lt
-; CHECK-ISEL-NEXT: fmov x11, d1
-; CHECK-ISEL-NEXT: cmp x11, x10
-; CHECK-ISEL-NEXT: csel x10, x11, x10, lt
-; CHECK-ISEL-NEXT: fmov d1, x10
-; CHECK-ISEL-NEXT: mov v0.d[1], x8
-; CHECK-ISEL-NEXT: mov v1.d[1], x9
+; CHECK-ISEL-NEXT: cmgt v4.2d, v2.2d, v0.2d
+; CHECK-ISEL-NEXT: cmgt v5.2d, v3.2d, v1.2d
+; CHECK-ISEL-NEXT: bif v0.16b, v2.16b, v4.16b
+; CHECK-ISEL-NEXT: bif v1.16b, v3.16b, v5.16b
; CHECK-ISEL-NEXT: stp q0, q1, [x0]
; CHECK-ISEL-NEXT: ret
;
@@ -977,13 +914,8 @@ declare <1 x i64> @llvm.umin.v1i64(<1 x i64> %a, <1 x i64> %b) readnone
define <1 x i64> @umin1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-ISEL-LABEL: umin1i64:
; CHECK-ISEL: // %bb.0:
-; CHECK-ISEL-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-ISEL-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-ISEL-NEXT: fmov x8, d1
-; CHECK-ISEL-NEXT: fmov x9, d0
-; CHECK-ISEL-NEXT: cmp x9, x8
-; CHECK-ISEL-NEXT: csel x8, x9, x8, lo
-; CHECK-ISEL-NEXT: fmov d0, x8
+; CHECK-ISEL-NEXT: cmhi d2, d1, d0
+; CHECK-ISEL-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-ISEL-NEXT: ret
;
; CHECK-GLOBAL-LABEL: umin1i64:
@@ -1002,8 +934,8 @@ declare <2 x i64> @llvm.umin.v2i64(<2 x i64> %a, <2 x i64> %b) readnone
define <2 x i64> @umin2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-ISEL-LABEL: umin2i64:
; CHECK-ISEL: // %bb.0:
-; CHECK-ISEL-NEXT: uqsub v1.2d, v0.2d, v1.2d
-; CHECK-ISEL-NEXT: sub v0.2d, v0.2d, v1.2d
+; CHECK-ISEL-NEXT: cmhi v2.2d, v1.2d, v0.2d
+; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-ISEL-NEXT: ret
;
; CHECK-GLOBAL-LABEL: umin2i64:
@@ -1022,10 +954,10 @@ declare <4 x i64> @llvm.umin.v4i64(<4 x i64> %a, <4 x i64> %b) readnone
define void @umin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {
; CHECK-ISEL-LABEL: umin4i64:
; CHECK-ISEL: // %bb.0:
-; CHECK-ISEL-NEXT: uqsub v2.2d, v0.2d, v2.2d
-; CHECK-ISEL-NEXT: uqsub v3.2d, v1.2d, v3.2d
-; CHECK-ISEL-NEXT: sub v0.2d, v0.2d, v2.2d
-; CHECK-ISEL-NEXT: sub v1.2d, v1.2d, v3.2d
+; CHECK-ISEL-NEXT: cmhi v4.2d, v2.2d, v0.2d
+; CHECK-ISEL-NEXT: cmhi v5.2d, v3.2d, v1.2d
+; CHECK-ISEL-NEXT: bif v0.16b, v2.16b, v4.16b
+; CHECK-ISEL-NEXT: bif v1.16b, v3.16b, v5.16b
; CHECK-ISEL-NEXT: stp q0, q1, [x0]
; CHECK-ISEL-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/minmax.ll b/llvm/test/CodeGen/AArch64/minmax.ll
index 5e99121cac2a9..045e0cbf1675f 100644
--- a/llvm/test/CodeGen/AArch64/minmax.ll
+++ b/llvm/test/CodeGen/AArch64/minmax.ll
@@ -160,10 +160,10 @@ define <2 x i64> @t14(<2 x i64> %a, <2 x i64> %b) {
define <4 x i64> @t15(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: t15:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmhs v4.2d, v3.2d, v1.2d
-; CHECK-NEXT: cmhs v5.2d, v2.2d, v0.2d
-; CHECK-NEXT: bif v0.16b, v2.16b, v5.16b
-; CHECK-NEXT: bif v1.16b, v3.16b, v4.16b
+; CHECK-NEXT: cmhi v4.2d, v2.2d, v0.2d
+; CHECK-NEXT: cmhi v5.2d, v3.2d, v1.2d
+; CHECK-NEXT: bif v0.16b, v2.16b, v4.16b
+; CHECK-NEXT: bif v1.16b, v3.16b, v5.16b
; CHECK-NEXT: ret
%t1 = icmp ule <4 x i64> %a, %b
%t2 = select <4 x i1> %t1, <4 x i64> %a, <4 x i64> %b
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
index cd60dd3ab96dc..9a15039799587 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
@@ -87,11 +87,11 @@ define i128 @test_v1i128(<1 x i128> %a) nounwind {
define i64 @test_v2i64(<2 x i64> %a) nounwind {
; CHECK-LABEL: test_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, v0.d[1]
-; CHECK-NEXT: fmov x9, d0
-; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: csel x0, x9, x8, hi
-; CHECK-NEXT: ret
+; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: cmhi d2, d0, d1
+; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
%b = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %a)
ret i64 %b
}
More information about the llvm-commits
mailing list