[llvm] SelectionDAG/expandFMINNUM_FMAXNUM: skips vector if SETCC/VSELECT is not legal (PR #109570)
YunQiang Su via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 9 04:01:10 PDT 2024
https://github.com/wzssyqa updated https://github.com/llvm/llvm-project/pull/109570
>From d50cafd8713c939bc4be7e5364501e940e2fd78a Mon Sep 17 00:00:00 2001
From: YunQiang Su <syq at debian.org>
Date: Sun, 22 Sep 2024 15:21:17 +0800
Subject: [PATCH] SelectionDAG/expandFMINNUM_FMAXNUM: skips vector if
SETCC/VSELECT is not legal
If SETCC or VSELECT is not legal for a vector type, we should not expand
the node into them; instead, the vector can be split.
This way, simple scalar instructions can be emitted instead of
pairs of comparison + selection instructions.
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 5 +
.../AArch64/vecreduce-fmax-legalization.ll | 191 +++++---------
.../AArch64/vecreduce-fmin-legalization.ll | 191 +++++---------
.../CodeGen/ARM/minnum-maxnum-intrinsics.ll | 159 +++++-------
.../CodeGen/Thumb2/mve-vecreduce-fminmax.ll | 232 +++++-------------
5 files changed, 258 insertions(+), 520 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index f19975557a0a77..41d93bf20f250c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8424,6 +8424,11 @@ TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
if (Node->getFlags().hasNoNaNs()) {
ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
+ EVT VT = Node->getValueType(0);
+ if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
+ !isOperationLegalOrCustom(ISD::VSELECT, VT)) &&
+ VT.isVector())
+ return SDValue();
SDValue Op1 = Node->getOperand(0);
SDValue Op2 = Node->getOperand(1);
SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred);
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
index c993051ccebf7c..ee2af110c84cd5 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
@@ -223,77 +223,69 @@ define half @test_v16f16(<16 x half> %a) nounwind {
; CHECK-NOFP-SD-NEXT: fcvt s5, h0
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
-; CHECK-NOFP-SD-NEXT: fcmp s3, s2
-; CHECK-NOFP-SD-NEXT: fcsel s2, s3, s2, gt
-; CHECK-NOFP-SD-NEXT: fcmp s5, s4
-; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
-; CHECK-NOFP-SD-NEXT: mov h4, v1.h[2]
+; CHECK-NOFP-SD-NEXT: fmaxnm s4, s5, s4
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[2]
+; CHECK-NOFP-SD-NEXT: fmaxnm s2, s3, s2
+; CHECK-NOFP-SD-NEXT: mov h3, v1.h[2]
+; CHECK-NOFP-SD-NEXT: fcvt h4, s4
+; CHECK-NOFP-SD-NEXT: fcvt s5, h5
; CHECK-NOFP-SD-NEXT: fcvt h2, s2
-; CHECK-NOFP-SD-NEXT: fcvt h3, s3
+; CHECK-NOFP-SD-NEXT: fcvt s3, h3
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
-; CHECK-NOFP-SD-NEXT: fcvt s5, h5
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
-; CHECK-NOFP-SD-NEXT: fcvt s3, h3
-; CHECK-NOFP-SD-NEXT: fcmp s5, s4
-; CHECK-NOFP-SD-NEXT: fmaxnm s2, s3, s2
-; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
-; CHECK-NOFP-SD-NEXT: mov h4, v1.h[3]
+; CHECK-NOFP-SD-NEXT: fmaxnm s3, s5, s3
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[3]
+; CHECK-NOFP-SD-NEXT: fmaxnm s2, s4, s2
+; CHECK-NOFP-SD-NEXT: mov h4, v1.h[3]
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
+; CHECK-NOFP-SD-NEXT: fcvt s5, h5
; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
-; CHECK-NOFP-SD-NEXT: fcvt s5, h5
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
-; CHECK-NOFP-SD-NEXT: fcmp s5, s4
-; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
-; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
-; CHECK-NOFP-SD-NEXT: mov h4, v1.h[4]
+; CHECK-NOFP-SD-NEXT: fmaxnm s4, s5, s4
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[4]
-; CHECK-NOFP-SD-NEXT: fcvt h3, s3
-; CHECK-NOFP-SD-NEXT: fcvt h2, s2
-; CHECK-NOFP-SD-NEXT: fcvt s4, h4
+; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
+; CHECK-NOFP-SD-NEXT: mov h3, v1.h[4]
+; CHECK-NOFP-SD-NEXT: fcvt h4, s4
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
+; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
+; CHECK-NOFP-SD-NEXT: fcvt s4, h4
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
-; CHECK-NOFP-SD-NEXT: fcmp s5, s4
-; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
-; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
-; CHECK-NOFP-SD-NEXT: mov h4, v1.h[5]
+; CHECK-NOFP-SD-NEXT: fmaxnm s3, s5, s3
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[5]
+; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s4
+; CHECK-NOFP-SD-NEXT: mov h4, v1.h[5]
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
+; CHECK-NOFP-SD-NEXT: fcvt s5, h5
; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
-; CHECK-NOFP-SD-NEXT: fcvt s5, h5
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
-; CHECK-NOFP-SD-NEXT: fcmp s5, s4
+; CHECK-NOFP-SD-NEXT: fmaxnm s4, s5, s4
+; CHECK-NOFP-SD-NEXT: mov h5, v0.h[6]
+; CHECK-NOFP-SD-NEXT: mov h0, v0.h[7]
; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
-; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
+; CHECK-NOFP-SD-NEXT: fcvt h3, s4
; CHECK-NOFP-SD-NEXT: mov h4, v1.h[6]
-; CHECK-NOFP-SD-NEXT: mov h5, v0.h[6]
+; CHECK-NOFP-SD-NEXT: fcvt s5, h5
; CHECK-NOFP-SD-NEXT: mov h1, v1.h[7]
-; CHECK-NOFP-SD-NEXT: mov h0, v0.h[7]
-; CHECK-NOFP-SD-NEXT: fcvt h3, s3
+; CHECK-NOFP-SD-NEXT: fcvt s0, h0
; CHECK-NOFP-SD-NEXT: fcvt h2, s2
+; CHECK-NOFP-SD-NEXT: fcvt s3, h3
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
-; CHECK-NOFP-SD-NEXT: fcvt s5, h5
; CHECK-NOFP-SD-NEXT: fcvt s1, h1
-; CHECK-NOFP-SD-NEXT: fcvt s0, h0
-; CHECK-NOFP-SD-NEXT: fcvt s3, h3
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
-; CHECK-NOFP-SD-NEXT: fcmp s5, s4
+; CHECK-NOFP-SD-NEXT: fmaxnm s0, s0, s1
; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
-; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
-; CHECK-NOFP-SD-NEXT: fcmp s0, s1
-; CHECK-NOFP-SD-NEXT: fcvt h3, s3
-; CHECK-NOFP-SD-NEXT: fcsel s0, s0, s1, gt
-; CHECK-NOFP-SD-NEXT: fcvt h2, s2
+; CHECK-NOFP-SD-NEXT: fmaxnm s3, s5, s4
; CHECK-NOFP-SD-NEXT: fcvt h0, s0
-; CHECK-NOFP-SD-NEXT: fcvt s3, h3
-; CHECK-NOFP-SD-NEXT: fcvt s2, h2
+; CHECK-NOFP-SD-NEXT: fcvt h2, s2
+; CHECK-NOFP-SD-NEXT: fcvt h3, s3
; CHECK-NOFP-SD-NEXT: fcvt s0, h0
+; CHECK-NOFP-SD-NEXT: fcvt s2, h2
+; CHECK-NOFP-SD-NEXT: fcvt s3, h3
; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
; CHECK-NOFP-SD-NEXT: fcvt h1, s2
; CHECK-NOFP-SD-NEXT: fcvt s1, h1
@@ -410,72 +402,44 @@ define half @test_v11f16(<11 x half> %a) nounwind {
; CHECK-NOFP-LABEL: test_v11f16:
; CHECK-NOFP: // %bb.0:
; CHECK-NOFP-NEXT: ldr h16, [sp, #8]
-; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: ldr h17, [sp]
+; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fcvt s2, h2
-; CHECK-NOFP-NEXT: adrp x8, .LCPI14_0
; CHECK-NOFP-NEXT: fcvt s16, h16
; CHECK-NOFP-NEXT: fcvt s17, h17
-; CHECK-NOFP-NEXT: fcvt s3, h3
-; CHECK-NOFP-NEXT: fcvt s4, h4
-; CHECK-NOFP-NEXT: fcmp s1, s16
-; CHECK-NOFP-NEXT: fcsel s1, s1, s16, gt
-; CHECK-NOFP-NEXT: fcmp s0, s17
+; CHECK-NOFP-NEXT: fmaxnm s1, s1, s16
+; CHECK-NOFP-NEXT: fmaxnm s0, s0, s17
; CHECK-NOFP-NEXT: ldr h16, [sp, #16]
; CHECK-NOFP-NEXT: fcvt s16, h16
-; CHECK-NOFP-NEXT: fcsel s0, s0, s17, gt
; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcmp s2, s16
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s2, s16, gt
-; CHECK-NOFP-NEXT: ldr h2, [x8, :lo12:.LCPI14_0]
-; CHECK-NOFP-NEXT: mov w8, #-8388608 // =0xff800000
-; CHECK-NOFP-NEXT: fcvt s2, h2
-; CHECK-NOFP-NEXT: fcvt h1, s1
+; CHECK-NOFP-NEXT: fmaxnm s1, s2, s16
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcmp s3, s2
-; CHECK-NOFP-NEXT: fcvt s1, h1
+; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcvt s0, h0
+; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT: fmov s1, w8
-; CHECK-NOFP-NEXT: fcsel s3, s3, s1, gt
-; CHECK-NOFP-NEXT: fcmp s4, s2
+; CHECK-NOFP-NEXT: fcvt s1, h3
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s3, h3
-; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
-; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
-; CHECK-NOFP-NEXT: fcvt s4, h5
-; CHECK-NOFP-NEXT: fcvt h3, s3
+; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
+; CHECK-NOFP-NEXT: fcvt s1, h4
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcmp s4, s2
-; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
-; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
-; CHECK-NOFP-NEXT: fcvt s4, h6
-; CHECK-NOFP-NEXT: fcvt h3, s3
+; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
+; CHECK-NOFP-NEXT: fcvt s1, h5
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcmp s4, s2
-; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
-; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
-; CHECK-NOFP-NEXT: fcvt s4, h7
-; CHECK-NOFP-NEXT: fcvt h3, s3
+; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
+; CHECK-NOFP-NEXT: fcvt s1, h6
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcmp s4, s2
-; CHECK-NOFP-NEXT: fcvt s3, h3
-; CHECK-NOFP-NEXT: fcsel s1, s4, s1, gt
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
-; CHECK-NOFP-NEXT: fcvt s1, h1
+; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
+; CHECK-NOFP-NEXT: fcvt s1, h7
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
@@ -517,73 +481,44 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
; CHECK-NOFP-LABEL: test_v11f16_ninf:
; CHECK-NOFP: // %bb.0:
; CHECK-NOFP-NEXT: ldr h16, [sp, #8]
-; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: ldr h17, [sp]
+; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fcvt s2, h2
-; CHECK-NOFP-NEXT: adrp x8, .LCPI15_0
; CHECK-NOFP-NEXT: fcvt s16, h16
; CHECK-NOFP-NEXT: fcvt s17, h17
-; CHECK-NOFP-NEXT: fcvt s3, h3
-; CHECK-NOFP-NEXT: fcvt s4, h4
-; CHECK-NOFP-NEXT: fcmp s1, s16
-; CHECK-NOFP-NEXT: fcsel s1, s1, s16, gt
-; CHECK-NOFP-NEXT: fcmp s0, s17
+; CHECK-NOFP-NEXT: fmaxnm s1, s1, s16
+; CHECK-NOFP-NEXT: fmaxnm s0, s0, s17
; CHECK-NOFP-NEXT: ldr h16, [sp, #16]
; CHECK-NOFP-NEXT: fcvt s16, h16
-; CHECK-NOFP-NEXT: fcsel s0, s0, s17, gt
; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcmp s2, s16
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s2, s16, gt
-; CHECK-NOFP-NEXT: ldr h2, [x8, :lo12:.LCPI15_0]
-; CHECK-NOFP-NEXT: mov w8, #57344 // =0xe000
-; CHECK-NOFP-NEXT: fcvt s2, h2
-; CHECK-NOFP-NEXT: movk w8, #51071, lsl #16
-; CHECK-NOFP-NEXT: fcvt h1, s1
+; CHECK-NOFP-NEXT: fmaxnm s1, s2, s16
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcmp s3, s2
-; CHECK-NOFP-NEXT: fcvt s1, h1
+; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcvt s0, h0
+; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT: fmov s1, w8
-; CHECK-NOFP-NEXT: fcsel s3, s3, s1, gt
-; CHECK-NOFP-NEXT: fcmp s4, s2
+; CHECK-NOFP-NEXT: fcvt s1, h3
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s3, h3
-; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
-; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
-; CHECK-NOFP-NEXT: fcvt s4, h5
-; CHECK-NOFP-NEXT: fcvt h3, s3
+; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
+; CHECK-NOFP-NEXT: fcvt s1, h4
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcmp s4, s2
-; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
-; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
-; CHECK-NOFP-NEXT: fcvt s4, h6
-; CHECK-NOFP-NEXT: fcvt h3, s3
+; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
+; CHECK-NOFP-NEXT: fcvt s1, h5
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcmp s4, s2
-; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
-; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
-; CHECK-NOFP-NEXT: fcvt s4, h7
-; CHECK-NOFP-NEXT: fcvt h3, s3
+; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
+; CHECK-NOFP-NEXT: fcvt s1, h6
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcmp s4, s2
-; CHECK-NOFP-NEXT: fcvt s3, h3
-; CHECK-NOFP-NEXT: fcsel s1, s4, s1, gt
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
-; CHECK-NOFP-NEXT: fcvt s1, h1
+; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
+; CHECK-NOFP-NEXT: fcvt s1, h7
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
index 0116be51dd696a..300081dc3ec405 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
@@ -223,77 +223,69 @@ define half @test_v16f16(<16 x half> %a) nounwind {
; CHECK-NOFP-SD-NEXT: fcvt s5, h0
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
-; CHECK-NOFP-SD-NEXT: fcmp s3, s2
-; CHECK-NOFP-SD-NEXT: fcsel s2, s3, s2, lt
-; CHECK-NOFP-SD-NEXT: fcmp s5, s4
-; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, lt
-; CHECK-NOFP-SD-NEXT: mov h4, v1.h[2]
+; CHECK-NOFP-SD-NEXT: fminnm s4, s5, s4
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[2]
+; CHECK-NOFP-SD-NEXT: fminnm s2, s3, s2
+; CHECK-NOFP-SD-NEXT: mov h3, v1.h[2]
+; CHECK-NOFP-SD-NEXT: fcvt h4, s4
+; CHECK-NOFP-SD-NEXT: fcvt s5, h5
; CHECK-NOFP-SD-NEXT: fcvt h2, s2
-; CHECK-NOFP-SD-NEXT: fcvt h3, s3
+; CHECK-NOFP-SD-NEXT: fcvt s3, h3
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
-; CHECK-NOFP-SD-NEXT: fcvt s5, h5
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
-; CHECK-NOFP-SD-NEXT: fcvt s3, h3
-; CHECK-NOFP-SD-NEXT: fcmp s5, s4
-; CHECK-NOFP-SD-NEXT: fminnm s2, s3, s2
-; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, lt
-; CHECK-NOFP-SD-NEXT: mov h4, v1.h[3]
+; CHECK-NOFP-SD-NEXT: fminnm s3, s5, s3
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[3]
+; CHECK-NOFP-SD-NEXT: fminnm s2, s4, s2
+; CHECK-NOFP-SD-NEXT: mov h4, v1.h[3]
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
+; CHECK-NOFP-SD-NEXT: fcvt s5, h5
; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
-; CHECK-NOFP-SD-NEXT: fcvt s5, h5
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
-; CHECK-NOFP-SD-NEXT: fcmp s5, s4
-; CHECK-NOFP-SD-NEXT: fminnm s2, s2, s3
-; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, lt
-; CHECK-NOFP-SD-NEXT: mov h4, v1.h[4]
+; CHECK-NOFP-SD-NEXT: fminnm s4, s5, s4
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[4]
-; CHECK-NOFP-SD-NEXT: fcvt h3, s3
-; CHECK-NOFP-SD-NEXT: fcvt h2, s2
-; CHECK-NOFP-SD-NEXT: fcvt s4, h4
+; CHECK-NOFP-SD-NEXT: fminnm s2, s2, s3
+; CHECK-NOFP-SD-NEXT: mov h3, v1.h[4]
+; CHECK-NOFP-SD-NEXT: fcvt h4, s4
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
+; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
+; CHECK-NOFP-SD-NEXT: fcvt s4, h4
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
-; CHECK-NOFP-SD-NEXT: fcmp s5, s4
-; CHECK-NOFP-SD-NEXT: fminnm s2, s2, s3
-; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, lt
-; CHECK-NOFP-SD-NEXT: mov h4, v1.h[5]
+; CHECK-NOFP-SD-NEXT: fminnm s3, s5, s3
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[5]
+; CHECK-NOFP-SD-NEXT: fminnm s2, s2, s4
+; CHECK-NOFP-SD-NEXT: mov h4, v1.h[5]
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
+; CHECK-NOFP-SD-NEXT: fcvt s5, h5
; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
-; CHECK-NOFP-SD-NEXT: fcvt s5, h5
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
-; CHECK-NOFP-SD-NEXT: fcmp s5, s4
+; CHECK-NOFP-SD-NEXT: fminnm s4, s5, s4
+; CHECK-NOFP-SD-NEXT: mov h5, v0.h[6]
+; CHECK-NOFP-SD-NEXT: mov h0, v0.h[7]
; CHECK-NOFP-SD-NEXT: fminnm s2, s2, s3
-; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, lt
+; CHECK-NOFP-SD-NEXT: fcvt h3, s4
; CHECK-NOFP-SD-NEXT: mov h4, v1.h[6]
-; CHECK-NOFP-SD-NEXT: mov h5, v0.h[6]
+; CHECK-NOFP-SD-NEXT: fcvt s5, h5
; CHECK-NOFP-SD-NEXT: mov h1, v1.h[7]
-; CHECK-NOFP-SD-NEXT: mov h0, v0.h[7]
-; CHECK-NOFP-SD-NEXT: fcvt h3, s3
+; CHECK-NOFP-SD-NEXT: fcvt s0, h0
; CHECK-NOFP-SD-NEXT: fcvt h2, s2
+; CHECK-NOFP-SD-NEXT: fcvt s3, h3
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
-; CHECK-NOFP-SD-NEXT: fcvt s5, h5
; CHECK-NOFP-SD-NEXT: fcvt s1, h1
-; CHECK-NOFP-SD-NEXT: fcvt s0, h0
-; CHECK-NOFP-SD-NEXT: fcvt s3, h3
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
-; CHECK-NOFP-SD-NEXT: fcmp s5, s4
+; CHECK-NOFP-SD-NEXT: fminnm s0, s0, s1
; CHECK-NOFP-SD-NEXT: fminnm s2, s2, s3
-; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, lt
-; CHECK-NOFP-SD-NEXT: fcmp s0, s1
-; CHECK-NOFP-SD-NEXT: fcvt h3, s3
-; CHECK-NOFP-SD-NEXT: fcsel s0, s0, s1, lt
-; CHECK-NOFP-SD-NEXT: fcvt h2, s2
+; CHECK-NOFP-SD-NEXT: fminnm s3, s5, s4
; CHECK-NOFP-SD-NEXT: fcvt h0, s0
-; CHECK-NOFP-SD-NEXT: fcvt s3, h3
-; CHECK-NOFP-SD-NEXT: fcvt s2, h2
+; CHECK-NOFP-SD-NEXT: fcvt h2, s2
+; CHECK-NOFP-SD-NEXT: fcvt h3, s3
; CHECK-NOFP-SD-NEXT: fcvt s0, h0
+; CHECK-NOFP-SD-NEXT: fcvt s2, h2
+; CHECK-NOFP-SD-NEXT: fcvt s3, h3
; CHECK-NOFP-SD-NEXT: fminnm s2, s2, s3
; CHECK-NOFP-SD-NEXT: fcvt h1, s2
; CHECK-NOFP-SD-NEXT: fcvt s1, h1
@@ -410,72 +402,44 @@ define half @test_v11f16(<11 x half> %a) nounwind {
; CHECK-NOFP-LABEL: test_v11f16:
; CHECK-NOFP: // %bb.0:
; CHECK-NOFP-NEXT: ldr h16, [sp, #8]
-; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: ldr h17, [sp]
+; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fcvt s2, h2
-; CHECK-NOFP-NEXT: adrp x8, .LCPI14_0
; CHECK-NOFP-NEXT: fcvt s16, h16
; CHECK-NOFP-NEXT: fcvt s17, h17
-; CHECK-NOFP-NEXT: fcvt s3, h3
-; CHECK-NOFP-NEXT: fcvt s4, h4
-; CHECK-NOFP-NEXT: fcmp s1, s16
-; CHECK-NOFP-NEXT: fcsel s1, s1, s16, lt
-; CHECK-NOFP-NEXT: fcmp s0, s17
+; CHECK-NOFP-NEXT: fminnm s1, s1, s16
+; CHECK-NOFP-NEXT: fminnm s0, s0, s17
; CHECK-NOFP-NEXT: ldr h16, [sp, #16]
; CHECK-NOFP-NEXT: fcvt s16, h16
-; CHECK-NOFP-NEXT: fcsel s0, s0, s17, lt
; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcmp s2, s16
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fminnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s2, s16, lt
-; CHECK-NOFP-NEXT: ldr h2, [x8, :lo12:.LCPI14_0]
-; CHECK-NOFP-NEXT: mov w8, #2139095040 // =0x7f800000
-; CHECK-NOFP-NEXT: fcvt s2, h2
-; CHECK-NOFP-NEXT: fcvt h1, s1
+; CHECK-NOFP-NEXT: fminnm s1, s2, s16
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcmp s3, s2
-; CHECK-NOFP-NEXT: fcvt s1, h1
+; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcvt s0, h0
+; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fminnm s0, s0, s1
-; CHECK-NOFP-NEXT: fmov s1, w8
-; CHECK-NOFP-NEXT: fcsel s3, s3, s1, lt
-; CHECK-NOFP-NEXT: fcmp s4, s2
+; CHECK-NOFP-NEXT: fcvt s1, h3
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s3, h3
-; CHECK-NOFP-NEXT: fminnm s0, s0, s3
-; CHECK-NOFP-NEXT: fcsel s3, s4, s1, lt
-; CHECK-NOFP-NEXT: fcvt s4, h5
-; CHECK-NOFP-NEXT: fcvt h3, s3
+; CHECK-NOFP-NEXT: fminnm s0, s0, s1
+; CHECK-NOFP-NEXT: fcvt s1, h4
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcmp s4, s2
-; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fminnm s0, s0, s3
-; CHECK-NOFP-NEXT: fcsel s3, s4, s1, lt
-; CHECK-NOFP-NEXT: fcvt s4, h6
-; CHECK-NOFP-NEXT: fcvt h3, s3
+; CHECK-NOFP-NEXT: fminnm s0, s0, s1
+; CHECK-NOFP-NEXT: fcvt s1, h5
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcmp s4, s2
-; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fminnm s0, s0, s3
-; CHECK-NOFP-NEXT: fcsel s3, s4, s1, lt
-; CHECK-NOFP-NEXT: fcvt s4, h7
-; CHECK-NOFP-NEXT: fcvt h3, s3
+; CHECK-NOFP-NEXT: fminnm s0, s0, s1
+; CHECK-NOFP-NEXT: fcvt s1, h6
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcmp s4, s2
-; CHECK-NOFP-NEXT: fcvt s3, h3
-; CHECK-NOFP-NEXT: fcsel s1, s4, s1, lt
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fminnm s0, s0, s3
-; CHECK-NOFP-NEXT: fcvt s1, h1
+; CHECK-NOFP-NEXT: fminnm s0, s0, s1
+; CHECK-NOFP-NEXT: fcvt s1, h7
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fminnm s0, s0, s1
@@ -517,73 +481,44 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
; CHECK-NOFP-LABEL: test_v11f16_ninf:
; CHECK-NOFP: // %bb.0:
; CHECK-NOFP-NEXT: ldr h16, [sp, #8]
-; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: ldr h17, [sp]
+; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fcvt s2, h2
-; CHECK-NOFP-NEXT: adrp x8, .LCPI15_0
; CHECK-NOFP-NEXT: fcvt s16, h16
; CHECK-NOFP-NEXT: fcvt s17, h17
-; CHECK-NOFP-NEXT: fcvt s3, h3
-; CHECK-NOFP-NEXT: fcvt s4, h4
-; CHECK-NOFP-NEXT: fcmp s1, s16
-; CHECK-NOFP-NEXT: fcsel s1, s1, s16, lt
-; CHECK-NOFP-NEXT: fcmp s0, s17
+; CHECK-NOFP-NEXT: fminnm s1, s1, s16
+; CHECK-NOFP-NEXT: fminnm s0, s0, s17
; CHECK-NOFP-NEXT: ldr h16, [sp, #16]
; CHECK-NOFP-NEXT: fcvt s16, h16
-; CHECK-NOFP-NEXT: fcsel s0, s0, s17, lt
; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcmp s2, s16
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fminnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s2, s16, lt
-; CHECK-NOFP-NEXT: ldr h2, [x8, :lo12:.LCPI15_0]
-; CHECK-NOFP-NEXT: mov w8, #57344 // =0xe000
-; CHECK-NOFP-NEXT: fcvt s2, h2
-; CHECK-NOFP-NEXT: movk w8, #18303, lsl #16
-; CHECK-NOFP-NEXT: fcvt h1, s1
+; CHECK-NOFP-NEXT: fminnm s1, s2, s16
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcmp s3, s2
-; CHECK-NOFP-NEXT: fcvt s1, h1
+; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcvt s0, h0
+; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fminnm s0, s0, s1
-; CHECK-NOFP-NEXT: fmov s1, w8
-; CHECK-NOFP-NEXT: fcsel s3, s3, s1, lt
-; CHECK-NOFP-NEXT: fcmp s4, s2
+; CHECK-NOFP-NEXT: fcvt s1, h3
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s3, h3
-; CHECK-NOFP-NEXT: fminnm s0, s0, s3
-; CHECK-NOFP-NEXT: fcsel s3, s4, s1, lt
-; CHECK-NOFP-NEXT: fcvt s4, h5
-; CHECK-NOFP-NEXT: fcvt h3, s3
+; CHECK-NOFP-NEXT: fminnm s0, s0, s1
+; CHECK-NOFP-NEXT: fcvt s1, h4
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcmp s4, s2
-; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fminnm s0, s0, s3
-; CHECK-NOFP-NEXT: fcsel s3, s4, s1, lt
-; CHECK-NOFP-NEXT: fcvt s4, h6
-; CHECK-NOFP-NEXT: fcvt h3, s3
+; CHECK-NOFP-NEXT: fminnm s0, s0, s1
+; CHECK-NOFP-NEXT: fcvt s1, h5
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcmp s4, s2
-; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fminnm s0, s0, s3
-; CHECK-NOFP-NEXT: fcsel s3, s4, s1, lt
-; CHECK-NOFP-NEXT: fcvt s4, h7
-; CHECK-NOFP-NEXT: fcvt h3, s3
+; CHECK-NOFP-NEXT: fminnm s0, s0, s1
+; CHECK-NOFP-NEXT: fcvt s1, h6
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcmp s4, s2
-; CHECK-NOFP-NEXT: fcvt s3, h3
-; CHECK-NOFP-NEXT: fcsel s1, s4, s1, lt
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fminnm s0, s0, s3
-; CHECK-NOFP-NEXT: fcvt s1, h1
+; CHECK-NOFP-NEXT: fminnm s0, s0, s1
+; CHECK-NOFP-NEXT: fcvt s1, h7
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fminnm s0, s0, s1
diff --git a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
index 528bfe0411730a..feb790821e8754 100644
--- a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
+++ b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
@@ -609,26 +609,21 @@ define <4 x float> @fminnumv432_non_zero_intrinsic(<4 x float> %x) {
define <4 x float> @fminnumv432_one_zero_intrinsic(<4 x float> %x) {
; ARMV7-LABEL: fminnumv432_one_zero_intrinsic:
; ARMV7: @ %bb.0:
-; ARMV7-NEXT: vmov d3, r2, r3
-; ARMV7-NEXT: vmov d2, r0, r1
-; ARMV7-NEXT: vmov.f32 s0, #-1.000000e+00
-; ARMV7-NEXT: vcmp.f32 s5, #0
-; ARMV7-NEXT: vldr s1, .LCPI18_0
+; ARMV7-NEXT: vmov d1, r2, r3
+; ARMV7-NEXT: vldr s8, .LCPI18_0
+; ARMV7-NEXT: vmov d0, r0, r1
+; ARMV7-NEXT: vmov.f32 s10, #-1.000000e+00
+; ARMV7-NEXT: vcmp.f32 s1, #0
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV7-NEXT: vcmp.f32 s7, s0
-; ARMV7-NEXT: vmovlt.f32 s1, s5
-; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV7-NEXT: vmov.f32 s3, s0
-; ARMV7-NEXT: vcmp.f32 s6, s0
-; ARMV7-NEXT: vmovlt.f32 s3, s7
-; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV7-NEXT: vmov.f32 s2, s0
-; ARMV7-NEXT: vcmp.f32 s4, s0
-; ARMV7-NEXT: vmovlt.f32 s2, s6
-; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV7-NEXT: vmovlt.f32 s0, s4
-; ARMV7-NEXT: vmov r2, r3, d1
-; ARMV7-NEXT: vmov r0, r1, d0
+; ARMV7-NEXT: vmov.f32 s4, s3
+; ARMV7-NEXT: vmin.f32 d6, d2, d5
+; ARMV7-NEXT: vmin.f32 d3, d1, d5
+; ARMV7-NEXT: vmin.f32 d2, d0, d5
+; ARMV7-NEXT: vmovlt.f32 s8, s1
+; ARMV7-NEXT: vmov.f32 s5, s8
+; ARMV7-NEXT: vmov.f32 s7, s12
+; ARMV7-NEXT: vmov r0, r1, d2
+; ARMV7-NEXT: vmov r2, r3, d3
; ARMV7-NEXT: bx lr
; ARMV7-NEXT: .p2align 2
; ARMV7-NEXT: @ %bb.1:
@@ -918,15 +913,11 @@ define <2 x double> @fminnumv264_intrinsic(<2 x double> %x, <2 x double> %y) {
; ARMV8: @ %bb.0:
; ARMV8-NEXT: mov r12, sp
; ARMV8-NEXT: vld1.64 {d16, d17}, [r12]
-; ARMV8-NEXT: vmov d18, r0, r1
-; ARMV8-NEXT: vmov d19, r2, r3
-; ARMV8-NEXT: vcmp.f64 d16, d18
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vcmp.f64 d17, d19
-; ARMV8-NEXT: vselgt.f64 d18, d18, d16
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov r0, r1, d18
-; ARMV8-NEXT: vselgt.f64 d16, d19, d17
+; ARMV8-NEXT: vmov d19, r0, r1
+; ARMV8-NEXT: vmov d18, r2, r3
+; ARMV8-NEXT: vminnm.f64 d19, d19, d16
+; ARMV8-NEXT: vminnm.f64 d16, d18, d17
+; ARMV8-NEXT: vmov r0, r1, d19
; ARMV8-NEXT: vmov r2, r3, d16
; ARMV8-NEXT: bx lr
;
@@ -970,15 +961,11 @@ define <2 x double> @fminnumv264_nsz_intrinsic(<2 x double> %x, <2 x double> %y)
; ARMV8: @ %bb.0:
; ARMV8-NEXT: mov r12, sp
; ARMV8-NEXT: vld1.64 {d16, d17}, [r12]
-; ARMV8-NEXT: vmov d18, r0, r1
-; ARMV8-NEXT: vmov d19, r2, r3
-; ARMV8-NEXT: vcmp.f64 d16, d18
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vcmp.f64 d17, d19
-; ARMV8-NEXT: vselgt.f64 d18, d18, d16
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov r0, r1, d18
-; ARMV8-NEXT: vselgt.f64 d16, d19, d17
+; ARMV8-NEXT: vmov d19, r0, r1
+; ARMV8-NEXT: vmov d18, r2, r3
+; ARMV8-NEXT: vminnm.f64 d19, d19, d16
+; ARMV8-NEXT: vminnm.f64 d16, d18, d17
+; ARMV8-NEXT: vmov r0, r1, d19
; ARMV8-NEXT: vmov r2, r3, d16
; ARMV8-NEXT: bx lr
;
@@ -1020,16 +1007,12 @@ define <2 x double> @fminnumv264_non_zero_intrinsic(<2 x double> %x) {
;
; ARMV8-LABEL: fminnumv264_non_zero_intrinsic:
; ARMV8: @ %bb.0:
-; ARMV8-NEXT: vmov d17, r0, r1
; ARMV8-NEXT: vmov.f64 d16, #1.000000e+00
-; ARMV8-NEXT: vcmp.f64 d16, d17
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov d18, r2, r3
-; ARMV8-NEXT: vcmp.f64 d16, d18
-; ARMV8-NEXT: vselgt.f64 d17, d17, d16
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov r0, r1, d17
-; ARMV8-NEXT: vselgt.f64 d16, d18, d16
+; ARMV8-NEXT: vmov d18, r0, r1
+; ARMV8-NEXT: vmov d17, r2, r3
+; ARMV8-NEXT: vminnm.f64 d18, d18, d16
+; ARMV8-NEXT: vminnm.f64 d16, d17, d16
+; ARMV8-NEXT: vmov r0, r1, d18
; ARMV8-NEXT: vmov r2, r3, d16
; ARMV8-NEXT: bx lr
;
@@ -1070,18 +1053,14 @@ define <2 x double> @fminnumv264_one_zero_intrinsic(<2 x double> %x) {
;
; ARMV8-LABEL: fminnumv264_one_zero_intrinsic:
; ARMV8: @ %bb.0:
-; ARMV8-NEXT: vmov d19, r2, r3
-; ARMV8-NEXT: vcmp.f64 d19, #0
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov d18, r0, r1
; ARMV8-NEXT: vmov.f64 d16, #-1.000000e+00
-; ARMV8-NEXT: vcmp.f64 d16, d18
+; ARMV8-NEXT: vmov d18, r0, r1
; ARMV8-NEXT: vmov.i32 d17, #0x0
-; ARMV8-NEXT: vmovlt.f64 d17, d19
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov r2, r3, d17
-; ARMV8-NEXT: vselgt.f64 d16, d18, d16
+; ARMV8-NEXT: vminnm.f64 d16, d18, d16
+; ARMV8-NEXT: vmov d19, r2, r3
+; ARMV8-NEXT: vminnm.f64 d17, d19, d17
; ARMV8-NEXT: vmov r0, r1, d16
+; ARMV8-NEXT: vmov r2, r3, d17
; ARMV8-NEXT: bx lr
;
; ARMV8M-LABEL: fminnumv264_one_zero_intrinsic:
@@ -1129,15 +1108,11 @@ define <2 x double> @fmaxnumv264_intrinsic(<2 x double> %x, <2 x double> %y) {
; ARMV8: @ %bb.0:
; ARMV8-NEXT: mov r12, sp
; ARMV8-NEXT: vld1.64 {d16, d17}, [r12]
-; ARMV8-NEXT: vmov d18, r0, r1
-; ARMV8-NEXT: vcmp.f64 d18, d16
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov d19, r2, r3
-; ARMV8-NEXT: vcmp.f64 d19, d17
-; ARMV8-NEXT: vselgt.f64 d18, d18, d16
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov r0, r1, d18
-; ARMV8-NEXT: vselgt.f64 d16, d19, d17
+; ARMV8-NEXT: vmov d19, r0, r1
+; ARMV8-NEXT: vmov d18, r2, r3
+; ARMV8-NEXT: vmaxnm.f64 d19, d19, d16
+; ARMV8-NEXT: vmaxnm.f64 d16, d18, d17
+; ARMV8-NEXT: vmov r0, r1, d19
; ARMV8-NEXT: vmov r2, r3, d16
; ARMV8-NEXT: bx lr
;
@@ -1181,15 +1156,11 @@ define <2 x double> @fmaxnumv264_nsz_intrinsic(<2 x double> %x, <2 x double> %y)
; ARMV8: @ %bb.0:
; ARMV8-NEXT: mov r12, sp
; ARMV8-NEXT: vld1.64 {d16, d17}, [r12]
-; ARMV8-NEXT: vmov d18, r0, r1
-; ARMV8-NEXT: vcmp.f64 d18, d16
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov d19, r2, r3
-; ARMV8-NEXT: vcmp.f64 d19, d17
-; ARMV8-NEXT: vselgt.f64 d18, d18, d16
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov r0, r1, d18
-; ARMV8-NEXT: vselgt.f64 d16, d19, d17
+; ARMV8-NEXT: vmov d19, r0, r1
+; ARMV8-NEXT: vmov d18, r2, r3
+; ARMV8-NEXT: vmaxnm.f64 d19, d19, d16
+; ARMV8-NEXT: vmaxnm.f64 d16, d18, d17
+; ARMV8-NEXT: vmov r0, r1, d19
; ARMV8-NEXT: vmov r2, r3, d16
; ARMV8-NEXT: bx lr
;
@@ -1236,18 +1207,14 @@ define <2 x double> @fmaxnumv264_zero_intrinsic(<2 x double> %x) {
;
; ARMV8-LABEL: fmaxnumv264_zero_intrinsic:
; ARMV8: @ %bb.0:
-; ARMV8-NEXT: vmov d18, r0, r1
; ARMV8-NEXT: vldr d16, .LCPI30_0
-; ARMV8-NEXT: vcmp.f64 d18, #0
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov d19, r2, r3
-; ARMV8-NEXT: vcmp.f64 d19, d16
+; ARMV8-NEXT: vmov d18, r2, r3
; ARMV8-NEXT: vmov.i32 d17, #0x0
-; ARMV8-NEXT: vselgt.f64 d17, d18, d17
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov r0, r1, d17
-; ARMV8-NEXT: vselgt.f64 d16, d19, d16
+; ARMV8-NEXT: vmov d19, r0, r1
+; ARMV8-NEXT: vmaxnm.f64 d16, d18, d16
+; ARMV8-NEXT: vmaxnm.f64 d17, d19, d17
; ARMV8-NEXT: vmov r2, r3, d16
+; ARMV8-NEXT: vmov r0, r1, d17
; ARMV8-NEXT: bx lr
; ARMV8-NEXT: .p2align 3
; ARMV8-NEXT: @ %bb.1:
@@ -1307,15 +1274,11 @@ define <2 x double> @fmaxnumv264_minus_zero_intrinsic(<2 x double> %x) {
; ARMV8-LABEL: fmaxnumv264_minus_zero_intrinsic:
; ARMV8: @ %bb.0:
; ARMV8-NEXT: vldr d16, .LCPI31_0
-; ARMV8-NEXT: vmov d17, r0, r1
-; ARMV8-NEXT: vmov d18, r2, r3
-; ARMV8-NEXT: vcmp.f64 d17, d16
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vcmp.f64 d18, d16
-; ARMV8-NEXT: vselgt.f64 d17, d17, d16
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov r0, r1, d17
-; ARMV8-NEXT: vselgt.f64 d16, d18, d16
+; ARMV8-NEXT: vmov d18, r0, r1
+; ARMV8-NEXT: vmov d17, r2, r3
+; ARMV8-NEXT: vmaxnm.f64 d18, d18, d16
+; ARMV8-NEXT: vmaxnm.f64 d16, d17, d16
+; ARMV8-NEXT: vmov r0, r1, d18
; ARMV8-NEXT: vmov r2, r3, d16
; ARMV8-NEXT: bx lr
; ARMV8-NEXT: .p2align 3
@@ -1367,15 +1330,11 @@ define <2 x double> @fmaxnumv264_non_zero_intrinsic(<2 x double> %x) {
; ARMV8-LABEL: fmaxnumv264_non_zero_intrinsic:
; ARMV8: @ %bb.0:
; ARMV8-NEXT: vmov.f64 d16, #1.000000e+00
-; ARMV8-NEXT: vmov d17, r0, r1
-; ARMV8-NEXT: vcmp.f64 d17, d16
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov d18, r2, r3
-; ARMV8-NEXT: vcmp.f64 d18, d16
-; ARMV8-NEXT: vselgt.f64 d17, d17, d16
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov r0, r1, d17
-; ARMV8-NEXT: vselgt.f64 d16, d18, d16
+; ARMV8-NEXT: vmov d18, r0, r1
+; ARMV8-NEXT: vmov d17, r2, r3
+; ARMV8-NEXT: vmaxnm.f64 d18, d18, d16
+; ARMV8-NEXT: vmaxnm.f64 d16, d17, d16
+; ARMV8-NEXT: vmov r0, r1, d18
; ARMV8-NEXT: vmov r2, r3, d16
; ARMV8-NEXT: bx lr
;
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
index 7cafb7262f460d..be737961e3ae72 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
@@ -43,21 +43,13 @@ define arm_aapcs_vfpcc float @fmin_v8f32(<8 x float> %x) {
;
; CHECK-NOFP-LABEL: fmin_v8f32:
; CHECK-NOFP: @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT: vcmp.f32 s5, s1
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f32 s4, s0
-; CHECK-NOFP-NEXT: vselgt.f32 s8, s1, s5
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f32 s6, s2
-; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s4
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f32 s7, s3
+; CHECK-NOFP-NEXT: vminnm.f32 s8, s1, s5
+; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s4
; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s8
-; CHECK-NOFP-NEXT: vselgt.f32 s2, s2, s6
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NOFP-NEXT: vminnm.f32 s2, s2, s6
+; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
+; CHECK-NOFP-NEXT: vminnm.f32 s2, s3, s7
; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
-; CHECK-NOFP-NEXT: vselgt.f32 s4, s3, s7
-; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s4
; CHECK-NOFP-NEXT: bx lr
entry:
%z = call fast float @llvm.vector.reduce.fmin.v8f32(<8 x float> %x)
@@ -129,44 +121,28 @@ define arm_aapcs_vfpcc half @fmin_v16f16(<16 x half> %x) {
;
; CHECK-NOFP-LABEL: fmin_v16f16:
; CHECK-NOFP: @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT: vmovx.f16 s8, s4
; CHECK-NOFP-NEXT: vmovx.f16 s10, s0
-; CHECK-NOFP-NEXT: vcmp.f16 s8, s10
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f16 s4, s0
-; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f16 s5, s1
-; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s4
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NOFP-NEXT: vmovx.f16 s8, s4
+; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
+; CHECK-NOFP-NEXT: vminnm.f16 s8, s10, s8
; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s8
-; CHECK-NOFP-NEXT: vmovx.f16 s8, s1
-; CHECK-NOFP-NEXT: vselgt.f16 s4, s1, s5
+; CHECK-NOFP-NEXT: vminnm.f16 s4, s1, s5
; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
; CHECK-NOFP-NEXT: vmovx.f16 s4, s5
-; CHECK-NOFP-NEXT: vcmp.f16 s4, s8
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f16 s6, s2
-; CHECK-NOFP-NEXT: vselgt.f16 s4, s8, s4
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NOFP-NEXT: vmovx.f16 s8, s1
+; CHECK-NOFP-NEXT: vminnm.f16 s4, s8, s4
; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
-; CHECK-NOFP-NEXT: vselgt.f16 s4, s2, s6
-; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
+; CHECK-NOFP-NEXT: vminnm.f16 s4, s2, s6
; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
; CHECK-NOFP-NEXT: vmovx.f16 s4, s6
-; CHECK-NOFP-NEXT: vcmp.f16 s4, s2
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f16 s7, s3
-; CHECK-NOFP-NEXT: vselgt.f16 s2, s2, s4
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
+; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
+; CHECK-NOFP-NEXT: vminnm.f16 s2, s2, s4
; CHECK-NOFP-NEXT: vmovx.f16 s4, s3
-; CHECK-NOFP-NEXT: vselgt.f16 s2, s3, s7
+; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
+; CHECK-NOFP-NEXT: vminnm.f16 s2, s3, s7
; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
; CHECK-NOFP-NEXT: vmovx.f16 s2, s7
-; CHECK-NOFP-NEXT: vcmp.f16 s2, s4
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vselgt.f16 s2, s4, s2
+; CHECK-NOFP-NEXT: vminnm.f16 s2, s4, s2
; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
; CHECK-NOFP-NEXT: bx lr
entry:
@@ -435,21 +411,13 @@ define arm_aapcs_vfpcc float @fmin_v8f32_acc(<8 x float> %x, float %y) {
;
; CHECK-NOFP-LABEL: fmin_v8f32_acc:
; CHECK-NOFP: @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT: vcmp.f32 s5, s1
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f32 s4, s0
-; CHECK-NOFP-NEXT: vselgt.f32 s10, s1, s5
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f32 s6, s2
-; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s4
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f32 s7, s3
+; CHECK-NOFP-NEXT: vminnm.f32 s10, s1, s5
+; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s4
; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s10
-; CHECK-NOFP-NEXT: vselgt.f32 s2, s2, s6
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NOFP-NEXT: vminnm.f32 s2, s2, s6
+; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
+; CHECK-NOFP-NEXT: vminnm.f32 s2, s3, s7
; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
-; CHECK-NOFP-NEXT: vselgt.f32 s4, s3, s7
-; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s4
; CHECK-NOFP-NEXT: vminnm.f32 s0, s8, s0
; CHECK-NOFP-NEXT: bx lr
entry:
@@ -547,44 +515,28 @@ define arm_aapcs_vfpcc half @fmin_v16f16_acc(<16 x half> %x, half %y) {
;
; CHECK-NOFP-LABEL: fmin_v16f16_acc:
; CHECK-NOFP: @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT: vmovx.f16 s10, s4
; CHECK-NOFP-NEXT: vmovx.f16 s12, s0
-; CHECK-NOFP-NEXT: vcmp.f16 s10, s12
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f16 s4, s0
-; CHECK-NOFP-NEXT: vselgt.f16 s10, s12, s10
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f16 s5, s1
-; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s4
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NOFP-NEXT: vmovx.f16 s10, s4
+; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
+; CHECK-NOFP-NEXT: vminnm.f16 s10, s12, s10
; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s10
-; CHECK-NOFP-NEXT: vmovx.f16 s10, s1
-; CHECK-NOFP-NEXT: vselgt.f16 s4, s1, s5
+; CHECK-NOFP-NEXT: vminnm.f16 s4, s1, s5
; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
; CHECK-NOFP-NEXT: vmovx.f16 s4, s5
-; CHECK-NOFP-NEXT: vcmp.f16 s4, s10
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f16 s6, s2
-; CHECK-NOFP-NEXT: vselgt.f16 s4, s10, s4
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NOFP-NEXT: vmovx.f16 s10, s1
+; CHECK-NOFP-NEXT: vminnm.f16 s4, s10, s4
; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
-; CHECK-NOFP-NEXT: vselgt.f16 s4, s2, s6
-; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
+; CHECK-NOFP-NEXT: vminnm.f16 s4, s2, s6
; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
; CHECK-NOFP-NEXT: vmovx.f16 s4, s6
-; CHECK-NOFP-NEXT: vcmp.f16 s4, s2
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f16 s7, s3
-; CHECK-NOFP-NEXT: vselgt.f16 s2, s2, s4
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
+; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
+; CHECK-NOFP-NEXT: vminnm.f16 s2, s2, s4
; CHECK-NOFP-NEXT: vmovx.f16 s4, s3
-; CHECK-NOFP-NEXT: vselgt.f16 s2, s3, s7
+; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
+; CHECK-NOFP-NEXT: vminnm.f16 s2, s3, s7
; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
; CHECK-NOFP-NEXT: vmovx.f16 s2, s7
-; CHECK-NOFP-NEXT: vcmp.f16 s2, s4
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vselgt.f16 s2, s4, s2
+; CHECK-NOFP-NEXT: vminnm.f16 s2, s4, s2
; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
; CHECK-NOFP-NEXT: vminnm.f16 s0, s8, s0
; CHECK-NOFP-NEXT: bx lr
@@ -917,21 +869,13 @@ define arm_aapcs_vfpcc float @fmax_v8f32(<8 x float> %x) {
;
; CHECK-NOFP-LABEL: fmax_v8f32:
; CHECK-NOFP: @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT: vcmp.f32 s1, s5
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f32 s0, s4
-; CHECK-NOFP-NEXT: vselgt.f32 s8, s1, s5
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f32 s2, s6
-; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s4
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f32 s3, s7
+; CHECK-NOFP-NEXT: vmaxnm.f32 s8, s1, s5
+; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s4
; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s8
-; CHECK-NOFP-NEXT: vselgt.f32 s2, s2, s6
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NOFP-NEXT: vmaxnm.f32 s2, s2, s6
+; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s2
+; CHECK-NOFP-NEXT: vmaxnm.f32 s2, s3, s7
; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s2
-; CHECK-NOFP-NEXT: vselgt.f32 s4, s3, s7
-; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s4
; CHECK-NOFP-NEXT: bx lr
entry:
%z = call fast float @llvm.vector.reduce.fmax.v8f32(<8 x float> %x)
@@ -1003,44 +947,28 @@ define arm_aapcs_vfpcc half @fmax_v16f16(<16 x half> %x) {
;
; CHECK-NOFP-LABEL: fmax_v16f16:
; CHECK-NOFP: @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT: vmovx.f16 s8, s4
; CHECK-NOFP-NEXT: vmovx.f16 s10, s0
-; CHECK-NOFP-NEXT: vcmp.f16 s10, s8
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f16 s0, s4
-; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f16 s1, s5
-; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s4
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NOFP-NEXT: vmovx.f16 s8, s4
+; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
+; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s10, s8
; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s8
-; CHECK-NOFP-NEXT: vmovx.f16 s8, s1
-; CHECK-NOFP-NEXT: vselgt.f16 s4, s1, s5
+; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s1, s5
; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
; CHECK-NOFP-NEXT: vmovx.f16 s4, s5
-; CHECK-NOFP-NEXT: vcmp.f16 s8, s4
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f16 s2, s6
-; CHECK-NOFP-NEXT: vselgt.f16 s4, s8, s4
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NOFP-NEXT: vmovx.f16 s8, s1
+; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s8, s4
; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
-; CHECK-NOFP-NEXT: vselgt.f16 s4, s2, s6
-; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
+; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s2, s6
; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
; CHECK-NOFP-NEXT: vmovx.f16 s4, s6
-; CHECK-NOFP-NEXT: vcmp.f16 s2, s4
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f16 s3, s7
-; CHECK-NOFP-NEXT: vselgt.f16 s2, s2, s4
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
+; CHECK-NOFP-NEXT: vmaxnm.f16 s2, s2, s4
; CHECK-NOFP-NEXT: vmovx.f16 s4, s3
-; CHECK-NOFP-NEXT: vselgt.f16 s2, s3, s7
+; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NOFP-NEXT: vmaxnm.f16 s2, s3, s7
; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
; CHECK-NOFP-NEXT: vmovx.f16 s2, s7
-; CHECK-NOFP-NEXT: vcmp.f16 s4, s2
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vselgt.f16 s2, s4, s2
+; CHECK-NOFP-NEXT: vmaxnm.f16 s2, s4, s2
; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
; CHECK-NOFP-NEXT: bx lr
entry:
@@ -1309,21 +1237,13 @@ define arm_aapcs_vfpcc float @fmax_v8f32_acc(<8 x float> %x, float %y) {
;
; CHECK-NOFP-LABEL: fmax_v8f32_acc:
; CHECK-NOFP: @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT: vcmp.f32 s1, s5
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f32 s0, s4
-; CHECK-NOFP-NEXT: vselgt.f32 s10, s1, s5
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f32 s2, s6
-; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s4
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f32 s3, s7
+; CHECK-NOFP-NEXT: vmaxnm.f32 s10, s1, s5
+; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s4
; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s10
-; CHECK-NOFP-NEXT: vselgt.f32 s2, s2, s6
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NOFP-NEXT: vmaxnm.f32 s2, s2, s6
+; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s2
+; CHECK-NOFP-NEXT: vmaxnm.f32 s2, s3, s7
; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s2
-; CHECK-NOFP-NEXT: vselgt.f32 s4, s3, s7
-; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s4
; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s8, s0
; CHECK-NOFP-NEXT: bx lr
entry:
@@ -1421,44 +1341,28 @@ define arm_aapcs_vfpcc half @fmax_v16f16_acc(<16 x half> %x, half %y) {
;
; CHECK-NOFP-LABEL: fmax_v16f16_acc:
; CHECK-NOFP: @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT: vmovx.f16 s10, s4
; CHECK-NOFP-NEXT: vmovx.f16 s12, s0
-; CHECK-NOFP-NEXT: vcmp.f16 s12, s10
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f16 s0, s4
-; CHECK-NOFP-NEXT: vselgt.f16 s10, s12, s10
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f16 s1, s5
-; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s4
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NOFP-NEXT: vmovx.f16 s10, s4
+; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
+; CHECK-NOFP-NEXT: vmaxnm.f16 s10, s12, s10
; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s10
-; CHECK-NOFP-NEXT: vmovx.f16 s10, s1
-; CHECK-NOFP-NEXT: vselgt.f16 s4, s1, s5
+; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s1, s5
; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
; CHECK-NOFP-NEXT: vmovx.f16 s4, s5
-; CHECK-NOFP-NEXT: vcmp.f16 s10, s4
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f16 s2, s6
-; CHECK-NOFP-NEXT: vselgt.f16 s4, s10, s4
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NOFP-NEXT: vmovx.f16 s10, s1
+; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s10, s4
; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
-; CHECK-NOFP-NEXT: vselgt.f16 s4, s2, s6
-; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
+; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s2, s6
; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
; CHECK-NOFP-NEXT: vmovx.f16 s4, s6
-; CHECK-NOFP-NEXT: vcmp.f16 s2, s4
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f16 s3, s7
-; CHECK-NOFP-NEXT: vselgt.f16 s2, s2, s4
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
+; CHECK-NOFP-NEXT: vmaxnm.f16 s2, s2, s4
; CHECK-NOFP-NEXT: vmovx.f16 s4, s3
-; CHECK-NOFP-NEXT: vselgt.f16 s2, s3, s7
+; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NOFP-NEXT: vmaxnm.f16 s2, s3, s7
; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
; CHECK-NOFP-NEXT: vmovx.f16 s2, s7
-; CHECK-NOFP-NEXT: vcmp.f16 s4, s2
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vselgt.f16 s2, s4, s2
+; CHECK-NOFP-NEXT: vmaxnm.f16 s2, s4, s2
; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s8, s0
; CHECK-NOFP-NEXT: bx lr
More information about the llvm-commits
mailing list