[llvm] 23323e2 - [TargetLowering][RISCV] Propagate fastmath flags for the vector operations emitted in expandVecReduce. (#85164)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 14 08:39:36 PDT 2024
Author: Craig Topper
Date: 2024-03-14T08:39:32-07:00
New Revision: 23323e2837d3282c194df6239a7f1a5494c17907
URL: https://github.com/llvm/llvm-project/commit/23323e2837d3282c194df6239a7f1a5494c17907
DIFF: https://github.com/llvm/llvm-project/commit/23323e2837d3282c194df6239a7f1a5494c17907.diff
LOG: [TargetLowering][RISCV] Propagate fastmath flags for the vector operations emitted in expandVecReduce. (#85164)
We used the fastmath flags for any scalar ops created, but not for the vector ops.
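For readers skimming the diff below: expandVecReduce lowers a vector reduction by repeatedly splitting the input in half and combining the halves with the reduction's base opcode, then finishing with scalar ops. The scalar ops already received the reduction node's fast-math flags; this one-line change forwards Node->getFlags() to the intermediate vector ops as well. A rough C++ sketch of the relevant splitting loop follows (approximate, not the verbatim upstream code; the structure and surrounding details are simplified):

  // Approximate sketch of the splitting loop in TargetLowering::expandVecReduce.
  // Only the getNode() call marked below changed in this patch.
  SDLoc dl(Node);
  unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
  SDValue Op = Node->getOperand(0);
  EVT VT = Op.getValueType();
  while (VT.getVectorNumElements() > 1) {
    EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
    if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
      break;
    SDValue Lo, Hi;
    std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
    // Changed line: the reduction's fast-math flags (nnan, ninf, ...) are now
    // forwarded to the intermediate vector op instead of being dropped.
    Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
    VT = HalfVT;
  }
  // ...the remaining elements are then extracted and reduced with scalar ops
  // that already carried Node->getFlags().

In the RISC-V tests below, propagating nnan lets the reductions select plain vfmin.vv/vfmax.vv on the intermediate vectors instead of the NaN-propagating vmfeq/vmerge sequences.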
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b3dc9de7137311..57f8fc409de453 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -10694,7 +10694,7 @@ SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
- Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
+ Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
VT = HalfVT;
}
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
index 68740eec56e4c4..073b60b47343d0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
@@ -1619,15 +1619,10 @@ define float @vreduce_fminimum_v2f32_nonans(ptr %x) {
; CHECK-LABEL: vreduce_fminimum_v2f32_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vle32.v v9, (a0)
-; CHECK-NEXT: vslidedown.vi v10, v9, 1
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmin.vv v8, v11, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <2 x float>, ptr %x
@@ -1670,24 +1665,14 @@ define float @vreduce_fminimum_v4f32_nonans(ptr %x) {
; CHECK-LABEL: vreduce_fminimum_v4f32_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v9, (a0)
+; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v9, 2
+; CHECK-NEXT: vslidedown.vi v9, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmin.vv v9, v11, v8
-; CHECK-NEXT: vslidedown.vi v10, v9, 1
+; CHECK-NEXT: vfmin.vv v8, v8, v9
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmin.vv v8, v11, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <4 x float>, ptr %x
@@ -1739,33 +1724,18 @@ define float @vreduce_fminimum_v8f32_nonans(ptr %x) {
; CHECK-LABEL: vreduce_fminimum_v8f32_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vle32.v v10, (a0)
+; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v12, v10, 4
+; CHECK-NEXT: vslidedown.vi v10, v8, 4
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v12, v12
-; CHECK-NEXT: vmfeq.vv v8, v10, v10
-; CHECK-NEXT: vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT: vfmin.vv v9, v9, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v10
; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v9, 2
+; CHECK-NEXT: vslidedown.vi v9, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmin.vv v9, v11, v8
-; CHECK-NEXT: vslidedown.vi v10, v9, 1
+; CHECK-NEXT: vfmin.vv v8, v8, v9
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmin.vv v8, v11, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <8 x float>, ptr %x
@@ -1826,42 +1796,22 @@ define float @vreduce_fminimum_v16f32_nonans(ptr %x) {
; CHECK-LABEL: vreduce_fminimum_v16f32_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT: vle32.v v12, (a0)
+; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 8, e32, m4, ta, ma
-; CHECK-NEXT: vslidedown.vi v16, v12, 8
+; CHECK-NEXT: vslidedown.vi v12, v8, 8
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v16, v16
-; CHECK-NEXT: vmfeq.vv v8, v12, v12
-; CHECK-NEXT: vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT: vfmin.vv v10, v10, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v12
; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v12, v10, 4
+; CHECK-NEXT: vslidedown.vi v10, v8, 4
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v12, v12
-; CHECK-NEXT: vmfeq.vv v8, v10, v10
-; CHECK-NEXT: vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT: vfmin.vv v9, v9, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v10
; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v9, 2
+; CHECK-NEXT: vslidedown.vi v9, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmin.vv v9, v11, v8
-; CHECK-NEXT: vslidedown.vi v10, v9, 1
+; CHECK-NEXT: vfmin.vv v8, v8, v9
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmin.vv v8, v11, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <16 x float>, ptr %x
@@ -1933,51 +1883,26 @@ define float @vreduce_fminimum_v32f32_nonans(ptr %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; CHECK-NEXT: vslidedown.vi v24, v16, 16
+; CHECK-NEXT: vslidedown.vi v16, v8, 16
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v24, v24
-; CHECK-NEXT: vmfeq.vv v8, v16, v16
-; CHECK-NEXT: vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT: vfmin.vv v12, v12, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v16
; CHECK-NEXT: vsetivli zero, 8, e32, m4, ta, ma
-; CHECK-NEXT: vslidedown.vi v16, v12, 8
+; CHECK-NEXT: vslidedown.vi v12, v8, 8
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v16, v16
-; CHECK-NEXT: vmfeq.vv v8, v12, v12
-; CHECK-NEXT: vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT: vfmin.vv v10, v10, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v12
; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v12, v10, 4
+; CHECK-NEXT: vslidedown.vi v10, v8, 4
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v12, v12
-; CHECK-NEXT: vmfeq.vv v8, v10, v10
-; CHECK-NEXT: vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT: vfmin.vv v9, v9, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v10
; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v9, 2
+; CHECK-NEXT: vslidedown.vi v9, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmin.vv v9, v11, v8
-; CHECK-NEXT: vslidedown.vi v10, v9, 1
+; CHECK-NEXT: vfmin.vv v8, v8, v9
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmin.vv v8, v11, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <32 x float>, ptr %x
@@ -2073,51 +1998,26 @@ define float @vreduce_fminimum_v64f32_nonans(ptr %x) {
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle32.v v16, (a0)
-; CHECK-NEXT: vfmin.vv v16, v8, v16
+; CHECK-NEXT: vfmin.vv v8, v8, v16
; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; CHECK-NEXT: vslidedown.vi v24, v16, 16
+; CHECK-NEXT: vslidedown.vi v16, v8, 16
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v24, v24
-; CHECK-NEXT: vmfeq.vv v8, v16, v16
-; CHECK-NEXT: vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT: vfmin.vv v12, v12, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v16
; CHECK-NEXT: vsetivli zero, 8, e32, m4, ta, ma
-; CHECK-NEXT: vslidedown.vi v16, v12, 8
+; CHECK-NEXT: vslidedown.vi v12, v8, 8
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v16, v16
-; CHECK-NEXT: vmfeq.vv v8, v12, v12
-; CHECK-NEXT: vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT: vfmin.vv v10, v10, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v12
; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v12, v10, 4
+; CHECK-NEXT: vslidedown.vi v10, v8, 4
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v12, v12
-; CHECK-NEXT: vmfeq.vv v8, v10, v10
-; CHECK-NEXT: vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT: vfmin.vv v9, v9, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v10
; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v9, 2
+; CHECK-NEXT: vslidedown.vi v9, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmin.vv v9, v11, v8
-; CHECK-NEXT: vslidedown.vi v10, v9, 1
+; CHECK-NEXT: vfmin.vv v8, v8, v9
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmin.vv v8, v11, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <64 x float>, ptr %x
@@ -2281,51 +2181,26 @@ define float @vreduce_fminimum_v128f32_nonans(ptr %x) {
; CHECK-NEXT: vle32.v v0, (a1)
; CHECK-NEXT: vfmin.vv v16, v24, v16
; CHECK-NEXT: vfmin.vv v8, v8, v0
-; CHECK-NEXT: vfmin.vv v16, v8, v16
+; CHECK-NEXT: vfmin.vv v8, v8, v16
; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; CHECK-NEXT: vslidedown.vi v24, v16, 16
+; CHECK-NEXT: vslidedown.vi v16, v8, 16
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v24, v24
-; CHECK-NEXT: vmfeq.vv v8, v16, v16
-; CHECK-NEXT: vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT: vfmin.vv v12, v12, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v16
; CHECK-NEXT: vsetivli zero, 8, e32, m4, ta, ma
-; CHECK-NEXT: vslidedown.vi v16, v12, 8
+; CHECK-NEXT: vslidedown.vi v12, v8, 8
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v16, v16
-; CHECK-NEXT: vmfeq.vv v8, v12, v12
-; CHECK-NEXT: vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT: vfmin.vv v10, v10, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v12
; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v12, v10, 4
+; CHECK-NEXT: vslidedown.vi v10, v8, 4
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v12, v12
-; CHECK-NEXT: vmfeq.vv v8, v10, v10
-; CHECK-NEXT: vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT: vfmin.vv v9, v9, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v10
; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v9, 2
+; CHECK-NEXT: vslidedown.vi v9, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmin.vv v9, v11, v8
-; CHECK-NEXT: vslidedown.vi v10, v9, 1
+; CHECK-NEXT: vfmin.vv v8, v8, v9
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmin.vv v8, v11, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <128 x float>, ptr %x
@@ -2359,15 +2234,10 @@ define double @vreduce_fminimum_v2f64_nonans(ptr %x) {
; CHECK-LABEL: vreduce_fminimum_v2f64_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vle64.v v9, (a0)
-; CHECK-NEXT: vslidedown.vi v10, v9, 1
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmin.vv v8, v11, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <2 x double>, ptr %x
@@ -2410,24 +2280,14 @@ define double @vreduce_fminimum_v4f64_nonans(ptr %x) {
; CHECK-LABEL: vreduce_fminimum_v4f64_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vle64.v v10, (a0)
+; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v12, v10, 2
+; CHECK-NEXT: vslidedown.vi v10, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v12, v12
-; CHECK-NEXT: vmfeq.vv v8, v10, v10
-; CHECK-NEXT: vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT: vfmin.vv v9, v9, v8
-; CHECK-NEXT: vslidedown.vi v10, v9, 1
+; CHECK-NEXT: vfmin.vv v8, v8, v10
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmin.vv v8, v11, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <4 x double>, ptr %x
@@ -2479,33 +2339,18 @@ define double @vreduce_fminimum_v8f64_nonans(ptr %x) {
; CHECK-LABEL: vreduce_fminimum_v8f64_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT: vle64.v v12, (a0)
+; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 4, e64, m4, ta, ma
-; CHECK-NEXT: vslidedown.vi v16, v12, 4
+; CHECK-NEXT: vslidedown.vi v12, v8, 4
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v16, v16
-; CHECK-NEXT: vmfeq.vv v8, v12, v12
-; CHECK-NEXT: vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT: vfmin.vv v10, v10, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v12
; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v12, v10, 2
+; CHECK-NEXT: vslidedown.vi v10, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v12, v12
-; CHECK-NEXT: vmfeq.vv v8, v10, v10
-; CHECK-NEXT: vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT: vfmin.vv v9, v9, v8
-; CHECK-NEXT: vslidedown.vi v10, v9, 1
+; CHECK-NEXT: vfmin.vv v8, v8, v10
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmin.vv v8, v11, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <8 x double>, ptr %x
@@ -2566,42 +2411,22 @@ define double @vreduce_fminimum_v16f64_nonans(ptr %x) {
; CHECK-LABEL: vreduce_fminimum_v16f64_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v16, (a0)
+; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 8, e64, m8, ta, ma
-; CHECK-NEXT: vslidedown.vi v24, v16, 8
+; CHECK-NEXT: vslidedown.vi v16, v8, 8
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v24, v24
-; CHECK-NEXT: vmfeq.vv v8, v16, v16
-; CHECK-NEXT: vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT: vfmin.vv v12, v12, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v16
; CHECK-NEXT: vsetivli zero, 4, e64, m4, ta, ma
-; CHECK-NEXT: vslidedown.vi v16, v12, 4
+; CHECK-NEXT: vslidedown.vi v12, v8, 4
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v16, v16
-; CHECK-NEXT: vmfeq.vv v8, v12, v12
-; CHECK-NEXT: vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT: vfmin.vv v10, v10, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v12
; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v12, v10, 2
+; CHECK-NEXT: vslidedown.vi v10, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v12, v12
-; CHECK-NEXT: vmfeq.vv v8, v10, v10
-; CHECK-NEXT: vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT: vfmin.vv v9, v9, v8
-; CHECK-NEXT: vslidedown.vi v10, v9, 1
+; CHECK-NEXT: vfmin.vv v8, v8, v10
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmin.vv v8, v11, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <16 x double>, ptr %x
@@ -2686,42 +2511,22 @@ define double @vreduce_fminimum_v32f64_nonans(ptr %x) {
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle64.v v16, (a0)
-; CHECK-NEXT: vfmin.vv v16, v8, v16
+; CHECK-NEXT: vfmin.vv v8, v8, v16
; CHECK-NEXT: vsetivli zero, 8, e64, m8, ta, ma
-; CHECK-NEXT: vslidedown.vi v24, v16, 8
+; CHECK-NEXT: vslidedown.vi v16, v8, 8
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v24, v24
-; CHECK-NEXT: vmfeq.vv v8, v16, v16
-; CHECK-NEXT: vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT: vfmin.vv v12, v12, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v16
; CHECK-NEXT: vsetivli zero, 4, e64, m4, ta, ma
-; CHECK-NEXT: vslidedown.vi v16, v12, 4
+; CHECK-NEXT: vslidedown.vi v12, v8, 4
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v16, v16
-; CHECK-NEXT: vmfeq.vv v8, v12, v12
-; CHECK-NEXT: vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT: vfmin.vv v10, v10, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v12
; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v12, v10, 2
+; CHECK-NEXT: vslidedown.vi v10, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v12, v12
-; CHECK-NEXT: vmfeq.vv v8, v10, v10
-; CHECK-NEXT: vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT: vfmin.vv v9, v9, v8
-; CHECK-NEXT: vslidedown.vi v10, v9, 1
+; CHECK-NEXT: vfmin.vv v8, v8, v10
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmin.vv v8, v11, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <32 x double>, ptr %x
@@ -2874,42 +2679,22 @@ define double @vreduce_fminimum_v64f64_nonans(ptr %x) {
; CHECK-NEXT: vle64.v v0, (a1)
; CHECK-NEXT: vfmin.vv v16, v24, v16
; CHECK-NEXT: vfmin.vv v8, v8, v0
-; CHECK-NEXT: vfmin.vv v16, v8, v16
+; CHECK-NEXT: vfmin.vv v8, v8, v16
; CHECK-NEXT: vsetivli zero, 8, e64, m8, ta, ma
-; CHECK-NEXT: vslidedown.vi v24, v16, 8
+; CHECK-NEXT: vslidedown.vi v16, v8, 8
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v24, v24
-; CHECK-NEXT: vmfeq.vv v8, v16, v16
-; CHECK-NEXT: vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT: vfmin.vv v12, v12, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v16
; CHECK-NEXT: vsetivli zero, 4, e64, m4, ta, ma
-; CHECK-NEXT: vslidedown.vi v16, v12, 4
+; CHECK-NEXT: vslidedown.vi v12, v8, 4
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v16, v16
-; CHECK-NEXT: vmfeq.vv v8, v12, v12
-; CHECK-NEXT: vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT: vfmin.vv v10, v10, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v12
; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v12, v10, 2
+; CHECK-NEXT: vslidedown.vi v10, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v12, v12
-; CHECK-NEXT: vmfeq.vv v8, v10, v10
-; CHECK-NEXT: vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT: vfmin.vv v9, v9, v8
-; CHECK-NEXT: vslidedown.vi v10, v9, 1
+; CHECK-NEXT: vfmin.vv v8, v8, v10
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmin.vv v8, v11, v8
+; CHECK-NEXT: vfmin.vv v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <64 x double>, ptr %x
@@ -2943,15 +2728,10 @@ define float @vreduce_fmaximum_v2f32_nonans(ptr %x) {
; CHECK-LABEL: vreduce_fmaximum_v2f32_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vle32.v v9, (a0)
-; CHECK-NEXT: vslidedown.vi v10, v9, 1
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmax.vv v8, v11, v8
+; CHECK-NEXT: vfmax.vv v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <2 x float>, ptr %x
@@ -2994,24 +2774,14 @@ define float @vreduce_fmaximum_v4f32_nonans(ptr %x) {
; CHECK-LABEL: vreduce_fmaximum_v4f32_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v9, (a0)
+; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v9, 2
+; CHECK-NEXT: vslidedown.vi v9, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmax.vv v9, v11, v8
-; CHECK-NEXT: vslidedown.vi v10, v9, 1
+; CHECK-NEXT: vfmax.vv v8, v8, v9
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmax.vv v8, v11, v8
+; CHECK-NEXT: vfmax.vv v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <4 x float>, ptr %x
@@ -3063,33 +2833,18 @@ define float @vreduce_fmaximum_v8f32_nonans(ptr %x) {
; CHECK-LABEL: vreduce_fmaximum_v8f32_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vle32.v v10, (a0)
+; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v12, v10, 4
+; CHECK-NEXT: vslidedown.vi v10, v8, 4
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v12, v12
-; CHECK-NEXT: vmfeq.vv v8, v10, v10
-; CHECK-NEXT: vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT: vfmax.vv v9, v9, v8
+; CHECK-NEXT: vfmax.vv v8, v8, v10
; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v9, 2
+; CHECK-NEXT: vslidedown.vi v9, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmax.vv v9, v11, v8
-; CHECK-NEXT: vslidedown.vi v10, v9, 1
+; CHECK-NEXT: vfmax.vv v8, v8, v9
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmax.vv v8, v11, v8
+; CHECK-NEXT: vfmax.vv v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <8 x float>, ptr %x
@@ -3150,110 +2905,33 @@ define float @vreduce_fmaximum_v16f32_nonans(ptr %x) {
; CHECK-LABEL: vreduce_fmaximum_v16f32_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT: vle32.v v12, (a0)
-; CHECK-NEXT: vsetivli zero, 8, e32, m4, ta, ma
-; CHECK-NEXT: vslidedown.vi v16, v12, 8
-; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v16, v16
-; CHECK-NEXT: vmfeq.vv v8, v12, v12
-; CHECK-NEXT: vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT: vfmax.vv v10, v10, v8
-; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v12, v10, 4
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v12, v12
-; CHECK-NEXT: vmfeq.vv v8, v10, v10
-; CHECK-NEXT: vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT: vfmax.vv v9, v9, v8
-; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v9, 2
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmax.vv v9, v11, v8
-; CHECK-NEXT: vslidedown.vi v10, v9, 1
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmax.vv v8, v11, v8
-; CHECK-NEXT: vfmv.f.s fa0, v8
-; CHECK-NEXT: ret
- %v = load <16 x float>, ptr %x
- %red = call nnan float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> %v)
- ret float %red
-}
-
-declare float @llvm.vector.reduce.fmaximum.v32f32(<32 x float>)
-
-define float @vreduce_fmaximum_v32f32(ptr %x) {
-; CHECK-LABEL: vreduce_fmaximum_v32f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vle32.v v16, (a0)
-; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; CHECK-NEXT: vslidedown.vi v24, v16, 16
-; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v24, v24
-; CHECK-NEXT: vmfeq.vv v8, v16, v16
-; CHECK-NEXT: vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT: vfmax.vv v12, v12, v8
+; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 8, e32, m4, ta, ma
-; CHECK-NEXT: vslidedown.vi v16, v12, 8
+; CHECK-NEXT: vslidedown.vi v12, v8, 8
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v16, v16
-; CHECK-NEXT: vmfeq.vv v8, v12, v12
-; CHECK-NEXT: vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT: vfmax.vv v10, v10, v8
+; CHECK-NEXT: vfmax.vv v8, v8, v12
; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v12, v10, 4
+; CHECK-NEXT: vslidedown.vi v10, v8, 4
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v12, v12
-; CHECK-NEXT: vmfeq.vv v8, v10, v10
-; CHECK-NEXT: vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT: vfmax.vv v9, v9, v8
-; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v9, 2
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmax.vv v9, v11, v8
-; CHECK-NEXT: vslidedown.vi v10, v9, 1
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmax.vv v8, v11, v8
+; CHECK-NEXT: vfmax.vv v8, v8, v10
+; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; CHECK-NEXT: vslidedown.vi v9, v8, 2
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v9
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
+; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
- %v = load <32 x float>, ptr %x
- %red = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> %v)
+ %v = load <16 x float>, ptr %x
+ %red = call nnan float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> %v)
ret float %red
}
-define float @vreduce_fmaximum_v32f32_nonans(ptr %x) {
-; CHECK-LABEL: vreduce_fmaximum_v32f32_nonans:
+declare float @llvm.vector.reduce.fmaximum.v32f32(<32 x float>)
+
+define float @vreduce_fmaximum_v32f32(ptr %x) {
+; CHECK-LABEL: vreduce_fmaximum_v32f32:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
@@ -3303,6 +2981,38 @@ define float @vreduce_fmaximum_v32f32_nonans(ptr %x) {
; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
; CHECK-NEXT: vfmax.vv v8, v11, v8
; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: ret
+ %v = load <32 x float>, ptr %x
+ %red = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> %v)
+ ret float %red
+}
+
+define float @vreduce_fmaximum_v32f32_nonans(ptr %x) {
+; CHECK-LABEL: vreduce_fmaximum_v32f32_nonans:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 32
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; CHECK-NEXT: vslidedown.vi v16, v8, 16
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v16
+; CHECK-NEXT: vsetivli zero, 8, e32, m4, ta, ma
+; CHECK-NEXT: vslidedown.vi v12, v8, 8
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v12
+; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
+; CHECK-NEXT: vslidedown.vi v10, v8, 4
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v10
+; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; CHECK-NEXT: vslidedown.vi v9, v8, 2
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v9
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
+; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v9
+; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <32 x float>, ptr %x
%red = call nnan float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> %v)
@@ -3397,51 +3107,26 @@ define float @vreduce_fmaximum_v64f32_nonans(ptr %x) {
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle32.v v16, (a0)
-; CHECK-NEXT: vfmax.vv v16, v8, v16
+; CHECK-NEXT: vfmax.vv v8, v8, v16
; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; CHECK-NEXT: vslidedown.vi v24, v16, 16
+; CHECK-NEXT: vslidedown.vi v16, v8, 16
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v24, v24
-; CHECK-NEXT: vmfeq.vv v8, v16, v16
-; CHECK-NEXT: vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT: vfmax.vv v12, v12, v8
+; CHECK-NEXT: vfmax.vv v8, v8, v16
; CHECK-NEXT: vsetivli zero, 8, e32, m4, ta, ma
-; CHECK-NEXT: vslidedown.vi v16, v12, 8
+; CHECK-NEXT: vslidedown.vi v12, v8, 8
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v16, v16
-; CHECK-NEXT: vmfeq.vv v8, v12, v12
-; CHECK-NEXT: vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT: vfmax.vv v10, v10, v8
+; CHECK-NEXT: vfmax.vv v8, v8, v12
; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v12, v10, 4
+; CHECK-NEXT: vslidedown.vi v10, v8, 4
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v12, v12
-; CHECK-NEXT: vmfeq.vv v8, v10, v10
-; CHECK-NEXT: vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT: vfmax.vv v9, v9, v8
+; CHECK-NEXT: vfmax.vv v8, v8, v10
; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v9, 2
+; CHECK-NEXT: vslidedown.vi v9, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmax.vv v9, v11, v8
-; CHECK-NEXT: vslidedown.vi v10, v9, 1
+; CHECK-NEXT: vfmax.vv v8, v8, v9
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmax.vv v8, v11, v8
+; CHECK-NEXT: vfmax.vv v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <64 x float>, ptr %x
@@ -3605,51 +3290,26 @@ define float @vreduce_fmaximum_v128f32_nonans(ptr %x) {
; CHECK-NEXT: vle32.v v0, (a1)
; CHECK-NEXT: vfmax.vv v16, v24, v16
; CHECK-NEXT: vfmax.vv v8, v8, v0
-; CHECK-NEXT: vfmax.vv v16, v8, v16
+; CHECK-NEXT: vfmax.vv v8, v8, v16
; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; CHECK-NEXT: vslidedown.vi v24, v16, 16
+; CHECK-NEXT: vslidedown.vi v16, v8, 16
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v24, v24
-; CHECK-NEXT: vmfeq.vv v8, v16, v16
-; CHECK-NEXT: vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT: vfmax.vv v12, v12, v8
+; CHECK-NEXT: vfmax.vv v8, v8, v16
; CHECK-NEXT: vsetivli zero, 8, e32, m4, ta, ma
-; CHECK-NEXT: vslidedown.vi v16, v12, 8
+; CHECK-NEXT: vslidedown.vi v12, v8, 8
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v16, v16
-; CHECK-NEXT: vmfeq.vv v8, v12, v12
-; CHECK-NEXT: vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT: vfmax.vv v10, v10, v8
+; CHECK-NEXT: vfmax.vv v8, v8, v12
; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v12, v10, 4
+; CHECK-NEXT: vslidedown.vi v10, v8, 4
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v12, v12
-; CHECK-NEXT: vmfeq.vv v8, v10, v10
-; CHECK-NEXT: vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT: vfmax.vv v9, v9, v8
+; CHECK-NEXT: vfmax.vv v8, v8, v10
; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v9, 2
+; CHECK-NEXT: vslidedown.vi v9, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmax.vv v9, v11, v8
-; CHECK-NEXT: vslidedown.vi v10, v9, 1
+; CHECK-NEXT: vfmax.vv v8, v8, v9
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmax.vv v8, v11, v8
+; CHECK-NEXT: vfmax.vv v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <128 x float>, ptr %x
@@ -3683,15 +3343,10 @@ define double @vreduce_fmaximum_v2f64_nonans(ptr %x) {
; CHECK-LABEL: vreduce_fmaximum_v2f64_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vle64.v v9, (a0)
-; CHECK-NEXT: vslidedown.vi v10, v9, 1
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmax.vv v8, v11, v8
+; CHECK-NEXT: vfmax.vv v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <2 x double>, ptr %x
@@ -3734,24 +3389,14 @@ define double @vreduce_fmaximum_v4f64_nonans(ptr %x) {
; CHECK-LABEL: vreduce_fmaximum_v4f64_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vle64.v v10, (a0)
+; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v12, v10, 2
+; CHECK-NEXT: vslidedown.vi v10, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v12, v12
-; CHECK-NEXT: vmfeq.vv v8, v10, v10
-; CHECK-NEXT: vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT: vfmax.vv v9, v9, v8
-; CHECK-NEXT: vslidedown.vi v10, v9, 1
+; CHECK-NEXT: vfmax.vv v8, v8, v10
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmax.vv v8, v11, v8
+; CHECK-NEXT: vfmax.vv v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <4 x double>, ptr %x
@@ -3803,33 +3448,18 @@ define double @vreduce_fmaximum_v8f64_nonans(ptr %x) {
; CHECK-LABEL: vreduce_fmaximum_v8f64_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT: vle64.v v12, (a0)
+; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 4, e64, m4, ta, ma
-; CHECK-NEXT: vslidedown.vi v16, v12, 4
+; CHECK-NEXT: vslidedown.vi v12, v8, 4
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v16, v16
-; CHECK-NEXT: vmfeq.vv v8, v12, v12
-; CHECK-NEXT: vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT: vfmax.vv v10, v10, v8
+; CHECK-NEXT: vfmax.vv v8, v8, v12
; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v12, v10, 2
+; CHECK-NEXT: vslidedown.vi v10, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v12, v12
-; CHECK-NEXT: vmfeq.vv v8, v10, v10
-; CHECK-NEXT: vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT: vfmax.vv v9, v9, v8
-; CHECK-NEXT: vslidedown.vi v10, v9, 1
+; CHECK-NEXT: vfmax.vv v8, v8, v10
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmax.vv v8, v11, v8
+; CHECK-NEXT: vfmax.vv v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <8 x double>, ptr %x
@@ -3890,42 +3520,22 @@ define double @vreduce_fmaximum_v16f64_nonans(ptr %x) {
; CHECK-LABEL: vreduce_fmaximum_v16f64_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v16, (a0)
+; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 8, e64, m8, ta, ma
-; CHECK-NEXT: vslidedown.vi v24, v16, 8
+; CHECK-NEXT: vslidedown.vi v16, v8, 8
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v24, v24
-; CHECK-NEXT: vmfeq.vv v8, v16, v16
-; CHECK-NEXT: vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT: vfmax.vv v12, v12, v8
+; CHECK-NEXT: vfmax.vv v8, v8, v16
; CHECK-NEXT: vsetivli zero, 4, e64, m4, ta, ma
-; CHECK-NEXT: vslidedown.vi v16, v12, 4
+; CHECK-NEXT: vslidedown.vi v12, v8, 4
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v16, v16
-; CHECK-NEXT: vmfeq.vv v8, v12, v12
-; CHECK-NEXT: vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT: vfmax.vv v10, v10, v8
+; CHECK-NEXT: vfmax.vv v8, v8, v12
; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v12, v10, 2
+; CHECK-NEXT: vslidedown.vi v10, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v12, v12
-; CHECK-NEXT: vmfeq.vv v8, v10, v10
-; CHECK-NEXT: vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT: vfmax.vv v9, v9, v8
-; CHECK-NEXT: vslidedown.vi v10, v9, 1
+; CHECK-NEXT: vfmax.vv v8, v8, v10
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmax.vv v8, v11, v8
+; CHECK-NEXT: vfmax.vv v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <16 x double>, ptr %x
@@ -4010,42 +3620,22 @@ define double @vreduce_fmaximum_v32f64_nonans(ptr %x) {
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle64.v v16, (a0)
-; CHECK-NEXT: vfmax.vv v16, v8, v16
+; CHECK-NEXT: vfmax.vv v8, v8, v16
; CHECK-NEXT: vsetivli zero, 8, e64, m8, ta, ma
-; CHECK-NEXT: vslidedown.vi v24, v16, 8
+; CHECK-NEXT: vslidedown.vi v16, v8, 8
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v24, v24
-; CHECK-NEXT: vmfeq.vv v8, v16, v16
-; CHECK-NEXT: vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT: vfmax.vv v12, v12, v8
+; CHECK-NEXT: vfmax.vv v8, v8, v16
; CHECK-NEXT: vsetivli zero, 4, e64, m4, ta, ma
-; CHECK-NEXT: vslidedown.vi v16, v12, 4
+; CHECK-NEXT: vslidedown.vi v12, v8, 4
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v16, v16
-; CHECK-NEXT: vmfeq.vv v8, v12, v12
-; CHECK-NEXT: vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT: vfmax.vv v10, v10, v8
+; CHECK-NEXT: vfmax.vv v8, v8, v12
; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v12, v10, 2
+; CHECK-NEXT: vslidedown.vi v10, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v12, v12
-; CHECK-NEXT: vmfeq.vv v8, v10, v10
-; CHECK-NEXT: vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT: vfmax.vv v9, v9, v8
-; CHECK-NEXT: vslidedown.vi v10, v9, 1
+; CHECK-NEXT: vfmax.vv v8, v8, v10
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmax.vv v8, v11, v8
+; CHECK-NEXT: vfmax.vv v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <32 x double>, ptr %x
@@ -4198,42 +3788,22 @@ define double @vreduce_fmaximum_v64f64_nonans(ptr %x) {
; CHECK-NEXT: vle64.v v0, (a1)
; CHECK-NEXT: vfmax.vv v16, v24, v16
; CHECK-NEXT: vfmax.vv v8, v8, v0
-; CHECK-NEXT: vfmax.vv v16, v8, v16
+; CHECK-NEXT: vfmax.vv v8, v8, v16
; CHECK-NEXT: vsetivli zero, 8, e64, m8, ta, ma
-; CHECK-NEXT: vslidedown.vi v24, v16, 8
+; CHECK-NEXT: vslidedown.vi v16, v8, 8
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v24, v24
-; CHECK-NEXT: vmfeq.vv v8, v16, v16
-; CHECK-NEXT: vmerge.vvm v12, v24, v16, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0
-; CHECK-NEXT: vfmax.vv v12, v12, v8
+; CHECK-NEXT: vfmax.vv v8, v8, v16
; CHECK-NEXT: vsetivli zero, 4, e64, m4, ta, ma
-; CHECK-NEXT: vslidedown.vi v16, v12, 4
+; CHECK-NEXT: vslidedown.vi v12, v8, 4
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v16, v16
-; CHECK-NEXT: vmfeq.vv v8, v12, v12
-; CHECK-NEXT: vmerge.vvm v10, v16, v12, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT: vfmax.vv v10, v10, v8
+; CHECK-NEXT: vfmax.vv v8, v8, v12
; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v12, v10, 2
+; CHECK-NEXT: vslidedown.vi v10, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v12, v12
-; CHECK-NEXT: vmfeq.vv v8, v10, v10
-; CHECK-NEXT: vmerge.vvm v9, v12, v10, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT: vfmax.vv v9, v9, v8
-; CHECK-NEXT: vslidedown.vi v10, v9, 1
+; CHECK-NEXT: vfmax.vv v8, v8, v10
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vmfeq.vv v8, v9, v9
-; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0
-; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
-; CHECK-NEXT: vfmax.vv v8, v11, v8
+; CHECK-NEXT: vfmax.vv v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <64 x double>, ptr %x