[llvm] [RISCV] Add reductions to list of roots in tryToReduceVL (PR #107595)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 6 07:48:19 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Philip Reames (preames)
Changes:
This allows us to reduce the VLs of instructions feeding reduction instructions. In particular, <3 x Ty> reduce(load)-like sequences no longer require a VL toggle.
This was waiting on 3d72957; now that the latent correctness issue is fixed, we can expand this transform.
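To make the pattern concrete, here is a minimal sketch of the kind of input this targets (function and value names are illustrative, modeled on the reduce_sum_16xi32_prefix3 test below). The add tree over the first three lanes is recognized as a reduction and lowered to vredsum.vs; previously the vle32 ran at VL=4 while the vredsum ran at VL=3, requiring a vsetivli toggle between them, and with this change the load's VL is shrunk to match:

```llvm
; Sum the first three lanes of a loaded vector; the extract/add tree
; is formed into a vector reduction during lowering.
define i32 @reduce_sum_prefix3(ptr %p) {
  %v = load <4 x i32>, ptr %p
  %e0 = extractelement <4 x i32> %v, i64 0
  %e1 = extractelement <4 x i32> %v, i64 1
  %e2 = extractelement <4 x i32> %v, i64 2
  %s01 = add i32 %e0, %e1
  %sum = add i32 %s01, %e2
  ret i32 %sum
}
```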
---
Full diff: https://github.com/llvm/llvm-project/pull/107595.diff
4 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp (+18)
- (modified) llvm/test/CodeGen/RISCV/redundant-copy-from-tail-duplicate.ll (+1-1)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll (+17-32)
- (modified) llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll (+28-28)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index db8e496493c417..00ffebb914098e 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -144,6 +144,24 @@ bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) {
case RISCV::VMERGE_VVM:
SrcIdx = 3; // TODO: We can also handle the false operand.
break;
+ case RISCV::VREDSUM_VS:
+ case RISCV::VREDMAXU_VS:
+ case RISCV::VREDMAX_VS:
+ case RISCV::VREDMINU_VS:
+ case RISCV::VREDMIN_VS:
+ case RISCV::VREDAND_VS:
+ case RISCV::VREDOR_VS:
+ case RISCV::VREDXOR_VS:
+ case RISCV::VWREDSUM_VS:
+ case RISCV::VWREDSUMU_VS:
+ case RISCV::VFREDUSUM_VS:
+ case RISCV::VFREDOSUM_VS:
+ case RISCV::VFREDMAX_VS:
+ case RISCV::VFREDMIN_VS:
+ case RISCV::VFWREDUSUM_VS:
+ case RISCV::VFWREDOSUM_VS:
+ SrcIdx = 2;
+ break;
}
MachineOperand &VL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));
diff --git a/llvm/test/CodeGen/RISCV/redundant-copy-from-tail-duplicate.ll b/llvm/test/CodeGen/RISCV/redundant-copy-from-tail-duplicate.ll
index 3d367ddc59bca7..5d588ad66b9ca9 100644
--- a/llvm/test/CodeGen/RISCV/redundant-copy-from-tail-duplicate.ll
+++ b/llvm/test/CodeGen/RISCV/redundant-copy-from-tail-duplicate.ll
@@ -19,7 +19,7 @@ define signext i32 @sum(ptr %a, i32 signext %n, i1 %prof.min.iters.check, <vscal
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_4: # %vector.ph
-; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v8, zero
; CHECK-NEXT: vmv.v.i v12, 0
; CHECK-NEXT: vsetivli zero, 1, e32, m4, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
index 6e5ab436fc02d0..a8798474d669ae 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
@@ -121,10 +121,9 @@ define i32 @reduce_sum_16xi32_prefix2(ptr %p) {
define i32 @reduce_sum_16xi32_prefix3(ptr %p) {
; CHECK-LABEL: reduce_sum_16xi32_prefix3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -160,10 +159,9 @@ define i32 @reduce_sum_16xi32_prefix4(ptr %p) {
define i32 @reduce_sum_16xi32_prefix5(ptr %p) {
; CHECK-LABEL: reduce_sum_16xi32_prefix5:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vmv.s.x v10, zero
-; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -183,10 +181,9 @@ define i32 @reduce_sum_16xi32_prefix5(ptr %p) {
define i32 @reduce_sum_16xi32_prefix6(ptr %p) {
; CHECK-LABEL: reduce_sum_16xi32_prefix6:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vmv.s.x v10, zero
-; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -208,10 +205,9 @@ define i32 @reduce_sum_16xi32_prefix6(ptr %p) {
define i32 @reduce_sum_16xi32_prefix7(ptr %p) {
; CHECK-LABEL: reduce_sum_16xi32_prefix7:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 7, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vmv.s.x v10, zero
-; CHECK-NEXT: vsetivli zero, 7, e32, m2, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -263,10 +259,9 @@ define i32 @reduce_sum_16xi32_prefix8(ptr %p) {
define i32 @reduce_sum_16xi32_prefix9(ptr %p) {
; CHECK-LABEL: reduce_sum_16xi32_prefix9:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT: vsetivli zero, 9, e32, m4, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetivli zero, 9, e32, m4, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -294,10 +289,9 @@ define i32 @reduce_sum_16xi32_prefix9(ptr %p) {
define i32 @reduce_sum_16xi32_prefix13(ptr %p) {
; CHECK-LABEL: reduce_sum_16xi32_prefix13:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT: vsetivli zero, 13, e32, m4, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetivli zero, 13, e32, m4, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -334,10 +328,9 @@ define i32 @reduce_sum_16xi32_prefix13(ptr %p) {
define i32 @reduce_sum_16xi32_prefix14(ptr %p) {
; CHECK-LABEL: reduce_sum_16xi32_prefix14:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT: vsetivli zero, 14, e32, m4, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetivli zero, 14, e32, m4, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -375,10 +368,9 @@ define i32 @reduce_sum_16xi32_prefix14(ptr %p) {
define i32 @reduce_sum_16xi32_prefix15(ptr %p) {
; CHECK-LABEL: reduce_sum_16xi32_prefix15:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT: vsetivli zero, 15, e32, m4, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetivli zero, 15, e32, m4, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -499,10 +491,9 @@ define i32 @reduce_xor_16xi32_prefix2(ptr %p) {
define i32 @reduce_xor_16xi32_prefix5(ptr %p) {
; CHECK-LABEL: reduce_xor_16xi32_prefix5:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vmv.s.x v10, zero
-; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -537,7 +528,7 @@ define i32 @reduce_and_16xi32_prefix2(ptr %p) {
define i32 @reduce_and_16xi32_prefix5(ptr %p) {
; CHECK-LABEL: reduce_and_16xi32_prefix5:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 5, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, -1
@@ -576,10 +567,9 @@ define i32 @reduce_or_16xi32_prefix2(ptr %p) {
define i32 @reduce_or_16xi32_prefix5(ptr %p) {
; CHECK-LABEL: reduce_or_16xi32_prefix5:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vmv.s.x v10, zero
-; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
; CHECK-NEXT: vredor.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -619,11 +609,10 @@ define i32 @reduce_smax_16xi32_prefix2(ptr %p) {
define i32 @reduce_smax_16xi32_prefix5(ptr %p) {
; CHECK-LABEL: reduce_smax_16xi32_prefix5:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: lui a0, 524288
; CHECK-NEXT: vmv.s.x v10, a0
-; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -658,12 +647,11 @@ define i32 @reduce_smin_16xi32_prefix2(ptr %p) {
define i32 @reduce_smin_16xi32_prefix5(ptr %p) {
; CHECK-LABEL: reduce_smin_16xi32_prefix5:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: lui a0, 524288
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: vmv.s.x v10, a0
-; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
; CHECK-NEXT: vredmin.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -698,10 +686,9 @@ define i32 @reduce_umax_16xi32_prefix2(ptr %p) {
define i32 @reduce_umax_16xi32_prefix5(ptr %p) {
; CHECK-LABEL: reduce_umax_16xi32_prefix5:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vmv.s.x v10, zero
-; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
; CHECK-NEXT: vredmaxu.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -736,7 +723,7 @@ define i32 @reduce_umin_16xi32_prefix2(ptr %p) {
define i32 @reduce_umin_16xi32_prefix5(ptr %p) {
; RV32-LABEL: reduce_umin_16xi32_prefix5:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vsetivli zero, 5, e32, m2, ta, ma
; RV32-NEXT: vle32.v v8, (a0)
; RV32-NEXT: vsetivli zero, 5, e32, m1, ta, ma
; RV32-NEXT: vmv.v.i v10, -1
@@ -747,11 +734,10 @@ define i32 @reduce_umin_16xi32_prefix5(ptr %p) {
;
; RV64-LABEL: reduce_umin_16xi32_prefix5:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64-NEXT: vsetivli zero, 5, e32, m2, ta, ma
; RV64-NEXT: vle32.v v8, (a0)
; RV64-NEXT: li a0, -1
; RV64-NEXT: vmv.s.x v10, a0
-; RV64-NEXT: vsetivli zero, 5, e32, m2, ta, ma
; RV64-NEXT: vredminu.vs v8, v8, v10
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -787,11 +773,10 @@ define float @reduce_fadd_16xf32_prefix2(ptr %p) {
define float @reduce_fadd_16xi32_prefix5(ptr %p) {
; CHECK-LABEL: reduce_fadd_16xi32_prefix5:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: lui a0, 524288
; CHECK-NEXT: vmv.s.x v10, a0
-; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
; CHECK-NEXT: vfredusum.vs v8, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
index 7f2e3cdbfd0e3c..7d78fa5a8f3ef2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
@@ -22,10 +22,10 @@ define half @vpreduce_fadd_nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1
;
; ZVFHMIN-LABEL: vpreduce_fadd_nxv1f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfredusum.vs v8, v9, v8, v0.t
@@ -48,10 +48,10 @@ define half @vpreduce_ord_fadd_nxv1f16(half %s, <vscale x 1 x half> %v, <vscale
;
; ZVFHMIN-LABEL: vpreduce_ord_fadd_nxv1f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfredosum.vs v8, v9, v8, v0.t
@@ -76,10 +76,10 @@ define half @vpreduce_fadd_nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2
;
; ZVFHMIN-LABEL: vpreduce_fadd_nxv2f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfredusum.vs v8, v9, v8, v0.t
@@ -102,10 +102,10 @@ define half @vpreduce_ord_fadd_nxv2f16(half %s, <vscale x 2 x half> %v, <vscale
;
; ZVFHMIN-LABEL: vpreduce_ord_fadd_nxv2f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfredosum.vs v8, v9, v8, v0.t
@@ -130,10 +130,10 @@ define half @vpreduce_fadd_nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4
;
; ZVFHMIN-LABEL: vpreduce_fadd_nxv4f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfredusum.vs v8, v10, v8, v0.t
@@ -156,10 +156,10 @@ define half @vpreduce_ord_fadd_nxv4f16(half %s, <vscale x 4 x half> %v, <vscale
;
; ZVFHMIN-LABEL: vpreduce_ord_fadd_nxv4f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfredosum.vs v8, v10, v8, v0.t
@@ -233,10 +233,10 @@ define half @vpreduce_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x
; ZVFHMIN-NEXT: # %bb.5:
; ZVFHMIN-NEXT: mv a0, a4
; ZVFHMIN-NEXT: .LBB6_6:
-; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfredusum.vs v8, v24, v8, v0.t
@@ -245,20 +245,20 @@ define half @vpreduce_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa5
; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a5, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: vmv1r.v v0, v6
-; ZVFHMIN-NEXT: vsetvli zero, a5, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfredusum.vs v8, v24, v8, v0.t
; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
; ZVFHMIN-NEXT: fcvt.h.s fa5, fa5
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa5
; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT: vmv1r.v v0, v7
-; ZVFHMIN-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfredusum.vs v8, v24, v8, v0.t
; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
; ZVFHMIN-NEXT: fcvt.h.s fa5, fa5
@@ -267,9 +267,9 @@ define half @vpreduce_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x
; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
; ZVFHMIN-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v0, v7, a3
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
-; ZVFHMIN-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfredusum.vs v8, v24, v8, v0.t
; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
@@ -339,10 +339,10 @@ define half @vpreduce_ord_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscal
; ZVFHMIN-NEXT: # %bb.5:
; ZVFHMIN-NEXT: mv a0, a4
; ZVFHMIN-NEXT: .LBB7_6:
-; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfredosum.vs v8, v24, v8, v0.t
@@ -351,20 +351,20 @@ define half @vpreduce_ord_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscal
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa5
; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a5, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: vmv1r.v v0, v6
-; ZVFHMIN-NEXT: vsetvli zero, a5, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfredosum.vs v8, v24, v8, v0.t
; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
; ZVFHMIN-NEXT: fcvt.h.s fa5, fa5
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa5
; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT: vmv1r.v v0, v7
-; ZVFHMIN-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfredosum.vs v8, v24, v8, v0.t
; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
; ZVFHMIN-NEXT: fcvt.h.s fa5, fa5
@@ -373,9 +373,9 @@ define half @vpreduce_ord_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscal
; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
; ZVFHMIN-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v0, v7, a3
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
-; ZVFHMIN-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfredosum.vs v8, v24, v8, v0.t
; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
``````````
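The vreductions-fp-vp.ll diffs above come from VP reductions along the lines of the following sketch (the signature mirrors the vpreduce_fadd_nxv1f16 test in that file). Under ZVFHMIN the half source is widened with vfwcvt.f.f.v before the vfredusum.vs, and that convert can now run at the reduction's AVL rather than VLMAX, saving a vsetvli toggle:

```llvm
; Unordered VP float-add reduction over a scalable half vector.
declare half @llvm.vp.reduce.fadd.nxv1f16(half, <vscale x 1 x half>, <vscale x 1 x i1>, i32)

define half @vpreduce_fadd_nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
  %r = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> %m, i32 %evl)
  ret half %r
}
```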
https://github.com/llvm/llvm-project/pull/107595