[llvm] [RISCV] Enable TTI::shouldDropLSRSolutionIfLessProfitable by default (PR #89927)
Alex Bradbury via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 5 10:55:35 PDT 2024
https://github.com/asb updated https://github.com/llvm/llvm-project/pull/89927
>From a0e87e38a87512cc229ba4218dc8002801001417 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 24 Apr 2024 14:22:33 +0100
Subject: [PATCH] [RISCV] Enable TTI::shouldDropLSRSolutionIfLessProfitable by
default
This avoids some cases where LSR produces results that lead to very poor
codegen. There's a chance we'll see minor degradations for some inputs
in cases where our cost metrics say the found solution is worse than the
starting point, but in reality it is better.
---
.../Target/RISCV/RISCVTargetTransformInfo.h | 2 +
.../RISCV/rvv/dont-sink-splat-operands.ll | 137 +++--
.../CodeGen/RISCV/rvv/sink-splat-operands.ll | 518 +++++++++---------
.../RISCV/rvv/vsetvli-insert-crossbb.ll | 40 +-
.../LoopStrengthReduce/RISCV/icmp-zero.ll | 6 +-
.../RISCV/lsr-cost-compare.ll | 29 +-
.../RISCV/lsr-drop-solution-dbg-msg.ll | 2 -
7 files changed, 357 insertions(+), 377 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index a4d1390875095..d8f85e25c0aed 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -397,6 +397,8 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
bool shouldFoldTerminatingConditionAfterLSR() const {
return true;
}
+
+ bool shouldDropLSRSolutionIfLessProfitable() const { return true; }
};
} // end namespace llvm
diff --git a/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll
index 2b4b8e979f3d7..92639be0017e8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll
@@ -86,30 +86,29 @@ declare i64 @llvm.vscale.i64()
define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
; NO-SINK-LABEL: sink_splat_add_scalable:
; NO-SINK: # %bb.0: # %entry
-; NO-SINK-NEXT: csrr a5, vlenb
-; NO-SINK-NEXT: srli a2, a5, 1
+; NO-SINK-NEXT: csrr a2, vlenb
+; NO-SINK-NEXT: srli a2, a2, 1
; NO-SINK-NEXT: li a3, 1024
; NO-SINK-NEXT: bgeu a3, a2, .LBB1_2
; NO-SINK-NEXT: # %bb.1:
; NO-SINK-NEXT: li a3, 0
; NO-SINK-NEXT: j .LBB1_5
; NO-SINK-NEXT: .LBB1_2: # %vector.ph
+; NO-SINK-NEXT: li a5, 0
; NO-SINK-NEXT: addi a3, a2, -1
; NO-SINK-NEXT: andi a4, a3, 1024
; NO-SINK-NEXT: xori a3, a4, 1024
; NO-SINK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; NO-SINK-NEXT: vmv.v.x v8, a1
-; NO-SINK-NEXT: slli a5, a5, 1
-; NO-SINK-NEXT: mv a6, a0
-; NO-SINK-NEXT: mv a7, a3
; NO-SINK-NEXT: .LBB1_3: # %vector.body
; NO-SINK-NEXT: # =>This Inner Loop Header: Depth=1
+; NO-SINK-NEXT: slli a6, a5, 2
+; NO-SINK-NEXT: add a6, a0, a6
; NO-SINK-NEXT: vl2re32.v v10, (a6)
; NO-SINK-NEXT: vadd.vv v10, v10, v8
+; NO-SINK-NEXT: add a5, a5, a2
; NO-SINK-NEXT: vs2r.v v10, (a6)
-; NO-SINK-NEXT: sub a7, a7, a2
-; NO-SINK-NEXT: add a6, a6, a5
-; NO-SINK-NEXT: bnez a7, .LBB1_3
+; NO-SINK-NEXT: bne a5, a3, .LBB1_3
; NO-SINK-NEXT: # %bb.4: # %middle.block
; NO-SINK-NEXT: beqz a4, .LBB1_7
; NO-SINK-NEXT: .LBB1_5: # %for.body.preheader
@@ -129,29 +128,28 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
;
; SINK-LABEL: sink_splat_add_scalable:
; SINK: # %bb.0: # %entry
-; SINK-NEXT: csrr a5, vlenb
-; SINK-NEXT: srli a2, a5, 1
+; SINK-NEXT: csrr a2, vlenb
+; SINK-NEXT: srli a2, a2, 1
; SINK-NEXT: li a3, 1024
; SINK-NEXT: bgeu a3, a2, .LBB1_2
; SINK-NEXT: # %bb.1:
; SINK-NEXT: li a3, 0
; SINK-NEXT: j .LBB1_5
; SINK-NEXT: .LBB1_2: # %vector.ph
+; SINK-NEXT: li a5, 0
; SINK-NEXT: addi a3, a2, -1
; SINK-NEXT: andi a4, a3, 1024
; SINK-NEXT: xori a3, a4, 1024
-; SINK-NEXT: slli a5, a5, 1
-; SINK-NEXT: mv a6, a0
-; SINK-NEXT: mv a7, a3
-; SINK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
+; SINK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; SINK-NEXT: .LBB1_3: # %vector.body
; SINK-NEXT: # =>This Inner Loop Header: Depth=1
+; SINK-NEXT: slli a6, a5, 2
+; SINK-NEXT: add a6, a0, a6
; SINK-NEXT: vl2re32.v v8, (a6)
; SINK-NEXT: vadd.vx v8, v8, a1
+; SINK-NEXT: add a5, a5, a2
; SINK-NEXT: vs2r.v v8, (a6)
-; SINK-NEXT: sub a7, a7, a2
-; SINK-NEXT: add a6, a6, a5
-; SINK-NEXT: bnez a7, .LBB1_3
+; SINK-NEXT: bne a5, a3, .LBB1_3
; SINK-NEXT: # %bb.4: # %middle.block
; SINK-NEXT: beqz a4, .LBB1_7
; SINK-NEXT: .LBB1_5: # %for.body.preheader
@@ -171,29 +169,28 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
;
; DEFAULT-LABEL: sink_splat_add_scalable:
; DEFAULT: # %bb.0: # %entry
-; DEFAULT-NEXT: csrr a5, vlenb
-; DEFAULT-NEXT: srli a2, a5, 1
+; DEFAULT-NEXT: csrr a2, vlenb
+; DEFAULT-NEXT: srli a2, a2, 1
; DEFAULT-NEXT: li a3, 1024
; DEFAULT-NEXT: bgeu a3, a2, .LBB1_2
; DEFAULT-NEXT: # %bb.1:
; DEFAULT-NEXT: li a3, 0
; DEFAULT-NEXT: j .LBB1_5
; DEFAULT-NEXT: .LBB1_2: # %vector.ph
+; DEFAULT-NEXT: li a5, 0
; DEFAULT-NEXT: addi a3, a2, -1
; DEFAULT-NEXT: andi a4, a3, 1024
; DEFAULT-NEXT: xori a3, a4, 1024
-; DEFAULT-NEXT: slli a5, a5, 1
-; DEFAULT-NEXT: mv a6, a0
-; DEFAULT-NEXT: mv a7, a3
-; DEFAULT-NEXT: vsetvli t0, zero, e32, m2, ta, ma
+; DEFAULT-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; DEFAULT-NEXT: .LBB1_3: # %vector.body
; DEFAULT-NEXT: # =>This Inner Loop Header: Depth=1
+; DEFAULT-NEXT: slli a6, a5, 2
+; DEFAULT-NEXT: add a6, a0, a6
; DEFAULT-NEXT: vl2re32.v v8, (a6)
; DEFAULT-NEXT: vadd.vx v8, v8, a1
+; DEFAULT-NEXT: add a5, a5, a2
; DEFAULT-NEXT: vs2r.v v8, (a6)
-; DEFAULT-NEXT: sub a7, a7, a2
-; DEFAULT-NEXT: add a6, a6, a5
-; DEFAULT-NEXT: bnez a7, .LBB1_3
+; DEFAULT-NEXT: bne a5, a3, .LBB1_3
; DEFAULT-NEXT: # %bb.4: # %middle.block
; DEFAULT-NEXT: beqz a4, .LBB1_7
; DEFAULT-NEXT: .LBB1_5: # %for.body.preheader
@@ -407,32 +404,32 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
; NO-SINK-LABEL: sink_splat_fadd_scalable:
; NO-SINK: # %bb.0: # %entry
; NO-SINK-NEXT: csrr a1, vlenb
-; NO-SINK-NEXT: srli a2, a1, 2
-; NO-SINK-NEXT: li a3, 1024
-; NO-SINK-NEXT: bgeu a3, a2, .LBB4_2
+; NO-SINK-NEXT: srli a1, a1, 2
+; NO-SINK-NEXT: li a2, 1024
+; NO-SINK-NEXT: bgeu a2, a1, .LBB4_2
; NO-SINK-NEXT: # %bb.1:
-; NO-SINK-NEXT: li a3, 0
+; NO-SINK-NEXT: li a2, 0
; NO-SINK-NEXT: j .LBB4_5
; NO-SINK-NEXT: .LBB4_2: # %vector.ph
-; NO-SINK-NEXT: addi a3, a2, -1
-; NO-SINK-NEXT: andi a4, a3, 1024
-; NO-SINK-NEXT: xori a3, a4, 1024
+; NO-SINK-NEXT: li a4, 0
+; NO-SINK-NEXT: addi a2, a1, -1
+; NO-SINK-NEXT: andi a3, a2, 1024
+; NO-SINK-NEXT: xori a2, a3, 1024
; NO-SINK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; NO-SINK-NEXT: vfmv.v.f v8, fa0
-; NO-SINK-NEXT: mv a5, a0
-; NO-SINK-NEXT: mv a6, a3
; NO-SINK-NEXT: .LBB4_3: # %vector.body
; NO-SINK-NEXT: # =>This Inner Loop Header: Depth=1
+; NO-SINK-NEXT: slli a5, a4, 2
+; NO-SINK-NEXT: add a5, a0, a5
; NO-SINK-NEXT: vl1re32.v v9, (a5)
; NO-SINK-NEXT: vfadd.vv v9, v9, v8
+; NO-SINK-NEXT: add a4, a4, a1
; NO-SINK-NEXT: vs1r.v v9, (a5)
-; NO-SINK-NEXT: sub a6, a6, a2
-; NO-SINK-NEXT: add a5, a5, a1
-; NO-SINK-NEXT: bnez a6, .LBB4_3
+; NO-SINK-NEXT: bne a4, a2, .LBB4_3
; NO-SINK-NEXT: # %bb.4: # %middle.block
-; NO-SINK-NEXT: beqz a4, .LBB4_7
+; NO-SINK-NEXT: beqz a3, .LBB4_7
; NO-SINK-NEXT: .LBB4_5: # %for.body.preheader
-; NO-SINK-NEXT: slli a1, a3, 2
+; NO-SINK-NEXT: slli a1, a2, 2
; NO-SINK-NEXT: add a1, a0, a1
; NO-SINK-NEXT: lui a2, 1
; NO-SINK-NEXT: add a0, a0, a2
@@ -449,31 +446,31 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
; SINK-LABEL: sink_splat_fadd_scalable:
; SINK: # %bb.0: # %entry
; SINK-NEXT: csrr a1, vlenb
-; SINK-NEXT: srli a2, a1, 2
-; SINK-NEXT: li a3, 1024
-; SINK-NEXT: bgeu a3, a2, .LBB4_2
+; SINK-NEXT: srli a1, a1, 2
+; SINK-NEXT: li a2, 1024
+; SINK-NEXT: bgeu a2, a1, .LBB4_2
; SINK-NEXT: # %bb.1:
-; SINK-NEXT: li a3, 0
+; SINK-NEXT: li a2, 0
; SINK-NEXT: j .LBB4_5
; SINK-NEXT: .LBB4_2: # %vector.ph
-; SINK-NEXT: addi a3, a2, -1
-; SINK-NEXT: andi a4, a3, 1024
-; SINK-NEXT: xori a3, a4, 1024
-; SINK-NEXT: mv a5, a0
-; SINK-NEXT: mv a6, a3
-; SINK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
+; SINK-NEXT: li a4, 0
+; SINK-NEXT: addi a2, a1, -1
+; SINK-NEXT: andi a3, a2, 1024
+; SINK-NEXT: xori a2, a3, 1024
+; SINK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; SINK-NEXT: .LBB4_3: # %vector.body
; SINK-NEXT: # =>This Inner Loop Header: Depth=1
+; SINK-NEXT: slli a5, a4, 2
+; SINK-NEXT: add a5, a0, a5
; SINK-NEXT: vl1re32.v v8, (a5)
; SINK-NEXT: vfadd.vf v8, v8, fa0
+; SINK-NEXT: add a4, a4, a1
; SINK-NEXT: vs1r.v v8, (a5)
-; SINK-NEXT: sub a6, a6, a2
-; SINK-NEXT: add a5, a5, a1
-; SINK-NEXT: bnez a6, .LBB4_3
+; SINK-NEXT: bne a4, a2, .LBB4_3
; SINK-NEXT: # %bb.4: # %middle.block
-; SINK-NEXT: beqz a4, .LBB4_7
+; SINK-NEXT: beqz a3, .LBB4_7
; SINK-NEXT: .LBB4_5: # %for.body.preheader
-; SINK-NEXT: slli a1, a3, 2
+; SINK-NEXT: slli a1, a2, 2
; SINK-NEXT: add a1, a0, a1
; SINK-NEXT: lui a2, 1
; SINK-NEXT: add a0, a0, a2
@@ -490,31 +487,31 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
; DEFAULT-LABEL: sink_splat_fadd_scalable:
; DEFAULT: # %bb.0: # %entry
; DEFAULT-NEXT: csrr a1, vlenb
-; DEFAULT-NEXT: srli a2, a1, 2
-; DEFAULT-NEXT: li a3, 1024
-; DEFAULT-NEXT: bgeu a3, a2, .LBB4_2
+; DEFAULT-NEXT: srli a1, a1, 2
+; DEFAULT-NEXT: li a2, 1024
+; DEFAULT-NEXT: bgeu a2, a1, .LBB4_2
; DEFAULT-NEXT: # %bb.1:
-; DEFAULT-NEXT: li a3, 0
+; DEFAULT-NEXT: li a2, 0
; DEFAULT-NEXT: j .LBB4_5
; DEFAULT-NEXT: .LBB4_2: # %vector.ph
-; DEFAULT-NEXT: addi a3, a2, -1
-; DEFAULT-NEXT: andi a4, a3, 1024
-; DEFAULT-NEXT: xori a3, a4, 1024
-; DEFAULT-NEXT: mv a5, a0
-; DEFAULT-NEXT: mv a6, a3
-; DEFAULT-NEXT: vsetvli a7, zero, e32, m1, ta, ma
+; DEFAULT-NEXT: li a4, 0
+; DEFAULT-NEXT: addi a2, a1, -1
+; DEFAULT-NEXT: andi a3, a2, 1024
+; DEFAULT-NEXT: xori a2, a3, 1024
+; DEFAULT-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; DEFAULT-NEXT: .LBB4_3: # %vector.body
; DEFAULT-NEXT: # =>This Inner Loop Header: Depth=1
+; DEFAULT-NEXT: slli a5, a4, 2
+; DEFAULT-NEXT: add a5, a0, a5
; DEFAULT-NEXT: vl1re32.v v8, (a5)
; DEFAULT-NEXT: vfadd.vf v8, v8, fa0
+; DEFAULT-NEXT: add a4, a4, a1
; DEFAULT-NEXT: vs1r.v v8, (a5)
-; DEFAULT-NEXT: sub a6, a6, a2
-; DEFAULT-NEXT: add a5, a5, a1
-; DEFAULT-NEXT: bnez a6, .LBB4_3
+; DEFAULT-NEXT: bne a4, a2, .LBB4_3
; DEFAULT-NEXT: # %bb.4: # %middle.block
-; DEFAULT-NEXT: beqz a4, .LBB4_7
+; DEFAULT-NEXT: beqz a3, .LBB4_7
; DEFAULT-NEXT: .LBB4_5: # %for.body.preheader
-; DEFAULT-NEXT: slli a1, a3, 2
+; DEFAULT-NEXT: slli a1, a2, 2
; DEFAULT-NEXT: add a1, a0, a1
; DEFAULT-NEXT: lui a2, 1
; DEFAULT-NEXT: add a0, a0, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
index d1c98f828e76d..7742cfc7da640 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
@@ -243,29 +243,28 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_mul_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_mul_scalable:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a2, a5, 1
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a2, a2, 1
; CHECK-NEXT: li a3, 1024
; CHECK-NEXT: bgeu a3, a2, .LBB7_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB7_5
; CHECK-NEXT: .LBB7_2: # %vector.ph
+; CHECK-NEXT: li a5, 0
; CHECK-NEXT: addi a3, a2, -1
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
-; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a3
-; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB7_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: slli a6, a5, 2
+; CHECK-NEXT: add a6, a0, a6
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vmul.vx v8, v8, a1
+; CHECK-NEXT: add a5, a5, a2
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a2
-; CHECK-NEXT: add a6, a6, a5
-; CHECK-NEXT: bnez a7, .LBB7_3
+; CHECK-NEXT: bne a5, a3, .LBB7_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB7_7
; CHECK-NEXT: .LBB7_5: # %for.body.preheader
@@ -334,29 +333,28 @@ for.body: ; preds = %for.body.preheader,
define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_add_scalable:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a2, a5, 1
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a2, a2, 1
; CHECK-NEXT: li a3, 1024
; CHECK-NEXT: bgeu a3, a2, .LBB8_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB8_5
; CHECK-NEXT: .LBB8_2: # %vector.ph
+; CHECK-NEXT: li a5, 0
; CHECK-NEXT: addi a3, a2, -1
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
-; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a3
-; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB8_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: slli a6, a5, 2
+; CHECK-NEXT: add a6, a0, a6
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vadd.vx v8, v8, a1
+; CHECK-NEXT: add a5, a5, a2
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a2
-; CHECK-NEXT: add a6, a6, a5
-; CHECK-NEXT: bnez a7, .LBB8_3
+; CHECK-NEXT: bne a5, a3, .LBB8_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB8_7
; CHECK-NEXT: .LBB8_5: # %for.body.preheader
@@ -425,29 +423,28 @@ for.body: ; preds = %for.body.preheader,
define void @sink_splat_sub_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_sub_scalable:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a2, a5, 1
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a2, a2, 1
; CHECK-NEXT: li a3, 1024
; CHECK-NEXT: bgeu a3, a2, .LBB9_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB9_5
; CHECK-NEXT: .LBB9_2: # %vector.ph
+; CHECK-NEXT: li a5, 0
; CHECK-NEXT: addi a3, a2, -1
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
-; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a3
-; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB9_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: slli a6, a5, 2
+; CHECK-NEXT: add a6, a0, a6
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: add a5, a5, a2
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a2
-; CHECK-NEXT: add a6, a6, a5
-; CHECK-NEXT: bnez a7, .LBB9_3
+; CHECK-NEXT: bne a5, a3, .LBB9_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB9_7
; CHECK-NEXT: .LBB9_5: # %for.body.preheader
@@ -516,29 +513,28 @@ for.body: ; preds = %for.body.preheader,
define void @sink_splat_rsub_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_rsub_scalable:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a2, a5, 1
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a2, a2, 1
; CHECK-NEXT: li a3, 1024
; CHECK-NEXT: bgeu a3, a2, .LBB10_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB10_5
; CHECK-NEXT: .LBB10_2: # %vector.ph
+; CHECK-NEXT: li a5, 0
; CHECK-NEXT: addi a3, a2, -1
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
-; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a3
-; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB10_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: slli a6, a5, 2
+; CHECK-NEXT: add a6, a0, a6
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vrsub.vx v8, v8, a1
+; CHECK-NEXT: add a5, a5, a2
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a2
-; CHECK-NEXT: add a6, a6, a5
-; CHECK-NEXT: bnez a7, .LBB10_3
+; CHECK-NEXT: bne a5, a3, .LBB10_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB10_7
; CHECK-NEXT: .LBB10_5: # %for.body.preheader
@@ -607,29 +603,28 @@ for.body: ; preds = %for.body.preheader,
define void @sink_splat_and_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_and_scalable:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a2, a5, 1
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a2, a2, 1
; CHECK-NEXT: li a3, 1024
; CHECK-NEXT: bgeu a3, a2, .LBB11_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB11_5
; CHECK-NEXT: .LBB11_2: # %vector.ph
+; CHECK-NEXT: li a5, 0
; CHECK-NEXT: addi a3, a2, -1
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
-; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a3
-; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB11_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: slli a6, a5, 2
+; CHECK-NEXT: add a6, a0, a6
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vand.vx v8, v8, a1
+; CHECK-NEXT: add a5, a5, a2
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a2
-; CHECK-NEXT: add a6, a6, a5
-; CHECK-NEXT: bnez a7, .LBB11_3
+; CHECK-NEXT: bne a5, a3, .LBB11_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB11_7
; CHECK-NEXT: .LBB11_5: # %for.body.preheader
@@ -698,29 +693,28 @@ for.body: ; preds = %for.body.preheader,
define void @sink_splat_or_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_or_scalable:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a2, a5, 1
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a2, a2, 1
; CHECK-NEXT: li a3, 1024
; CHECK-NEXT: bgeu a3, a2, .LBB12_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB12_5
; CHECK-NEXT: .LBB12_2: # %vector.ph
+; CHECK-NEXT: li a5, 0
; CHECK-NEXT: addi a3, a2, -1
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
-; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a3
-; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB12_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: slli a6, a5, 2
+; CHECK-NEXT: add a6, a0, a6
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vor.vx v8, v8, a1
+; CHECK-NEXT: add a5, a5, a2
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a2
-; CHECK-NEXT: add a6, a6, a5
-; CHECK-NEXT: bnez a7, .LBB12_3
+; CHECK-NEXT: bne a5, a3, .LBB12_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB12_7
; CHECK-NEXT: .LBB12_5: # %for.body.preheader
@@ -789,29 +783,28 @@ for.body: ; preds = %for.body.preheader,
define void @sink_splat_xor_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_xor_scalable:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a2, a5, 1
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a2, a2, 1
; CHECK-NEXT: li a3, 1024
; CHECK-NEXT: bgeu a3, a2, .LBB13_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB13_5
; CHECK-NEXT: .LBB13_2: # %vector.ph
+; CHECK-NEXT: li a5, 0
; CHECK-NEXT: addi a3, a2, -1
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
-; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a3
-; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB13_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: slli a6, a5, 2
+; CHECK-NEXT: add a6, a0, a6
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vxor.vx v8, v8, a1
+; CHECK-NEXT: add a5, a5, a2
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a2
-; CHECK-NEXT: add a6, a6, a5
-; CHECK-NEXT: bnez a7, .LBB13_3
+; CHECK-NEXT: bne a5, a3, .LBB13_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB13_7
; CHECK-NEXT: .LBB13_5: # %for.body.preheader
@@ -982,29 +975,28 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_shl_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_shl_scalable:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a2, a5, 1
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a2, a2, 1
; CHECK-NEXT: li a3, 1024
; CHECK-NEXT: bgeu a3, a2, .LBB17_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB17_5
; CHECK-NEXT: .LBB17_2: # %vector.ph
+; CHECK-NEXT: li a5, 0
; CHECK-NEXT: addi a3, a2, -1
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
-; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a3
-; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB17_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: slli a6, a5, 2
+; CHECK-NEXT: add a6, a0, a6
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vsll.vx v8, v8, a1
+; CHECK-NEXT: add a5, a5, a2
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a2
-; CHECK-NEXT: add a6, a6, a5
-; CHECK-NEXT: bnez a7, .LBB17_3
+; CHECK-NEXT: bne a5, a3, .LBB17_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB17_7
; CHECK-NEXT: .LBB17_5: # %for.body.preheader
@@ -1073,29 +1065,28 @@ for.body: ; preds = %for.body.preheader,
define void @sink_splat_lshr_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_lshr_scalable:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a2, a5, 1
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a2, a2, 1
; CHECK-NEXT: li a3, 1024
; CHECK-NEXT: bgeu a3, a2, .LBB18_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB18_5
; CHECK-NEXT: .LBB18_2: # %vector.ph
+; CHECK-NEXT: li a5, 0
; CHECK-NEXT: addi a3, a2, -1
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
-; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a3
-; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB18_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: slli a6, a5, 2
+; CHECK-NEXT: add a6, a0, a6
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vsrl.vx v8, v8, a1
+; CHECK-NEXT: add a5, a5, a2
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a2
-; CHECK-NEXT: add a6, a6, a5
-; CHECK-NEXT: bnez a7, .LBB18_3
+; CHECK-NEXT: bne a5, a3, .LBB18_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB18_7
; CHECK-NEXT: .LBB18_5: # %for.body.preheader
@@ -1164,33 +1155,32 @@ for.body: ; preds = %for.body.preheader,
define void @sink_splat_ashr_scalable(ptr nocapture %a) {
; CHECK-LABEL: sink_splat_ashr_scalable:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: csrr a4, vlenb
-; CHECK-NEXT: srli a2, a4, 1
-; CHECK-NEXT: li a1, 1024
-; CHECK-NEXT: bgeu a1, a2, .LBB19_2
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: srli a1, a1, 1
+; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: bgeu a2, a1, .LBB19_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB19_5
; CHECK-NEXT: .LBB19_2: # %vector.ph
-; CHECK-NEXT: addi a1, a2, -1
-; CHECK-NEXT: andi a3, a1, 1024
-; CHECK-NEXT: xori a1, a3, 1024
-; CHECK-NEXT: slli a4, a4, 1
-; CHECK-NEXT: mv a5, a0
-; CHECK-NEXT: mv a6, a1
-; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, ma
+; CHECK-NEXT: li a4, 0
+; CHECK-NEXT: addi a2, a1, -1
+; CHECK-NEXT: andi a3, a2, 1024
+; CHECK-NEXT: xori a2, a3, 1024
+; CHECK-NEXT: vsetvli a5, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB19_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: slli a5, a4, 2
+; CHECK-NEXT: add a5, a0, a5
; CHECK-NEXT: vl2re32.v v8, (a5)
; CHECK-NEXT: vsra.vi v8, v8, 2
+; CHECK-NEXT: add a4, a4, a1
; CHECK-NEXT: vs2r.v v8, (a5)
-; CHECK-NEXT: sub a6, a6, a2
-; CHECK-NEXT: add a5, a5, a4
-; CHECK-NEXT: bnez a6, .LBB19_3
+; CHECK-NEXT: bne a4, a2, .LBB19_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a3, .LBB19_7
; CHECK-NEXT: .LBB19_5: # %for.body.preheader
-; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: slli a1, a2, 2
; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a0, a0, a2
@@ -1458,31 +1448,31 @@ define void @sink_splat_fmul_scalable(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fmul_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a2, a1, 2
-; CHECK-NEXT: li a3, 1024
-; CHECK-NEXT: bgeu a3, a2, .LBB26_2
+; CHECK-NEXT: srli a1, a1, 2
+; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: bgeu a2, a1, .LBB26_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB26_5
; CHECK-NEXT: .LBB26_2: # %vector.ph
-; CHECK-NEXT: addi a3, a2, -1
-; CHECK-NEXT: andi a4, a3, 1024
-; CHECK-NEXT: xori a3, a4, 1024
-; CHECK-NEXT: mv a5, a0
-; CHECK-NEXT: mv a6, a3
-; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
+; CHECK-NEXT: li a4, 0
+; CHECK-NEXT: addi a2, a1, -1
+; CHECK-NEXT: andi a3, a2, 1024
+; CHECK-NEXT: xori a2, a3, 1024
+; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB26_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: slli a5, a4, 2
+; CHECK-NEXT: add a5, a0, a5
; CHECK-NEXT: vl1re32.v v8, (a5)
; CHECK-NEXT: vfmul.vf v8, v8, fa0
+; CHECK-NEXT: add a4, a4, a1
; CHECK-NEXT: vs1r.v v8, (a5)
-; CHECK-NEXT: sub a6, a6, a2
-; CHECK-NEXT: add a5, a5, a1
-; CHECK-NEXT: bnez a6, .LBB26_3
+; CHECK-NEXT: bne a4, a2, .LBB26_3
; CHECK-NEXT: # %bb.4: # %middle.block
-; CHECK-NEXT: beqz a4, .LBB26_7
+; CHECK-NEXT: beqz a3, .LBB26_7
; CHECK-NEXT: .LBB26_5: # %for.body.preheader
-; CHECK-NEXT: slli a1, a3, 2
+; CHECK-NEXT: slli a1, a2, 2
; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a0, a0, a2
@@ -1548,31 +1538,31 @@ define void @sink_splat_fdiv_scalable(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fdiv_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a2, a1, 2
-; CHECK-NEXT: li a3, 1024
-; CHECK-NEXT: bgeu a3, a2, .LBB27_2
+; CHECK-NEXT: srli a1, a1, 2
+; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: bgeu a2, a1, .LBB27_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB27_5
; CHECK-NEXT: .LBB27_2: # %vector.ph
-; CHECK-NEXT: addi a3, a2, -1
-; CHECK-NEXT: andi a4, a3, 1024
-; CHECK-NEXT: xori a3, a4, 1024
-; CHECK-NEXT: mv a5, a0
-; CHECK-NEXT: mv a6, a3
-; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
+; CHECK-NEXT: li a4, 0
+; CHECK-NEXT: addi a2, a1, -1
+; CHECK-NEXT: andi a3, a2, 1024
+; CHECK-NEXT: xori a2, a3, 1024
+; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB27_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: slli a5, a4, 2
+; CHECK-NEXT: add a5, a0, a5
; CHECK-NEXT: vl1re32.v v8, (a5)
; CHECK-NEXT: vfdiv.vf v8, v8, fa0
+; CHECK-NEXT: add a4, a4, a1
; CHECK-NEXT: vs1r.v v8, (a5)
-; CHECK-NEXT: sub a6, a6, a2
-; CHECK-NEXT: add a5, a5, a1
-; CHECK-NEXT: bnez a6, .LBB27_3
+; CHECK-NEXT: bne a4, a2, .LBB27_3
; CHECK-NEXT: # %bb.4: # %middle.block
-; CHECK-NEXT: beqz a4, .LBB27_7
+; CHECK-NEXT: beqz a3, .LBB27_7
; CHECK-NEXT: .LBB27_5: # %for.body.preheader
-; CHECK-NEXT: slli a1, a3, 2
+; CHECK-NEXT: slli a1, a2, 2
; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a0, a0, a2
@@ -1638,31 +1628,31 @@ define void @sink_splat_frdiv_scalable(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_frdiv_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a2, a1, 2
-; CHECK-NEXT: li a3, 1024
-; CHECK-NEXT: bgeu a3, a2, .LBB28_2
+; CHECK-NEXT: srli a1, a1, 2
+; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: bgeu a2, a1, .LBB28_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB28_5
; CHECK-NEXT: .LBB28_2: # %vector.ph
-; CHECK-NEXT: addi a3, a2, -1
-; CHECK-NEXT: andi a4, a3, 1024
-; CHECK-NEXT: xori a3, a4, 1024
-; CHECK-NEXT: mv a5, a0
-; CHECK-NEXT: mv a6, a3
-; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
+; CHECK-NEXT: li a4, 0
+; CHECK-NEXT: addi a2, a1, -1
+; CHECK-NEXT: andi a3, a2, 1024
+; CHECK-NEXT: xori a2, a3, 1024
+; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB28_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: slli a5, a4, 2
+; CHECK-NEXT: add a5, a0, a5
; CHECK-NEXT: vl1re32.v v8, (a5)
; CHECK-NEXT: vfrdiv.vf v8, v8, fa0
+; CHECK-NEXT: add a4, a4, a1
; CHECK-NEXT: vs1r.v v8, (a5)
-; CHECK-NEXT: sub a6, a6, a2
-; CHECK-NEXT: add a5, a5, a1
-; CHECK-NEXT: bnez a6, .LBB28_3
+; CHECK-NEXT: bne a4, a2, .LBB28_3
; CHECK-NEXT: # %bb.4: # %middle.block
-; CHECK-NEXT: beqz a4, .LBB28_7
+; CHECK-NEXT: beqz a3, .LBB28_7
; CHECK-NEXT: .LBB28_5: # %for.body.preheader
-; CHECK-NEXT: slli a1, a3, 2
+; CHECK-NEXT: slli a1, a2, 2
; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a0, a0, a2
@@ -1728,31 +1718,31 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fadd_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a2, a1, 2
-; CHECK-NEXT: li a3, 1024
-; CHECK-NEXT: bgeu a3, a2, .LBB29_2
+; CHECK-NEXT: srli a1, a1, 2
+; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: bgeu a2, a1, .LBB29_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB29_5
; CHECK-NEXT: .LBB29_2: # %vector.ph
-; CHECK-NEXT: addi a3, a2, -1
-; CHECK-NEXT: andi a4, a3, 1024
-; CHECK-NEXT: xori a3, a4, 1024
-; CHECK-NEXT: mv a5, a0
-; CHECK-NEXT: mv a6, a3
-; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
+; CHECK-NEXT: li a4, 0
+; CHECK-NEXT: addi a2, a1, -1
+; CHECK-NEXT: andi a3, a2, 1024
+; CHECK-NEXT: xori a2, a3, 1024
+; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB29_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: slli a5, a4, 2
+; CHECK-NEXT: add a5, a0, a5
; CHECK-NEXT: vl1re32.v v8, (a5)
; CHECK-NEXT: vfadd.vf v8, v8, fa0
+; CHECK-NEXT: add a4, a4, a1
; CHECK-NEXT: vs1r.v v8, (a5)
-; CHECK-NEXT: sub a6, a6, a2
-; CHECK-NEXT: add a5, a5, a1
-; CHECK-NEXT: bnez a6, .LBB29_3
+; CHECK-NEXT: bne a4, a2, .LBB29_3
; CHECK-NEXT: # %bb.4: # %middle.block
-; CHECK-NEXT: beqz a4, .LBB29_7
+; CHECK-NEXT: beqz a3, .LBB29_7
; CHECK-NEXT: .LBB29_5: # %for.body.preheader
-; CHECK-NEXT: slli a1, a3, 2
+; CHECK-NEXT: slli a1, a2, 2
; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a0, a0, a2
@@ -1818,31 +1808,31 @@ define void @sink_splat_fsub_scalable(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fsub_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a2, a1, 2
-; CHECK-NEXT: li a3, 1024
-; CHECK-NEXT: bgeu a3, a2, .LBB30_2
+; CHECK-NEXT: srli a1, a1, 2
+; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: bgeu a2, a1, .LBB30_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB30_5
; CHECK-NEXT: .LBB30_2: # %vector.ph
-; CHECK-NEXT: addi a3, a2, -1
-; CHECK-NEXT: andi a4, a3, 1024
-; CHECK-NEXT: xori a3, a4, 1024
-; CHECK-NEXT: mv a5, a0
-; CHECK-NEXT: mv a6, a3
-; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
+; CHECK-NEXT: li a4, 0
+; CHECK-NEXT: addi a2, a1, -1
+; CHECK-NEXT: andi a3, a2, 1024
+; CHECK-NEXT: xori a2, a3, 1024
+; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB30_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: slli a5, a4, 2
+; CHECK-NEXT: add a5, a0, a5
; CHECK-NEXT: vl1re32.v v8, (a5)
; CHECK-NEXT: vfsub.vf v8, v8, fa0
+; CHECK-NEXT: add a4, a4, a1
; CHECK-NEXT: vs1r.v v8, (a5)
-; CHECK-NEXT: sub a6, a6, a2
-; CHECK-NEXT: add a5, a5, a1
-; CHECK-NEXT: bnez a6, .LBB30_3
+; CHECK-NEXT: bne a4, a2, .LBB30_3
; CHECK-NEXT: # %bb.4: # %middle.block
-; CHECK-NEXT: beqz a4, .LBB30_7
+; CHECK-NEXT: beqz a3, .LBB30_7
; CHECK-NEXT: .LBB30_5: # %for.body.preheader
-; CHECK-NEXT: slli a1, a3, 2
+; CHECK-NEXT: slli a1, a2, 2
; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a0, a0, a2
@@ -1908,31 +1898,31 @@ define void @sink_splat_frsub_scalable(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_frsub_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a2, a1, 2
-; CHECK-NEXT: li a3, 1024
-; CHECK-NEXT: bgeu a3, a2, .LBB31_2
+; CHECK-NEXT: srli a1, a1, 2
+; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: bgeu a2, a1, .LBB31_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB31_5
; CHECK-NEXT: .LBB31_2: # %vector.ph
-; CHECK-NEXT: addi a3, a2, -1
-; CHECK-NEXT: andi a4, a3, 1024
-; CHECK-NEXT: xori a3, a4, 1024
-; CHECK-NEXT: mv a5, a0
-; CHECK-NEXT: mv a6, a3
-; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
+; CHECK-NEXT: li a4, 0
+; CHECK-NEXT: addi a2, a1, -1
+; CHECK-NEXT: andi a3, a2, 1024
+; CHECK-NEXT: xori a2, a3, 1024
+; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB31_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: slli a5, a4, 2
+; CHECK-NEXT: add a5, a0, a5
; CHECK-NEXT: vl1re32.v v8, (a5)
; CHECK-NEXT: vfrsub.vf v8, v8, fa0
+; CHECK-NEXT: add a4, a4, a1
; CHECK-NEXT: vs1r.v v8, (a5)
-; CHECK-NEXT: sub a6, a6, a2
-; CHECK-NEXT: add a5, a5, a1
-; CHECK-NEXT: bnez a6, .LBB31_3
+; CHECK-NEXT: bne a4, a2, .LBB31_3
; CHECK-NEXT: # %bb.4: # %middle.block
-; CHECK-NEXT: beqz a4, .LBB31_7
+; CHECK-NEXT: beqz a3, .LBB31_7
; CHECK-NEXT: .LBB31_5: # %for.body.preheader
-; CHECK-NEXT: slli a1, a3, 2
+; CHECK-NEXT: slli a1, a2, 2
; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a0, a0, a2
@@ -2074,36 +2064,35 @@ define void @sink_splat_fma_scalable(ptr noalias nocapture %a, ptr noalias nocap
; CHECK-LABEL: sink_splat_fma_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: srli a3, a2, 2
-; CHECK-NEXT: li a4, 1024
-; CHECK-NEXT: bgeu a4, a3, .LBB34_2
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: bgeu a3, a2, .LBB34_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a4, 0
+; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB34_5
; CHECK-NEXT: .LBB34_2: # %vector.ph
-; CHECK-NEXT: addi a4, a3, -1
-; CHECK-NEXT: andi a5, a4, 1024
-; CHECK-NEXT: xori a4, a5, 1024
-; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a1
-; CHECK-NEXT: mv t0, a4
-; CHECK-NEXT: vsetvli t1, zero, e32, m1, ta, ma
+; CHECK-NEXT: li a5, 0
+; CHECK-NEXT: addi a3, a2, -1
+; CHECK-NEXT: andi a4, a3, 1024
+; CHECK-NEXT: xori a3, a4, 1024
+; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB34_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vl1re32.v v8, (a6)
-; CHECK-NEXT: vl1re32.v v9, (a7)
+; CHECK-NEXT: slli a6, a5, 2
+; CHECK-NEXT: add a7, a0, a6
+; CHECK-NEXT: vl1re32.v v8, (a7)
+; CHECK-NEXT: add a6, a1, a6
+; CHECK-NEXT: vl1re32.v v9, (a6)
; CHECK-NEXT: vfmacc.vf v9, fa0, v8
-; CHECK-NEXT: vs1r.v v9, (a6)
-; CHECK-NEXT: sub t0, t0, a3
-; CHECK-NEXT: add a7, a7, a2
-; CHECK-NEXT: add a6, a6, a2
-; CHECK-NEXT: bnez t0, .LBB34_3
+; CHECK-NEXT: add a5, a5, a2
+; CHECK-NEXT: vs1r.v v9, (a7)
+; CHECK-NEXT: bne a5, a3, .LBB34_3
; CHECK-NEXT: # %bb.4: # %middle.block
-; CHECK-NEXT: beqz a5, .LBB34_7
+; CHECK-NEXT: beqz a4, .LBB34_7
; CHECK-NEXT: .LBB34_5: # %for.body.preheader
-; CHECK-NEXT: slli a4, a4, 2
-; CHECK-NEXT: add a2, a1, a4
-; CHECK-NEXT: add a0, a0, a4
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: add a2, a1, a3
+; CHECK-NEXT: add a0, a0, a3
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a1, a1, a3
; CHECK-NEXT: .LBB34_6: # %for.body
@@ -2174,36 +2163,35 @@ define void @sink_splat_fma_commute_scalable(ptr noalias nocapture %a, ptr noali
; CHECK-LABEL: sink_splat_fma_commute_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: srli a3, a2, 2
-; CHECK-NEXT: li a4, 1024
-; CHECK-NEXT: bgeu a4, a3, .LBB35_2
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: bgeu a3, a2, .LBB35_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a4, 0
+; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB35_5
; CHECK-NEXT: .LBB35_2: # %vector.ph
-; CHECK-NEXT: addi a4, a3, -1
-; CHECK-NEXT: andi a5, a4, 1024
-; CHECK-NEXT: xori a4, a5, 1024
-; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a1
-; CHECK-NEXT: mv t0, a4
-; CHECK-NEXT: vsetvli t1, zero, e32, m1, ta, ma
+; CHECK-NEXT: li a5, 0
+; CHECK-NEXT: addi a3, a2, -1
+; CHECK-NEXT: andi a4, a3, 1024
+; CHECK-NEXT: xori a3, a4, 1024
+; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB35_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vl1re32.v v8, (a6)
-; CHECK-NEXT: vl1re32.v v9, (a7)
+; CHECK-NEXT: slli a6, a5, 2
+; CHECK-NEXT: add a7, a0, a6
+; CHECK-NEXT: vl1re32.v v8, (a7)
+; CHECK-NEXT: add a6, a1, a6
+; CHECK-NEXT: vl1re32.v v9, (a6)
; CHECK-NEXT: vfmacc.vf v9, fa0, v8
-; CHECK-NEXT: vs1r.v v9, (a6)
-; CHECK-NEXT: sub t0, t0, a3
-; CHECK-NEXT: add a7, a7, a2
-; CHECK-NEXT: add a6, a6, a2
-; CHECK-NEXT: bnez t0, .LBB35_3
+; CHECK-NEXT: add a5, a5, a2
+; CHECK-NEXT: vs1r.v v9, (a7)
+; CHECK-NEXT: bne a5, a3, .LBB35_3
; CHECK-NEXT: # %bb.4: # %middle.block
-; CHECK-NEXT: beqz a5, .LBB35_7
+; CHECK-NEXT: beqz a4, .LBB35_7
; CHECK-NEXT: .LBB35_5: # %for.body.preheader
-; CHECK-NEXT: slli a4, a4, 2
-; CHECK-NEXT: add a2, a1, a4
-; CHECK-NEXT: add a0, a0, a4
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: add a2, a1, a3
+; CHECK-NEXT: add a0, a0, a3
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a1, a1, a3
; CHECK-NEXT: .LBB35_6: # %for.body
@@ -2486,29 +2474,28 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_udiv_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_udiv_scalable:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a2, a5, 1
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a2, a2, 1
; CHECK-NEXT: li a3, 1024
; CHECK-NEXT: bgeu a3, a2, .LBB42_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB42_5
; CHECK-NEXT: .LBB42_2: # %vector.ph
+; CHECK-NEXT: li a5, 0
; CHECK-NEXT: addi a3, a2, -1
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
-; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a3
-; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB42_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: slli a6, a5, 2
+; CHECK-NEXT: add a6, a0, a6
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vdivu.vx v8, v8, a1
+; CHECK-NEXT: add a5, a5, a2
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a2
-; CHECK-NEXT: add a6, a6, a5
-; CHECK-NEXT: bnez a7, .LBB42_3
+; CHECK-NEXT: bne a5, a3, .LBB42_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB42_7
; CHECK-NEXT: .LBB42_5: # %for.body.preheader
@@ -2577,29 +2564,28 @@ for.body: ; preds = %for.body.preheader,
define void @sink_splat_sdiv_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_sdiv_scalable:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a2, a5, 1
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a2, a2, 1
; CHECK-NEXT: li a3, 1024
; CHECK-NEXT: bgeu a3, a2, .LBB43_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB43_5
; CHECK-NEXT: .LBB43_2: # %vector.ph
+; CHECK-NEXT: li a5, 0
; CHECK-NEXT: addi a3, a2, -1
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
-; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a3
-; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB43_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: slli a6, a5, 2
+; CHECK-NEXT: add a6, a0, a6
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vdiv.vx v8, v8, a1
+; CHECK-NEXT: add a5, a5, a2
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a2
-; CHECK-NEXT: add a6, a6, a5
-; CHECK-NEXT: bnez a7, .LBB43_3
+; CHECK-NEXT: bne a5, a3, .LBB43_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB43_7
; CHECK-NEXT: .LBB43_5: # %for.body.preheader
@@ -2668,29 +2654,28 @@ for.body: ; preds = %for.body.preheader,
define void @sink_splat_urem_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_urem_scalable:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a2, a5, 1
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a2, a2, 1
; CHECK-NEXT: li a3, 1024
; CHECK-NEXT: bgeu a3, a2, .LBB44_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB44_5
; CHECK-NEXT: .LBB44_2: # %vector.ph
+; CHECK-NEXT: li a5, 0
; CHECK-NEXT: addi a3, a2, -1
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
-; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a3
-; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB44_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: slli a6, a5, 2
+; CHECK-NEXT: add a6, a0, a6
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vremu.vx v8, v8, a1
+; CHECK-NEXT: add a5, a5, a2
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a2
-; CHECK-NEXT: add a6, a6, a5
-; CHECK-NEXT: bnez a7, .LBB44_3
+; CHECK-NEXT: bne a5, a3, .LBB44_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB44_7
; CHECK-NEXT: .LBB44_5: # %for.body.preheader
@@ -2759,29 +2744,28 @@ for.body: ; preds = %for.body.preheader,
define void @sink_splat_srem_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_srem_scalable:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a2, a5, 1
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a2, a2, 1
; CHECK-NEXT: li a3, 1024
; CHECK-NEXT: bgeu a3, a2, .LBB45_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB45_5
; CHECK-NEXT: .LBB45_2: # %vector.ph
+; CHECK-NEXT: li a5, 0
; CHECK-NEXT: addi a3, a2, -1
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
-; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a3
-; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB45_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: slli a6, a5, 2
+; CHECK-NEXT: add a6, a0, a6
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vrem.vx v8, v8, a1
+; CHECK-NEXT: add a5, a5, a2
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a2
-; CHECK-NEXT: add a6, a6, a5
-; CHECK-NEXT: bnez a7, .LBB45_3
+; CHECK-NEXT: bne a5, a3, .LBB45_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB45_7
; CHECK-NEXT: .LBB45_5: # %for.body.preheader
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
index 7eb6cacf1ca43..710d0d3e96071 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
@@ -634,17 +634,17 @@ define void @vlmax(i64 %N, ptr %c, ptr %a, ptr %b) {
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: li a4, 0
; CHECK-NEXT: vsetvli a6, zero, e64, m1, ta, ma
-; CHECK-NEXT: slli a5, a6, 3
; CHECK-NEXT: .LBB12_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vle64.v v8, (a2)
-; CHECK-NEXT: vle64.v v9, (a3)
+; CHECK-NEXT: slli a7, a4, 3
+; CHECK-NEXT: add a5, a2, a7
+; CHECK-NEXT: vle64.v v8, (a5)
+; CHECK-NEXT: add a5, a3, a7
+; CHECK-NEXT: vle64.v v9, (a5)
; CHECK-NEXT: vfadd.vv v8, v8, v9
-; CHECK-NEXT: vse64.v v8, (a1)
+; CHECK-NEXT: add a7, a7, a1
; CHECK-NEXT: add a4, a4, a6
-; CHECK-NEXT: add a1, a1, a5
-; CHECK-NEXT: add a3, a3, a5
-; CHECK-NEXT: add a2, a2, a5
+; CHECK-NEXT: vse64.v v8, (a7)
; CHECK-NEXT: blt a4, a0, .LBB12_2
; CHECK-NEXT: .LBB12_3: # %for.end
; CHECK-NEXT: ret
@@ -681,13 +681,13 @@ define void @vector_init_vlmax(i64 %N, ptr %c) {
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: vsetvli a3, zero, e64, m1, ta, ma
-; CHECK-NEXT: slli a4, a3, 3
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: .LBB13_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vse64.v v8, (a1)
+; CHECK-NEXT: slli a4, a2, 3
+; CHECK-NEXT: add a4, a4, a1
; CHECK-NEXT: add a2, a2, a3
-; CHECK-NEXT: add a1, a1, a4
+; CHECK-NEXT: vse64.v v8, (a4)
; CHECK-NEXT: blt a2, a0, .LBB13_2
; CHECK-NEXT: .LBB13_3: # %for.end
; CHECK-NEXT: ret
@@ -717,15 +717,15 @@ define void @vector_init_vsetvli_N(i64 %N, ptr %c) {
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: vsetvli a3, a0, e64, m1, ta, ma
-; CHECK-NEXT: slli a4, a3, 3
-; CHECK-NEXT: vsetvli a5, zero, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a4, zero, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: .LBB14_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vsetvli zero, a3, e64, m1, ta, ma
-; CHECK-NEXT: vse64.v v8, (a1)
+; CHECK-NEXT: slli a4, a2, 3
+; CHECK-NEXT: add a4, a4, a1
; CHECK-NEXT: add a2, a2, a3
-; CHECK-NEXT: add a1, a1, a4
+; CHECK-NEXT: vsetvli zero, a3, e64, m1, ta, ma
+; CHECK-NEXT: vse64.v v8, (a4)
; CHECK-NEXT: blt a2, a0, .LBB14_2
; CHECK-NEXT: .LBB14_3: # %for.end
; CHECK-NEXT: ret
@@ -753,15 +753,15 @@ define void @vector_init_vsetvli_fv(i64 %N, ptr %c) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: vsetivli a3, 4, e64, m1, ta, ma
-; CHECK-NEXT: slli a4, a3, 3
-; CHECK-NEXT: vsetvli a5, zero, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a4, zero, e64, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: .LBB15_1: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
-; CHECK-NEXT: vse64.v v8, (a1)
+; CHECK-NEXT: slli a4, a2, 3
+; CHECK-NEXT: add a4, a4, a1
; CHECK-NEXT: add a2, a2, a3
-; CHECK-NEXT: add a1, a1, a4
+; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
+; CHECK-NEXT: vse64.v v8, (a4)
; CHECK-NEXT: blt a2, a0, .LBB15_1
; CHECK-NEXT: # %bb.2: # %for.end
; CHECK-NEXT: ret
diff --git a/llvm/test/Transforms/LoopStrengthReduce/RISCV/icmp-zero.ll b/llvm/test/Transforms/LoopStrengthReduce/RISCV/icmp-zero.ll
index a8446c5103176..c4558a55e729f 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/RISCV/icmp-zero.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/RISCV/icmp-zero.ll
@@ -357,11 +357,11 @@ define void @loop_invariant_definition(i64 %arg) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[T1:%.*]]
; CHECK: t1:
-; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[T1]] ], [ -1, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 1
+; CHECK-NEXT: [[T2:%.*]] = phi i64 [ [[T3:%.*]], [[T1]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[T3]] = add nuw i64 [[T2]], 1
; CHECK-NEXT: br i1 true, label [[T4:%.*]], label [[T1]]
; CHECK: t4:
-; CHECK-NEXT: [[T5:%.*]] = trunc i64 [[LSR_IV_NEXT]] to i32
+; CHECK-NEXT: [[T5:%.*]] = trunc i64 [[T2]] to i32
; CHECK-NEXT: [[T6:%.*]] = add i32 [[T5]], 1
; CHECK-NEXT: [[T7:%.*]] = icmp eq i32 [[T5]], [[T6]]
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll b/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll
index 9c11bd064ad47..18ab64758e49e 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll
@@ -38,14 +38,14 @@ exit: ; preds = %loop
define void @test2(ptr %a) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 128000
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[LOOP]] ], [ [[A]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: store float 1.000000e+00, ptr [[LSR_IV1]], align 4
-; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
-; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[SCEVGEP]], [[SCEVGEP2]]
-; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK-NEXT: [[T15:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[T20:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[T19:%.*]] = getelementptr inbounds [32000 x float], ptr [[A:%.*]], i64 0, i64 [[T15]]
+; CHECK-NEXT: store float 1.000000e+00, ptr [[T19]], align 4
+; CHECK-NEXT: [[T20]] = add nuw nsw i64 [[T15]], 1
+; CHECK-NEXT: [[T21:%.*]] = icmp eq i64 [[T20]], 32000
+; CHECK-NEXT: br i1 [[T21]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: call void @use(ptr [[A]])
; CHECK-NEXT: ret void
@@ -107,18 +107,17 @@ exit: ; preds = %loop
define void @test4(ptr %a, ptr %b) {
; CHECK-LABEL: @test4(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 128000
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[LSR_IV2:%.*]] = phi ptr [ [[SCEVGEP3:%.*]], [[LOOP]] ], [ [[A:%.*]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[LOOP]] ], [ [[B]], [[ENTRY]] ]
-; CHECK-NEXT: [[T17:%.*]] = load float, ptr [[LSR_IV2]], align 4
+; CHECK-NEXT: [[T15:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[T20:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[T16:%.*]] = getelementptr inbounds [32000 x float], ptr [[A:%.*]], i64 0, i64 [[T15]]
+; CHECK-NEXT: [[T17:%.*]] = load float, ptr [[T16]], align 4
; CHECK-NEXT: [[T18:%.*]] = fadd float [[T17]], 1.000000e+00
-; CHECK-NEXT: store float [[T18]], ptr [[LSR_IV1]], align 4
-; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
-; CHECK-NEXT: [[SCEVGEP3]] = getelementptr i8, ptr [[LSR_IV2]], i64 4
-; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[SCEVGEP]], [[SCEVGEP4]]
-; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK-NEXT: [[T19:%.*]] = getelementptr inbounds [32000 x float], ptr [[B:%.*]], i64 0, i64 [[T15]]
+; CHECK-NEXT: store float [[T18]], ptr [[T19]], align 4
+; CHECK-NEXT: [[T20]] = add nuw nsw i64 [[T15]], 1
+; CHECK-NEXT: [[T21:%.*]] = icmp eq i64 [[T20]], 32000
+; CHECK-NEXT: br i1 [[T21]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: call void @use(ptr [[A]])
; CHECK-NEXT: call void @use(ptr [[B]])
diff --git a/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution-dbg-msg.ll b/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution-dbg-msg.ll
index 8d9d43202f0d9..6b25aa5efd508 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution-dbg-msg.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution-dbg-msg.ll
@@ -1,6 +1,5 @@
; REQUIRES: asserts
; RUN: llc < %s -O3 -mattr=+v -debug -lsr-drop-solution 2>&1 | FileCheck --check-prefix=DEBUG %s
-; RUN: llc < %s -O3 -mattr=+v -debug 2>&1 | FileCheck --check-prefix=DEBUG2 %s
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64-unknown-linux-gnu"
@@ -10,7 +9,6 @@ define ptr @foo(ptr %a0, ptr %a1, i64 %a2) {
;DEBUG: The chosen solution requires 3 instructions 6 regs, with addrec cost 1, plus 2 base adds, plus 5 setup cost
;DEBUG: Baseline is more profitable than chosen solution, dropping LSR solution.
-;DEBUG2: Baseline is more profitable than chosen solution, add option 'lsr-drop-solution' to drop LSR solution.
entry:
%0 = ptrtoint ptr %a0 to i64
%1 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %a2, i64 0, i64 3)
More information about the llvm-commits
mailing list