[llvm] af47a4e - [RISCV] Enable TTI::shouldDropLSRSolutionIfLessProfitable by default (#89927)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 10 05:23:35 PDT 2024


Author: Alex Bradbury
Date: 2024-07-10T13:23:31+01:00
New Revision: af47a4ec503fe3efc6ade8cad4882881a202ed41

URL: https://github.com/llvm/llvm-project/commit/af47a4ec503fe3efc6ade8cad4882881a202ed41
DIFF: https://github.com/llvm/llvm-project/commit/af47a4ec503fe3efc6ade8cad4882881a202ed41.diff

LOG: [RISCV] Enable TTI::shouldDropLSRSolutionIfLessProfitable by default (#89927)

This avoids some cases where LSR produces results that lead to very poor
codegen. There's a chance we'll see minor degradations for some inputs
in the case that our metrics say the found solution is worse, but in
reality it's better than the starting point.

Per the review thread, at least one vendor has been enabling this by
defualt for some time and found overall it's an improvement. As such,
we'll enable by default and aim to fix any as-yet-unknown regressions
in-tree.

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
    llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll
    llvm/test/CodeGen/RISCV/rvv/pr95865.ll
    llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
    llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
    llvm/test/Transforms/LoopStrengthReduce/RISCV/icmp-zero.ll
    llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll
    llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution-dbg-msg.ll
    llvm/test/Transforms/LoopStrengthReduce/RISCV/many-geps.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 9c37a4f6ec2d0..7b239b8fc17a3 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -398,6 +398,8 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
     return true;
   }
 
+  bool shouldDropLSRSolutionIfLessProfitable() const { return true; }
+
   std::optional<unsigned> getMinPageSize() const { return 4096; }
 };
 

diff  --git a/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll
index 2b4b8e979f3d7..92639be0017e8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll
@@ -86,30 +86,29 @@ declare i64 @llvm.vscale.i64()
 define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
 ; NO-SINK-LABEL: sink_splat_add_scalable:
 ; NO-SINK:       # %bb.0: # %entry
-; NO-SINK-NEXT:    csrr a5, vlenb
-; NO-SINK-NEXT:    srli a2, a5, 1
+; NO-SINK-NEXT:    csrr a2, vlenb
+; NO-SINK-NEXT:    srli a2, a2, 1
 ; NO-SINK-NEXT:    li a3, 1024
 ; NO-SINK-NEXT:    bgeu a3, a2, .LBB1_2
 ; NO-SINK-NEXT:  # %bb.1:
 ; NO-SINK-NEXT:    li a3, 0
 ; NO-SINK-NEXT:    j .LBB1_5
 ; NO-SINK-NEXT:  .LBB1_2: # %vector.ph
+; NO-SINK-NEXT:    li a5, 0
 ; NO-SINK-NEXT:    addi a3, a2, -1
 ; NO-SINK-NEXT:    andi a4, a3, 1024
 ; NO-SINK-NEXT:    xori a3, a4, 1024
 ; NO-SINK-NEXT:    vsetvli a6, zero, e32, m2, ta, ma
 ; NO-SINK-NEXT:    vmv.v.x v8, a1
-; NO-SINK-NEXT:    slli a5, a5, 1
-; NO-SINK-NEXT:    mv a6, a0
-; NO-SINK-NEXT:    mv a7, a3
 ; NO-SINK-NEXT:  .LBB1_3: # %vector.body
 ; NO-SINK-NEXT:    # =>This Inner Loop Header: Depth=1
+; NO-SINK-NEXT:    slli a6, a5, 2
+; NO-SINK-NEXT:    add a6, a0, a6
 ; NO-SINK-NEXT:    vl2re32.v v10, (a6)
 ; NO-SINK-NEXT:    vadd.vv v10, v10, v8
+; NO-SINK-NEXT:    add a5, a5, a2
 ; NO-SINK-NEXT:    vs2r.v v10, (a6)
-; NO-SINK-NEXT:    sub a7, a7, a2
-; NO-SINK-NEXT:    add a6, a6, a5
-; NO-SINK-NEXT:    bnez a7, .LBB1_3
+; NO-SINK-NEXT:    bne a5, a3, .LBB1_3
 ; NO-SINK-NEXT:  # %bb.4: # %middle.block
 ; NO-SINK-NEXT:    beqz a4, .LBB1_7
 ; NO-SINK-NEXT:  .LBB1_5: # %for.body.preheader
@@ -129,29 +128,28 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
 ;
 ; SINK-LABEL: sink_splat_add_scalable:
 ; SINK:       # %bb.0: # %entry
-; SINK-NEXT:    csrr a5, vlenb
-; SINK-NEXT:    srli a2, a5, 1
+; SINK-NEXT:    csrr a2, vlenb
+; SINK-NEXT:    srli a2, a2, 1
 ; SINK-NEXT:    li a3, 1024
 ; SINK-NEXT:    bgeu a3, a2, .LBB1_2
 ; SINK-NEXT:  # %bb.1:
 ; SINK-NEXT:    li a3, 0
 ; SINK-NEXT:    j .LBB1_5
 ; SINK-NEXT:  .LBB1_2: # %vector.ph
+; SINK-NEXT:    li a5, 0
 ; SINK-NEXT:    addi a3, a2, -1
 ; SINK-NEXT:    andi a4, a3, 1024
 ; SINK-NEXT:    xori a3, a4, 1024
-; SINK-NEXT:    slli a5, a5, 1
-; SINK-NEXT:    mv a6, a0
-; SINK-NEXT:    mv a7, a3
-; SINK-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
+; SINK-NEXT:    vsetvli a6, zero, e32, m2, ta, ma
 ; SINK-NEXT:  .LBB1_3: # %vector.body
 ; SINK-NEXT:    # =>This Inner Loop Header: Depth=1
+; SINK-NEXT:    slli a6, a5, 2
+; SINK-NEXT:    add a6, a0, a6
 ; SINK-NEXT:    vl2re32.v v8, (a6)
 ; SINK-NEXT:    vadd.vx v8, v8, a1
+; SINK-NEXT:    add a5, a5, a2
 ; SINK-NEXT:    vs2r.v v8, (a6)
-; SINK-NEXT:    sub a7, a7, a2
-; SINK-NEXT:    add a6, a6, a5
-; SINK-NEXT:    bnez a7, .LBB1_3
+; SINK-NEXT:    bne a5, a3, .LBB1_3
 ; SINK-NEXT:  # %bb.4: # %middle.block
 ; SINK-NEXT:    beqz a4, .LBB1_7
 ; SINK-NEXT:  .LBB1_5: # %for.body.preheader
@@ -171,29 +169,28 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
 ;
 ; DEFAULT-LABEL: sink_splat_add_scalable:
 ; DEFAULT:       # %bb.0: # %entry
-; DEFAULT-NEXT:    csrr a5, vlenb
-; DEFAULT-NEXT:    srli a2, a5, 1
+; DEFAULT-NEXT:    csrr a2, vlenb
+; DEFAULT-NEXT:    srli a2, a2, 1
 ; DEFAULT-NEXT:    li a3, 1024
 ; DEFAULT-NEXT:    bgeu a3, a2, .LBB1_2
 ; DEFAULT-NEXT:  # %bb.1:
 ; DEFAULT-NEXT:    li a3, 0
 ; DEFAULT-NEXT:    j .LBB1_5
 ; DEFAULT-NEXT:  .LBB1_2: # %vector.ph
+; DEFAULT-NEXT:    li a5, 0
 ; DEFAULT-NEXT:    addi a3, a2, -1
 ; DEFAULT-NEXT:    andi a4, a3, 1024
 ; DEFAULT-NEXT:    xori a3, a4, 1024
-; DEFAULT-NEXT:    slli a5, a5, 1
-; DEFAULT-NEXT:    mv a6, a0
-; DEFAULT-NEXT:    mv a7, a3
-; DEFAULT-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
+; DEFAULT-NEXT:    vsetvli a6, zero, e32, m2, ta, ma
 ; DEFAULT-NEXT:  .LBB1_3: # %vector.body
 ; DEFAULT-NEXT:    # =>This Inner Loop Header: Depth=1
+; DEFAULT-NEXT:    slli a6, a5, 2
+; DEFAULT-NEXT:    add a6, a0, a6
 ; DEFAULT-NEXT:    vl2re32.v v8, (a6)
 ; DEFAULT-NEXT:    vadd.vx v8, v8, a1
+; DEFAULT-NEXT:    add a5, a5, a2
 ; DEFAULT-NEXT:    vs2r.v v8, (a6)
-; DEFAULT-NEXT:    sub a7, a7, a2
-; DEFAULT-NEXT:    add a6, a6, a5
-; DEFAULT-NEXT:    bnez a7, .LBB1_3
+; DEFAULT-NEXT:    bne a5, a3, .LBB1_3
 ; DEFAULT-NEXT:  # %bb.4: # %middle.block
 ; DEFAULT-NEXT:    beqz a4, .LBB1_7
 ; DEFAULT-NEXT:  .LBB1_5: # %for.body.preheader
@@ -407,32 +404,32 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
 ; NO-SINK-LABEL: sink_splat_fadd_scalable:
 ; NO-SINK:       # %bb.0: # %entry
 ; NO-SINK-NEXT:    csrr a1, vlenb
-; NO-SINK-NEXT:    srli a2, a1, 2
-; NO-SINK-NEXT:    li a3, 1024
-; NO-SINK-NEXT:    bgeu a3, a2, .LBB4_2
+; NO-SINK-NEXT:    srli a1, a1, 2
+; NO-SINK-NEXT:    li a2, 1024
+; NO-SINK-NEXT:    bgeu a2, a1, .LBB4_2
 ; NO-SINK-NEXT:  # %bb.1:
-; NO-SINK-NEXT:    li a3, 0
+; NO-SINK-NEXT:    li a2, 0
 ; NO-SINK-NEXT:    j .LBB4_5
 ; NO-SINK-NEXT:  .LBB4_2: # %vector.ph
-; NO-SINK-NEXT:    addi a3, a2, -1
-; NO-SINK-NEXT:    andi a4, a3, 1024
-; NO-SINK-NEXT:    xori a3, a4, 1024
+; NO-SINK-NEXT:    li a4, 0
+; NO-SINK-NEXT:    addi a2, a1, -1
+; NO-SINK-NEXT:    andi a3, a2, 1024
+; NO-SINK-NEXT:    xori a2, a3, 1024
 ; NO-SINK-NEXT:    vsetvli a5, zero, e32, m1, ta, ma
 ; NO-SINK-NEXT:    vfmv.v.f v8, fa0
-; NO-SINK-NEXT:    mv a5, a0
-; NO-SINK-NEXT:    mv a6, a3
 ; NO-SINK-NEXT:  .LBB4_3: # %vector.body
 ; NO-SINK-NEXT:    # =>This Inner Loop Header: Depth=1
+; NO-SINK-NEXT:    slli a5, a4, 2
+; NO-SINK-NEXT:    add a5, a0, a5
 ; NO-SINK-NEXT:    vl1re32.v v9, (a5)
 ; NO-SINK-NEXT:    vfadd.vv v9, v9, v8
+; NO-SINK-NEXT:    add a4, a4, a1
 ; NO-SINK-NEXT:    vs1r.v v9, (a5)
-; NO-SINK-NEXT:    sub a6, a6, a2
-; NO-SINK-NEXT:    add a5, a5, a1
-; NO-SINK-NEXT:    bnez a6, .LBB4_3
+; NO-SINK-NEXT:    bne a4, a2, .LBB4_3
 ; NO-SINK-NEXT:  # %bb.4: # %middle.block
-; NO-SINK-NEXT:    beqz a4, .LBB4_7
+; NO-SINK-NEXT:    beqz a3, .LBB4_7
 ; NO-SINK-NEXT:  .LBB4_5: # %for.body.preheader
-; NO-SINK-NEXT:    slli a1, a3, 2
+; NO-SINK-NEXT:    slli a1, a2, 2
 ; NO-SINK-NEXT:    add a1, a0, a1
 ; NO-SINK-NEXT:    lui a2, 1
 ; NO-SINK-NEXT:    add a0, a0, a2
@@ -449,31 +446,31 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
 ; SINK-LABEL: sink_splat_fadd_scalable:
 ; SINK:       # %bb.0: # %entry
 ; SINK-NEXT:    csrr a1, vlenb
-; SINK-NEXT:    srli a2, a1, 2
-; SINK-NEXT:    li a3, 1024
-; SINK-NEXT:    bgeu a3, a2, .LBB4_2
+; SINK-NEXT:    srli a1, a1, 2
+; SINK-NEXT:    li a2, 1024
+; SINK-NEXT:    bgeu a2, a1, .LBB4_2
 ; SINK-NEXT:  # %bb.1:
-; SINK-NEXT:    li a3, 0
+; SINK-NEXT:    li a2, 0
 ; SINK-NEXT:    j .LBB4_5
 ; SINK-NEXT:  .LBB4_2: # %vector.ph
-; SINK-NEXT:    addi a3, a2, -1
-; SINK-NEXT:    andi a4, a3, 1024
-; SINK-NEXT:    xori a3, a4, 1024
-; SINK-NEXT:    mv a5, a0
-; SINK-NEXT:    mv a6, a3
-; SINK-NEXT:    vsetvli a7, zero, e32, m1, ta, ma
+; SINK-NEXT:    li a4, 0
+; SINK-NEXT:    addi a2, a1, -1
+; SINK-NEXT:    andi a3, a2, 1024
+; SINK-NEXT:    xori a2, a3, 1024
+; SINK-NEXT:    vsetvli a5, zero, e32, m1, ta, ma
 ; SINK-NEXT:  .LBB4_3: # %vector.body
 ; SINK-NEXT:    # =>This Inner Loop Header: Depth=1
+; SINK-NEXT:    slli a5, a4, 2
+; SINK-NEXT:    add a5, a0, a5
 ; SINK-NEXT:    vl1re32.v v8, (a5)
 ; SINK-NEXT:    vfadd.vf v8, v8, fa0
+; SINK-NEXT:    add a4, a4, a1
 ; SINK-NEXT:    vs1r.v v8, (a5)
-; SINK-NEXT:    sub a6, a6, a2
-; SINK-NEXT:    add a5, a5, a1
-; SINK-NEXT:    bnez a6, .LBB4_3
+; SINK-NEXT:    bne a4, a2, .LBB4_3
 ; SINK-NEXT:  # %bb.4: # %middle.block
-; SINK-NEXT:    beqz a4, .LBB4_7
+; SINK-NEXT:    beqz a3, .LBB4_7
 ; SINK-NEXT:  .LBB4_5: # %for.body.preheader
-; SINK-NEXT:    slli a1, a3, 2
+; SINK-NEXT:    slli a1, a2, 2
 ; SINK-NEXT:    add a1, a0, a1
 ; SINK-NEXT:    lui a2, 1
 ; SINK-NEXT:    add a0, a0, a2
@@ -490,31 +487,31 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
 ; DEFAULT-LABEL: sink_splat_fadd_scalable:
 ; DEFAULT:       # %bb.0: # %entry
 ; DEFAULT-NEXT:    csrr a1, vlenb
-; DEFAULT-NEXT:    srli a2, a1, 2
-; DEFAULT-NEXT:    li a3, 1024
-; DEFAULT-NEXT:    bgeu a3, a2, .LBB4_2
+; DEFAULT-NEXT:    srli a1, a1, 2
+; DEFAULT-NEXT:    li a2, 1024
+; DEFAULT-NEXT:    bgeu a2, a1, .LBB4_2
 ; DEFAULT-NEXT:  # %bb.1:
-; DEFAULT-NEXT:    li a3, 0
+; DEFAULT-NEXT:    li a2, 0
 ; DEFAULT-NEXT:    j .LBB4_5
 ; DEFAULT-NEXT:  .LBB4_2: # %vector.ph
-; DEFAULT-NEXT:    addi a3, a2, -1
-; DEFAULT-NEXT:    andi a4, a3, 1024
-; DEFAULT-NEXT:    xori a3, a4, 1024
-; DEFAULT-NEXT:    mv a5, a0
-; DEFAULT-NEXT:    mv a6, a3
-; DEFAULT-NEXT:    vsetvli a7, zero, e32, m1, ta, ma
+; DEFAULT-NEXT:    li a4, 0
+; DEFAULT-NEXT:    addi a2, a1, -1
+; DEFAULT-NEXT:    andi a3, a2, 1024
+; DEFAULT-NEXT:    xori a2, a3, 1024
+; DEFAULT-NEXT:    vsetvli a5, zero, e32, m1, ta, ma
 ; DEFAULT-NEXT:  .LBB4_3: # %vector.body
 ; DEFAULT-NEXT:    # =>This Inner Loop Header: Depth=1
+; DEFAULT-NEXT:    slli a5, a4, 2
+; DEFAULT-NEXT:    add a5, a0, a5
 ; DEFAULT-NEXT:    vl1re32.v v8, (a5)
 ; DEFAULT-NEXT:    vfadd.vf v8, v8, fa0
+; DEFAULT-NEXT:    add a4, a4, a1
 ; DEFAULT-NEXT:    vs1r.v v8, (a5)
-; DEFAULT-NEXT:    sub a6, a6, a2
-; DEFAULT-NEXT:    add a5, a5, a1
-; DEFAULT-NEXT:    bnez a6, .LBB4_3
+; DEFAULT-NEXT:    bne a4, a2, .LBB4_3
 ; DEFAULT-NEXT:  # %bb.4: # %middle.block
-; DEFAULT-NEXT:    beqz a4, .LBB4_7
+; DEFAULT-NEXT:    beqz a3, .LBB4_7
 ; DEFAULT-NEXT:  .LBB4_5: # %for.body.preheader
-; DEFAULT-NEXT:    slli a1, a3, 2
+; DEFAULT-NEXT:    slli a1, a2, 2
 ; DEFAULT-NEXT:    add a1, a0, a1
 ; DEFAULT-NEXT:    lui a2, 1
 ; DEFAULT-NEXT:    add a0, a0, a2

diff  --git a/llvm/test/CodeGen/RISCV/rvv/pr95865.ll b/llvm/test/CodeGen/RISCV/rvv/pr95865.ll
index 3cb3c94d4e1f2..c95301809375c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr95865.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr95865.ll
@@ -7,143 +7,106 @@
 define i32 @main(i1 %arg.1, i64 %arg.2, i1 %arg.3, i64 %arg.4, i1 %arg.5, <vscale x 4 x i1> %arg.6, i64 %arg.7, i1 %arg.8, i64 %arg.9, i32 %arg.10) vscale_range(2,2) {
 ; CHECK-LABEL: main:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -112
-; CHECK-NEXT:    .cfi_def_cfa_offset 112
-; CHECK-NEXT:    sd ra, 104(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s0, 96(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s1, 88(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s2, 80(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s3, 72(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s4, 64(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s5, 56(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s6, 48(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s7, 40(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s8, 32(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s9, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s10, 16(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s11, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    addi sp, sp, -48
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
 ; CHECK-NEXT:    .cfi_offset s2, -32
 ; CHECK-NEXT:    .cfi_offset s3, -40
-; CHECK-NEXT:    .cfi_offset s4, -48
-; CHECK-NEXT:    .cfi_offset s5, -56
-; CHECK-NEXT:    .cfi_offset s6, -64
-; CHECK-NEXT:    .cfi_offset s7, -72
-; CHECK-NEXT:    .cfi_offset s8, -80
-; CHECK-NEXT:    .cfi_offset s9, -88
-; CHECK-NEXT:    .cfi_offset s10, -96
-; CHECK-NEXT:    .cfi_offset s11, -104
-; CHECK-NEXT:    li s2, 0
-; CHECK-NEXT:    li a7, 8
-; CHECK-NEXT:    li t0, 12
-; CHECK-NEXT:    li s0, 4
-; CHECK-NEXT:    li t1, 20
-; CHECK-NEXT:    ld a1, 112(sp)
-; CHECK-NEXT:    sd a1, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    li a7, 0
+; CHECK-NEXT:    ld s2, 48(sp)
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    andi t3, a4, 1
-; CHECK-NEXT:    li t2, 4
+; CHECK-NEXT:    andi s3, a0, 1
+; CHECK-NEXT:    andi t1, a2, 1
+; CHECK-NEXT:    andi a6, a4, 1
 ; CHECK-NEXT:  .LBB0_1: # %for.cond1.preheader.i
 ; CHECK-NEXT:    # =>This Loop Header: Depth=1
 ; CHECK-NEXT:    # Child Loop BB0_2 Depth 2
 ; CHECK-NEXT:    # Child Loop BB0_3 Depth 3
 ; CHECK-NEXT:    # Child Loop BB0_4 Depth 4
 ; CHECK-NEXT:    # Child Loop BB0_5 Depth 5
-; CHECK-NEXT:    mv t4, t1
-; CHECK-NEXT:    mv t5, t2
-; CHECK-NEXT:    mv t6, t0
-; CHECK-NEXT:    mv s3, a7
-; CHECK-NEXT:    mv a6, s2
+; CHECK-NEXT:    li t0, 0
 ; CHECK-NEXT:  .LBB0_2: # %for.cond5.preheader.i
 ; CHECK-NEXT:    # Parent Loop BB0_1 Depth=1
 ; CHECK-NEXT:    # => This Loop Header: Depth=2
 ; CHECK-NEXT:    # Child Loop BB0_3 Depth 3
 ; CHECK-NEXT:    # Child Loop BB0_4 Depth 4
 ; CHECK-NEXT:    # Child Loop BB0_5 Depth 5
-; CHECK-NEXT:    mv s5, t4
-; CHECK-NEXT:    mv s6, t5
-; CHECK-NEXT:    mv s7, t6
-; CHECK-NEXT:    mv s8, s3
-; CHECK-NEXT:    mv s4, a6
+; CHECK-NEXT:    li t3, 0
+; CHECK-NEXT:    add t2, t0, a7
 ; CHECK-NEXT:  .LBB0_3: # %for.cond9.preheader.i
 ; CHECK-NEXT:    # Parent Loop BB0_1 Depth=1
 ; CHECK-NEXT:    # Parent Loop BB0_2 Depth=2
 ; CHECK-NEXT:    # => This Loop Header: Depth=3
 ; CHECK-NEXT:    # Child Loop BB0_4 Depth 4
 ; CHECK-NEXT:    # Child Loop BB0_5 Depth 5
-; CHECK-NEXT:    mv s11, s5
-; CHECK-NEXT:    mv a3, s6
-; CHECK-NEXT:    mv ra, s7
-; CHECK-NEXT:    mv a4, s8
-; CHECK-NEXT:    mv s9, s4
+; CHECK-NEXT:    li t5, 0
+; CHECK-NEXT:    add t4, t2, t3
 ; CHECK-NEXT:  .LBB0_4: # %vector.ph.i
 ; CHECK-NEXT:    # Parent Loop BB0_1 Depth=1
 ; CHECK-NEXT:    # Parent Loop BB0_2 Depth=2
 ; CHECK-NEXT:    # Parent Loop BB0_3 Depth=3
 ; CHECK-NEXT:    # => This Loop Header: Depth=4
 ; CHECK-NEXT:    # Child Loop BB0_5 Depth 5
-; CHECK-NEXT:    li a5, 0
+; CHECK-NEXT:    li s1, 0
+; CHECK-NEXT:    add a2, t4, t5
+; CHECK-NEXT:    addi a0, a2, 2
+; CHECK-NEXT:    addi a3, a2, 3
+; CHECK-NEXT:    addi a1, a2, 1
+; CHECK-NEXT:    addi a4, a2, 5
+; CHECK-NEXT:    li a5, 1
 ; CHECK-NEXT:  .LBB0_5: # %vector.body.i
 ; CHECK-NEXT:    # Parent Loop BB0_1 Depth=1
 ; CHECK-NEXT:    # Parent Loop BB0_2 Depth=2
 ; CHECK-NEXT:    # Parent Loop BB0_3 Depth=3
 ; CHECK-NEXT:    # Parent Loop BB0_4 Depth=4
 ; CHECK-NEXT:    # => This Inner Loop Header: Depth=5
-; CHECK-NEXT:    addi s1, a5, 4
-; CHECK-NEXT:    add a1, a4, a5
-; CHECK-NEXT:    vse32.v v8, (a1), v0.t
-; CHECK-NEXT:    add a5, a5, a3
-; CHECK-NEXT:    vse32.v v8, (a5), v0.t
-; CHECK-NEXT:    mv a5, s1
-; CHECK-NEXT:    bne s1, s0, .LBB0_5
+; CHECK-NEXT:    mv t6, s1
+; CHECK-NEXT:    addi a2, a2, 1
+; CHECK-NEXT:    addi a5, a5, 1
+; CHECK-NEXT:    slli s1, a0, 2
+; CHECK-NEXT:    addi a0, a0, 1
+; CHECK-NEXT:    addi a3, a3, 1
+; CHECK-NEXT:    slli s0, a1, 2
+; CHECK-NEXT:    addi a1, a1, 1
+; CHECK-NEXT:    addi a4, a4, 1
+; CHECK-NEXT:    vse32.v v8, (s1), v0.t
+; CHECK-NEXT:    vse32.v v8, (s0), v0.t
+; CHECK-NEXT:    addi s1, t6, 1
+; CHECK-NEXT:    bnez t6, .LBB0_5
 ; CHECK-NEXT:  # %bb.6: # %for.cond.cleanup15.i
 ; CHECK-NEXT:    # in Loop: Header=BB0_4 Depth=4
-; CHECK-NEXT:    addi s9, s9, 4
-; CHECK-NEXT:    addi a4, a4, 4
-; CHECK-NEXT:    addi ra, ra, 4
-; CHECK-NEXT:    addi a3, a3, 4
-; CHECK-NEXT:    andi s10, a0, 1
-; CHECK-NEXT:    addi s11, s11, 4
-; CHECK-NEXT:    beqz s10, .LBB0_4
+; CHECK-NEXT:    addi t5, t5, 1
+; CHECK-NEXT:    beqz s3, .LBB0_4
 ; CHECK-NEXT:  # %bb.7: # %for.cond.cleanup11.i
 ; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=3
-; CHECK-NEXT:    addi s4, s4, 4
-; CHECK-NEXT:    addi s8, s8, 4
-; CHECK-NEXT:    addi s7, s7, 4
-; CHECK-NEXT:    addi s6, s6, 4
-; CHECK-NEXT:    andi a1, a2, 1
-; CHECK-NEXT:    addi s5, s5, 4
-; CHECK-NEXT:    beqz a1, .LBB0_3
+; CHECK-NEXT:    addi t3, t3, 1
+; CHECK-NEXT:    beqz t1, .LBB0_3
 ; CHECK-NEXT:  # %bb.8: # %for.cond.cleanup7.i
 ; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=2
-; CHECK-NEXT:    addi a6, a6, 4
-; CHECK-NEXT:    addi s3, s3, 4
-; CHECK-NEXT:    addi t6, t6, 4
-; CHECK-NEXT:    addi t5, t5, 4
-; CHECK-NEXT:    addi t4, t4, 4
-; CHECK-NEXT:    beqz t3, .LBB0_2
+; CHECK-NEXT:    addi t0, t0, 1
+; CHECK-NEXT:    beqz a6, .LBB0_2
 ; CHECK-NEXT:  # %bb.9: # %for.cond.cleanup3.i
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    addi s2, s2, 4
-; CHECK-NEXT:    addi a7, a7, 4
-; CHECK-NEXT:    addi t0, t0, 4
-; CHECK-NEXT:    addi t2, t2, 4
-; CHECK-NEXT:    addi t1, t1, 4
-; CHECK-NEXT:    beqz a1, .LBB0_1
+; CHECK-NEXT:    addi a7, a7, 1
+; CHECK-NEXT:    beqz t1, .LBB0_1
 ; CHECK-NEXT:  # %bb.10: # %l.exit
 ; CHECK-NEXT:    li a0, 0
 ; CHECK-NEXT:    jalr a0
-; CHECK-NEXT:    beqz s10, .LBB0_12
+; CHECK-NEXT:    beqz s3, .LBB0_12
 ; CHECK-NEXT:  .LBB0_11: # %for.body7.us.14
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    j .LBB0_11
 ; CHECK-NEXT:  .LBB0_12: # %for.body7.us.19
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT:    ld a0, 0(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    vmv.s.x v8, a0
+; CHECK-NEXT:    vmv.s.x v8, s2
 ; CHECK-NEXT:    vmv.v.i v16, 0
 ; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
 ; CHECK-NEXT:    vslideup.vi v16, v8, 1
@@ -153,20 +116,12 @@ define i32 @main(i1 %arg.1, i64 %arg.2, i1 %arg.3, i64 %arg.4, i1 %arg.5, <vscal
 ; CHECK-NEXT:    snez a0, a0
 ; CHECK-NEXT:    sb a0, 0(zero)
 ; CHECK-NEXT:    li a0, 0
-; CHECK-NEXT:    ld ra, 104(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s0, 96(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s1, 88(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s2, 80(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s3, 72(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s4, 64(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s5, 56(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s6, 48(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s7, 40(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s8, 32(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s9, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s10, 16(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s11, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    addi sp, sp, 112
+; CHECK-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 48
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()

diff  --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
index d1c98f828e76d..7742cfc7da640 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
@@ -243,29 +243,28 @@ for.cond.cleanup:                                 ; preds = %vector.body
 define void @sink_splat_mul_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_mul_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a5, vlenb
-; CHECK-NEXT:    srli a2, a5, 1
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a2, a2, 1
 ; CHECK-NEXT:    li a3, 1024
 ; CHECK-NEXT:    bgeu a3, a2, .LBB7_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB7_5
 ; CHECK-NEXT:  .LBB7_2: # %vector.ph
+; CHECK-NEXT:    li a5, 0
 ; CHECK-NEXT:    addi a3, a2, -1
 ; CHECK-NEXT:    andi a4, a3, 1024
 ; CHECK-NEXT:    xori a3, a4, 1024
-; CHECK-NEXT:    slli a5, a5, 1
-; CHECK-NEXT:    mv a6, a0
-; CHECK-NEXT:    mv a7, a3
-; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a6, zero, e32, m2, ta, ma
 ; CHECK-NEXT:  .LBB7_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    slli a6, a5, 2
+; CHECK-NEXT:    add a6, a0, a6
 ; CHECK-NEXT:    vl2re32.v v8, (a6)
 ; CHECK-NEXT:    vmul.vx v8, v8, a1
+; CHECK-NEXT:    add a5, a5, a2
 ; CHECK-NEXT:    vs2r.v v8, (a6)
-; CHECK-NEXT:    sub a7, a7, a2
-; CHECK-NEXT:    add a6, a6, a5
-; CHECK-NEXT:    bnez a7, .LBB7_3
+; CHECK-NEXT:    bne a5, a3, .LBB7_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
 ; CHECK-NEXT:    beqz a4, .LBB7_7
 ; CHECK-NEXT:  .LBB7_5: # %for.body.preheader
@@ -334,29 +333,28 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_add_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a5, vlenb
-; CHECK-NEXT:    srli a2, a5, 1
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a2, a2, 1
 ; CHECK-NEXT:    li a3, 1024
 ; CHECK-NEXT:    bgeu a3, a2, .LBB8_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB8_5
 ; CHECK-NEXT:  .LBB8_2: # %vector.ph
+; CHECK-NEXT:    li a5, 0
 ; CHECK-NEXT:    addi a3, a2, -1
 ; CHECK-NEXT:    andi a4, a3, 1024
 ; CHECK-NEXT:    xori a3, a4, 1024
-; CHECK-NEXT:    slli a5, a5, 1
-; CHECK-NEXT:    mv a6, a0
-; CHECK-NEXT:    mv a7, a3
-; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a6, zero, e32, m2, ta, ma
 ; CHECK-NEXT:  .LBB8_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    slli a6, a5, 2
+; CHECK-NEXT:    add a6, a0, a6
 ; CHECK-NEXT:    vl2re32.v v8, (a6)
 ; CHECK-NEXT:    vadd.vx v8, v8, a1
+; CHECK-NEXT:    add a5, a5, a2
 ; CHECK-NEXT:    vs2r.v v8, (a6)
-; CHECK-NEXT:    sub a7, a7, a2
-; CHECK-NEXT:    add a6, a6, a5
-; CHECK-NEXT:    bnez a7, .LBB8_3
+; CHECK-NEXT:    bne a5, a3, .LBB8_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
 ; CHECK-NEXT:    beqz a4, .LBB8_7
 ; CHECK-NEXT:  .LBB8_5: # %for.body.preheader
@@ -425,29 +423,28 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_sub_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_sub_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a5, vlenb
-; CHECK-NEXT:    srli a2, a5, 1
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a2, a2, 1
 ; CHECK-NEXT:    li a3, 1024
 ; CHECK-NEXT:    bgeu a3, a2, .LBB9_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB9_5
 ; CHECK-NEXT:  .LBB9_2: # %vector.ph
+; CHECK-NEXT:    li a5, 0
 ; CHECK-NEXT:    addi a3, a2, -1
 ; CHECK-NEXT:    andi a4, a3, 1024
 ; CHECK-NEXT:    xori a3, a4, 1024
-; CHECK-NEXT:    slli a5, a5, 1
-; CHECK-NEXT:    mv a6, a0
-; CHECK-NEXT:    mv a7, a3
-; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a6, zero, e32, m2, ta, ma
 ; CHECK-NEXT:  .LBB9_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    slli a6, a5, 2
+; CHECK-NEXT:    add a6, a0, a6
 ; CHECK-NEXT:    vl2re32.v v8, (a6)
 ; CHECK-NEXT:    vsub.vx v8, v8, a1
+; CHECK-NEXT:    add a5, a5, a2
 ; CHECK-NEXT:    vs2r.v v8, (a6)
-; CHECK-NEXT:    sub a7, a7, a2
-; CHECK-NEXT:    add a6, a6, a5
-; CHECK-NEXT:    bnez a7, .LBB9_3
+; CHECK-NEXT:    bne a5, a3, .LBB9_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
 ; CHECK-NEXT:    beqz a4, .LBB9_7
 ; CHECK-NEXT:  .LBB9_5: # %for.body.preheader
@@ -516,29 +513,28 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_rsub_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_rsub_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a5, vlenb
-; CHECK-NEXT:    srli a2, a5, 1
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a2, a2, 1
 ; CHECK-NEXT:    li a3, 1024
 ; CHECK-NEXT:    bgeu a3, a2, .LBB10_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB10_5
 ; CHECK-NEXT:  .LBB10_2: # %vector.ph
+; CHECK-NEXT:    li a5, 0
 ; CHECK-NEXT:    addi a3, a2, -1
 ; CHECK-NEXT:    andi a4, a3, 1024
 ; CHECK-NEXT:    xori a3, a4, 1024
-; CHECK-NEXT:    slli a5, a5, 1
-; CHECK-NEXT:    mv a6, a0
-; CHECK-NEXT:    mv a7, a3
-; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a6, zero, e32, m2, ta, ma
 ; CHECK-NEXT:  .LBB10_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    slli a6, a5, 2
+; CHECK-NEXT:    add a6, a0, a6
 ; CHECK-NEXT:    vl2re32.v v8, (a6)
 ; CHECK-NEXT:    vrsub.vx v8, v8, a1
+; CHECK-NEXT:    add a5, a5, a2
 ; CHECK-NEXT:    vs2r.v v8, (a6)
-; CHECK-NEXT:    sub a7, a7, a2
-; CHECK-NEXT:    add a6, a6, a5
-; CHECK-NEXT:    bnez a7, .LBB10_3
+; CHECK-NEXT:    bne a5, a3, .LBB10_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
 ; CHECK-NEXT:    beqz a4, .LBB10_7
 ; CHECK-NEXT:  .LBB10_5: # %for.body.preheader
@@ -607,29 +603,28 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_and_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_and_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a5, vlenb
-; CHECK-NEXT:    srli a2, a5, 1
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a2, a2, 1
 ; CHECK-NEXT:    li a3, 1024
 ; CHECK-NEXT:    bgeu a3, a2, .LBB11_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB11_5
 ; CHECK-NEXT:  .LBB11_2: # %vector.ph
+; CHECK-NEXT:    li a5, 0
 ; CHECK-NEXT:    addi a3, a2, -1
 ; CHECK-NEXT:    andi a4, a3, 1024
 ; CHECK-NEXT:    xori a3, a4, 1024
-; CHECK-NEXT:    slli a5, a5, 1
-; CHECK-NEXT:    mv a6, a0
-; CHECK-NEXT:    mv a7, a3
-; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a6, zero, e32, m2, ta, ma
 ; CHECK-NEXT:  .LBB11_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    slli a6, a5, 2
+; CHECK-NEXT:    add a6, a0, a6
 ; CHECK-NEXT:    vl2re32.v v8, (a6)
 ; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    add a5, a5, a2
 ; CHECK-NEXT:    vs2r.v v8, (a6)
-; CHECK-NEXT:    sub a7, a7, a2
-; CHECK-NEXT:    add a6, a6, a5
-; CHECK-NEXT:    bnez a7, .LBB11_3
+; CHECK-NEXT:    bne a5, a3, .LBB11_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
 ; CHECK-NEXT:    beqz a4, .LBB11_7
 ; CHECK-NEXT:  .LBB11_5: # %for.body.preheader
@@ -698,29 +693,28 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_or_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_or_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a5, vlenb
-; CHECK-NEXT:    srli a2, a5, 1
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a2, a2, 1
 ; CHECK-NEXT:    li a3, 1024
 ; CHECK-NEXT:    bgeu a3, a2, .LBB12_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB12_5
 ; CHECK-NEXT:  .LBB12_2: # %vector.ph
+; CHECK-NEXT:    li a5, 0
 ; CHECK-NEXT:    addi a3, a2, -1
 ; CHECK-NEXT:    andi a4, a3, 1024
 ; CHECK-NEXT:    xori a3, a4, 1024
-; CHECK-NEXT:    slli a5, a5, 1
-; CHECK-NEXT:    mv a6, a0
-; CHECK-NEXT:    mv a7, a3
-; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a6, zero, e32, m2, ta, ma
 ; CHECK-NEXT:  .LBB12_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    slli a6, a5, 2
+; CHECK-NEXT:    add a6, a0, a6
 ; CHECK-NEXT:    vl2re32.v v8, (a6)
 ; CHECK-NEXT:    vor.vx v8, v8, a1
+; CHECK-NEXT:    add a5, a5, a2
 ; CHECK-NEXT:    vs2r.v v8, (a6)
-; CHECK-NEXT:    sub a7, a7, a2
-; CHECK-NEXT:    add a6, a6, a5
-; CHECK-NEXT:    bnez a7, .LBB12_3
+; CHECK-NEXT:    bne a5, a3, .LBB12_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
 ; CHECK-NEXT:    beqz a4, .LBB12_7
 ; CHECK-NEXT:  .LBB12_5: # %for.body.preheader
@@ -789,29 +783,28 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_xor_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_xor_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a5, vlenb
-; CHECK-NEXT:    srli a2, a5, 1
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a2, a2, 1
 ; CHECK-NEXT:    li a3, 1024
 ; CHECK-NEXT:    bgeu a3, a2, .LBB13_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB13_5
 ; CHECK-NEXT:  .LBB13_2: # %vector.ph
+; CHECK-NEXT:    li a5, 0
 ; CHECK-NEXT:    addi a3, a2, -1
 ; CHECK-NEXT:    andi a4, a3, 1024
 ; CHECK-NEXT:    xori a3, a4, 1024
-; CHECK-NEXT:    slli a5, a5, 1
-; CHECK-NEXT:    mv a6, a0
-; CHECK-NEXT:    mv a7, a3
-; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a6, zero, e32, m2, ta, ma
 ; CHECK-NEXT:  .LBB13_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    slli a6, a5, 2
+; CHECK-NEXT:    add a6, a0, a6
 ; CHECK-NEXT:    vl2re32.v v8, (a6)
 ; CHECK-NEXT:    vxor.vx v8, v8, a1
+; CHECK-NEXT:    add a5, a5, a2
 ; CHECK-NEXT:    vs2r.v v8, (a6)
-; CHECK-NEXT:    sub a7, a7, a2
-; CHECK-NEXT:    add a6, a6, a5
-; CHECK-NEXT:    bnez a7, .LBB13_3
+; CHECK-NEXT:    bne a5, a3, .LBB13_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
 ; CHECK-NEXT:    beqz a4, .LBB13_7
 ; CHECK-NEXT:  .LBB13_5: # %for.body.preheader
@@ -982,29 +975,28 @@ for.cond.cleanup:                                 ; preds = %vector.body
 define void @sink_splat_shl_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_shl_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a5, vlenb
-; CHECK-NEXT:    srli a2, a5, 1
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a2, a2, 1
 ; CHECK-NEXT:    li a3, 1024
 ; CHECK-NEXT:    bgeu a3, a2, .LBB17_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB17_5
 ; CHECK-NEXT:  .LBB17_2: # %vector.ph
+; CHECK-NEXT:    li a5, 0
 ; CHECK-NEXT:    addi a3, a2, -1
 ; CHECK-NEXT:    andi a4, a3, 1024
 ; CHECK-NEXT:    xori a3, a4, 1024
-; CHECK-NEXT:    slli a5, a5, 1
-; CHECK-NEXT:    mv a6, a0
-; CHECK-NEXT:    mv a7, a3
-; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a6, zero, e32, m2, ta, ma
 ; CHECK-NEXT:  .LBB17_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    slli a6, a5, 2
+; CHECK-NEXT:    add a6, a0, a6
 ; CHECK-NEXT:    vl2re32.v v8, (a6)
 ; CHECK-NEXT:    vsll.vx v8, v8, a1
+; CHECK-NEXT:    add a5, a5, a2
 ; CHECK-NEXT:    vs2r.v v8, (a6)
-; CHECK-NEXT:    sub a7, a7, a2
-; CHECK-NEXT:    add a6, a6, a5
-; CHECK-NEXT:    bnez a7, .LBB17_3
+; CHECK-NEXT:    bne a5, a3, .LBB17_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
 ; CHECK-NEXT:    beqz a4, .LBB17_7
 ; CHECK-NEXT:  .LBB17_5: # %for.body.preheader
@@ -1073,29 +1065,28 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_lshr_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_lshr_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a5, vlenb
-; CHECK-NEXT:    srli a2, a5, 1
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a2, a2, 1
 ; CHECK-NEXT:    li a3, 1024
 ; CHECK-NEXT:    bgeu a3, a2, .LBB18_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB18_5
 ; CHECK-NEXT:  .LBB18_2: # %vector.ph
+; CHECK-NEXT:    li a5, 0
 ; CHECK-NEXT:    addi a3, a2, -1
 ; CHECK-NEXT:    andi a4, a3, 1024
 ; CHECK-NEXT:    xori a3, a4, 1024
-; CHECK-NEXT:    slli a5, a5, 1
-; CHECK-NEXT:    mv a6, a0
-; CHECK-NEXT:    mv a7, a3
-; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a6, zero, e32, m2, ta, ma
 ; CHECK-NEXT:  .LBB18_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    slli a6, a5, 2
+; CHECK-NEXT:    add a6, a0, a6
 ; CHECK-NEXT:    vl2re32.v v8, (a6)
 ; CHECK-NEXT:    vsrl.vx v8, v8, a1
+; CHECK-NEXT:    add a5, a5, a2
 ; CHECK-NEXT:    vs2r.v v8, (a6)
-; CHECK-NEXT:    sub a7, a7, a2
-; CHECK-NEXT:    add a6, a6, a5
-; CHECK-NEXT:    bnez a7, .LBB18_3
+; CHECK-NEXT:    bne a5, a3, .LBB18_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
 ; CHECK-NEXT:    beqz a4, .LBB18_7
 ; CHECK-NEXT:  .LBB18_5: # %for.body.preheader
@@ -1164,33 +1155,32 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_ashr_scalable(ptr nocapture %a) {
 ; CHECK-LABEL: sink_splat_ashr_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a4, vlenb
-; CHECK-NEXT:    srli a2, a4, 1
-; CHECK-NEXT:    li a1, 1024
-; CHECK-NEXT:    bgeu a1, a2, .LBB19_2
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    srli a1, a1, 1
+; CHECK-NEXT:    li a2, 1024
+; CHECK-NEXT:    bgeu a2, a1, .LBB19_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a1, 0
+; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    j .LBB19_5
 ; CHECK-NEXT:  .LBB19_2: # %vector.ph
-; CHECK-NEXT:    addi a1, a2, -1
-; CHECK-NEXT:    andi a3, a1, 1024
-; CHECK-NEXT:    xori a1, a3, 1024
-; CHECK-NEXT:    slli a4, a4, 1
-; CHECK-NEXT:    mv a5, a0
-; CHECK-NEXT:    mv a6, a1
-; CHECK-NEXT:    vsetvli a7, zero, e32, m2, ta, ma
+; CHECK-NEXT:    li a4, 0
+; CHECK-NEXT:    addi a2, a1, -1
+; CHECK-NEXT:    andi a3, a2, 1024
+; CHECK-NEXT:    xori a2, a3, 1024
+; CHECK-NEXT:    vsetvli a5, zero, e32, m2, ta, ma
 ; CHECK-NEXT:  .LBB19_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    slli a5, a4, 2
+; CHECK-NEXT:    add a5, a0, a5
 ; CHECK-NEXT:    vl2re32.v v8, (a5)
 ; CHECK-NEXT:    vsra.vi v8, v8, 2
+; CHECK-NEXT:    add a4, a4, a1
 ; CHECK-NEXT:    vs2r.v v8, (a5)
-; CHECK-NEXT:    sub a6, a6, a2
-; CHECK-NEXT:    add a5, a5, a4
-; CHECK-NEXT:    bnez a6, .LBB19_3
+; CHECK-NEXT:    bne a4, a2, .LBB19_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
 ; CHECK-NEXT:    beqz a3, .LBB19_7
 ; CHECK-NEXT:  .LBB19_5: # %for.body.preheader
-; CHECK-NEXT:    slli a1, a1, 2
+; CHECK-NEXT:    slli a1, a2, 2
 ; CHECK-NEXT:    add a1, a0, a1
 ; CHECK-NEXT:    lui a2, 1
 ; CHECK-NEXT:    add a0, a0, a2
@@ -1458,31 +1448,31 @@ define void @sink_splat_fmul_scalable(ptr nocapture %a, float %x) {
 ; CHECK-LABEL: sink_splat_fmul_scalable:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    srli a2, a1, 2
-; CHECK-NEXT:    li a3, 1024
-; CHECK-NEXT:    bgeu a3, a2, .LBB26_2
+; CHECK-NEXT:    srli a1, a1, 2
+; CHECK-NEXT:    li a2, 1024
+; CHECK-NEXT:    bgeu a2, a1, .LBB26_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a3, 0
+; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    j .LBB26_5
 ; CHECK-NEXT:  .LBB26_2: # %vector.ph
-; CHECK-NEXT:    addi a3, a2, -1
-; CHECK-NEXT:    andi a4, a3, 1024
-; CHECK-NEXT:    xori a3, a4, 1024
-; CHECK-NEXT:    mv a5, a0
-; CHECK-NEXT:    mv a6, a3
-; CHECK-NEXT:    vsetvli a7, zero, e32, m1, ta, ma
+; CHECK-NEXT:    li a4, 0
+; CHECK-NEXT:    addi a2, a1, -1
+; CHECK-NEXT:    andi a3, a2, 1024
+; CHECK-NEXT:    xori a2, a3, 1024
+; CHECK-NEXT:    vsetvli a5, zero, e32, m1, ta, ma
 ; CHECK-NEXT:  .LBB26_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    slli a5, a4, 2
+; CHECK-NEXT:    add a5, a0, a5
 ; CHECK-NEXT:    vl1re32.v v8, (a5)
 ; CHECK-NEXT:    vfmul.vf v8, v8, fa0
+; CHECK-NEXT:    add a4, a4, a1
 ; CHECK-NEXT:    vs1r.v v8, (a5)
-; CHECK-NEXT:    sub a6, a6, a2
-; CHECK-NEXT:    add a5, a5, a1
-; CHECK-NEXT:    bnez a6, .LBB26_3
+; CHECK-NEXT:    bne a4, a2, .LBB26_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a4, .LBB26_7
+; CHECK-NEXT:    beqz a3, .LBB26_7
 ; CHECK-NEXT:  .LBB26_5: # %for.body.preheader
-; CHECK-NEXT:    slli a1, a3, 2
+; CHECK-NEXT:    slli a1, a2, 2
 ; CHECK-NEXT:    add a1, a0, a1
 ; CHECK-NEXT:    lui a2, 1
 ; CHECK-NEXT:    add a0, a0, a2
@@ -1548,31 +1538,31 @@ define void @sink_splat_fdiv_scalable(ptr nocapture %a, float %x) {
 ; CHECK-LABEL: sink_splat_fdiv_scalable:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    srli a2, a1, 2
-; CHECK-NEXT:    li a3, 1024
-; CHECK-NEXT:    bgeu a3, a2, .LBB27_2
+; CHECK-NEXT:    srli a1, a1, 2
+; CHECK-NEXT:    li a2, 1024
+; CHECK-NEXT:    bgeu a2, a1, .LBB27_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a3, 0
+; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    j .LBB27_5
 ; CHECK-NEXT:  .LBB27_2: # %vector.ph
-; CHECK-NEXT:    addi a3, a2, -1
-; CHECK-NEXT:    andi a4, a3, 1024
-; CHECK-NEXT:    xori a3, a4, 1024
-; CHECK-NEXT:    mv a5, a0
-; CHECK-NEXT:    mv a6, a3
-; CHECK-NEXT:    vsetvli a7, zero, e32, m1, ta, ma
+; CHECK-NEXT:    li a4, 0
+; CHECK-NEXT:    addi a2, a1, -1
+; CHECK-NEXT:    andi a3, a2, 1024
+; CHECK-NEXT:    xori a2, a3, 1024
+; CHECK-NEXT:    vsetvli a5, zero, e32, m1, ta, ma
 ; CHECK-NEXT:  .LBB27_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    slli a5, a4, 2
+; CHECK-NEXT:    add a5, a0, a5
 ; CHECK-NEXT:    vl1re32.v v8, (a5)
 ; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
+; CHECK-NEXT:    add a4, a4, a1
 ; CHECK-NEXT:    vs1r.v v8, (a5)
-; CHECK-NEXT:    sub a6, a6, a2
-; CHECK-NEXT:    add a5, a5, a1
-; CHECK-NEXT:    bnez a6, .LBB27_3
+; CHECK-NEXT:    bne a4, a2, .LBB27_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a4, .LBB27_7
+; CHECK-NEXT:    beqz a3, .LBB27_7
 ; CHECK-NEXT:  .LBB27_5: # %for.body.preheader
-; CHECK-NEXT:    slli a1, a3, 2
+; CHECK-NEXT:    slli a1, a2, 2
 ; CHECK-NEXT:    add a1, a0, a1
 ; CHECK-NEXT:    lui a2, 1
 ; CHECK-NEXT:    add a0, a0, a2
@@ -1638,31 +1628,31 @@ define void @sink_splat_frdiv_scalable(ptr nocapture %a, float %x) {
 ; CHECK-LABEL: sink_splat_frdiv_scalable:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    srli a2, a1, 2
-; CHECK-NEXT:    li a3, 1024
-; CHECK-NEXT:    bgeu a3, a2, .LBB28_2
+; CHECK-NEXT:    srli a1, a1, 2
+; CHECK-NEXT:    li a2, 1024
+; CHECK-NEXT:    bgeu a2, a1, .LBB28_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a3, 0
+; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    j .LBB28_5
 ; CHECK-NEXT:  .LBB28_2: # %vector.ph
-; CHECK-NEXT:    addi a3, a2, -1
-; CHECK-NEXT:    andi a4, a3, 1024
-; CHECK-NEXT:    xori a3, a4, 1024
-; CHECK-NEXT:    mv a5, a0
-; CHECK-NEXT:    mv a6, a3
-; CHECK-NEXT:    vsetvli a7, zero, e32, m1, ta, ma
+; CHECK-NEXT:    li a4, 0
+; CHECK-NEXT:    addi a2, a1, -1
+; CHECK-NEXT:    andi a3, a2, 1024
+; CHECK-NEXT:    xori a2, a3, 1024
+; CHECK-NEXT:    vsetvli a5, zero, e32, m1, ta, ma
 ; CHECK-NEXT:  .LBB28_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    slli a5, a4, 2
+; CHECK-NEXT:    add a5, a0, a5
 ; CHECK-NEXT:    vl1re32.v v8, (a5)
 ; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
+; CHECK-NEXT:    add a4, a4, a1
 ; CHECK-NEXT:    vs1r.v v8, (a5)
-; CHECK-NEXT:    sub a6, a6, a2
-; CHECK-NEXT:    add a5, a5, a1
-; CHECK-NEXT:    bnez a6, .LBB28_3
+; CHECK-NEXT:    bne a4, a2, .LBB28_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a4, .LBB28_7
+; CHECK-NEXT:    beqz a3, .LBB28_7
 ; CHECK-NEXT:  .LBB28_5: # %for.body.preheader
-; CHECK-NEXT:    slli a1, a3, 2
+; CHECK-NEXT:    slli a1, a2, 2
 ; CHECK-NEXT:    add a1, a0, a1
 ; CHECK-NEXT:    lui a2, 1
 ; CHECK-NEXT:    add a0, a0, a2
@@ -1728,31 +1718,31 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
 ; CHECK-LABEL: sink_splat_fadd_scalable:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    srli a2, a1, 2
-; CHECK-NEXT:    li a3, 1024
-; CHECK-NEXT:    bgeu a3, a2, .LBB29_2
+; CHECK-NEXT:    srli a1, a1, 2
+; CHECK-NEXT:    li a2, 1024
+; CHECK-NEXT:    bgeu a2, a1, .LBB29_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a3, 0
+; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    j .LBB29_5
 ; CHECK-NEXT:  .LBB29_2: # %vector.ph
-; CHECK-NEXT:    addi a3, a2, -1
-; CHECK-NEXT:    andi a4, a3, 1024
-; CHECK-NEXT:    xori a3, a4, 1024
-; CHECK-NEXT:    mv a5, a0
-; CHECK-NEXT:    mv a6, a3
-; CHECK-NEXT:    vsetvli a7, zero, e32, m1, ta, ma
+; CHECK-NEXT:    li a4, 0
+; CHECK-NEXT:    addi a2, a1, -1
+; CHECK-NEXT:    andi a3, a2, 1024
+; CHECK-NEXT:    xori a2, a3, 1024
+; CHECK-NEXT:    vsetvli a5, zero, e32, m1, ta, ma
 ; CHECK-NEXT:  .LBB29_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    slli a5, a4, 2
+; CHECK-NEXT:    add a5, a0, a5
 ; CHECK-NEXT:    vl1re32.v v8, (a5)
 ; CHECK-NEXT:    vfadd.vf v8, v8, fa0
+; CHECK-NEXT:    add a4, a4, a1
 ; CHECK-NEXT:    vs1r.v v8, (a5)
-; CHECK-NEXT:    sub a6, a6, a2
-; CHECK-NEXT:    add a5, a5, a1
-; CHECK-NEXT:    bnez a6, .LBB29_3
+; CHECK-NEXT:    bne a4, a2, .LBB29_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a4, .LBB29_7
+; CHECK-NEXT:    beqz a3, .LBB29_7
 ; CHECK-NEXT:  .LBB29_5: # %for.body.preheader
-; CHECK-NEXT:    slli a1, a3, 2
+; CHECK-NEXT:    slli a1, a2, 2
 ; CHECK-NEXT:    add a1, a0, a1
 ; CHECK-NEXT:    lui a2, 1
 ; CHECK-NEXT:    add a0, a0, a2
@@ -1818,31 +1808,31 @@ define void @sink_splat_fsub_scalable(ptr nocapture %a, float %x) {
 ; CHECK-LABEL: sink_splat_fsub_scalable:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    srli a2, a1, 2
-; CHECK-NEXT:    li a3, 1024
-; CHECK-NEXT:    bgeu a3, a2, .LBB30_2
+; CHECK-NEXT:    srli a1, a1, 2
+; CHECK-NEXT:    li a2, 1024
+; CHECK-NEXT:    bgeu a2, a1, .LBB30_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a3, 0
+; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    j .LBB30_5
 ; CHECK-NEXT:  .LBB30_2: # %vector.ph
-; CHECK-NEXT:    addi a3, a2, -1
-; CHECK-NEXT:    andi a4, a3, 1024
-; CHECK-NEXT:    xori a3, a4, 1024
-; CHECK-NEXT:    mv a5, a0
-; CHECK-NEXT:    mv a6, a3
-; CHECK-NEXT:    vsetvli a7, zero, e32, m1, ta, ma
+; CHECK-NEXT:    li a4, 0
+; CHECK-NEXT:    addi a2, a1, -1
+; CHECK-NEXT:    andi a3, a2, 1024
+; CHECK-NEXT:    xori a2, a3, 1024
+; CHECK-NEXT:    vsetvli a5, zero, e32, m1, ta, ma
 ; CHECK-NEXT:  .LBB30_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    slli a5, a4, 2
+; CHECK-NEXT:    add a5, a0, a5
 ; CHECK-NEXT:    vl1re32.v v8, (a5)
 ; CHECK-NEXT:    vfsub.vf v8, v8, fa0
+; CHECK-NEXT:    add a4, a4, a1
 ; CHECK-NEXT:    vs1r.v v8, (a5)
-; CHECK-NEXT:    sub a6, a6, a2
-; CHECK-NEXT:    add a5, a5, a1
-; CHECK-NEXT:    bnez a6, .LBB30_3
+; CHECK-NEXT:    bne a4, a2, .LBB30_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a4, .LBB30_7
+; CHECK-NEXT:    beqz a3, .LBB30_7
 ; CHECK-NEXT:  .LBB30_5: # %for.body.preheader
-; CHECK-NEXT:    slli a1, a3, 2
+; CHECK-NEXT:    slli a1, a2, 2
 ; CHECK-NEXT:    add a1, a0, a1
 ; CHECK-NEXT:    lui a2, 1
 ; CHECK-NEXT:    add a0, a0, a2
@@ -1908,31 +1898,31 @@ define void @sink_splat_frsub_scalable(ptr nocapture %a, float %x) {
 ; CHECK-LABEL: sink_splat_frsub_scalable:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    srli a2, a1, 2
-; CHECK-NEXT:    li a3, 1024
-; CHECK-NEXT:    bgeu a3, a2, .LBB31_2
+; CHECK-NEXT:    srli a1, a1, 2
+; CHECK-NEXT:    li a2, 1024
+; CHECK-NEXT:    bgeu a2, a1, .LBB31_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a3, 0
+; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    j .LBB31_5
 ; CHECK-NEXT:  .LBB31_2: # %vector.ph
-; CHECK-NEXT:    addi a3, a2, -1
-; CHECK-NEXT:    andi a4, a3, 1024
-; CHECK-NEXT:    xori a3, a4, 1024
-; CHECK-NEXT:    mv a5, a0
-; CHECK-NEXT:    mv a6, a3
-; CHECK-NEXT:    vsetvli a7, zero, e32, m1, ta, ma
+; CHECK-NEXT:    li a4, 0
+; CHECK-NEXT:    addi a2, a1, -1
+; CHECK-NEXT:    andi a3, a2, 1024
+; CHECK-NEXT:    xori a2, a3, 1024
+; CHECK-NEXT:    vsetvli a5, zero, e32, m1, ta, ma
 ; CHECK-NEXT:  .LBB31_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    slli a5, a4, 2
+; CHECK-NEXT:    add a5, a0, a5
 ; CHECK-NEXT:    vl1re32.v v8, (a5)
 ; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
+; CHECK-NEXT:    add a4, a4, a1
 ; CHECK-NEXT:    vs1r.v v8, (a5)
-; CHECK-NEXT:    sub a6, a6, a2
-; CHECK-NEXT:    add a5, a5, a1
-; CHECK-NEXT:    bnez a6, .LBB31_3
+; CHECK-NEXT:    bne a4, a2, .LBB31_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a4, .LBB31_7
+; CHECK-NEXT:    beqz a3, .LBB31_7
 ; CHECK-NEXT:  .LBB31_5: # %for.body.preheader
-; CHECK-NEXT:    slli a1, a3, 2
+; CHECK-NEXT:    slli a1, a2, 2
 ; CHECK-NEXT:    add a1, a0, a1
 ; CHECK-NEXT:    lui a2, 1
 ; CHECK-NEXT:    add a0, a0, a2
@@ -2074,36 +2064,35 @@ define void @sink_splat_fma_scalable(ptr noalias nocapture %a, ptr noalias nocap
 ; CHECK-LABEL: sink_splat_fma_scalable:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    srli a3, a2, 2
-; CHECK-NEXT:    li a4, 1024
-; CHECK-NEXT:    bgeu a4, a3, .LBB34_2
+; CHECK-NEXT:    srli a2, a2, 2
+; CHECK-NEXT:    li a3, 1024
+; CHECK-NEXT:    bgeu a3, a2, .LBB34_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a4, 0
+; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB34_5
 ; CHECK-NEXT:  .LBB34_2: # %vector.ph
-; CHECK-NEXT:    addi a4, a3, -1
-; CHECK-NEXT:    andi a5, a4, 1024
-; CHECK-NEXT:    xori a4, a5, 1024
-; CHECK-NEXT:    mv a6, a0
-; CHECK-NEXT:    mv a7, a1
-; CHECK-NEXT:    mv t0, a4
-; CHECK-NEXT:    vsetvli t1, zero, e32, m1, ta, ma
+; CHECK-NEXT:    li a5, 0
+; CHECK-NEXT:    addi a3, a2, -1
+; CHECK-NEXT:    andi a4, a3, 1024
+; CHECK-NEXT:    xori a3, a4, 1024
+; CHECK-NEXT:    vsetvli a6, zero, e32, m1, ta, ma
 ; CHECK-NEXT:  .LBB34_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vl1re32.v v8, (a6)
-; CHECK-NEXT:    vl1re32.v v9, (a7)
+; CHECK-NEXT:    slli a6, a5, 2
+; CHECK-NEXT:    add a7, a0, a6
+; CHECK-NEXT:    vl1re32.v v8, (a7)
+; CHECK-NEXT:    add a6, a1, a6
+; CHECK-NEXT:    vl1re32.v v9, (a6)
 ; CHECK-NEXT:    vfmacc.vf v9, fa0, v8
-; CHECK-NEXT:    vs1r.v v9, (a6)
-; CHECK-NEXT:    sub t0, t0, a3
-; CHECK-NEXT:    add a7, a7, a2
-; CHECK-NEXT:    add a6, a6, a2
-; CHECK-NEXT:    bnez t0, .LBB34_3
+; CHECK-NEXT:    add a5, a5, a2
+; CHECK-NEXT:    vs1r.v v9, (a7)
+; CHECK-NEXT:    bne a5, a3, .LBB34_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a5, .LBB34_7
+; CHECK-NEXT:    beqz a4, .LBB34_7
 ; CHECK-NEXT:  .LBB34_5: # %for.body.preheader
-; CHECK-NEXT:    slli a4, a4, 2
-; CHECK-NEXT:    add a2, a1, a4
-; CHECK-NEXT:    add a0, a0, a4
+; CHECK-NEXT:    slli a3, a3, 2
+; CHECK-NEXT:    add a2, a1, a3
+; CHECK-NEXT:    add a0, a0, a3
 ; CHECK-NEXT:    lui a3, 1
 ; CHECK-NEXT:    add a1, a1, a3
 ; CHECK-NEXT:  .LBB34_6: # %for.body
@@ -2174,36 +2163,35 @@ define void @sink_splat_fma_commute_scalable(ptr noalias nocapture %a, ptr noali
 ; CHECK-LABEL: sink_splat_fma_commute_scalable:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    srli a3, a2, 2
-; CHECK-NEXT:    li a4, 1024
-; CHECK-NEXT:    bgeu a4, a3, .LBB35_2
+; CHECK-NEXT:    srli a2, a2, 2
+; CHECK-NEXT:    li a3, 1024
+; CHECK-NEXT:    bgeu a3, a2, .LBB35_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a4, 0
+; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB35_5
 ; CHECK-NEXT:  .LBB35_2: # %vector.ph
-; CHECK-NEXT:    addi a4, a3, -1
-; CHECK-NEXT:    andi a5, a4, 1024
-; CHECK-NEXT:    xori a4, a5, 1024
-; CHECK-NEXT:    mv a6, a0
-; CHECK-NEXT:    mv a7, a1
-; CHECK-NEXT:    mv t0, a4
-; CHECK-NEXT:    vsetvli t1, zero, e32, m1, ta, ma
+; CHECK-NEXT:    li a5, 0
+; CHECK-NEXT:    addi a3, a2, -1
+; CHECK-NEXT:    andi a4, a3, 1024
+; CHECK-NEXT:    xori a3, a4, 1024
+; CHECK-NEXT:    vsetvli a6, zero, e32, m1, ta, ma
 ; CHECK-NEXT:  .LBB35_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vl1re32.v v8, (a6)
-; CHECK-NEXT:    vl1re32.v v9, (a7)
+; CHECK-NEXT:    slli a6, a5, 2
+; CHECK-NEXT:    add a7, a0, a6
+; CHECK-NEXT:    vl1re32.v v8, (a7)
+; CHECK-NEXT:    add a6, a1, a6
+; CHECK-NEXT:    vl1re32.v v9, (a6)
 ; CHECK-NEXT:    vfmacc.vf v9, fa0, v8
-; CHECK-NEXT:    vs1r.v v9, (a6)
-; CHECK-NEXT:    sub t0, t0, a3
-; CHECK-NEXT:    add a7, a7, a2
-; CHECK-NEXT:    add a6, a6, a2
-; CHECK-NEXT:    bnez t0, .LBB35_3
+; CHECK-NEXT:    add a5, a5, a2
+; CHECK-NEXT:    vs1r.v v9, (a7)
+; CHECK-NEXT:    bne a5, a3, .LBB35_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a5, .LBB35_7
+; CHECK-NEXT:    beqz a4, .LBB35_7
 ; CHECK-NEXT:  .LBB35_5: # %for.body.preheader
-; CHECK-NEXT:    slli a4, a4, 2
-; CHECK-NEXT:    add a2, a1, a4
-; CHECK-NEXT:    add a0, a0, a4
+; CHECK-NEXT:    slli a3, a3, 2
+; CHECK-NEXT:    add a2, a1, a3
+; CHECK-NEXT:    add a0, a0, a3
 ; CHECK-NEXT:    lui a3, 1
 ; CHECK-NEXT:    add a1, a1, a3
 ; CHECK-NEXT:  .LBB35_6: # %for.body
@@ -2486,29 +2474,28 @@ for.cond.cleanup:                                 ; preds = %vector.body
 define void @sink_splat_udiv_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_udiv_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a5, vlenb
-; CHECK-NEXT:    srli a2, a5, 1
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a2, a2, 1
 ; CHECK-NEXT:    li a3, 1024
 ; CHECK-NEXT:    bgeu a3, a2, .LBB42_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB42_5
 ; CHECK-NEXT:  .LBB42_2: # %vector.ph
+; CHECK-NEXT:    li a5, 0
 ; CHECK-NEXT:    addi a3, a2, -1
 ; CHECK-NEXT:    andi a4, a3, 1024
 ; CHECK-NEXT:    xori a3, a4, 1024
-; CHECK-NEXT:    slli a5, a5, 1
-; CHECK-NEXT:    mv a6, a0
-; CHECK-NEXT:    mv a7, a3
-; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a6, zero, e32, m2, ta, ma
 ; CHECK-NEXT:  .LBB42_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    slli a6, a5, 2
+; CHECK-NEXT:    add a6, a0, a6
 ; CHECK-NEXT:    vl2re32.v v8, (a6)
 ; CHECK-NEXT:    vdivu.vx v8, v8, a1
+; CHECK-NEXT:    add a5, a5, a2
 ; CHECK-NEXT:    vs2r.v v8, (a6)
-; CHECK-NEXT:    sub a7, a7, a2
-; CHECK-NEXT:    add a6, a6, a5
-; CHECK-NEXT:    bnez a7, .LBB42_3
+; CHECK-NEXT:    bne a5, a3, .LBB42_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
 ; CHECK-NEXT:    beqz a4, .LBB42_7
 ; CHECK-NEXT:  .LBB42_5: # %for.body.preheader
@@ -2577,29 +2564,28 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_sdiv_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_sdiv_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a5, vlenb
-; CHECK-NEXT:    srli a2, a5, 1
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a2, a2, 1
 ; CHECK-NEXT:    li a3, 1024
 ; CHECK-NEXT:    bgeu a3, a2, .LBB43_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB43_5
 ; CHECK-NEXT:  .LBB43_2: # %vector.ph
+; CHECK-NEXT:    li a5, 0
 ; CHECK-NEXT:    addi a3, a2, -1
 ; CHECK-NEXT:    andi a4, a3, 1024
 ; CHECK-NEXT:    xori a3, a4, 1024
-; CHECK-NEXT:    slli a5, a5, 1
-; CHECK-NEXT:    mv a6, a0
-; CHECK-NEXT:    mv a7, a3
-; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a6, zero, e32, m2, ta, ma
 ; CHECK-NEXT:  .LBB43_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    slli a6, a5, 2
+; CHECK-NEXT:    add a6, a0, a6
 ; CHECK-NEXT:    vl2re32.v v8, (a6)
 ; CHECK-NEXT:    vdiv.vx v8, v8, a1
+; CHECK-NEXT:    add a5, a5, a2
 ; CHECK-NEXT:    vs2r.v v8, (a6)
-; CHECK-NEXT:    sub a7, a7, a2
-; CHECK-NEXT:    add a6, a6, a5
-; CHECK-NEXT:    bnez a7, .LBB43_3
+; CHECK-NEXT:    bne a5, a3, .LBB43_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
 ; CHECK-NEXT:    beqz a4, .LBB43_7
 ; CHECK-NEXT:  .LBB43_5: # %for.body.preheader
@@ -2668,29 +2654,28 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_urem_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_urem_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a5, vlenb
-; CHECK-NEXT:    srli a2, a5, 1
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a2, a2, 1
 ; CHECK-NEXT:    li a3, 1024
 ; CHECK-NEXT:    bgeu a3, a2, .LBB44_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB44_5
 ; CHECK-NEXT:  .LBB44_2: # %vector.ph
+; CHECK-NEXT:    li a5, 0
 ; CHECK-NEXT:    addi a3, a2, -1
 ; CHECK-NEXT:    andi a4, a3, 1024
 ; CHECK-NEXT:    xori a3, a4, 1024
-; CHECK-NEXT:    slli a5, a5, 1
-; CHECK-NEXT:    mv a6, a0
-; CHECK-NEXT:    mv a7, a3
-; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a6, zero, e32, m2, ta, ma
 ; CHECK-NEXT:  .LBB44_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    slli a6, a5, 2
+; CHECK-NEXT:    add a6, a0, a6
 ; CHECK-NEXT:    vl2re32.v v8, (a6)
 ; CHECK-NEXT:    vremu.vx v8, v8, a1
+; CHECK-NEXT:    add a5, a5, a2
 ; CHECK-NEXT:    vs2r.v v8, (a6)
-; CHECK-NEXT:    sub a7, a7, a2
-; CHECK-NEXT:    add a6, a6, a5
-; CHECK-NEXT:    bnez a7, .LBB44_3
+; CHECK-NEXT:    bne a5, a3, .LBB44_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
 ; CHECK-NEXT:    beqz a4, .LBB44_7
 ; CHECK-NEXT:  .LBB44_5: # %for.body.preheader
@@ -2759,29 +2744,28 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_srem_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_srem_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a5, vlenb
-; CHECK-NEXT:    srli a2, a5, 1
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a2, a2, 1
 ; CHECK-NEXT:    li a3, 1024
 ; CHECK-NEXT:    bgeu a3, a2, .LBB45_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB45_5
 ; CHECK-NEXT:  .LBB45_2: # %vector.ph
+; CHECK-NEXT:    li a5, 0
 ; CHECK-NEXT:    addi a3, a2, -1
 ; CHECK-NEXT:    andi a4, a3, 1024
 ; CHECK-NEXT:    xori a3, a4, 1024
-; CHECK-NEXT:    slli a5, a5, 1
-; CHECK-NEXT:    mv a6, a0
-; CHECK-NEXT:    mv a7, a3
-; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a6, zero, e32, m2, ta, ma
 ; CHECK-NEXT:  .LBB45_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    slli a6, a5, 2
+; CHECK-NEXT:    add a6, a0, a6
 ; CHECK-NEXT:    vl2re32.v v8, (a6)
 ; CHECK-NEXT:    vrem.vx v8, v8, a1
+; CHECK-NEXT:    add a5, a5, a2
 ; CHECK-NEXT:    vs2r.v v8, (a6)
-; CHECK-NEXT:    sub a7, a7, a2
-; CHECK-NEXT:    add a6, a6, a5
-; CHECK-NEXT:    bnez a7, .LBB45_3
+; CHECK-NEXT:    bne a5, a3, .LBB45_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
 ; CHECK-NEXT:    beqz a4, .LBB45_7
 ; CHECK-NEXT:  .LBB45_5: # %for.body.preheader

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
index f93022c9d132d..15bbc1f9f6e35 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
@@ -635,17 +635,17 @@ define void @vlmax(i64 %N, ptr %c, ptr %a, ptr %b) {
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-NEXT:    li a4, 0
 ; CHECK-NEXT:    vsetvli a6, zero, e64, m1, ta, ma
-; CHECK-NEXT:    slli a5, a6, 3
 ; CHECK-NEXT:  .LBB12_2: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vle64.v v8, (a2)
-; CHECK-NEXT:    vle64.v v9, (a3)
+; CHECK-NEXT:    slli a7, a4, 3
+; CHECK-NEXT:    add a5, a2, a7
+; CHECK-NEXT:    vle64.v v8, (a5)
+; CHECK-NEXT:    add a5, a3, a7
+; CHECK-NEXT:    vle64.v v9, (a5)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v9
-; CHECK-NEXT:    vse64.v v8, (a1)
+; CHECK-NEXT:    add a7, a7, a1
 ; CHECK-NEXT:    add a4, a4, a6
-; CHECK-NEXT:    add a1, a1, a5
-; CHECK-NEXT:    add a3, a3, a5
-; CHECK-NEXT:    add a2, a2, a5
+; CHECK-NEXT:    vse64.v v8, (a7)
 ; CHECK-NEXT:    blt a4, a0, .LBB12_2
 ; CHECK-NEXT:  .LBB12_3: # %for.end
 ; CHECK-NEXT:    ret
@@ -682,13 +682,13 @@ define void @vector_init_vlmax(i64 %N, ptr %c) {
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    vsetvli a3, zero, e64, m1, ta, ma
-; CHECK-NEXT:    slli a4, a3, 3
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:  .LBB13_2: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vse64.v v8, (a1)
+; CHECK-NEXT:    slli a4, a2, 3
+; CHECK-NEXT:    add a4, a4, a1
 ; CHECK-NEXT:    add a2, a2, a3
-; CHECK-NEXT:    add a1, a1, a4
+; CHECK-NEXT:    vse64.v v8, (a4)
 ; CHECK-NEXT:    blt a2, a0, .LBB13_2
 ; CHECK-NEXT:  .LBB13_3: # %for.end
 ; CHECK-NEXT:    ret
@@ -718,15 +718,15 @@ define void @vector_init_vsetvli_N(i64 %N, ptr %c) {
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    vsetvli a3, a0, e64, m1, ta, ma
-; CHECK-NEXT:    slli a4, a3, 3
-; CHECK-NEXT:    vsetvli a5, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsetvli a4, zero, e64, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:  .LBB14_2: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vse64.v v8, (a1)
+; CHECK-NEXT:    slli a4, a2, 3
+; CHECK-NEXT:    add a4, a4, a1
 ; CHECK-NEXT:    add a2, a2, a3
-; CHECK-NEXT:    add a1, a1, a4
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vse64.v v8, (a4)
 ; CHECK-NEXT:    blt a2, a0, .LBB14_2
 ; CHECK-NEXT:  .LBB14_3: # %for.end
 ; CHECK-NEXT:    ret
@@ -754,15 +754,15 @@ define void @vector_init_vsetvli_fv(i64 %N, ptr %c) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    vsetivli a3, 4, e64, m1, ta, ma
-; CHECK-NEXT:    slli a4, a3, 3
-; CHECK-NEXT:    vsetvli a5, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsetvli a4, zero, e64, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:  .LBB15_1: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
-; CHECK-NEXT:    vse64.v v8, (a1)
+; CHECK-NEXT:    slli a4, a2, 3
+; CHECK-NEXT:    add a4, a4, a1
 ; CHECK-NEXT:    add a2, a2, a3
-; CHECK-NEXT:    add a1, a1, a4
+; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
+; CHECK-NEXT:    vse64.v v8, (a4)
 ; CHECK-NEXT:    blt a2, a0, .LBB15_1
 ; CHECK-NEXT:  # %bb.2: # %for.end
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/RISCV/icmp-zero.ll b/llvm/test/Transforms/LoopStrengthReduce/RISCV/icmp-zero.ll
index a8446c5103176..c4558a55e729f 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/RISCV/icmp-zero.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/RISCV/icmp-zero.ll
@@ -357,11 +357,11 @@ define void @loop_invariant_definition(i64 %arg) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[T1:%.*]]
 ; CHECK:       t1:
-; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[T1]] ], [ -1, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 1
+; CHECK-NEXT:    [[T2:%.*]] = phi i64 [ [[T3:%.*]], [[T1]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[T3]] = add nuw i64 [[T2]], 1
 ; CHECK-NEXT:    br i1 true, label [[T4:%.*]], label [[T1]]
 ; CHECK:       t4:
-; CHECK-NEXT:    [[T5:%.*]] = trunc i64 [[LSR_IV_NEXT]] to i32
+; CHECK-NEXT:    [[T5:%.*]] = trunc i64 [[T2]] to i32
 ; CHECK-NEXT:    [[T6:%.*]] = add i32 [[T5]], 1
 ; CHECK-NEXT:    [[T7:%.*]] = icmp eq i32 [[T5]], [[T6]]
 ; CHECK-NEXT:    ret void

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll b/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll
index 9c11bd064ad47..18ab64758e49e 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll
@@ -38,14 +38,14 @@ exit:                                             ; preds = %loop
 define void @test2(ptr %a) {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 128000
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[LOOP]] ], [ [[A]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    store float 1.000000e+00, ptr [[LSR_IV1]], align 4
-; CHECK-NEXT:    [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
-; CHECK-NEXT:    [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[SCEVGEP]], [[SCEVGEP2]]
-; CHECK-NEXT:    br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK-NEXT:    [[T15:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[T20:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[T19:%.*]] = getelementptr inbounds [32000 x float], ptr [[A:%.*]], i64 0, i64 [[T15]]
+; CHECK-NEXT:    store float 1.000000e+00, ptr [[T19]], align 4
+; CHECK-NEXT:    [[T20]] = add nuw nsw i64 [[T15]], 1
+; CHECK-NEXT:    [[T21:%.*]] = icmp eq i64 [[T20]], 32000
+; CHECK-NEXT:    br i1 [[T21]], label [[EXIT:%.*]], label [[LOOP]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    call void @use(ptr [[A]])
 ; CHECK-NEXT:    ret void
@@ -107,18 +107,17 @@ exit:                                             ; preds = %loop
 define void @test4(ptr %a, ptr %b) {
 ; CHECK-LABEL: @test4(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 128000
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[LSR_IV2:%.*]] = phi ptr [ [[SCEVGEP3:%.*]], [[LOOP]] ], [ [[A:%.*]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[LOOP]] ], [ [[B]], [[ENTRY]] ]
-; CHECK-NEXT:    [[T17:%.*]] = load float, ptr [[LSR_IV2]], align 4
+; CHECK-NEXT:    [[T15:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[T20:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[T16:%.*]] = getelementptr inbounds [32000 x float], ptr [[A:%.*]], i64 0, i64 [[T15]]
+; CHECK-NEXT:    [[T17:%.*]] = load float, ptr [[T16]], align 4
 ; CHECK-NEXT:    [[T18:%.*]] = fadd float [[T17]], 1.000000e+00
-; CHECK-NEXT:    store float [[T18]], ptr [[LSR_IV1]], align 4
-; CHECK-NEXT:    [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
-; CHECK-NEXT:    [[SCEVGEP3]] = getelementptr i8, ptr [[LSR_IV2]], i64 4
-; CHECK-NEXT:    [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[SCEVGEP]], [[SCEVGEP4]]
-; CHECK-NEXT:    br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK-NEXT:    [[T19:%.*]] = getelementptr inbounds [32000 x float], ptr [[B:%.*]], i64 0, i64 [[T15]]
+; CHECK-NEXT:    store float [[T18]], ptr [[T19]], align 4
+; CHECK-NEXT:    [[T20]] = add nuw nsw i64 [[T15]], 1
+; CHECK-NEXT:    [[T21:%.*]] = icmp eq i64 [[T20]], 32000
+; CHECK-NEXT:    br i1 [[T21]], label [[EXIT:%.*]], label [[LOOP]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    call void @use(ptr [[A]])
 ; CHECK-NEXT:    call void @use(ptr [[B]])

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution-dbg-msg.ll b/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution-dbg-msg.ll
index 8d9d43202f0d9..6b25aa5efd508 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution-dbg-msg.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution-dbg-msg.ll
@@ -1,6 +1,5 @@
 ; REQUIRES: asserts
 ; RUN: llc < %s -O3 -mattr=+v -debug -lsr-drop-solution 2>&1 | FileCheck --check-prefix=DEBUG %s
-; RUN: llc < %s -O3 -mattr=+v -debug 2>&1 | FileCheck --check-prefix=DEBUG2 %s
 
 target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
 target triple = "riscv64-unknown-linux-gnu"
@@ -10,7 +9,6 @@ define ptr @foo(ptr %a0, ptr %a1, i64 %a2) {
 ;DEBUG: The chosen solution requires 3 instructions 6 regs, with addrec cost 1, plus 2 base adds, plus 5 setup cost
 ;DEBUG: Baseline is more profitable than chosen solution, dropping LSR solution.
 
-;DEBUG2: Baseline is more profitable than chosen solution, add option 'lsr-drop-solution' to drop LSR solution.
 entry:
   %0 = ptrtoint ptr %a0 to i64
   %1 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %a2, i64 0, i64 3)

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/RISCV/many-geps.ll b/llvm/test/Transforms/LoopStrengthReduce/RISCV/many-geps.ll
index 4914bb72d8945..aa7781e4374bf 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/RISCV/many-geps.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/RISCV/many-geps.ll
@@ -20,53 +20,33 @@ define i32 @main() {
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[CALL]], align 4
 ; CHECK-NEXT:    ret i32 0
 ; CHECK:       [[BB2]]:
-; CHECK-NEXT:    [[LSR_IV30:%.*]] = phi i64 [ [[LSR_IV_NEXT31:%.*]], %[[BB2]] ], [ 8, [[BB:%.*]] ]
-; CHECK-NEXT:    [[LSR_IV27:%.*]] = phi i64 [ [[LSR_IV_NEXT28:%.*]], %[[BB2]] ], [ 12, [[BB]] ]
-; CHECK-NEXT:    [[LSR_IV24:%.*]] = phi i64 [ [[LSR_IV_NEXT25:%.*]], %[[BB2]] ], [ 16, [[BB]] ]
-; CHECK-NEXT:    [[LSR_IV21:%.*]] = phi i64 [ [[LSR_IV_NEXT22:%.*]], %[[BB2]] ], [ 20, [[BB]] ]
-; CHECK-NEXT:    [[LSR_IV18:%.*]] = phi i64 [ [[LSR_IV_NEXT19:%.*]], %[[BB2]] ], [ 24, [[BB]] ]
-; CHECK-NEXT:    [[LSR_IV15:%.*]] = phi i64 [ [[LSR_IV_NEXT16:%.*]], %[[BB2]] ], [ 28, [[BB]] ]
-; CHECK-NEXT:    [[LSR_IV12:%.*]] = phi i64 [ [[LSR_IV_NEXT13:%.*]], %[[BB2]] ], [ 32, [[BB]] ]
-; CHECK-NEXT:    [[LSR_IV9:%.*]] = phi i64 [ [[LSR_IV_NEXT10:%.*]], %[[BB2]] ], [ 36, [[BB]] ]
-; CHECK-NEXT:    [[LSR_IV4:%.*]] = phi i64 [ [[LSR_IV_NEXT5:%.*]], %[[BB2]] ], [ 40, [[BB]] ]
-; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi i64 [ [[LSR_IV_NEXT2:%.*]], %[[BB2]] ], [ 48, [[BB]] ]
-; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[BB2]] ], [ 72, [[BB]] ]
-; CHECK-NEXT:    [[SCEVGEP32:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV30]]
+; CHECK-NEXT:    [[PHI:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[ADD:%.*]], %[[BB2]] ]
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr [[STRUCT:%.*]], ptr [[CALL]], i64 [[PHI]]
+; CHECK-NEXT:    [[SCEVGEP32:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR]], i64 8
 ; CHECK-NEXT:    store i32 0, ptr [[SCEVGEP32]], align 8
-; CHECK-NEXT:    [[SCEVGEP29:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV27]]
+; CHECK-NEXT:    [[SCEVGEP29:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR]], i64 12
 ; CHECK-NEXT:    store i32 0, ptr [[SCEVGEP29]], align 4
-; CHECK-NEXT:    [[SCEVGEP26:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV24]]
+; CHECK-NEXT:    [[SCEVGEP26:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR]], i64 16
 ; CHECK-NEXT:    store i32 0, ptr [[SCEVGEP26]], align 8
-; CHECK-NEXT:    [[SCEVGEP23:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV21]]
+; CHECK-NEXT:    [[SCEVGEP23:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR]], i64 20
 ; CHECK-NEXT:    store i32 0, ptr [[SCEVGEP23]], align 4
-; CHECK-NEXT:    [[SCEVGEP20:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV18]]
+; CHECK-NEXT:    [[SCEVGEP20:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR]], i64 24
 ; CHECK-NEXT:    store i32 0, ptr [[SCEVGEP20]], align 8
-; CHECK-NEXT:    [[SCEVGEP17:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV15]]
+; CHECK-NEXT:    [[SCEVGEP17:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR]], i64 28
 ; CHECK-NEXT:    store i32 0, ptr [[SCEVGEP17]], align 4
-; CHECK-NEXT:    [[SCEVGEP14:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV12]]
+; CHECK-NEXT:    [[SCEVGEP14:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR]], i64 32
 ; CHECK-NEXT:    store i32 0, ptr [[SCEVGEP14]], align 8
-; CHECK-NEXT:    [[SCEVGEP11:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV9]]
+; CHECK-NEXT:    [[SCEVGEP11:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR]], i64 36
 ; CHECK-NEXT:    store i32 0, ptr [[SCEVGEP11]], align 4
-; CHECK-NEXT:    [[SCEVGEP6:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV4]]
+; CHECK-NEXT:    [[SCEVGEP6:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR]], i64 40
 ; CHECK-NEXT:    store i64 0, ptr [[SCEVGEP6]], align 8
-; CHECK-NEXT:    [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV1]]
+; CHECK-NEXT:    [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR]], i64 48
 ; CHECK-NEXT:    store i32 0, ptr [[SCEVGEP3]], align 8
-; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV]]
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR]], i64 72
 ; CHECK-NEXT:    store i32 0, ptr [[SCEVGEP]], align 8
-; CHECK-NEXT:    [[SCEVGEP7:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV4]]
-; CHECK-NEXT:    [[SCEVGEP8:%.*]] = getelementptr i8, ptr [[SCEVGEP7]], i64 40
+; CHECK-NEXT:    [[SCEVGEP8:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR]], i64 80
 ; CHECK-NEXT:    store i64 0, ptr [[SCEVGEP8]], align 8
-; CHECK-NEXT:    [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], 88
-; CHECK-NEXT:    [[LSR_IV_NEXT2]] = add i64 [[LSR_IV1]], 88
-; CHECK-NEXT:    [[LSR_IV_NEXT5]] = add i64 [[LSR_IV4]], 88
-; CHECK-NEXT:    [[LSR_IV_NEXT10]] = add i64 [[LSR_IV9]], 88
-; CHECK-NEXT:    [[LSR_IV_NEXT13]] = add i64 [[LSR_IV12]], 88
-; CHECK-NEXT:    [[LSR_IV_NEXT16]] = add i64 [[LSR_IV15]], 88
-; CHECK-NEXT:    [[LSR_IV_NEXT19]] = add i64 [[LSR_IV18]], 88
-; CHECK-NEXT:    [[LSR_IV_NEXT22]] = add i64 [[LSR_IV21]], 88
-; CHECK-NEXT:    [[LSR_IV_NEXT25]] = add i64 [[LSR_IV24]], 88
-; CHECK-NEXT:    [[LSR_IV_NEXT28]] = add i64 [[LSR_IV27]], 88
-; CHECK-NEXT:    [[LSR_IV_NEXT31]] = add i64 [[LSR_IV30]], 88
+; CHECK-NEXT:    [[ADD]] = add i64 [[PHI]], 1
 ; CHECK-NEXT:    br label %[[BB2]]
 ;
 0:


        


More information about the llvm-commits mailing list