[llvm] [RISCV] Enable TTI::shouldDropLSRSolutionIfLessProfitable by default (PR #89927)

via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 24 06:35:50 PDT 2024


llvmbot wrote:


@llvm/pr-subscribers-backend-risc-v

@llvm/pr-subscribers-llvm-transforms

Author: Alex Bradbury (asb)

Changes:

This avoids some cases where LSR produces a solution that leads to very poor codegen. There is a chance we will see minor degradations for inputs where LSR's metrics rate the found solution as worse than the baseline even though it is in fact better than the starting point.
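
Other backends keep the conservative default (false); a target that wants the same behaviour opts in by overriding the new TTI hook, just as this patch does for RISC-V. A minimal sketch, with an invented target name and the surrounding boilerplate omitted:

```cpp
// Illustration only: this mirrors what the patch does for RISCVTTIImpl.
// "MyTargetTTIImpl" is a hypothetical backend class; a real implementation
// lives in that target's <Target>TargetTransformInfo.h and needs the usual
// BasicTTIImplBase constructor and plumbing, omitted here.
class MyTargetTTIImpl : public llvm::BasicTTIImplBase<MyTargetTTIImpl> {
public:
  // Returning true makes LSR discard a computed solution whenever its
  // estimated cost is higher than the baseline (unmodified loop) cost.
  bool shouldDropLSRSolutionIfLessProfitable() const { return true; }
};
```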

---

Stacks on https://github.com/llvm/llvm-project/pull/89924

This isn't tested enough yet to realistically merge at this point, but I'm posting it early in case others have already looked at this. Here is an example input that has terrible codegen without this change:

```llvm
%struct.ham = type { i64, i32, i32, i32, i32, i32, i32, i32, i32, i64, i32, i64, i64, i32, i64 }

define i32 @main() {
bb:
  %call = tail call ptr null(i64 0)
  br label %bb2

bb1:                                              ; No predecessors!
  %load = load i32, ptr %call, align 4
  ret i32 0

bb2:                                              ; preds = %bb2, %bb
  %phi = phi i64 [ 0, %bb ], [ %add, %bb2 ]
  %getelementptr = getelementptr %struct.ham, ptr %call, i64 %phi
  %getelementptr3 = getelementptr i8, ptr %getelementptr, i64 8
  store i32 0, ptr %getelementptr3, align 8
  %getelementptr4 = getelementptr i8, ptr %getelementptr, i64 12
  store i32 0, ptr %getelementptr4, align 4
  %getelementptr5 = getelementptr i8, ptr %getelementptr, i64 16
  store i32 0, ptr %getelementptr5, align 8
  %getelementptr6 = getelementptr i8, ptr %getelementptr, i64 20
  store i32 0, ptr %getelementptr6, align 4
  %getelementptr7 = getelementptr i8, ptr %getelementptr, i64 24
  store i32 0, ptr %getelementptr7, align 8
  %getelementptr8 = getelementptr i8, ptr %getelementptr, i64 28
  store i32 0, ptr %getelementptr8, align 4
  %getelementptr9 = getelementptr i8, ptr %getelementptr, i64 32
  store i32 0, ptr %getelementptr9, align 8
  %getelementptr10 = getelementptr i8, ptr %getelementptr, i64 36
  store i32 0, ptr %getelementptr10, align 4
  %getelementptr11 = getelementptr i8, ptr %getelementptr, i64 40
  store i64 0, ptr %getelementptr11, align 8
  %getelementptr12 = getelementptr i8, ptr %getelementptr, i64 48
  store i32 0, ptr %getelementptr12, align 8
  %getelementptr13 = getelementptr i8, ptr %getelementptr, i64 72
  store i32 0, ptr %getelementptr13, align 8
  %getelementptr14 = getelementptr i8, ptr %getelementptr, i64 80
  store i64 0, ptr %getelementptr14, align 8
  %add = add i64 %phi, 1
  br label %bb2
}
```

There is probably also an LSR change to be made to its search heuristic, but finding a solution that by LSR's own metrics is much worse than the starting point and then going ahead with it anyway seems hard to justify.
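
For reference, the new plumbing in LSRInstance::Solve() roughly amounts to the following (condensed from the hunk in the patch below; the Solution.clear() fallback is pre-existing code outside the shown context, and the debug output is omitted):

```cpp
// Condensed sketch of the decision added in LSRInstance::Solve().
const bool EnableDropUnprofitableSolution = [&] {
  switch (AllowDropSolutionIfLessProfitable) { // -lsr-drop-solution
  case cl::BOU_TRUE:
    return true;  // explicitly enabled on the command line
  case cl::BOU_FALSE:
    return false; // explicitly disabled on the command line
  case cl::BOU_UNSET:
    return TTI.shouldDropLSRSolutionIfLessProfitable(); // new per-target default
  }
  llvm_unreachable("Unhandled cl::boolOrDefault enum");
}();

if (BaselineCost.isLess(SolutionCost) && EnableDropUnprofitableSolution)
  Solution.clear(); // keep the baseline IV users instead of the LSR solution
```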

---

Patch is 52.26 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/89927.diff


9 Files Affected:

- (modified) llvm/include/llvm/Analysis/TargetTransformInfo.h (+8) 
- (modified) llvm/include/llvm/Analysis/TargetTransformInfoImpl.h (+2) 
- (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+4) 
- (modified) llvm/lib/Analysis/TargetTransformInfo.cpp (+4) 
- (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h (+2) 
- (modified) llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp (+15-3) 
- (modified) llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll (+63-66) 
- (modified) llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll (+229-245) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll (+18-18) 


``````````diff
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 58c69ac939763a..7849da9606b259 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -740,6 +740,10 @@ class TargetTransformInfo {
   /// When successful, makes the primary IV dead.
   bool shouldFoldTerminatingConditionAfterLSR() const;
 
+  /// Return true if LSR should drop a found solution if it's calculated to be
+  /// less profitable than the baseline.
+  bool shouldDropLSRSolutionIfLessProfitable() const;
+
   /// \returns true if LSR should not optimize a chain that includes \p I.
   bool isProfitableLSRChainElement(Instruction *I) const;
 
@@ -1861,6 +1865,7 @@ class TargetTransformInfo::Concept {
                              const TargetTransformInfo::LSRCost &C2) = 0;
   virtual bool isNumRegsMajorCostOfLSR() = 0;
   virtual bool shouldFoldTerminatingConditionAfterLSR() const = 0;
+  virtual bool shouldDropLSRSolutionIfLessProfitable() const = 0;
   virtual bool isProfitableLSRChainElement(Instruction *I) = 0;
   virtual bool canMacroFuseCmp() = 0;
   virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
@@ -2333,6 +2338,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
   bool shouldFoldTerminatingConditionAfterLSR() const override {
     return Impl.shouldFoldTerminatingConditionAfterLSR();
   }
+  bool shouldDropLSRSolutionIfLessProfitable() const override {
+    return Impl.shouldDropLSRSolutionIfLessProfitable();
+  }
   bool isProfitableLSRChainElement(Instruction *I) override {
     return Impl.isProfitableLSRChainElement(I);
   }
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 4d5cd963e09267..221e851ba23c01 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -240,6 +240,8 @@ class TargetTransformInfoImplBase {
 
   bool shouldFoldTerminatingConditionAfterLSR() const { return false; }
 
+  bool shouldDropLSRSolutionIfLessProfitable() const { return false; }
+
   bool isProfitableLSRChainElement(Instruction *I) const { return false; }
 
   bool canMacroFuseCmp() const { return false; }
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 06a19c75cf873a..cfa43709e58e10 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -398,6 +398,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
         shouldFoldTerminatingConditionAfterLSR();
   }
 
+  bool shouldDropLSRSolutionIfLessProfitable() const {
+    return TargetTransformInfoImplBase::shouldDropLSRSolutionIfLessProfitable();
+  }
+
   bool isProfitableLSRChainElement(Instruction *I) {
     return TargetTransformInfoImplBase::isProfitableLSRChainElement(I);
   }
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 33c899fe889990..a8f258ab14561e 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -427,6 +427,10 @@ bool TargetTransformInfo::shouldFoldTerminatingConditionAfterLSR() const {
   return TTIImpl->shouldFoldTerminatingConditionAfterLSR();
 }
 
+bool TargetTransformInfo::shouldDropLSRSolutionIfLessProfitable() const {
+  return TTIImpl->shouldDropLSRSolutionIfLessProfitable();
+}
+
 bool TargetTransformInfo::isProfitableLSRChainElement(Instruction *I) const {
   return TTIImpl->isProfitableLSRChainElement(I);
 }
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 2f9281ab892447..fa3a88a700226f 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -397,6 +397,8 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
   bool shouldFoldTerminatingConditionAfterLSR() const {
     return true;
   }
+
+  bool shouldDropLSRSolutionIfLessProfitable() const { return true; }
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index ec42e2d6e193a6..1384ecfce373c7 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -193,8 +193,8 @@ static cl::opt<cl::boolOrDefault> AllowTerminatingConditionFoldingAfterLSR(
     "lsr-term-fold", cl::Hidden,
     cl::desc("Attempt to replace primary IV with other IV."));
 
-static cl::opt<bool> AllowDropSolutionIfLessProfitable(
-    "lsr-drop-solution", cl::Hidden, cl::init(false),
+static cl::opt<cl::boolOrDefault> AllowDropSolutionIfLessProfitable(
+    "lsr-drop-solution", cl::Hidden,
     cl::desc("Attempt to drop solution if it is less profitable"));
 
 STATISTIC(NumTermFold,
@@ -5248,10 +5248,22 @@ void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
 
   assert(Solution.size() == Uses.size() && "Malformed solution!");
 
+  const bool EnableDropUnprofitableSolution = [&] {
+    switch (AllowDropSolutionIfLessProfitable) {
+    case cl::BOU_TRUE:
+      return true;
+    case cl::BOU_FALSE:
+      return false;
+    case cl::BOU_UNSET:
+      return TTI.shouldDropLSRSolutionIfLessProfitable();
+    }
+    llvm_unreachable("Unhandled cl::boolOrDefault enum");
+  }();
+
   if (BaselineCost.isLess(SolutionCost)) {
     LLVM_DEBUG(dbgs() << "The baseline solution requires ";
                BaselineCost.print(dbgs()); dbgs() << "\n");
-    if (!AllowDropSolutionIfLessProfitable)
+    if (!EnableDropUnprofitableSolution)
       LLVM_DEBUG(
           dbgs() << "Baseline is more profitable than chosen solution, "
                     "add option 'lsr-drop-solution' to drop LSR solution.\n");
diff --git a/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll
index dc4d28819bbbd8..92639be0017e8f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll
@@ -86,30 +86,29 @@ declare i64 @llvm.vscale.i64()
 define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
 ; NO-SINK-LABEL: sink_splat_add_scalable:
 ; NO-SINK:       # %bb.0: # %entry
-; NO-SINK-NEXT:    csrr a5, vlenb
-; NO-SINK-NEXT:    srli a2, a5, 1
+; NO-SINK-NEXT:    csrr a2, vlenb
+; NO-SINK-NEXT:    srli a2, a2, 1
 ; NO-SINK-NEXT:    li a3, 1024
 ; NO-SINK-NEXT:    bgeu a3, a2, .LBB1_2
 ; NO-SINK-NEXT:  # %bb.1:
 ; NO-SINK-NEXT:    li a3, 0
 ; NO-SINK-NEXT:    j .LBB1_5
 ; NO-SINK-NEXT:  .LBB1_2: # %vector.ph
+; NO-SINK-NEXT:    li a5, 0
 ; NO-SINK-NEXT:    addi a3, a2, -1
 ; NO-SINK-NEXT:    andi a4, a3, 1024
 ; NO-SINK-NEXT:    xori a3, a4, 1024
 ; NO-SINK-NEXT:    vsetvli a6, zero, e32, m2, ta, ma
 ; NO-SINK-NEXT:    vmv.v.x v8, a1
-; NO-SINK-NEXT:    slli a5, a5, 1
-; NO-SINK-NEXT:    mv a6, a0
-; NO-SINK-NEXT:    mv a7, a3
 ; NO-SINK-NEXT:  .LBB1_3: # %vector.body
 ; NO-SINK-NEXT:    # =>This Inner Loop Header: Depth=1
+; NO-SINK-NEXT:    slli a6, a5, 2
+; NO-SINK-NEXT:    add a6, a0, a6
 ; NO-SINK-NEXT:    vl2re32.v v10, (a6)
 ; NO-SINK-NEXT:    vadd.vv v10, v10, v8
+; NO-SINK-NEXT:    add a5, a5, a2
 ; NO-SINK-NEXT:    vs2r.v v10, (a6)
-; NO-SINK-NEXT:    sub a7, a7, a2
-; NO-SINK-NEXT:    add a6, a6, a5
-; NO-SINK-NEXT:    bnez a7, .LBB1_3
+; NO-SINK-NEXT:    bne a5, a3, .LBB1_3
 ; NO-SINK-NEXT:  # %bb.4: # %middle.block
 ; NO-SINK-NEXT:    beqz a4, .LBB1_7
 ; NO-SINK-NEXT:  .LBB1_5: # %for.body.preheader
@@ -129,29 +128,28 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
 ;
 ; SINK-LABEL: sink_splat_add_scalable:
 ; SINK:       # %bb.0: # %entry
-; SINK-NEXT:    csrr a5, vlenb
-; SINK-NEXT:    srli a2, a5, 1
+; SINK-NEXT:    csrr a2, vlenb
+; SINK-NEXT:    srli a2, a2, 1
 ; SINK-NEXT:    li a3, 1024
 ; SINK-NEXT:    bgeu a3, a2, .LBB1_2
 ; SINK-NEXT:  # %bb.1:
 ; SINK-NEXT:    li a3, 0
 ; SINK-NEXT:    j .LBB1_5
 ; SINK-NEXT:  .LBB1_2: # %vector.ph
+; SINK-NEXT:    li a5, 0
 ; SINK-NEXT:    addi a3, a2, -1
 ; SINK-NEXT:    andi a4, a3, 1024
 ; SINK-NEXT:    xori a3, a4, 1024
-; SINK-NEXT:    slli a5, a5, 1
 ; SINK-NEXT:    vsetvli a6, zero, e32, m2, ta, ma
-; SINK-NEXT:    mv a6, a0
-; SINK-NEXT:    mv a7, a3
 ; SINK-NEXT:  .LBB1_3: # %vector.body
 ; SINK-NEXT:    # =>This Inner Loop Header: Depth=1
+; SINK-NEXT:    slli a6, a5, 2
+; SINK-NEXT:    add a6, a0, a6
 ; SINK-NEXT:    vl2re32.v v8, (a6)
 ; SINK-NEXT:    vadd.vx v8, v8, a1
+; SINK-NEXT:    add a5, a5, a2
 ; SINK-NEXT:    vs2r.v v8, (a6)
-; SINK-NEXT:    sub a7, a7, a2
-; SINK-NEXT:    add a6, a6, a5
-; SINK-NEXT:    bnez a7, .LBB1_3
+; SINK-NEXT:    bne a5, a3, .LBB1_3
 ; SINK-NEXT:  # %bb.4: # %middle.block
 ; SINK-NEXT:    beqz a4, .LBB1_7
 ; SINK-NEXT:  .LBB1_5: # %for.body.preheader
@@ -171,29 +169,28 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
 ;
 ; DEFAULT-LABEL: sink_splat_add_scalable:
 ; DEFAULT:       # %bb.0: # %entry
-; DEFAULT-NEXT:    csrr a5, vlenb
-; DEFAULT-NEXT:    srli a2, a5, 1
+; DEFAULT-NEXT:    csrr a2, vlenb
+; DEFAULT-NEXT:    srli a2, a2, 1
 ; DEFAULT-NEXT:    li a3, 1024
 ; DEFAULT-NEXT:    bgeu a3, a2, .LBB1_2
 ; DEFAULT-NEXT:  # %bb.1:
 ; DEFAULT-NEXT:    li a3, 0
 ; DEFAULT-NEXT:    j .LBB1_5
 ; DEFAULT-NEXT:  .LBB1_2: # %vector.ph
+; DEFAULT-NEXT:    li a5, 0
 ; DEFAULT-NEXT:    addi a3, a2, -1
 ; DEFAULT-NEXT:    andi a4, a3, 1024
 ; DEFAULT-NEXT:    xori a3, a4, 1024
-; DEFAULT-NEXT:    slli a5, a5, 1
 ; DEFAULT-NEXT:    vsetvli a6, zero, e32, m2, ta, ma
-; DEFAULT-NEXT:    mv a6, a0
-; DEFAULT-NEXT:    mv a7, a3
 ; DEFAULT-NEXT:  .LBB1_3: # %vector.body
 ; DEFAULT-NEXT:    # =>This Inner Loop Header: Depth=1
+; DEFAULT-NEXT:    slli a6, a5, 2
+; DEFAULT-NEXT:    add a6, a0, a6
 ; DEFAULT-NEXT:    vl2re32.v v8, (a6)
 ; DEFAULT-NEXT:    vadd.vx v8, v8, a1
+; DEFAULT-NEXT:    add a5, a5, a2
 ; DEFAULT-NEXT:    vs2r.v v8, (a6)
-; DEFAULT-NEXT:    sub a7, a7, a2
-; DEFAULT-NEXT:    add a6, a6, a5
-; DEFAULT-NEXT:    bnez a7, .LBB1_3
+; DEFAULT-NEXT:    bne a5, a3, .LBB1_3
 ; DEFAULT-NEXT:  # %bb.4: # %middle.block
 ; DEFAULT-NEXT:    beqz a4, .LBB1_7
 ; DEFAULT-NEXT:  .LBB1_5: # %for.body.preheader
@@ -407,32 +404,32 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
 ; NO-SINK-LABEL: sink_splat_fadd_scalable:
 ; NO-SINK:       # %bb.0: # %entry
 ; NO-SINK-NEXT:    csrr a1, vlenb
-; NO-SINK-NEXT:    srli a2, a1, 2
-; NO-SINK-NEXT:    li a3, 1024
-; NO-SINK-NEXT:    bgeu a3, a2, .LBB4_2
+; NO-SINK-NEXT:    srli a1, a1, 2
+; NO-SINK-NEXT:    li a2, 1024
+; NO-SINK-NEXT:    bgeu a2, a1, .LBB4_2
 ; NO-SINK-NEXT:  # %bb.1:
-; NO-SINK-NEXT:    li a3, 0
+; NO-SINK-NEXT:    li a2, 0
 ; NO-SINK-NEXT:    j .LBB4_5
 ; NO-SINK-NEXT:  .LBB4_2: # %vector.ph
-; NO-SINK-NEXT:    addi a3, a2, -1
-; NO-SINK-NEXT:    andi a4, a3, 1024
-; NO-SINK-NEXT:    xori a3, a4, 1024
+; NO-SINK-NEXT:    li a4, 0
+; NO-SINK-NEXT:    addi a2, a1, -1
+; NO-SINK-NEXT:    andi a3, a2, 1024
+; NO-SINK-NEXT:    xori a2, a3, 1024
 ; NO-SINK-NEXT:    vsetvli a5, zero, e32, m1, ta, ma
 ; NO-SINK-NEXT:    vfmv.v.f v8, fa0
-; NO-SINK-NEXT:    mv a5, a0
-; NO-SINK-NEXT:    mv a6, a3
 ; NO-SINK-NEXT:  .LBB4_3: # %vector.body
 ; NO-SINK-NEXT:    # =>This Inner Loop Header: Depth=1
+; NO-SINK-NEXT:    slli a5, a4, 2
+; NO-SINK-NEXT:    add a5, a0, a5
 ; NO-SINK-NEXT:    vl1re32.v v9, (a5)
 ; NO-SINK-NEXT:    vfadd.vv v9, v9, v8
+; NO-SINK-NEXT:    add a4, a4, a1
 ; NO-SINK-NEXT:    vs1r.v v9, (a5)
-; NO-SINK-NEXT:    sub a6, a6, a2
-; NO-SINK-NEXT:    add a5, a5, a1
-; NO-SINK-NEXT:    bnez a6, .LBB4_3
+; NO-SINK-NEXT:    bne a4, a2, .LBB4_3
 ; NO-SINK-NEXT:  # %bb.4: # %middle.block
-; NO-SINK-NEXT:    beqz a4, .LBB4_7
+; NO-SINK-NEXT:    beqz a3, .LBB4_7
 ; NO-SINK-NEXT:  .LBB4_5: # %for.body.preheader
-; NO-SINK-NEXT:    slli a1, a3, 2
+; NO-SINK-NEXT:    slli a1, a2, 2
 ; NO-SINK-NEXT:    add a1, a0, a1
 ; NO-SINK-NEXT:    lui a2, 1
 ; NO-SINK-NEXT:    add a0, a0, a2
@@ -449,31 +446,31 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
 ; SINK-LABEL: sink_splat_fadd_scalable:
 ; SINK:       # %bb.0: # %entry
 ; SINK-NEXT:    csrr a1, vlenb
-; SINK-NEXT:    srli a2, a1, 2
-; SINK-NEXT:    li a3, 1024
-; SINK-NEXT:    bgeu a3, a2, .LBB4_2
+; SINK-NEXT:    srli a1, a1, 2
+; SINK-NEXT:    li a2, 1024
+; SINK-NEXT:    bgeu a2, a1, .LBB4_2
 ; SINK-NEXT:  # %bb.1:
-; SINK-NEXT:    li a3, 0
+; SINK-NEXT:    li a2, 0
 ; SINK-NEXT:    j .LBB4_5
 ; SINK-NEXT:  .LBB4_2: # %vector.ph
-; SINK-NEXT:    addi a3, a2, -1
-; SINK-NEXT:    andi a4, a3, 1024
-; SINK-NEXT:    xori a3, a4, 1024
+; SINK-NEXT:    li a4, 0
+; SINK-NEXT:    addi a2, a1, -1
+; SINK-NEXT:    andi a3, a2, 1024
+; SINK-NEXT:    xori a2, a3, 1024
 ; SINK-NEXT:    vsetvli a5, zero, e32, m1, ta, ma
-; SINK-NEXT:    mv a5, a0
-; SINK-NEXT:    mv a6, a3
 ; SINK-NEXT:  .LBB4_3: # %vector.body
 ; SINK-NEXT:    # =>This Inner Loop Header: Depth=1
+; SINK-NEXT:    slli a5, a4, 2
+; SINK-NEXT:    add a5, a0, a5
 ; SINK-NEXT:    vl1re32.v v8, (a5)
 ; SINK-NEXT:    vfadd.vf v8, v8, fa0
+; SINK-NEXT:    add a4, a4, a1
 ; SINK-NEXT:    vs1r.v v8, (a5)
-; SINK-NEXT:    sub a6, a6, a2
-; SINK-NEXT:    add a5, a5, a1
-; SINK-NEXT:    bnez a6, .LBB4_3
+; SINK-NEXT:    bne a4, a2, .LBB4_3
 ; SINK-NEXT:  # %bb.4: # %middle.block
-; SINK-NEXT:    beqz a4, .LBB4_7
+; SINK-NEXT:    beqz a3, .LBB4_7
 ; SINK-NEXT:  .LBB4_5: # %for.body.preheader
-; SINK-NEXT:    slli a1, a3, 2
+; SINK-NEXT:    slli a1, a2, 2
 ; SINK-NEXT:    add a1, a0, a1
 ; SINK-NEXT:    lui a2, 1
 ; SINK-NEXT:    add a0, a0, a2
@@ -490,31 +487,31 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
 ; DEFAULT-LABEL: sink_splat_fadd_scalable:
 ; DEFAULT:       # %bb.0: # %entry
 ; DEFAULT-NEXT:    csrr a1, vlenb
-; DEFAULT-NEXT:    srli a2, a1, 2
-; DEFAULT-NEXT:    li a3, 1024
-; DEFAULT-NEXT:    bgeu a3, a2, .LBB4_2
+; DEFAULT-NEXT:    srli a1, a1, 2
+; DEFAULT-NEXT:    li a2, 1024
+; DEFAULT-NEXT:    bgeu a2, a1, .LBB4_2
 ; DEFAULT-NEXT:  # %bb.1:
-; DEFAULT-NEXT:    li a3, 0
+; DEFAULT-NEXT:    li a2, 0
 ; DEFAULT-NEXT:    j .LBB4_5
 ; DEFAULT-NEXT:  .LBB4_2: # %vector.ph
-; DEFAULT-NEXT:    addi a3, a2, -1
-; DEFAULT-NEXT:    andi a4, a3, 1024
-; DEFAULT-NEXT:    xori a3, a4, 1024
+; DEFAULT-NEXT:    li a4, 0
+; DEFAULT-NEXT:    addi a2, a1, -1
+; DEFAULT-NEXT:    andi a3, a2, 1024
+; DEFAULT-NEXT:    xori a2, a3, 1024
 ; DEFAULT-NEXT:    vsetvli a5, zero, e32, m1, ta, ma
-; DEFAULT-NEXT:    mv a5, a0
-; DEFAULT-NEXT:    mv a6, a3
 ; DEFAULT-NEXT:  .LBB4_3: # %vector.body
 ; DEFAULT-NEXT:    # =>This Inner Loop Header: Depth=1
+; DEFAULT-NEXT:    slli a5, a4, 2
+; DEFAULT-NEXT:    add a5, a0, a5
 ; DEFAULT-NEXT:    vl1re32.v v8, (a5)
 ; DEFAULT-NEXT:    vfadd.vf v8, v8, fa0
+; DEFAULT-NEXT:    add a4, a4, a1
 ; DEFAULT-NEXT:    vs1r.v v8, (a5)
-; DEFAULT-NEXT:    sub a6, a6, a2
-; DEFAULT-NEXT:    add a5, a5, a1
-; DEFAULT-NEXT:    bnez a6, .LBB4_3
+; DEFAULT-NEXT:    bne a4, a2, .LBB4_3
 ; DEFAULT-NEXT:  # %bb.4: # %middle.block
-; DEFAULT-NEXT:    beqz a4, .LBB4_7
+; DEFAULT-NEXT:    beqz a3, .LBB4_7
 ; DEFAULT-NEXT:  .LBB4_5: # %for.body.preheader
-; DEFAULT-NEXT:    slli a1, a3, 2
+; DEFAULT-NEXT:    slli a1, a2, 2
 ; DEFAULT-NEXT:    add a1, a0, a1
 ; DEFAULT-NEXT:    lui a2, 1
 ; DEFAULT-NEXT:    add a0, a0, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
index 9046c861c3367a..94b5996b768f80 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
@@ -243,29 +243,28 @@ for.cond.cleanup:                                 ; preds = %vector.body
 define void @sink_splat_mul_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_mul_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a5, vlenb
-; CHECK-NEXT:    srli a2, a5, 1
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a2, a2, 1
 ; CHECK-NEXT:    li a3, 1024
 ; CHECK-NEXT:    bgeu a3, a2, .LBB7_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB7_5
 ; CHECK-NEXT:  .LBB7_2: # %vector.ph
+; CHECK-NEXT:    li a5, 0
 ; CHECK-NEXT:    addi a3, a2, -1
 ; CHECK-NEXT:    andi a4, a3, 1024
 ; CHECK-NEXT:    xori a3, a4, 1024
-; CHECK-NEXT:    slli a5, a5, 1
 ; CHECK-NEXT:    vsetvli a6, zero, e32, m2, ta, ma
-; CHECK-NEXT:    mv a6, a0
-; CHECK-NEXT:    mv a7, a3
 ; CHECK-NEXT:  .LBB7_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    slli a6, a5, 2
+; CHECK-NEXT:    add a6, a0, a6
 ; CHECK-NEXT:    vl2re32.v v8, (a6)
 ; CHECK-NEXT:    vmul.vx v8, v8, a1
+; CHECK-NEXT:    add a5, a5, a2
 ; CHECK-NEXT:    vs2r.v v8, (a6)
-; CHECK-NEXT:    sub a7, a7, a2
-; CHECK-NEXT:    add a6, a6, a5
-; CHECK-NEXT:    bnez a7, .LBB7_3
+; CHECK-NEXT:    bne a5, a3, .LBB7_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
 ; CHECK-NEXT:    beqz a4, .LBB7_7
 ; CHECK-NEXT:  .LBB7_5: # %for.body.preheader
@@ -334,29 +333,28 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_add_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a5, vlenb
-; CHECK-NEXT:    srli a2, a5, 1
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a2, a2, 1
 ; CHECK-NEXT:    li a3, 1024
 ; CHECK-NEXT:    bgeu a3, a2, .LBB8_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB8_5
 ; CHECK-NEXT:  .LBB8_2: # %vector.ph
+; CHECK-NEXT:    li a5, 0
 ; CHECK-NEXT:    addi a3, a2, -1
 ; CHECK-NEXT:    andi a4, a3, 1024
 ; CHECK-NEXT:    xori a3, a4, 1024
-; CHECK-NEXT:    slli a5, a5, 1
 ; CHECK-NEXT:    vsetvli a6, zero, e32, m2, ta, ma
-; CHECK-NEXT:    mv a6, a0
-; CHECK-NEXT:    mv a7, a3
 ; CHECK-NEXT:  .LBB8_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    slli a6, a5, 2
+; CHECK-NEXT:    add a6, a0, a6
 ; CHECK-NEXT:    vl2re32.v v8, (a6)
 ; CHECK-NEXT:    vadd.vx v8, v8, a1
+; CHECK-NEXT:    add a5, a5, a2
 ; CHECK-NEXT:    vs2r.v v8, (a6)
-; CHECK-NEXT:    sub a7, a7, a2
-; CHECK-NEXT:    add a6, a6, a5
-; CHECK-NEXT:    bnez a7, .LBB8_3
+; CHECK-NEXT:    bne a5, a3, .LBB8_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
 ; CHECK-NEXT:    beqz a4, .LBB8_7
 ; CHECK-NEXT:  .LBB8_5: # %for.body.preheader
@@ -425,29 +423,28 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_sub_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_sub_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a5, vlenb
-; CHECK-NEXT:    srli a2, a5, 1
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a2, a2, 1
 ; CHECK-NEXT:    li a3, 1024
 ; CHECK-NEXT:    bgeu a3, a2, .LBB9_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB9_5
 ; CHECK-NEXT:  .LBB9_2: # %vector.ph
+; CHECK-NEXT:    li a5, 0
 ; CHECK-NEXT:    addi a3, a2, -1
 ; CHECK-NEXT:    andi a4, a3, 1024
 ; CHECK-NEXT:    xori a3, a4, 1024
-; CHECK-NEXT:    slli a5, a5, 1
 ; CHECK-NEXT:    vsetvli a6, zero, e32, m2, ta, ma
-; CHECK-NEXT:    mv a6, a0
-; CHECK-NEXT:    mv a7, a3
 ; CHECK-NEXT:  .LBB9_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    slli a6, a5, 2
+; CHECK-NEXT:    add a6, a0, a6
 ; CHECK-NEXT:    vl2re32.v v8, (a6)
 ; CHECK-NEXT:    vsub.vx v8, v8, a1
+; CHECK-NEXT:    add a5, a5, a2
 ; CHECK-NEXT:    vs2r.v v8, (a6)
-; CHECK-NEXT:    sub a7, a7, a2
-; CHECK-NEXT:    add a6, a6, a5
-; CHECK-NEXT:    bnez a7, .LBB9_3
+; CHECK-NEXT:    bne a5, a3, .LBB9_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
 ; CHECK-NEXT:    beqz a4, .LBB9_7
 ; CHECK-NEXT:  .LBB9_5: # %for.body.preheader
@@ -516,29 +513,28 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_rsub_scalable(ptr nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_rsub_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a5, vlenb
-; CHECK-NEXT:    srli a2, a5, 1
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a2, a2, 1
 ; CHECK-NEXT:    li a3, 1024
 ; CHECK-NEXT:    bgeu a3, a2, .LBB10_2
 ; CHECK-NEXT:  ...
[truncated]

``````````



https://github.com/llvm/llvm-project/pull/89927

