[llvm] [RISCV] Move MachineCombiner to addILPOpts() (PR #158071)

Thu Sep 11 06:22:20 PDT 2025

https://github.com/wangpc-pp created https://github.com/llvm/llvm-project/pull/158071

Move the `MachineCombiner` pass into `addILPOpts()` so that it runs before `MachineCSE` and other passes.

Fixes https://github.com/llvm/llvm-project/issues/158063.


>From 42ad47cc1674e1ab836ad9ffb2f79fc6bfe38419 Mon Sep 17 00:00:00 2001
From: Pengcheng Wang <wangpengcheng.pp at bytedance.com>
Date: Thu, 11 Sep 2025 21:18:08 +0800
Subject: [PATCH] [RISCV] Move MachineCombiner to addILPOpts()

So that it runs before `MachineCSE` and other passes.

Fixes https://github.com/llvm/llvm-project/issues/158063.
---
 llvm/lib/Target/RISCV/RISCVTargetMachine.cpp  | 11 ++-
 llvm/test/CodeGen/RISCV/O3-pipeline.ll        |  6 +-
 llvm/test/CodeGen/RISCV/machine-combiner.ll   | 43 ++++-----
 llvm/test/CodeGen/RISCV/neg-abs.ll            | 24 ++---
 .../fixed-vectors-strided-load-store-asm.ll   |  2 +-
 .../RISCV/rvv/vxrm-insert-out-of-loop.ll      | 89 ++++++++++---------
 .../CodeGen/RISCV/short-forward-branch-opt.ll |  4 +-
 7 files changed, 87 insertions(+), 92 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 460bb33f2553a..d7e0be65fb6fa 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -427,6 +427,7 @@ class RISCVPassConfig : public TargetPassConfig {
   void addPreRegAlloc() override;
   void addPostRegAlloc() override;
   void addFastRegAlloc() override;
+  bool addILPOpts() override;
 
   std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
 };
@@ -612,9 +613,6 @@ void RISCVPassConfig::addMachineSSAOptimization() {
 
   TargetPassConfig::addMachineSSAOptimization();
 
-  if (EnableMachineCombiner)
-    addPass(&MachineCombinerID);
-
   if (TM->getTargetTriple().isRISCV64()) {
     addPass(createRISCVOptWInstrsPass());
   }
@@ -649,6 +647,13 @@ void RISCVPassConfig::addPostRegAlloc() {
     addPass(createRISCVRedundantCopyEliminationPass());
 }
 
+bool RISCVPassConfig::addILPOpts() {
+  if (EnableMachineCombiner)
+    addPass(&MachineCombinerID);
+
+  return true;
+}
+
 void RISCVTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
   PB.registerLateLoopOptimizationsEPCallback([=](LoopPassManager &LPM,
                                                  OptimizationLevel Level) {
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index c7f70a9d266c2..ea08061221fd4 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -107,6 +107,9 @@
 ; CHECK-NEXT:       Remove dead machine instructions
 ; CHECK-NEXT:       MachineDominator Tree Construction
 ; CHECK-NEXT:       Machine Natural Loop Construction
+; CHECK-NEXT:       Machine Trace Metrics
+; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
+; CHECK-NEXT:       Machine InstCombiner
 ; CHECK-NEXT:       Machine Block Frequency Analysis
 ; CHECK-NEXT:       Early Machine Loop Invariant Code Motion
 ; CHECK-NEXT:       MachineDominator Tree Construction
@@ -117,9 +120,6 @@
 ; CHECK-NEXT:       Machine code sinking
 ; CHECK-NEXT:       Peephole Optimizations
 ; CHECK-NEXT:       Remove dead machine instructions
-; CHECK-NEXT:       Machine Trace Metrics
-; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
-; CHECK-NEXT:       Machine InstCombiner
 ; RV64-NEXT:        RISC-V Optimize W Instructions
 ; CHECK-NEXT:       RISC-V Pre-RA pseudo instruction expansion pass
 ; CHECK-NEXT:       RISC-V Merge Base Offset
diff --git a/llvm/test/CodeGen/RISCV/machine-combiner.ll b/llvm/test/CodeGen/RISCV/machine-combiner.ll
index 7a1c41c1839fa..69eca6dd7768a 100644
--- a/llvm/test/CodeGen/RISCV/machine-combiner.ll
+++ b/llvm/test/CodeGen/RISCV/machine-combiner.ll
@@ -1094,33 +1094,19 @@ declare float @llvm.maxnum.f32(float, float)
 declare double @llvm.maxnum.f64(double, double)
 
 define double @test_fmadd_strategy(double %a0, double %a1, double %a2, double %a3, i64 %flag) {
-; CHECK_LOCAL-LABEL: test_fmadd_strategy:
-; CHECK_LOCAL:       # %bb.0: # %entry
-; CHECK_LOCAL-NEXT:    fsub.d fa4, fa0, fa1
-; CHECK_LOCAL-NEXT:    andi a0, a0, 1
-; CHECK_LOCAL-NEXT:    fmv.d fa5, fa0
-; CHECK_LOCAL-NEXT:    fmul.d fa0, fa4, fa2
-; CHECK_LOCAL-NEXT:    beqz a0, .LBB76_2
-; CHECK_LOCAL-NEXT:  # %bb.1: # %entry
-; CHECK_LOCAL-NEXT:    fmul.d fa4, fa5, fa1
-; CHECK_LOCAL-NEXT:    fmadd.d fa5, fa5, fa1, fa0
-; CHECK_LOCAL-NEXT:    fsub.d fa0, fa5, fa4
-; CHECK_LOCAL-NEXT:  .LBB76_2: # %entry
-; CHECK_LOCAL-NEXT:    ret
-;
-; CHECK_GLOBAL-LABEL: test_fmadd_strategy:
-; CHECK_GLOBAL:       # %bb.0: # %entry
-; CHECK_GLOBAL-NEXT:    fsub.d fa4, fa0, fa1
-; CHECK_GLOBAL-NEXT:    andi a0, a0, 1
-; CHECK_GLOBAL-NEXT:    fmv.d fa5, fa0
-; CHECK_GLOBAL-NEXT:    fmul.d fa0, fa4, fa2
-; CHECK_GLOBAL-NEXT:    beqz a0, .LBB76_2
-; CHECK_GLOBAL-NEXT:  # %bb.1: # %entry
-; CHECK_GLOBAL-NEXT:    fmul.d fa5, fa5, fa1
-; CHECK_GLOBAL-NEXT:    fadd.d fa4, fa5, fa0
-; CHECK_GLOBAL-NEXT:    fsub.d fa0, fa4, fa5
-; CHECK_GLOBAL-NEXT:  .LBB76_2: # %entry
-; CHECK_GLOBAL-NEXT:    ret
+; CHECK-LABEL: test_fmadd_strategy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsub.d fa5, fa0, fa1
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    beqz a0, .LBB76_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    fmul.d fa4, fa0, fa1
+; CHECK-NEXT:    fmadd.d fa5, fa5, fa2, fa4
+; CHECK-NEXT:    fsub.d fa0, fa5, fa4
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB76_2:
+; CHECK-NEXT:    fmul.d fa0, fa5, fa2
+; CHECK-NEXT:    ret
 entry:
   %sub = fsub contract double %a0, %a1
   %mul = fmul contract double %sub, %a2
@@ -1132,3 +1118,6 @@ entry:
   %retval.0 = select i1 %tobool.not, double %mul, double %sub3
   ret double %retval.0
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK_GLOBAL: {{.*}}
+; CHECK_LOCAL: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/neg-abs.ll b/llvm/test/CodeGen/RISCV/neg-abs.ll
index da81fe5708814..f9ccf7637eee9 100644
--- a/llvm/test/CodeGen/RISCV/neg-abs.ll
+++ b/llvm/test/CodeGen/RISCV/neg-abs.ll
@@ -208,14 +208,14 @@ define i64 @neg_abs64_multiuse(i64 %x, ptr %y) {
 ; RV32I-NEXT:    sub a1, a1, a3
 ; RV32I-NEXT:    neg a0, a0
 ; RV32I-NEXT:  .LBB5_2:
-; RV32I-NEXT:    snez a3, a0
-; RV32I-NEXT:    neg a4, a1
-; RV32I-NEXT:    sub a3, a4, a3
-; RV32I-NEXT:    neg a4, a0
+; RV32I-NEXT:    snez a4, a0
+; RV32I-NEXT:    neg a3, a0
+; RV32I-NEXT:    add a4, a1, a4
+; RV32I-NEXT:    neg a4, a4
 ; RV32I-NEXT:    sw a0, 0(a2)
 ; RV32I-NEXT:    sw a1, 4(a2)
-; RV32I-NEXT:    mv a0, a4
-; RV32I-NEXT:    mv a1, a3
+; RV32I-NEXT:    mv a0, a3
+; RV32I-NEXT:    mv a1, a4
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBB-LABEL: neg_abs64_multiuse:
@@ -227,14 +227,14 @@ define i64 @neg_abs64_multiuse(i64 %x, ptr %y) {
 ; RV32ZBB-NEXT:    sub a1, a1, a3
 ; RV32ZBB-NEXT:    neg a0, a0
 ; RV32ZBB-NEXT:  .LBB5_2:
-; RV32ZBB-NEXT:    snez a3, a0
-; RV32ZBB-NEXT:    neg a4, a1
-; RV32ZBB-NEXT:    sub a3, a4, a3
-; RV32ZBB-NEXT:    neg a4, a0
+; RV32ZBB-NEXT:    snez a4, a0
+; RV32ZBB-NEXT:    neg a3, a0
+; RV32ZBB-NEXT:    add a4, a1, a4
+; RV32ZBB-NEXT:    neg a4, a4
 ; RV32ZBB-NEXT:    sw a0, 0(a2)
 ; RV32ZBB-NEXT:    sw a1, 4(a2)
-; RV32ZBB-NEXT:    mv a0, a4
-; RV32ZBB-NEXT:    mv a1, a3
+; RV32ZBB-NEXT:    mv a0, a3
+; RV32ZBB-NEXT:    mv a1, a4
 ; RV32ZBB-NEXT:    ret
 ;
 ; RV64I-LABEL: neg_abs64_multiuse:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
index 83b435ddff902..056f55260b854 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
@@ -934,7 +934,7 @@ define void @strided_load_startval_add_with_splat(ptr noalias nocapture %arg, pt
 ; CHECK-NEXT:    add a1, a1, a5
 ; CHECK-NEXT:    slli a3, a3, 32
 ; CHECK-NEXT:    srli a3, a3, 32
-; CHECK-NEXT:    add a0, a4, a0
+; CHECK-NEXT:    add a0, a0, a4
 ; CHECK-NEXT:    add a0, a0, a3
 ; CHECK-NEXT:    addi a0, a0, 1
 ; CHECK-NEXT:  .LBB14_6: # %bb35
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
index dddcd4f107e3b..ead79fcf53d8b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
@@ -18,13 +18,10 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
 ; RV32-NEXT:  # %bb.1: # %for.cond1.preheader.lr.ph
 ; RV32-NEXT:    blez a6, .LBB0_17
 ; RV32-NEXT:  # %bb.2: # %for.cond1.preheader.us.preheader
-; RV32-NEXT:    addi t0, a7, -1
+; RV32-NEXT:    addi t3, a7, -1
 ; RV32-NEXT:    csrr t2, vlenb
-; RV32-NEXT:    mul t3, a1, t0
-; RV32-NEXT:    mul t4, a3, t0
-; RV32-NEXT:    mul t5, a5, t0
 ; RV32-NEXT:    slli t1, t2, 1
-; RV32-NEXT:    li t6, 32
+; RV32-NEXT:    li t4, 32
 ; RV32-NEXT:    mv t0, t1
 ; RV32-NEXT:  # %bb.3: # %for.cond1.preheader.us.preheader
 ; RV32-NEXT:    li t0, 32
@@ -34,27 +31,32 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
 ; RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    sw s2, 4(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s3, 0(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset s0, -4
 ; RV32-NEXT:    .cfi_offset s1, -8
 ; RV32-NEXT:    .cfi_offset s2, -12
+; RV32-NEXT:    .cfi_offset s3, -16
 ; RV32-NEXT:    .cfi_remember_state
-; RV32-NEXT:    add t3, a0, t3
-; RV32-NEXT:    add t4, a2, t4
-; RV32-NEXT:    add s0, a4, t5
-; RV32-NEXT:    bltu t6, t1, .LBB0_6
+; RV32-NEXT:    mul t5, a1, t3
+; RV32-NEXT:    add s0, a0, a6
+; RV32-NEXT:    mul t6, a3, t3
+; RV32-NEXT:    add s2, a2, a6
+; RV32-NEXT:    mul s1, a5, t3
+; RV32-NEXT:    add s3, a4, a6
+; RV32-NEXT:    bltu t4, t1, .LBB0_6
 ; RV32-NEXT:  # %bb.5: # %for.cond1.preheader.us.preheader
 ; RV32-NEXT:    li t1, 32
 ; RV32-NEXT:  .LBB0_6: # %for.cond1.preheader.us.preheader
-; RV32-NEXT:    add t3, t3, a6
-; RV32-NEXT:    add t5, t4, a6
-; RV32-NEXT:    add t4, s0, a6
+; RV32-NEXT:    add t3, s0, t5
+; RV32-NEXT:    add t6, s2, t6
+; RV32-NEXT:    add t4, s3, s1
 ; RV32-NEXT:    j .LBB0_8
 ; RV32-NEXT:  # %bb.7: # %for.cond1.preheader.us.preheader
 ; RV32-NEXT:    mv t1, t0
 ; RV32-NEXT:  .LBB0_8: # %for.cond1.preheader.us.preheader
 ; RV32-NEXT:    .cfi_restore_state
 ; RV32-NEXT:    li t0, 0
-; RV32-NEXT:    sltu t5, a0, t5
+; RV32-NEXT:    sltu t5, a0, t6
 ; RV32-NEXT:    sltu t6, a2, t3
 ; RV32-NEXT:    and t5, t5, t6
 ; RV32-NEXT:    sltu t4, a0, t4
@@ -140,9 +142,11 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
 ; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 0(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    .cfi_restore s0
 ; RV32-NEXT:    .cfi_restore s1
 ; RV32-NEXT:    .cfi_restore s2
+; RV32-NEXT:    .cfi_restore s3
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:  .LBB0_17: # %for.cond.cleanup
@@ -190,7 +194,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
 ; RV64P670-NEXT:    or t6, s0, s1
 ; RV64P670-NEXT:    sltu s1, a0, t5
 ; RV64P670-NEXT:    sltu s0, a4, t4
-; RV64P670-NEXT:    mv t5, a0
+; RV64P670-NEXT:    add t4, a0, a6
 ; RV64P670-NEXT:    and s0, s0, s1
 ; RV64P670-NEXT:    or s1, a1, a5
 ; RV64P670-NEXT:    srli s1, s1, 63
@@ -200,11 +204,11 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
 ; RV64P670-NEXT:    or s0, t6, s0
 ; RV64P670-NEXT:    sltu s1, a6, s1
 ; RV64P670-NEXT:    or s0, s0, s1
-; RV64P670-NEXT:    andi t4, s0, 1
+; RV64P670-NEXT:    andi t5, s0, 1
 ; RV64P670-NEXT:    j .LBB0_4
 ; RV64P670-NEXT:  .LBB0_3: # %for.cond1.for.cond.cleanup3_crit_edge.us
 ; RV64P670-NEXT:    # in Loop: Header=BB0_4 Depth=1
-; RV64P670-NEXT:    add t5, t5, a1
+; RV64P670-NEXT:    add a0, a0, a1
 ; RV64P670-NEXT:    add a2, a2, a3
 ; RV64P670-NEXT:    add a4, a4, a5
 ; RV64P670-NEXT:    addiw t1, t1, 1
@@ -214,7 +218,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
 ; RV64P670-NEXT:    # =>This Loop Header: Depth=1
 ; RV64P670-NEXT:    # Child Loop BB0_7 Depth 2
 ; RV64P670-NEXT:    # Child Loop BB0_10 Depth 2
-; RV64P670-NEXT:    beqz t4, .LBB0_6
+; RV64P670-NEXT:    beqz t5, .LBB0_6
 ; RV64P670-NEXT:  # %bb.5: # in Loop: Header=BB0_4 Depth=1
 ; RV64P670-NEXT:    li t6, 0
 ; RV64P670-NEXT:    j .LBB0_9
@@ -223,7 +227,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
 ; RV64P670-NEXT:    slli s1, t2, 28
 ; RV64P670-NEXT:    mv s2, a2
 ; RV64P670-NEXT:    mv s3, a4
-; RV64P670-NEXT:    mv s4, t5
+; RV64P670-NEXT:    mv s4, a0
 ; RV64P670-NEXT:    sub s1, s1, t3
 ; RV64P670-NEXT:    vsetvli s0, zero, e8, m2, ta, ma
 ; RV64P670-NEXT:    and t6, s1, a6
@@ -246,11 +250,10 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
 ; RV64P670-NEXT:  .LBB0_9: # %for.body4.us.preheader
 ; RV64P670-NEXT:    # in Loop: Header=BB0_4 Depth=1
 ; RV64P670-NEXT:    mul s2, a1, t0
-; RV64P670-NEXT:    add s0, a0, a6
-; RV64P670-NEXT:    add s1, t5, t6
+; RV64P670-NEXT:    add s1, a0, t6
 ; RV64P670-NEXT:    add s4, a4, t6
 ; RV64P670-NEXT:    add t6, t6, a2
-; RV64P670-NEXT:    add s2, s2, s0
+; RV64P670-NEXT:    add s2, s2, t4
 ; RV64P670-NEXT:  .LBB0_10: # %for.body4.us
 ; RV64P670-NEXT:    # Parent Loop BB0_4 Depth=1
 ; RV64P670-NEXT:    # => This Inner Loop Header: Depth=2
@@ -332,12 +335,12 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
 ; RV64X60-NEXT:    or s0, t4, s0
 ; RV64X60-NEXT:    sltu s1, a6, s1
 ; RV64X60-NEXT:    or s0, s0, s1
-; RV64X60-NEXT:    andi t4, s0, 1
-; RV64X60-NEXT:    mv t5, a0
+; RV64X60-NEXT:    add t4, a0, a6
+; RV64X60-NEXT:    andi t5, s0, 1
 ; RV64X60-NEXT:    j .LBB0_4
 ; RV64X60-NEXT:  .LBB0_3: # %for.cond1.for.cond.cleanup3_crit_edge.us
 ; RV64X60-NEXT:    # in Loop: Header=BB0_4 Depth=1
-; RV64X60-NEXT:    add t5, t5, a1
+; RV64X60-NEXT:    add a0, a0, a1
 ; RV64X60-NEXT:    add a2, a2, a3
 ; RV64X60-NEXT:    addiw t1, t1, 1
 ; RV64X60-NEXT:    add a4, a4, a5
@@ -347,7 +350,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
 ; RV64X60-NEXT:    # =>This Loop Header: Depth=1
 ; RV64X60-NEXT:    # Child Loop BB0_7 Depth 2
 ; RV64X60-NEXT:    # Child Loop BB0_10 Depth 2
-; RV64X60-NEXT:    beqz t4, .LBB0_6
+; RV64X60-NEXT:    beqz t5, .LBB0_6
 ; RV64X60-NEXT:  # %bb.5: # in Loop: Header=BB0_4 Depth=1
 ; RV64X60-NEXT:    li t6, 0
 ; RV64X60-NEXT:    j .LBB0_9
@@ -358,7 +361,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
 ; RV64X60-NEXT:    and t6, s1, a6
 ; RV64X60-NEXT:    mv s2, a2
 ; RV64X60-NEXT:    mv s3, a4
-; RV64X60-NEXT:    mv s4, t5
+; RV64X60-NEXT:    mv s4, a0
 ; RV64X60-NEXT:    mv s1, t6
 ; RV64X60-NEXT:    vsetvli s0, zero, e8, m2, ta, ma
 ; RV64X60-NEXT:  .LBB0_7: # %vector.body
@@ -379,9 +382,8 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
 ; RV64X60-NEXT:  .LBB0_9: # %for.body4.us.preheader
 ; RV64X60-NEXT:    # in Loop: Header=BB0_4 Depth=1
 ; RV64X60-NEXT:    mul s2, a1, t0
-; RV64X60-NEXT:    add s1, a0, a6
-; RV64X60-NEXT:    add s0, t5, t6
-; RV64X60-NEXT:    add s2, s2, s1
+; RV64X60-NEXT:    add s0, a0, t6
+; RV64X60-NEXT:    add s2, s2, t4
 ; RV64X60-NEXT:    add s4, a4, t6
 ; RV64X60-NEXT:    add t6, t6, a2
 ; RV64X60-NEXT:  .LBB0_10: # %for.body4.us
@@ -466,16 +468,16 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
 ; RV64-NEXT:    or s0, a1, a5
 ; RV64-NEXT:    srli s0, s0, 63
 ; RV64-NEXT:    or t5, t5, s0
+; RV64-NEXT:    sltu s0, a6, t4
 ; RV64-NEXT:    or t5, t6, t5
-; RV64-NEXT:    sltu t4, a6, t4
-; RV64-NEXT:    or t4, t4, t5
-; RV64-NEXT:    andi t4, t4, 1
-; RV64-NEXT:    mv t5, a0
+; RV64-NEXT:    add t4, a0, a6
+; RV64-NEXT:    or t5, s0, t5
+; RV64-NEXT:    andi t5, t5, 1
 ; RV64-NEXT:    csrwi vxrm, 0
 ; RV64-NEXT:    j .LBB0_6
 ; RV64-NEXT:  .LBB0_5: # %for.cond1.for.cond.cleanup3_crit_edge.us
 ; RV64-NEXT:    # in Loop: Header=BB0_6 Depth=1
-; RV64-NEXT:    add t5, t5, a1
+; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    add a2, a2, a3
 ; RV64-NEXT:    add a4, a4, a5
 ; RV64-NEXT:    addiw t3, t3, 1
@@ -485,7 +487,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
 ; RV64-NEXT:    # =>This Loop Header: Depth=1
 ; RV64-NEXT:    # Child Loop BB0_9 Depth 2
 ; RV64-NEXT:    # Child Loop BB0_12 Depth 2
-; RV64-NEXT:    beqz t4, .LBB0_8
+; RV64-NEXT:    beqz t5, .LBB0_8
 ; RV64-NEXT:  # %bb.7: # in Loop: Header=BB0_6 Depth=1
 ; RV64-NEXT:    li t6, 0
 ; RV64-NEXT:    j .LBB0_11
@@ -496,7 +498,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
 ; RV64-NEXT:    and t6, t6, a6
 ; RV64-NEXT:    mv s0, a2
 ; RV64-NEXT:    mv s1, a4
-; RV64-NEXT:    mv s2, t5
+; RV64-NEXT:    mv s2, a0
 ; RV64-NEXT:    mv s3, t6
 ; RV64-NEXT:    vsetvli s4, zero, e8, m2, ta, ma
 ; RV64-NEXT:  .LBB0_9: # %vector.body
@@ -516,25 +518,24 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
 ; RV64-NEXT:    beq t6, a6, .LBB0_5
 ; RV64-NEXT:  .LBB0_11: # %for.body4.us.preheader
 ; RV64-NEXT:    # in Loop: Header=BB0_6 Depth=1
-; RV64-NEXT:    mul s1, a1, t2
-; RV64-NEXT:    add s2, a0, a6
-; RV64-NEXT:    add s0, t5, t6
-; RV64-NEXT:    add s1, s2, s1
-; RV64-NEXT:    add s2, a4, t6
+; RV64-NEXT:    mul s2, a1, t2
+; RV64-NEXT:    add s0, a0, t6
+; RV64-NEXT:    add s1, a4, t6
+; RV64-NEXT:    add s2, t4, s2
 ; RV64-NEXT:    add t6, a2, t6
 ; RV64-NEXT:  .LBB0_12: # %for.body4.us
 ; RV64-NEXT:    # Parent Loop BB0_6 Depth=1
 ; RV64-NEXT:    # => This Inner Loop Header: Depth=2
 ; RV64-NEXT:    lbu s3, 0(t6)
-; RV64-NEXT:    lbu s4, 0(s2)
+; RV64-NEXT:    lbu s4, 0(s1)
 ; RV64-NEXT:    add s3, s3, s4
 ; RV64-NEXT:    addi s3, s3, 1
 ; RV64-NEXT:    srli s3, s3, 1
 ; RV64-NEXT:    sb s3, 0(s0)
 ; RV64-NEXT:    addi s0, s0, 1
-; RV64-NEXT:    addi s2, s2, 1
+; RV64-NEXT:    addi s1, s1, 1
 ; RV64-NEXT:    addi t6, t6, 1
-; RV64-NEXT:    bne s0, s1, .LBB0_12
+; RV64-NEXT:    bne s0, s2, .LBB0_12
 ; RV64-NEXT:    j .LBB0_5
 ; RV64-NEXT:  .LBB0_13:
 ; RV64-NEXT:    ld s0, 40(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
index 59a702ab6b17f..1bfeeb92e06dd 100644
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
@@ -2075,14 +2075,14 @@ define i64 @abs_i64(i64 %x) {
 ; RV32SFB-LABEL: abs_i64:
 ; RV32SFB:       # %bb.0:
 ; RV32SFB-NEXT:    snez a2, a0
-; RV32SFB-NEXT:    add a2, a2, a1
+; RV32SFB-NEXT:    neg a3, a1
 ; RV32SFB-NEXT:    bgez a1, .LBB35_2
 ; RV32SFB-NEXT:  # %bb.1:
 ; RV32SFB-NEXT:    neg a0, a0
 ; RV32SFB-NEXT:  .LBB35_2:
 ; RV32SFB-NEXT:    bgez a1, .LBB35_4
 ; RV32SFB-NEXT:  # %bb.3:
-; RV32SFB-NEXT:    neg a1, a2
+; RV32SFB-NEXT:    sub a1, a3, a2
 ; RV32SFB-NEXT:  .LBB35_4:
 ; RV32SFB-NEXT:    ret
   %a = call i64 @llvm.abs.i64(i64 %x, i1 false)