[llvm] [RISCV] Move MachineCombiner to addILPOpts() (PR #158071)
Pengcheng Wang via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 11 06:22:20 PDT 2025
https://github.com/wangpc-pp created https://github.com/llvm/llvm-project/pull/158071
This moves the `MachineCombiner` pass into `addILPOpts()` so that it runs before `MachineCSE` and other passes.
Fixes https://github.com/llvm/llvm-project/issues/158063.
>From 42ad47cc1674e1ab836ad9ffb2f79fc6bfe38419 Mon Sep 17 00:00:00 2001
From: Pengcheng Wang <wangpengcheng.pp at bytedance.com>
Date: Thu, 11 Sep 2025 21:18:08 +0800
Subject: [PATCH] [RISCV] Move MachineCombiner to addILPOpts()
So that it runs before `MachineCSE` and other passes.
Fixes https://github.com/llvm/llvm-project/issues/158063.
---
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 11 ++-
llvm/test/CodeGen/RISCV/O3-pipeline.ll | 6 +-
llvm/test/CodeGen/RISCV/machine-combiner.ll | 43 ++++-----
llvm/test/CodeGen/RISCV/neg-abs.ll | 24 ++---
.../fixed-vectors-strided-load-store-asm.ll | 2 +-
.../RISCV/rvv/vxrm-insert-out-of-loop.ll | 89 ++++++++++---------
.../CodeGen/RISCV/short-forward-branch-opt.ll | 4 +-
7 files changed, 87 insertions(+), 92 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 460bb33f2553a..d7e0be65fb6fa 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -427,6 +427,7 @@ class RISCVPassConfig : public TargetPassConfig {
void addPreRegAlloc() override;
void addPostRegAlloc() override;
void addFastRegAlloc() override;
+ bool addILPOpts() override;
std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
};
@@ -612,9 +613,6 @@ void RISCVPassConfig::addMachineSSAOptimization() {
TargetPassConfig::addMachineSSAOptimization();
- if (EnableMachineCombiner)
- addPass(&MachineCombinerID);
-
if (TM->getTargetTriple().isRISCV64()) {
addPass(createRISCVOptWInstrsPass());
}
@@ -649,6 +647,13 @@ void RISCVPassConfig::addPostRegAlloc() {
addPass(createRISCVRedundantCopyEliminationPass());
}
+bool RISCVPassConfig::addILPOpts() {
+ if (EnableMachineCombiner)
+ addPass(&MachineCombinerID);
+
+ return true;
+}
+
void RISCVTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PB.registerLateLoopOptimizationsEPCallback([=](LoopPassManager &LPM,
OptimizationLevel Level) {
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index c7f70a9d266c2..ea08061221fd4 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -107,6 +107,9 @@
; CHECK-NEXT: Remove dead machine instructions
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Machine Natural Loop Construction
+; CHECK-NEXT: Machine Trace Metrics
+; CHECK-NEXT: Lazy Machine Block Frequency Analysis
+; CHECK-NEXT: Machine InstCombiner
; CHECK-NEXT: Machine Block Frequency Analysis
; CHECK-NEXT: Early Machine Loop Invariant Code Motion
; CHECK-NEXT: MachineDominator Tree Construction
@@ -117,9 +120,6 @@
; CHECK-NEXT: Machine code sinking
; CHECK-NEXT: Peephole Optimizations
; CHECK-NEXT: Remove dead machine instructions
-; CHECK-NEXT: Machine Trace Metrics
-; CHECK-NEXT: Lazy Machine Block Frequency Analysis
-; CHECK-NEXT: Machine InstCombiner
; RV64-NEXT: RISC-V Optimize W Instructions
; CHECK-NEXT: RISC-V Pre-RA pseudo instruction expansion pass
; CHECK-NEXT: RISC-V Merge Base Offset
diff --git a/llvm/test/CodeGen/RISCV/machine-combiner.ll b/llvm/test/CodeGen/RISCV/machine-combiner.ll
index 7a1c41c1839fa..69eca6dd7768a 100644
--- a/llvm/test/CodeGen/RISCV/machine-combiner.ll
+++ b/llvm/test/CodeGen/RISCV/machine-combiner.ll
@@ -1094,33 +1094,19 @@ declare float @llvm.maxnum.f32(float, float)
declare double @llvm.maxnum.f64(double, double)
define double @test_fmadd_strategy(double %a0, double %a1, double %a2, double %a3, i64 %flag) {
-; CHECK_LOCAL-LABEL: test_fmadd_strategy:
-; CHECK_LOCAL: # %bb.0: # %entry
-; CHECK_LOCAL-NEXT: fsub.d fa4, fa0, fa1
-; CHECK_LOCAL-NEXT: andi a0, a0, 1
-; CHECK_LOCAL-NEXT: fmv.d fa5, fa0
-; CHECK_LOCAL-NEXT: fmul.d fa0, fa4, fa2
-; CHECK_LOCAL-NEXT: beqz a0, .LBB76_2
-; CHECK_LOCAL-NEXT: # %bb.1: # %entry
-; CHECK_LOCAL-NEXT: fmul.d fa4, fa5, fa1
-; CHECK_LOCAL-NEXT: fmadd.d fa5, fa5, fa1, fa0
-; CHECK_LOCAL-NEXT: fsub.d fa0, fa5, fa4
-; CHECK_LOCAL-NEXT: .LBB76_2: # %entry
-; CHECK_LOCAL-NEXT: ret
-;
-; CHECK_GLOBAL-LABEL: test_fmadd_strategy:
-; CHECK_GLOBAL: # %bb.0: # %entry
-; CHECK_GLOBAL-NEXT: fsub.d fa4, fa0, fa1
-; CHECK_GLOBAL-NEXT: andi a0, a0, 1
-; CHECK_GLOBAL-NEXT: fmv.d fa5, fa0
-; CHECK_GLOBAL-NEXT: fmul.d fa0, fa4, fa2
-; CHECK_GLOBAL-NEXT: beqz a0, .LBB76_2
-; CHECK_GLOBAL-NEXT: # %bb.1: # %entry
-; CHECK_GLOBAL-NEXT: fmul.d fa5, fa5, fa1
-; CHECK_GLOBAL-NEXT: fadd.d fa4, fa5, fa0
-; CHECK_GLOBAL-NEXT: fsub.d fa0, fa4, fa5
-; CHECK_GLOBAL-NEXT: .LBB76_2: # %entry
-; CHECK_GLOBAL-NEXT: ret
+; CHECK-LABEL: test_fmadd_strategy:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsub.d fa5, fa0, fa1
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: beqz a0, .LBB76_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: fmul.d fa4, fa0, fa1
+; CHECK-NEXT: fmadd.d fa5, fa5, fa2, fa4
+; CHECK-NEXT: fsub.d fa0, fa5, fa4
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB76_2:
+; CHECK-NEXT: fmul.d fa0, fa5, fa2
+; CHECK-NEXT: ret
entry:
%sub = fsub contract double %a0, %a1
%mul = fmul contract double %sub, %a2
@@ -1132,3 +1118,6 @@ entry:
%retval.0 = select i1 %tobool.not, double %mul, double %sub3
ret double %retval.0
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK_GLOBAL: {{.*}}
+; CHECK_LOCAL: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/neg-abs.ll b/llvm/test/CodeGen/RISCV/neg-abs.ll
index da81fe5708814..f9ccf7637eee9 100644
--- a/llvm/test/CodeGen/RISCV/neg-abs.ll
+++ b/llvm/test/CodeGen/RISCV/neg-abs.ll
@@ -208,14 +208,14 @@ define i64 @neg_abs64_multiuse(i64 %x, ptr %y) {
; RV32I-NEXT: sub a1, a1, a3
; RV32I-NEXT: neg a0, a0
; RV32I-NEXT: .LBB5_2:
-; RV32I-NEXT: snez a3, a0
-; RV32I-NEXT: neg a4, a1
-; RV32I-NEXT: sub a3, a4, a3
-; RV32I-NEXT: neg a4, a0
+; RV32I-NEXT: snez a4, a0
+; RV32I-NEXT: neg a3, a0
+; RV32I-NEXT: add a4, a1, a4
+; RV32I-NEXT: neg a4, a4
; RV32I-NEXT: sw a0, 0(a2)
; RV32I-NEXT: sw a1, 4(a2)
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: mv a0, a3
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: neg_abs64_multiuse:
@@ -227,14 +227,14 @@ define i64 @neg_abs64_multiuse(i64 %x, ptr %y) {
; RV32ZBB-NEXT: sub a1, a1, a3
; RV32ZBB-NEXT: neg a0, a0
; RV32ZBB-NEXT: .LBB5_2:
-; RV32ZBB-NEXT: snez a3, a0
-; RV32ZBB-NEXT: neg a4, a1
-; RV32ZBB-NEXT: sub a3, a4, a3
-; RV32ZBB-NEXT: neg a4, a0
+; RV32ZBB-NEXT: snez a4, a0
+; RV32ZBB-NEXT: neg a3, a0
+; RV32ZBB-NEXT: add a4, a1, a4
+; RV32ZBB-NEXT: neg a4, a4
; RV32ZBB-NEXT: sw a0, 0(a2)
; RV32ZBB-NEXT: sw a1, 4(a2)
-; RV32ZBB-NEXT: mv a0, a4
-; RV32ZBB-NEXT: mv a1, a3
+; RV32ZBB-NEXT: mv a0, a3
+; RV32ZBB-NEXT: mv a1, a4
; RV32ZBB-NEXT: ret
;
; RV64I-LABEL: neg_abs64_multiuse:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
index 83b435ddff902..056f55260b854 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
@@ -934,7 +934,7 @@ define void @strided_load_startval_add_with_splat(ptr noalias nocapture %arg, pt
; CHECK-NEXT: add a1, a1, a5
; CHECK-NEXT: slli a3, a3, 32
; CHECK-NEXT: srli a3, a3, 32
-; CHECK-NEXT: add a0, a4, a0
+; CHECK-NEXT: add a0, a0, a4
; CHECK-NEXT: add a0, a0, a3
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: .LBB14_6: # %bb35
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
index dddcd4f107e3b..ead79fcf53d8b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
@@ -18,13 +18,10 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV32-NEXT: # %bb.1: # %for.cond1.preheader.lr.ph
; RV32-NEXT: blez a6, .LBB0_17
; RV32-NEXT: # %bb.2: # %for.cond1.preheader.us.preheader
-; RV32-NEXT: addi t0, a7, -1
+; RV32-NEXT: addi t3, a7, -1
; RV32-NEXT: csrr t2, vlenb
-; RV32-NEXT: mul t3, a1, t0
-; RV32-NEXT: mul t4, a3, t0
-; RV32-NEXT: mul t5, a5, t0
; RV32-NEXT: slli t1, t2, 1
-; RV32-NEXT: li t6, 32
+; RV32-NEXT: li t4, 32
; RV32-NEXT: mv t0, t1
; RV32-NEXT: # %bb.3: # %for.cond1.preheader.us.preheader
; RV32-NEXT: li t0, 32
@@ -34,27 +31,32 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s3, 0(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset s0, -4
; RV32-NEXT: .cfi_offset s1, -8
; RV32-NEXT: .cfi_offset s2, -12
+; RV32-NEXT: .cfi_offset s3, -16
; RV32-NEXT: .cfi_remember_state
-; RV32-NEXT: add t3, a0, t3
-; RV32-NEXT: add t4, a2, t4
-; RV32-NEXT: add s0, a4, t5
-; RV32-NEXT: bltu t6, t1, .LBB0_6
+; RV32-NEXT: mul t5, a1, t3
+; RV32-NEXT: add s0, a0, a6
+; RV32-NEXT: mul t6, a3, t3
+; RV32-NEXT: add s2, a2, a6
+; RV32-NEXT: mul s1, a5, t3
+; RV32-NEXT: add s3, a4, a6
+; RV32-NEXT: bltu t4, t1, .LBB0_6
; RV32-NEXT: # %bb.5: # %for.cond1.preheader.us.preheader
; RV32-NEXT: li t1, 32
; RV32-NEXT: .LBB0_6: # %for.cond1.preheader.us.preheader
-; RV32-NEXT: add t3, t3, a6
-; RV32-NEXT: add t5, t4, a6
-; RV32-NEXT: add t4, s0, a6
+; RV32-NEXT: add t3, s0, t5
+; RV32-NEXT: add t6, s2, t6
+; RV32-NEXT: add t4, s3, s1
; RV32-NEXT: j .LBB0_8
; RV32-NEXT: # %bb.7: # %for.cond1.preheader.us.preheader
; RV32-NEXT: mv t1, t0
; RV32-NEXT: .LBB0_8: # %for.cond1.preheader.us.preheader
; RV32-NEXT: .cfi_restore_state
; RV32-NEXT: li t0, 0
-; RV32-NEXT: sltu t5, a0, t5
+; RV32-NEXT: sltu t5, a0, t6
; RV32-NEXT: sltu t6, a2, t3
; RV32-NEXT: and t5, t5, t6
; RV32-NEXT: sltu t4, a0, t4
@@ -140,9 +142,11 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s3, 0(sp) # 4-byte Folded Reload
; RV32-NEXT: .cfi_restore s0
; RV32-NEXT: .cfi_restore s1
; RV32-NEXT: .cfi_restore s2
+; RV32-NEXT: .cfi_restore s3
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: .LBB0_17: # %for.cond.cleanup
@@ -190,7 +194,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64P670-NEXT: or t6, s0, s1
; RV64P670-NEXT: sltu s1, a0, t5
; RV64P670-NEXT: sltu s0, a4, t4
-; RV64P670-NEXT: mv t5, a0
+; RV64P670-NEXT: add t4, a0, a6
; RV64P670-NEXT: and s0, s0, s1
; RV64P670-NEXT: or s1, a1, a5
; RV64P670-NEXT: srli s1, s1, 63
@@ -200,11 +204,11 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64P670-NEXT: or s0, t6, s0
; RV64P670-NEXT: sltu s1, a6, s1
; RV64P670-NEXT: or s0, s0, s1
-; RV64P670-NEXT: andi t4, s0, 1
+; RV64P670-NEXT: andi t5, s0, 1
; RV64P670-NEXT: j .LBB0_4
; RV64P670-NEXT: .LBB0_3: # %for.cond1.for.cond.cleanup3_crit_edge.us
; RV64P670-NEXT: # in Loop: Header=BB0_4 Depth=1
-; RV64P670-NEXT: add t5, t5, a1
+; RV64P670-NEXT: add a0, a0, a1
; RV64P670-NEXT: add a2, a2, a3
; RV64P670-NEXT: add a4, a4, a5
; RV64P670-NEXT: addiw t1, t1, 1
@@ -214,7 +218,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64P670-NEXT: # =>This Loop Header: Depth=1
; RV64P670-NEXT: # Child Loop BB0_7 Depth 2
; RV64P670-NEXT: # Child Loop BB0_10 Depth 2
-; RV64P670-NEXT: beqz t4, .LBB0_6
+; RV64P670-NEXT: beqz t5, .LBB0_6
; RV64P670-NEXT: # %bb.5: # in Loop: Header=BB0_4 Depth=1
; RV64P670-NEXT: li t6, 0
; RV64P670-NEXT: j .LBB0_9
@@ -223,7 +227,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64P670-NEXT: slli s1, t2, 28
; RV64P670-NEXT: mv s2, a2
; RV64P670-NEXT: mv s3, a4
-; RV64P670-NEXT: mv s4, t5
+; RV64P670-NEXT: mv s4, a0
; RV64P670-NEXT: sub s1, s1, t3
; RV64P670-NEXT: vsetvli s0, zero, e8, m2, ta, ma
; RV64P670-NEXT: and t6, s1, a6
@@ -246,11 +250,10 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64P670-NEXT: .LBB0_9: # %for.body4.us.preheader
; RV64P670-NEXT: # in Loop: Header=BB0_4 Depth=1
; RV64P670-NEXT: mul s2, a1, t0
-; RV64P670-NEXT: add s0, a0, a6
-; RV64P670-NEXT: add s1, t5, t6
+; RV64P670-NEXT: add s1, a0, t6
; RV64P670-NEXT: add s4, a4, t6
; RV64P670-NEXT: add t6, t6, a2
-; RV64P670-NEXT: add s2, s2, s0
+; RV64P670-NEXT: add s2, s2, t4
; RV64P670-NEXT: .LBB0_10: # %for.body4.us
; RV64P670-NEXT: # Parent Loop BB0_4 Depth=1
; RV64P670-NEXT: # => This Inner Loop Header: Depth=2
@@ -332,12 +335,12 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64X60-NEXT: or s0, t4, s0
; RV64X60-NEXT: sltu s1, a6, s1
; RV64X60-NEXT: or s0, s0, s1
-; RV64X60-NEXT: andi t4, s0, 1
-; RV64X60-NEXT: mv t5, a0
+; RV64X60-NEXT: add t4, a0, a6
+; RV64X60-NEXT: andi t5, s0, 1
; RV64X60-NEXT: j .LBB0_4
; RV64X60-NEXT: .LBB0_3: # %for.cond1.for.cond.cleanup3_crit_edge.us
; RV64X60-NEXT: # in Loop: Header=BB0_4 Depth=1
-; RV64X60-NEXT: add t5, t5, a1
+; RV64X60-NEXT: add a0, a0, a1
; RV64X60-NEXT: add a2, a2, a3
; RV64X60-NEXT: addiw t1, t1, 1
; RV64X60-NEXT: add a4, a4, a5
@@ -347,7 +350,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64X60-NEXT: # =>This Loop Header: Depth=1
; RV64X60-NEXT: # Child Loop BB0_7 Depth 2
; RV64X60-NEXT: # Child Loop BB0_10 Depth 2
-; RV64X60-NEXT: beqz t4, .LBB0_6
+; RV64X60-NEXT: beqz t5, .LBB0_6
; RV64X60-NEXT: # %bb.5: # in Loop: Header=BB0_4 Depth=1
; RV64X60-NEXT: li t6, 0
; RV64X60-NEXT: j .LBB0_9
@@ -358,7 +361,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64X60-NEXT: and t6, s1, a6
; RV64X60-NEXT: mv s2, a2
; RV64X60-NEXT: mv s3, a4
-; RV64X60-NEXT: mv s4, t5
+; RV64X60-NEXT: mv s4, a0
; RV64X60-NEXT: mv s1, t6
; RV64X60-NEXT: vsetvli s0, zero, e8, m2, ta, ma
; RV64X60-NEXT: .LBB0_7: # %vector.body
@@ -379,9 +382,8 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64X60-NEXT: .LBB0_9: # %for.body4.us.preheader
; RV64X60-NEXT: # in Loop: Header=BB0_4 Depth=1
; RV64X60-NEXT: mul s2, a1, t0
-; RV64X60-NEXT: add s1, a0, a6
-; RV64X60-NEXT: add s0, t5, t6
-; RV64X60-NEXT: add s2, s2, s1
+; RV64X60-NEXT: add s0, a0, t6
+; RV64X60-NEXT: add s2, s2, t4
; RV64X60-NEXT: add s4, a4, t6
; RV64X60-NEXT: add t6, t6, a2
; RV64X60-NEXT: .LBB0_10: # %for.body4.us
@@ -466,16 +468,16 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64-NEXT: or s0, a1, a5
; RV64-NEXT: srli s0, s0, 63
; RV64-NEXT: or t5, t5, s0
+; RV64-NEXT: sltu s0, a6, t4
; RV64-NEXT: or t5, t6, t5
-; RV64-NEXT: sltu t4, a6, t4
-; RV64-NEXT: or t4, t4, t5
-; RV64-NEXT: andi t4, t4, 1
-; RV64-NEXT: mv t5, a0
+; RV64-NEXT: add t4, a0, a6
+; RV64-NEXT: or t5, s0, t5
+; RV64-NEXT: andi t5, t5, 1
; RV64-NEXT: csrwi vxrm, 0
; RV64-NEXT: j .LBB0_6
; RV64-NEXT: .LBB0_5: # %for.cond1.for.cond.cleanup3_crit_edge.us
; RV64-NEXT: # in Loop: Header=BB0_6 Depth=1
-; RV64-NEXT: add t5, t5, a1
+; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: add a2, a2, a3
; RV64-NEXT: add a4, a4, a5
; RV64-NEXT: addiw t3, t3, 1
@@ -485,7 +487,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64-NEXT: # =>This Loop Header: Depth=1
; RV64-NEXT: # Child Loop BB0_9 Depth 2
; RV64-NEXT: # Child Loop BB0_12 Depth 2
-; RV64-NEXT: beqz t4, .LBB0_8
+; RV64-NEXT: beqz t5, .LBB0_8
; RV64-NEXT: # %bb.7: # in Loop: Header=BB0_6 Depth=1
; RV64-NEXT: li t6, 0
; RV64-NEXT: j .LBB0_11
@@ -496,7 +498,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64-NEXT: and t6, t6, a6
; RV64-NEXT: mv s0, a2
; RV64-NEXT: mv s1, a4
-; RV64-NEXT: mv s2, t5
+; RV64-NEXT: mv s2, a0
; RV64-NEXT: mv s3, t6
; RV64-NEXT: vsetvli s4, zero, e8, m2, ta, ma
; RV64-NEXT: .LBB0_9: # %vector.body
@@ -516,25 +518,24 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64-NEXT: beq t6, a6, .LBB0_5
; RV64-NEXT: .LBB0_11: # %for.body4.us.preheader
; RV64-NEXT: # in Loop: Header=BB0_6 Depth=1
-; RV64-NEXT: mul s1, a1, t2
-; RV64-NEXT: add s2, a0, a6
-; RV64-NEXT: add s0, t5, t6
-; RV64-NEXT: add s1, s2, s1
-; RV64-NEXT: add s2, a4, t6
+; RV64-NEXT: mul s2, a1, t2
+; RV64-NEXT: add s0, a0, t6
+; RV64-NEXT: add s1, a4, t6
+; RV64-NEXT: add s2, t4, s2
; RV64-NEXT: add t6, a2, t6
; RV64-NEXT: .LBB0_12: # %for.body4.us
; RV64-NEXT: # Parent Loop BB0_6 Depth=1
; RV64-NEXT: # => This Inner Loop Header: Depth=2
; RV64-NEXT: lbu s3, 0(t6)
-; RV64-NEXT: lbu s4, 0(s2)
+; RV64-NEXT: lbu s4, 0(s1)
; RV64-NEXT: add s3, s3, s4
; RV64-NEXT: addi s3, s3, 1
; RV64-NEXT: srli s3, s3, 1
; RV64-NEXT: sb s3, 0(s0)
; RV64-NEXT: addi s0, s0, 1
-; RV64-NEXT: addi s2, s2, 1
+; RV64-NEXT: addi s1, s1, 1
; RV64-NEXT: addi t6, t6, 1
-; RV64-NEXT: bne s0, s1, .LBB0_12
+; RV64-NEXT: bne s0, s2, .LBB0_12
; RV64-NEXT: j .LBB0_5
; RV64-NEXT: .LBB0_13:
; RV64-NEXT: ld s0, 40(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
index 59a702ab6b17f..1bfeeb92e06dd 100644
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
@@ -2075,14 +2075,14 @@ define i64 @abs_i64(i64 %x) {
; RV32SFB-LABEL: abs_i64:
; RV32SFB: # %bb.0:
; RV32SFB-NEXT: snez a2, a0
-; RV32SFB-NEXT: add a2, a2, a1
+; RV32SFB-NEXT: neg a3, a1
; RV32SFB-NEXT: bgez a1, .LBB35_2
; RV32SFB-NEXT: # %bb.1:
; RV32SFB-NEXT: neg a0, a0
; RV32SFB-NEXT: .LBB35_2:
; RV32SFB-NEXT: bgez a1, .LBB35_4
; RV32SFB-NEXT: # %bb.3:
-; RV32SFB-NEXT: neg a1, a2
+; RV32SFB-NEXT: sub a1, a3, a2
; RV32SFB-NEXT: .LBB35_4:
; RV32SFB-NEXT: ret
%a = call i64 @llvm.abs.i64(i64 %x, i1 false)
More information about the llvm-commits mailing list