[llvm] [RISCV] Add a pass to eliminate special copies in order to facilitate shrink-wrap optimization (PR #140716)
Liao Chunyu via llvm-commits
llvm-commits at lists.llvm.org
Tue May 20 22:56:04 PDT 2025
https://github.com/ChunyuLiao updated https://github.com/llvm/llvm-project/pull/140716
>From d87b85b0814034dee1348fa3ae72242bf024cdab Mon Sep 17 00:00:00 2001
From: Liao Chunyu <chunyu at iscas.ac.cn>
Date: Tue, 20 May 2025 05:59:06 -0400
Subject: [PATCH 1/2] [RISCV] Add a pass to eliminate special copies in order
to facilitate shrink-wrap optimization
Some data types that require extension have redundant copy instructions.
This pass removes specific copies to help shrink-wrap optimization.
---
llvm/lib/Target/RISCV/CMakeLists.txt | 1 +
llvm/lib/Target/RISCV/RISCV.h | 2 +
llvm/lib/Target/RISCV/RISCVCopyCombine.cpp | 151 ++++++++++++++++++
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 3 +
llvm/test/CodeGen/RISCV/O3-pipeline.ll | 1 +
.../test/CodeGen/RISCV/overflow-intrinsics.ll | 26 +--
.../test/CodeGen/RISCV/rv64-double-convert.ll | 14 +-
llvm/test/CodeGen/RISCV/shrinkwrap.ll | 9 +-
8 files changed, 183 insertions(+), 24 deletions(-)
create mode 100644 llvm/lib/Target/RISCV/RISCVCopyCombine.cpp
diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index e32d6eab3b977..83da083debf34 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -34,6 +34,7 @@ add_llvm_target(RISCVCodeGen
RISCVCallingConv.cpp
RISCVCodeGenPrepare.cpp
RISCVConstantPoolValue.cpp
+ RISCVCopyCombine.cpp
RISCVDeadRegisterDefinitions.cpp
RISCVExpandAtomicPseudoInsts.cpp
RISCVExpandPseudoInsts.cpp
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index ae9410193efe1..d5e55bc60224a 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -30,6 +30,8 @@ void initializeRISCVCodeGenPreparePass(PassRegistry &);
FunctionPass *createRISCVDeadRegisterDefinitionsPass();
void initializeRISCVDeadRegisterDefinitionsPass(PassRegistry &);
+FunctionPass *createRISCVCopyCombinePass();
+void initializeRISCVCopyCombinePass(PassRegistry &);
FunctionPass *createRISCVIndirectBranchTrackingPass();
void initializeRISCVIndirectBranchTrackingPass(PassRegistry &);
diff --git a/llvm/lib/Target/RISCV/RISCVCopyCombine.cpp b/llvm/lib/Target/RISCV/RISCVCopyCombine.cpp
new file mode 100644
index 0000000000000..d3beee1c681d4
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVCopyCombine.cpp
@@ -0,0 +1,151 @@
+//===- RISCVCopyCombine.cpp - Remove special copy for RISC-V --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass removes redundant COPY chains that feed conditional branches so
+// that shrink-wrapping can be applied in more cases. It is mainly effective
+// when data types require extension.
+//
+// After finalize-isel:
+// bb0:
+// liveins: $x10, $x11
+// %1:gpr = COPY $x11 ---- will be deleted by this pass
+// %0:gpr = COPY $x10
+// %2:gpr = COPY %1:gpr ---- without this pass, sink to bb1 in machine-sink,
+// then delete at regalloc
+// BEQ %0:gpr, killed %3:gpr, %bb.3 PseudoBR %bb1
+//
+// bb1:
+// bb2:
+// BNE %2:gpr, killed %5:gpr, %bb.2
+// ...
+// After regalloc
+// bb0:
+// liveins: $x10, $x11
+// renamable $x8 = COPY $x11
+// renamable $x11 = ADDI $x0, 57 --- defines x11, so the COPY cannot be sunk
+// BEQ killed renamable $x10, killed renamable $x11, %bb.4
+// PseudoBR %bb.1
+//
+// bb1:
+// bb2:
+// BEQ killed renamable $x8, killed renamable $x10, %bb.4
+//
+// ----->
+//
+// After this pass:
+// bb0:
+// liveins: $x10, $x11
+// %0:gpr = COPY $x10
+// %2:gpr = COPY $x11
+// BEQ %0:gpr, killed %3:gpr, %bb.3
+// PseudoBR %bb1
+//
+// bb1:
+// bb2:
+// BNE %2:gpr, killed %5:gpr, %bb.2
+// ...
+// After regalloc
+// bb0:
+// liveins: $x10, $x11
+// renamable $x12 = ADDI $x0, 57
+// renamable $x8 = COPY $x11
+// BEQ killed renamable $x10, killed renamable $x12, %bb.4
+// PseudoBR %bb.1
+//
+// bb1:
+// bb2:
+// BEQ killed renamable $x8, killed renamable $x10, %bb.4
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVSubtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+using namespace llvm;
+#define DEBUG_TYPE "riscv-copy-combine"
+#define RISCV_COPY_COMBINE "RISC-V Copy Combine"
+
+STATISTIC(NumCopyDeleted, "Number of copy deleted"); // One per erased COPY.
+
+namespace {
+/// Machine function pass that looks at the COPY feeding a block's conditional
+/// branch and, when that COPY itself reads the result of another COPY in the
+/// same block, forwards the original source and erases the intermediate COPY
+/// (see optimizeBlock()). The function must still be in SSA form.
+class RISCVCopyCombine : public MachineFunctionPass {
+public:
+  static char ID;
+  // Per-function handles; (re)assigned at the start of runOnMachineFunction().
+  // Initialized to nullptr so a use before that point fails deterministically.
+  const TargetInstrInfo *TII = nullptr;
+  MachineRegisterInfo *MRI = nullptr;
+  const TargetRegisterInfo *TRI = nullptr;
+
+  RISCVCopyCombine() : MachineFunctionPass(ID) {}
+
+  /// Runs optimizeBlock() on every basic block of \p MF.
+  /// \returns true if any instruction was changed.
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  /// The pass only erases COPYs and rewrites register operands; it never adds,
+  /// removes or re-links basic blocks, so CFG-only analyses stay valid.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  /// The transformation relies on single-definition virtual registers.
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::IsSSA);
+  }
+
+  StringRef getPassName() const override { return RISCV_COPY_COMBINE; }
+
+private:
+  /// Tries to erase one redundant COPY feeding the conditional branch that
+  /// terminates \p MBB. \returns true if a COPY was erased.
+  bool optimizeBlock(MachineBasicBlock &MBB);
+};
+} // end anonymous namespace
+
+char RISCVCopyCombine::ID = 0; // Handed to MachineFunctionPass(ID) above.
+INITIALIZE_PASS(RISCVCopyCombine, DEBUG_TYPE, RISCV_COPY_COMBINE, false, false)
+
+/// Try to erase one redundant COPY feeding the conditional branch that
+/// terminates \p MBB.
+///
+/// The pattern matched is a chain of two full-register COPYs in one block,
+///   %a = COPY <src>
+///   %b = COPY %a
+/// where %b is the register held in Cond[1] of the analyzed branch and %a has
+/// no other non-debug user. The first COPY is erased and the second one is
+/// rewritten to read <src> directly.
+///
+/// \returns true if a COPY was erased.
+bool RISCVCopyCombine::optimizeBlock(MachineBasicBlock &MBB) {
+  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+  SmallVector<MachineOperand, 3> Cond;
+  // Only blocks ending in an analyzable conditional branch with a
+  // three-element condition are of interest.
+  if (TII->analyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false) ||
+      !TBB || Cond.size() != 3)
+    return false;
+
+  // getVRegDef() yields null for a register without a definition (e.g. a
+  // physical register such as $x0), so only look through virtual registers.
+  const MachineOperand &CondOp = Cond[1];
+  if (!CondOp.isReg() || !CondOp.getReg().isVirtual())
+    return false;
+
+  MachineInstr *MI = MRI->getVRegDef(CondOp.getReg());
+  if (!MI || !MI->isCopy())
+    return false;
+
+  // A COPY with a sub-register index does not forward the whole value.
+  const MachineOperand &MISrc = MI->getOperand(1);
+  if (MI->getOperand(0).getSubReg() || MISrc.getSubReg())
+    return false;
+
+  // Test isVirtual() before querying the use list, and ignore debug uses so
+  // that the presence of debug info cannot change the generated code.
+  Register Op1reg = MISrc.getReg();
+  if (!Op1reg.isVirtual() || !MRI->hasOneNonDBGUse(Op1reg))
+    return false;
+
+  MachineInstr *Src = MRI->getVRegDef(Op1reg);
+  if (!Src || !Src->isCopy() || Src->getParent() != MI->getParent())
+    return false;
+
+  const MachineOperand &SrcOp = Src->getOperand(1);
+  if (Src->getOperand(0).getSubReg() || SrcOp.getSubReg())
+    return false;
+  Register SrcOp1reg = SrcOp.getReg();
+
+  // Forwarding a physical register moves its read from Src down to MI. That
+  // is only sound if nothing in between (including call register masks)
+  // redefines it. Src precedes MI: both are in one block and the function is
+  // in SSA form.
+  if (SrcOp1reg.isPhysical()) {
+    for (MachineBasicBlock::instr_iterator I = std::next(Src->getIterator()),
+                                           E = MI->getIterator();
+         I != E; ++I)
+      if (I->modifiesRegister(SrcOp1reg, TRI))
+        return false;
+  }
+
+  // Print before mutating anything so the dump shows the original COPY.
+  LLVM_DEBUG(dbgs() << "Deleting this copy instruction ";
+             Src->print(dbgs()));
+  // Erase the def first so replaceRegWith() only rewrites the remaining uses
+  // and never creates a transient second definition of SrcOp1reg.
+  Src->eraseFromParent();
+  MRI->replaceRegWith(Op1reg, SrcOp1reg);
+  // The forwarded register is now read later than before; stale kill flags
+  // on its earlier uses would be wrong.
+  MRI->clearKillFlags(SrcOp1reg);
+  ++NumCopyDeleted;
+  return true;
+}
+
+/// Pass entry point: caches the instruction-info and register-info handles
+/// for \p MF, then applies optimizeBlock() to each of its basic blocks.
+/// \returns true if at least one block was modified.
+bool RISCVCopyCombine::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()))
+    return false;
+
+  MRI = &MF.getRegInfo();
+  TRI = MRI->getTargetRegisterInfo();
+  TII = MF.getSubtarget().getInstrInfo();
+
+  bool MadeChange = false;
+  for (MachineBasicBlock &Block : MF) {
+    // Always visit the block, even if an earlier one already changed.
+    if (optimizeBlock(Block))
+      MadeChange = true;
+  }
+  return MadeChange;
+}
+
+/// Factory for the copy-combine pass; returns a newly allocated instance.
+FunctionPass *llvm::createRISCVCopyCombinePass() {
+  FunctionPass *Pass = new RISCVCopyCombine();
+  return Pass;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 15dd4d57727dd..86d8d8f33ca04 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -128,6 +128,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
initializeRISCVPostLegalizerCombinerPass(*PR);
initializeKCFIPass(*PR);
initializeRISCVDeadRegisterDefinitionsPass(*PR);
+ initializeRISCVCopyCombinePass(*PR);
initializeRISCVLateBranchOptPass(*PR);
initializeRISCVMakeCompressibleOptPass(*PR);
initializeRISCVGatherScatterLoweringPass(*PR);
@@ -455,6 +456,7 @@ bool RISCVPassConfig::addRegAssignAndRewriteFast() {
if (TM->getOptLevel() != CodeGenOptLevel::None &&
EnableRISCVDeadRegisterElimination)
addPass(createRISCVDeadRegisterDefinitionsPass());
+
return TargetPassConfig::addRegAssignAndRewriteFast();
}
@@ -598,6 +600,7 @@ void RISCVPassConfig::addPreEmitPass2() {
}
void RISCVPassConfig::addMachineSSAOptimization() {
+ addPass(createRISCVCopyCombinePass());
addPass(createRISCVVectorPeepholePass());
addPass(createRISCVFoldMemOffsetPass());
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 19de864422bc5..7ebe888b7cc52 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -96,6 +96,7 @@
; CHECK-NEXT: Lazy Block Frequency Analysis
; CHECK-NEXT: RISC-V DAG->DAG Pattern Instruction Selection
; CHECK-NEXT: Finalize ISel and expand pseudo-instructions
+; CHECK-NEXT: RISC-V Copy Combine
; CHECK-NEXT: RISC-V Vector Peephole Optimization
; CHECK-NEXT: RISC-V Fold Memory Offset
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
diff --git a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
index a5426e560bd65..714e8d1cb74c0 100644
--- a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
@@ -1080,33 +1080,33 @@ define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) {
; RV32-NEXT: .cfi_offset s5, -28
; RV32-NEXT: .cfi_offset s6, -32
; RV32-NEXT: mv s5, a5
-; RV32-NEXT: mv s3, a1
-; RV32-NEXT: andi a1, a5, 1
-; RV32-NEXT: beqz a1, .LBB32_8
+; RV32-NEXT: mv s2, a0
+; RV32-NEXT: andi a0, a5, 1
+; RV32-NEXT: beqz a0, .LBB32_8
; RV32-NEXT: # %bb.1: # %t
; RV32-NEXT: mv s0, a4
-; RV32-NEXT: mv s2, a3
+; RV32-NEXT: mv s3, a3
; RV32-NEXT: mv s1, a2
-; RV32-NEXT: mv s4, a0
-; RV32-NEXT: beq s3, a3, .LBB32_3
+; RV32-NEXT: mv s4, a1
+; RV32-NEXT: beq a1, a3, .LBB32_3
; RV32-NEXT: # %bb.2: # %t
-; RV32-NEXT: sltu s6, s3, s2
+; RV32-NEXT: sltu s6, s4, s3
; RV32-NEXT: j .LBB32_4
; RV32-NEXT: .LBB32_3:
-; RV32-NEXT: sltu s6, s4, s1
+; RV32-NEXT: sltu s6, s2, s1
; RV32-NEXT: .LBB32_4: # %t
; RV32-NEXT: mv a0, s6
; RV32-NEXT: call call
; RV32-NEXT: beqz s6, .LBB32_8
; RV32-NEXT: # %bb.5: # %end
-; RV32-NEXT: sltu a1, s4, s1
+; RV32-NEXT: sltu a1, s2, s1
; RV32-NEXT: mv a0, a1
-; RV32-NEXT: beq s3, s2, .LBB32_7
+; RV32-NEXT: beq s4, s3, .LBB32_7
; RV32-NEXT: # %bb.6: # %end
-; RV32-NEXT: sltu a0, s3, s2
+; RV32-NEXT: sltu a0, s4, s3
; RV32-NEXT: .LBB32_7: # %end
-; RV32-NEXT: sub a2, s3, s2
-; RV32-NEXT: sub a3, s4, s1
+; RV32-NEXT: sub a2, s4, s3
+; RV32-NEXT: sub a3, s2, s1
; RV32-NEXT: sub a2, a2, a1
; RV32-NEXT: sw a3, 0(s0)
; RV32-NEXT: sw a2, 4(s0)
diff --git a/llvm/test/CodeGen/RISCV/rv64-double-convert.ll b/llvm/test/CodeGen/RISCV/rv64-double-convert.ll
index dd49d9e3e2dce..f9fd528584169 100644
--- a/llvm/test/CodeGen/RISCV/rv64-double-convert.ll
+++ b/llvm/test/CodeGen/RISCV/rv64-double-convert.ll
@@ -69,14 +69,14 @@ define i128 @fptosi_sat_f64_to_i128(double %a) nounwind {
; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s5, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: li a1, -449
; RV64I-NEXT: slli a1, a1, 53
; RV64I-NEXT: call __gedf2
; RV64I-NEXT: mv s2, a0
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __fixdfti
-; RV64I-NEXT: mv s1, a0
+; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: mv s3, a1
; RV64I-NEXT: li s5, -1
; RV64I-NEXT: bgez s2, .LBB4_2
@@ -86,15 +86,15 @@ define i128 @fptosi_sat_f64_to_i128(double %a) nounwind {
; RV64I-NEXT: li a0, 575
; RV64I-NEXT: slli a0, a0, 53
; RV64I-NEXT: addi a1, a0, -1
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __gtdf2
; RV64I-NEXT: mv s4, a0
; RV64I-NEXT: blez a0, .LBB4_4
; RV64I-NEXT: # %bb.3:
; RV64I-NEXT: srli s3, s5, 1
; RV64I-NEXT: .LBB4_4:
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: mv a1, s0
+; RV64I-NEXT: mv a0, s1
+; RV64I-NEXT: mv a1, s1
; RV64I-NEXT: call __unorddf2
; RV64I-NEXT: snez a0, a0
; RV64I-NEXT: slti a1, s2, 0
@@ -102,7 +102,7 @@ define i128 @fptosi_sat_f64_to_i128(double %a) nounwind {
; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: addi a3, a1, -1
; RV64I-NEXT: and a1, a0, s3
-; RV64I-NEXT: and a3, a3, s1
+; RV64I-NEXT: and a3, a3, s0
; RV64I-NEXT: neg a2, a2
; RV64I-NEXT: or a2, a2, a3
; RV64I-NEXT: and a0, a0, a2
diff --git a/llvm/test/CodeGen/RISCV/shrinkwrap.ll b/llvm/test/CodeGen/RISCV/shrinkwrap.ll
index 90f9509c72373..235c714f7f33b 100644
--- a/llvm/test/CodeGen/RISCV/shrinkwrap.ll
+++ b/llvm/test/CodeGen/RISCV/shrinkwrap.ll
@@ -361,6 +361,9 @@ define void @li_straightline_b(i32 zeroext %a, i32 zeroext %b) {
;
; RV64I-SW-LABEL: li_straightline_b:
; RV64I-SW: # %bb.0:
+; RV64I-SW-NEXT: li a2, 57
+; RV64I-SW-NEXT: beq a0, a2, .LBB3_4
+; RV64I-SW-NEXT: # %bb.1: # %do_call
; RV64I-SW-NEXT: addi sp, sp, -16
; RV64I-SW-NEXT: .cfi_def_cfa_offset 16
; RV64I-SW-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
@@ -368,21 +371,19 @@ define void @li_straightline_b(i32 zeroext %a, i32 zeroext %b) {
; RV64I-SW-NEXT: .cfi_offset ra, -8
; RV64I-SW-NEXT: .cfi_offset s0, -16
; RV64I-SW-NEXT: mv s0, a1
-; RV64I-SW-NEXT: li a1, 57
-; RV64I-SW-NEXT: beq a0, a1, .LBB3_3
-; RV64I-SW-NEXT: # %bb.1: # %do_call
; RV64I-SW-NEXT: call foo
; RV64I-SW-NEXT: li a0, 57
; RV64I-SW-NEXT: beq s0, a0, .LBB3_3
; RV64I-SW-NEXT: # %bb.2: # %do_call2
; RV64I-SW-NEXT: call foo
-; RV64I-SW-NEXT: .LBB3_3: # %exit
+; RV64I-SW-NEXT: .LBB3_3:
; RV64I-SW-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-SW-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-SW-NEXT: .cfi_restore ra
; RV64I-SW-NEXT: .cfi_restore s0
; RV64I-SW-NEXT: addi sp, sp, 16
; RV64I-SW-NEXT: .cfi_def_cfa_offset 0
+; RV64I-SW-NEXT: .LBB3_4: # %exit
; RV64I-SW-NEXT: ret
%cmp0 = icmp eq i32 %a, 57
br i1 %cmp0, label %exit, label %do_call
>From 7070326605d352ac908b67d6439ff149ed78cd49 Mon Sep 17 00:00:00 2001
From: Liao Chunyu <liaochunyu126 at 126.com>
Date: Wed, 21 May 2025 13:55:57 +0800
Subject: [PATCH 2/2] Update llvm/lib/Target/RISCV/RISCVCopyCombine.cpp
Co-authored-by: Sam Elliott <sam at lenary.co.uk>
---
llvm/lib/Target/RISCV/RISCVCopyCombine.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/RISCV/RISCVCopyCombine.cpp b/llvm/lib/Target/RISCV/RISCVCopyCombine.cpp
index d3beee1c681d4..e24750d3690bc 100644
--- a/llvm/lib/Target/RISCV/RISCVCopyCombine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCopyCombine.cpp
@@ -16,7 +16,8 @@
// %0:gpr = COPY $x10
// %2:gpr = COPY %1:gpr ---- without this pass, sink to bb1 in machine-sink,
// then delete at regalloc
-// BEQ %0:gpr, killed %3:gpr, %bb.3 PseudoBR %bb1
+// BEQ %0:gpr, killed %3:gpr, %bb.3
+// PseudoBR %bb1
//
// bb1:
// bb2:
More information about the llvm-commits
mailing list