[llvm] [RISCV] Add a pass to eliminate special copies in order to facilitate shrink-wrap optimization (PR #140716)

Liao Chunyu via llvm-commits llvm-commits at lists.llvm.org
Tue May 20 22:56:04 PDT 2025


https://github.com/ChunyuLiao updated https://github.com/llvm/llvm-project/pull/140716

>From d87b85b0814034dee1348fa3ae72242bf024cdab Mon Sep 17 00:00:00 2001
From: Liao Chunyu <chunyu at iscas.ac.cn>
Date: Tue, 20 May 2025 05:59:06 -0400
Subject: [PATCH 1/2] [RISCV] Add a pass to eliminate special copies in order
 to facilitate shrink-wrap optimization

Some data types that require extension have redundant copy instructions.
This pass removes specific copies to help shrink-wrap optimization.
---
 llvm/lib/Target/RISCV/CMakeLists.txt          |   1 +
 llvm/lib/Target/RISCV/RISCV.h                 |   2 +
 llvm/lib/Target/RISCV/RISCVCopyCombine.cpp    | 151 ++++++++++++++++++
 llvm/lib/Target/RISCV/RISCVTargetMachine.cpp  |   3 +
 llvm/test/CodeGen/RISCV/O3-pipeline.ll        |   1 +
 .../test/CodeGen/RISCV/overflow-intrinsics.ll |  26 +--
 .../test/CodeGen/RISCV/rv64-double-convert.ll |  14 +-
 llvm/test/CodeGen/RISCV/shrinkwrap.ll         |   9 +-
 8 files changed, 183 insertions(+), 24 deletions(-)
 create mode 100644 llvm/lib/Target/RISCV/RISCVCopyCombine.cpp

diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index e32d6eab3b977..83da083debf34 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -34,6 +34,7 @@ add_llvm_target(RISCVCodeGen
   RISCVCallingConv.cpp
   RISCVCodeGenPrepare.cpp
   RISCVConstantPoolValue.cpp
+  RISCVCopyCombine.cpp
   RISCVDeadRegisterDefinitions.cpp
   RISCVExpandAtomicPseudoInsts.cpp
   RISCVExpandPseudoInsts.cpp
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index ae9410193efe1..d5e55bc60224a 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -30,6 +30,8 @@ void initializeRISCVCodeGenPreparePass(PassRegistry &);
 
 FunctionPass *createRISCVDeadRegisterDefinitionsPass();
 void initializeRISCVDeadRegisterDefinitionsPass(PassRegistry &);
+FunctionPass *createRISCVCopyCombinePass();
+void initializeRISCVCopyCombinePass(PassRegistry &);
 
 FunctionPass *createRISCVIndirectBranchTrackingPass();
 void initializeRISCVIndirectBranchTrackingPass(PassRegistry &);
diff --git a/llvm/lib/Target/RISCV/RISCVCopyCombine.cpp b/llvm/lib/Target/RISCV/RISCVCopyCombine.cpp
new file mode 100644
index 0000000000000..d3beee1c681d4
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVCopyCombine.cpp
@@ -0,0 +1,151 @@
+//===- RISCVCopyCombine.cpp - Remove special copy for RISC-V --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass removes a redundant COPY feeding a conditional branch so that
+// shrink-wrapping can apply; it is effective when data types require extension.
+//
+// After finalize-isel:
+//   bb0:
+//   liveins: $x10, $x11
+//     %1:gpr = COPY $x11   ---- will be deleted by this pass
+//     %0:gpr = COPY $x10
+//     %2:gpr = COPY %1:gpr ---- without this pass, sink to bb1 in machine-sink,
+//                               then delete at regalloc
+//     BEQ %0:gpr, killed %3:gpr, %bb.3 PseudoBR %bb1
+//
+//   bb1:
+//   bb2:
+//     BNE %2:gpr, killed %5:gpr, %bb.2
+// ...
+// After regalloc
+//  bb0:
+//    liveins: $x10, $x11
+//    renamable $x8 = COPY $x11
+//    renamable $x11 = ADDI $x0, 57 --- defines x11, so the COPY cannot be sunk
+//    BEQ killed renamable $x10, killed renamable $x11, %bb.4
+//    PseudoBR %bb.1
+//
+//  bb1:
+//  bb2:
+//    BEQ killed renamable $x8, killed renamable $x10, %bb.4
+//
+// ----->
+//
+// After this pass:
+//   bb0:
+//   liveins: $x10, $x11
+//     %0:gpr = COPY $x10
+//     %2:gpr = COPY $x11
+//     BEQ %0:gpr, killed %3:gpr, %bb.3
+//     PseudoBR %bb1
+//
+//   bb1:
+//   bb2:
+//     BNE %2:gpr, killed %5:gpr, %bb.2
+// ...
+// After regalloc
+//  bb0:
+//    liveins: $x10, $x11
+//    renamable $x12 = ADDI $x0, 57
+//    renamable $x8 = COPY $x11
+//    BEQ killed renamable $x10, killed renamable $x12, %bb.4
+//    PseudoBR %bb.1
+//
+//  bb1:
+//  bb2:
+//    BEQ killed renamable $x8, killed renamable $x10, %bb.4
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVSubtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+using namespace llvm;
+#define DEBUG_TYPE "riscv-copy-combine"
+#define RISCV_COPY_COMBINE "RISC-V Copy Combine"
+
+STATISTIC(NumCopyDeleted, "Number of copy deleted");
+
+namespace {
+class RISCVCopyCombine : public MachineFunctionPass {
+public:
+  static char ID;
+  const TargetInstrInfo *TII;
+  MachineRegisterInfo *MRI;
+  const TargetRegisterInfo *TRI;
+
+  RISCVCopyCombine() : MachineFunctionPass(ID) {}
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::IsSSA);
+  }
+
+  StringRef getPassName() const override { return RISCV_COPY_COMBINE; }
+
+private:
+  bool optimizeBlock(MachineBasicBlock &MBB);
+};
+} // end anonymous namespace
+
+char RISCVCopyCombine::ID = 0;
+INITIALIZE_PASS(RISCVCopyCombine, DEBUG_TYPE, RISCV_COPY_COMBINE, false, false)
+
+/// If Cond[1] of \p MBB's conditional branch is a COPY of a single-use vreg
+/// defined by a same-block COPY, erase that inner COPY. Returns true on change.
+bool RISCVCopyCombine::optimizeBlock(MachineBasicBlock &MBB) {
+  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+  SmallVector<MachineOperand, 3> Cond;
+  if (TII->analyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify*/ false) ||
+      !TBB || Cond.size() != 3 || !Cond[1].isReg())
+    return false;
+
+  // getVRegDef may return null (Src below is guarded likewise); do not deref.
+  MachineInstr *MI = MRI->getVRegDef(Cond[1].getReg());
+  if (!MI || MI->getOpcode() != RISCV::COPY)
+    return false;
+
+  Register Op1reg = MI->getOperand(1).getReg();
+  if (!Op1reg.isVirtual() || !MRI->hasOneUse(Op1reg) ||
+      !MI->getOperand(0).getReg().isVirtual())
+    return false;
+
+  MachineInstr *Src = MRI->getVRegDef(Op1reg);
+  if (!Src || Src->getOpcode() != RISCV::COPY ||
+      Src->getParent() != MI->getParent())
+    return false;
+
+  // MI is Op1reg's only user, so rewrite it to read Src's source and drop Src.
+  Register SrcOp1reg = Src->getOperand(1).getReg();
+  MRI->replaceRegWith(Op1reg, SrcOp1reg);
+  MRI->clearKillFlags(SrcOp1reg);
+  LLVM_DEBUG(dbgs() << "Deleting this copy instruction "; Src->print(dbgs()));
+  ++NumCopyDeleted;
+  Src->eraseFromParent();
+  return true;
+}
+
+bool RISCVCopyCombine::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()))
+    return false;
+
+  TII = MF.getSubtarget().getInstrInfo();
+  MRI = &MF.getRegInfo();
+  TRI = MRI->getTargetRegisterInfo();
+
+  bool Changed = false;
+  for (MachineBasicBlock &MBB : MF)
+    Changed |= optimizeBlock(MBB);
+
+  return Changed;
+}
+
+FunctionPass *llvm::createRISCVCopyCombinePass() {
+  return new RISCVCopyCombine();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 15dd4d57727dd..86d8d8f33ca04 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -128,6 +128,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   initializeRISCVPostLegalizerCombinerPass(*PR);
   initializeKCFIPass(*PR);
   initializeRISCVDeadRegisterDefinitionsPass(*PR);
+  initializeRISCVCopyCombinePass(*PR);
   initializeRISCVLateBranchOptPass(*PR);
   initializeRISCVMakeCompressibleOptPass(*PR);
   initializeRISCVGatherScatterLoweringPass(*PR);
@@ -455,6 +456,7 @@ bool RISCVPassConfig::addRegAssignAndRewriteFast() {
   if (TM->getOptLevel() != CodeGenOptLevel::None &&
       EnableRISCVDeadRegisterElimination)
     addPass(createRISCVDeadRegisterDefinitionsPass());
+
   return TargetPassConfig::addRegAssignAndRewriteFast();
 }
 
@@ -598,6 +600,7 @@ void RISCVPassConfig::addPreEmitPass2() {
 }
 
 void RISCVPassConfig::addMachineSSAOptimization() {
+  addPass(createRISCVCopyCombinePass());
   addPass(createRISCVVectorPeepholePass());
   addPass(createRISCVFoldMemOffsetPass());
 
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 19de864422bc5..7ebe888b7cc52 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -96,6 +96,7 @@
 ; CHECK-NEXT:       Lazy Block Frequency Analysis
 ; CHECK-NEXT:       RISC-V DAG->DAG Pattern Instruction Selection
 ; CHECK-NEXT:       Finalize ISel and expand pseudo-instructions
+; CHECK-NEXT:       RISC-V Copy Combine
 ; CHECK-NEXT:       RISC-V Vector Peephole Optimization
 ; CHECK-NEXT:       RISC-V Fold Memory Offset
 ; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
diff --git a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
index a5426e560bd65..714e8d1cb74c0 100644
--- a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
@@ -1080,33 +1080,33 @@ define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) {
 ; RV32-NEXT:    .cfi_offset s5, -28
 ; RV32-NEXT:    .cfi_offset s6, -32
 ; RV32-NEXT:    mv s5, a5
-; RV32-NEXT:    mv s3, a1
-; RV32-NEXT:    andi a1, a5, 1
-; RV32-NEXT:    beqz a1, .LBB32_8
+; RV32-NEXT:    mv s2, a0
+; RV32-NEXT:    andi a0, a5, 1
+; RV32-NEXT:    beqz a0, .LBB32_8
 ; RV32-NEXT:  # %bb.1: # %t
 ; RV32-NEXT:    mv s0, a4
-; RV32-NEXT:    mv s2, a3
+; RV32-NEXT:    mv s3, a3
 ; RV32-NEXT:    mv s1, a2
-; RV32-NEXT:    mv s4, a0
-; RV32-NEXT:    beq s3, a3, .LBB32_3
+; RV32-NEXT:    mv s4, a1
+; RV32-NEXT:    beq a1, a3, .LBB32_3
 ; RV32-NEXT:  # %bb.2: # %t
-; RV32-NEXT:    sltu s6, s3, s2
+; RV32-NEXT:    sltu s6, s4, s3
 ; RV32-NEXT:    j .LBB32_4
 ; RV32-NEXT:  .LBB32_3:
-; RV32-NEXT:    sltu s6, s4, s1
+; RV32-NEXT:    sltu s6, s2, s1
 ; RV32-NEXT:  .LBB32_4: # %t
 ; RV32-NEXT:    mv a0, s6
 ; RV32-NEXT:    call call
 ; RV32-NEXT:    beqz s6, .LBB32_8
 ; RV32-NEXT:  # %bb.5: # %end
-; RV32-NEXT:    sltu a1, s4, s1
+; RV32-NEXT:    sltu a1, s2, s1
 ; RV32-NEXT:    mv a0, a1
-; RV32-NEXT:    beq s3, s2, .LBB32_7
+; RV32-NEXT:    beq s4, s3, .LBB32_7
 ; RV32-NEXT:  # %bb.6: # %end
-; RV32-NEXT:    sltu a0, s3, s2
+; RV32-NEXT:    sltu a0, s4, s3
 ; RV32-NEXT:  .LBB32_7: # %end
-; RV32-NEXT:    sub a2, s3, s2
-; RV32-NEXT:    sub a3, s4, s1
+; RV32-NEXT:    sub a2, s4, s3
+; RV32-NEXT:    sub a3, s2, s1
 ; RV32-NEXT:    sub a2, a2, a1
 ; RV32-NEXT:    sw a3, 0(s0)
 ; RV32-NEXT:    sw a2, 4(s0)
diff --git a/llvm/test/CodeGen/RISCV/rv64-double-convert.ll b/llvm/test/CodeGen/RISCV/rv64-double-convert.ll
index dd49d9e3e2dce..f9fd528584169 100644
--- a/llvm/test/CodeGen/RISCV/rv64-double-convert.ll
+++ b/llvm/test/CodeGen/RISCV/rv64-double-convert.ll
@@ -69,14 +69,14 @@ define i128 @fptosi_sat_f64_to_i128(double %a) nounwind {
 ; RV64I-NEXT:    sd s3, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s4, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s5, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    li a1, -449
 ; RV64I-NEXT:    slli a1, a1, 53
 ; RV64I-NEXT:    call __gedf2
 ; RV64I-NEXT:    mv s2, a0
-; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call __fixdfti
-; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    mv s3, a1
 ; RV64I-NEXT:    li s5, -1
 ; RV64I-NEXT:    bgez s2, .LBB4_2
@@ -86,15 +86,15 @@ define i128 @fptosi_sat_f64_to_i128(double %a) nounwind {
 ; RV64I-NEXT:    li a0, 575
 ; RV64I-NEXT:    slli a0, a0, 53
 ; RV64I-NEXT:    addi a1, a0, -1
-; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call __gtdf2
 ; RV64I-NEXT:    mv s4, a0
 ; RV64I-NEXT:    blez a0, .LBB4_4
 ; RV64I-NEXT:  # %bb.3:
 ; RV64I-NEXT:    srli s3, s5, 1
 ; RV64I-NEXT:  .LBB4_4:
-; RV64I-NEXT:    mv a0, s0
-; RV64I-NEXT:    mv a1, s0
+; RV64I-NEXT:    mv a0, s1
+; RV64I-NEXT:    mv a1, s1
 ; RV64I-NEXT:    call __unorddf2
 ; RV64I-NEXT:    snez a0, a0
 ; RV64I-NEXT:    slti a1, s2, 0
@@ -102,7 +102,7 @@ define i128 @fptosi_sat_f64_to_i128(double %a) nounwind {
 ; RV64I-NEXT:    addi a0, a0, -1
 ; RV64I-NEXT:    addi a3, a1, -1
 ; RV64I-NEXT:    and a1, a0, s3
-; RV64I-NEXT:    and a3, a3, s1
+; RV64I-NEXT:    and a3, a3, s0
 ; RV64I-NEXT:    neg a2, a2
 ; RV64I-NEXT:    or a2, a2, a3
 ; RV64I-NEXT:    and a0, a0, a2
diff --git a/llvm/test/CodeGen/RISCV/shrinkwrap.ll b/llvm/test/CodeGen/RISCV/shrinkwrap.ll
index 90f9509c72373..235c714f7f33b 100644
--- a/llvm/test/CodeGen/RISCV/shrinkwrap.ll
+++ b/llvm/test/CodeGen/RISCV/shrinkwrap.ll
@@ -361,6 +361,9 @@ define void @li_straightline_b(i32 zeroext %a, i32 zeroext %b) {
 ;
 ; RV64I-SW-LABEL: li_straightline_b:
 ; RV64I-SW:       # %bb.0:
+; RV64I-SW-NEXT:    li a2, 57
+; RV64I-SW-NEXT:    beq a0, a2, .LBB3_4
+; RV64I-SW-NEXT:  # %bb.1: # %do_call
 ; RV64I-SW-NEXT:    addi sp, sp, -16
 ; RV64I-SW-NEXT:    .cfi_def_cfa_offset 16
 ; RV64I-SW-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
@@ -368,21 +371,19 @@ define void @li_straightline_b(i32 zeroext %a, i32 zeroext %b) {
 ; RV64I-SW-NEXT:    .cfi_offset ra, -8
 ; RV64I-SW-NEXT:    .cfi_offset s0, -16
 ; RV64I-SW-NEXT:    mv s0, a1
-; RV64I-SW-NEXT:    li a1, 57
-; RV64I-SW-NEXT:    beq a0, a1, .LBB3_3
-; RV64I-SW-NEXT:  # %bb.1: # %do_call
 ; RV64I-SW-NEXT:    call foo
 ; RV64I-SW-NEXT:    li a0, 57
 ; RV64I-SW-NEXT:    beq s0, a0, .LBB3_3
 ; RV64I-SW-NEXT:  # %bb.2: # %do_call2
 ; RV64I-SW-NEXT:    call foo
-; RV64I-SW-NEXT:  .LBB3_3: # %exit
+; RV64I-SW-NEXT:  .LBB3_3:
 ; RV64I-SW-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-SW-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-SW-NEXT:    .cfi_restore ra
 ; RV64I-SW-NEXT:    .cfi_restore s0
 ; RV64I-SW-NEXT:    addi sp, sp, 16
 ; RV64I-SW-NEXT:    .cfi_def_cfa_offset 0
+; RV64I-SW-NEXT:  .LBB3_4: # %exit
 ; RV64I-SW-NEXT:    ret
   %cmp0 = icmp eq i32 %a, 57
   br i1 %cmp0, label %exit, label %do_call

>From 7070326605d352ac908b67d6439ff149ed78cd49 Mon Sep 17 00:00:00 2001
From: Liao Chunyu <liaochunyu126 at 126.com>
Date: Wed, 21 May 2025 13:55:57 +0800
Subject: [PATCH 2/2] Update llvm/lib/Target/RISCV/RISCVCopyCombine.cpp

Co-authored-by: Sam Elliott <sam at lenary.co.uk>
---
 llvm/lib/Target/RISCV/RISCVCopyCombine.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVCopyCombine.cpp b/llvm/lib/Target/RISCV/RISCVCopyCombine.cpp
index d3beee1c681d4..e24750d3690bc 100644
--- a/llvm/lib/Target/RISCV/RISCVCopyCombine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCopyCombine.cpp
@@ -16,7 +16,8 @@
 //     %0:gpr = COPY $x10
 //     %2:gpr = COPY %1:gpr ---- without this pass, sink to bb1 in machine-sink,
 //                               then delete at regalloc
-//     BEQ %0:gpr, killed %3:gpr, %bb.3 PseudoBR %bb1
+//     BEQ %0:gpr, killed %3:gpr, %bb.3
+//     PseudoBR %bb1
 //
 //   bb1:
 //   bb2:



More information about the llvm-commits mailing list