[llvm] [CodeGen][NewPM] Port "PrologEpilogInserter" to NPM (PR #130550)

Vikram Hegde via llvm-commits llvm-commits at lists.llvm.org
Sun Mar 9 22:57:25 PDT 2025


https://github.com/vikramRH updated https://github.com/llvm/llvm-project/pull/130550

>From 3f6ac9e15e57017e89a18afc88d15fe0844cdd50 Mon Sep 17 00:00:00 2001
From: vikhegde <vikram.hegde at amd.com>
Date: Mon, 10 Mar 2025 11:25:59 +0530
Subject: [PATCH] [CodeGen][NewPM] Port "PrologEpilogInserter" to NPM

---
 llvm/include/llvm/CodeGen/PEI.h               |  27 +++++
 llvm/include/llvm/InitializePasses.h          |   2 +-
 llvm/include/llvm/Passes/CodeGenPassBuilder.h |   1 +
 .../llvm/Passes/MachinePassRegistry.def       |   2 +-
 llvm/lib/CodeGen/CodeGen.cpp                  |   2 +-
 llvm/lib/CodeGen/PrologEpilogInserter.cpp     | 105 +++++++++++-------
 llvm/lib/Passes/PassBuilder.cpp               |   1 +
 .../AArch64/aarch64-large-stack-spbump.mir    |   1 +
 .../AMDGPU/accvgpr-spill-scc-clobber.mir      |   4 +
 .../CodeGen/AMDGPU/agpr-copy-reuse-writes.mir |   1 +
 10 files changed, 103 insertions(+), 43 deletions(-)
 create mode 100644 llvm/include/llvm/CodeGen/PEI.h

diff --git a/llvm/include/llvm/CodeGen/PEI.h b/llvm/include/llvm/CodeGen/PEI.h
new file mode 100644
index 0000000000000..95503b7861b39
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/PEI.h
@@ -0,0 +1,27 @@
+//===- llvm/CodeGen/PEI.h ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PEI_H
+#define LLVM_CODEGEN_PEI_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class PrologEpilogInserterPass
+    : public PassInfoMixin<PrologEpilogInserterPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+
+  static bool isRequired() { return true; }
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_PEI_H
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index c2cb4cb4ef477..9ffd6f0054e4d 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -222,7 +222,7 @@ void initializeNaryReassociateLegacyPassPass(PassRegistry &);
 void initializeObjCARCContractLegacyPassPass(PassRegistry &);
 void initializeOptimizationRemarkEmitterWrapperPassPass(PassRegistry &);
 void initializeOptimizePHIsLegacyPass(PassRegistry &);
-void initializePEIPass(PassRegistry &);
+void initializePEILegacyPass(PassRegistry &);
 void initializePHIEliminationPass(PassRegistry &);
 void initializePartiallyInlineLibCallsLegacyPassPass(PassRegistry &);
 void initializePatchableFunctionPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 25899d04dc664..90310622742be 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -57,6 +57,7 @@
 #include "llvm/CodeGen/MachineSink.h"
 #include "llvm/CodeGen/MachineVerifier.h"
 #include "llvm/CodeGen/OptimizePHIs.h"
+#include "llvm/CodeGen/PEI.h"
 #include "llvm/CodeGen/PHIElimination.h"
 #include "llvm/CodeGen/PeepholeOptimizer.h"
 #include "llvm/CodeGen/PostRASchedulerList.h"
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index f99a5f2c74bf3..dbc49f28c9d2a 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -173,6 +173,7 @@ MACHINE_FUNCTION_PASS("print<machine-post-dom-tree>",
                       MachinePostDominatorTreePrinterPass(errs()))
 MACHINE_FUNCTION_PASS("print<slot-indexes>", SlotIndexesPrinterPass(errs()))
 MACHINE_FUNCTION_PASS("print<virtregmap>", VirtRegMapPrinterPass(errs()))
+MACHINE_FUNCTION_PASS("prolog-epilog", PrologEpilogInserterPass())
 MACHINE_FUNCTION_PASS("reg-usage-collector", RegUsageInfoCollectorPass())
 MACHINE_FUNCTION_PASS("reg-usage-propagation", RegUsageInfoPropagationPass())
 MACHINE_FUNCTION_PASS("register-coalescer", RegisterCoalescerPass())
@@ -274,7 +275,6 @@ DUMMY_MACHINE_FUNCTION_PASS("patchable-function", PatchableFunctionPass)
 DUMMY_MACHINE_FUNCTION_PASS("postra-machine-sink", PostRAMachineSinkingPass)
 DUMMY_MACHINE_FUNCTION_PASS("print-machine-uniformity", MachineUniformityInfoPrinterPass)
 DUMMY_MACHINE_FUNCTION_PASS("processimpdefs", ProcessImplicitDefsPass)
-DUMMY_MACHINE_FUNCTION_PASS("prologepilog", PrologEpilogInserterPass)
 DUMMY_MACHINE_FUNCTION_PASS("prologepilog-code", PrologEpilogCodeInserterPass)
 DUMMY_MACHINE_FUNCTION_PASS("ra-basic", RABasicPass)
 DUMMY_MACHINE_FUNCTION_PASS("ra-pbqp", RAPBQPPass)
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index beb7fb284a376..169c79248bd23 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -101,7 +101,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeMachineVerifierLegacyPassPass(Registry);
   initializeObjCARCContractLegacyPassPass(Registry);
   initializeOptimizePHIsLegacyPass(Registry);
-  initializePEIPass(Registry);
+  initializePEILegacyPass(Registry);
   initializePHIEliminationPass(Registry);
   initializePatchableFunctionPass(Registry);
   initializePeepholeOptimizerLegacyPass(Registry);
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 9b852c0fd49cf..913bfa576596b 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -36,6 +36,7 @@
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PEI.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
@@ -77,21 +78,7 @@ STATISTIC(NumFuncSeen, "Number of functions seen in PEI");
 
 namespace {
 
-class PEI : public MachineFunctionPass {
-public:
-  static char ID;
-
-  PEI() : MachineFunctionPass(ID) {
-    initializePEIPass(*PassRegistry::getPassRegistry());
-  }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override;
-
-  /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
-  /// frame indexes with appropriate references.
-  bool runOnMachineFunction(MachineFunction &MF) override;
-
-private:
+class PEIImpl {
   RegScavenger *RS = nullptr;
 
   // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
@@ -137,31 +124,50 @@ class PEI : public MachineFunctionPass {
 
   void insertPrologEpilogCode(MachineFunction &MF);
   void insertZeroCallUsedRegs(MachineFunction &MF);
+
+public:
+  PEIImpl(MachineOptimizationRemarkEmitter *ORE) : ORE(ORE) {}
+  bool run(MachineFunction &MF);
+};
+
+class PEILegacy : public MachineFunctionPass {
+public:
+  static char ID;
+
+  PEILegacy() : MachineFunctionPass(ID) {
+    initializePEILegacyPass(*PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+  /// frame indexes with appropriate references.
+  bool runOnMachineFunction(MachineFunction &MF) override;
 };
 
 } // end anonymous namespace
 
-char PEI::ID = 0;
+char PEILegacy::ID = 0;
 
-char &llvm::PrologEpilogCodeInserterID = PEI::ID;
+char &llvm::PrologEpilogCodeInserterID = PEILegacy::ID;
 
-INITIALIZE_PASS_BEGIN(PEI, DEBUG_TYPE, "Prologue/Epilogue Insertion", false,
-                      false)
+INITIALIZE_PASS_BEGIN(PEILegacy, DEBUG_TYPE, "Prologue/Epilogue Insertion",
+                      false, false)
 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
-INITIALIZE_PASS_END(PEI, DEBUG_TYPE,
+INITIALIZE_PASS_END(PEILegacy, DEBUG_TYPE,
                     "Prologue/Epilogue Insertion & Frame Finalization", false,
                     false)
 
 MachineFunctionPass *llvm::createPrologEpilogInserterPass() {
-  return new PEI();
+  return new PEILegacy();
 }
 
 STATISTIC(NumBytesStackSpace,
           "Number of bytes used for stack in all functions");
 
-void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
+void PEILegacy::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesCFG();
   AU.addPreserved<MachineLoopInfoWrapperPass>();
   AU.addPreserved<MachineDominatorTreeWrapperPass>();
@@ -213,9 +219,7 @@ static void stashEntryDbgValues(MachineBasicBlock &MBB,
       MI->removeFromParent();
 }
 
-/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
-/// frame indexes with appropriate references.
-bool PEI::runOnMachineFunction(MachineFunction &MF) {
+bool PEIImpl::run(MachineFunction &MF) {
   NumFuncSeen++;
   const Function &F = MF.getFunction();
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
@@ -223,7 +227,6 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
 
   RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr;
   FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(MF);
-  ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
 
   // Spill frame pointer and/or base pointer registers if they are clobbered.
   // It is placed before call frame instruction elimination so it will not mess
@@ -354,9 +357,31 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
   return true;
 }
 
+/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+/// frame indexes with appropriate references.
+bool PEILegacy::runOnMachineFunction(MachineFunction &MF) {
+  MachineOptimizationRemarkEmitter *ORE =
+      &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
+  return PEIImpl(ORE).run(MF);
+}
+
+PreservedAnalyses
+PrologEpilogInserterPass::run(MachineFunction &MF,
+                              MachineFunctionAnalysisManager &MFAM) {
+  MachineOptimizationRemarkEmitter &ORE =
+      MFAM.getResult<MachineOptimizationRemarkEmitterAnalysis>(MF);
+  if (!PEIImpl(&ORE).run(MF))
+    return PreservedAnalyses::all();
+
+  return getMachineFunctionPassPreservedAnalyses()
+      .preserveSet<CFGAnalyses>()
+      .preserve<MachineDominatorTreeAnalysis>()
+      .preserve<MachineLoopAnalysis>();
+}
+
 /// Calculate the MaxCallFrameSize variable for the function's frame
 /// information and eliminate call frame pseudo instructions.
-void PEI::calculateCallFrameInfo(MachineFunction &MF) {
+void PEIImpl::calculateCallFrameInfo(MachineFunction &MF) {
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
   MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -397,7 +422,7 @@ void PEI::calculateCallFrameInfo(MachineFunction &MF) {
 
 /// Compute the sets of entry and return blocks for saving and restoring
 /// callee-saved registers, and placing prolog and epilog code.
-void PEI::calculateSaveRestoreBlocks(MachineFunction &MF) {
+void PEIImpl::calculateSaveRestoreBlocks(MachineFunction &MF) {
   const MachineFrameInfo &MFI = MF.getFrameInfo();
 
   // Even when we do not change any CSR, we still want to insert the
@@ -651,7 +676,7 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
   }
 }
 
-void PEI::spillCalleeSavedRegs(MachineFunction &MF) {
+void PEIImpl::spillCalleeSavedRegs(MachineFunction &MF) {
   // We can't list this requirement in getRequiredProperties because some
   // targets (WebAssembly) use virtual registers past this point, and the pass
   // pipeline is set up without giving the passes a chance to look at the
@@ -843,7 +868,7 @@ static void AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
 
 /// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
 /// abstract stack objects.
-void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
+void PEIImpl::calculateFrameObjectOffsets(MachineFunction &MF) {
   const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
 
   bool StackGrowsDown =
@@ -1158,7 +1183,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
 /// insertPrologEpilogCode - Scan the function for modified callee saved
 /// registers, insert spill code for these callee saved registers, then add
 /// prolog and epilog code to the function.
-void PEI::insertPrologEpilogCode(MachineFunction &MF) {
+void PEIImpl::insertPrologEpilogCode(MachineFunction &MF) {
   const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
 
   // Add prologue to the function...
@@ -1195,7 +1220,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &MF) {
 }
 
 /// insertZeroCallUsedRegs - Zero out call used registers.
-void PEI::insertZeroCallUsedRegs(MachineFunction &MF) {
+void PEIImpl::insertZeroCallUsedRegs(MachineFunction &MF) {
   const Function &F = MF.getFunction();
 
   if (!F.hasFnAttribute("zero-call-used-regs"))
@@ -1338,7 +1363,7 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) {
 
 /// Replace all FrameIndex operands with physical register references and actual
 /// offsets.
-void PEI::replaceFrameIndicesBackward(MachineFunction &MF) {
+void PEIImpl::replaceFrameIndicesBackward(MachineFunction &MF) {
   const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
 
   for (auto &MBB : MF) {
@@ -1366,7 +1391,7 @@ void PEI::replaceFrameIndicesBackward(MachineFunction &MF) {
 
 /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
 /// register references and actual offsets.
-void PEI::replaceFrameIndices(MachineFunction &MF) {
+void PEIImpl::replaceFrameIndices(MachineFunction &MF) {
   const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
 
   for (auto &MBB : MF) {
@@ -1382,8 +1407,8 @@ void PEI::replaceFrameIndices(MachineFunction &MF) {
   }
 }
 
-bool PEI::replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI,
-                                      unsigned OpIdx, int SPAdj) {
+bool PEIImpl::replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI,
+                                          unsigned OpIdx, int SPAdj) {
   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
   const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
   if (MI.isDebugValue()) {
@@ -1464,8 +1489,8 @@ bool PEI::replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI,
   return false;
 }
 
-void PEI::replaceFrameIndicesBackward(MachineBasicBlock *BB,
-                                      MachineFunction &MF, int &SPAdj) {
+void PEIImpl::replaceFrameIndicesBackward(MachineBasicBlock *BB,
+                                          MachineFunction &MF, int &SPAdj) {
   assert(MF.getSubtarget().getRegisterInfo() &&
          "getRegisterInfo() must be implemented!");
 
@@ -1509,8 +1534,8 @@ void PEI::replaceFrameIndicesBackward(MachineBasicBlock *BB,
   }
 }
 
-void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
-                              int &SPAdj) {
+void PEIImpl::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
+                                  int &SPAdj) {
   assert(MF.getSubtarget().getRegisterInfo() &&
          "getRegisterInfo() must be implemented!");
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 8080059f0bb03..4b5b4af298e84 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -128,6 +128,7 @@
 #include "llvm/CodeGen/MachineTraceMetrics.h"
 #include "llvm/CodeGen/MachineVerifier.h"
 #include "llvm/CodeGen/OptimizePHIs.h"
+#include "llvm/CodeGen/PEI.h"
 #include "llvm/CodeGen/PHIElimination.h"
 #include "llvm/CodeGen/PeepholeOptimizer.h"
 #include "llvm/CodeGen/PostRASchedulerList.h"
diff --git a/llvm/test/CodeGen/AArch64/aarch64-large-stack-spbump.mir b/llvm/test/CodeGen/AArch64/aarch64-large-stack-spbump.mir
index f920813f2b42d..1181869a53da1 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-large-stack-spbump.mir
+++ b/llvm/test/CodeGen/AArch64/aarch64-large-stack-spbump.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=aarch64 -run-pass=prologepilog %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64 -run-pass=prolog-epilog %s -o - | FileCheck %s
 --- |
   define i32 @_Z4funcv() {
   entry:
diff --git a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
index c91b686697b9d..6924c168583e8 100644
--- a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
+++ b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
@@ -1,8 +1,12 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=GFX908 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -passes='prolog-epilog' %s -o - | FileCheck -check-prefix=GFX908 %s
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=GFX90A %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes='prolog-epilog' %s -o - | FileCheck -check-prefix=GFX90A %s
 # RUN: llc -mattr=+enable-flat-scratch -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=GFX908-FLATSCR %s
+# RUN: llc -mattr=+enable-flat-scratch -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -passes='prolog-epilog' %s -o - | FileCheck -check-prefix=GFX908-FLATSCR %s
 # RUN: llc -mattr=+enable-flat-scratch -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=GFX90A-FLATSCR %s
+# RUN: llc -mattr=+enable-flat-scratch -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes='prolog-epilog' %s -o - | FileCheck -check-prefix=GFX90A-FLATSCR %s
 
 ---
 name: agpr32_restore_clobber_scc
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-reuse-writes.mir b/llvm/test/CodeGen/AMDGPU/agpr-copy-reuse-writes.mir
index 683a89061ddda..1573903945a3e 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-copy-reuse-writes.mir
+++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-reuse-writes.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass=prologepilog,postrapseudos -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX908 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -passes='prolog-epilog,post-ra-pseudos' -o - %s | FileCheck -check-prefix=GFX908 %s
 
 ---
 name: standard



More information about the llvm-commits mailing list