[llvm] AMDGPU/NewPM: Port SIFoldOperands to new pass manager (PR #105791)

Fri Aug 23 00:06:52 PDT 2024

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Akshat Oke (Akshat-Oke)

<details>
<summary>Changes</summary>



---

Patch is 40.56 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/105791.diff


40 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+3-3) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def (+1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+4-3) 
- (modified) llvm/lib/Target/AMDGPU/SIFoldOperands.cpp (+58-38) 
- (added) llvm/lib/Target/AMDGPU/SIFoldOperands.h (+23) 
- (modified) llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/flat-scratch-fold-fi.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir (+3) 
- (modified) llvm/test/CodeGen/AMDGPU/fold-cndmask-wave32.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/fold-cndmask.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/fold-fi-mubuf.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/fold-fi-operand-shrink.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/fold-implicit-operand.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/fold-multiple-commute.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/fold-operands-order.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/fold-operands-remove-m0-redef.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/fold-operands-scalar-fmac.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/fold-over-exec.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/fold-readlane.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/fold-short-64-bit-literals.mir (+2) 
- (modified) llvm/test/CodeGen/AMDGPU/fold-vgpr-phyreg.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-clear-kill-flags.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/folding-of-i32-as-i64.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/high-bits-zeroed-16-bit-ops.mir (+3) 
- (modified) llvm/test/CodeGen/AMDGPU/huge-number-operand-folds.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/omod-nsz-flag.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/si-fold-aligned-agprs.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/si-fold-aligned-vgprs.mir (+2) 
- (modified) llvm/test/CodeGen/AMDGPU/si-fold-copy-kills.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/si-fold-kimm.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/si-fold-reg-sequence.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/si-fold-scalar-clamp.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/skip-fold-regsequence.mir (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/swdev282079.mir (+1) 


``````````diff

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index f5044f52f1648d..dd5dfc044f240d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -35,7 +35,7 @@ void initializeAMDGPURegBankSelectPass(PassRegistry &);
 // SI Passes
 FunctionPass *createGCNDPPCombinePass();
 FunctionPass *createSIAnnotateControlFlowLegacyPass();
-FunctionPass *createSIFoldOperandsPass();
+FunctionPass *createSIFoldOperandsLegacyPass();
 FunctionPass *createSIPeepholeSDWAPass();
 FunctionPass *createSILowerI1CopiesLegacyPass();
 FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass();
@@ -164,8 +164,8 @@ extern char &AMDGPURewriteOutArgumentsID;
 void initializeGCNDPPCombinePass(PassRegistry &);
 extern char &GCNDPPCombineID;
 
-void initializeSIFoldOperandsPass(PassRegistry &);
-extern char &SIFoldOperandsID;
+void initializeSIFoldOperandsLegacyPass(PassRegistry &);
+extern char &SIFoldOperandsLegacyID;
 
 void initializeSIPeepholeSDWAPass(PassRegistry &);
 extern char &SIPeepholeSDWAID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 5c068b5695c8d1..0304fd7d11f164 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -96,4 +96,5 @@ FUNCTION_PASS_WITH_PARAMS(
 MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this))
 MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass())
 MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass())
+MACHINE_FUNCTION_PASS("si-fold-operands", SIFoldOperandsPass())
 #undef MACHINE_FUNCTION_PASS
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 2b9e431e86f893..f7ac185718dde2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -34,6 +34,7 @@
 #include "R600.h"
 #include "R600TargetMachine.h"
 #include "SIFixSGPRCopies.h"
+#include "SIFoldOperands.h"
 #include "SIMachineFunctionInfo.h"
 #include "SIMachineScheduler.h"
 #include "TargetInfo/AMDGPUTargetInfo.h"
@@ -424,7 +425,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeSILowerSGPRSpillsPass(*PR);
   initializeSIFixSGPRCopiesLegacyPass(*PR);
   initializeSIFixVGPRCopiesPass(*PR);
-  initializeSIFoldOperandsPass(*PR);
+  initializeSIFoldOperandsLegacyPass(*PR);
   initializeSIPeepholeSDWAPass(*PR);
   initializeSIShrinkInstructionsPass(*PR);
   initializeSIOptimizeExecMaskingPreRAPass(*PR);
@@ -1281,7 +1282,7 @@ void GCNPassConfig::addMachineSSAOptimization() {
   // instructions leftover after the operands are folded as well.
   //
   // XXX - Can we get away without running DeadMachineInstructionElim again?
-  addPass(&SIFoldOperandsID);
+  addPass(&SIFoldOperandsLegacyID);
   if (EnableDPPCombine)
     addPass(&GCNDPPCombineID);
   addPass(&SILoadStoreOptimizerID);
@@ -1289,7 +1290,7 @@ void GCNPassConfig::addMachineSSAOptimization() {
     addPass(&SIPeepholeSDWAID);
     addPass(&EarlyMachineLICMID);
     addPass(&MachineCSEID);
-    addPass(&SIFoldOperandsID);
+    addPass(&SIFoldOperandsLegacyID);
   }
   addPass(&DeadMachineInstructionElimID);
   addPass(createSIShrinkInstructionsPass());
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 875738dad74ced..88c656527d8a51 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 //
 
+#include "SIFoldOperands.h"
 #include "AMDGPU.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -66,9 +67,8 @@ struct FoldCandidate {
   bool needsShrink() const { return ShrinkOpcode != -1; }
 };
 
-class SIFoldOperands : public MachineFunctionPass {
+class SIFoldOperandsImpl {
 public:
-  static char ID;
   MachineRegisterInfo *MRI;
   const SIInstrInfo *TII;
   const SIRegisterInfo *TRI;
@@ -121,11 +121,23 @@ class SIFoldOperands : public MachineFunctionPass {
   bool tryOptimizeAGPRPhis(MachineBasicBlock &MBB);
 
 public:
-  SIFoldOperands() : MachineFunctionPass(ID) {
-    initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
-  }
+  SIFoldOperandsImpl() = default;
 
-  bool runOnMachineFunction(MachineFunction &MF) override;
+  bool run(MachineFunction& MF);
+};
+
+class SIFoldOperandsLegacy : public MachineFunctionPass {
+  public:
+  static char ID;
+
+  SIFoldOperandsLegacy() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    if (skipFunction(MF.getFunction()))
+      return false;
+    SIFoldOperandsImpl Impl;
+    return Impl.run(MF);
+  }
 
   StringRef getPassName() const override { return "SI Fold Operands"; }
 
@@ -137,12 +149,12 @@ class SIFoldOperands : public MachineFunctionPass {
 
 } // End anonymous namespace.
 
-INITIALIZE_PASS(SIFoldOperands, DEBUG_TYPE,
+INITIALIZE_PASS(SIFoldOperandsLegacy, DEBUG_TYPE,
                 "SI Fold Operands", false, false)
 
-char SIFoldOperands::ID = 0;
+char SIFoldOperandsLegacy::ID = 0;
 
-char &llvm::SIFoldOperandsID = SIFoldOperands::ID;
+char &llvm::SIFoldOperandsLegacyID = SIFoldOperandsLegacy::ID;
 
 static const TargetRegisterClass *getRegOpRC(const MachineRegisterInfo &MRI,
                                              const TargetRegisterInfo &TRI,
@@ -177,7 +189,7 @@ static unsigned macToMad(unsigned Opc) {
 
 // TODO: Add heuristic that the frame index might not fit in the addressing mode
 // immediate offset to avoid materializing in loops.
-bool SIFoldOperands::frameIndexMayFold(const MachineInstr &UseMI, int OpNo,
+bool SIFoldOperandsImpl::frameIndexMayFold(const MachineInstr &UseMI, int OpNo,
                                        const MachineOperand &OpToFold) const {
   if (!OpToFold.isFI())
     return false;
@@ -196,11 +208,11 @@ bool SIFoldOperands::frameIndexMayFold(const MachineInstr &UseMI, int OpNo,
   return OpNo == VIdx && SIdx == -1;
 }
 
-FunctionPass *llvm::createSIFoldOperandsPass() {
-  return new SIFoldOperands();
+FunctionPass *llvm::createSIFoldOperandsLegacyPass() {
+  return new SIFoldOperandsLegacy();
 }
 
-bool SIFoldOperands::canUseImmWithOpSel(FoldCandidate &Fold) const {
+bool SIFoldOperandsImpl::canUseImmWithOpSel(FoldCandidate &Fold) const {
   MachineInstr *MI = Fold.UseMI;
   MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
   const uint64_t TSFlags = MI->getDesc().TSFlags;
@@ -230,7 +242,7 @@ bool SIFoldOperands::canUseImmWithOpSel(FoldCandidate &Fold) const {
   return true;
 }
 
-bool SIFoldOperands::tryFoldImmWithOpSel(FoldCandidate &Fold) const {
+bool SIFoldOperandsImpl::tryFoldImmWithOpSel(FoldCandidate &Fold) const {
   MachineInstr *MI = Fold.UseMI;
   MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
   unsigned Opcode = MI->getOpcode();
@@ -354,7 +366,7 @@ bool SIFoldOperands::tryFoldImmWithOpSel(FoldCandidate &Fold) const {
   return false;
 }
 
-bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const {
+bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
   MachineInstr *MI = Fold.UseMI;
   MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
   assert(Old.isReg());
@@ -464,7 +476,7 @@ static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
   FoldList.emplace_back(MI, OpNo, FoldOp, Commuted, ShrinkOp);
 }
 
-bool SIFoldOperands::tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
+bool SIFoldOperandsImpl::tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
                                       MachineInstr *MI, unsigned OpNo,
                                       MachineOperand *OpToFold) const {
   const unsigned Opc = MI->getOpcode();
@@ -645,7 +657,7 @@ bool SIFoldOperands::tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
   return true;
 }
 
-bool SIFoldOperands::isUseSafeToFold(const MachineInstr &MI,
+bool SIFoldOperandsImpl::isUseSafeToFold(const MachineInstr &MI,
                                      const MachineOperand &UseMO) const {
   // Operands of SDWA instructions must be registers.
   return !TII->isSDWA(MI);
@@ -654,7 +666,7 @@ bool SIFoldOperands::isUseSafeToFold(const MachineInstr &MI,
 // Find a def of the UseReg, check if it is a reg_sequence and find initializers
 // for each subreg, tracking it to foldable inline immediate if possible.
 // Returns true on success.
-bool SIFoldOperands::getRegSeqInit(
+bool SIFoldOperandsImpl::getRegSeqInit(
     SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs,
     Register UseReg, uint8_t OpTy) const {
   MachineInstr *Def = MRI->getVRegDef(UseReg);
@@ -686,7 +698,7 @@ bool SIFoldOperands::getRegSeqInit(
   return true;
 }
 
-bool SIFoldOperands::tryToFoldACImm(
+bool SIFoldOperandsImpl::tryToFoldACImm(
     const MachineOperand &OpToFold, MachineInstr *UseMI, unsigned UseOpIdx,
     SmallVectorImpl<FoldCandidate> &FoldList) const {
   const MCInstrDesc &Desc = UseMI->getDesc();
@@ -752,7 +764,7 @@ bool SIFoldOperands::tryToFoldACImm(
   return true;
 }
 
-void SIFoldOperands::foldOperand(
+void SIFoldOperandsImpl::foldOperand(
   MachineOperand &OpToFold,
   MachineInstr *UseMI,
   int UseOpIdx,
@@ -1187,7 +1199,7 @@ static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
 }
 
 MachineOperand *
-SIFoldOperands::getImmOrMaterializedImm(MachineOperand &Op) const {
+SIFoldOperandsImpl::getImmOrMaterializedImm(MachineOperand &Op) const {
   // If this has a subregister, it obviously is a register source.
   if (!Op.isReg() || Op.getSubReg() != AMDGPU::NoSubRegister ||
       !Op.getReg().isVirtual())
@@ -1206,7 +1218,7 @@ SIFoldOperands::getImmOrMaterializedImm(MachineOperand &Op) const {
 // Try to simplify operations with a constant that may appear after instruction
 // selection.
 // TODO: See if a frame index with a fixed offset can fold.
-bool SIFoldOperands::tryConstantFoldOp(MachineInstr *MI) const {
+bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
   if (!MI->allImplicitDefsAreDead())
     return false;
 
@@ -1307,7 +1319,7 @@ bool SIFoldOperands::tryConstantFoldOp(MachineInstr *MI) const {
 }
 
 // Try to fold an instruction into a simpler one
-bool SIFoldOperands::tryFoldCndMask(MachineInstr &MI) const {
+bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const {
   unsigned Opc = MI.getOpcode();
   if (Opc != AMDGPU::V_CNDMASK_B32_e32 && Opc != AMDGPU::V_CNDMASK_B32_e64 &&
       Opc != AMDGPU::V_CNDMASK_B64_PSEUDO)
@@ -1346,7 +1358,7 @@ bool SIFoldOperands::tryFoldCndMask(MachineInstr &MI) const {
   return true;
 }
 
-bool SIFoldOperands::tryFoldZeroHighBits(MachineInstr &MI) const {
+bool SIFoldOperandsImpl::tryFoldZeroHighBits(MachineInstr &MI) const {
   if (MI.getOpcode() != AMDGPU::V_AND_B32_e64 &&
       MI.getOpcode() != AMDGPU::V_AND_B32_e32)
     return false;
@@ -1368,7 +1380,7 @@ bool SIFoldOperands::tryFoldZeroHighBits(MachineInstr &MI) const {
   return true;
 }
 
-bool SIFoldOperands::foldInstOperand(MachineInstr &MI,
+bool SIFoldOperandsImpl::foldInstOperand(MachineInstr &MI,
                                      MachineOperand &OpToFold) const {
   // We need mutate the operands of new mov instructions to add implicit
   // uses of EXEC, but adding them invalidates the use_iterator, so defer
@@ -1442,7 +1454,7 @@ bool SIFoldOperands::foldInstOperand(MachineInstr &MI,
   return true;
 }
 
-bool SIFoldOperands::tryFoldFoldableCopy(
+bool SIFoldOperandsImpl::tryFoldFoldableCopy(
     MachineInstr &MI, MachineOperand *&CurrentKnownM0Val) const {
   // Specially track simple redefs of m0 to the same value in a block, so we
   // can erase the later ones.
@@ -1519,7 +1531,7 @@ bool SIFoldOperands::tryFoldFoldableCopy(
 
 // Clamp patterns are canonically selected to v_max_* instructions, so only
 // handle them.
-const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
+const MachineOperand *SIFoldOperandsImpl::isClamp(const MachineInstr &MI) const {
   unsigned Op = MI.getOpcode();
   switch (Op) {
   case AMDGPU::V_MAX_F32_e64:
@@ -1567,7 +1579,7 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
 }
 
 // FIXME: Clamp for v_mad_mixhi_f16 handled during isel.
-bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) {
+bool SIFoldOperandsImpl::tryFoldClamp(MachineInstr &MI) {
   const MachineOperand *ClampSrc = isClamp(MI);
   if (!ClampSrc || !MRI->hasOneNonDBGUser(ClampSrc->getReg()))
     return false;
@@ -1662,7 +1674,7 @@ static int getOModValue(unsigned Opc, int64_t Val) {
 // FIXME: Does this need to check IEEE mode bit? SNaNs are generally not
 // handled, so will anything other than that break?
 std::pair<const MachineOperand *, int>
-SIFoldOperands::isOMod(const MachineInstr &MI) const {
+SIFoldOperandsImpl::isOMod(const MachineInstr &MI) const {
   unsigned Op = MI.getOpcode();
   switch (Op) {
   case AMDGPU::V_MUL_F64_e64:
@@ -1740,7 +1752,7 @@ SIFoldOperands::isOMod(const MachineInstr &MI) const {
 }
 
 // FIXME: Does this need to check IEEE bit on function?
-bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) {
+bool SIFoldOperandsImpl::tryFoldOMod(MachineInstr &MI) {
   const MachineOperand *RegOp;
   int OMod;
   std::tie(RegOp, OMod) = isOMod(MI);
@@ -1779,7 +1791,7 @@ bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) {
 
 // Try to fold a reg_sequence with vgpr output and agpr inputs into an
 // instruction which can take an agpr. So far that means a store.
-bool SIFoldOperands::tryFoldRegSequence(MachineInstr &MI) {
+bool SIFoldOperandsImpl::tryFoldRegSequence(MachineInstr &MI) {
   assert(MI.isRegSequence());
   auto Reg = MI.getOperand(0).getReg();
 
@@ -1926,7 +1938,7 @@ static bool isAGPRCopy(const SIRegisterInfo &TRI,
 //      loop:
 //        %3:areg = PHI %2:areg, %entry, %X:areg,
 //        %4:areg = (instr using %3:areg)
-bool SIFoldOperands::tryFoldPhiAGPR(MachineInstr &PHI) {
+bool SIFoldOperandsImpl::tryFoldPhiAGPR(MachineInstr &PHI) {
   assert(PHI.isPHI());
 
   Register PhiOut = PHI.getOperand(0).getReg();
@@ -2030,7 +2042,7 @@ bool SIFoldOperands::tryFoldPhiAGPR(MachineInstr &PHI) {
 }
 
 // Attempt to convert VGPR load to an AGPR load.
-bool SIFoldOperands::tryFoldLoad(MachineInstr &MI) {
+bool SIFoldOperandsImpl::tryFoldLoad(MachineInstr &MI) {
   assert(MI.mayLoad());
   if (!ST->hasGFX90AInsts() || MI.getNumExplicitDefs() != 1)
     return false;
@@ -2117,7 +2129,7 @@ bool SIFoldOperands::tryFoldLoad(MachineInstr &MI) {
 //        %0:areg = PHI %tmp_agpr, %a, %x, %c
 //        %1:areg = PHI %tmp_agpr, %a, %y, %c
 //        %2:areg = PHI %tmp_agpr, %a, %z, %c
-bool SIFoldOperands::tryOptimizeAGPRPhis(MachineBasicBlock &MBB) {
+bool SIFoldOperandsImpl::tryOptimizeAGPRPhis(MachineBasicBlock &MBB) {
   // This is only really needed on GFX908 where AGPR-AGPR copies are
   // unreasonably difficult.
   if (ST->hasGFX90AInsts())
@@ -2182,10 +2194,7 @@ bool SIFoldOperands::tryOptimizeAGPRPhis(MachineBasicBlock &MBB) {
   return Changed;
 }
 
-bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
-  if (skipFunction(MF.getFunction()))
-    return false;
-
+bool SIFoldOperandsImpl::run(MachineFunction &MF) {
   MRI = &MF.getRegInfo();
   ST = &MF.getSubtarget<GCNSubtarget>();
   TII = ST->getInstrInfo();
@@ -2246,3 +2255,14 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
 
   return Changed;
 }
+
+// New-PM entry point: run the shared implementation and report preservation.
+PreservedAnalyses SIFoldOperandsPass::run(MachineFunction &MF,
+                                          MachineFunctionAnalysisManager &) {
+  if (!SIFoldOperandsImpl().run(MF))
+    return PreservedAnalyses::all();
+  // Start from the set every machine function pass must preserve.
+  auto PA = getMachineFunctionPassPreservedAnalyses();
+  PA.preserveSet<CFGAnalyses>();
+  return PA;
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.h b/llvm/lib/Target/AMDGPU/SIFoldOperands.h
new file mode 100644
index 00000000000000..516492dfcbacd7
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.h
@@ -0,0 +1,23 @@
+//===- SIFoldOperands.h ----------------------------------------*- C++- *-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_SIFOLDOPERANDS_H
+#define LLVM_LIB_TARGET_AMDGPU_SIFOLDOPERANDS_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+class SIFoldOperandsPass : public PassInfoMixin<SIFoldOperandsPass> {
+public:
+  SIFoldOperandsPass() = default;
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+};
+} // namespace llvm
+
+#endif
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
index 3db2b6ed9ab4ba..e5c7da6ee98fcc 100644
--- a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination -o - %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs -passes si-fold-operands,dead-mi-elimination -o - %s | FileCheck -enable-var-scope -check-prefix=GCN %s
 ...
 
 # GCN-LABEL: name: s_fold_and_imm_regimm_32{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-fold-fi.mir b/llvm/test/CodeGen/AMDGPU/flat-scratch-fold-fi.mir
index 32c594c796912f..aa34cf8a37fd4d 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-fold-fi.mir
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-fold-fi.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=+enable-flat-scratch -run-pass=si-fold-operands -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=+enable-flat-scratch -passes=si-fold-operands -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
 
 ---
 name:            test_fold_fi_scratch_load_vgpr
diff --git a/llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir b/llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir
index e94546fd5e8a51..4ccafe0b2b5664 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir
@@ -3,6 +3,9 @@
 # RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck %s --check-prefixes=GFX90A
 # RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck %s --check-prefixes=GFX90A
 
+# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -passes si-fold-operands %s -o - | FileCheck %s --check-prefixes=GFX908
+# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -passes si-fold-operands %s -o - | FileCheck %s --check-prefixes=GFX90A
+# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -passes si-fold-operands %s -o - | FileCheck %s --check-prefixes=GFX90A
 ---
 name: test_sgpr_init_multiuse
 tracksRegLiveness: true
diff --git a/llvm/test/CodeGen/AMDGPU/fold-cndmask-wave32.mir b/llvm/test/CodeGen/AMDGPU/fold-cndmask-wave32.mir
index dd3c798dcbcfc4..28e44080de1b5e 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-cndmask-wave32.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-cndmask-wave32.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -run-pass si-fold-operands -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -passes si-fold-operands -verify-machineinstrs -o - %s | FileCheck %s
 
 ---
 name:            fold_cndmask
diff --git a/llvm/test/CodeGen/AMDGPU/fold-cndmask.mir b/llvm/test/CodeGen/AMDGPU/fold-cndmask.mir
index 4b84c0ec56f4d7..9b8bb428b01344 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-cndmask.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-cndmask.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=amdgcn -run-pass si-fold-operands -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -passes si-fold-operands -verify-machineinstrs -o - %s | FileCheck %s
 
 # CHECK: %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
 # CHECK: %2:vgpr_32 = V_MOV_B32_e32 0,...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/105791