[llvm] [NVPTX] Fixup NVPTXPrologEpilogPass for opt-bisect-limit (PR #144136)

Alex MacLean via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 17 10:07:26 PDT 2025


https://github.com/AlexMaclean updated https://github.com/llvm/llvm-project/pull/144136

>From 7f09ca23146b066b9d7cc499afcd49a6a080be44 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Fri, 13 Jun 2025 04:35:08 +0000
Subject: [PATCH 1/2] [NVPTX] Fixup NVPTXPrologEpilogPass for opt-bisect-limit

---
 llvm/lib/Target/NVPTX/NVPTX.h                 |  1 +
 .../Target/NVPTX/NVPTXPrologEpilogPass.cpp    | 84 ++++++++++++-------
 llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp   | 15 ++--
 llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp  |  1 +
 .../test/CodeGen/NVPTX/frameindex-lifetime.ll | 51 +++++++++++
 5 files changed, 117 insertions(+), 35 deletions(-)
 create mode 100644 llvm/test/CodeGen/NVPTX/frameindex-lifetime.ll

diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index b7c5a0a5c9983..b7fd7090299a9 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -76,6 +76,7 @@ void initializeNVPTXAAWrapperPassPass(PassRegistry &);
 void initializeNVPTXExternalAAWrapperPass(PassRegistry &);
 void initializeNVPTXPeepholePass(PassRegistry &);
 void initializeNVPTXTagInvariantLoadLegacyPassPass(PassRegistry &);
+void initializeNVPTXPrologEpilogPassPass(PassRegistry &);
 
 struct NVVMIntrRangePass : PassInfoMixin<NVVMIntrRangePass> {
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
diff --git a/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index 7929bd2e0df08..c63ce9945c6af 100644
--- a/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -41,7 +41,7 @@ class NVPTXPrologEpilogPass : public MachineFunctionPass {
 private:
   void calculateFrameObjectOffsets(MachineFunction &Fn);
 };
-}
+} // end anonymous namespace
 
 MachineFunctionPass *llvm::createNVPTXPrologEpilogPass() {
   return new NVPTXPrologEpilogPass();
@@ -49,6 +49,44 @@ MachineFunctionPass *llvm::createNVPTXPrologEpilogPass() {
 
 char NVPTXPrologEpilogPass::ID = 0;
 
+INITIALIZE_PASS(NVPTXPrologEpilogPass, DEBUG_TYPE,
+                "NVPTX Prologue/Epilogue Insertion", false, false)
+
+static bool replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI,
+                                        unsigned OpIdx) {
+  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+  if (MI.isDebugValue()) {
+
+    MachineOperand &Op = MI.getOperand(OpIdx);
+    assert(MI.isDebugOperand(&Op) &&
+           "Frame indices can only appear as a debug operand in a DBG_VALUE*"
+           " machine instruction");
+    Register Reg;
+    unsigned FrameIdx = Op.getIndex();
+
+    StackOffset Offset = TFI->getFrameIndexReference(MF, FrameIdx, Reg);
+    Op.ChangeToRegister(Reg, false /*isDef*/);
+
+    const DIExpression *DIExpr = MI.getDebugExpression();
+    if (MI.isNonListDebugValue()) {
+      DIExpr = TRI.prependOffsetExpression(MI.getDebugExpression(),
+                                           DIExpression::ApplyOffset, Offset);
+    } else {
+      // The debug operand at DebugOpIndex was a frame index at offset
+      // `Offset`; now the operand has been replaced with the frame
+      // register, we must add Offset with `register x, plus Offset`.
+      unsigned DebugOpIndex = MI.getDebugOperandIndex(&Op);
+      SmallVector<uint64_t, 3> Ops;
+      TRI.getOffsetOpcodes(Offset, Ops);
+      DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, DebugOpIndex);
+    }
+    MI.getDebugExpressionOp().setMetadata(DIExpr);
+    return true;
+  }
+  return false;
+}
+
 bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
   const TargetSubtargetInfo &STI = MF.getSubtarget();
   const TargetFrameLowering &TFI = *STI.getFrameLowering();
@@ -57,41 +95,27 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
 
   calculateFrameObjectOffsets(MF);
 
-  for (MachineBasicBlock &MBB : MF) {
-    for (MachineInstr &MI : MBB) {
-      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
-        if (!MI.getOperand(i).isFI())
+  for (MachineBasicBlock &BB : MF) {
+    for (MachineBasicBlock::iterator I = BB.end(); I != BB.begin();) {
+      MachineInstr &MI = *std::prev(I);
+
+      bool RemovedMI = false;
+      for (const auto &[Idx, Op] : enumerate(MI.operands())) {
+        if (!Op.isFI())
           continue;
 
-        // Frame indices in debug values are encoded in a target independent
-        // way with simply the frame index and offset rather than any
-        // target-specific addressing mode.
-        if (MI.isDebugValue()) {
-          MachineOperand &Op = MI.getOperand(i);
-          assert(
-              MI.isDebugOperand(&Op) &&
-              "Frame indices can only appear as a debug operand in a DBG_VALUE*"
-              " machine instruction");
-          Register Reg;
-          auto Offset =
-              TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
-          Op.ChangeToRegister(Reg, /*isDef=*/false);
-          const DIExpression *DIExpr = MI.getDebugExpression();
-          if (MI.isNonListDebugValue()) {
-            DIExpr = TRI.prependOffsetExpression(MI.getDebugExpression(), DIExpression::ApplyOffset, Offset);
-          } else {
-	    SmallVector<uint64_t, 3> Ops;
-            TRI.getOffsetOpcodes(Offset, Ops);
-            unsigned OpIdx = MI.getDebugOperandIndex(&Op);
-            DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, OpIdx);
-          }
-          MI.getDebugExpressionOp().setMetadata(DIExpr);
+        if (replaceFrameIndexDebugInstr(MF, MI, Idx))
           continue;
-        }
 
-        TRI.eliminateFrameIndex(MI, 0, i, nullptr);
+        // Eliminate this FrameIndex operand.
+        TRI.eliminateFrameIndex(MI, 0, Idx, nullptr);
         Modified = true;
+        if (RemovedMI)
+          break;
       }
+
+      if (!RemovedMI)
+        --I;
     }
   }
 
diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index eb60e1502cf90..ac7025962cc99 100644
--- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -103,15 +103,20 @@ BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 
 bool NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                             int SPAdj, unsigned FIOperandNum,
-                                            RegScavenger *RS) const {
+                                            RegScavenger *) const {
   assert(SPAdj == 0 && "Unexpected");
 
   MachineInstr &MI = *II;
-  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+  if (MI.isLifetimeMarker()) {
+    MI.eraseFromParent();
+    return true;
+  }
+
+  const int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
 
-  MachineFunction &MF = *MI.getParent()->getParent();
-  int Offset = MF.getFrameInfo().getObjectOffset(FrameIndex) +
-               MI.getOperand(FIOperandNum + 1).getImm();
+  const MachineFunction &MF = *MI.getParent()->getParent();
+  const int Offset = MF.getFrameInfo().getObjectOffset(FrameIndex) +
+                     MI.getOperand(FIOperandNum + 1).getImm();
 
   // Using I0 as the frame pointer
   MI.getOperand(FIOperandNum).ChangeToRegister(getFrameRegister(MF), false);
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 85d28a703a4cb..087a672f88289 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -114,6 +114,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXTarget() {
   initializeNVPTXExternalAAWrapperPass(PR);
   initializeNVPTXPeepholePass(PR);
   initializeNVPTXTagInvariantLoadLegacyPassPass(PR);
+  initializeNVPTXPrologEpilogPassPass(PR);
 }
 
 static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
diff --git a/llvm/test/CodeGen/NVPTX/frameindex-lifetime.ll b/llvm/test/CodeGen/NVPTX/frameindex-lifetime.ll
new file mode 100644
index 0000000000000..42655538cc7ad
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/frameindex-lifetime.ll
@@ -0,0 +1,51 @@
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+
+;; This test is intended to verify that we don't crash when -opt-bisect-limit
+;; is used in conjunction with lifetime markers. Previously, later passes
+;; would not handle these intructions correctly and relied on earlier passes
+;; to remove them.
+
+declare void @bar(ptr)
+
+define void @foo() {
+  %p = alloca i32
+  call void @llvm.lifetime.start(i64 4, ptr %p)
+  call void @bar(ptr %p)
+  call void @llvm.lifetime.end(i64 4, ptr %p)
+  ret void
+}

>From 1840841fa97dce52209f322409cf3ea9fb2b3e39 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Tue, 17 Jun 2025 17:07:10 +0000
Subject: [PATCH 2/2] address comments

---
 llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index c63ce9945c6af..ababc66cc5d26 100644
--- a/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -108,7 +108,7 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
           continue;
 
         // Eliminate this FrameIndex operand.
-        TRI.eliminateFrameIndex(MI, 0, Idx, nullptr);
+        RemovedMI = TRI.eliminateFrameIndex(MI, 0, Idx, nullptr);
         Modified = true;
         if (RemovedMI)
           break;



More information about the llvm-commits mailing list