[llvm] [NVPTX] Fixup NVPTXPrologEpilogPass for opt-bisect-limit (PR #144136)
Alex MacLean via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 17 10:07:26 PDT 2025
https://github.com/AlexMaclean updated https://github.com/llvm/llvm-project/pull/144136
>From 7f09ca23146b066b9d7cc499afcd49a6a080be44 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Fri, 13 Jun 2025 04:35:08 +0000
Subject: [PATCH 1/2] [NVPTX] Fixup NVPTXPrologEpilogPass for opt-bisect-limit
---
llvm/lib/Target/NVPTX/NVPTX.h | 1 +
.../Target/NVPTX/NVPTXPrologEpilogPass.cpp | 84 ++++++++++++-------
llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp | 15 ++--
llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 1 +
.../test/CodeGen/NVPTX/frameindex-lifetime.ll | 51 +++++++++++
5 files changed, 117 insertions(+), 35 deletions(-)
create mode 100644 llvm/test/CodeGen/NVPTX/frameindex-lifetime.ll
diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index b7c5a0a5c9983..b7fd7090299a9 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -76,6 +76,7 @@ void initializeNVPTXAAWrapperPassPass(PassRegistry &);
void initializeNVPTXExternalAAWrapperPass(PassRegistry &);
void initializeNVPTXPeepholePass(PassRegistry &);
void initializeNVPTXTagInvariantLoadLegacyPassPass(PassRegistry &);
+void initializeNVPTXPrologEpilogPassPass(PassRegistry &);
struct NVVMIntrRangePass : PassInfoMixin<NVVMIntrRangePass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
diff --git a/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index 7929bd2e0df08..c63ce9945c6af 100644
--- a/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -41,7 +41,7 @@ class NVPTXPrologEpilogPass : public MachineFunctionPass {
private:
void calculateFrameObjectOffsets(MachineFunction &Fn);
};
-}
+} // end anonymous namespace
MachineFunctionPass *llvm::createNVPTXPrologEpilogPass() {
return new NVPTXPrologEpilogPass();
@@ -49,6 +49,44 @@ MachineFunctionPass *llvm::createNVPTXPrologEpilogPass() {
char NVPTXPrologEpilogPass::ID = 0;
+INITIALIZE_PASS(NVPTXPrologEpilogPass, DEBUG_TYPE,
+ "NVPTX Prologue/Epilogue Insertion", false, false)
+
+static bool replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI,
+ unsigned OpIdx) {
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ if (MI.isDebugValue()) {
+
+ MachineOperand &Op = MI.getOperand(OpIdx);
+ assert(MI.isDebugOperand(&Op) &&
+ "Frame indices can only appear as a debug operand in a DBG_VALUE*"
+ " machine instruction");
+ Register Reg;
+ unsigned FrameIdx = Op.getIndex();
+
+ StackOffset Offset = TFI->getFrameIndexReference(MF, FrameIdx, Reg);
+ Op.ChangeToRegister(Reg, false /*isDef*/);
+
+ const DIExpression *DIExpr = MI.getDebugExpression();
+ if (MI.isNonListDebugValue()) {
+ DIExpr = TRI.prependOffsetExpression(MI.getDebugExpression(),
+ DIExpression::ApplyOffset, Offset);
+ } else {
+ // The debug operand at DebugOpIndex was a frame index at offset
+ // `Offset`; now that the operand has been replaced with the frame
+ // register, we must add Offset, giving `register x, plus Offset`.
+ unsigned DebugOpIndex = MI.getDebugOperandIndex(&Op);
+ SmallVector<uint64_t, 3> Ops;
+ TRI.getOffsetOpcodes(Offset, Ops);
+ DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, DebugOpIndex);
+ }
+ MI.getDebugExpressionOp().setMetadata(DIExpr);
+ return true;
+ }
+ return false;
+}
+
bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
const TargetSubtargetInfo &STI = MF.getSubtarget();
const TargetFrameLowering &TFI = *STI.getFrameLowering();
@@ -57,41 +95,27 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
calculateFrameObjectOffsets(MF);
- for (MachineBasicBlock &MBB : MF) {
- for (MachineInstr &MI : MBB) {
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- if (!MI.getOperand(i).isFI())
+ for (MachineBasicBlock &BB : MF) {
+ for (MachineBasicBlock::iterator I = BB.end(); I != BB.begin();) {
+ MachineInstr &MI = *std::prev(I);
+
+ bool RemovedMI = false;
+ for (const auto &[Idx, Op] : enumerate(MI.operands())) {
+ if (!Op.isFI())
continue;
- // Frame indices in debug values are encoded in a target independent
- // way with simply the frame index and offset rather than any
- // target-specific addressing mode.
- if (MI.isDebugValue()) {
- MachineOperand &Op = MI.getOperand(i);
- assert(
- MI.isDebugOperand(&Op) &&
- "Frame indices can only appear as a debug operand in a DBG_VALUE*"
- " machine instruction");
- Register Reg;
- auto Offset =
- TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
- Op.ChangeToRegister(Reg, /*isDef=*/false);
- const DIExpression *DIExpr = MI.getDebugExpression();
- if (MI.isNonListDebugValue()) {
- DIExpr = TRI.prependOffsetExpression(MI.getDebugExpression(), DIExpression::ApplyOffset, Offset);
- } else {
- SmallVector<uint64_t, 3> Ops;
- TRI.getOffsetOpcodes(Offset, Ops);
- unsigned OpIdx = MI.getDebugOperandIndex(&Op);
- DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, OpIdx);
- }
- MI.getDebugExpressionOp().setMetadata(DIExpr);
+ if (replaceFrameIndexDebugInstr(MF, MI, Idx))
continue;
- }
- TRI.eliminateFrameIndex(MI, 0, i, nullptr);
+ // Eliminate this FrameIndex operand.
+ TRI.eliminateFrameIndex(MI, 0, Idx, nullptr);
Modified = true;
+ if (RemovedMI)
+ break;
}
+
+ if (!RemovedMI)
+ --I;
}
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index eb60e1502cf90..ac7025962cc99 100644
--- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -103,15 +103,20 @@ BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
bool NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS) const {
+ RegScavenger *) const {
assert(SPAdj == 0 && "Unexpected");
MachineInstr &MI = *II;
- int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+ if (MI.isLifetimeMarker()) {
+ MI.eraseFromParent();
+ return true;
+ }
+
+ const int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
- MachineFunction &MF = *MI.getParent()->getParent();
- int Offset = MF.getFrameInfo().getObjectOffset(FrameIndex) +
- MI.getOperand(FIOperandNum + 1).getImm();
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const int Offset = MF.getFrameInfo().getObjectOffset(FrameIndex) +
+ MI.getOperand(FIOperandNum + 1).getImm();
// Using I0 as the frame pointer
MI.getOperand(FIOperandNum).ChangeToRegister(getFrameRegister(MF), false);
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 85d28a703a4cb..087a672f88289 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -114,6 +114,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXTarget() {
initializeNVPTXExternalAAWrapperPass(PR);
initializeNVPTXPeepholePass(PR);
initializeNVPTXTagInvariantLoadLegacyPassPass(PR);
+ initializeNVPTXPrologEpilogPassPass(PR);
}
static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
diff --git a/llvm/test/CodeGen/NVPTX/frameindex-lifetime.ll b/llvm/test/CodeGen/NVPTX/frameindex-lifetime.ll
new file mode 100644
index 0000000000000..42655538cc7ad
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/frameindex-lifetime.ll
@@ -0,0 +1,51 @@
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+
+;; This test is intended to verify that we don't crash when -opt-bisect-limit
+;; is used in conjunction with lifetime markers. Previously, later passes
+;; would not handle these instructions correctly and relied on earlier passes
+;; to remove them.
+
+declare void @bar(ptr)
+
+define void @foo() {
+ %p = alloca i32
+ call void @llvm.lifetime.start(i64 4, ptr %p)
+ call void @bar(ptr %p)
+ call void @llvm.lifetime.end(i64 4, ptr %p)
+ ret void
+}
>From 1840841fa97dce52209f322409cf3ea9fb2b3e39 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Tue, 17 Jun 2025 17:07:10 +0000
Subject: [PATCH 2/2] address comments
---
llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index c63ce9945c6af..ababc66cc5d26 100644
--- a/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -108,7 +108,7 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
continue;
// Eliminate this FrameIndex operand.
- TRI.eliminateFrameIndex(MI, 0, Idx, nullptr);
+ RemovedMI = TRI.eliminateFrameIndex(MI, 0, Idx, nullptr);
Modified = true;
if (RemovedMI)
break;
More information about the llvm-commits
mailing list