[llvm] [Target][KernelInfo] Fix kernel-info remarks missing from YAML optimization records (PR #145603)
Miguel Cárdenas via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 24 17:46:35 PDT 2025
https://github.com/miguelcsx updated https://github.com/llvm/llvm-project/pull/145603
>From c1b0977d0ba3d80b5d5e896307a98a816f4efcd7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miguel=20C=C3=A1rdenas?= <miguelecsx at gmail.com>
Date: Tue, 24 Jun 2025 23:24:40 +0200
Subject: [PATCH 1/2] [analysis] fix unknown source locations in KernelInfo
The fix provides meaningful source locations by falling back to the
containing function's subprogram information instead of showing unknown
locations.
---
llvm/lib/Analysis/KernelInfo.cpp | 29 +++++++++++++++++++++++------
1 file changed, 23 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Analysis/KernelInfo.cpp b/llvm/lib/Analysis/KernelInfo.cpp
index 93dd7cecb32e1..33a84452d1527 100644
--- a/llvm/lib/Analysis/KernelInfo.cpp
+++ b/llvm/lib/Analysis/KernelInfo.cpp
@@ -77,6 +77,14 @@ class KernelInfo {
} // end anonymous namespace
+static DiagnosticLocation getRemarkLocation(const Instruction &I) {
+ if (DebugLoc DL = I.getDebugLoc())
+ return DiagnosticLocation(DL);
+ if (auto *SP = I.getFunction()->getSubprogram())
+ return DiagnosticLocation(SP);
+ return DiagnosticLocation();
+}
+
static void identifyCallee(OptimizationRemark &R, const Module *M,
const Value *V, StringRef Kind = "") {
SmallString<100> Name; // might be function name or asm expression
@@ -105,16 +113,19 @@ static void remarkAlloca(OptimizationRemarkEmitter &ORE, const Function &Caller,
TypeSize::ScalarTy StaticSize) {
ORE.emit([&] {
StringRef DbgName;
- DebugLoc Loc;
+ DebugLoc DL;
bool Artificial = false;
auto DVRs = findDVRDeclares(&const_cast<AllocaInst &>(Alloca));
if (!DVRs.empty()) {
const DbgVariableRecord &DVR = **DVRs.begin();
DbgName = DVR.getVariable()->getName();
- Loc = DVR.getDebugLoc();
+ DL = DVR.getDebugLoc();
Artificial = DVR.Variable->isArtificial();
}
- OptimizationRemark R(DEBUG_TYPE, "Alloca", DiagnosticLocation(Loc),
+
+ OptimizationRemark R(DEBUG_TYPE, "Alloca",
+ DL ? DiagnosticLocation(DL)
+ : getRemarkLocation(Alloca),
Alloca.getParent());
R << "in ";
identifyFunction(R, Caller);
@@ -142,7 +153,8 @@ static void remarkCall(OptimizationRemarkEmitter &ORE, const Function &Caller,
const CallBase &Call, StringRef CallKind,
StringRef RemarkKind) {
ORE.emit([&] {
- OptimizationRemark R(DEBUG_TYPE, RemarkKind, &Call);
+ OptimizationRemark R(DEBUG_TYPE, RemarkKind, getRemarkLocation(Call),
+ Call.getParent());
R << "in ";
identifyFunction(R, Caller);
R << ", " << CallKind << ", callee is ";
@@ -155,7 +167,8 @@ static void remarkFlatAddrspaceAccess(OptimizationRemarkEmitter &ORE,
const Function &Caller,
const Instruction &Inst) {
ORE.emit([&] {
- OptimizationRemark R(DEBUG_TYPE, "FlatAddrspaceAccess", &Inst);
+ OptimizationRemark R(DEBUG_TYPE, "FlatAddrspaceAccess",
+ getRemarkLocation(Inst), Inst.getParent());
R << "in ";
identifyFunction(R, Caller);
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Inst)) {
@@ -265,7 +278,11 @@ void KernelInfo::updateForBB(const BasicBlock &BB,
static void remarkProperty(OptimizationRemarkEmitter &ORE, const Function &F,
StringRef Name, int64_t Value) {
ORE.emit([&] {
- OptimizationRemark R(DEBUG_TYPE, Name, &F);
+ DiagnosticLocation DL = F.getSubprogram()
+ ? DiagnosticLocation(F.getSubprogram())
+ : DiagnosticLocation();
+ OptimizationRemark R(DEBUG_TYPE, Name, DL,
+ !F.empty() ? &F.front() : nullptr);
R << "in ";
identifyFunction(R, F);
R << ", " << Name << " = " << itostr(Value);
>From 9f77d27b0608f3ec8e8acebf02506372e7fcaf09 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miguel=20C=C3=A1rdenas?= <miguelecsx at gmail.com>
Date: Wed, 25 Jun 2025 02:37:38 +0200
Subject: [PATCH 2/2] [Target][KernelInfo] Move kernel-info pass to
OptimizerLastEPCallback for YAML remark output
The kernel-info pass was registered using FullLinkTimeOptimizationLastEPCallback,
which runs after the optimization record YAML files have been finalized. This
caused kernel-info remarks to appear in terminal output but not in YAML files
when using -fsave-optimization-record.
Move kernel-info registration to OptimizerLastEPCallback, which runs during
the LTO optimization pipeline while the remark streamer is still active.
This ensures kernel-info remarks (including NVVM GPU intrinsics like
@llvm.nvvm.read.ptx.sreg.tid.x) are captured in both terminal output and
YAML optimization record files.
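Affects NVPTX and AMDGPU targets.
This commit also reverts the KernelInfo.cpp source-location fallback
introduced in PATCH 1/2, restoring that file to its previous state.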
---
llvm/lib/Analysis/KernelInfo.cpp | 29 ++++---------------
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 17 +++++++----
llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 13 +++++----
3 files changed, 25 insertions(+), 34 deletions(-)
diff --git a/llvm/lib/Analysis/KernelInfo.cpp b/llvm/lib/Analysis/KernelInfo.cpp
index 33a84452d1527..93dd7cecb32e1 100644
--- a/llvm/lib/Analysis/KernelInfo.cpp
+++ b/llvm/lib/Analysis/KernelInfo.cpp
@@ -77,14 +77,6 @@ class KernelInfo {
} // end anonymous namespace
-static DiagnosticLocation getRemarkLocation(const Instruction &I) {
- if (DebugLoc DL = I.getDebugLoc())
- return DiagnosticLocation(DL);
- if (auto *SP = I.getFunction()->getSubprogram())
- return DiagnosticLocation(SP);
- return DiagnosticLocation();
-}
-
static void identifyCallee(OptimizationRemark &R, const Module *M,
const Value *V, StringRef Kind = "") {
SmallString<100> Name; // might be function name or asm expression
@@ -113,19 +105,16 @@ static void remarkAlloca(OptimizationRemarkEmitter &ORE, const Function &Caller,
TypeSize::ScalarTy StaticSize) {
ORE.emit([&] {
StringRef DbgName;
- DebugLoc DL;
+ DebugLoc Loc;
bool Artificial = false;
auto DVRs = findDVRDeclares(&const_cast<AllocaInst &>(Alloca));
if (!DVRs.empty()) {
const DbgVariableRecord &DVR = **DVRs.begin();
DbgName = DVR.getVariable()->getName();
- DL = DVR.getDebugLoc();
+ Loc = DVR.getDebugLoc();
Artificial = DVR.Variable->isArtificial();
}
-
- OptimizationRemark R(DEBUG_TYPE, "Alloca",
- DL ? DiagnosticLocation(DL)
- : getRemarkLocation(Alloca),
+ OptimizationRemark R(DEBUG_TYPE, "Alloca", DiagnosticLocation(Loc),
Alloca.getParent());
R << "in ";
identifyFunction(R, Caller);
@@ -153,8 +142,7 @@ static void remarkCall(OptimizationRemarkEmitter &ORE, const Function &Caller,
const CallBase &Call, StringRef CallKind,
StringRef RemarkKind) {
ORE.emit([&] {
- OptimizationRemark R(DEBUG_TYPE, RemarkKind, getRemarkLocation(Call),
- Call.getParent());
+ OptimizationRemark R(DEBUG_TYPE, RemarkKind, &Call);
R << "in ";
identifyFunction(R, Caller);
R << ", " << CallKind << ", callee is ";
@@ -167,8 +155,7 @@ static void remarkFlatAddrspaceAccess(OptimizationRemarkEmitter &ORE,
const Function &Caller,
const Instruction &Inst) {
ORE.emit([&] {
- OptimizationRemark R(DEBUG_TYPE, "FlatAddrspaceAccess",
- getRemarkLocation(Inst), Inst.getParent());
+ OptimizationRemark R(DEBUG_TYPE, "FlatAddrspaceAccess", &Inst);
R << "in ";
identifyFunction(R, Caller);
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Inst)) {
@@ -278,11 +265,7 @@ void KernelInfo::updateForBB(const BasicBlock &BB,
static void remarkProperty(OptimizationRemarkEmitter &ORE, const Function &F,
StringRef Name, int64_t Value) {
ORE.emit([&] {
- DiagnosticLocation DL = F.getSubprogram()
- ? DiagnosticLocation(F.getSubprogram())
- : DiagnosticLocation();
- OptimizationRemark R(DEBUG_TYPE, Name, DL,
- !F.empty() ? &F.front() : nullptr);
+ OptimizationRemark R(DEBUG_TYPE, Name, &F);
R << "in ";
identifyFunction(R, F);
R << ", " << Name << " = " << itostr(Value);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index d2e4825cf3c81..3306077ac1440 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -941,13 +941,20 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
*this, Opt, ThinOrFullLTOPhase::FullLTOPostLink));
}
}
- if (!NoKernelInfoEndLTO) {
- FunctionPassManager FPM;
- FPM.addPass(KernelInfoPrinter(this));
- PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
- }
});
+ // Add kernel-info pass using OptimizerLastEPCallback to run during LTO
+ // while remark streamer is still active
+ if (!NoKernelInfoEndLTO) {
+ PB.registerOptimizerLastEPCallback([this](ModulePassManager &PM,
+ OptimizationLevel Level,
+ ThinOrFullLTOPhase Phase) {
+ FunctionPassManager FPM;
+ FPM.addPass(KernelInfoPrinter(this));
+ PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+ });
+ }
+
PB.registerRegClassFilterParsingCallback(
[](StringRef FilterName) -> RegAllocFilterFunc {
if (FilterName == "sgpr")
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index ef310e5828f22..b9b6aa55cc875 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -260,12 +260,13 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
});
if (!NoKernelInfoEndLTO) {
- PB.registerFullLinkTimeOptimizationLastEPCallback(
- [this](ModulePassManager &PM, OptimizationLevel Level) {
- FunctionPassManager FPM;
- FPM.addPass(KernelInfoPrinter(this));
- PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
- });
+ PB.registerOptimizerLastEPCallback([this](ModulePassManager &PM,
+ OptimizationLevel Level,
+ ThinOrFullLTOPhase Phase) {
+ FunctionPassManager FPM;
+ FPM.addPass(KernelInfoPrinter(this));
+ PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+ });
}
}
More information about the llvm-commits mailing list