[llvm] [Target][KernelInfo] Fix kernel-info remarks missing from YAML optimization records (PR #145603)

Miguel Cárdenas via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 24 17:46:35 PDT 2025


https://github.com/miguelcsx updated https://github.com/llvm/llvm-project/pull/145603

>From c1b0977d0ba3d80b5d5e896307a98a816f4efcd7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miguel=20C=C3=A1rdenas?= <miguelecsx at gmail.com>
Date: Tue, 24 Jun 2025 23:24:40 +0200
Subject: [PATCH 1/2] [analysis] fix unknown source locations in KernelInfo

When an instruction has no debug location, fall back to the containing
function's subprogram information so that remarks carry a meaningful source
location instead of an unknown one.
---
 llvm/lib/Analysis/KernelInfo.cpp | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Analysis/KernelInfo.cpp b/llvm/lib/Analysis/KernelInfo.cpp
index 93dd7cecb32e1..33a84452d1527 100644
--- a/llvm/lib/Analysis/KernelInfo.cpp
+++ b/llvm/lib/Analysis/KernelInfo.cpp
@@ -77,6 +77,14 @@ class KernelInfo {
 
 } // end anonymous namespace
 
+static DiagnosticLocation getRemarkLocation(const Instruction &I) {
+  if (DebugLoc DL = I.getDebugLoc())
+    return DiagnosticLocation(DL);
+  if (auto *SP = I.getFunction()->getSubprogram())
+    return DiagnosticLocation(SP);
+  return DiagnosticLocation();
+}
+
 static void identifyCallee(OptimizationRemark &R, const Module *M,
                            const Value *V, StringRef Kind = "") {
   SmallString<100> Name; // might be function name or asm expression
@@ -105,16 +113,19 @@ static void remarkAlloca(OptimizationRemarkEmitter &ORE, const Function &Caller,
                          TypeSize::ScalarTy StaticSize) {
   ORE.emit([&] {
     StringRef DbgName;
-    DebugLoc Loc;
+    DebugLoc DL;
     bool Artificial = false;
     auto DVRs = findDVRDeclares(&const_cast<AllocaInst &>(Alloca));
     if (!DVRs.empty()) {
       const DbgVariableRecord &DVR = **DVRs.begin();
       DbgName = DVR.getVariable()->getName();
-      Loc = DVR.getDebugLoc();
+      DL = DVR.getDebugLoc();
       Artificial = DVR.Variable->isArtificial();
     }
-    OptimizationRemark R(DEBUG_TYPE, "Alloca", DiagnosticLocation(Loc),
+
+    OptimizationRemark R(DEBUG_TYPE, "Alloca",
+                         DL ? DiagnosticLocation(DL)
+                            : getRemarkLocation(Alloca),
                          Alloca.getParent());
     R << "in ";
     identifyFunction(R, Caller);
@@ -142,7 +153,8 @@ static void remarkCall(OptimizationRemarkEmitter &ORE, const Function &Caller,
                        const CallBase &Call, StringRef CallKind,
                        StringRef RemarkKind) {
   ORE.emit([&] {
-    OptimizationRemark R(DEBUG_TYPE, RemarkKind, &Call);
+    OptimizationRemark R(DEBUG_TYPE, RemarkKind, getRemarkLocation(Call),
+                         Call.getParent());
     R << "in ";
     identifyFunction(R, Caller);
     R << ", " << CallKind << ", callee is ";
@@ -155,7 +167,8 @@ static void remarkFlatAddrspaceAccess(OptimizationRemarkEmitter &ORE,
                                       const Function &Caller,
                                       const Instruction &Inst) {
   ORE.emit([&] {
-    OptimizationRemark R(DEBUG_TYPE, "FlatAddrspaceAccess", &Inst);
+    OptimizationRemark R(DEBUG_TYPE, "FlatAddrspaceAccess",
+                         getRemarkLocation(Inst), Inst.getParent());
     R << "in ";
     identifyFunction(R, Caller);
     if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Inst)) {
@@ -265,7 +278,11 @@ void KernelInfo::updateForBB(const BasicBlock &BB,
 static void remarkProperty(OptimizationRemarkEmitter &ORE, const Function &F,
                            StringRef Name, int64_t Value) {
   ORE.emit([&] {
-    OptimizationRemark R(DEBUG_TYPE, Name, &F);
+    DiagnosticLocation DL = F.getSubprogram()
+                                ? DiagnosticLocation(F.getSubprogram())
+                                : DiagnosticLocation();
+    OptimizationRemark R(DEBUG_TYPE, Name, DL,
+                         !F.empty() ? &F.front() : nullptr);
     R << "in ";
     identifyFunction(R, F);
     R << ", " << Name << " = " << itostr(Value);

>From 9f77d27b0608f3ec8e8acebf02506372e7fcaf09 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miguel=20C=C3=A1rdenas?= <miguelecsx at gmail.com>
Date: Wed, 25 Jun 2025 02:37:38 +0200
Subject: [PATCH 2/2] [Target][KernelInfo] Move kernel-info pass to
 OptimizerLastEPCallback for YAML remark output

The kernel-info pass was registered using FullLinkTimeOptimizationLastEPCallback,
which runs after the optimization record YAML files have been finalized. This
caused kernel-info remarks to appear in terminal output but not in YAML files
when using -fsave-optimization-record.

Move kernel-info registration to OptimizerLastEPCallback, which runs during
the LTO optimization pipeline while the remark streamer is still active.

This ensures kernel-info remarks (including NVVM GPU intrinsics like
@llvm.nvvm.read.ptx.sreg.tid.x) are captured in both terminal output and
YAML optimization record files.

Affects the NVPTX and AMDGPU targets. This patch also reverts the
KernelInfo.cpp source-location fallback added in patch 1/2.
---
 llvm/lib/Analysis/KernelInfo.cpp              | 29 ++++---------------
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 17 +++++++----
 llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp  | 13 +++++----
 3 files changed, 25 insertions(+), 34 deletions(-)

diff --git a/llvm/lib/Analysis/KernelInfo.cpp b/llvm/lib/Analysis/KernelInfo.cpp
index 33a84452d1527..93dd7cecb32e1 100644
--- a/llvm/lib/Analysis/KernelInfo.cpp
+++ b/llvm/lib/Analysis/KernelInfo.cpp
@@ -77,14 +77,6 @@ class KernelInfo {
 
 } // end anonymous namespace
 
-static DiagnosticLocation getRemarkLocation(const Instruction &I) {
-  if (DebugLoc DL = I.getDebugLoc())
-    return DiagnosticLocation(DL);
-  if (auto *SP = I.getFunction()->getSubprogram())
-    return DiagnosticLocation(SP);
-  return DiagnosticLocation();
-}
-
 static void identifyCallee(OptimizationRemark &R, const Module *M,
                            const Value *V, StringRef Kind = "") {
   SmallString<100> Name; // might be function name or asm expression
@@ -113,19 +105,16 @@ static void remarkAlloca(OptimizationRemarkEmitter &ORE, const Function &Caller,
                          TypeSize::ScalarTy StaticSize) {
   ORE.emit([&] {
     StringRef DbgName;
-    DebugLoc DL;
+    DebugLoc Loc;
     bool Artificial = false;
     auto DVRs = findDVRDeclares(&const_cast<AllocaInst &>(Alloca));
     if (!DVRs.empty()) {
       const DbgVariableRecord &DVR = **DVRs.begin();
       DbgName = DVR.getVariable()->getName();
-      DL = DVR.getDebugLoc();
+      Loc = DVR.getDebugLoc();
       Artificial = DVR.Variable->isArtificial();
     }
-
-    OptimizationRemark R(DEBUG_TYPE, "Alloca",
-                         DL ? DiagnosticLocation(DL)
-                            : getRemarkLocation(Alloca),
+    OptimizationRemark R(DEBUG_TYPE, "Alloca", DiagnosticLocation(Loc),
                          Alloca.getParent());
     R << "in ";
     identifyFunction(R, Caller);
@@ -153,8 +142,7 @@ static void remarkCall(OptimizationRemarkEmitter &ORE, const Function &Caller,
                        const CallBase &Call, StringRef CallKind,
                        StringRef RemarkKind) {
   ORE.emit([&] {
-    OptimizationRemark R(DEBUG_TYPE, RemarkKind, getRemarkLocation(Call),
-                         Call.getParent());
+    OptimizationRemark R(DEBUG_TYPE, RemarkKind, &Call);
     R << "in ";
     identifyFunction(R, Caller);
     R << ", " << CallKind << ", callee is ";
@@ -167,8 +155,7 @@ static void remarkFlatAddrspaceAccess(OptimizationRemarkEmitter &ORE,
                                       const Function &Caller,
                                       const Instruction &Inst) {
   ORE.emit([&] {
-    OptimizationRemark R(DEBUG_TYPE, "FlatAddrspaceAccess",
-                         getRemarkLocation(Inst), Inst.getParent());
+    OptimizationRemark R(DEBUG_TYPE, "FlatAddrspaceAccess", &Inst);
     R << "in ";
     identifyFunction(R, Caller);
     if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Inst)) {
@@ -278,11 +265,7 @@ void KernelInfo::updateForBB(const BasicBlock &BB,
 static void remarkProperty(OptimizationRemarkEmitter &ORE, const Function &F,
                            StringRef Name, int64_t Value) {
   ORE.emit([&] {
-    DiagnosticLocation DL = F.getSubprogram()
-                                ? DiagnosticLocation(F.getSubprogram())
-                                : DiagnosticLocation();
-    OptimizationRemark R(DEBUG_TYPE, Name, DL,
-                         !F.empty() ? &F.front() : nullptr);
+    OptimizationRemark R(DEBUG_TYPE, Name, &F);
     R << "in ";
     identifyFunction(R, F);
     R << ", " << Name << " = " << itostr(Value);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index d2e4825cf3c81..3306077ac1440 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -941,13 +941,20 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
                 *this, Opt, ThinOrFullLTOPhase::FullLTOPostLink));
           }
         }
-        if (!NoKernelInfoEndLTO) {
-          FunctionPassManager FPM;
-          FPM.addPass(KernelInfoPrinter(this));
-          PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
-        }
       });
 
+  // Add kernel-info pass using OptimizerLastEPCallback to run during LTO
+  // while remark streamer is still active
+  if (!NoKernelInfoEndLTO) {
+    PB.registerOptimizerLastEPCallback([this](ModulePassManager &PM,
+                                              OptimizationLevel Level,
+                                              ThinOrFullLTOPhase Phase) {
+      FunctionPassManager FPM;
+      FPM.addPass(KernelInfoPrinter(this));
+      PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+    });
+  }
+
   PB.registerRegClassFilterParsingCallback(
       [](StringRef FilterName) -> RegAllocFilterFunc {
         if (FilterName == "sgpr")
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index ef310e5828f22..b9b6aa55cc875 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -260,12 +260,13 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
       });
 
   if (!NoKernelInfoEndLTO) {
-    PB.registerFullLinkTimeOptimizationLastEPCallback(
-        [this](ModulePassManager &PM, OptimizationLevel Level) {
-          FunctionPassManager FPM;
-          FPM.addPass(KernelInfoPrinter(this));
-          PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
-        });
+    PB.registerOptimizerLastEPCallback([this](ModulePassManager &PM,
+                                              OptimizationLevel Level,
+                                              ThinOrFullLTOPhase Phase) {
+      FunctionPassManager FPM;
+      FPM.addPass(KernelInfoPrinter(this));
+      PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+    });
   }
 }
 



More information about the llvm-commits mailing list