[llvm] [ThinLTO][MemProf] Support remark emission for thin link and use in MemProf (PR #182570)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 20 11:02:36 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-lto
Author: Teresa Johnson (teresajohnson)
<details>
<summary>Changes</summary>
Enable optimization remark emission during the ThinLTO thin link phase.
This is useful for global analysis passes like MemProf context
disambiguation, which operate on the summary index and may need to
report diagnostics before any IR modules are available.
Key changes:
- Create a dummy function ("thinlto_remark_dummy") in a private Module
owned by the LTO class to provide the Function context that
OptimizationRemark requires.
- Update MemProfContextDisambiguation to use a callback for remark
emission, allowing it to report hinted sizes and other diagnostics
during the thin link.
- Ensure the dummy module and function are safely cleaned up at the end
of the LTO session via the LTO::cleanup mechanism.
---
Full diff: https://github.com/llvm/llvm-project/pull/182570.diff
5 Files Affected:
- (modified) llvm/include/llvm/LTO/LTO.h (+9)
- (modified) llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h (+3-1)
- (modified) llvm/lib/LTO/LTO.cpp (+24-1)
- (modified) llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp (+24-16)
- (modified) llvm/test/ThinLTO/X86/memprof-basic.ll (+5-2)
``````````diff
diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index c49d5a2198efd..3e877d22c3f6c 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -642,6 +642,15 @@ class LTO {
// Diagnostic optimization remarks file
LLVMRemarkFileHandle DiagnosticOutputFile;
+ // A dummy module to host the dummy function.
+ std::unique_ptr<Module> DummyModule;
+
+ // A dummy function created in a private module to provide a context for
+ // LTO-link optimization remarks. This is needed for ThinLTO where we
+ // may not have any IR functions available, because the optimization remark
+ // handling requires a function.
+ Function *LinkerRemarkFunction = nullptr;
+
// Setup optimization remarks according to the provided configuration.
Error setupOptimizationRemarks();
diff --git a/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h b/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h
index 576f1eb09953a..a62f5fde5ed1d 100644
--- a/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h
+++ b/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h
@@ -93,7 +93,9 @@ class MemProfContextDisambiguation
void run(ModuleSummaryIndex &Index,
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
- isPrevailing);
+ isPrevailing,
+ function_ref<void(StringRef, StringRef, const Twine &)> EmitRemark =
+ nullptr);
};
/// Strips MemProf attributes and metadata. Can be invoked by the pass pipeline
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 6c0d7d5bd16e9..7029811a3e085 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -81,6 +81,20 @@ Error LTO::setupOptimizationRemarks() {
return DiagFileOrErr.takeError();
DiagnosticOutputFile = std::move(*DiagFileOrErr);
+
+ // Create a dummy function to serve as a context for LTO-link remarks.
+ // This is required because OptimizationRemark requires a valid Function,
+ // and in ThinLTO we may not have any IR functions available during the
+ // thin link.
+ // Host it in a private module to avoid interfering with the LTO process.
+ if (!LinkerRemarkFunction) {
+ DummyModule = std::make_unique<Module>("remark_dummy", RegularLTO.Ctx);
+ LinkerRemarkFunction = Function::Create(
+ FunctionType::get(Type::getVoidTy(RegularLTO.Ctx), false),
+ GlobalValue::ExternalLinkage, "thinlto_remark_dummy",
+ DummyModule.get());
+ }
+
return Error::success();
}
@@ -668,6 +682,8 @@ LTO::LTO(Config Conf, ThinBackend Backend,
LTO::~LTO() = default;
void LTO::cleanup() {
+ DummyModule.reset();
+ LinkerRemarkFunction = nullptr;
consumeError(finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)));
}
@@ -2039,7 +2055,14 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
};
if (EnableMemProfContextDisambiguation) {
MemProfContextDisambiguation ContextDisambiguation;
- ContextDisambiguation.run(ThinLTO.CombinedIndex, isPrevailing);
+ ContextDisambiguation.run(
+ ThinLTO.CombinedIndex, isPrevailing,
+ [&](StringRef PassName, StringRef RemarkName, const Twine &Msg) {
+ auto R = OptimizationRemark(PassName.data(), RemarkName,
+ LinkerRemarkFunction);
+ R << Msg.str();
+ emitRemark(R);
+ });
}
// Figure out which symbols need to be internalized. This also needs to happen
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index c531fecc945b8..afd9469930207 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -265,7 +265,8 @@ class CallsiteContextGraph {
CallsiteContextGraph(CallsiteContextGraph &&) = default;
/// Main entry point to perform analysis and transformations on graph.
- bool process();
+ bool process(function_ref<void(StringRef, StringRef, const Twine &)>
+ EmitRemark = nullptr);
/// Perform cloning on the graph necessary to uniquely identify the allocation
/// behavior of an allocation based on its context.
@@ -280,7 +281,9 @@ class CallsiteContextGraph {
void dump() const;
void print(raw_ostream &OS) const;
- void printTotalSizes(raw_ostream &OS) const;
+ void printTotalSizes(raw_ostream &OS,
+ function_ref<void(StringRef, StringRef, const Twine &)>
+ EmitRemark = nullptr) const;
friend raw_ostream &operator<<(raw_ostream &OS,
const CallsiteContextGraph &CCG) {
@@ -3336,7 +3339,8 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
- raw_ostream &OS) const {
+ raw_ostream &OS,
+ function_ref<void(StringRef, StringRef, const Twine &)> EmitRemark) const {
using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
for (const auto Node : nodes<GraphType>(this)) {
if (Node->isRemoved())
@@ -3353,17 +3357,19 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
auto CSI = ContextIdToContextSizeInfos.find(Id);
if (CSI != ContextIdToContextSizeInfos.end()) {
for (auto &Info : CSI->second) {
- OS << "MemProf hinting: "
- << getAllocTypeString((uint8_t)TypeI->second)
- << " full allocation context " << Info.FullStackId
- << " with total size " << Info.TotalSize << " is "
- << getAllocTypeString(Node->AllocTypes) << " after cloning";
+ std::string Msg =
+ "MemProf hinting: " + getAllocTypeString((uint8_t)TypeI->second) +
+ " full allocation context " + std::to_string(Info.FullStackId) +
+ " with total size " + std::to_string(Info.TotalSize) + " is " +
+ getAllocTypeString(Node->AllocTypes) + " after cloning";
if (allocTypeToUse(Node->AllocTypes) != AllocTypeFromCall)
- OS << " marked " << getAllocTypeString((uint8_t)AllocTypeFromCall)
- << " due to cold byte percent";
+ Msg += " marked " + getAllocTypeString((uint8_t)AllocTypeFromCall) +
+ " due to cold byte percent";
// Print the internal context id to aid debugging and visualization.
- OS << " (context id " << Id << ")";
- OS << "\n";
+ Msg += " (context id " + std::to_string(Id) + ")";
+ OS << Msg << "\n";
+ if (EmitRemark)
+ EmitRemark(DEBUG_TYPE, "MemProfReport", Msg);
}
}
}
@@ -6297,7 +6303,8 @@ void MemProfContextDisambiguation::performICP(
}
template <typename DerivedCCG, typename FuncTy, typename CallTy>
-bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
+bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process(
+ function_ref<void(StringRef, StringRef, const Twine &)> EmitRemark) {
if (DumpCCG) {
dbgs() << "CCG before cloning:\n";
dbgs() << *this;
@@ -6332,7 +6339,7 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
exportToDot("clonefuncassign");
if (MemProfReportHintedSizes)
- printTotalSizes(errs());
+ printTotalSizes(errs(), EmitRemark);
return Changed;
}
@@ -6421,7 +6428,8 @@ PreservedAnalyses MemProfContextDisambiguation::run(Module &M,
void MemProfContextDisambiguation::run(
ModuleSummaryIndex &Index,
llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
- isPrevailing) {
+ isPrevailing,
+ function_ref<void(StringRef, StringRef, const Twine &)> EmitRemark) {
// TODO: If/when other types of memprof cloning are enabled beyond just for
// hot and cold, we will need to change this to individually control the
// AllocationType passed to addStackNodesForMIB during CCG construction.
@@ -6431,7 +6439,7 @@ void MemProfContextDisambiguation::run(
return;
IndexCallsiteContextGraph CCG(Index, isPrevailing);
- CCG.process();
+ CCG.process(EmitRemark);
}
// Strips MemProf attributes and metadata. Can be invoked by the pass pipeline
diff --git a/llvm/test/ThinLTO/X86/memprof-basic.ll b/llvm/test/ThinLTO/X86/memprof-basic.ll
index 537e1b8a26839..cc982f5f79d0a 100644
--- a/llvm/test/ThinLTO/X86/memprof-basic.ll
+++ b/llvm/test/ThinLTO/X86/memprof-basic.ll
@@ -47,7 +47,7 @@
; RUN: -stats -pass-remarks=memprof-context-disambiguation -save-temps \
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP --check-prefix=DUMP-SIZES \
; RUN: --check-prefix=STATS --check-prefix=STATS-BE --check-prefix=REMARKS \
-; RUN: --check-prefix=SIZES
+; RUN: --check-prefix=SIZES --check-prefix=REMARK-LINK
; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
;; We should have cloned bar, baz, and foo, for the cold memory allocation.
@@ -70,7 +70,7 @@
; RUN: -memprof-report-hinted-sizes \
; RUN: -stats -pass-remarks=memprof-context-disambiguation \
; RUN: -o %t2.out 2>&1 | FileCheck %s --check-prefix=DUMP \
-; RUN: --check-prefix=STATS --check-prefix=SIZES
+; RUN: --check-prefix=STATS --check-prefix=SIZES --check-prefix=REMARK-LINK
; RUN: cat %t2.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
;; We should have cloned bar, baz, and foo, for the cold memory allocation.
@@ -290,8 +290,11 @@ attributes #1 = { "memprof"="ambiguous" }
; DUMP: Clone of [[BAR]]
; SIZES: NotCold full allocation context 123 with total size 100 is NotCold after cloning (context id 1)
+; REMARK-LINK: <unknown>:0:0: MemProf hinting: NotCold full allocation context 123 with total size 100 is NotCold after cloning (context id 1)
; SIZES: Cold full allocation context 456 with total size 200 is Cold after cloning (context id 2)
+; REMARK-LINK: <unknown>:0:0: MemProf hinting: Cold full allocation context 456 with total size 200 is Cold after cloning (context id 2)
; SIZES: Cold full allocation context 789 with total size 300 is Cold after cloning (context id 2)
+; REMARK-LINK: <unknown>:0:0: MemProf hinting: Cold full allocation context 789 with total size 300 is Cold after cloning (context id 2)
; REMARKS: call in clone main assigned to call function clone _Z3foov.memprof.1
; REMARKS: created clone _Z3barv.memprof.1
``````````
</details>
https://github.com/llvm/llvm-project/pull/182570
More information about the llvm-commits mailing list