[llvm] [MemProf] Enhance thin link optimization remarks (PR #184829)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 5 09:51:39 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-lto
Author: Teresa Johnson (teresajohnson)
<details>
<summary>Changes</summary>
Don't require -memprof-report-hinted-sizes in order to emit optimization
remarks during the thin link step. The reporting is now also invoked when
optimization remarks are enabled for MemProf, as determined by
OptimizationRemarkEmitter::allowExtraAnalysis.
Also, add a fallback message for the case where context size information
is not available, and add tests for the new messages.
I also realized that we don't currently emit these messages for MemProf
with regular LTO, so I added a TODO for that.
---
Full diff: https://github.com/llvm/llvm-project/pull/184829.diff
6 Files Affected:
- (modified) llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h (+2)
- (modified) llvm/lib/LTO/LTO.cpp (+1-1)
- (modified) llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp (+27-5)
- (modified) llvm/test/ThinLTO/X86/memprof-basic.ll (+14)
- (added) llvm/test/ThinLTO/X86/remark-missing-info.ll (+64)
- (added) llvm/test/Transforms/MemProfContextDisambiguation/remark-missing-info.ll (+59)
``````````diff
diff --git a/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h b/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h
index a62f5fde5ed1d..bab7c8cca53ed 100644
--- a/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h
+++ b/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h
@@ -24,6 +24,7 @@
namespace llvm {
class GlobalValueSummary;
+class LLVMContext;
class Module;
class OptimizationRemarkEmitter;
@@ -94,6 +95,7 @@ class MemProfContextDisambiguation
void run(ModuleSummaryIndex &Index,
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
isPrevailing,
+ LLVMContext &Ctx,
function_ref<void(StringRef, StringRef, const Twine &)> EmitRemark =
nullptr);
};
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 89d642236d244..36360081fa4a0 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -2084,7 +2084,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
if (EnableMemProfContextDisambiguation) {
MemProfContextDisambiguation ContextDisambiguation;
ContextDisambiguation.run(
- ThinLTO.CombinedIndex, isPrevailing,
+ ThinLTO.CombinedIndex, isPrevailing, RegularLTO.Ctx,
[&](StringRef PassName, StringRef RemarkName, const Twine &Msg) {
auto R = OptimizationRemark(PassName.data(), RemarkName,
LinkerRemarkFunction);
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index afd9469930207..e158332826dc9 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -266,7 +266,8 @@ class CallsiteContextGraph {
/// Main entry point to perform analysis and transformations on graph.
bool process(function_ref<void(StringRef, StringRef, const Twine &)>
- EmitRemark = nullptr);
+ EmitRemark = nullptr,
+ bool AllowExtraAnalysis = false);
/// Perform cloning on the graph necessary to uniquely identify the allocation
/// behavior of an allocation based on its context.
@@ -3367,10 +3368,24 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
" due to cold byte percent";
// Print the internal context id to aid debugging and visualization.
Msg += " (context id " + std::to_string(Id) + ")";
- OS << Msg << "\n";
+ if (MemProfReportHintedSizes)
+ OS << Msg << "\n";
if (EmitRemark)
EmitRemark(DEBUG_TYPE, "MemProfReport", Msg);
}
+ } else {
+ std::string Msg =
+ "MemProf hinting: " + getAllocTypeString((uint8_t)TypeI->second) +
+ " is " + getAllocTypeString(Node->AllocTypes) + " after cloning";
+ if (allocTypeToUse(Node->AllocTypes) != AllocTypeFromCall)
+ Msg += " marked " + getAllocTypeString((uint8_t)AllocTypeFromCall) +
+ " due to cold byte percent";
+ // Print the internal context id to aid debugging and visualization.
+ Msg += " (context id " + std::to_string(Id) + ")";
+ if (MemProfReportHintedSizes)
+ OS << Msg << "\n";
+ if (EmitRemark)
+ EmitRemark(DEBUG_TYPE, "MemProfReport", Msg);
}
}
}
@@ -6304,7 +6319,8 @@ void MemProfContextDisambiguation::performICP(
template <typename DerivedCCG, typename FuncTy, typename CallTy>
bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process(
- function_ref<void(StringRef, StringRef, const Twine &)> EmitRemark) {
+ function_ref<void(StringRef, StringRef, const Twine &)> EmitRemark,
+ bool AllowExtraAnalysis) {
if (DumpCCG) {
dbgs() << "CCG before cloning:\n";
dbgs() << *this;
@@ -6338,7 +6354,7 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process(
if (ExportToDot)
exportToDot("clonefuncassign");
- if (MemProfReportHintedSizes)
+ if (MemProfReportHintedSizes || AllowExtraAnalysis)
printTotalSizes(errs(), EmitRemark);
return Changed;
@@ -6365,6 +6381,8 @@ bool MemProfContextDisambiguation::processModule(
return false;
ModuleCallsiteContextGraph CCG(M, OREGetter);
+ // TODO: Set up remarks for regular LTO. We need to decide what function to
+ // use in the callback.
return CCG.process();
}
@@ -6429,6 +6447,7 @@ void MemProfContextDisambiguation::run(
ModuleSummaryIndex &Index,
llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
isPrevailing,
+ LLVMContext &Ctx,
function_ref<void(StringRef, StringRef, const Twine &)> EmitRemark) {
// TODO: If/when other types of memprof cloning are enabled beyond just for
// hot and cold, we will need to change this to individually control the
@@ -6438,8 +6457,11 @@ void MemProfContextDisambiguation::run(
if (!SupportsHotColdNew)
return;
+ bool AllowExtraAnalysis =
+ OptimizationRemarkEmitter::allowExtraAnalysis(Ctx, DEBUG_TYPE);
+
IndexCallsiteContextGraph CCG(Index, isPrevailing);
- CCG.process(EmitRemark);
+ CCG.process(EmitRemark, AllowExtraAnalysis);
}
// Strips MemProf attributes and metadata. Can be invoked by the pass pipeline
diff --git a/llvm/test/ThinLTO/X86/memprof-basic.ll b/llvm/test/ThinLTO/X86/memprof-basic.ll
index cc982f5f79d0a..06786839732bc 100644
--- a/llvm/test/ThinLTO/X86/memprof-basic.ll
+++ b/llvm/test/ThinLTO/X86/memprof-basic.ll
@@ -35,6 +35,20 @@
; REQUIRES: asserts
; RUN: opt -thinlto-bc -memprof-report-hinted-sizes %s >%t.o
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN: -supports-hot-cold-new \
+; RUN: -r=%t.o,main,plx \
+; RUN: -r=%t.o,_ZdaPv, \
+; RUN: -r=%t.o,sleep, \
+; RUN: -r=%t.o,_Znam, \
+; RUN: -pass-remarks-output=%t.remarks.yaml \
+; RUN: -o %t.out.2 2>&1
+; RUN: FileCheck %s --check-prefix=REMARKSONLY < %t.remarks.yaml
+
+; REMARKSONLY: MemProf hinting: NotCold full allocation context 123 with total size 100 is NotCold after cloning (context id 1)
+; REMARKSONLY: MemProf hinting: Cold full allocation context 456 with total size 200 is Cold after cloning (context id 2)
+; REMARKSONLY: MemProf hinting: Cold full allocation context 789 with total size 300 is Cold after cloning (context id 2)
+
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
; RUN: -supports-hot-cold-new \
; RUN: -r=%t.o,main,plx \
diff --git a/llvm/test/ThinLTO/X86/remark-missing-info.ll b/llvm/test/ThinLTO/X86/remark-missing-info.ll
new file mode 100644
index 0000000000000..c20f91c48d851
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/remark-missing-info.ll
@@ -0,0 +1,64 @@
+;; Test that we emit a basic remark during the Thin link when context size info is missing.
+
+; RUN: opt -thinlto-bc %s -o %t.o
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN: -supports-hot-cold-new \
+; RUN: -r=%t.o,main,plx \
+; RUN: -r=%t.o,_Znam, \
+; RUN: -pass-remarks-output=%t.remarks.yaml \
+; RUN: -o %t.out 2>&1
+; RUN: FileCheck %s < %t.remarks.yaml
+
+; CHECK: MemProf hinting: NotCold is NotCold after cloning (context id 1)
+; CHECK: MemProf hinting: Cold is Cold after cloning (context id 2)
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @main() #0 {
+entry:
+ %call = call noundef ptr @_Z3foov(), !callsite !0
+ %call1 = call noundef ptr @_Z3foov(), !callsite !1
+ ret i32 0
+}
+
+define internal ptr @_Z3barv() #3 {
+entry:
+ %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !2, !callsite !7
+ ret ptr null
+}
+
+declare ptr @_Znam(i64)
+
+define internal ptr @_Z3bazv() #4 {
+entry:
+ %call = call noundef ptr @_Z3barv(), !callsite !8
+ ret ptr null
+}
+
+define internal ptr @_Z3foov() #5 {
+entry:
+ %call = call noundef ptr @_Z3bazv(), !callsite !9
+ ret ptr null
+}
+
+attributes #0 = { "tune-cpu"="generic" }
+attributes #3 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
+attributes #4 = { "stack-protector-buffer-size"="8" }
+attributes #5 = { noinline }
+attributes #6 = { builtin }
+
+!llvm.module.flags = !{!20, !21}
+
+!0 = !{i64 8632435727821051414}
+!1 = !{i64 -3421689549917153178}
+!2 = !{!3, !5}
+!3 = !{!4, !"notcold"}
+!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
+!5 = !{!6, !"cold"}
+!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
+!7 = !{i64 9086428284934609951}
+!8 = !{i64 -5964873800580613432}
+!9 = !{i64 2732490490862098848}
+!20 = !{i32 7, !"Dwarf Version", i32 5}
+!21 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/remark-missing-info.ll b/llvm/test/Transforms/MemProfContextDisambiguation/remark-missing-info.ll
new file mode 100644
index 0000000000000..6d5935b73dfd2
--- /dev/null
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/remark-missing-info.ll
@@ -0,0 +1,59 @@
+;; Test that we emit a basic remark when context size info is missing.
+
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
+; RUN: -memprof-report-hinted-sizes \
+; RUN: %s -S 2>&1 | FileCheck %s
+
+; CHECK: MemProf hinting: NotCold is NotCold after cloning (context id 1)
+; CHECK: MemProf hinting: Cold is Cold after cloning (context id 2)
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @main() #0 {
+entry:
+ %call = call noundef ptr @_Z3foov(), !callsite !0
+ %call1 = call noundef ptr @_Z3foov(), !callsite !1
+ ret i32 0
+}
+
+define internal ptr @_Z3barv() #3 {
+entry:
+ %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !2, !callsite !7
+ ret ptr null
+}
+
+declare ptr @_Znam(i64)
+
+define internal ptr @_Z3bazv() #4 {
+entry:
+ %call = call noundef ptr @_Z3barv(), !callsite !8
+ ret ptr null
+}
+
+define internal ptr @_Z3foov() #5 {
+entry:
+ %call = call noundef ptr @_Z3bazv(), !callsite !9
+ ret ptr null
+}
+
+attributes #0 = { "tune-cpu"="generic" }
+attributes #3 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
+attributes #4 = { "stack-protector-buffer-size"="8" }
+attributes #5 = { noinline }
+attributes #6 = { builtin }
+
+!llvm.module.flags = !{!20, !21}
+
+!0 = !{i64 8632435727821051414}
+!1 = !{i64 -3421689549917153178}
+!2 = !{!3, !5}
+!3 = !{!4, !"notcold"}
+!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
+!5 = !{!6, !"cold"}
+!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
+!7 = !{i64 9086428284934609951}
+!8 = !{i64 -5964873800580613432}
+!9 = !{i64 2732490490862098848}
+!20 = !{i32 7, !"Dwarf Version", i32 5}
+!21 = !{i32 2, !"Debug Info Version", i32 3}
``````````
</details>
https://github.com/llvm/llvm-project/pull/184829
More information about the llvm-commits
mailing list