[llvm] 2b21231 - [MemProf] Enhance thin link optimization remarks (#184829)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 5 23:02:03 PST 2026
Author: Teresa Johnson
Date: 2026-03-05T23:01:58-08:00
New Revision: 2b212315f13e4cec44f25560ce5c3fc742657084
URL: https://github.com/llvm/llvm-project/commit/2b212315f13e4cec44f25560ce5c3fc742657084
DIFF: https://github.com/llvm/llvm-project/commit/2b212315f13e4cec44f25560ce5c3fc742657084.diff
LOG: [MemProf] Enhance thin link optimization remarks (#184829)
Don't require -memprof-report-hinted-sizes for emitting opt remarks
during the thin link step. Also invoke the reporting when opt remarks
are enabled for MemProf, as determined by
OptimizationRemarkEmitter::allowExtraAnalysis.
Also, add a fallback message for when the context size information is
missing, and add tests for the new messages.
I also realized we don't currently emit these messages for MemProf with
regular LTO, and added a TODO.
Added:
llvm/test/ThinLTO/X86/remark-missing-info.ll
llvm/test/Transforms/MemProfContextDisambiguation/remark-missing-info.ll
Modified:
llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h
llvm/lib/LTO/LTO.cpp
llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
llvm/test/ThinLTO/X86/memprof-basic.ll
llvm/test/ThinLTO/X86/memprof-report-hinted-partial.ll
llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
llvm/test/Transforms/MemProfContextDisambiguation/inlined3.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h b/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h
index a62f5fde5ed1d..bab7c8cca53ed 100644
--- a/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h
+++ b/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h
@@ -24,6 +24,7 @@
namespace llvm {
class GlobalValueSummary;
+class LLVMContext;
class Module;
class OptimizationRemarkEmitter;
@@ -94,6 +95,7 @@ class MemProfContextDisambiguation
void run(ModuleSummaryIndex &Index,
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
isPrevailing,
+ LLVMContext &Ctx,
function_ref<void(StringRef, StringRef, const Twine &)> EmitRemark =
nullptr);
};
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 89d642236d244..36360081fa4a0 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -2084,7 +2084,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
if (EnableMemProfContextDisambiguation) {
MemProfContextDisambiguation ContextDisambiguation;
ContextDisambiguation.run(
- ThinLTO.CombinedIndex, isPrevailing,
+ ThinLTO.CombinedIndex, isPrevailing, RegularLTO.Ctx,
[&](StringRef PassName, StringRef RemarkName, const Twine &Msg) {
auto R = OptimizationRemark(PassName.data(), RemarkName,
LinkerRemarkFunction);
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index afd9469930207..7f6ff6731b29e 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -266,7 +266,8 @@ class CallsiteContextGraph {
/// Main entry point to perform analysis and transformations on graph.
bool process(function_ref<void(StringRef, StringRef, const Twine &)>
- EmitRemark = nullptr);
+ EmitRemark = nullptr,
+ bool AllowExtraAnalysis = false);
/// Perform cloning on the graph necessary to uniquely identify the allocation
/// behavior of an allocation based on its context.
@@ -3366,11 +3367,26 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
Msg += " marked " + getAllocTypeString((uint8_t)AllocTypeFromCall) +
" due to cold byte percent";
// Print the internal context id to aid debugging and visualization.
- Msg += " (context id " + std::to_string(Id) + ")";
- OS << Msg << "\n";
+ Msg += " (internal context id " + std::to_string(Id) + ")";
+ if (MemProfReportHintedSizes)
+ OS << Msg << "\n";
if (EmitRemark)
EmitRemark(DEBUG_TYPE, "MemProfReport", Msg);
}
+ } else {
+ // This is only emitted if the context size info is not present.
+ std::string Msg =
+ "MemProf hinting: " + getAllocTypeString((uint8_t)TypeI->second) +
+ " is " + getAllocTypeString(Node->AllocTypes) + " after cloning";
+ if (allocTypeToUse(Node->AllocTypes) != AllocTypeFromCall)
+ Msg += " marked " + getAllocTypeString((uint8_t)AllocTypeFromCall) +
+ " due to cold byte percent";
+ // Print the internal context id to aid debugging and visualization.
+ Msg += " (internal context id " + std::to_string(Id) + ")";
+ if (MemProfReportHintedSizes)
+ OS << Msg << "\n";
+ if (EmitRemark)
+ EmitRemark(DEBUG_TYPE, "MemProfReport", Msg);
}
}
}
@@ -6304,7 +6320,8 @@ void MemProfContextDisambiguation::performICP(
template <typename DerivedCCG, typename FuncTy, typename CallTy>
bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process(
- function_ref<void(StringRef, StringRef, const Twine &)> EmitRemark) {
+ function_ref<void(StringRef, StringRef, const Twine &)> EmitRemark,
+ bool AllowExtraAnalysis) {
if (DumpCCG) {
dbgs() << "CCG before cloning:\n";
dbgs() << *this;
@@ -6338,7 +6355,7 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process(
if (ExportToDot)
exportToDot("clonefuncassign");
- if (MemProfReportHintedSizes)
+ if (MemProfReportHintedSizes || AllowExtraAnalysis)
printTotalSizes(errs(), EmitRemark);
return Changed;
@@ -6365,6 +6382,8 @@ bool MemProfContextDisambiguation::processModule(
return false;
ModuleCallsiteContextGraph CCG(M, OREGetter);
+ // TODO: Set up remarks for regular LTO. We need to decide what function to
+ // use in the callback.
return CCG.process();
}
@@ -6429,6 +6448,7 @@ void MemProfContextDisambiguation::run(
ModuleSummaryIndex &Index,
llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
isPrevailing,
+ LLVMContext &Ctx,
function_ref<void(StringRef, StringRef, const Twine &)> EmitRemark) {
// TODO: If/when other types of memprof cloning are enabled beyond just for
// hot and cold, we will need to change this to individually control the
@@ -6438,8 +6458,11 @@ void MemProfContextDisambiguation::run(
if (!SupportsHotColdNew)
return;
+ bool AllowExtraAnalysis =
+ OptimizationRemarkEmitter::allowExtraAnalysis(Ctx, DEBUG_TYPE);
+
IndexCallsiteContextGraph CCG(Index, isPrevailing);
- CCG.process(EmitRemark);
+ CCG.process(EmitRemark, AllowExtraAnalysis);
}
// Strips MemProf attributes and metadata. Can be invoked by the pass pipeline
diff --git a/llvm/test/ThinLTO/X86/memprof-basic.ll b/llvm/test/ThinLTO/X86/memprof-basic.ll
index cc982f5f79d0a..5637c51b2f9e9 100644
--- a/llvm/test/ThinLTO/X86/memprof-basic.ll
+++ b/llvm/test/ThinLTO/X86/memprof-basic.ll
@@ -35,6 +35,44 @@
; REQUIRES: asserts
; RUN: opt -thinlto-bc -memprof-report-hinted-sizes %s >%t.o
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN: -supports-hot-cold-new \
+; RUN: -r=%t.o,main,plx \
+; RUN: -r=%t.o,_ZdaPv, \
+; RUN: -r=%t.o,sleep, \
+; RUN: -r=%t.o,_Znam, \
+; RUN: -pass-remarks-output=%t.remarks.yaml \
+; RUN: -o %t.out.2 2>&1
+; RUN: FileCheck %s --check-prefix=REMARKSONLY < %t.remarks.yaml
+
+;; Test that we still get remarks when a filter matches.
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN: -supports-hot-cold-new \
+; RUN: -r=%t.o,main,plx \
+; RUN: -r=%t.o,_ZdaPv, \
+; RUN: -r=%t.o,sleep, \
+; RUN: -r=%t.o,_Znam, \
+; RUN: -pass-remarks-filter=memprof-context-disambiguation \
+; RUN: -pass-remarks-output=%t.remarks.filter.yaml \
+; RUN: -o %t.out.3 2>&1
+; RUN: FileCheck %s --check-prefix=REMARKSONLY < %t.remarks.filter.yaml
+
+;; Test that we don't get remarks when the filter doesn't match.
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN: -supports-hot-cold-new \
+; RUN: -r=%t.o,main,plx \
+; RUN: -r=%t.o,_ZdaPv, \
+; RUN: -r=%t.o,sleep, \
+; RUN: -r=%t.o,_Znam, \
+; RUN: -pass-remarks-filter=nomatch \
+; RUN: -pass-remarks-output=%t.remarks.nomatch.yaml \
+; RUN: -o %t.out.4 2>&1
+; RUN: count 0 < %t.remarks.nomatch.yaml
+
+; REMARKSONLY: MemProf hinting: NotCold full allocation context 123 with total size 100 is NotCold after cloning (internal context id 1)
+; REMARKSONLY: MemProf hinting: Cold full allocation context 456 with total size 200 is Cold after cloning (internal context id 2)
+; REMARKSONLY: MemProf hinting: Cold full allocation context 789 with total size 300 is Cold after cloning (internal context id 2)
+
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
; RUN: -supports-hot-cold-new \
; RUN: -r=%t.o,main,plx \
@@ -289,12 +327,12 @@ attributes #1 = { "memprof"="ambiguous" }
; DUMP: Edge from Callee [[BAR2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2
; DUMP: Clone of [[BAR]]
-; SIZES: NotCold full allocation context 123 with total size 100 is NotCold after cloning (context id 1)
-; REMARK-LINK: <unknown>:0:0: MemProf hinting: NotCold full allocation context 123 with total size 100 is NotCold after cloning (context id 1)
-; SIZES: Cold full allocation context 456 with total size 200 is Cold after cloning (context id 2)
-; REMARK-LINK: <unknown>:0:0: MemProf hinting: Cold full allocation context 456 with total size 200 is Cold after cloning (context id 2)
-; SIZES: Cold full allocation context 789 with total size 300 is Cold after cloning (context id 2)
-; REMARK-LINK: <unknown>:0:0: MemProf hinting: Cold full allocation context 789 with total size 300 is Cold after cloning (context id 2)
+; SIZES: NotCold full allocation context 123 with total size 100 is NotCold after cloning (internal context id 1)
+; REMARK-LINK: <unknown>:0:0: MemProf hinting: NotCold full allocation context 123 with total size 100 is NotCold after cloning (internal context id 1)
+; SIZES: Cold full allocation context 456 with total size 200 is Cold after cloning (internal context id 2)
+; REMARK-LINK: <unknown>:0:0: MemProf hinting: Cold full allocation context 456 with total size 200 is Cold after cloning (internal context id 2)
+; SIZES: Cold full allocation context 789 with total size 300 is Cold after cloning (internal context id 2)
+; REMARK-LINK: <unknown>:0:0: MemProf hinting: Cold full allocation context 789 with total size 300 is Cold after cloning (internal context id 2)
; REMARKS: call in clone main assigned to call function clone _Z3foov.memprof.1
; REMARKS: created clone _Z3barv.memprof.1
diff --git a/llvm/test/ThinLTO/X86/memprof-report-hinted-partial.ll b/llvm/test/ThinLTO/X86/memprof-report-hinted-partial.ll
index d4a3f9bca2cab..e6e2562750cc5 100644
--- a/llvm/test/ThinLTO/X86/memprof-report-hinted-partial.ll
+++ b/llvm/test/ThinLTO/X86/memprof-report-hinted-partial.ll
@@ -17,8 +17,8 @@
;; We should only get these two messages from -memprof-report-hinted-sizes
;; as they are the only MIBs with recorded context size info.
; SIZES-NOT: full allocation context
-; SIZES: Cold full allocation context 456 with total size 200 is Cold after cloning (context id 2)
-; SIZES: Cold full allocation context 789 with total size 300 is Cold after cloning (context id 2)
+; SIZES: Cold full allocation context 456 with total size 200 is Cold after cloning (internal context id 2)
+; SIZES: Cold full allocation context 789 with total size 300 is Cold after cloning (internal context id 2)
; SIZES-NOT: full allocation context
source_filename = "memprof-report-hinted-partial.ll"
diff --git a/llvm/test/ThinLTO/X86/remark-missing-info.ll b/llvm/test/ThinLTO/X86/remark-missing-info.ll
new file mode 100644
index 0000000000000..dc4556d0a3318
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/remark-missing-info.ll
@@ -0,0 +1,64 @@
+;; Test that we emit a basic remark during the Thin link when context size info is missing.
+
+; RUN: opt -thinlto-bc %s -o %t.o
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN: -supports-hot-cold-new \
+; RUN: -r=%t.o,main,plx \
+; RUN: -r=%t.o,_Znam, \
+; RUN: -pass-remarks-output=%t.remarks.yaml \
+; RUN: -o %t.out 2>&1
+; RUN: FileCheck %s < %t.remarks.yaml
+
+; CHECK: MemProf hinting: NotCold is NotCold after cloning (internal context id 1)
+; CHECK: MemProf hinting: Cold is Cold after cloning (internal context id 2)
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @main() #0 {
+entry:
+ %call = call noundef ptr @_Z3foov(), !callsite !0
+ %call1 = call noundef ptr @_Z3foov(), !callsite !1
+ ret i32 0
+}
+
+define internal ptr @_Z3barv() #3 {
+entry:
+ %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !2, !callsite !7
+ ret ptr null
+}
+
+declare ptr @_Znam(i64)
+
+define internal ptr @_Z3bazv() #4 {
+entry:
+ %call = call noundef ptr @_Z3barv(), !callsite !8
+ ret ptr null
+}
+
+define internal ptr @_Z3foov() #5 {
+entry:
+ %call = call noundef ptr @_Z3bazv(), !callsite !9
+ ret ptr null
+}
+
+attributes #0 = { "tune-cpu"="generic" }
+attributes #3 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
+attributes #4 = { "stack-protector-buffer-size"="8" }
+attributes #5 = { noinline }
+attributes #6 = { builtin }
+
+!llvm.module.flags = !{!20, !21}
+
+!0 = !{i64 8632435727821051414}
+!1 = !{i64 -3421689549917153178}
+!2 = !{!3, !5}
+!3 = !{!4, !"notcold"}
+!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
+!5 = !{!6, !"cold"}
+!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
+!7 = !{i64 9086428284934609951}
+!8 = !{i64 -5964873800580613432}
+!9 = !{i64 2732490490862098848}
+!20 = !{i32 7, !"Dwarf Version", i32 5}
+!21 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
index dda3d2e469c7b..27e42237434f9 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
@@ -277,9 +277,9 @@ attributes #6 = { builtin }
; REMARKS: call in clone _Z3bazv assigned to call function clone _Z3barv
; REMARKS: call in clone _Z3barv marked with memprof allocation attribute notcold
-; SIZES: NotCold full allocation context 123 with total size 100 is NotCold after cloning (context id 1)
-; SIZES: Cold full allocation context 456 with total size 200 is Cold after cloning (context id 2)
-; SIZES: Cold full allocation context 789 with total size 300 is Cold after cloning (context id 2)
+; SIZES: NotCold full allocation context 123 with total size 100 is NotCold after cloning (internal context id 1)
+; SIZES: Cold full allocation context 456 with total size 200 is Cold after cloning (internal context id 2)
+; SIZES: Cold full allocation context 789 with total size 300 is Cold after cloning (internal context id 2)
; IR: define {{.*}} @main
;; The first call to foo does not allocate cold memory. It should call the
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/inlined3.ll b/llvm/test/Transforms/MemProfContextDisambiguation/inlined3.ll
index ab48563e040f7..2e36445f902ab 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/inlined3.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/inlined3.ll
@@ -78,13 +78,13 @@
;; Cold context 234 is cloned, and only the cloned context is Cold hinted
;; (the original doesn't full match with any IR due to inlining).
-; SIZES: NotCold full allocation context 456 with total size 400 is NotCold after cloning (context id 2)
-; SIZES: NotCold full allocation context 123 with total size 100 is NotColdCold after cloning (context id 3)
-; SIZES: Cold full allocation context 234 with total size 200 is NotColdCold after cloning (context id 4)
-; SIZES: NotCold full allocation context 123 with total size 100 is NotColdCold after cloning (context id 7)
-; SIZES: Cold full allocation context 345 with total size 300 is Cold after cloning (context id 1)
-; SIZES: Cold full allocation context 345 with total size 300 is Cold after cloning (context id 6)
-; SIZES: Cold full allocation context 234 with total size 200 is Cold after cloning (context id 5)
+; SIZES: NotCold full allocation context 456 with total size 400 is NotCold after cloning (internal context id 2)
+; SIZES: NotCold full allocation context 123 with total size 100 is NotColdCold after cloning (internal context id 3)
+; SIZES: Cold full allocation context 234 with total size 200 is NotColdCold after cloning (internal context id 4)
+; SIZES: NotCold full allocation context 123 with total size 100 is NotColdCold after cloning (internal context id 7)
+; SIZES: Cold full allocation context 345 with total size 300 is Cold after cloning (internal context id 1)
+; SIZES: Cold full allocation context 345 with total size 300 is Cold after cloning (internal context id 6)
+; SIZES: Cold full allocation context 234 with total size 200 is Cold after cloning (internal context id 5)
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/remark-missing-info.ll b/llvm/test/Transforms/MemProfContextDisambiguation/remark-missing-info.ll
new file mode 100644
index 0000000000000..396cab7a1edcf
--- /dev/null
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/remark-missing-info.ll
@@ -0,0 +1,59 @@
+;; Test that we emit a basic remark when context size info is missing.
+
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
+; RUN: -memprof-report-hinted-sizes \
+; RUN: %s -S 2>&1 | FileCheck %s
+
+; CHECK: MemProf hinting: NotCold is NotCold after cloning (internal context id 1)
+; CHECK: MemProf hinting: Cold is Cold after cloning (internal context id 2)
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @main() #0 {
+entry:
+ %call = call noundef ptr @_Z3foov(), !callsite !0
+ %call1 = call noundef ptr @_Z3foov(), !callsite !1
+ ret i32 0
+}
+
+define internal ptr @_Z3barv() #3 {
+entry:
+ %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !2, !callsite !7
+ ret ptr null
+}
+
+declare ptr @_Znam(i64)
+
+define internal ptr @_Z3bazv() #4 {
+entry:
+ %call = call noundef ptr @_Z3barv(), !callsite !8
+ ret ptr null
+}
+
+define internal ptr @_Z3foov() #5 {
+entry:
+ %call = call noundef ptr @_Z3bazv(), !callsite !9
+ ret ptr null
+}
+
+attributes #0 = { "tune-cpu"="generic" }
+attributes #3 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
+attributes #4 = { "stack-protector-buffer-size"="8" }
+attributes #5 = { noinline }
+attributes #6 = { builtin }
+
+!llvm.module.flags = !{!20, !21}
+
+!0 = !{i64 8632435727821051414}
+!1 = !{i64 -3421689549917153178}
+!2 = !{!3, !5}
+!3 = !{!4, !"notcold"}
+!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
+!5 = !{!6, !"cold"}
+!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
+!7 = !{i64 9086428284934609951}
+!8 = !{i64 -5964873800580613432}
+!9 = !{i64 2732490490862098848}
+!20 = !{i32 7, !"Dwarf Version", i32 5}
+!21 = !{i32 2, !"Debug Info Version", i32 3}
More information about the llvm-commits
mailing list