[llvm] 2b21231 - [MemProf] Enhance thin link optimization remarks (#184829)

via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 5 23:02:03 PST 2026


Author: Teresa Johnson
Date: 2026-03-05T23:01:58-08:00
New Revision: 2b212315f13e4cec44f25560ce5c3fc742657084

URL: https://github.com/llvm/llvm-project/commit/2b212315f13e4cec44f25560ce5c3fc742657084
DIFF: https://github.com/llvm/llvm-project/commit/2b212315f13e4cec44f25560ce5c3fc742657084.diff

LOG: [MemProf] Enhance thin link optimization remarks (#184829)

Don't require -memprof-report-hinted-sizes for emitting opt remarks
during the thin link step. The hinted-size reporting is now also invoked
when opt remarks are enabled for MemProf, as determined by
OptimizationRemarkEmitter::allowExtraAnalysis.

Also, add a fallback message for when we don't have the context size
information, and add tests for the new messages.

I also realized we don't currently emit these messages for MemProf with
regular LTO, and added a TODO.

Added: 
    llvm/test/ThinLTO/X86/remark-missing-info.ll
    llvm/test/Transforms/MemProfContextDisambiguation/remark-missing-info.ll

Modified: 
    llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h
    llvm/lib/LTO/LTO.cpp
    llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
    llvm/test/ThinLTO/X86/memprof-basic.ll
    llvm/test/ThinLTO/X86/memprof-report-hinted-partial.ll
    llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
    llvm/test/Transforms/MemProfContextDisambiguation/inlined3.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h b/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h
index a62f5fde5ed1d..bab7c8cca53ed 100644
--- a/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h
+++ b/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h
@@ -24,6 +24,7 @@
 
 namespace llvm {
 class GlobalValueSummary;
+class LLVMContext;
 class Module;
 class OptimizationRemarkEmitter;
 
@@ -94,6 +95,7 @@ class MemProfContextDisambiguation
   void run(ModuleSummaryIndex &Index,
            function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
                isPrevailing,
+           LLVMContext &Ctx,
            function_ref<void(StringRef, StringRef, const Twine &)> EmitRemark =
                nullptr);
 };

diff  --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 89d642236d244..36360081fa4a0 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -2084,7 +2084,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
   if (EnableMemProfContextDisambiguation) {
     MemProfContextDisambiguation ContextDisambiguation;
     ContextDisambiguation.run(
-        ThinLTO.CombinedIndex, isPrevailing,
+        ThinLTO.CombinedIndex, isPrevailing, RegularLTO.Ctx,
         [&](StringRef PassName, StringRef RemarkName, const Twine &Msg) {
           auto R = OptimizationRemark(PassName.data(), RemarkName,
                                       LinkerRemarkFunction);

diff  --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index afd9469930207..7f6ff6731b29e 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -266,7 +266,8 @@ class CallsiteContextGraph {
 
   /// Main entry point to perform analysis and transformations on graph.
   bool process(function_ref<void(StringRef, StringRef, const Twine &)>
-                   EmitRemark = nullptr);
+                   EmitRemark = nullptr,
+               bool AllowExtraAnalysis = false);
 
   /// Perform cloning on the graph necessary to uniquely identify the allocation
   /// behavior of an allocation based on its context.
@@ -3366,11 +3367,26 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
             Msg += " marked " + getAllocTypeString((uint8_t)AllocTypeFromCall) +
                    " due to cold byte percent";
           // Print the internal context id to aid debugging and visualization.
-          Msg += " (context id " + std::to_string(Id) + ")";
-          OS << Msg << "\n";
+          Msg += " (internal context id " + std::to_string(Id) + ")";
+          if (MemProfReportHintedSizes)
+            OS << Msg << "\n";
           if (EmitRemark)
             EmitRemark(DEBUG_TYPE, "MemProfReport", Msg);
         }
+      } else {
+        // This is only emitted if the context size info is not present.
+        std::string Msg =
+            "MemProf hinting: " + getAllocTypeString((uint8_t)TypeI->second) +
+            " is " + getAllocTypeString(Node->AllocTypes) + " after cloning";
+        if (allocTypeToUse(Node->AllocTypes) != AllocTypeFromCall)
+          Msg += " marked " + getAllocTypeString((uint8_t)AllocTypeFromCall) +
+                 " due to cold byte percent";
+        // Print the internal context id to aid debugging and visualization.
+        Msg += " (internal context id " + std::to_string(Id) + ")";
+        if (MemProfReportHintedSizes)
+          OS << Msg << "\n";
+        if (EmitRemark)
+          EmitRemark(DEBUG_TYPE, "MemProfReport", Msg);
       }
     }
   }
@@ -6304,7 +6320,8 @@ void MemProfContextDisambiguation::performICP(
 
 template <typename DerivedCCG, typename FuncTy, typename CallTy>
 bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process(
-    function_ref<void(StringRef, StringRef, const Twine &)> EmitRemark) {
+    function_ref<void(StringRef, StringRef, const Twine &)> EmitRemark,
+    bool AllowExtraAnalysis) {
   if (DumpCCG) {
     dbgs() << "CCG before cloning:\n";
     dbgs() << *this;
@@ -6338,7 +6355,7 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process(
   if (ExportToDot)
     exportToDot("clonefuncassign");
 
-  if (MemProfReportHintedSizes)
+  if (MemProfReportHintedSizes || AllowExtraAnalysis)
     printTotalSizes(errs(), EmitRemark);
 
   return Changed;
@@ -6365,6 +6382,8 @@ bool MemProfContextDisambiguation::processModule(
     return false;
 
   ModuleCallsiteContextGraph CCG(M, OREGetter);
+  // TODO: Set up remarks for regular LTO. We need to decide what function to
+  // use in the callback.
   return CCG.process();
 }
 
@@ -6429,6 +6448,7 @@ void MemProfContextDisambiguation::run(
     ModuleSummaryIndex &Index,
     llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
         isPrevailing,
+    LLVMContext &Ctx,
     function_ref<void(StringRef, StringRef, const Twine &)> EmitRemark) {
   // TODO: If/when other types of memprof cloning are enabled beyond just for
   // hot and cold, we will need to change this to individually control the
@@ -6438,8 +6458,11 @@ void MemProfContextDisambiguation::run(
   if (!SupportsHotColdNew)
     return;
 
+  bool AllowExtraAnalysis =
+      OptimizationRemarkEmitter::allowExtraAnalysis(Ctx, DEBUG_TYPE);
+
   IndexCallsiteContextGraph CCG(Index, isPrevailing);
-  CCG.process(EmitRemark);
+  CCG.process(EmitRemark, AllowExtraAnalysis);
 }
 
 // Strips MemProf attributes and metadata. Can be invoked by the pass pipeline

diff  --git a/llvm/test/ThinLTO/X86/memprof-basic.ll b/llvm/test/ThinLTO/X86/memprof-basic.ll
index cc982f5f79d0a..5637c51b2f9e9 100644
--- a/llvm/test/ThinLTO/X86/memprof-basic.ll
+++ b/llvm/test/ThinLTO/X86/memprof-basic.ll
@@ -35,6 +35,44 @@
 ; REQUIRES: asserts
 
 ; RUN: opt -thinlto-bc -memprof-report-hinted-sizes %s >%t.o
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:	-supports-hot-cold-new \
+; RUN:	-r=%t.o,main,plx \
+; RUN:	-r=%t.o,_ZdaPv, \
+; RUN:	-r=%t.o,sleep, \
+; RUN:	-r=%t.o,_Znam, \
+; RUN:	-pass-remarks-output=%t.remarks.yaml \
+; RUN:	-o %t.out.2 2>&1
+; RUN: FileCheck %s --check-prefix=REMARKSONLY < %t.remarks.yaml
+
+;; Test that we still get remarks when a filter matches.
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:	-supports-hot-cold-new \
+; RUN:	-r=%t.o,main,plx \
+; RUN:	-r=%t.o,_ZdaPv, \
+; RUN:	-r=%t.o,sleep, \
+; RUN:	-r=%t.o,_Znam, \
+; RUN:	-pass-remarks-filter=memprof-context-disambiguation \
+; RUN:	-pass-remarks-output=%t.remarks.filter.yaml \
+; RUN:	-o %t.out.3 2>&1
+; RUN: FileCheck %s --check-prefix=REMARKSONLY < %t.remarks.filter.yaml
+
+;; Test that we don't get remarks when the filter doesn't match.
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:	-supports-hot-cold-new \
+; RUN:	-r=%t.o,main,plx \
+; RUN:	-r=%t.o,_ZdaPv, \
+; RUN:	-r=%t.o,sleep, \
+; RUN:	-r=%t.o,_Znam, \
+; RUN:	-pass-remarks-filter=nomatch \
+; RUN:	-pass-remarks-output=%t.remarks.nomatch.yaml \
+; RUN:	-o %t.out.4 2>&1
+; RUN: count 0 < %t.remarks.nomatch.yaml
+
+; REMARKSONLY: MemProf hinting: NotCold full allocation context 123 with total size 100 is NotCold after cloning (internal context id 1)
+; REMARKSONLY: MemProf hinting: Cold full allocation context 456 with total size 200 is Cold after cloning (internal context id 2)
+; REMARKSONLY: MemProf hinting: Cold full allocation context 789 with total size 300 is Cold after cloning (internal context id 2)
+
 ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
 ; RUN:	-supports-hot-cold-new \
 ; RUN:	-r=%t.o,main,plx \
@@ -289,12 +327,12 @@ attributes #1 = { "memprof"="ambiguous" }
 ; DUMP: 		Edge from Callee [[BAR2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2
 ; DUMP:		Clone of [[BAR]]
 
-; SIZES: NotCold full allocation context 123 with total size 100 is NotCold after cloning (context id 1)
-; REMARK-LINK: <unknown>:0:0: MemProf hinting: NotCold full allocation context 123 with total size 100 is NotCold after cloning (context id 1)
-; SIZES: Cold full allocation context 456 with total size 200 is Cold after cloning (context id 2)
-; REMARK-LINK: <unknown>:0:0: MemProf hinting: Cold full allocation context 456 with total size 200 is Cold after cloning (context id 2)
-; SIZES: Cold full allocation context 789 with total size 300 is Cold after cloning (context id 2)
-; REMARK-LINK: <unknown>:0:0: MemProf hinting: Cold full allocation context 789 with total size 300 is Cold after cloning (context id 2)
+; SIZES: NotCold full allocation context 123 with total size 100 is NotCold after cloning (internal context id 1)
+; REMARK-LINK: <unknown>:0:0: MemProf hinting: NotCold full allocation context 123 with total size 100 is NotCold after cloning (internal context id 1)
+; SIZES: Cold full allocation context 456 with total size 200 is Cold after cloning (internal context id 2)
+; REMARK-LINK: <unknown>:0:0: MemProf hinting: Cold full allocation context 456 with total size 200 is Cold after cloning (internal context id 2)
+; SIZES: Cold full allocation context 789 with total size 300 is Cold after cloning (internal context id 2)
+; REMARK-LINK: <unknown>:0:0: MemProf hinting: Cold full allocation context 789 with total size 300 is Cold after cloning (internal context id 2)
 
 ; REMARKS: call in clone main assigned to call function clone _Z3foov.memprof.1
 ; REMARKS: created clone _Z3barv.memprof.1

diff  --git a/llvm/test/ThinLTO/X86/memprof-report-hinted-partial.ll b/llvm/test/ThinLTO/X86/memprof-report-hinted-partial.ll
index d4a3f9bca2cab..e6e2562750cc5 100644
--- a/llvm/test/ThinLTO/X86/memprof-report-hinted-partial.ll
+++ b/llvm/test/ThinLTO/X86/memprof-report-hinted-partial.ll
@@ -17,8 +17,8 @@
 ;; We should only get these two messages from -memprof-report-hinted-sizes
 ;; as they are the only MIBs with recorded context size info.
 ; SIZES-NOT: full allocation context
-; SIZES: Cold full allocation context 456 with total size 200 is Cold after cloning (context id 2)
-; SIZES: Cold full allocation context 789 with total size 300 is Cold after cloning (context id 2)
+; SIZES: Cold full allocation context 456 with total size 200 is Cold after cloning (internal context id 2)
+; SIZES: Cold full allocation context 789 with total size 300 is Cold after cloning (internal context id 2)
 ; SIZES-NOT: full allocation context
 
 source_filename = "memprof-report-hinted-partial.ll"

diff  --git a/llvm/test/ThinLTO/X86/remark-missing-info.ll b/llvm/test/ThinLTO/X86/remark-missing-info.ll
new file mode 100644
index 0000000000000..dc4556d0a3318
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/remark-missing-info.ll
@@ -0,0 +1,64 @@
+;; Test that we emit a basic remark during the Thin link when context size info is missing.
+
+; RUN: opt -thinlto-bc %s -o %t.o
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:	-supports-hot-cold-new \
+; RUN:	-r=%t.o,main,plx \
+; RUN:	-r=%t.o,_Znam, \
+; RUN:	-pass-remarks-output=%t.remarks.yaml \
+; RUN:	-o %t.out 2>&1
+; RUN: FileCheck %s < %t.remarks.yaml
+
+; CHECK: MemProf hinting: NotCold is NotCold after cloning (internal context id 1)
+; CHECK: MemProf hinting: Cold is Cold after cloning (internal context id 2)
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @main() #0 {
+entry:
+  %call = call noundef ptr @_Z3foov(), !callsite !0
+  %call1 = call noundef ptr @_Z3foov(), !callsite !1
+  ret i32 0
+}
+
+define internal ptr @_Z3barv() #3 {
+entry:
+  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !2, !callsite !7
+  ret ptr null
+}
+
+declare ptr @_Znam(i64)
+
+define internal ptr @_Z3bazv() #4 {
+entry:
+  %call = call noundef ptr @_Z3barv(), !callsite !8
+  ret ptr null
+}
+
+define internal ptr @_Z3foov() #5 {
+entry:
+  %call = call noundef ptr @_Z3bazv(), !callsite !9
+  ret ptr null
+}
+
+attributes #0 = { "tune-cpu"="generic" }
+attributes #3 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
+attributes #4 = { "stack-protector-buffer-size"="8" }
+attributes #5 = { noinline }
+attributes #6 = { builtin }
+
+!llvm.module.flags = !{!20, !21}
+
+!0 = !{i64 8632435727821051414}
+!1 = !{i64 -3421689549917153178}
+!2 = !{!3, !5}
+!3 = !{!4, !"notcold"}
+!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
+!5 = !{!6, !"cold"}
+!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
+!7 = !{i64 9086428284934609951}
+!8 = !{i64 -5964873800580613432}
+!9 = !{i64 2732490490862098848}
+!20 = !{i32 7, !"Dwarf Version", i32 5}
+!21 = !{i32 2, !"Debug Info Version", i32 3}

diff  --git a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
index dda3d2e469c7b..27e42237434f9 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
@@ -277,9 +277,9 @@ attributes #6 = { builtin }
 ; REMARKS: call in clone _Z3bazv assigned to call function clone _Z3barv
 ; REMARKS: call in clone _Z3barv marked with memprof allocation attribute notcold
 
-; SIZES: NotCold full allocation context 123 with total size 100 is NotCold after cloning (context id 1)
-; SIZES: Cold full allocation context 456 with total size 200 is Cold after cloning (context id 2)
-; SIZES: Cold full allocation context 789 with total size 300 is Cold after cloning (context id 2)
+; SIZES: NotCold full allocation context 123 with total size 100 is NotCold after cloning (internal context id 1)
+; SIZES: Cold full allocation context 456 with total size 200 is Cold after cloning (internal context id 2)
+; SIZES: Cold full allocation context 789 with total size 300 is Cold after cloning (internal context id 2)
 
 ; IR: define {{.*}} @main
 ;; The first call to foo does not allocate cold memory. It should call the

diff  --git a/llvm/test/Transforms/MemProfContextDisambiguation/inlined3.ll b/llvm/test/Transforms/MemProfContextDisambiguation/inlined3.ll
index ab48563e040f7..2e36445f902ab 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/inlined3.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/inlined3.ll
@@ -78,13 +78,13 @@
 
 ;; Cold context 234 is cloned, and only the cloned context is Cold hinted
 ;; (the original doesn't full match with any IR due to inlining).
-; SIZES: NotCold full allocation context 456 with total size 400 is NotCold after cloning (context id 2)
-; SIZES: NotCold full allocation context 123 with total size 100 is NotColdCold after cloning (context id 3)
-; SIZES: Cold full allocation context 234 with total size 200 is NotColdCold after cloning (context id 4)
-; SIZES: NotCold full allocation context 123 with total size 100 is NotColdCold after cloning (context id 7)
-; SIZES: Cold full allocation context 345 with total size 300 is Cold after cloning (context id 1)
-; SIZES: Cold full allocation context 345 with total size 300 is Cold after cloning (context id 6)
-; SIZES: Cold full allocation context 234 with total size 200 is Cold after cloning (context id 5)
+; SIZES: NotCold full allocation context 456 with total size 400 is NotCold after cloning (internal context id 2)
+; SIZES: NotCold full allocation context 123 with total size 100 is NotColdCold after cloning (internal context id 3)
+; SIZES: Cold full allocation context 234 with total size 200 is NotColdCold after cloning (internal context id 4)
+; SIZES: NotCold full allocation context 123 with total size 100 is NotColdCold after cloning (internal context id 7)
+; SIZES: Cold full allocation context 345 with total size 300 is Cold after cloning (internal context id 1)
+; SIZES: Cold full allocation context 345 with total size 300 is Cold after cloning (internal context id 6)
+; SIZES: Cold full allocation context 234 with total size 200 is Cold after cloning (internal context id 5)
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"

diff  --git a/llvm/test/Transforms/MemProfContextDisambiguation/remark-missing-info.ll b/llvm/test/Transforms/MemProfContextDisambiguation/remark-missing-info.ll
new file mode 100644
index 0000000000000..396cab7a1edcf
--- /dev/null
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/remark-missing-info.ll
@@ -0,0 +1,59 @@
+;; Test that we emit a basic remark when context size info is missing.
+
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
+; RUN:	-memprof-report-hinted-sizes \
+; RUN:	%s -S 2>&1 | FileCheck %s
+
+; CHECK: MemProf hinting: NotCold is NotCold after cloning (internal context id 1)
+; CHECK: MemProf hinting: Cold is Cold after cloning (internal context id 2)
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @main() #0 {
+entry:
+  %call = call noundef ptr @_Z3foov(), !callsite !0
+  %call1 = call noundef ptr @_Z3foov(), !callsite !1
+  ret i32 0
+}
+
+define internal ptr @_Z3barv() #3 {
+entry:
+  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !2, !callsite !7
+  ret ptr null
+}
+
+declare ptr @_Znam(i64)
+
+define internal ptr @_Z3bazv() #4 {
+entry:
+  %call = call noundef ptr @_Z3barv(), !callsite !8
+  ret ptr null
+}
+
+define internal ptr @_Z3foov() #5 {
+entry:
+  %call = call noundef ptr @_Z3bazv(), !callsite !9
+  ret ptr null
+}
+
+attributes #0 = { "tune-cpu"="generic" }
+attributes #3 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
+attributes #4 = { "stack-protector-buffer-size"="8" }
+attributes #5 = { noinline }
+attributes #6 = { builtin }
+
+!llvm.module.flags = !{!20, !21}
+
+!0 = !{i64 8632435727821051414}
+!1 = !{i64 -3421689549917153178}
+!2 = !{!3, !5}
+!3 = !{!4, !"notcold"}
+!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
+!5 = !{!6, !"cold"}
+!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
+!7 = !{i64 9086428284934609951}
+!8 = !{i64 -5964873800580613432}
+!9 = !{i64 2732490490862098848}
+!20 = !{i32 7, !"Dwarf Version", i32 5}
+!21 = !{i32 2, !"Debug Info Version", i32 3}


        


More information about the llvm-commits mailing list