[llvm] [MemProf] Optionally match profiles on to manually hinted hot/cold new (PR #91027)

Teresa Johnson via llvm-commits llvm-commits at lists.llvm.org
Fri May 3 15:51:25 PDT 2024


https://github.com/teresajohnson created https://github.com/llvm/llvm-project/pull/91027

While we don't currently rewrite the hints on manually hot/cold hinted
allocations, enable optionally matching profiles onto those allocations
as a first step to being able to do this.

By explicitly checking whether the library function is in the list of
operator new also fixes one limitation of the prior call to isNewLikeFn.
Some operator new calls (those that specify nothrow) are considered
Malloc-like because they may return null. We want to be able to match
and rewrite these. Therefore the new test uses a nothrow variant to test
the fix for this as well.

>From 92c7e0117956077f39b94ababd0795f887759e2b Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Fri, 3 May 2024 15:43:20 -0700
Subject: [PATCH] [MemProf] Optionally match profiles on to manually hinted
 hot/cold new

While we don't currently rewrite the hints on manually hot/cold hinted
allocations, enable optionally matching profiles onto those allocations
as a first step to being able to do this.

By explicitly checking whether the library function is in the list of
operator new also fixes one limitation of the prior call to isNewLikeFn.
Some operator new calls (those that specify nothrow) are considered
Malloc-like because they may return null. We want to be able to match
and rewrite these. Therefore the new test uses a nothrow variant to test
the fix for this as well.
---
 .../Instrumentation/MemProfiler.cpp           | 42 ++++++++++++-
 .../memprof_match_hot_cold_new_calls.ll       | 63 +++++++++++++++++++
 2 files changed, 104 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/PGOProfile/memprof_match_hot_cold_new_calls.ll

diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 2445ba7f0dda99..c0a3bf8464d2d3 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -140,6 +140,15 @@ static cl::opt<int> ClDebugMin("memprof-debug-min", cl::desc("Debug min inst"),
 static cl::opt<int> ClDebugMax("memprof-debug-max", cl::desc("Debug max inst"),
                                cl::Hidden, cl::init(-1));
 
+// By default disable matching of allocation profiles onto operator new that
+// already explicitly pass a hot/cold hint, since we don't currently
+// override these hints anyway.
+static cl::opt<bool> ClMemProfMatchHotColdNew(
+    "memprof-match-hot-cold-new",
+    cl::desc(
+        "Match allocation profiles onto existing hot/cold operator new calls"),
+    cl::Hidden, cl::init(false));
+
 STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
 STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
 STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads");
@@ -661,6 +670,37 @@ stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack,
   return InlCallStackIter == InlinedCallStack.end();
 }
 
+static bool isNewWithHotColdVariant(Function *Callee,
+                                    const TargetLibraryInfo &TLI) {
+  if (!Callee)
+    return false;
+  LibFunc Func;
+  if (!TLI.getLibFunc(*Callee, Func))
+    return false;
+  switch (Func) {
+  case LibFunc_Znwm:
+  case LibFunc_ZnwmRKSt9nothrow_t:
+  case LibFunc_ZnwmSt11align_val_t:
+  case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
+  case LibFunc_Znam:
+  case LibFunc_ZnamRKSt9nothrow_t:
+  case LibFunc_ZnamSt11align_val_t:
+  case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
+    return true;
+  case LibFunc_Znwm12__hot_cold_t:
+  case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
+  case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
+  case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
+  case LibFunc_Znam12__hot_cold_t:
+  case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
+  case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
+  case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
+    return ClMemProfMatchHotColdNew;
+  default:
+    return false;
+  }
+}
+
 static void readMemprof(Module &M, Function &F,
                         IndexedInstrProfReader *MemProfReader,
                         const TargetLibraryInfo &TLI) {
@@ -812,7 +852,7 @@ static void readMemprof(Module &M, Function &F,
       if (AllocInfoIter != LocHashToAllocInfo.end()) {
         // Only consider allocations via new, to reduce unnecessary metadata,
         // since those are the only allocations that will be targeted initially.
-        if (!isNewLikeFn(CI, &TLI))
+        if (!isNewWithHotColdVariant(CI->getCalledFunction(), TLI))
           continue;
         // We may match this instruction's location list to multiple MIB
         // contexts. Add them to a Trie specialized for trimming the contexts to
diff --git a/llvm/test/Transforms/PGOProfile/memprof_match_hot_cold_new_calls.ll b/llvm/test/Transforms/PGOProfile/memprof_match_hot_cold_new_calls.ll
new file mode 100644
index 00000000000000..2fac931e15688f
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/memprof_match_hot_cold_new_calls.ll
@@ -0,0 +1,63 @@
+;; Tests optional matching of memprof profile on call to operator new
+;; with manual hot/cold hint.
+
+;; Avoid failures on big-endian systems that can't read the profile properly
+; REQUIRES: x86_64-linux
+
+;; TODO: Use text profile inputs once that is available for memprof.
+;; This test uses the same raw profile used for memprof.ll, see instructions
+;; in that file for updating.
+
+;; Generate indexed profile
+; RUN: llvm-profdata merge %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.memprofdata
+
+;; By default we should not match profile on to manually hinted operator
+;; new calls, because we don't currently override the manual hints anyway.
+; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -S 2>&1 | FileCheck %s --implicit-check-not !memprof --implicit-check-not !callsite
+
+;; Check that we match profiles onto these manually hinted new calls
+;; under the -memprof-match-hot-cold-new=true option.
+; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -S -memprof-match-hot-cold-new=true 2>&1 | FileCheck %s --check-prefixes=MEMPROF
+
+; ModuleID = 'memprof_match_hot_cold_new_calls.cc'
+source_filename = "memprof_match_hot_cold_new_calls.cc"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%"struct.std::nothrow_t" = type { i8 }
+
+ at _ZSt7nothrow = external global %"struct.std::nothrow_t", align 1
+
+define dso_local noundef ptr @_Z3foov() !dbg !10 {
+entry:
+  ; MEMPROF: call {{.*}} @_Znwm{{.*}} !memprof ![[M1:[0-9]+]], !callsite ![[C1:[0-9]+]]
+  %call = call noalias noundef align 32 ptr @_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t(i64 noundef 32, i64 noundef 32, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow, i8 noundef zeroext 0), !dbg !13
+  ret ptr %call
+}
+
+declare noundef ptr @_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t(i64 noundef, i64 noundef, ptr noundef nonnull align 1 dereferenceable(1), i8 noundef zeroext)
+
+; MEMPROF: ![[M1]] = !{![[MIB1:[0-9]+]], ![[MIB2:[0-9]+]], ![[MIB3:[0-9]+]], ![[MIB4:[0-9]+]], ![[MIB5:[0-9]+]]}
+; MEMPROF: ![[MIB1]] = !{![[STACK1:[0-9]+]], !"cold"}
+; MEMPROF: ![[STACK1]] = !{i64 2732490490862098848, i64 748269490701775343}
+; MEMPROF: ![[MIB2]] = !{![[STACK2:[0-9]+]], !"cold"}
+; MEMPROF: ![[STACK2]] = !{i64 2732490490862098848, i64 2104812325165620841, i64 6281715513834610934, i64 6281715513834610934, i64 6281715513834610934, i64 1544787832369987002}
+; MEMPROF: ![[MIB3]] = !{![[STACK3:[0-9]+]], !"notcold"}
+; MEMPROF: ![[STACK3]] = !{i64 2732490490862098848, i64 2104812325165620841, i64 6281715513834610934, i64 6281715513834610934, i64 6281715513834610934, i64 6281715513834610934}
+; MEMPROF: ![[MIB4]] = !{![[STACK4:[0-9]+]], !"cold"}
+; MEMPROF: ![[STACK4]] = !{i64 2732490490862098848, i64 8467819354083268568}
+; MEMPROF: ![[MIB5]] = !{![[STACK5:[0-9]+]], !"notcold"}
+; MEMPROF: ![[STACK5]] = !{i64 2732490490862098848, i64 8690657650969109624}
+; MEMPROF: ![[C1]] = !{i64 2732490490862098848}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 15.0.0 (https://github.com/llvm/llvm-project.git 6cbe6284d1f0a088b5c6482ae27b738f03d82fe7)", isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!1 = !DIFile(filename: "memprof.cc", directory: "/usr/local/google/home/tejohnson/llvm/tmp", checksumkind: CSK_MD5, checksum: "e8c40ebe4b21776b4d60e9632cbc13c2")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 4, type: !11, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !12)
+!11 = !DISubroutineType(types: !12)
+!12 = !{}
+!13 = !DILocation(line: 5, column: 10, scope: !10)



More information about the llvm-commits mailing list