[llvm] [memprof] Teach extractCallsFromIR to recognize heap allocation functions (PR #115938)

Kazu Hirata via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 12 14:30:04 PST 2024


https://github.com/kazutakahirata updated https://github.com/llvm/llvm-project/pull/115938

>From 29d61f1d05b30dbef3bac812b34ba0340c78cc68 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Fri, 8 Nov 2024 18:24:03 -0800
Subject: [PATCH 1/2] [memprof] Teach extractCallsFromIR to recognize heap
 allocation functions

This patch teaches extractCallsFromIR to recognize heap allocation
functions.  Specifically, when we encounter a callee that is known to
be a heap allocation function like "new", we set the callee GUID to 0.

Note that I am planning to do the same for the caller-callee pairs
extracted from the profile.  That is, when I encounter a frame that
does not have a callee, I will assume that the frame is calling some
heap allocation function with GUID 0.

Technically, I'm not recognizing enough functions in this patch.
TCMalloc is known to drop certain frames in the call stack immediately
above new.  This patch is meant to lay the groundwork, setting up
GetTLI, plumbing it to extractCallsFromIR, and adjusting the unit
tests.  I'll address remaining issues in subsequent patches.
---
 .../Transforms/Instrumentation/MemProfiler.h  |  5 +-
 .../Instrumentation/MemProfiler.cpp           | 14 ++-
 .../Instrumentation/MemProfUseTest.cpp        | 94 ++++++++++++++++++-
 3 files changed, 108 insertions(+), 5 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
index f168ffc4fdb1ef..2f0bf405368870 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
@@ -18,6 +18,7 @@
 namespace llvm {
 class Function;
 class Module;
+class TargetLibraryInfo;
 
 namespace vfs {
 class FileSystem;
@@ -86,7 +87,9 @@ using CallEdgeTy = std::pair<LineLocation, uint64_t>;
 
 // Extract all calls from the IR.  Arrange them in a map from caller GUIDs to a
 // list of call sites, each of the form {LineLocation, CalleeGUID}.
-DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> extractCallsFromIR(Module &M);
+DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
+extractCallsFromIR(Module &M,
+                   function_ref<const TargetLibraryInfo &(Function &)> GetTLI);
 
 } // namespace memprof
 } // namespace llvm
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 64e850c7d9316d..a93792971929fc 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -795,8 +795,8 @@ struct AllocMatchInfo {
   bool Matched = false;
 };
 
-DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
-memprof::extractCallsFromIR(Module &M) {
+DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> memprof::extractCallsFromIR(
+    Module &M, function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
   DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> Calls;
 
   auto GetOffset = [](const DILocation *DIL) {
@@ -820,6 +820,8 @@ memprof::extractCallsFromIR(Module &M) {
           continue;
 
         StringRef CalleeName = CalledFunction->getName();
+        bool IsAlloc =
+            isAllocationWithHotColdVariant(CalledFunction, GetTLI(F));
         for (const DILocation *DIL = I.getDebugLoc(); DIL;
              DIL = DIL->getInlinedAt()) {
           StringRef CallerName = DIL->getSubprogramLinkageName();
@@ -827,9 +829,17 @@ memprof::extractCallsFromIR(Module &M) {
                  "Be sure to enable -fdebug-info-for-profiling");
           uint64_t CallerGUID = IndexedMemProfRecord::getGUID(CallerName);
           uint64_t CalleeGUID = IndexedMemProfRecord::getGUID(CalleeName);
+          // Pretend that we are calling a function with GUID == 0 if we are
+          // calling a heap allocation function.
+          if (IsAlloc)
+            CalleeGUID = 0;
           LineLocation Loc = {GetOffset(DIL), DIL->getColumn()};
           Calls[CallerGUID].emplace_back(Loc, CalleeGUID);
           CalleeName = CallerName;
+          // FIXME: Recognize other frames that are associated with heap
+          // allocation functions.  It may be too early to reset IsAlloc to
+          // false here.
+          IsAlloc = false;
         }
       }
     }
diff --git a/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp b/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp
index c864b06e991dc3..4592a721af3170 100644
--- a/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp
+++ b/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp
@@ -6,9 +6,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/AsmParser/Parser.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
+#include "llvm/Passes/PassBuilder.h"
 #include "llvm/ProfileData/MemProf.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Transforms/Instrumentation/MemProfiler.h"
@@ -80,7 +82,16 @@ declare !dbg !19 void @_Z2f3v()
   std::unique_ptr<Module> M = parseAssemblyString(IR, Err, Ctx);
   ASSERT_TRUE(M);
 
-  auto Calls = extractCallsFromIR(*M);
+  FunctionAnalysisManager FAM;
+  FAM.registerPass([&] { return TargetLibraryAnalysis(); });
+  PassBuilder PB;
+  PB.registerFunctionAnalyses(FAM);
+
+  auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
+    return FAM.getResult<TargetLibraryAnalysis>(F);
+  };
+
+  auto Calls = extractCallsFromIR(*M, GetTLI);
 
   // Expect exactly one caller.
   ASSERT_THAT(Calls, SizeIs(1));
@@ -177,7 +188,16 @@ declare !dbg !25 void @_Z2g2v() local_unnamed_addr
   std::unique_ptr<Module> M = parseAssemblyString(IR, Err, Ctx);
   ASSERT_TRUE(M);
 
-  auto Calls = extractCallsFromIR(*M);
+  FunctionAnalysisManager FAM;
+  FAM.registerPass([&] { return TargetLibraryAnalysis(); });
+  PassBuilder PB;
+  PB.registerFunctionAnalyses(FAM);
+
+  auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
+    return FAM.getResult<TargetLibraryAnalysis>(F);
+  };
+
+  auto Calls = extractCallsFromIR(*M, GetTLI);
 
   // Expect exactly 4 callers.
   ASSERT_THAT(Calls, SizeIs(4));
@@ -220,4 +240,74 @@ declare !dbg !25 void @_Z2g2v() local_unnamed_addr
   EXPECT_THAT(G3CallSites[1],
               Pair(FieldsAre(2U, 3U), IndexedMemProfRecord::getGUID("_Z2g2v")));
 }
+
+TEST(MemProf, ExtractDirectCallsFromIRCallingNew) {
+  // The following IR is generated from:
+  //
+  // int *foo() {
+  //   return ::new (int);
+  // }
+  StringRef IR = R"IR(
+define dso_local noundef ptr @_Z3foov() #0 !dbg !10 {
+entry:
+  %call = call noalias noundef nonnull ptr @_Znwm(i64 noundef 4) #2, !dbg !13
+  ret ptr %call, !dbg !14
+}
+
+; Function Attrs: nobuiltin allocsize(0)
+declare noundef nonnull ptr @_Znwm(i64 noundef) #1
+
+attributes #0 = { mustprogress uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { nobuiltin allocsize(0) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #2 = { builtin allocsize(0) }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8}
+!llvm.ident = !{!9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!1 = !DIFile(filename: "foobar.cc", directory: "/")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{i32 1, !"MemProfProfileFilename", !"memprof.profraw"}
+!6 = !{i32 8, !"PIC Level", i32 2}
+!7 = !{i32 7, !"PIE Level", i32 2}
+!8 = !{i32 7, !"uwtable", i32 2}
+!9 = !{!"clang"}
+!10 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 1, type: !11, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!11 = !DISubroutineType(types: !12)
+!12 = !{}
+!13 = !DILocation(line: 2, column: 10, scope: !10)
+!14 = !DILocation(line: 2, column: 3, scope: !10)
+)IR";
+
+  LLVMContext Ctx;
+  SMDiagnostic Err;
+  std::unique_ptr<Module> M = parseAssemblyString(IR, Err, Ctx);
+  ASSERT_TRUE(M);
+
+  FunctionAnalysisManager FAM;
+  FAM.registerPass([&] { return TargetLibraryAnalysis(); });
+  PassBuilder PB;
+  PB.registerFunctionAnalyses(FAM);
+
+  auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
+    return FAM.getResult<TargetLibraryAnalysis>(F);
+  };
+
+  auto Calls = extractCallsFromIR(*M, GetTLI);
+
+  // Expect exactly one caller.
+  ASSERT_THAT(Calls, SizeIs(1));
+
+  // Verify each key-value pair.
+
+  auto FooIt = Calls.find(IndexedMemProfRecord::getGUID("_Z3foov"));
+  ASSERT_NE(FooIt, Calls.end());
+  const auto &[FooCallerGUID, FooCallSites] = *FooIt;
+  EXPECT_EQ(FooCallerGUID, IndexedMemProfRecord::getGUID("_Z3foov"));
+  ASSERT_THAT(FooCallSites, SizeIs(1));
+  EXPECT_THAT(FooCallSites[0], Pair(FieldsAre(1U, 10U), 0));
+}
 } // namespace

>From 38cfee5c47daec6a2e9d9e0cbf80aca8d603aa1a Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Tue, 12 Nov 2024 14:21:23 -0800
Subject: [PATCH 2/2] Use TargetLibraryInfoWrapperPass.

---
 .../Transforms/Instrumentation/MemProfiler.h  |  3 +-
 .../Instrumentation/MemProfiler.cpp           |  7 ++--
 .../Instrumentation/MemProfUseTest.cpp        | 42 +++++++------------
 3 files changed, 19 insertions(+), 33 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
index 2f0bf405368870..356735a7096780 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
@@ -88,8 +88,7 @@ using CallEdgeTy = std::pair<LineLocation, uint64_t>;
 // Extract all calls from the IR.  Arrange them in a map from caller GUIDs to a
 // list of call sites, each of the form {LineLocation, CalleeGUID}.
 DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
-extractCallsFromIR(Module &M,
-                   function_ref<const TargetLibraryInfo &(Function &)> GetTLI);
+extractCallsFromIR(Module &M, const TargetLibraryInfo &GetTLI);
 
 } // namespace memprof
 } // namespace llvm
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index a93792971929fc..eda07a3d40e153 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -795,8 +795,8 @@ struct AllocMatchInfo {
   bool Matched = false;
 };
 
-DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> memprof::extractCallsFromIR(
-    Module &M, function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
+DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
+memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI) {
   DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> Calls;
 
   auto GetOffset = [](const DILocation *DIL) {
@@ -820,8 +820,7 @@ DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> memprof::extractCallsFromIR(
           continue;
 
         StringRef CalleeName = CalledFunction->getName();
-        bool IsAlloc =
-            isAllocationWithHotColdVariant(CalledFunction, GetTLI(F));
+        bool IsAlloc = isAllocationWithHotColdVariant(CalledFunction, TLI);
         for (const DILocation *DIL = I.getDebugLoc(); DIL;
              DIL = DIL->getInlinedAt()) {
           StringRef CallerName = DIL->getSubprogramLinkageName();
diff --git a/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp b/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp
index 4592a721af3170..cd0e8357a2b2da 100644
--- a/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp
+++ b/llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp
@@ -82,16 +82,12 @@ declare !dbg !19 void @_Z2f3v()
   std::unique_ptr<Module> M = parseAssemblyString(IR, Err, Ctx);
   ASSERT_TRUE(M);
 
-  FunctionAnalysisManager FAM;
-  FAM.registerPass([&] { return TargetLibraryAnalysis(); });
-  PassBuilder PB;
-  PB.registerFunctionAnalyses(FAM);
+  auto *F = M->getFunction("_Z3foov");
+  ASSERT_NE(F, nullptr);
 
-  auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
-    return FAM.getResult<TargetLibraryAnalysis>(F);
-  };
-
-  auto Calls = extractCallsFromIR(*M, GetTLI);
+  TargetLibraryInfoWrapperPass WrapperPass;
+  auto &TLI = WrapperPass.getTLI(*F);
+  auto Calls = extractCallsFromIR(*M, TLI);
 
   // Expect exactly one caller.
   ASSERT_THAT(Calls, SizeIs(1));
@@ -188,16 +184,12 @@ declare !dbg !25 void @_Z2g2v() local_unnamed_addr
   std::unique_ptr<Module> M = parseAssemblyString(IR, Err, Ctx);
   ASSERT_TRUE(M);
 
-  FunctionAnalysisManager FAM;
-  FAM.registerPass([&] { return TargetLibraryAnalysis(); });
-  PassBuilder PB;
-  PB.registerFunctionAnalyses(FAM);
-
-  auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
-    return FAM.getResult<TargetLibraryAnalysis>(F);
-  };
+  auto *F = M->getFunction("_Z3foov");
+  ASSERT_NE(F, nullptr);
 
-  auto Calls = extractCallsFromIR(*M, GetTLI);
+  TargetLibraryInfoWrapperPass WrapperPass;
+  auto &TLI = WrapperPass.getTLI(*F);
+  auto Calls = extractCallsFromIR(*M, TLI);
 
   // Expect exactly 4 callers.
   ASSERT_THAT(Calls, SizeIs(4));
@@ -287,16 +279,12 @@ attributes #2 = { builtin allocsize(0) }
   std::unique_ptr<Module> M = parseAssemblyString(IR, Err, Ctx);
   ASSERT_TRUE(M);
 
-  FunctionAnalysisManager FAM;
-  FAM.registerPass([&] { return TargetLibraryAnalysis(); });
-  PassBuilder PB;
-  PB.registerFunctionAnalyses(FAM);
-
-  auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
-    return FAM.getResult<TargetLibraryAnalysis>(F);
-  };
+  auto *F = M->getFunction("_Z3foov");
+  ASSERT_NE(F, nullptr);
 
-  auto Calls = extractCallsFromIR(*M, GetTLI);
+  TargetLibraryInfoWrapperPass WrapperPass;
+  auto &TLI = WrapperPass.getTLI(*F);
+  auto Calls = extractCallsFromIR(*M, TLI);
 
   // Expect exactly one caller.
   ASSERT_THAT(Calls, SizeIs(1));



More information about the llvm-commits mailing list