[llvm] c7b421d - [MemProf] Attach value profile metadata to the IR using CalleeGuids. (#141164)

via llvm-commits llvm-commits at lists.llvm.org
Sat May 31 12:53:34 PDT 2025


Author: Snehasish Kumar
Date: 2025-05-31T12:53:30-07:00
New Revision: c7b421deac59948690910dd3e1bb16ef590846a3

URL: https://github.com/llvm/llvm-project/commit/c7b421deac59948690910dd3e1bb16ef590846a3
DIFF: https://github.com/llvm/llvm-project/commit/c7b421deac59948690910dd3e1bb16ef590846a3.diff

LOG: [MemProf] Attach value profile metadata to the IR using CalleeGuids. (#141164)

Use the newly introduced CalleeGuids in CallSiteInfo to annotate the IR
where necessary with value profile metadata. Use a synthetic count of 1
since we don't have actual counts in the profile collection.

Added: 
    llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test

Modified: 
    llvm/lib/Transforms/Instrumentation/MemProfiler.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index c03aa5accc011..177b94003af25 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -178,6 +178,12 @@ static cl::opt<bool>
                         cl::desc("Salvage stale MemProf profile"),
                         cl::init(false), cl::Hidden);
 
+static cl::opt<bool> ClMemProfAttachCalleeGuids(
+    "memprof-attach-calleeguids",
+    cl::desc(
+        "Attach calleeguids as value profile metadata for indirect calls."),
+    cl::init(true), cl::Hidden);
+
 extern cl::opt<bool> MemProfReportHintedSizes;
 extern cl::opt<unsigned> MinClonedColdBytePercent;
 extern cl::opt<unsigned> MinCallsiteColdBytePercent;
@@ -952,6 +958,46 @@ undriftMemProfRecord(const DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
     UndriftCallStack(CS.Frames);
 }
 
+// Helper function to process CalleeGuids and create value profile metadata
+static void addVPMetadata(Module &M, Instruction &I,
+                          ArrayRef<GlobalValue::GUID> CalleeGuids) {
+  if (!ClMemProfAttachCalleeGuids || CalleeGuids.empty())
+    return;
+
+  if (I.getMetadata(LLVMContext::MD_prof)) {
+    uint64_t Unused;
+    // TODO: When merging is implemented, increase this to a typical ICP value
+    // (e.g., 3-6) For now, we only need to check if existing data exists, so 1
+    // is sufficient
+    auto ExistingVD = getValueProfDataFromInst(I, IPVK_IndirectCallTarget,
+                                               /*MaxNumValueData=*/1, Unused);
+    // We don't know how to merge value profile data yet.
+    if (!ExistingVD.empty()) {
+      return;
+    }
+  }
+
+  SmallVector<InstrProfValueData, 4> VDs;
+  uint64_t TotalCount = 0;
+
+  for (const GlobalValue::GUID CalleeGUID : CalleeGuids) {
+    InstrProfValueData VD;
+    VD.Value = CalleeGUID;
+    // For MemProf, we don't have actual call counts, so we assign
+    // a weight of 1 to each potential target.
+    // TODO: Consider making this weight configurable or increasing it to
+    // improve effectiveness for ICP.
+    VD.Count = 1;
+    VDs.push_back(VD);
+    TotalCount += VD.Count;
+  }
+
+  if (!VDs.empty()) {
+    annotateValueSite(M, I, VDs, TotalCount, IPVK_IndirectCallTarget,
+                      VDs.size());
+  }
+}
+
 static void
 readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
             const TargetLibraryInfo &TLI,
@@ -1020,15 +1066,35 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
   // Build maps of the location hash to all profile data with that leaf location
   // (allocation info and the callsites).
   std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
-  // A hash function for std::unordered_set<ArrayRef<Frame>> to work.
-  struct CallStackHash {
-    size_t operator()(ArrayRef<Frame> CS) const {
-      return computeFullStackId(CS);
+
+  // Helper struct for maintaining refs to callsite data. As an alternative we
+  // could store a pointer to the CallSiteInfo struct but we also need the frame
+  // index. Using ArrayRefs instead makes it a little easier to read.
+  struct CallSiteEntry {
+    // Subset of frames for the corresponding CallSiteInfo.
+    ArrayRef<Frame> Frames;
+    // Potential targets for indirect calls.
+    ArrayRef<GlobalValue::GUID> CalleeGuids;
+
+    // Only compare Frame contents.
+    // Use pointer-based equality instead of ArrayRef's operator== which does
+    // element-wise comparison. We want to check if it's the same slice of the
+    // underlying array, not just equivalent content.
+    bool operator==(const CallSiteEntry &Other) const {
+      return Frames.data() == Other.Frames.data() &&
+             Frames.size() == Other.Frames.size();
+    }
+  };
+
+  struct CallSiteEntryHash {
+    size_t operator()(const CallSiteEntry &Entry) const {
+      return computeFullStackId(Entry.Frames);
     }
   };
+
   // For the callsites we need to record slices of the frame array (see comments
-  // below where the map entries are added).
-  std::map<uint64_t, std::unordered_set<ArrayRef<Frame>, CallStackHash>>
+  // below where the map entries are added) along with their CalleeGuids.
+  std::map<uint64_t, std::unordered_set<CallSiteEntry, CallSiteEntryHash>>
       LocHashToCallSites;
   for (auto &AI : MemProfRec->AllocSites) {
     NumOfMemProfAllocContextProfiles++;
@@ -1046,8 +1112,10 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
     unsigned Idx = 0;
     for (auto &StackFrame : CS.Frames) {
       uint64_t StackId = computeStackId(StackFrame);
-      LocHashToCallSites[StackId].insert(
-          ArrayRef<Frame>(CS.Frames).drop_front(Idx++));
+      ArrayRef<Frame> FrameSlice = ArrayRef<Frame>(CS.Frames).drop_front(Idx++);
+      ArrayRef<GlobalValue::GUID> CalleeGuids(CS.CalleeGuids);
+      LocHashToCallSites[StackId].insert({FrameSlice, CalleeGuids});
+
       ProfileHasColumns |= StackFrame.Column;
       // Once we find this function, we can stop recording.
       if (StackFrame.Function == FuncGUID)
@@ -1191,13 +1259,18 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
       // Otherwise, add callsite metadata. If we reach here then we found the
       // instruction's leaf location in the callsites map and not the allocation
       // map.
-      for (auto CallStackIdx : CallSitesIter->second) {
+      for (const auto &CallSiteEntry : CallSitesIter->second) {
         // If we found and thus matched all frames on the call, create and
         // attach call stack metadata.
-        if (stackFrameIncludesInlinedCallStack(CallStackIdx,
+        if (stackFrameIncludesInlinedCallStack(CallSiteEntry.Frames,
                                                InlinedCallStack)) {
           NumOfMemProfMatchedCallSites++;
           addCallsiteMetadata(I, InlinedCallStack, Ctx);
+
+          // Try to attach indirect call metadata if possible.
+          if (!CalledFunction)
+            addVPMetadata(M, I, CallSiteEntry.CalleeGuids);
+
           // Only need to find one with a matching call stack and add a single
           // callsite metadata.
 

diff  --git a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test
new file mode 100644
index 0000000000000..ad83da285694a
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test
@@ -0,0 +1,88 @@
+; RUN: split-file %s %t
+
+;; Basic functionality with flag toggle
+; RUN: llvm-profdata merge --memprof-version=4 %t/basic.yaml -o %t/basic.memprofdata
+; RUN: opt < %t/basic.ll -passes='memprof-use<profile-filename=%t/basic.memprofdata>' -memprof-attach-calleeguids=false -S 2>&1 | FileCheck %s --check-prefix=CHECK-DISABLE
+; RUN: opt < %t/basic.ll -passes='memprof-use<profile-filename=%t/basic.memprofdata>' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s --check-prefix=CHECK-ENABLE
+
+;; FDO conflict handling
+; RUN: llvm-profdata merge --memprof-version=4 %t/fdo_conflict.yaml -o %t/fdo_conflict.memprofdata
+; RUN: opt < %t/fdo_conflict.ll -passes='memprof-use<profile-filename=%t/fdo_conflict.memprofdata>' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s --check-prefix=CHECK-FDO
+
+;--- basic.yaml
+---
+HeapProfileRecords:
+  - GUID:            _Z3barv
+    AllocSites:      []
+    CallSites:
+      - Frames:
+          - { Function: _Z3barv, LineOffset: 3, Column: 5, IsInlineFrame: false }
+        CalleeGuids:   [0x123456789abcdef0, 0x23456789abcdef01]
+...
+
+;--- basic.ll
+define dso_local void @_Z3barv() !dbg !4 {
+entry:
+  %fp = alloca ptr, align 8
+  %0 = load ptr, ptr %fp, align 8
+  call void %0(), !dbg !5
+; CHECK-ENABLE: call void %0(), {{.*}} !prof !6
+; CHECK-DISABLE-NOT: !prof
+  ret void
+}
+
+; CHECK-ENABLE: !6 = !{!"VP", i32 0, i64 2, i64 1311768467463790320, i64 1, i64 2541551405711093505, i64 1}
+
+!llvm.module.flags = !{!2, !3}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1)
+!1 = !DIFile(filename: "t", directory: "/")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !1, file: !1, line: 1, unit: !0)
+!5 = !DILocation(line: 4, column: 5, scope: !4)
+
+;--- fdo_conflict.yaml
+---
+HeapProfileRecords:
+  - GUID:            _Z3foov
+    AllocSites:      []
+    CallSites:
+      - Frames:
+          - { Function: _Z3foov, LineOffset: 3, Column: 5, IsInlineFrame: false }
+        CalleeGuids:   [0x123456789abcdef0, 0x23456789abcdef01]
+      - Frames:
+          - { Function: _Z3foov, LineOffset: 5, Column: 5, IsInlineFrame: false }
+        CalleeGuids:   [0x555556789abcdef0, 0x666656789abcdef1]
+...
+
+;--- fdo_conflict.ll
+define dso_local void @_Z3foov() !dbg !14 {
+entry:
+  %fp = alloca ptr, align 8
+  %0 = load ptr, ptr %fp, align 8
+  ; This call already has FDO value profile metadata - should NOT be modified
+  ; CHECK-FDO: call void %0(), {{.*}} !prof !6
+  call void %0(), !dbg !15, !prof !16
+
+  %1 = load ptr, ptr %fp, align 8
+  ; This call does NOT have existing metadata - should get MemProf annotation
+  ; CHECK-FDO: call void %1(), {{.*}} !prof !9
+  call void %1(), !dbg !17
+  ret void
+}
+
+!16 = !{!"VP", i32 0, i64 100, i64 9191153033785521275, i64 80, i64 -1069303473483922844, i64 20}
+
+; CHECK-FDO: !6 = !{!"VP", i32 0, i64 100, i64 9191153033785521275, i64 80, i64 -1069303473483922844, i64 20}
+; CHECK-FDO: !9 = !{!"VP", i32 0, i64 2, i64 6148915942236413680, i64 1, i64 7378680115485269745, i64 1}
+
+!llvm.module.flags = !{!12, !13}
+
+!10 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !11)
+!11 = !DIFile(filename: "t", directory: "/")
+!12 = !{i32 7, !"Dwarf Version", i32 5}
+!13 = !{i32 2, !"Debug Info Version", i32 3}
+!14 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !11, file: !11, line: 1, unit: !10)
+!15 = !DILocation(line: 4, column: 5, scope: !14)
+!17 = !DILocation(line: 6, column: 5, scope: !14)


        


More information about the llvm-commits mailing list