[llvm] e3905a4 - [MemProf] Merge all callee guids for indirect call VP metadata (#170964)

via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 8 12:47:21 PST 2025


Author: Teresa Johnson
Date: 2025-12-08T12:47:17-08:00
New Revision: e3905a4aa6aaeef643f4e0c64fc6402186548057

URL: https://github.com/llvm/llvm-project/commit/e3905a4aa6aaeef643f4e0c64fc6402186548057
DIFF: https://github.com/llvm/llvm-project/commit/e3905a4aa6aaeef643f4e0c64fc6402186548057.diff

LOG: [MemProf] Merge all callee guids for indirect call VP metadata (#170964)

When matching memprof profiles, for indirect calls we use the callee
guids recorded on callsites in the profile to synthesize indirect call
VP metadata when none exists. However, we only do this for the first
matching CallSiteEntry from the profile.

In some cases there can be multiple, for example when the current
function was eventually inlined into multiple callers. Profile
generation propagates the CallSiteEntry from those callers into the
inlined callee's profile as it may not yet have been inlined in the
new compile.

To capture all of these potential indirect call targets, merge callee
guids across all matching CallSiteEntries.

Added: 
    

Modified: 
    llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
    llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
index c3ff90ea35a72..25953f43e1aa9 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
@@ -509,59 +509,59 @@ struct CallSiteEntry {
   ArrayRef<Frame> Frames;
   // Potential targets for indirect calls.
   ArrayRef<GlobalValue::GUID> CalleeGuids;
-
-  // Only compare Frame contents.
-  // Use pointer-based equality instead of ArrayRef's operator== which does
-  // element-wise comparison. We want to check if it's the same slice of the
-  // underlying array, not just equivalent content.
-  bool operator==(const CallSiteEntry &Other) const {
-    return Frames.data() == Other.Frames.data() &&
-           Frames.size() == Other.Frames.size();
-  }
 };
 
-struct CallSiteEntryHash {
-  size_t operator()(const CallSiteEntry &Entry) const {
-    return computeFullStackId(Entry.Frames);
-  }
-};
-
-static void handleCallSite(
-    Instruction &I, const Function *CalledFunction,
-    ArrayRef<uint64_t> InlinedCallStack,
-    const std::unordered_set<CallSiteEntry, CallSiteEntryHash> &CallSiteEntries,
-    Module &M, std::set<std::vector<uint64_t>> &MatchedCallSites,
-    OptimizationRemarkEmitter &ORE) {
+static void handleCallSite(Instruction &I, const Function *CalledFunction,
+                           ArrayRef<uint64_t> InlinedCallStack,
+                           const std::vector<CallSiteEntry> &CallSiteEntries,
+                           Module &M,
+                           std::set<std::vector<uint64_t>> &MatchedCallSites,
+                           OptimizationRemarkEmitter &ORE) {
   auto &Ctx = M.getContext();
+  // Set of Callee GUIDs to attach to indirect calls. We accumulate all of them
+  // to support cases where the instruction's inlined frames match multiple call
+  // site entries, which can happen if the profile was collected from a binary
+  // where this instruction was eventually inlined into multiple callers.
+  SetVector<GlobalValue::GUID> CalleeGuids;
+  bool CallsiteMDAdded = false;
   for (const auto &CallSiteEntry : CallSiteEntries) {
     // If we found and thus matched all frames on the call, create and
     // attach call stack metadata.
     if (stackFrameIncludesInlinedCallStack(CallSiteEntry.Frames,
                                            InlinedCallStack)) {
       NumOfMemProfMatchedCallSites++;
-      addCallsiteMetadata(I, InlinedCallStack, Ctx);
-
-      // Try to attach indirect call metadata if possible.
-      if (!CalledFunction)
-        addVPMetadata(M, I, CallSiteEntry.CalleeGuids);
-
       // Only need to find one with a matching call stack and add a single
       // callsite metadata.
-
-      // Accumulate call site matching information upon request.
-      if (ClPrintMemProfMatchInfo) {
-        std::vector<uint64_t> CallStack;
-        append_range(CallStack, InlinedCallStack);
-        MatchedCallSites.insert(std::move(CallStack));
+      if (!CallsiteMDAdded) {
+        addCallsiteMetadata(I, InlinedCallStack, Ctx);
+
+        // Accumulate call site matching information upon request.
+        if (ClPrintMemProfMatchInfo) {
+          std::vector<uint64_t> CallStack;
+          append_range(CallStack, InlinedCallStack);
+          MatchedCallSites.insert(std::move(CallStack));
+        }
+        ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemProfUse", &I)
+                 << ore::NV("CallSite", &I) << " in function "
+                 << ore::NV("Caller", I.getFunction())
+                 << " matched callsite with frame count "
+                 << ore::NV("Frames", InlinedCallStack.size()));
+
+        // If this is a direct call, we're done.
+        if (CalledFunction)
+          break;
+        CallsiteMDAdded = true;
       }
-      ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemProfUse", &I)
-               << ore::NV("CallSite", &I) << " in function "
-               << ore::NV("Caller", I.getFunction())
-               << " matched callsite with frame count "
-               << ore::NV("Frames", InlinedCallStack.size()));
-      break;
+
+      assert(!CalledFunction && "Didn't expect direct call");
+
+      // Collect Callee GUIDs from all matching CallSiteEntries.
+      CalleeGuids.insert(CallSiteEntry.CalleeGuids.begin(),
+                         CallSiteEntry.CalleeGuids.end());
     }
   }
+  // Try to attach indirect call metadata if possible.
+  addVPMetadata(M, I, CalleeGuids.getArrayRef());
 }
 
 static void readMemprof(Module &M, Function &F,
@@ -639,8 +639,7 @@ static void readMemprof(Module &M, Function &F,
 
   // For the callsites we need to record slices of the frame array (see comments
   // below where the map entries are added) along with their CalleeGuids.
-  std::map<uint64_t, std::unordered_set<CallSiteEntry, CallSiteEntryHash>>
-      LocHashToCallSites;
+  std::map<uint64_t, std::vector<CallSiteEntry>> LocHashToCallSites;
   for (auto &AI : MemProfRec->AllocSites) {
     NumOfMemProfAllocContextProfiles++;
     // Associate the allocation info with the leaf frame. The later matching
@@ -659,7 +658,7 @@ static void readMemprof(Module &M, Function &F,
       uint64_t StackId = computeStackId(StackFrame);
       ArrayRef<Frame> FrameSlice = ArrayRef<Frame>(CS.Frames).drop_front(Idx++);
       ArrayRef<GlobalValue::GUID> CalleeGuids(CS.CalleeGuids);
-      LocHashToCallSites[StackId].insert({FrameSlice, CalleeGuids});
+      LocHashToCallSites[StackId].push_back({FrameSlice, CalleeGuids});
 
       ProfileHasColumns |= StackFrame.Column;
       // Once we find this function, we can stop recording.

diff  --git a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test
index ad83da285694a..ac7dc77d85b3f 100644
--- a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test
+++ b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test
@@ -3,7 +3,7 @@
 ;; Basic functionality with flag toggle
 ; RUN: llvm-profdata merge --memprof-version=4 %t/basic.yaml -o %t/basic.memprofdata
 ; RUN: opt < %t/basic.ll -passes='memprof-use<profile-filename=%t/basic.memprofdata>' -memprof-attach-calleeguids=false -S 2>&1 | FileCheck %s --check-prefix=CHECK-DISABLE
-; RUN: opt < %t/basic.ll -passes='memprof-use<profile-filename=%t/basic.memprofdata>' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s --check-prefix=CHECK-ENABLE
+; RUN: opt < %t/basic.ll -passes='memprof-use<profile-filename=%t/basic.memprofdata>' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s --check-prefix=CHECK-ENABLE --dump-input-filter=all
 
 ;; FDO conflict handling
 ; RUN: llvm-profdata merge --memprof-version=4 %t/fdo_conflict.yaml -o %t/fdo_conflict.memprofdata
@@ -18,6 +18,18 @@ HeapProfileRecords:
       - Frames:
           - { Function: _Z3barv, LineOffset: 3, Column: 5, IsInlineFrame: false }
         CalleeGuids:   [0x123456789abcdef0, 0x23456789abcdef01]
+      # The next 2 sets of frames simulate the case where this function was
+      # eventually inlined into multiple callers. We would have propagated the
+      # resulting frames and callee guids here for matching with the not yet
+      # inlined bar. We should aggregate all callee guids into the metadata.
+      - Frames:
+          - { Function: _Z3barv, LineOffset: 3, Column: 5, IsInlineFrame: true }
+          - { Function: _Z3foov, LineOffset: 1, Column: 6, IsInlineFrame: false }
+        CalleeGuids:   [0x1234, 0x2345]
+      - Frames:
+          - { Function: _Z3barv, LineOffset: 3, Column: 5, IsInlineFrame: true }
+          - { Function: _Z3foov, LineOffset: 10, Column: 7, IsInlineFrame: false }
+        CalleeGuids:   [0x3456, 0x4567]
 ...
 
 ;--- basic.ll
@@ -31,7 +43,7 @@ entry:
   ret void
 }
 
-; CHECK-ENABLE: !6 = !{!"VP", i32 0, i64 2, i64 1311768467463790320, i64 1, i64 2541551405711093505, i64 1}
+; CHECK-ENABLE: !6 = !{!"VP", i32 0, i64 6, i64 1311768467463790320, i64 1, i64 2541551405711093505, i64 1, i64 4660, i64 1, i64 9029, i64 1, i64 13398, i64 1, i64 17767, i64 1}
 
 !llvm.module.flags = !{!2, !3}
 


        


More information about the llvm-commits mailing list