[llvm] c7b421d - [MemProf] Attach value profile metadata to the IR using CalleeGuids. (#141164)
via llvm-commits
llvm-commits at lists.llvm.org
Sat May 31 12:53:34 PDT 2025
Author: Snehasish Kumar
Date: 2025-05-31T12:53:30-07:00
New Revision: c7b421deac59948690910dd3e1bb16ef590846a3
URL: https://github.com/llvm/llvm-project/commit/c7b421deac59948690910dd3e1bb16ef590846a3
DIFF: https://github.com/llvm/llvm-project/commit/c7b421deac59948690910dd3e1bb16ef590846a3.diff
LOG: [MemProf] Attach value profile metadata to the IR using CalleeGuids. (#141164)
Use the newly introduced CalleeGuids in CallSiteInfo to annotate the IR
where necessary with value profile metadata. Use a synthetic count of 1
since we don't have actual counts in the profile collection.
Added:
llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test
Modified:
llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index c03aa5accc011..177b94003af25 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -178,6 +178,12 @@ static cl::opt<bool>
cl::desc("Salvage stale MemProf profile"),
cl::init(false), cl::Hidden);
+static cl::opt<bool> ClMemProfAttachCalleeGuids(
+ "memprof-attach-calleeguids",
+ cl::desc(
+ "Attach calleeguids as value profile metadata for indirect calls."),
+ cl::init(true), cl::Hidden); // On by default; checked in addVPMetadata.
+
extern cl::opt<bool> MemProfReportHintedSizes;
extern cl::opt<unsigned> MinClonedColdBytePercent;
extern cl::opt<unsigned> MinCallsiteColdBytePercent;
@@ -952,6 +958,46 @@ undriftMemProfRecord(const DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
UndriftCallStack(CS.Frames);
}
+// Annotate I with CalleeGuids as value profile metadata if it has none yet.
+static void addVPMetadata(Module &M, Instruction &I,
+ ArrayRef<GlobalValue::GUID> CalleeGuids) {
+ if (!ClMemProfAttachCalleeGuids || CalleeGuids.empty())
+ return; // Disabled by flag, or there are no callee GUIDs to attach.
+
+ if (I.getMetadata(LLVMContext::MD_prof)) {
+ uint64_t Unused; // Passed to the query below and never read afterwards.
+ // TODO: When merging is implemented, increase this to a typical ICP value
+ // (e.g., 3-6). For now we only need to know whether any data exists, so 1
+ // is sufficient.
+ auto ExistingVD = getValueProfDataFromInst(I, IPVK_IndirectCallTarget,
+ /*MaxNumValueData=*/1, Unused);
+ // We don't know how to merge value profile data yet, so keep what exists.
+ if (!ExistingVD.empty()) {
+ return;
+ }
+ }
+
+ SmallVector<InstrProfValueData, 4> VDs;
+ uint64_t TotalCount = 0;
+
+ for (const GlobalValue::GUID CalleeGUID : CalleeGuids) {
+ InstrProfValueData VD;
+ VD.Value = CalleeGUID;
+ // MemProf profiles carry no per-target call counts, so every potential
+ // target gets the same synthetic weight of 1.
+ // TODO: Consider making this weight configurable or increasing it to
+ // improve effectiveness for ICP.
+ VD.Count = 1;
+ VDs.push_back(VD);
+ TotalCount += VD.Count; // Ends up equal to the number of callee GUIDs.
+ }
+
+ if (!VDs.empty()) { // Always true here: CalleeGuids was non-empty above.
+ annotateValueSite(M, I, VDs, TotalCount, IPVK_IndirectCallTarget,
+ VDs.size());
+ }
+}
+
static void
readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
const TargetLibraryInfo &TLI,
@@ -1020,15 +1066,35 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
// Build maps of the location hash to all profile data with that leaf location
// (allocation info and the callsites).
std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
- // A hash function for std::unordered_set<ArrayRef<Frame>> to work.
- struct CallStackHash {
- size_t operator()(ArrayRef<Frame> CS) const {
- return computeFullStackId(CS);
+
+ // Lightweight view of one callsite: a slice of its frames plus its callee
+ // GUIDs. Storing a pointer to the CallSiteInfo struct would also work, but
+ // the frame index would then be needed too; ArrayRefs read more easily.
+ struct CallSiteEntry {
+ // Subset of frames for the corresponding CallSiteInfo.
+ ArrayRef<Frame> Frames;
+ // Potential targets for indirect calls.
+ ArrayRef<GlobalValue::GUID> CalleeGuids;
+
+ // Equality looks only at Frames; CalleeGuids does not participate.
+ // Frames are compared by identity (same data pointer and same length)
+ // rather than with ArrayRef's element-wise operator==: two entries are
+ // equal only if they are the same slice of the same underlying array.
+ bool operator==(const CallSiteEntry &Other) const {
+ return Frames.data() == Other.Frames.data() &&
+ Frames.size() == Other.Frames.size();
+ }
+ };
+
+ struct CallSiteEntryHash { // Hasher for the unordered_set of CallSiteEntry.
+ size_t operator()(const CallSiteEntry &Entry) const {
+ return computeFullStackId(Entry.Frames); // CalleeGuids is not hashed.
}
};
+
// For the callsites we need to record slices of the frame array (see comments
- // below where the map entries are added).
- std::map<uint64_t, std::unordered_set<ArrayRef<Frame>, CallStackHash>>
+ // below where the map entries are added) along with their CalleeGuids.
+ std::map<uint64_t, std::unordered_set<CallSiteEntry, CallSiteEntryHash>>
LocHashToCallSites;
for (auto &AI : MemProfRec->AllocSites) {
NumOfMemProfAllocContextProfiles++;
@@ -1046,8 +1112,10 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
unsigned Idx = 0;
for (auto &StackFrame : CS.Frames) {
uint64_t StackId = computeStackId(StackFrame);
- LocHashToCallSites[StackId].insert(
- ArrayRef<Frame>(CS.Frames).drop_front(Idx++));
+ ArrayRef<Frame> FrameSlice = ArrayRef<Frame>(CS.Frames).drop_front(Idx++);
+ ArrayRef<GlobalValue::GUID> CalleeGuids(CS.CalleeGuids);
+ LocHashToCallSites[StackId].insert({FrameSlice, CalleeGuids});
+
ProfileHasColumns |= StackFrame.Column;
// Once we find this function, we can stop recording.
if (StackFrame.Function == FuncGUID)
@@ -1191,13 +1259,18 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
// Otherwise, add callsite metadata. If we reach here then we found the
// instruction's leaf location in the callsites map and not the allocation
// map.
- for (auto CallStackIdx : CallSitesIter->second) {
+ for (const auto &CallSiteEntry : CallSitesIter->second) {
// If we found and thus matched all frames on the call, create and
// attach call stack metadata.
- if (stackFrameIncludesInlinedCallStack(CallStackIdx,
+ if (stackFrameIncludesInlinedCallStack(CallSiteEntry.Frames,
InlinedCallStack)) {
NumOfMemProfMatchedCallSites++;
addCallsiteMetadata(I, InlinedCallStack, Ctx);
+
+ // Try to attach indirect call metadata if possible.
+ if (!CalledFunction)
+ addVPMetadata(M, I, CallSiteEntry.CalleeGuids);
+
// Only need to find one with a matching call stack and add a single
// callsite metadata.
diff --git a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test
new file mode 100644
index 0000000000000..ad83da285694a
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test
@@ -0,0 +1,88 @@
+; RUN: split-file %s %t
+
+;; Basic functionality with flag toggle
+; RUN: llvm-profdata merge --memprof-version=4 %t/basic.yaml -o %t/basic.memprofdata
+; RUN: opt < %t/basic.ll -passes='memprof-use<profile-filename=%t/basic.memprofdata>' -memprof-attach-calleeguids=false -S 2>&1 | FileCheck %s --check-prefix=CHECK-DISABLE
+; RUN: opt < %t/basic.ll -passes='memprof-use<profile-filename=%t/basic.memprofdata>' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s --check-prefix=CHECK-ENABLE
+
+;; FDO conflict handling
+; RUN: llvm-profdata merge --memprof-version=4 %t/fdo_conflict.yaml -o %t/fdo_conflict.memprofdata
+; RUN: opt < %t/fdo_conflict.ll -passes='memprof-use<profile-filename=%t/fdo_conflict.memprofdata>' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s --check-prefix=CHECK-FDO
+
+;--- basic.yaml
+---
+HeapProfileRecords:
+ - GUID: _Z3barv
+ AllocSites: []
+ CallSites:
+ - Frames:
+ - { Function: _Z3barv, LineOffset: 3, Column: 5, IsInlineFrame: false }
+ CalleeGuids: [0x123456789abcdef0, 0x23456789abcdef01]
+...
+
+;--- basic.ll
+define dso_local void @_Z3barv() !dbg !4 {
+entry:
+ %fp = alloca ptr, align 8 ; never stored to in this function
+ %0 = load ptr, ptr %fp, align 8 ; callee pointer, so the call below is indirect
+ call void %0(), !dbg !5 ; !5 places this call at line 4, column 5
+; CHECK-ENABLE: call void %0(), {{.*}} !prof !6
+; CHECK-DISABLE-NOT: !prof
+ ret void
+}
+
+; CHECK-ENABLE: !6 = !{!"VP", i32 0, i64 2, i64 1311768467463790320, i64 1, i64 2541551405711093505, i64 1}
+
+!llvm.module.flags = !{!2, !3}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1)
+!1 = !DIFile(filename: "t", directory: "/")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !1, file: !1, line: 1, unit: !0)
+!5 = !DILocation(line: 4, column: 5, scope: !4)
+
+;--- fdo_conflict.yaml
+---
+HeapProfileRecords:
+ - GUID: _Z3foov
+ AllocSites: []
+ CallSites:
+ - Frames:
+ - { Function: _Z3foov, LineOffset: 3, Column: 5, IsInlineFrame: false }
+ CalleeGuids: [0x123456789abcdef0, 0x23456789abcdef01]
+ - Frames:
+ - { Function: _Z3foov, LineOffset: 5, Column: 5, IsInlineFrame: false }
+ CalleeGuids: [0x555556789abcdef0, 0x666656789abcdef1]
+...
+
+;--- fdo_conflict.ll
+define dso_local void @_Z3foov() !dbg !14 {
+entry:
+ %fp = alloca ptr, align 8 ; never stored to in this function
+ %0 = load ptr, ptr %fp, align 8 ; callee pointer for the first indirect call
+ ; This call already has FDO value profile metadata - should NOT be modified
+ ; CHECK-FDO: call void %0(), {{.*}} !prof !6
+ call void %0(), !dbg !15, !prof !16 ; line 4, column 5; !16 is the existing VP data
+
+ %1 = load ptr, ptr %fp, align 8 ; callee pointer for the second indirect call
+ ; This call does NOT have existing metadata - should get MemProf annotation
+ ; CHECK-FDO: call void %1(), {{.*}} !prof !9
+ call void %1(), !dbg !17 ; line 6, column 5; carries no !prof on input
+ ret void
+}
+
+!16 = !{!"VP", i32 0, i64 100, i64 9191153033785521275, i64 80, i64 -1069303473483922844, i64 20}
+
+; CHECK-FDO: !6 = !{!"VP", i32 0, i64 100, i64 9191153033785521275, i64 80, i64 -1069303473483922844, i64 20}
+; CHECK-FDO: !9 = !{!"VP", i32 0, i64 2, i64 6148915942236413680, i64 1, i64 7378680115485269745, i64 1}
+
+!llvm.module.flags = !{!12, !13}
+
+!10 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !11)
+!11 = !DIFile(filename: "t", directory: "/")
+!12 = !{i32 7, !"Dwarf Version", i32 5}
+!13 = !{i32 2, !"Debug Info Version", i32 3}
+!14 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !11, file: !11, line: 1, unit: !10)
+!15 = !DILocation(line: 4, column: 5, scope: !14)
+!17 = !DILocation(line: 6, column: 5, scope: !14)
More information about the llvm-commits
mailing list