[llvm] [MemProf] Attach value profile metadata to the IR using CalleeGuids. (PR #141164)

Snehasish Kumar via llvm-commits llvm-commits at lists.llvm.org
Sat May 31 10:32:09 PDT 2025


https://github.com/snehasish updated https://github.com/llvm/llvm-project/pull/141164

>From 2858bad04e66ce04ba0270da6d34af088b616df2 Mon Sep 17 00:00:00 2001
From: Snehasish Kumar <snehasishk at google.com>
Date: Thu, 22 May 2025 17:00:14 -0700
Subject: [PATCH 01/10] [MemProf] Attach value profile metadata to the IR using
 CalleeGuids.

Use the newly introduced CalleeGuids in CallSiteInfo to annotate the IR
where necessary with value profile metadata. Use a synthetic count of 1
since we don't have actual counts in the profile collection.
---
 .../Instrumentation/MemProfiler.cpp           | 35 ++++++++++++++++++
 .../memprof_annotate_indirect_call_yaml.test  | 37 +++++++++++++++++++
 2 files changed, 72 insertions(+)
 create mode 100644 llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_yaml.test

diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 6538311571529..29206781f1743 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -1209,6 +1209,41 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
                                                InlinedCallStack)) {
           NumOfMemProfMatchedCallSites++;
           addCallsiteMetadata(I, InlinedCallStack, Ctx);
+
+          // Check if this is an indirect call and we have GUID information
+          // from CallSiteInfo to attach value profile metadata
+          if (!CalledFunction) {
+            // This is an indirect call, look for CallSites with matching stacks
+            // that have CalleeGuids information
+            for (auto &CS : MemProfRec->CallSites) {
+              if (!CS.CalleeGuids.empty() && stackFrameIncludesInlinedCallStack(
+                                                 CS.Frames, InlinedCallStack)) {
+                // Create value profile data from the CalleeGuids
+                SmallVector<InstrProfValueData, 4> VDs;
+                uint64_t TotalCount = 0;
+
+                for (GlobalValue::GUID CalleeGUID : CS.CalleeGuids) {
+                  // For MemProf, we don't have actual call counts, so we assign
+                  // a weight of 1 to each potential target. This provides the
+                  // information needed for indirect call promotion without
+                  // specific count data.
+                  InstrProfValueData VD;
+                  VD.Value = CalleeGUID;
+                  VD.Count = 1; // Weight for ICP decision making
+                  VDs.push_back(VD);
+                  TotalCount += VD.Count;
+                }
+
+                if (!VDs.empty()) {
+                  // Attach value profile metadata for indirect call targets
+                  annotateValueSite(M, I, VDs, TotalCount,
+                                    IPVK_IndirectCallTarget, VDs.size());
+                }
+                break;
+              }
+            }
+          }
+
           // Only need to find one with a matching call stack and add a single
           // callsite metadata.
 
diff --git a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_yaml.test b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_yaml.test
new file mode 100644
index 0000000000000..dcea8f14f5fe9
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_yaml.test
@@ -0,0 +1,37 @@
+; Make sure that we can ingest the MemProf profile in YAML with CalleeGuids
+; and annotate an indirect call with value profile metadata.
+
+; RUN: split-file %s %t
+; RUN: llvm-profdata merge --memprof-version=4 %t/memprof_annotate_indirect_call_yaml.yaml -o %t/memprof_annotate_indirect_call_yaml.memprofdata
+; RUN: opt < %t/memprof_annotate_indirect_call_yaml.ll -passes='memprof-use<profile-filename=%t/memprof_annotate_indirect_call_yaml.memprofdata>' -S 2>&1 | FileCheck %s
+
+;--- memprof_annotate_indirect_call_yaml.yaml
+---
+HeapProfileRecords:
+  - GUID:            _Z3barv
+    AllocSites:      []
+    CallSites:
+      - Frames:
+          - { Function: _Z3barv, LineOffset: 3, Column: 5, IsInlineFrame: false }
+        CalleeGuids:   [0x123456789abcdef0, 0x23456789abcdef01]
+...
+;--- memprof_annotate_indirect_call_yaml.ll
+define dso_local void @_Z3barv() !dbg !4 {
+entry:
+  %fp = alloca ptr, align 8
+  %0 = load ptr, ptr %fp, align 8
+  call void %0(), !dbg !5
+; CHECK: call void %0(), {{.*}} !prof ![[PROF:[0-9]+]]
+  ret void
+}
+
+; CHECK: ![[PROF]] = !{!"VP", i32 0, i64 2, i64 1311768467463790320, i64 1, i64 2541551405711093505, i64 1}
+
+!llvm.module.flags = !{!2, !3}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1)
+!1 = !DIFile(filename: "t", directory: "/")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !1, file: !1, line: 1, unit: !0)
+!5 = !DILocation(line: 4, column: 5, scope: !4)

>From da9b985eb60236f18b47b386fde29ad00eab5b46 Mon Sep 17 00:00:00 2001
From: Snehasish Kumar <snehasishk at google.com>
Date: Fri, 23 May 2025 12:09:18 -0700
Subject: [PATCH 02/10] Add an option to toggle application of CalleeGuids.

---
 llvm/lib/Transforms/Instrumentation/MemProfiler.cpp        | 7 ++++++-
 .../PGOProfile/memprof_annotate_indirect_call_yaml.test    | 2 ++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 29206781f1743..de79f99fc5609 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -177,6 +177,11 @@ static cl::opt<bool>
                         cl::desc("Salvage stale MemProf profile"),
                         cl::init(false), cl::Hidden);
 
+static cl::opt<bool> ClMemProfAttachCalleeGuids(
+    "memprof-attach-calleeguids",
+    cl::desc("Attach CalleeGuids value profile metadata to IR"),
+    cl::Hidden, cl::init(true));
+
 extern cl::opt<bool> MemProfReportHintedSizes;
 extern cl::opt<unsigned> MinClonedColdBytePercent;
 extern cl::opt<unsigned> MinCallsiteColdBytePercent;
@@ -1212,7 +1217,7 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
 
           // Check if this is an indirect call and we have GUID information
           // from CallSiteInfo to attach value profile metadata
-          if (!CalledFunction) {
+          if (!CalledFunction && ClMemProfAttachCalleeGuids) {
             // This is an indirect call, look for CallSites with matching stacks
             // that have CalleeGuids information
             for (auto &CS : MemProfRec->CallSites) {
diff --git a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_yaml.test b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_yaml.test
index dcea8f14f5fe9..49287e4d6ea1f 100644
--- a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_yaml.test
+++ b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_yaml.test
@@ -4,6 +4,7 @@
 ; RUN: split-file %s %t
 ; RUN: llvm-profdata merge --memprof-version=4 %t/memprof_annotate_indirect_call_yaml.yaml -o %t/memprof_annotate_indirect_call_yaml.memprofdata
 ; RUN: opt < %t/memprof_annotate_indirect_call_yaml.ll -passes='memprof-use<profile-filename=%t/memprof_annotate_indirect_call_yaml.memprofdata>' -S 2>&1 | FileCheck %s
+; RUN: opt < %t/memprof_annotate_indirect_call_yaml.ll -passes='memprof-use<profile-filename=%t/memprof_annotate_indirect_call_yaml.memprofdata>' -memprof-attach-calleeguids=false -S 2>&1 | FileCheck %s --check-prefix=CHECK-DISABLE
 
 ;--- memprof_annotate_indirect_call_yaml.yaml
 ---
@@ -22,6 +23,7 @@ entry:
   %0 = load ptr, ptr %fp, align 8
   call void %0(), !dbg !5
 ; CHECK: call void %0(), {{.*}} !prof ![[PROF:[0-9]+]]
+; CHECK-DISABLE-NOT: !prof
   ret void
 }
 

>From c31f858f5b2ad49ce13525d61150b748ddb1662a Mon Sep 17 00:00:00 2001
From: Snehasish Kumar <snehasishk at google.com>
Date: Fri, 23 May 2025 12:09:54 -0700
Subject: [PATCH 03/10] Fix formatting.

---
 llvm/lib/Transforms/Instrumentation/MemProfiler.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index de79f99fc5609..6ea7c36ff4fe4 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -179,8 +179,8 @@ static cl::opt<bool>
 
 static cl::opt<bool> ClMemProfAttachCalleeGuids(
     "memprof-attach-calleeguids",
-    cl::desc("Attach CalleeGuids value profile metadata to IR"),
-    cl::Hidden, cl::init(true));
+    cl::desc("Attach CalleeGuids value profile metadata to IR"), cl::Hidden,
+    cl::init(true));
 
 extern cl::opt<bool> MemProfReportHintedSizes;
 extern cl::opt<unsigned> MinClonedColdBytePercent;

>From cd8536ae8c9743eab2c6ae53e06f9836787e569d Mon Sep 17 00:00:00 2001
From: Snehasish Kumar <snehasishk at google.com>
Date: Sat, 24 May 2025 12:38:19 -0700
Subject: [PATCH 04/10] Handle pre-existing vp metadata.

---
 .../Instrumentation/MemProfiler.cpp           | 17 ++++++
 ...f_annotate_indirect_call_fdo_conflict.test | 53 +++++++++++++++++++
 2 files changed, 70 insertions(+)
 create mode 100644 llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_fdo_conflict.test

diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 6ea7c36ff4fe4..a40b1ab1e9546 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -1218,6 +1218,23 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
           // Check if this is an indirect call and we have GUID information
           // from CallSiteInfo to attach value profile metadata
           if (!CalledFunction && ClMemProfAttachCalleeGuids) {
+            // Check if the instruction already has value profile metadata from
+            // FDO. If it does, skip adding MemProf value profiles to avoid
+            // conflicts. FDO profiles typically have more accurate count data,
+            // so they take precedence over MemProf synthetic counts.
+            if (I.getMetadata(LLVMContext::MD_prof)) {
+              // Use the existing helper function to check for indirect call
+              // target value profiling metadata
+              uint64_t TotalCount;
+              auto ExistingVD = getValueProfDataFromInst(
+                  I, IPVK_IndirectCallTarget, ~0U, TotalCount);
+              if (!ExistingVD.empty()) {
+                // Instruction already has FDO value profile metadata for
+                // indirect call targets, skip adding MemProf value profiles
+                break;
+              }
+            }
+
             // This is an indirect call, look for CallSites with matching stacks
             // that have CalleeGuids information
             for (auto &CS : MemProfRec->CallSites) {
diff --git a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_fdo_conflict.test b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_fdo_conflict.test
new file mode 100644
index 0000000000000..997ecc8675938
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_fdo_conflict.test
@@ -0,0 +1,53 @@
+; This test verifies that MemProf value profiles do not override existing FDO
+; value profile metadata on indirect calls, but do annotate calls that don't
+; have existing FDO metadata.
+
+; RUN: split-file %s %t
+; RUN: llvm-profdata merge --memprof-version=4 %t/memprof_fdo_conflict.yaml -o %t/memprof_fdo_conflict.memprofdata
+; RUN: opt < %t/memprof_fdo_conflict.ll -passes='memprof-use<profile-filename=%t/memprof_fdo_conflict.memprofdata>' -S 2>&1 | FileCheck %s
+
+;--- memprof_fdo_conflict.yaml
+---
+HeapProfileRecords:
+  - GUID:            _Z3barv
+    AllocSites:      []
+    CallSites:
+      - Frames:
+          - { Function: _Z3barv, LineOffset: 3, Column: 5, IsInlineFrame: false }
+        CalleeGuids:   [0x123456789abcdef0, 0x23456789abcdef01]
+      - Frames:
+          - { Function: _Z3barv, LineOffset: 5, Column: 5, IsInlineFrame: false }
+        CalleeGuids:   [0x555556789abcdef0, 0x666656789abcdef1]
+...
+;--- memprof_fdo_conflict.ll
+define dso_local void @_Z3barv() !dbg !4 {
+entry:
+  %fp = alloca ptr, align 8
+  %0 = load ptr, ptr %fp, align 8
+  ; This call already has FDO value profile metadata - should NOT be modified
+  call void %0(), !dbg !5, !prof !6
+; CHECK: call void %0(), {{.*}} !prof !6
+; CHECK-NOT: call void %0(), {{.*}} !prof ![[NEWPROF:[0-9]+]]
+
+  ; This call does NOT have existing metadata - should get MemProf annotation
+  %1 = load ptr, ptr %fp, align 8
+  call void %1(), !dbg !7
+; CHECK: call void %1(), {{.*}} !prof ![[NEWPROF:[0-9]+]]
+  ret void
+}
+
+; Existing FDO value profile metadata (should be preserved)
+!6 = !{!"VP", i32 0, i64 100, i64 9191153033785521275, i64 80, i64 -1069303473483922844, i64 20}
+
+; CHECK: !6 = !{!"VP", i32 0, i64 100, i64 9191153033785521275, i64 80, i64 -1069303473483922844, i64 20}
+; CHECK: ![[NEWPROF]] = !{!"VP", i32 0, i64 2, i64 6148915942236413680, i64 1, i64 7378680115485269745, i64 1}
+
+!llvm.module.flags = !{!2, !3}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1)
+!1 = !DIFile(filename: "t", directory: "/")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !1, file: !1, line: 1, unit: !0)
+!5 = !DILocation(line: 4, column: 5, scope: !4)
+!7 = !DILocation(line: 6, column: 5, scope: !4)

>From e7dbf78ad8693d752d49596f1f79ce4e1dfd217a Mon Sep 17 00:00:00 2001
From: Snehasish Kumar <snehasishk at google.com>
Date: Sat, 24 May 2025 18:14:32 -0700
Subject: [PATCH 05/10] Efficient callsite lookup and refactoring.

---
 .../Instrumentation/MemProfiler.cpp           | 127 ++++++++++--------
 1 file changed, 68 insertions(+), 59 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index a40b1ab1e9546..e9e2628eb00a7 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -969,6 +969,46 @@ undriftMemProfRecord(const DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
     UndriftCallStack(CS.Frames);
 }
 
+// Helper function to process CalleeGuids and create value profile metadata
+static void addVPMetadata(Module &M, Instruction &I,
+                          ArrayRef<GlobalValue::GUID> CalleeGuids) {
+  if (!ClMemProfAttachCalleeGuids || CalleeGuids.empty())
+    return;
+
+  if (I.getMetadata(LLVMContext::MD_prof)) {
+    // Use the existing helper function to check for indirect call
+    // target value profiling metadata
+    uint64_t Unused;
+    auto ExistingVD =
+        getValueProfDataFromInst(I, IPVK_IndirectCallTarget, ~0U, Unused);
+    // We don't know how to merge value profile data yet.
+    if (!ExistingVD.empty()) {
+      return;
+    }
+  }
+
+  SmallVector<InstrProfValueData, 4> VDs;
+  uint64_t TotalCount = 0;
+
+  for (GlobalValue::GUID CalleeGUID : CalleeGuids) {
+    // For MemProf, we don't have actual call counts, so we assign
+    // a weight of 1 to each potential target. This provides the
+    // information needed for indirect call promotion without
+    // specific count data.
+    InstrProfValueData VD;
+    VD.Value = CalleeGUID;
+    VD.Count = 1; // Weight for ICP decision making
+    VDs.push_back(VD);
+    TotalCount += VD.Count;
+  }
+
+  if (!VDs.empty()) {
+    // Attach value profile metadata for indirect call targets
+    annotateValueSite(M, I, VDs, TotalCount, IPVK_IndirectCallTarget,
+                      VDs.size());
+  }
+}
+
 static void
 readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
             const TargetLibraryInfo &TLI,
@@ -1036,15 +1076,28 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
   // Build maps of the location hash to all profile data with that leaf location
   // (allocation info and the callsites).
   std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
-  // A hash function for std::unordered_set<ArrayRef<Frame>> to work.
-  struct CallStackHash {
-    size_t operator()(ArrayRef<Frame> CS) const {
-      return computeFullStackId(CS);
+
+  struct CallSiteEntry {
+    // Subset of frames for the corresponding CallSiteInfo.
+    ArrayRef<Frame> Frames;
+    // Potential targets for indirect calls.
+    ArrayRef<GlobalValue::GUID> CalleeGuids;
+
+    bool operator==(const CallSiteEntry &Other) const {
+      return Frames.data() == Other.Frames.data() &&
+             Frames.size() == Other.Frames.size();
+    }
+  };
+
+  struct CallSiteEntryHash {
+    size_t operator()(const CallSiteEntry &Entry) const {
+      return computeFullStackId(Entry.Frames);
     }
   };
+
   // For the callsites we need to record slices of the frame array (see comments
-  // below where the map entries are added).
-  std::map<uint64_t, std::unordered_set<ArrayRef<Frame>, CallStackHash>>
+  // below where the map entries are added) along with their CalleeGuids.
+  std::map<uint64_t, std::unordered_set<CallSiteEntry, CallSiteEntryHash>>
       LocHashToCallSites;
   for (auto &AI : MemProfRec->AllocSites) {
     NumOfMemProfAllocContextProfiles++;
@@ -1062,8 +1115,10 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
     unsigned Idx = 0;
     for (auto &StackFrame : CS.Frames) {
       uint64_t StackId = computeStackId(StackFrame);
-      LocHashToCallSites[StackId].insert(
-          ArrayRef<Frame>(CS.Frames).drop_front(Idx++));
+      ArrayRef<Frame> FrameSlice = ArrayRef<Frame>(CS.Frames).drop_front(Idx++);
+      ArrayRef<GlobalValue::GUID> CalleeGuids(CS.CalleeGuids);
+      LocHashToCallSites[StackId].insert({FrameSlice, CalleeGuids});
+
       ProfileHasColumns |= StackFrame.Column;
       // Once we find this function, we can stop recording.
       if (StackFrame.Function == FuncGUID)
@@ -1207,63 +1262,17 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
       // Otherwise, add callsite metadata. If we reach here then we found the
       // instruction's leaf location in the callsites map and not the allocation
       // map.
-      for (auto CallStackIdx : CallSitesIter->second) {
+      for (const auto &CallSiteEntry : CallSitesIter->second) {
         // If we found and thus matched all frames on the call, create and
         // attach call stack metadata.
-        if (stackFrameIncludesInlinedCallStack(CallStackIdx,
+        if (stackFrameIncludesInlinedCallStack(CallSiteEntry.Frames,
                                                InlinedCallStack)) {
           NumOfMemProfMatchedCallSites++;
           addCallsiteMetadata(I, InlinedCallStack, Ctx);
 
-          // Check if this is an indirect call and we have GUID information
-          // from CallSiteInfo to attach value profile metadata
-          if (!CalledFunction && ClMemProfAttachCalleeGuids) {
-            // Check if the instruction already has value profile metadata from
-            // FDO. If it does, skip adding MemProf value profiles to avoid
-            // conflicts. FDO profiles typically have more accurate count data,
-            // so they take precedence over MemProf synthetic counts.
-            if (I.getMetadata(LLVMContext::MD_prof)) {
-              // Use the existing helper function to check for indirect call
-              // target value profiling metadata
-              uint64_t TotalCount;
-              auto ExistingVD = getValueProfDataFromInst(
-                  I, IPVK_IndirectCallTarget, ~0U, TotalCount);
-              if (!ExistingVD.empty()) {
-                // Instruction already has FDO value profile metadata for
-                // indirect call targets, skip adding MemProf value profiles
-                break;
-              }
-            }
-
-            // This is an indirect call, look for CallSites with matching stacks
-            // that have CalleeGuids information
-            for (auto &CS : MemProfRec->CallSites) {
-              if (!CS.CalleeGuids.empty() && stackFrameIncludesInlinedCallStack(
-                                                 CS.Frames, InlinedCallStack)) {
-                // Create value profile data from the CalleeGuids
-                SmallVector<InstrProfValueData, 4> VDs;
-                uint64_t TotalCount = 0;
-
-                for (GlobalValue::GUID CalleeGUID : CS.CalleeGuids) {
-                  // For MemProf, we don't have actual call counts, so we assign
-                  // a weight of 1 to each potential target. This provides the
-                  // information needed for indirect call promotion without
-                  // specific count data.
-                  InstrProfValueData VD;
-                  VD.Value = CalleeGUID;
-                  VD.Count = 1; // Weight for ICP decision making
-                  VDs.push_back(VD);
-                  TotalCount += VD.Count;
-                }
-
-                if (!VDs.empty()) {
-                  // Attach value profile metadata for indirect call targets
-                  annotateValueSite(M, I, VDs, TotalCount,
-                                    IPVK_IndirectCallTarget, VDs.size());
-                }
-                break;
-              }
-            }
+          // Try to attach indirect call metadata if possible.
+          if (!CalledFunction) {
+            addVPMetadata(M, I, CallSiteEntry.CalleeGuids);
           }
 
           // Only need to find one with a matching call stack and add a single

>From 918d650fe9ef331d0d7ad41cb217a344d86c1044 Mon Sep 17 00:00:00 2001
From: Snehasish Kumar <snehasishk at google.com>
Date: Sat, 24 May 2025 18:35:07 -0700
Subject: [PATCH 06/10] Make the flag false by default and update the tests.

---
 llvm/lib/Transforms/Instrumentation/MemProfiler.cpp       | 8 +++-----
 .../memprof_annotate_indirect_call_fdo_conflict.test      | 2 +-
 .../PGOProfile/memprof_annotate_indirect_call_yaml.test   | 6 ++++--
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index e9e2628eb00a7..4c40cc2edd2dd 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -95,7 +95,7 @@ static cl::opt<bool> ClInsertVersionCheck(
 // This flag may need to be replaced with -f[no-]memprof-reads.
 static cl::opt<bool> ClInstrumentReads("memprof-instrument-reads",
                                        cl::desc("instrument read instructions"),
-                                       cl::Hidden, cl::init(true));
+                                       cl::Hidden, cl::init(false));
 
 static cl::opt<bool>
     ClInstrumentWrites("memprof-instrument-writes",
@@ -177,10 +177,8 @@ static cl::opt<bool>
                         cl::desc("Salvage stale MemProf profile"),
                         cl::init(false), cl::Hidden);
 
-static cl::opt<bool> ClMemProfAttachCalleeGuids(
-    "memprof-attach-calleeguids",
-    cl::desc("Attach CalleeGuids value profile metadata to IR"), cl::Hidden,
-    cl::init(true));
+static cl::opt<bool> ClMemProfAttachCalleeGuids("memprof-attach-calleeguids",
+                                                cl::init(false));
 
 extern cl::opt<bool> MemProfReportHintedSizes;
 extern cl::opt<unsigned> MinClonedColdBytePercent;
diff --git a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_fdo_conflict.test b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_fdo_conflict.test
index 997ecc8675938..03a72d8ee631d 100644
--- a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_fdo_conflict.test
+++ b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_fdo_conflict.test
@@ -4,7 +4,7 @@
 
 ; RUN: split-file %s %t
 ; RUN: llvm-profdata merge --memprof-version=4 %t/memprof_fdo_conflict.yaml -o %t/memprof_fdo_conflict.memprofdata
-; RUN: opt < %t/memprof_fdo_conflict.ll -passes='memprof-use<profile-filename=%t/memprof_fdo_conflict.memprofdata>' -S 2>&1 | FileCheck %s
+; RUN: opt < %t/memprof_fdo_conflict.ll -passes='memprof-use<profile-filename=%t/memprof_fdo_conflict.memprofdata>' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s
 
 ;--- memprof_fdo_conflict.yaml
 ---
diff --git a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_yaml.test b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_yaml.test
index 49287e4d6ea1f..1aece3de857cc 100644
--- a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_yaml.test
+++ b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_yaml.test
@@ -5,6 +5,7 @@
 ; RUN: llvm-profdata merge --memprof-version=4 %t/memprof_annotate_indirect_call_yaml.yaml -o %t/memprof_annotate_indirect_call_yaml.memprofdata
 ; RUN: opt < %t/memprof_annotate_indirect_call_yaml.ll -passes='memprof-use<profile-filename=%t/memprof_annotate_indirect_call_yaml.memprofdata>' -S 2>&1 | FileCheck %s
 ; RUN: opt < %t/memprof_annotate_indirect_call_yaml.ll -passes='memprof-use<profile-filename=%t/memprof_annotate_indirect_call_yaml.memprofdata>' -memprof-attach-calleeguids=false -S 2>&1 | FileCheck %s --check-prefix=CHECK-DISABLE
+; RUN: opt < %t/memprof_annotate_indirect_call_yaml.ll -passes='memprof-use<profile-filename=%t/memprof_annotate_indirect_call_yaml.memprofdata>' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s --check-prefix=CHECK-ENABLE
 
 ;--- memprof_annotate_indirect_call_yaml.yaml
 ---
@@ -22,12 +23,13 @@ entry:
   %fp = alloca ptr, align 8
   %0 = load ptr, ptr %fp, align 8
   call void %0(), !dbg !5
-; CHECK: call void %0(), {{.*}} !prof ![[PROF:[0-9]+]]
+; CHECK-NOT: !prof
+; CHECK-ENABLE: call void %0(), {{.*}} !prof ![[PROF:[0-9]+]]
 ; CHECK-DISABLE-NOT: !prof
   ret void
 }
 
-; CHECK: ![[PROF]] = !{!"VP", i32 0, i64 2, i64 1311768467463790320, i64 1, i64 2541551405711093505, i64 1}
+; CHECK-ENABLE: ![[PROF]] = !{!"VP", i32 0, i64 2, i64 1311768467463790320, i64 1, i64 2541551405711093505, i64 1}
 
 !llvm.module.flags = !{!2, !3}
 

>From 85ed751d5da5d8f7df510af341862bec3e37abe4 Mon Sep 17 00:00:00 2001
From: Snehasish Kumar <snehasishk at google.com>
Date: Fri, 30 May 2025 10:47:51 -0700
Subject: [PATCH 07/10] Undo unrelated change.

---
 .../Instrumentation/MemProfiler.cpp           | 26 ++++++++++---------
 ...f_annotate_indirect_call_fdo_conflict.test |  9 +++----
 .../memprof_annotate_indirect_call_yaml.test  |  2 --
 3 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 4c40cc2edd2dd..30d0b15d7acfe 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -95,7 +95,7 @@ static cl::opt<bool> ClInsertVersionCheck(
 // This flag may need to be replaced with -f[no-]memprof-reads.
 static cl::opt<bool> ClInstrumentReads("memprof-instrument-reads",
                                        cl::desc("instrument read instructions"),
-                                       cl::Hidden, cl::init(false));
+                                       cl::Hidden, cl::init(true));
 
 static cl::opt<bool>
     ClInstrumentWrites("memprof-instrument-writes",
@@ -177,8 +177,11 @@ static cl::opt<bool>
                         cl::desc("Salvage stale MemProf profile"),
                         cl::init(false), cl::Hidden);
 
-static cl::opt<bool> ClMemProfAttachCalleeGuids("memprof-attach-calleeguids",
-                                                cl::init(false));
+static cl::opt<bool> ClMemProfAttachCalleeGuids(
+    "memprof-attach-calleeguids",
+    cl::desc(
+        "Attach calleeguids as value profile metadata for indirect calls."),
+    cl::init(true), cl::Hidden);
 
 extern cl::opt<bool> MemProfReportHintedSizes;
 extern cl::opt<unsigned> MinClonedColdBytePercent;
@@ -974,8 +977,6 @@ static void addVPMetadata(Module &M, Instruction &I,
     return;
 
   if (I.getMetadata(LLVMContext::MD_prof)) {
-    // Use the existing helper function to check for indirect call
-    // target value profiling metadata
     uint64_t Unused;
     auto ExistingVD =
         getValueProfDataFromInst(I, IPVK_IndirectCallTarget, ~0U, Unused);
@@ -988,20 +989,17 @@ static void addVPMetadata(Module &M, Instruction &I,
   SmallVector<InstrProfValueData, 4> VDs;
   uint64_t TotalCount = 0;
 
-  for (GlobalValue::GUID CalleeGUID : CalleeGuids) {
-    // For MemProf, we don't have actual call counts, so we assign
-    // a weight of 1 to each potential target. This provides the
-    // information needed for indirect call promotion without
-    // specific count data.
+  for (const GlobalValue::GUID CalleeGUID : CalleeGuids) {
     InstrProfValueData VD;
     VD.Value = CalleeGUID;
-    VD.Count = 1; // Weight for ICP decision making
+    // For MemProf, we don't have actual call counts, so we assign
+    // a weight of 1 to each potential target.
+    VD.Count = 1;
     VDs.push_back(VD);
     TotalCount += VD.Count;
   }
 
   if (!VDs.empty()) {
-    // Attach value profile metadata for indirect call targets
     annotateValueSite(M, I, VDs, TotalCount, IPVK_IndirectCallTarget,
                       VDs.size());
   }
@@ -1075,12 +1073,16 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
   // (allocation info and the callsites).
   std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
 
+  // Helper struct for maintaining refs to callsite data. As an alternative we
+  // could store a pointer to the CallSiteInfo struct but we also need the frame
+  // index. Using ArrayRefs instead makes it a little easier to read.
   struct CallSiteEntry {
     // Subset of frames for the corresponding CallSiteInfo.
     ArrayRef<Frame> Frames;
     // Potential targets for indirect calls.
     ArrayRef<GlobalValue::GUID> CalleeGuids;
 
+    // Only compare Frame contents.
     bool operator==(const CallSiteEntry &Other) const {
       return Frames.data() == Other.Frames.data() &&
              Frames.size() == Other.Frames.size();
diff --git a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_fdo_conflict.test b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_fdo_conflict.test
index 03a72d8ee631d..da8d530c606de 100644
--- a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_fdo_conflict.test
+++ b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_fdo_conflict.test
@@ -25,18 +25,17 @@ entry:
   %fp = alloca ptr, align 8
   %0 = load ptr, ptr %fp, align 8
   ; This call already has FDO value profile metadata - should NOT be modified
+  ; CHECK: call void %0(), {{.*}} !prof !6
+  ; CHECK-NOT: call void %0(), {{.*}} !prof ![[NEWPROF:[0-9]+]]
   call void %0(), !dbg !5, !prof !6
-; CHECK: call void %0(), {{.*}} !prof !6
-; CHECK-NOT: call void %0(), {{.*}} !prof ![[NEWPROF:[0-9]+]]
 
-  ; This call does NOT have existing metadata - should get MemProf annotation
   %1 = load ptr, ptr %fp, align 8
+  ; This call does NOT have existing metadata - should get MemProf annotation
+  ; CHECK: call void %1(), {{.*}} !prof ![[NEWPROF:[0-9]+]]
   call void %1(), !dbg !7
-; CHECK: call void %1(), {{.*}} !prof ![[NEWPROF:[0-9]+]]
   ret void
 }
 
-; Existing FDO value profile metadata (should be preserved)
 !6 = !{!"VP", i32 0, i64 100, i64 9191153033785521275, i64 80, i64 -1069303473483922844, i64 20}
 
 ; CHECK: !6 = !{!"VP", i32 0, i64 100, i64 9191153033785521275, i64 80, i64 -1069303473483922844, i64 20}
diff --git a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_yaml.test b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_yaml.test
index 1aece3de857cc..6a14ebb6755ab 100644
--- a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_yaml.test
+++ b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_yaml.test
@@ -3,7 +3,6 @@
 
 ; RUN: split-file %s %t
 ; RUN: llvm-profdata merge --memprof-version=4 %t/memprof_annotate_indirect_call_yaml.yaml -o %t/memprof_annotate_indirect_call_yaml.memprofdata
-; RUN: opt < %t/memprof_annotate_indirect_call_yaml.ll -passes='memprof-use<profile-filename=%t/memprof_annotate_indirect_call_yaml.memprofdata>' -S 2>&1 | FileCheck %s
 ; RUN: opt < %t/memprof_annotate_indirect_call_yaml.ll -passes='memprof-use<profile-filename=%t/memprof_annotate_indirect_call_yaml.memprofdata>' -memprof-attach-calleeguids=false -S 2>&1 | FileCheck %s --check-prefix=CHECK-DISABLE
 ; RUN: opt < %t/memprof_annotate_indirect_call_yaml.ll -passes='memprof-use<profile-filename=%t/memprof_annotate_indirect_call_yaml.memprofdata>' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s --check-prefix=CHECK-ENABLE
 
@@ -23,7 +22,6 @@ entry:
   %fp = alloca ptr, align 8
   %0 = load ptr, ptr %fp, align 8
   call void %0(), !dbg !5
-; CHECK-NOT: !prof
 ; CHECK-ENABLE: call void %0(), {{.*}} !prof ![[PROF:[0-9]+]]
 ; CHECK-DISABLE-NOT: !prof
   ret void

>From ca1cc248c3c0957f14636384b379bce0e82a3892 Mon Sep 17 00:00:00 2001
From: Snehasish Kumar <snehasishk at google.com>
Date: Fri, 30 May 2025 11:51:03 -0700
Subject: [PATCH 08/10] Combine tests into a single file.

---
 .../memprof_annotate_indirect_call.test       | 89 +++++++++++++++++++
 ...f_annotate_indirect_call_fdo_conflict.test | 52 -----------
 .../memprof_annotate_indirect_call_yaml.test  | 39 --------
 3 files changed, 89 insertions(+), 91 deletions(-)
 create mode 100644 llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test
 delete mode 100644 llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_fdo_conflict.test
 delete mode 100644 llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_yaml.test

diff --git a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test
new file mode 100644
index 0000000000000..71a3a467a4128
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test
@@ -0,0 +1,89 @@
+; RUN: split-file %s %t
+
+; COM: Basic functionality with flag toggle
+; RUN: llvm-profdata merge --memprof-version=4 %t/basic.yaml -o %t/basic.memprofdata
+; RUN: opt < %t/basic.ll -passes='memprof-use<profile-filename=%t/basic.memprofdata>' -memprof-attach-calleeguids=false -S 2>&1 | FileCheck %s --check-prefix=CHECK-DISABLE
+; RUN: opt < %t/basic.ll -passes='memprof-use<profile-filename=%t/basic.memprofdata>' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s --check-prefix=CHECK-ENABLE
+
+; COM: FDO conflict handling
+; RUN: llvm-profdata merge --memprof-version=4 %t/fdo_conflict.yaml -o %t/fdo_conflict.memprofdata
+; RUN: opt < %t/fdo_conflict.ll -passes='memprof-use<profile-filename=%t/fdo_conflict.memprofdata>' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s --check-prefix=CHECK-FDO
+
+;--- basic.yaml
+---
+HeapProfileRecords:
+  - GUID:            _Z3barv
+    AllocSites:      []
+    CallSites:
+      - Frames:
+          - { Function: _Z3barv, LineOffset: 3, Column: 5, IsInlineFrame: false }
+        CalleeGuids:   [0x123456789abcdef0, 0x23456789abcdef01]
+...
+
+;--- basic.ll
+define dso_local void @_Z3barv() !dbg !4 {
+entry:
+  %fp = alloca ptr, align 8
+  %0 = load ptr, ptr %fp, align 8
+  call void %0(), !dbg !5
+; CHECK-ENABLE: call void %0(), {{.*}} !prof !6
+; CHECK-DISABLE-NOT: !prof
+  ret void
+}
+
+; CHECK-ENABLE: !6 = !{!"VP", i32 0, i64 2, i64 1311768467463790320, i64 1, i64 2541551405711093505, i64 1}
+
+!llvm.module.flags = !{!2, !3}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1)
+!1 = !DIFile(filename: "t", directory: "/")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !1, file: !1, line: 1, unit: !0)
+!5 = !DILocation(line: 4, column: 5, scope: !4)
+
+;--- fdo_conflict.yaml
+---
+HeapProfileRecords:
+  - GUID:            _Z3foov
+    AllocSites:      []
+    CallSites:
+      - Frames:
+          - { Function: _Z3foov, LineOffset: 3, Column: 5, IsInlineFrame: false }
+        CalleeGuids:   [0x123456789abcdef0, 0x23456789abcdef01]
+      - Frames:
+          - { Function: _Z3foov, LineOffset: 5, Column: 5, IsInlineFrame: false }
+        CalleeGuids:   [0x555556789abcdef0, 0x666656789abcdef1]
+...
+
+;--- fdo_conflict.ll
+define dso_local void @_Z3foov() !dbg !14 {
+entry:
+  %fp = alloca ptr, align 8
+  %0 = load ptr, ptr %fp, align 8
+  ; This call already has FDO value profile metadata - should NOT be modified
+  ; CHECK-FDO: call void %0(), {{.*}} !prof !6
+  ; CHECK-FDO-NOT: call void %0(), {{.*}} !prof !9
+  call void %0(), !dbg !15, !prof !16
+
+  %1 = load ptr, ptr %fp, align 8
+  ; This call does NOT have existing metadata - should get MemProf annotation
+  ; CHECK-FDO: call void %1(), {{.*}} !prof !9
+  call void %1(), !dbg !17
+  ret void
+}
+
+!16 = !{!"VP", i32 0, i64 100, i64 9191153033785521275, i64 80, i64 -1069303473483922844, i64 20}
+
+; CHECK-FDO: !6 = !{!"VP", i32 0, i64 100, i64 9191153033785521275, i64 80, i64 -1069303473483922844, i64 20}
+; CHECK-FDO: !9 = !{!"VP", i32 0, i64 2, i64 6148915942236413680, i64 1, i64 7378680115485269745, i64 1}
+
+!llvm.module.flags = !{!12, !13}
+
+!10 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !11)
+!11 = !DIFile(filename: "t", directory: "/")
+!12 = !{i32 7, !"Dwarf Version", i32 5}
+!13 = !{i32 2, !"Debug Info Version", i32 3}
+!14 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !11, file: !11, line: 1, unit: !10)
+!15 = !DILocation(line: 4, column: 5, scope: !14)
+!17 = !DILocation(line: 6, column: 5, scope: !14)
diff --git a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_fdo_conflict.test b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_fdo_conflict.test
deleted file mode 100644
index da8d530c606de..0000000000000
--- a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_fdo_conflict.test
+++ /dev/null
@@ -1,52 +0,0 @@
-; This test verifies that MemProf value profiles do not override existing FDO
-; value profile metadata on indirect calls, but do annotate calls that don't
-; have existing FDO metadata.
-
-; RUN: split-file %s %t
-; RUN: llvm-profdata merge --memprof-version=4 %t/memprof_fdo_conflict.yaml -o %t/memprof_fdo_conflict.memprofdata
-; RUN: opt < %t/memprof_fdo_conflict.ll -passes='memprof-use<profile-filename=%t/memprof_fdo_conflict.memprofdata>' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s
-
-;--- memprof_fdo_conflict.yaml
----
-HeapProfileRecords:
-  - GUID:            _Z3barv
-    AllocSites:      []
-    CallSites:
-      - Frames:
-          - { Function: _Z3barv, LineOffset: 3, Column: 5, IsInlineFrame: false }
-        CalleeGuids:   [0x123456789abcdef0, 0x23456789abcdef01]
-      - Frames:
-          - { Function: _Z3barv, LineOffset: 5, Column: 5, IsInlineFrame: false }
-        CalleeGuids:   [0x555556789abcdef0, 0x666656789abcdef1]
-...
-;--- memprof_fdo_conflict.ll
-define dso_local void @_Z3barv() !dbg !4 {
-entry:
-  %fp = alloca ptr, align 8
-  %0 = load ptr, ptr %fp, align 8
-  ; This call already has FDO value profile metadata - should NOT be modified
-  ; CHECK: call void %0(), {{.*}} !prof !6
-  ; CHECK-NOT: call void %0(), {{.*}} !prof ![[NEWPROF:[0-9]+]]
-  call void %0(), !dbg !5, !prof !6
-
-  %1 = load ptr, ptr %fp, align 8
-  ; This call does NOT have existing metadata - should get MemProf annotation
-  ; CHECK: call void %1(), {{.*}} !prof ![[NEWPROF:[0-9]+]]
-  call void %1(), !dbg !7
-  ret void
-}
-
-!6 = !{!"VP", i32 0, i64 100, i64 9191153033785521275, i64 80, i64 -1069303473483922844, i64 20}
-
-; CHECK: !6 = !{!"VP", i32 0, i64 100, i64 9191153033785521275, i64 80, i64 -1069303473483922844, i64 20}
-; CHECK: ![[NEWPROF]] = !{!"VP", i32 0, i64 2, i64 6148915942236413680, i64 1, i64 7378680115485269745, i64 1}
-
-!llvm.module.flags = !{!2, !3}
-
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1)
-!1 = !DIFile(filename: "t", directory: "/")
-!2 = !{i32 7, !"Dwarf Version", i32 5}
-!3 = !{i32 2, !"Debug Info Version", i32 3}
-!4 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !1, file: !1, line: 1, unit: !0)
-!5 = !DILocation(line: 4, column: 5, scope: !4)
-!7 = !DILocation(line: 6, column: 5, scope: !4)
diff --git a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_yaml.test b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_yaml.test
deleted file mode 100644
index 6a14ebb6755ab..0000000000000
--- a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call_yaml.test
+++ /dev/null
@@ -1,39 +0,0 @@
-; Make sure that we can ingest the MemProf profile in YAML with CalleeGuids
-; and annotate an indirect call with value profile metadata.
-
-; RUN: split-file %s %t
-; RUN: llvm-profdata merge --memprof-version=4 %t/memprof_annotate_indirect_call_yaml.yaml -o %t/memprof_annotate_indirect_call_yaml.memprofdata
-; RUN: opt < %t/memprof_annotate_indirect_call_yaml.ll -passes='memprof-use<profile-filename=%t/memprof_annotate_indirect_call_yaml.memprofdata>' -memprof-attach-calleeguids=false -S 2>&1 | FileCheck %s --check-prefix=CHECK-DISABLE
-; RUN: opt < %t/memprof_annotate_indirect_call_yaml.ll -passes='memprof-use<profile-filename=%t/memprof_annotate_indirect_call_yaml.memprofdata>' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s --check-prefix=CHECK-ENABLE
-
-;--- memprof_annotate_indirect_call_yaml.yaml
----
-HeapProfileRecords:
-  - GUID:            _Z3barv
-    AllocSites:      []
-    CallSites:
-      - Frames:
-          - { Function: _Z3barv, LineOffset: 3, Column: 5, IsInlineFrame: false }
-        CalleeGuids:   [0x123456789abcdef0, 0x23456789abcdef01]
-...
-;--- memprof_annotate_indirect_call_yaml.ll
-define dso_local void @_Z3barv() !dbg !4 {
-entry:
-  %fp = alloca ptr, align 8
-  %0 = load ptr, ptr %fp, align 8
-  call void %0(), !dbg !5
-; CHECK-ENABLE: call void %0(), {{.*}} !prof ![[PROF:[0-9]+]]
-; CHECK-DISABLE-NOT: !prof
-  ret void
-}
-
-; CHECK-ENABLE: ![[PROF]] = !{!"VP", i32 0, i64 2, i64 1311768467463790320, i64 1, i64 2541551405711093505, i64 1}
-
-!llvm.module.flags = !{!2, !3}
-
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1)
-!1 = !DIFile(filename: "t", directory: "/")
-!2 = !{i32 7, !"Dwarf Version", i32 5}
-!3 = !{i32 2, !"Debug Info Version", i32 3}
-!4 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !1, file: !1, line: 1, unit: !0)
-!5 = !DILocation(line: 4, column: 5, scope: !4)

>From a6283e736835491f74b55e0648647c0c08ec656a Mon Sep 17 00:00:00 2001
From: Snehasish Kumar <snehasishk at google.com>
Date: Fri, 30 May 2025 22:23:35 -0700
Subject: [PATCH 09/10] Address comments.

---
 llvm/lib/Transforms/Instrumentation/MemProfiler.cpp | 13 +++++++++----
 .../PGOProfile/memprof_annotate_indirect_call.test  |  5 ++---
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 30d0b15d7acfe..bf26cd785b1e8 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -978,8 +978,9 @@ static void addVPMetadata(Module &M, Instruction &I,
 
   if (I.getMetadata(LLVMContext::MD_prof)) {
     uint64_t Unused;
-    auto ExistingVD =
-        getValueProfDataFromInst(I, IPVK_IndirectCallTarget, ~0U, Unused);
+    // ~0U means get all available value profile data without any count limit
+    auto ExistingVD = getValueProfDataFromInst(I, IPVK_IndirectCallTarget,
+                                               /*MaxNumValueData=*/~0U, Unused);
     // We don't know how to merge value profile data yet.
     if (!ExistingVD.empty()) {
       return;
@@ -994,6 +995,8 @@ static void addVPMetadata(Module &M, Instruction &I,
     VD.Value = CalleeGUID;
     // For MemProf, we don't have actual call counts, so we assign
     // a weight of 1 to each potential target.
+    // TODO: Consider making this weight configurable or increasing it to
+    // improve effectiveness for ICP.
     VD.Count = 1;
     VDs.push_back(VD);
     TotalCount += VD.Count;
@@ -1083,6 +1086,9 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
     ArrayRef<GlobalValue::GUID> CalleeGuids;
 
     // Only compare Frame contents.
+    // Use pointer-based equality instead of ArrayRef's operator== which does
+    // element-wise comparison. We want to check if it's the same slice of the
+    // underlying array, not just equivalent content.
     bool operator==(const CallSiteEntry &Other) const {
       return Frames.data() == Other.Frames.data() &&
              Frames.size() == Other.Frames.size();
@@ -1271,9 +1277,8 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
           addCallsiteMetadata(I, InlinedCallStack, Ctx);
 
           // Try to attach indirect call metadata if possible.
-          if (!CalledFunction) {
+          if (!CalledFunction)
             addVPMetadata(M, I, CallSiteEntry.CalleeGuids);
-          }
 
           // Only need to find one with a matching call stack and add a single
           // callsite metadata.
diff --git a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test
index 71a3a467a4128..ad83da285694a 100644
--- a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test
+++ b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test
@@ -1,11 +1,11 @@
 ; RUN: split-file %s %t
 
-; COM: Basic functionality with flag toggle
+;; Basic functionality with flag toggle
 ; RUN: llvm-profdata merge --memprof-version=4 %t/basic.yaml -o %t/basic.memprofdata
 ; RUN: opt < %t/basic.ll -passes='memprof-use<profile-filename=%t/basic.memprofdata>' -memprof-attach-calleeguids=false -S 2>&1 | FileCheck %s --check-prefix=CHECK-DISABLE
 ; RUN: opt < %t/basic.ll -passes='memprof-use<profile-filename=%t/basic.memprofdata>' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s --check-prefix=CHECK-ENABLE
 
-; COM: FDO conflict handling
+;; FDO conflict handling
 ; RUN: llvm-profdata merge --memprof-version=4 %t/fdo_conflict.yaml -o %t/fdo_conflict.memprofdata
 ; RUN: opt < %t/fdo_conflict.ll -passes='memprof-use<profile-filename=%t/fdo_conflict.memprofdata>' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s --check-prefix=CHECK-FDO
 
@@ -63,7 +63,6 @@ entry:
   %0 = load ptr, ptr %fp, align 8
   ; This call already has FDO value profile metadata - should NOT be modified
   ; CHECK-FDO: call void %0(), {{.*}} !prof !6
-  ; CHECK-FDO-NOT: call void %0(), {{.*}} !prof !9
   call void %0(), !dbg !15, !prof !16
 
   %1 = load ptr, ptr %fp, align 8

>From 2310c5318d5f28f3627d5595c4db7ae983d4479f Mon Sep 17 00:00:00 2001
From: Snehasish Kumar <snehasishk at google.com>
Date: Sat, 31 May 2025 10:31:38 -0700
Subject: [PATCH 10/10] Address review comment: change ~0U to 1 for efficiency

---
 llvm/lib/Transforms/Instrumentation/MemProfiler.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index bf26cd785b1e8..1fcf64c3ada5a 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -978,9 +978,11 @@ static void addVPMetadata(Module &M, Instruction &I,
 
   if (I.getMetadata(LLVMContext::MD_prof)) {
     uint64_t Unused;
-    // ~0U means get all available value profile data without any count limit
+    // TODO: When merging is implemented, increase this to a typical ICP value
+    // (e.g., 3-6) For now, we only need to check if existing data exists, so 1
+    // is sufficient
     auto ExistingVD = getValueProfDataFromInst(I, IPVK_IndirectCallTarget,
-                                               /*MaxNumValueData=*/~0U, Unused);
+                                               /*MaxNumValueData=*/1, Unused);
     // We don't know how to merge value profile data yet.
     if (!ExistingVD.empty()) {
       return;



More information about the llvm-commits mailing list