[llvm] [MemProf] Emit remarks when hinting allocations not needing cloning (PR #141859)
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 28 14:22:45 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-analysis
Author: Teresa Johnson (teresajohnson)
<details>
<summary>Changes</summary>
The context disambiguation code already emits remarks when hinting
allocations (by adding hotness attributes) during cloning. However,
we did not yet emit remarks when applying the hotness attributes while
building the metadata (during matching and again after inlining).
Add remarks when we apply the hint attributes for these
non-context-sensitive allocations.
---
Full diff: https://github.com/llvm/llvm-project/pull/141859.diff
8 Files Affected:
- (modified) llvm/include/llvm/Analysis/MemoryProfileInfo.h (+8-1)
- (modified) llvm/include/llvm/Transforms/Utils/Cloning.h (+3-1)
- (modified) llvm/lib/Analysis/MemoryProfileInfo.cpp (+10-8)
- (modified) llvm/lib/Transforms/IPO/Inliner.cpp (+4-3)
- (modified) llvm/lib/Transforms/Instrumentation/MemProfiler.cpp (+6-3)
- (modified) llvm/lib/Transforms/Utils/InlineFunction.cpp (+13-9)
- (modified) llvm/test/Transforms/Inline/memprof_inline.ll (+6-1)
- (modified) llvm/test/Transforms/PGOProfile/memprof.ll (+5-2)
``````````diff
diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
index bf1cfb1ee52bb..70e5974013e92 100644
--- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h
+++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
@@ -19,6 +19,9 @@
#include <map>
namespace llvm {
+
+class OptimizationRemarkEmitter;
+
namespace memprof {
/// Return the allocation type for a given set of memory profile values.
@@ -85,6 +88,10 @@ class CallStackTrie {
// The allocation's leaf stack id.
uint64_t AllocStackId = 0;
+ // If the client provides a remarks emitter object, we will emit remarks on
+ // allocations for which we apply non-context sensitive allocation hints.
+ OptimizationRemarkEmitter *ORE;
+
void deleteTrieNode(CallStackTrieNode *Node) {
if (!Node)
return;
@@ -111,7 +118,7 @@ class CallStackTrie {
uint64_t &ColdBytes);
public:
- CallStackTrie() = default;
+ CallStackTrie(OptimizationRemarkEmitter *ORE = nullptr) : ORE(ORE) {}
~CallStackTrie() { deleteTrieNode(Alloc); }
bool empty() const { return Alloc == nullptr; }
diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h
index 24f7ae7f07c24..58a5faf57a361 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -42,6 +42,7 @@ class Instruction;
class Loop;
class LoopInfo;
class Module;
+class OptimizationRemarkEmitter;
class PGOContextualProfile;
class ProfileSummaryInfo;
class ReturnInst;
@@ -314,7 +315,8 @@ InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
bool MergeAttributes = false,
AAResults *CalleeAAR = nullptr,
bool InsertLifetime = true,
- Function *ForwardVarArgsTo = nullptr);
+ Function *ForwardVarArgsTo = nullptr,
+ OptimizationRemarkEmitter *ORE = nullptr);
/// Same as above, but it will update the contextual profile. If the contextual
/// profile is invalid (i.e. not loaded because it is not present), it defaults
diff --git a/llvm/lib/Analysis/MemoryProfileInfo.cpp b/llvm/lib/Analysis/MemoryProfileInfo.cpp
index 773d0b2f53e09..05aff85501093 100644
--- a/llvm/lib/Analysis/MemoryProfileInfo.cpp
+++ b/llvm/lib/Analysis/MemoryProfileInfo.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/MemoryProfileInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/IR/Constants.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
@@ -145,13 +146,6 @@ std::string llvm::memprof::getAllocTypeAttributeString(AllocationType Type) {
llvm_unreachable("invalid alloc type");
}
-static void addAllocTypeAttribute(LLVMContext &Ctx, CallBase *CI,
- AllocationType AllocType) {
- auto AllocTypeString = getAllocTypeAttributeString(AllocType);
- auto A = llvm::Attribute::get(Ctx, "memprof", AllocTypeString);
- CI->addFnAttr(A);
-}
-
bool llvm::memprof::hasSingleAllocType(uint8_t AllocTypes) {
const unsigned NumAllocTypes = llvm::popcount(AllocTypes);
assert(NumAllocTypes != 0);
@@ -475,7 +469,9 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
void CallStackTrie::addSingleAllocTypeAttribute(CallBase *CI, AllocationType AT,
StringRef Descriptor) {
- addAllocTypeAttribute(CI->getContext(), CI, AT);
+ auto AllocTypeString = getAllocTypeAttributeString(AT);
+ auto A = llvm::Attribute::get(CI->getContext(), "memprof", AllocTypeString);
+ CI->addFnAttr(A);
if (MemProfReportHintedSizes) {
std::vector<ContextTotalSize> ContextSizeInfo;
collectContextSizeInfo(Alloc, ContextSizeInfo);
@@ -485,6 +481,12 @@ void CallStackTrie::addSingleAllocTypeAttribute(CallBase *CI, AllocationType AT,
<< getAllocTypeAttributeString(AT) << ": " << TotalSize << "\n";
}
}
+ if (ORE)
+ ORE->emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", CI)
+ << ore::NV("AllocationCall", CI) << " in function "
+ << ore::NV("Caller", CI->getFunction())
+ << " marked with memprof allocation attribute "
+ << ore::NV("Attribute", AllocTypeString));
}
// Build and attach the minimal necessary MIB metadata. If the alloc has a
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index 6b819a447bd77..6554377a4c155 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -382,9 +382,10 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
&FAM.getResult<BlockFrequencyAnalysis>(*(CB->getCaller())),
&FAM.getResult<BlockFrequencyAnalysis>(Callee));
- InlineResult IR =
- InlineFunction(*CB, IFI, /*MergeAttributes=*/true,
- &FAM.getResult<AAManager>(*CB->getCaller()));
+ InlineResult IR = InlineFunction(
+ *CB, IFI, /*MergeAttributes=*/true,
+ &FAM.getResult<AAManager>(*CB->getCaller()), true, nullptr,
+ &FAM.getResult<OptimizationRemarkEmitterAnalysis>(*CB->getCaller()));
if (!IR.isSuccess()) {
Advice->recordUnsuccessfulInlining(IR);
continue;
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 5982476f3994e..037d584a367c4 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constant.h"
@@ -968,7 +969,8 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
const TargetLibraryInfo &TLI,
std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo,
std::set<std::vector<uint64_t>> &MatchedCallSites,
- DenseMap<uint64_t, LocToLocMap> &UndriftMaps) {
+ DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
+ OptimizationRemarkEmitter &ORE) {
auto &Ctx = M.getContext();
// Previously we used getIRPGOFuncName() here. If F is local linkage,
// getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
@@ -1135,7 +1137,7 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
// We may match this instruction's location list to multiple MIB
// contexts. Add them to a Trie specialized for trimming the contexts to
// the minimal needed to disambiguate contexts with unique behavior.
- CallStackTrie AllocTrie;
+ CallStackTrie AllocTrie(&ORE);
uint64_t TotalSize = 0;
uint64_t TotalColdSize = 0;
for (auto *AllocInfo : AllocInfoIter->second) {
@@ -1282,8 +1284,9 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
continue;
const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
+ auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
- MatchedCallSites, UndriftMaps);
+ MatchedCallSites, UndriftMaps, ORE);
}
if (ClPrintMemProfMatchInfo) {
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 21467a909af10..7a9605bf5f8d4 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -828,12 +828,13 @@ static void removeCallsiteMetadata(CallBase *Call) {
}
static void updateMemprofMetadata(CallBase *CI,
- const std::vector<Metadata *> &MIBList) {
+ const std::vector<Metadata *> &MIBList,
+ OptimizationRemarkEmitter *ORE) {
assert(!MIBList.empty());
// Remove existing memprof, which will either be replaced or may not be needed
// if we are able to use a single allocation type function attribute.
removeMemProfMetadata(CI);
- CallStackTrie CallStack;
+ CallStackTrie CallStack(ORE);
for (Metadata *MIB : MIBList)
CallStack.addCallStack(cast<MDNode>(MIB));
bool MemprofMDAttached = CallStack.buildAndAttachMIBMetadata(CI);
@@ -848,7 +849,8 @@ static void updateMemprofMetadata(CallBase *CI,
// the call that was inlined.
static void propagateMemProfHelper(const CallBase *OrigCall,
CallBase *ClonedCall,
- MDNode *InlinedCallsiteMD) {
+ MDNode *InlinedCallsiteMD,
+ OptimizationRemarkEmitter *ORE) {
MDNode *OrigCallsiteMD = ClonedCall->getMetadata(LLVMContext::MD_callsite);
MDNode *ClonedCallsiteMD = nullptr;
// Check if the call originally had callsite metadata, and update it for the
@@ -891,7 +893,7 @@ static void propagateMemProfHelper(const CallBase *OrigCall,
return;
}
if (NewMIBList.size() < OrigMemProfMD->getNumOperands())
- updateMemprofMetadata(ClonedCall, NewMIBList);
+ updateMemprofMetadata(ClonedCall, NewMIBList, ORE);
}
// Update memprof related metadata (!memprof and !callsite) based on the
@@ -902,7 +904,8 @@ static void propagateMemProfHelper(const CallBase *OrigCall,
static void
propagateMemProfMetadata(Function *Callee, CallBase &CB,
bool ContainsMemProfMetadata,
- const ValueMap<const Value *, WeakTrackingVH> &VMap) {
+ const ValueMap<const Value *, WeakTrackingVH> &VMap,
+ OptimizationRemarkEmitter *ORE) {
MDNode *CallsiteMD = CB.getMetadata(LLVMContext::MD_callsite);
// Only need to update if the inlined callsite had callsite metadata, or if
// there was any memprof metadata inlined.
@@ -925,7 +928,7 @@ propagateMemProfMetadata(Function *Callee, CallBase &CB,
removeCallsiteMetadata(ClonedCall);
continue;
}
- propagateMemProfHelper(OrigCall, ClonedCall, CallsiteMD);
+ propagateMemProfHelper(OrigCall, ClonedCall, CallsiteMD, ORE);
}
}
@@ -2473,7 +2476,8 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
bool MergeAttributes,
AAResults *CalleeAAR,
bool InsertLifetime,
- Function *ForwardVarArgsTo) {
+ Function *ForwardVarArgsTo,
+ OptimizationRemarkEmitter *ORE) {
assert(CB.getParent() && CB.getFunction() && "Instruction not in function!");
// FIXME: we don't inline callbr yet.
@@ -2807,8 +2811,8 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// inlined function which use the same param.
AddParamAndFnBasicAttributes(CB, VMap, InlinedFunctionInfo);
- propagateMemProfMetadata(CalledFunc, CB,
- InlinedFunctionInfo.ContainsMemProfMetadata, VMap);
+ propagateMemProfMetadata(
+ CalledFunc, CB, InlinedFunctionInfo.ContainsMemProfMetadata, VMap, ORE);
// Propagate metadata on the callsite if necessary.
PropagateCallSiteMetadata(CB, FirstNewBlock, Caller->end());
diff --git a/llvm/test/Transforms/Inline/memprof_inline.ll b/llvm/test/Transforms/Inline/memprof_inline.ll
index 7383264a8f2e7..5bca822d64601 100644
--- a/llvm/test/Transforms/Inline/memprof_inline.ll
+++ b/llvm/test/Transforms/Inline/memprof_inline.ll
@@ -28,7 +28,12 @@
;; }
-; RUN: opt -passes=inline %s -S | FileCheck %s
+;; Also check that remarks are emitted when the allocations are hinted.
+; RUN: opt -passes=inline -pass-remarks=memory-profile-info %s -S 2>&1 | FileCheck %s
+
+; CHECK: remark: memprof_inline.cc:5:10: call in function _Z4foo2v marked with memprof allocation attribute cold
+; CHECK: remark: memprof_inline.cc:5:10: call in function main marked with memprof allocation attribute notcold
+; CHECK: remark: memprof_inline.cc:5:10: call in function main marked with memprof allocation attribute cold
; ModuleID = 'memprof_inline.cc'
source_filename = "memprof_inline.cc"
diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll
index 4a3ddcc38b263..f2e7a486bbc40 100644
--- a/llvm/test/Transforms/PGOProfile/memprof.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof.ll
@@ -64,8 +64,9 @@
; RUN: opt < %s -passes='pgo-instr-use,memprof-use<profile-filename=%t.pgomemprofdata>' -pgo-test-profile-file=%t.pgomemprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,PGO
;; Check that the total sizes are reported if requested. A message should be
-;; emitted for the pruned context.
-; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-report-hinted-sizes 2>&1 | FileCheck %s --check-prefixes=TOTALSIZESSINGLE,TOTALSIZES,TOTALSIZENOKEEPALL
+;; emitted for the pruned context. Also check that remarks are emitted for the
+;; allocations hinted without context sensitivity.
+; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-report-hinted-sizes -pass-remarks=memory-profile-info 2>&1 | FileCheck %s --check-prefixes=TOTALSIZESSINGLE,TOTALSIZES,TOTALSIZENOKEEPALL,REMARKSINGLE
;; Check that the total sizes are reported if requested, and prevent pruning
;; via -memprof-keep-all-not-cold-contexts.
@@ -386,7 +387,9 @@ for.end: ; preds = %for.cond
; TOTALSIZESTHRESH60: Total size for full allocation context hash 18254812774972004394 and dominant alloc type cold: 10
; TOTALSIZESTHRESH60: Total size for full allocation context hash 1093248920606587996 and dominant alloc type cold: 10
; TOTALSIZESSINGLE: Total size for full allocation context hash 6792096022461663180 and single alloc type notcold: 10
+; REMARKSINGLE: remark: memprof.cc:25:13: call in function main marked with memprof allocation attribute notcold
; TOTALSIZESSINGLE: Total size for full allocation context hash 15737101490731057601 and single alloc type cold: 10
+; REMARKSINGLE: remark: memprof.cc:26:13: call in function main marked with memprof allocation attribute cold
;; For context sensitive allocations the full context hash and size in bytes
;; are in separate metadata nodes included on the MIB metadata.
; TOTALSIZES: !"cold", ![[CONTEXT1:[0-9]+]]}
``````````
</details>
https://github.com/llvm/llvm-project/pull/141859
More information about the llvm-commits mailing list