[llvm] a3d027f - [MemProf] Disable alloc context in combined summary for ndebug builds (#139161)
via llvm-commits
llvm-commits at lists.llvm.org
Fri May 9 16:28:53 PDT 2025
Author: Teresa Johnson
Date: 2025-05-09T16:28:49-07:00
New Revision: a3d027f92308890c9b1ace7b8a5a7f7d69ce5f0e
URL: https://github.com/llvm/llvm-project/commit/a3d027f92308890c9b1ace7b8a5a7f7d69ce5f0e
DIFF: https://github.com/llvm/llvm-project/commit/a3d027f92308890c9b1ace7b8a5a7f7d69ce5f0e.diff
LOG: [MemProf] Disable alloc context in combined summary for ndebug builds (#139161)
Since we currently only use the context information in the alloc info
summary in the LTO backend for assertion checking, there is no need to
write this into the combined summary index for distributed ThinLTO for
NDEBUG builds. Put this under a new -combined-index-memprof-context
option which is off by default for NDEBUG.
The advantage is that we save time (not having to sort in preparation
for building the radix trees), and space in the generated bitcode files.
We could also do so for the callsite info records, but those are smaller
and less expensive to prepare.
Added:
Modified:
llvm/include/llvm/Bitcode/LLVMBitCodes.h
llvm/lib/AsmParser/LLParser.cpp
llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
llvm/lib/Bitcode/Reader/BitcodeReader.cpp
llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
llvm/test/Assembler/thinlto-memprof-summary.ll
llvm/test/ThinLTO/X86/memprof_direct_recursion.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 73e74f3836ca4..b362a88963f6c 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -335,6 +335,11 @@ enum GlobalValueSummarySymtabCodes {
// CallStackRadixTreeBuilder class in ProfileData/MemProf.h for format.
// [n x entry]
FS_CONTEXT_RADIX_TREE_ARRAY = 32,
+ // Summary of combined index allocation memprof metadata, without context.
+ // [nummib, numver,
+ // nummib x alloc type,
+ // numver x version]
+ FS_COMBINED_ALLOC_INFO_NO_CONTEXT = 33,
};
enum MetadataCodes {
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 96f86eb52f15c..2dfc8254d5885 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -10781,12 +10781,15 @@ bool LLParser::parseMemProfs(std::vector<MIBInfo> &MIBs) {
return true;
SmallVector<unsigned> StackIdIndices;
- do {
- uint64_t StackId = 0;
- if (parseUInt64(StackId))
- return true;
- StackIdIndices.push_back(Index->addOrGetStackIdIndex(StackId));
- } while (EatIfPresent(lltok::comma));
+ // Combined index alloc records may not have a stack id list.
+ if (Lex.getKind() != lltok::rparen) {
+ do {
+ uint64_t StackId = 0;
+ if (parseUInt64(StackId))
+ return true;
+ StackIdIndices.push_back(Index->addOrGetStackIdIndex(StackId));
+ } while (EatIfPresent(lltok::comma));
+ }
if (parseToken(lltok::rparen, "expected ')' in stackIds"))
return true;
diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
index 032c0de3c7a00..fe9e0ddca7091 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
@@ -330,6 +330,7 @@ GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(FS, STACK_IDS)
STRINGIFY_CODE(FS, ALLOC_CONTEXT_IDS)
STRINGIFY_CODE(FS, CONTEXT_RADIX_TREE_ARRAY)
+ STRINGIFY_CODE(FS, COMBINED_ALLOC_INFO_NO_CONTEXT)
}
case bitc::METADATA_ATTACHMENT_ID:
switch (CodeID) {
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index de7e9bbe69bd4..a7fbb0c74cb1e 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -8178,7 +8178,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
break;
}
- case bitc::FS_COMBINED_ALLOC_INFO: {
+ case bitc::FS_COMBINED_ALLOC_INFO:
+ case bitc::FS_COMBINED_ALLOC_INFO_NO_CONTEXT: {
unsigned I = 0;
std::vector<MIBInfo> MIBs;
unsigned NumMIBs = Record[I++];
@@ -8187,7 +8188,9 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
while (MIBsRead++ < NumMIBs) {
assert(Record.size() - I >= 2);
AllocationType AllocType = (AllocationType)Record[I++];
- auto StackIdList = parseAllocInfoContext(Record, I);
+ SmallVector<unsigned> StackIdList;
+ if (BitCode == bitc::FS_COMBINED_ALLOC_INFO)
+ StackIdList = parseAllocInfoContext(Record, I);
MIBs.push_back(MIBInfo(AllocType, std::move(StackIdList)));
}
assert(Record.size() - I >= NumVersions);
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 27ada0ddcd831..ef397879a132c 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -99,6 +99,22 @@ static cl::opt<bool> WriteRelBFToSummary(
"write-relbf-to-summary", cl::Hidden, cl::init(false),
cl::desc("Write relative block frequency to function summary "));
+// Since we only use the context information in the memprof summary records in
+// the LTO backends to do assertion checking, save time and space by only
+// serializing the context for non-NDEBUG builds.
+// TODO: Currently this controls writing context of the allocation info records,
+// which are larger and more expensive, but we should do this for the callsite
+// records as well.
+// FIXME: Convert to a const once this has undergone more sigificant testing.
+static cl::opt<bool>
+ CombinedIndexMemProfContext("combined-index-memprof-context", cl::Hidden,
+#ifdef NDEBUG
+ cl::init(false),
+#else
+ cl::init(true),
+#endif
+ cl::desc(""));
+
namespace llvm {
extern FunctionSummary::ForceSummaryHotnessType ForceSummaryEdgesCold;
}
@@ -528,10 +544,12 @@ class IndexBitcodeWriter : public BitcodeWriterBase {
for (auto Idx : CI.StackIdIndices)
RecordStackIdReference(Idx);
}
- for (auto &AI : FS->allocs())
- for (auto &MIB : AI.MIBs)
- for (auto Idx : MIB.StackIdIndices)
- RecordStackIdReference(Idx);
+ if (CombinedIndexMemProfContext) {
+ for (auto &AI : FS->allocs())
+ for (auto &MIB : AI.MIBs)
+ for (auto Idx : MIB.StackIdIndices)
+ RecordStackIdReference(Idx);
+ }
});
}
@@ -4349,9 +4367,14 @@ static void writeFunctionHeapProfileRecords(
Record.push_back(AI.Versions.size());
for (auto &MIB : AI.MIBs) {
Record.push_back((uint8_t)MIB.AllocType);
- // Record the index into the radix tree array for this context.
- assert(CallStackCount <= CallStackPos.size());
- Record.push_back(CallStackPos[CallStackCount++]);
+ // The per-module summary always needs to include the alloc context, as we
+ // use it during the thin link. For the combined index it is optional (see
+ // comments where CombinedIndexMemProfContext is defined).
+ if (PerModule || CombinedIndexMemProfContext) {
+ // Record the index into the radix tree array for this context.
+ assert(CallStackCount <= CallStackPos.size());
+ Record.push_back(CallStackPos[CallStackCount++]);
+ }
}
if (!PerModule)
llvm::append_range(Record, AI.Versions);
@@ -4384,8 +4407,11 @@ static void writeFunctionHeapProfileRecords(
Stream.EmitRecord(bitc::FS_ALLOC_CONTEXT_IDS, ContextIds,
ContextIdAbbvId);
}
- Stream.EmitRecord(PerModule ? bitc::FS_PERMODULE_ALLOC_INFO
- : bitc::FS_COMBINED_ALLOC_INFO,
+ Stream.EmitRecord(PerModule
+ ? bitc::FS_PERMODULE_ALLOC_INFO
+ : (CombinedIndexMemProfContext
+ ? bitc::FS_COMBINED_ALLOC_INFO
+ : bitc::FS_COMBINED_ALLOC_INFO_NO_CONTEXT),
Record, AllocAbbrev);
}
}
@@ -4847,7 +4873,9 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
unsigned CallsiteAbbrev = Stream.EmitAbbrev(std::move(Abbv));
Abbv = std::make_shared<BitCodeAbbrev>();
- Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_ALLOC_INFO));
+ Abbv->Add(BitCodeAbbrevOp(CombinedIndexMemProfContext
+ ? bitc::FS_COMBINED_ALLOC_INFO
+ : bitc::FS_COMBINED_ALLOC_INFO_NO_CONTEXT));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numver
// nummib x (alloc type, context radix tree index),
@@ -4857,13 +4885,6 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
- Abbv = std::make_shared<BitCodeAbbrev>();
- Abbv->Add(BitCodeAbbrevOp(bitc::FS_CONTEXT_RADIX_TREE_ARRAY));
- // n x entry
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
- unsigned RadixAbbrev = Stream.EmitAbbrev(std::move(Abbv));
-
auto shouldImportValueAsDecl = [&](GlobalValueSummary *GVS) -> bool {
if (DecSummaries == nullptr)
return false;
@@ -4900,44 +4921,54 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
NameVals.clear();
};
- // First walk through all the functions and collect the allocation contexts in
- // their associated summaries, for use in constructing a radix tree of
- // contexts. Note that we need to do this in the same order as the functions
- // are processed further below since the call stack positions in the resulting
- // radix tree array are identified based on this order.
- MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks;
- forEachSummary([&](GVInfo I, bool IsAliasee) {
- // Don't collect this when invoked for an aliasee, as it is not needed for
- // the alias summary. If the aliasee is to be imported, we will invoke this
- // separately with IsAliasee=false.
- if (IsAliasee)
- return;
- GlobalValueSummary *S = I.second;
- assert(S);
- auto *FS = dyn_cast<FunctionSummary>(S);
- if (!FS)
- return;
- collectMemProfCallStacks(
- FS,
- /*GetStackIndex*/
- [&](unsigned I) {
- // Get the corresponding index into the list of StackIds actually
- // being written for this combined index (which may be a subset in
- // the case of distributed indexes).
- assert(StackIdIndicesToIndex.contains(I));
- return StackIdIndicesToIndex[I];
- },
- CallStacks);
- });
- // Finalize the radix tree, write it out, and get the map of positions in the
- // linearized tree array.
DenseMap<CallStackId, LinearCallStackId> CallStackPos;
- if (!CallStacks.empty()) {
- CallStackPos =
- writeMemoryProfileRadixTree(std::move(CallStacks), Stream, RadixAbbrev);
+ if (CombinedIndexMemProfContext) {
+ Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FS_CONTEXT_RADIX_TREE_ARRAY));
+ // n x entry
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ unsigned RadixAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
+ // First walk through all the functions and collect the allocation contexts
+ // in their associated summaries, for use in constructing a radix tree of
+ // contexts. Note that we need to do this in the same order as the functions
+ // are processed further below since the call stack positions in the
+ // resulting radix tree array are identified based on this order.
+ MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks;
+ forEachSummary([&](GVInfo I, bool IsAliasee) {
+ // Don't collect this when invoked for an aliasee, as it is not needed for
+ // the alias summary. If the aliasee is to be imported, we will invoke
+ // this separately with IsAliasee=false.
+ if (IsAliasee)
+ return;
+ GlobalValueSummary *S = I.second;
+ assert(S);
+ auto *FS = dyn_cast<FunctionSummary>(S);
+ if (!FS)
+ return;
+ collectMemProfCallStacks(
+ FS,
+ /*GetStackIndex*/
+ [&](unsigned I) {
+ // Get the corresponding index into the list of StackIds actually
+ // being written for this combined index (which may be a subset in
+ // the case of distributed indexes).
+ assert(StackIdIndicesToIndex.contains(I));
+ return StackIdIndicesToIndex[I];
+ },
+ CallStacks);
+ });
+ // Finalize the radix tree, write it out, and get the map of positions in
+ // the linearized tree array.
+ if (!CallStacks.empty()) {
+ CallStackPos = writeMemoryProfileRadixTree(std::move(CallStacks), Stream,
+ RadixAbbrev);
+ }
}
- // Keep track of the current index into the CallStackPos map.
+ // Keep track of the current index into the CallStackPos map. Not used if
+ // CombinedIndexMemProfContext is false.
CallStackId CallStackCount = 0;
DenseSet<GlobalValue::GUID> DefOrUseGUIDs;
diff --git a/llvm/test/Assembler/thinlto-memprof-summary.ll b/llvm/test/Assembler/thinlto-memprof-summary.ll
index 69eafc967c2a3..4e4a928c1f024 100644
--- a/llvm/test/Assembler/thinlto-memprof-summary.ll
+++ b/llvm/test/Assembler/thinlto-memprof-summary.ll
@@ -1,5 +1,12 @@
;; Test memprof summary parsing (tests all types/fields in various combinations).
-; RUN: llvm-as %s -o - | llvm-dis -o - | FileCheck %s
+
+;; Force enable -combined-index-memprof-context to get the allocation context
+;; stack ids even in release builds.
+; RUN: llvm-as %s -o - -combined-index-memprof-context | llvm-dis -o - | FileCheck %s --check-prefixes=CHECK,CONTEXT
+
+;; Force disable -combined-index-memprof-context to block the allocation context
+;; stack ids even in non-release builds.
+; RUN: llvm-as %s -o - -combined-index-memprof-context=false | llvm-dis -o - | FileCheck %s --check-prefixes=CHECK,NOCONTEXT
; ModuleID = 'thinlto-memprof-summary.thinlto.bc'
@@ -17,8 +24,10 @@
; Make sure we get back from llvm-dis what we put in via llvm-as.
; CHECK: ^0 = module: (path: "thinlto-memprof-summary.o", hash: (1369602428, 2747878711, 259090915, 2507395659, 1141468049))
-; CHECK: ^1 = gv: (guid: 23, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (8632435727821051414)), (type: cold, stackIds: (15025054523792398438, 12345678)), (type: hot, stackIds: (987654321))))))))
+; CONTEXT: ^1 = gv: (guid: 23, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (8632435727821051414)), (type: cold, stackIds: (15025054523792398438, 12345678)), (type: hot, stackIds: (987654321))))))))
+; NOCONTEXT: ^1 = gv: (guid: 23, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (none), memProf: ((type: notcold, stackIds: ()), (type: cold, stackIds: ()), (type: hot, stackIds: ())))))))
; CHECK: ^2 = gv: (guid: 25, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^1)), callsites: ((callee: ^1, clones: (0), stackIds: (8632435727821051414)), (callee: ^1, clones: (0), stackIds: (15025054523792398438, 12345678)), (callee: ^1, clones: (0), stackIds: (23456789))))))
-; CHECK: ^3 = gv: (guid: 26, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (cold, notcold), memProf: ((type: notcold, stackIds: (3456789)), (type: cold, stackIds: (456789)))), (versions: (notcold, cold), memProf: ((type: cold, stackIds: (3456789)), (type: notcold, stackIds: (456789))))))))
+; CONTEXT: ^3 = gv: (guid: 26, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (cold, notcold), memProf: ((type: notcold, stackIds: (3456789)), (type: cold, stackIds: (456789)))), (versions: (notcold, cold), memProf: ((type: cold, stackIds: (3456789)), (type: notcold, stackIds: (456789))))))))
+; NOCONTEXT: ^3 = gv: (guid: 26, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (cold, notcold), memProf: ((type: notcold, stackIds: ()), (type: cold, stackIds: ()))), (versions: (notcold, cold), memProf: ((type: cold, stackIds: ()), (type: notcold, stackIds: ())))))))
; CHECK: ^4 = gv: (guid: 27, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^3)), callsites: ((callee: ^3, clones: (0, 1), stackIds: (3456789)), (callee: ^3, clones: (1, 1), stackIds: (456789))))))
; CHECK: ^5 = gv: (guid: 28, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), callsites: ((callee: null, clones: (0), stackIds: (8632435727821051414))))))
diff --git a/llvm/test/ThinLTO/X86/memprof_direct_recursion.ll b/llvm/test/ThinLTO/X86/memprof_direct_recursion.ll
index 63139cacd8fba..fc7d04fcf4d58 100644
--- a/llvm/test/ThinLTO/X86/memprof_direct_recursion.ll
+++ b/llvm/test/ThinLTO/X86/memprof_direct_recursion.ll
@@ -33,6 +33,7 @@
; RUN: llvm-lto2 run %t/b.o %t/a.o -enable-memprof-context-disambiguation \
; RUN: -supports-hot-cold-new \
; RUN: -thinlto-distributed-indexes \
+; RUN: -combined-index-memprof-context \
; RUN: -r=%t/b.o,_Z3fooi,plx \
; RUN: -r=%t/b.o,aliasee,plx \
; RUN: -r=%t/b.o,a \
More information about the llvm-commits
mailing list