[llvm] [MemProf] Disable alloc context in combined summary for ndebug builds (PR #139161)
Teresa Johnson via llvm-commits
llvm-commits at lists.llvm.org
Fri May 9 16:01:17 PDT 2025
https://github.com/teresajohnson updated https://github.com/llvm/llvm-project/pull/139161
>From 17b3989c782c8793b7a65396929e274dd406cc8d Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Thu, 8 May 2025 14:00:29 -0700
Subject: [PATCH 1/3] [MemProf] Disable alloc context in combined summary for
ndebug builds
Since we currently only use the context information in the alloc info
summary in the LTO backend for assertion checking, there is no need to
write this into the combined summary index for distributed ThinLTO for
NDEBUG builds. Put this under a new -combined-index-memprof-context
option which is off by default for NDEBUG.
The advantage is that we save time (not having to sort in preparation
for building the radix trees), and space in the generated bitcode files.
We could also do so for the callsite info records, but those are smaller
and less expensive to prepare.
---
llvm/include/llvm/Bitcode/LLVMBitCodes.h | 5 +
llvm/lib/AsmParser/LLParser.cpp | 15 +-
llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp | 1 +
llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 7 +-
llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 131 +++++++++++-------
.../test/Assembler/thinlto-memprof-summary.ll | 15 +-
.../ThinLTO/X86/memprof_direct_recursion.ll | 1 +
7 files changed, 112 insertions(+), 63 deletions(-)
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 73e74f3836ca4..b362a88963f6c 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -335,6 +335,11 @@ enum GlobalValueSummarySymtabCodes {
// CallStackRadixTreeBuilder class in ProfileData/MemProf.h for format.
// [n x entry]
FS_CONTEXT_RADIX_TREE_ARRAY = 32,
+ // Summary of combined index allocation memprof metadata, without context.
+ // [nummib, numver,
+ // nummib x alloc type,
+ // numver x version]
+ FS_COMBINED_ALLOC_INFO_NO_CONTEXT = 33,
};
enum MetadataCodes {
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 96f86eb52f15c..2dfc8254d5885 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -10781,12 +10781,15 @@ bool LLParser::parseMemProfs(std::vector<MIBInfo> &MIBs) {
return true;
SmallVector<unsigned> StackIdIndices;
- do {
- uint64_t StackId = 0;
- if (parseUInt64(StackId))
- return true;
- StackIdIndices.push_back(Index->addOrGetStackIdIndex(StackId));
- } while (EatIfPresent(lltok::comma));
+ // Combined index alloc records may not have a stack id list.
+ if (Lex.getKind() != lltok::rparen) {
+ do {
+ uint64_t StackId = 0;
+ if (parseUInt64(StackId))
+ return true;
+ StackIdIndices.push_back(Index->addOrGetStackIdIndex(StackId));
+ } while (EatIfPresent(lltok::comma));
+ }
if (parseToken(lltok::rparen, "expected ')' in stackIds"))
return true;
diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
index 032c0de3c7a00..fe9e0ddca7091 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
@@ -330,6 +330,7 @@ GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(FS, STACK_IDS)
STRINGIFY_CODE(FS, ALLOC_CONTEXT_IDS)
STRINGIFY_CODE(FS, CONTEXT_RADIX_TREE_ARRAY)
+ STRINGIFY_CODE(FS, COMBINED_ALLOC_INFO_NO_CONTEXT)
}
case bitc::METADATA_ATTACHMENT_ID:
switch (CodeID) {
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index de7e9bbe69bd4..a7fbb0c74cb1e 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -8178,7 +8178,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
break;
}
- case bitc::FS_COMBINED_ALLOC_INFO: {
+ case bitc::FS_COMBINED_ALLOC_INFO:
+ case bitc::FS_COMBINED_ALLOC_INFO_NO_CONTEXT: {
unsigned I = 0;
std::vector<MIBInfo> MIBs;
unsigned NumMIBs = Record[I++];
@@ -8187,7 +8188,9 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
while (MIBsRead++ < NumMIBs) {
assert(Record.size() - I >= 2);
AllocationType AllocType = (AllocationType)Record[I++];
- auto StackIdList = parseAllocInfoContext(Record, I);
+ SmallVector<unsigned> StackIdList;
+ if (BitCode == bitc::FS_COMBINED_ALLOC_INFO)
+ StackIdList = parseAllocInfoContext(Record, I);
MIBs.push_back(MIBInfo(AllocType, std::move(StackIdList)));
}
assert(Record.size() - I >= NumVersions);
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 27ada0ddcd831..4c28146e1e69d 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -99,6 +99,21 @@ static cl::opt<bool> WriteRelBFToSummary(
"write-relbf-to-summary", cl::Hidden, cl::init(false),
cl::desc("Write relative block frequency to function summary "));
+// Since we only use the context information in the memprof summary records in
+// the LTO backends to do assertion checking, save time and space by only
+// serializing the context for non-NDEBUG builds.
+// TODO: Currently this controls writing context of the allocation info records,
+// which are larger and more expensive, but we should do this for the callsite
+// records as well.
+static cl::opt<bool>
+ CombinedIndexMemProfContext("combined-index-memprof-context", cl::Hidden,
+#ifndef NDEBUG
+ cl::init(true),
+#else
+ cl::init(false),
+#endif
+ cl::desc(""));
+
namespace llvm {
extern FunctionSummary::ForceSummaryHotnessType ForceSummaryEdgesCold;
}
@@ -528,10 +543,12 @@ class IndexBitcodeWriter : public BitcodeWriterBase {
for (auto Idx : CI.StackIdIndices)
RecordStackIdReference(Idx);
}
- for (auto &AI : FS->allocs())
- for (auto &MIB : AI.MIBs)
- for (auto Idx : MIB.StackIdIndices)
- RecordStackIdReference(Idx);
+ if (CombinedIndexMemProfContext) {
+ for (auto &AI : FS->allocs())
+ for (auto &MIB : AI.MIBs)
+ for (auto Idx : MIB.StackIdIndices)
+ RecordStackIdReference(Idx);
+ }
});
}
@@ -4349,9 +4366,11 @@ static void writeFunctionHeapProfileRecords(
Record.push_back(AI.Versions.size());
for (auto &MIB : AI.MIBs) {
Record.push_back((uint8_t)MIB.AllocType);
- // Record the index into the radix tree array for this context.
- assert(CallStackCount <= CallStackPos.size());
- Record.push_back(CallStackPos[CallStackCount++]);
+ if (PerModule || CombinedIndexMemProfContext) {
+ // Record the index into the radix tree array for this context.
+ assert(CallStackCount <= CallStackPos.size());
+ Record.push_back(CallStackPos[CallStackCount++]);
+ }
}
if (!PerModule)
llvm::append_range(Record, AI.Versions);
@@ -4384,8 +4403,11 @@ static void writeFunctionHeapProfileRecords(
Stream.EmitRecord(bitc::FS_ALLOC_CONTEXT_IDS, ContextIds,
ContextIdAbbvId);
}
- Stream.EmitRecord(PerModule ? bitc::FS_PERMODULE_ALLOC_INFO
- : bitc::FS_COMBINED_ALLOC_INFO,
+ Stream.EmitRecord(PerModule
+ ? bitc::FS_PERMODULE_ALLOC_INFO
+ : (CombinedIndexMemProfContext
+ ? bitc::FS_COMBINED_ALLOC_INFO
+ : bitc::FS_COMBINED_ALLOC_INFO_NO_CONTEXT),
Record, AllocAbbrev);
}
}
@@ -4847,7 +4869,9 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
unsigned CallsiteAbbrev = Stream.EmitAbbrev(std::move(Abbv));
Abbv = std::make_shared<BitCodeAbbrev>();
- Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_ALLOC_INFO));
+ Abbv->Add(BitCodeAbbrevOp(CombinedIndexMemProfContext
+ ? bitc::FS_COMBINED_ALLOC_INFO
+ : bitc::FS_COMBINED_ALLOC_INFO_NO_CONTEXT));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numver
// nummib x (alloc type, context radix tree index),
@@ -4857,13 +4881,6 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
- Abbv = std::make_shared<BitCodeAbbrev>();
- Abbv->Add(BitCodeAbbrevOp(bitc::FS_CONTEXT_RADIX_TREE_ARRAY));
- // n x entry
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
- unsigned RadixAbbrev = Stream.EmitAbbrev(std::move(Abbv));
-
auto shouldImportValueAsDecl = [&](GlobalValueSummary *GVS) -> bool {
if (DecSummaries == nullptr)
return false;
@@ -4900,44 +4917,54 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
NameVals.clear();
};
- // First walk through all the functions and collect the allocation contexts in
- // their associated summaries, for use in constructing a radix tree of
- // contexts. Note that we need to do this in the same order as the functions
- // are processed further below since the call stack positions in the resulting
- // radix tree array are identified based on this order.
- MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks;
- forEachSummary([&](GVInfo I, bool IsAliasee) {
- // Don't collect this when invoked for an aliasee, as it is not needed for
- // the alias summary. If the aliasee is to be imported, we will invoke this
- // separately with IsAliasee=false.
- if (IsAliasee)
- return;
- GlobalValueSummary *S = I.second;
- assert(S);
- auto *FS = dyn_cast<FunctionSummary>(S);
- if (!FS)
- return;
- collectMemProfCallStacks(
- FS,
- /*GetStackIndex*/
- [&](unsigned I) {
- // Get the corresponding index into the list of StackIds actually
- // being written for this combined index (which may be a subset in
- // the case of distributed indexes).
- assert(StackIdIndicesToIndex.contains(I));
- return StackIdIndicesToIndex[I];
- },
- CallStacks);
- });
- // Finalize the radix tree, write it out, and get the map of positions in the
- // linearized tree array.
DenseMap<CallStackId, LinearCallStackId> CallStackPos;
- if (!CallStacks.empty()) {
- CallStackPos =
- writeMemoryProfileRadixTree(std::move(CallStacks), Stream, RadixAbbrev);
+ if (CombinedIndexMemProfContext) {
+ Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FS_CONTEXT_RADIX_TREE_ARRAY));
+ // n x entry
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ unsigned RadixAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
+ // First walk through all the functions and collect the allocation contexts
+ // in their associated summaries, for use in constructing a radix tree of
+ // contexts. Note that we need to do this in the same order as the functions
+ // are processed further below since the call stack positions in the
+ // resulting radix tree array are identified based on this order.
+ MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks;
+ forEachSummary([&](GVInfo I, bool IsAliasee) {
+ // Don't collect this when invoked for an aliasee, as it is not needed for
+ // the alias summary. If the aliasee is to be imported, we will invoke
+ // this separately with IsAliasee=false.
+ if (IsAliasee)
+ return;
+ GlobalValueSummary *S = I.second;
+ assert(S);
+ auto *FS = dyn_cast<FunctionSummary>(S);
+ if (!FS)
+ return;
+ collectMemProfCallStacks(
+ FS,
+ /*GetStackIndex*/
+ [&](unsigned I) {
+ // Get the corresponding index into the list of StackIds actually
+ // being written for this combined index (which may be a subset in
+ // the case of distributed indexes).
+ assert(StackIdIndicesToIndex.contains(I));
+ return StackIdIndicesToIndex[I];
+ },
+ CallStacks);
+ });
+ // Finalize the radix tree, write it out, and get the map of positions in
+ // the linearized tree array.
+ if (!CallStacks.empty()) {
+ CallStackPos = writeMemoryProfileRadixTree(std::move(CallStacks), Stream,
+ RadixAbbrev);
+ }
}
- // Keep track of the current index into the CallStackPos map.
+ // Keep track of the current index into the CallStackPos map. Not used if
+ // CombinedIndexMemProfContext is false.
CallStackId CallStackCount = 0;
DenseSet<GlobalValue::GUID> DefOrUseGUIDs;
diff --git a/llvm/test/Assembler/thinlto-memprof-summary.ll b/llvm/test/Assembler/thinlto-memprof-summary.ll
index 69eafc967c2a3..4e4a928c1f024 100644
--- a/llvm/test/Assembler/thinlto-memprof-summary.ll
+++ b/llvm/test/Assembler/thinlto-memprof-summary.ll
@@ -1,5 +1,12 @@
;; Test memprof summary parsing (tests all types/fields in various combinations).
-; RUN: llvm-as %s -o - | llvm-dis -o - | FileCheck %s
+
+;; Force enable -combined-index-memprof-context to get the allocation context
+;; stack ids even in release builds.
+; RUN: llvm-as %s -o - -combined-index-memprof-context | llvm-dis -o - | FileCheck %s --check-prefixes=CHECK,CONTEXT
+
+;; Force disable -combined-index-memprof-context to block the allocation context
+;; stack ids even in non-release builds.
+; RUN: llvm-as %s -o - -combined-index-memprof-context=false | llvm-dis -o - | FileCheck %s --check-prefixes=CHECK,NOCONTEXT
; ModuleID = 'thinlto-memprof-summary.thinlto.bc'
@@ -17,8 +24,10 @@
; Make sure we get back from llvm-dis what we put in via llvm-as.
; CHECK: ^0 = module: (path: "thinlto-memprof-summary.o", hash: (1369602428, 2747878711, 259090915, 2507395659, 1141468049))
-; CHECK: ^1 = gv: (guid: 23, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (8632435727821051414)), (type: cold, stackIds: (15025054523792398438, 12345678)), (type: hot, stackIds: (987654321))))))))
+; CONTEXT: ^1 = gv: (guid: 23, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (8632435727821051414)), (type: cold, stackIds: (15025054523792398438, 12345678)), (type: hot, stackIds: (987654321))))))))
+; NOCONTEXT: ^1 = gv: (guid: 23, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (none), memProf: ((type: notcold, stackIds: ()), (type: cold, stackIds: ()), (type: hot, stackIds: ())))))))
; CHECK: ^2 = gv: (guid: 25, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^1)), callsites: ((callee: ^1, clones: (0), stackIds: (8632435727821051414)), (callee: ^1, clones: (0), stackIds: (15025054523792398438, 12345678)), (callee: ^1, clones: (0), stackIds: (23456789))))))
-; CHECK: ^3 = gv: (guid: 26, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (cold, notcold), memProf: ((type: notcold, stackIds: (3456789)), (type: cold, stackIds: (456789)))), (versions: (notcold, cold), memProf: ((type: cold, stackIds: (3456789)), (type: notcold, stackIds: (456789))))))))
+; CONTEXT: ^3 = gv: (guid: 26, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (cold, notcold), memProf: ((type: notcold, stackIds: (3456789)), (type: cold, stackIds: (456789)))), (versions: (notcold, cold), memProf: ((type: cold, stackIds: (3456789)), (type: notcold, stackIds: (456789))))))))
+; NOCONTEXT: ^3 = gv: (guid: 26, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (cold, notcold), memProf: ((type: notcold, stackIds: ()), (type: cold, stackIds: ()))), (versions: (notcold, cold), memProf: ((type: cold, stackIds: ()), (type: notcold, stackIds: ())))))))
; CHECK: ^4 = gv: (guid: 27, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^3)), callsites: ((callee: ^3, clones: (0, 1), stackIds: (3456789)), (callee: ^3, clones: (1, 1), stackIds: (456789))))))
; CHECK: ^5 = gv: (guid: 28, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), callsites: ((callee: null, clones: (0), stackIds: (8632435727821051414))))))
diff --git a/llvm/test/ThinLTO/X86/memprof_direct_recursion.ll b/llvm/test/ThinLTO/X86/memprof_direct_recursion.ll
index 63139cacd8fba..fc7d04fcf4d58 100644
--- a/llvm/test/ThinLTO/X86/memprof_direct_recursion.ll
+++ b/llvm/test/ThinLTO/X86/memprof_direct_recursion.ll
@@ -33,6 +33,7 @@
; RUN: llvm-lto2 run %t/b.o %t/a.o -enable-memprof-context-disambiguation \
; RUN: -supports-hot-cold-new \
; RUN: -thinlto-distributed-indexes \
+; RUN: -combined-index-memprof-context \
; RUN: -r=%t/b.o,_Z3fooi,plx \
; RUN: -r=%t/b.o,aliasee,plx \
; RUN: -r=%t/b.o,a \
>From 572e558f453e69d43c2eee132d5354a82c60c411 Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Fri, 9 May 2025 13:15:29 -0700
Subject: [PATCH 2/3] Address comments
---
llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 4c28146e1e69d..9990cce812aa7 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -105,6 +105,7 @@ static cl::opt<bool> WriteRelBFToSummary(
// TODO: Currently this controls writing context of the allocation info records,
// which are larger and more expensive, but we should do this for the callsite
// records as well.
+// FIXME: Convert to a const once this has undergone more sigificant testing.
static cl::opt<bool>
CombinedIndexMemProfContext("combined-index-memprof-context", cl::Hidden,
#ifndef NDEBUG
@@ -4366,6 +4367,9 @@ static void writeFunctionHeapProfileRecords(
Record.push_back(AI.Versions.size());
for (auto &MIB : AI.MIBs) {
Record.push_back((uint8_t)MIB.AllocType);
+ // The per-module summary always needs to include the alloc context, as we
+ // use it during the thin link. For the combined index it is optional (see
+ // comments where CombinedIndexMemProfContext is defined).
if (PerModule || CombinedIndexMemProfContext) {
// Record the index into the radix tree array for this context.
assert(CallStackCount <= CallStackPos.size());
>From d0c3b862fb2d5a7c9c6d05e8022650c7bfa503c1 Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Fri, 9 May 2025 15:58:47 -0700
Subject: [PATCH 3/3] Flip condition
---
llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 9990cce812aa7..ef397879a132c 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -108,10 +108,10 @@ static cl::opt<bool> WriteRelBFToSummary(
// FIXME: Convert to a const once this has undergone more sigificant testing.
static cl::opt<bool>
CombinedIndexMemProfContext("combined-index-memprof-context", cl::Hidden,
-#ifndef NDEBUG
- cl::init(true),
-#else
+#ifdef NDEBUG
cl::init(false),
+#else
+ cl::init(true),
#endif
cl::desc(""));
More information about the llvm-commits
mailing list