[llvm] [MemProf] Change the STACK_ID record to fixed width values (PR #116448)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 15 21:57:48 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-ir
@llvm/pr-subscribers-lto
Author: Teresa Johnson (teresajohnson)
<details>
<summary>Changes</summary>
The stack ids are hashes that are close to 64 bits in size, so emitting
as a pair of 32-bit fixed-width values is more efficient than a VBR.
This reduced the summary bitcode size for a large target by about 1%.
Bump the index version and ensure we can read the old format.
---
Full diff: https://github.com/llvm/llvm-project/pull/116448.diff
6 Files Affected:
- (modified) llvm/include/llvm/IR/ModuleSummaryIndex.h (+1-1)
- (modified) llvm/lib/Bitcode/Reader/BitcodeReader.cpp (+10-1)
- (modified) llvm/lib/Bitcode/Writer/BitcodeWriter.cpp (+20-7)
- (modified) llvm/test/Bitcode/summary_version.ll (+1-1)
- (added) llvm/test/ThinLTO/X86/Inputs/memprof-old-stackid-summary.bc ()
- (added) llvm/test/ThinLTO/X86/memprof-old-stackid-summary.ll (+20)
``````````diff
diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h
index 50def0eaf78867..39c60229aa1d81 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -1463,7 +1463,7 @@ class ModuleSummaryIndex {
// in the way some record are interpreted, like flags for instance.
// Note that incrementing this may require changes in both BitcodeReader.cpp
// and BitcodeWriter.cpp.
- static constexpr uint64_t BitcodeSummaryVersion = 11;
+ static constexpr uint64_t BitcodeSummaryVersion = 12;
// Regular LTO module name for ASM writer
static constexpr const char *getRegularLTOModuleName() {
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 9ca76b54a88d9d..3e6abacac27261 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -7997,7 +7997,16 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
case bitc::FS_STACK_IDS: { // [n x stackid]
// Save stack ids in the reader to consult when adding stack ids from the
// lists in the stack node and alloc node entries.
- StackIds = ArrayRef<uint64_t>(Record);
+ if (Version <= 11) {
+ StackIds = ArrayRef<uint64_t>(Record);
+ break;
+ }
+ // This is an array of 32-bit fixed-width values, holding each 64-bit
+ // context id as a pair of adjacent (most significant first) 32-bit words.
+ assert(Record.size() % 2 == 0);
+ StackIds.reserve(Record.size() / 2);
+ for (auto R = Record.begin(); R != Record.end(); R += 2)
+ StackIds.push_back(*R << 32 | *(R + 1));
break;
}
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 5829af39cf5e2c..24a4c2e8303d5a 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -4429,12 +4429,17 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
StackIdAbbv->Add(BitCodeAbbrevOp(bitc::FS_STACK_IDS));
// numids x stackid
StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- // FIXME: The stack ids are hashes that are close to 64 bits in size, so
- // emitting as a pair of 32-bit fixed-width values, as we do for context
- // ids, would be more efficient.
- StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ // The stack ids are hashes that are close to 64 bits in size, so emitting
+ // as a pair of 32-bit fixed-width values is more efficient than a VBR.
+ StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
unsigned StackIdAbbvId = Stream.EmitAbbrev(std::move(StackIdAbbv));
- Stream.EmitRecord(bitc::FS_STACK_IDS, Index->stackIds(), StackIdAbbvId);
+ SmallVector<uint32_t> Vals;
+ Vals.reserve(Index->stackIds().size() * 2);
+ for (auto Id : Index->stackIds()) {
+ Vals.push_back(static_cast<uint32_t>(Id >> 32));
+ Vals.push_back(static_cast<uint32_t>(Id));
+ }
+ Stream.EmitRecord(bitc::FS_STACK_IDS, Vals, StackIdAbbvId);
}
// n x context id
@@ -4624,9 +4629,17 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
StackIdAbbv->Add(BitCodeAbbrevOp(bitc::FS_STACK_IDS));
// numids x stackid
StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ // The stack ids are hashes that are close to 64 bits in size, so emitting
+ // as a pair of 32-bit fixed-width values is more efficient than a VBR.
+ StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
unsigned StackIdAbbvId = Stream.EmitAbbrev(std::move(StackIdAbbv));
- Stream.EmitRecord(bitc::FS_STACK_IDS, StackIds, StackIdAbbvId);
+ SmallVector<uint32_t> Vals;
+ Vals.reserve(StackIds.size() * 2);
+ for (auto Id : StackIds) {
+ Vals.push_back(static_cast<uint32_t>(Id >> 32));
+ Vals.push_back(static_cast<uint32_t>(Id));
+ }
+ Stream.EmitRecord(bitc::FS_STACK_IDS, Vals, StackIdAbbvId);
}
// Abbrev for FS_COMBINED_PROFILE.
diff --git a/llvm/test/Bitcode/summary_version.ll b/llvm/test/Bitcode/summary_version.ll
index c8d36f812c208a..c95c145a08788b 100644
--- a/llvm/test/Bitcode/summary_version.ll
+++ b/llvm/test/Bitcode/summary_version.ll
@@ -2,7 +2,7 @@
; RUN: opt -module-summary %s -o - | llvm-bcanalyzer -dump | FileCheck %s
; CHECK: <GLOBALVAL_SUMMARY_BLOCK
-; CHECK: <VERSION op0=11/>
+; CHECK: <VERSION op0=12/>
diff --git a/llvm/test/ThinLTO/X86/Inputs/memprof-old-stackid-summary.bc b/llvm/test/ThinLTO/X86/Inputs/memprof-old-stackid-summary.bc
new file mode 100644
index 00000000000000..78b134cb44216e
Binary files /dev/null and b/llvm/test/ThinLTO/X86/Inputs/memprof-old-stackid-summary.bc differ
diff --git a/llvm/test/ThinLTO/X86/memprof-old-stackid-summary.ll b/llvm/test/ThinLTO/X86/memprof-old-stackid-summary.ll
new file mode 100644
index 00000000000000..10048f8674a080
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/memprof-old-stackid-summary.ll
@@ -0,0 +1,20 @@
+;; Check that we can read the old STACK_ID summary format that encoded the id as
+;; a VBR8 instead of as a pair of 32-bit fixed-width values.
+;;
+;; The old bitcode was generated by the older compiler from `opt -thinlto-bc`
+;; on the following LLVM assembly:
+;;
+;; target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+;; target triple = "x86_64-unknown-linux-gnu"
+;;
+;; define void @bar() {
+;; call void @foo(), !callsite !0
+;; ret void
+;; }
+;;
+;; declare void @foo()
+;;
+;; !0 = !{i64 9086428284934609951}
+
+; RUN: llvm-dis %S/Inputs/memprof-old-stackid-summary.bc -o - | FileCheck %s
+; CHECK: stackIds: (9086428284934609951)
``````````
</details>
https://github.com/llvm/llvm-project/pull/116448
More information about the llvm-commits
mailing list