[llvm] ff7b42c - [memprof] Speed up llvm-profdata (#117446)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 24 21:08:57 PST 2024
Author: Kazu Hirata
Date: 2024-11-24T21:08:54-08:00
New Revision: ff7b42c194e0fa23e6a76f0a33a80c0c3af14e7d
URL: https://github.com/llvm/llvm-project/commit/ff7b42c194e0fa23e6a76f0a33a80c0c3af14e7d
DIFF: https://github.com/llvm/llvm-project/commit/ff7b42c194e0fa23e6a76f0a33a80c0c3af14e7d.diff
LOG: [memprof] Speed up llvm-profdata (#117446)
CallStackRadixTreeBuilder::build takes the parameter
MemProfFrameIndexes by value, involving copies:
std::optional<const llvm::DenseMap<FrameIdTy, LinearFrameId>>
MemProfFrameIndexes
Then "build" makes another copy of MemProfFrameIndexe and passes it to
encodeCallStack for every call stack, which is painfully slow.
This patch changes the type to a pointer so that we don't have to make
a copy every time we pass the argument.
Without this patch, it takes 553 seconds to run "llvm-profdata merge"
on a large MemProf raw profile. This patch shortenes that down to 67
seconds.
Added:
Modified:
llvm/include/llvm/ProfileData/MemProf.h
llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
llvm/lib/ProfileData/InstrProfWriter.cpp
llvm/lib/ProfileData/MemProf.cpp
llvm/unittests/ProfileData/MemProfTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index 73a9586aaa6448..dd4d90864a08c4 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -1117,21 +1117,20 @@ template <typename FrameIdTy> class CallStackRadixTreeBuilder {
// Encode a call stack into RadixArray. Return the starting index within
// RadixArray.
- LinearCallStackId
- encodeCallStack(const llvm::SmallVector<FrameIdTy> *CallStack,
- const llvm::SmallVector<FrameIdTy> *Prev,
- std::optional<const llvm::DenseMap<FrameIdTy, LinearFrameId>>
- MemProfFrameIndexes);
+ LinearCallStackId encodeCallStack(
+ const llvm::SmallVector<FrameIdTy> *CallStack,
+ const llvm::SmallVector<FrameIdTy> *Prev,
+ const llvm::DenseMap<FrameIdTy, LinearFrameId> *MemProfFrameIndexes);
public:
CallStackRadixTreeBuilder() = default;
// Build a radix tree array.
- void build(llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
- &&MemProfCallStackData,
- std::optional<const llvm::DenseMap<FrameIdTy, LinearFrameId>>
- MemProfFrameIndexes,
- llvm::DenseMap<FrameIdTy, FrameStat> &FrameHistogram);
+ void
+ build(llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
+ &&MemProfCallStackData,
+ const llvm::DenseMap<FrameIdTy, LinearFrameId> *MemProfFrameIndexes,
+ llvm::DenseMap<FrameIdTy, FrameStat> &FrameHistogram);
ArrayRef<LinearFrameId> getRadixArray() const { return RadixArray; }
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 8f22a50a5e0245..63f4e34074e06b 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -4235,7 +4235,7 @@ static DenseMap<CallStackId, LinearCallStackId> writeMemoryProfileRadixTree(
CallStackRadixTreeBuilder<LinearFrameId> Builder;
// We don't need a MemProfFrameIndexes map as we have already converted the
// full stack id hash to a linear offset into the StackIds array.
- Builder.build(std::move(CallStacks), /*MemProfFrameIndexes=*/std::nullopt,
+ Builder.build(std::move(CallStacks), /*MemProfFrameIndexes=*/nullptr,
FrameHistogram);
Stream.EmitRecord(bitc::FS_CONTEXT_RADIX_TREE_ARRAY, Builder.getRadixArray(),
RadixAbbrev);
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 4470e2c58105c2..d8ab18d213e3d7 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -641,7 +641,7 @@ writeMemProfCallStackArray(
MemProfCallStackIndexes;
memprof::CallStackRadixTreeBuilder<memprof::FrameId> Builder;
- Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
+ Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes,
FrameHistogram);
for (auto I : Builder.getRadixArray())
OS.write32(I);
diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index 20cc4eececc9b1..1c240c3858cc76 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -335,8 +335,7 @@ template <typename FrameIdTy>
LinearCallStackId CallStackRadixTreeBuilder<FrameIdTy>::encodeCallStack(
const llvm::SmallVector<FrameIdTy> *CallStack,
const llvm::SmallVector<FrameIdTy> *Prev,
- std::optional<const llvm::DenseMap<FrameIdTy, LinearFrameId>>
- MemProfFrameIndexes) {
+ const llvm::DenseMap<FrameIdTy, LinearFrameId> *MemProfFrameIndexes) {
// Compute the length of the common root prefix between Prev and CallStack.
uint32_t CommonLen = 0;
if (Prev) {
@@ -381,8 +380,7 @@ template <typename FrameIdTy>
void CallStackRadixTreeBuilder<FrameIdTy>::build(
llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
&&MemProfCallStackData,
- std::optional<const llvm::DenseMap<FrameIdTy, LinearFrameId>>
- MemProfFrameIndexes,
+ const llvm::DenseMap<FrameIdTy, LinearFrameId> *MemProfFrameIndexes,
llvm::DenseMap<FrameIdTy, FrameStat> &FrameHistogram) {
// Take the vector portion of MemProfCallStackData. The vector is exactly
// what we need to sort. Also, we no longer need its lookup capability.
diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index c3f35e41b5fc74..7b9910e295df9d 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -628,7 +628,7 @@ TEST(MemProf, RadixTreeBuilderEmpty) {
FrameHistogram =
llvm::memprof::computeFrameHistogram<FrameId>(MemProfCallStackData);
llvm::memprof::CallStackRadixTreeBuilder<FrameId> Builder;
- Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
+ Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes,
FrameHistogram);
ASSERT_THAT(Builder.getRadixArray(), testing::IsEmpty());
const auto Mappings = Builder.takeCallStackPos();
@@ -646,7 +646,7 @@ TEST(MemProf, RadixTreeBuilderOne) {
FrameHistogram =
llvm::memprof::computeFrameHistogram<FrameId>(MemProfCallStackData);
llvm::memprof::CallStackRadixTreeBuilder<FrameId> Builder;
- Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
+ Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes,
FrameHistogram);
EXPECT_THAT(Builder.getRadixArray(), testing::ElementsAreArray({
3U, // Size of CS1,
@@ -673,7 +673,7 @@ TEST(MemProf, RadixTreeBuilderTwo) {
FrameHistogram =
llvm::memprof::computeFrameHistogram<FrameId>(MemProfCallStackData);
llvm::memprof::CallStackRadixTreeBuilder<FrameId> Builder;
- Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
+ Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes,
FrameHistogram);
EXPECT_THAT(Builder.getRadixArray(),
testing::ElementsAreArray({
@@ -711,7 +711,7 @@ TEST(MemProf, RadixTreeBuilderSuccessiveJumps) {
FrameHistogram =
llvm::memprof::computeFrameHistogram<FrameId>(MemProfCallStackData);
llvm::memprof::CallStackRadixTreeBuilder<FrameId> Builder;
- Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
+ Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes,
FrameHistogram);
EXPECT_THAT(Builder.getRadixArray(),
testing::ElementsAreArray({
More information about the llvm-commits
mailing list