[llvm] [BOLT][NFCI] Strip suffix in getLTOCommonName (PR #106243)
Amir Ayupov via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 2 17:51:08 PDT 2024
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/106243
>From 1f2e9a33d80616e7acdc68f1956a37f82abecd9e Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Tue, 27 Aug 2024 09:28:19 -0700
Subject: [PATCH 1/4] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?=
=?UTF-8?q?itial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.4
---
bolt/include/bolt/Utils/Utils.h | 6 +++++-
bolt/lib/Utils/Utils.cpp | 7 ++++---
2 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/bolt/include/bolt/Utils/Utils.h b/bolt/include/bolt/Utils/Utils.h
index 3886c5f8757c08..63de3c5ee8f123 100644
--- a/bolt/include/bolt/Utils/Utils.h
+++ b/bolt/include/bolt/Utils/Utils.h
@@ -64,8 +64,12 @@ std::string getUnescapedName(const StringRef &Name);
/// of functions. Later, out of all matching profiles we pick the one with the
/// best match.
///
+static SmallVector<StringRef, 4> LTOSuffixes({".__uniq.", ".lto_priv.",
+ ".constprop.", ".llvm."});
/// Return a common part of LTO name for a given \p Name.
-std::optional<StringRef> getLTOCommonName(const StringRef Name);
+std::optional<StringRef>
+getLTOCommonName(const StringRef Name,
+ ArrayRef<StringRef> Suffixes = LTOSuffixes);
// Determines which register a given DWARF expression is being assigned to.
// If the expression is defining the CFA, return std::nullopt.
diff --git a/bolt/lib/Utils/Utils.cpp b/bolt/lib/Utils/Utils.cpp
index 718e97535fd22a..a4b6a6a317e2cb 100644
--- a/bolt/lib/Utils/Utils.cpp
+++ b/bolt/lib/Utils/Utils.cpp
@@ -66,11 +66,12 @@ std::string getUnescapedName(const StringRef &Name) {
return Output;
}
-std::optional<StringRef> getLTOCommonName(const StringRef Name) {
- for (StringRef Suffix : {".__uniq.", ".lto_priv.", ".constprop.", ".llvm."}) {
+std::optional<StringRef> getLTOCommonName(const StringRef Name,
+ ArrayRef<StringRef> Suffixes) {
+ for (StringRef Suffix : Suffixes) {
size_t LTOSuffixPos = Name.find(Suffix);
if (LTOSuffixPos != StringRef::npos)
- return Name.substr(0, LTOSuffixPos + Suffix.size());
+ return Name.substr(0, LTOSuffixPos);
}
return std::nullopt;
}
>From 6eff5fe988ce505903a3836e86f274c4a1616fb3 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Tue, 27 Aug 2024 15:35:35 -0700
Subject: [PATCH 2/4] define getCommonName
Created using spr 1.3.4
---
bolt/include/bolt/Utils/Utils.h | 11 ++++++-----
bolt/lib/Rewrite/PseudoProbeRewriter.cpp | 11 +++++++++--
bolt/lib/Utils/Utils.cpp | 11 ++++++++---
3 files changed, 23 insertions(+), 10 deletions(-)
diff --git a/bolt/include/bolt/Utils/Utils.h b/bolt/include/bolt/Utils/Utils.h
index 63de3c5ee8f123..9baee7d94066de 100644
--- a/bolt/include/bolt/Utils/Utils.h
+++ b/bolt/include/bolt/Utils/Utils.h
@@ -41,6 +41,11 @@ std::string getEscapedName(const StringRef &Name);
/// Return the unescaped name
std::string getUnescapedName(const StringRef &Name);
+/// Return a common part for a given \p Name wrt a given \p Suffixes list.
+/// Preserve the suffix if \p KeepSuffix is set, only dropping characters
+/// following it, otherwise drop the suffix as well.
+std::optional<StringRef> getCommonName(const StringRef Name, bool KeepSuffix,
+ ArrayRef<StringRef> Suffixes);
/// LTO-generated function names take a form:
///
/// <function_name>.lto_priv.<decimal_number>/...
@@ -64,12 +69,8 @@ std::string getUnescapedName(const StringRef &Name);
/// of functions. Later, out of all matching profiles we pick the one with the
/// best match.
///
-static SmallVector<StringRef, 4> LTOSuffixes({".__uniq.", ".lto_priv.",
- ".constprop.", ".llvm."});
/// Return a common part of LTO name for a given \p Name.
-std::optional<StringRef>
-getLTOCommonName(const StringRef Name,
- ArrayRef<StringRef> Suffixes = LTOSuffixes);
+std::optional<StringRef> getLTOCommonName(const StringRef Name);
// Determines which register a given DWARF expression is being assigned to.
// If the expression is defining the CFA, return std::nullopt.
diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
index 4925b4b385d9b1..6e80d9b0014b7b 100644
--- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
+++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
@@ -14,6 +14,7 @@
#include "bolt/Rewrite/MetadataRewriter.h"
#include "bolt/Rewrite/MetadataRewriters.h"
#include "bolt/Utils/CommandLineOpts.h"
+#include "bolt/Utils/Utils.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/Support/CommandLine.h"
@@ -133,10 +134,16 @@ void PseudoProbeRewriter::parsePseudoProbe() {
MCPseudoProbeDecoder::Uint64Set GuidFilter;
MCPseudoProbeDecoder::Uint64Map FuncStartAddrs;
+ SmallVector<StringRef, 3> Suffixes({".llvm.", ".destroy", ".resume"});
for (const BinaryFunction *F : BC.getAllBinaryFunctions()) {
for (const MCSymbol *Sym : F->getSymbols()) {
- FuncStartAddrs[Function::getGUID(NameResolver::restore(Sym->getName()))] =
- F->getAddress();
+ StringRef SymName = NameResolver::restore(Sym->getName());
+ if (std::optional<StringRef> CommonName =
+ getCommonName(SymName, false, Suffixes)) {
+ SymName = *CommonName;
+ }
+ uint64_t GUID = Function::getGUID(SymName);
+ FuncStartAddrs[GUID] = F->getAddress();
}
}
Contents = PseudoProbeSection->getContents();
diff --git a/bolt/lib/Utils/Utils.cpp b/bolt/lib/Utils/Utils.cpp
index a4b6a6a317e2cb..ecc2f1010a9858 100644
--- a/bolt/lib/Utils/Utils.cpp
+++ b/bolt/lib/Utils/Utils.cpp
@@ -66,16 +66,21 @@ std::string getUnescapedName(const StringRef &Name) {
return Output;
}
-std::optional<StringRef> getLTOCommonName(const StringRef Name,
- ArrayRef<StringRef> Suffixes) {
+std::optional<StringRef> getCommonName(const StringRef Name, bool KeepSuffix,
+ ArrayRef<StringRef> Suffixes) {
for (StringRef Suffix : Suffixes) {
size_t LTOSuffixPos = Name.find(Suffix);
if (LTOSuffixPos != StringRef::npos)
- return Name.substr(0, LTOSuffixPos);
+ return Name.substr(0, LTOSuffixPos + (KeepSuffix ? Suffix.size() : 0));
}
return std::nullopt;
}
+std::optional<StringRef> getLTOCommonName(const StringRef Name) {
+ return getCommonName(Name, true,
+ {".__uniq.", ".lto_priv.", ".constprop.", ".llvm."});
+}
+
std::optional<uint8_t> readDWARFExpressionTargetReg(StringRef ExprBytes) {
uint8_t Opcode = ExprBytes[0];
if (Opcode == dwarf::DW_CFA_def_cfa_expression)
>From 04102f20172912980c5baf0033459e2086fbe0f2 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Mon, 2 Sep 2024 17:49:19 -0700
Subject: [PATCH 3/4] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?=
=?UTF-8?q?anges=20introduced=20through=20rebase?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.4
[skip ci]
---
bolt/lib/Profile/DataAggregator.cpp | 4 +-
bolt/lib/Profile/YAMLProfileReader.cpp | 5 --
bolt/lib/Profile/YAMLProfileWriter.cpp | 11 ++--
bolt/lib/Rewrite/PseudoProbeRewriter.cpp | 6 +--
.../test/X86/pseudoprobe-decoding-inline.test | 6 +--
llvm/lib/MC/MCPseudoProbe.cpp | 50 +++++++++++++++++++
6 files changed, 66 insertions(+), 16 deletions(-)
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 813d825f8b570c..10d745cc69824b 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -88,7 +88,7 @@ MaxSamples("max-samples",
cl::cat(AggregatorCategory));
extern cl::opt<opts::ProfileFormatKind> ProfileFormat;
-extern cl::opt<bool> ProfileUsePseudoProbes;
+extern cl::opt<bool> ProfileWritePseudoProbes;
extern cl::opt<std::string> SaveProfile;
cl::opt<bool> ReadPreAggregated(
@@ -2300,7 +2300,7 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
yaml::bolt::BinaryProfile BP;
const MCPseudoProbeDecoder *PseudoProbeDecoder =
- opts::ProfileUsePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
+ opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
// Fill out the header info.
BP.Header.Version = 1;
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 3eca5e972fa5ba..604a9fb4813be4 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -49,11 +49,6 @@ llvm::cl::opt<bool>
llvm::cl::opt<bool> ProfileUseDFS("profile-use-dfs",
cl::desc("use DFS order for YAML profile"),
cl::Hidden, cl::cat(BoltOptCategory));
-
-llvm::cl::opt<bool> ProfileUsePseudoProbes(
- "profile-use-pseudo-probes",
- cl::desc("Use pseudo probes for profile generation and matching"),
- cl::Hidden, cl::cat(BoltOptCategory));
} // namespace opts
namespace llvm {
diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp
index f74cf60e076d0a..ffbf2388e912fb 100644
--- a/bolt/lib/Profile/YAMLProfileWriter.cpp
+++ b/bolt/lib/Profile/YAMLProfileWriter.cpp
@@ -13,6 +13,7 @@
#include "bolt/Profile/DataAggregator.h"
#include "bolt/Profile/ProfileReaderBase.h"
#include "bolt/Rewrite/RewriteInstance.h"
+#include "bolt/Utils/CommandLineOpts.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
@@ -21,8 +22,12 @@
#define DEBUG_TYPE "bolt-prof"
namespace opts {
-extern llvm::cl::opt<bool> ProfileUseDFS;
-extern llvm::cl::opt<bool> ProfileUsePseudoProbes;
+using namespace llvm;
+extern cl::opt<bool> ProfileUseDFS;
+cl::opt<bool> ProfileWritePseudoProbes(
+ "profile-write-pseudo-probes",
+ cl::desc("Use pseudo probes in profile generation"), cl::Hidden,
+ cl::cat(BoltOptCategory));
} // namespace opts
namespace llvm {
@@ -59,7 +64,7 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
yaml::bolt::BinaryFunctionProfile YamlBF;
const BinaryContext &BC = BF.getBinaryContext();
const MCPseudoProbeDecoder *PseudoProbeDecoder =
- opts::ProfileUsePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
+ opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
const uint16_t LBRProfile = BF.getProfileFlags() & BinaryFunction::PF_LBR;
diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
index 4925b4b385d9b1..fef721167869dd 100644
--- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
+++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
@@ -49,7 +49,7 @@ static cl::opt<PrintPseudoProbesOptions> PrintPseudoProbes(
clEnumValN(PPP_All, "all", "enable all debugging printout")),
cl::Hidden, cl::cat(BoltCategory));
-extern cl::opt<bool> ProfileUsePseudoProbes;
+extern cl::opt<bool> ProfileWritePseudoProbes;
} // namespace opts
namespace {
@@ -90,14 +90,14 @@ class PseudoProbeRewriter final : public MetadataRewriter {
};
Error PseudoProbeRewriter::preCFGInitializer() {
- if (opts::ProfileUsePseudoProbes)
+ if (opts::ProfileWritePseudoProbes)
parsePseudoProbe();
return Error::success();
}
Error PseudoProbeRewriter::postEmitFinalizer() {
- if (!opts::ProfileUsePseudoProbes)
+ if (!opts::ProfileWritePseudoProbes)
parsePseudoProbe();
updatePseudoProbes();
diff --git a/bolt/test/X86/pseudoprobe-decoding-inline.test b/bolt/test/X86/pseudoprobe-decoding-inline.test
index b361551e5711ea..1fdd00c7ef6c4b 100644
--- a/bolt/test/X86/pseudoprobe-decoding-inline.test
+++ b/bolt/test/X86/pseudoprobe-decoding-inline.test
@@ -6,11 +6,11 @@
# PREAGG: B X:0 #main# 1 0
## Check pseudo-probes in regular YAML profile (non-BOLTed binary)
# RUN: link_fdata %s %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin %t.preagg PREAGG
-# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata --profile-use-pseudo-probes
+# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata --profile-write-pseudo-probes
# RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-YAML
## Check pseudo-probes in BAT YAML profile (BOLTed binary)
# RUN: link_fdata %s %t.bolt %t.preagg2 PREAGG
-# RUN: perf2bolt %t.bolt -p %t.preagg2 --pa -w %t.yaml2 -o %t.fdata2 --profile-use-pseudo-probes
+# RUN: perf2bolt %t.bolt -p %t.preagg2 --pa -w %t.yaml2 -o %t.fdata2 --profile-write-pseudo-probes
# RUN: FileCheck --input-file %t.yaml2 %s --check-prefix CHECK-YAML
# CHECK-YAML: name: bar
# CHECK-YAML: - bid: 0
@@ -30,7 +30,7 @@
# CHECK-YAML: guid: 0xDB956436E78DD5FA
# CHECK-YAML: pseudo_probe_desc_hash: 0x10000FFFFFFFF
#
-## Check that without --profile-use-pseudo-probes option, no pseudo probes are
+## Check that without --profile-write-pseudo-probes option, no pseudo probes are
## generated
# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata
# RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-NO-OPT
diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp
index 90d7588407068a..af7fe7edff1e70 100644
--- a/llvm/lib/MC/MCPseudoProbe.cpp
+++ b/llvm/lib/MC/MCPseudoProbe.cpp
@@ -19,8 +19,10 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MD5.h"
+#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -376,6 +378,8 @@ ErrorOr<StringRef> MCPseudoProbeDecoder::readString(uint32_t Size) {
bool MCPseudoProbeDecoder::buildGUID2FuncDescMap(const uint8_t *Start,
std::size_t Size) {
+ Timer T("buildGUID2FDMap", "build GUID to FuncDesc map");
+ T.startTimer();
// The pseudo_probe_desc section has a format like:
// .section .pseudo_probe_desc,"",@progbits
// .quad -5182264717993193164 // GUID
@@ -430,6 +434,12 @@ bool MCPseudoProbeDecoder::buildGUID2FuncDescMap(const uint8_t *Start,
llvm::sort(GUID2FuncDescMap, [](const auto &LHS, const auto &RHS) {
return LHS.FuncGUID < RHS.FuncGUID;
});
+ T.stopTimer();
+ auto TT = T.getTotalTime();
+ T.clear();
+ dbgs() << "func desc ";
+ TT.print(TT, dbgs());
+ dbgs() << '\n';
return true;
}
@@ -623,12 +633,20 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
Data = Start;
End = Data + Size;
bool Discard = false;
+ Timer T("countRecords", "pre-parsing function records");
+ T.startTimer();
while (Data < End) {
if (!countRecords<true>(Discard, ProbeCount, InlinedCount, GuidFilter))
return false;
TopLevelFuncs += !Discard;
}
+ T.stopTimer();
+ auto TT = T.getTotalTime();
+ T.clear();
+ dbgs() << "pre-parsing ";
+ TT.print(TT, dbgs());
assert(Data == End && "Have unprocessed data in pseudo_probe section");
+ T.startTimer();
PseudoProbeVec.reserve(ProbeCount);
InlineTreeVec.reserve(InlinedCount);
@@ -636,6 +654,13 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
InlineTreeVec.resize(TopLevelFuncs);
DummyInlineRoot.getChildren() = MutableArrayRef(InlineTreeVec);
+ T.stopTimer();
+ TT = T.getTotalTime();
+ T.clear();
+ dbgs() << "\nalloc ";
+ TT.print(TT, dbgs());
+
+ T.startTimer();
Data = Start;
End = Data + Size;
uint64_t LastAddr = 0;
@@ -643,12 +668,18 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
while (Data < End)
CurChildIndex += buildAddress2ProbeMap<true>(
&DummyInlineRoot, LastAddr, GuidFilter, FuncStartAddrs, CurChildIndex);
+ T.stopTimer();
+ TT = T.getTotalTime();
+ T.clear();
+ dbgs() << "\nparsing ";
+ TT.print(TT, dbgs());
assert(Data == End && "Have unprocessed data in pseudo_probe section");
assert(PseudoProbeVec.size() == ProbeCount &&
"Mismatching probe count pre- and post-parsing");
assert(InlineTreeVec.size() == InlinedCount &&
"Mismatching function records count pre- and post-parsing");
+ T.startTimer();
std::vector<std::pair<uint64_t, uint32_t>> SortedA2P(ProbeCount);
for (const auto &[I, Probe] : llvm::enumerate(PseudoProbeVec))
SortedA2P[I] = {Probe.getAddress(), I};
@@ -657,6 +688,25 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
for (const uint32_t I : llvm::make_second_range(SortedA2P))
Address2ProbesMap.emplace_back(PseudoProbeVec[I]);
SortedA2P.clear();
+ T.stopTimer();
+ TT = T.getTotalTime();
+ T.clear();
+ dbgs() << "\nsorting ";
+ TT.print(TT, dbgs());
+ dbgs() << '\n';
+ size_t PPVecSize = 32 * PseudoProbeVec.capacity();
+ size_t ITVecSize = 48 * InlineTreeVec.capacity();
+ size_t G2FDMapSize = 32 * GUID2FuncDescMap.capacity();
+ size_t StringSize = FuncNameAllocator.getBytesAllocated();
+ size_t A2PSize = 8 * Address2ProbesMap.capacity();
+ dbgs() << formatv("PPVec size: {0} GiB\n", 1.f * PPVecSize / (1 << 30))
+ << formatv("ITVec size: {0} GiB\n", 1.f * ITVecSize / (1 << 30))
+ << formatv("G2FDMap size: {0} GiB\n", 1.f * G2FDMapSize / (1 << 30))
+ << formatv(" (strings {0} GiB)\n", 1.f * StringSize / (1 << 30))
+ << formatv("A2P size: {0} GiB\n", 1.f * A2PSize / (1 << 30))
+ << formatv("Total size: {0} GiB\n",
+ 1.f * (PPVecSize + ITVecSize + G2FDMapSize + A2PSize) /
+ (1 << 30));
return true;
}
>From 3f3ab60e5e664df2407d87081e8186b162e5581e Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Mon, 2 Sep 2024 17:50:47 -0700
Subject: [PATCH 4/4] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?=
=?UTF-8?q?anges=20introduced=20through=20rebase?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.4
[skip ci]
---
bolt/lib/Profile/DataAggregator.cpp | 4 +-
bolt/lib/Profile/YAMLProfileReader.cpp | 5 --
bolt/lib/Profile/YAMLProfileWriter.cpp | 11 ++--
bolt/lib/Rewrite/PseudoProbeRewriter.cpp | 6 +--
.../test/X86/pseudoprobe-decoding-inline.test | 6 +--
llvm/lib/MC/MCPseudoProbe.cpp | 50 +++++++++++++++++++
6 files changed, 66 insertions(+), 16 deletions(-)
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 813d825f8b570c..10d745cc69824b 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -88,7 +88,7 @@ MaxSamples("max-samples",
cl::cat(AggregatorCategory));
extern cl::opt<opts::ProfileFormatKind> ProfileFormat;
-extern cl::opt<bool> ProfileUsePseudoProbes;
+extern cl::opt<bool> ProfileWritePseudoProbes;
extern cl::opt<std::string> SaveProfile;
cl::opt<bool> ReadPreAggregated(
@@ -2300,7 +2300,7 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
yaml::bolt::BinaryProfile BP;
const MCPseudoProbeDecoder *PseudoProbeDecoder =
- opts::ProfileUsePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
+ opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
// Fill out the header info.
BP.Header.Version = 1;
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 3eca5e972fa5ba..604a9fb4813be4 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -49,11 +49,6 @@ llvm::cl::opt<bool>
llvm::cl::opt<bool> ProfileUseDFS("profile-use-dfs",
cl::desc("use DFS order for YAML profile"),
cl::Hidden, cl::cat(BoltOptCategory));
-
-llvm::cl::opt<bool> ProfileUsePseudoProbes(
- "profile-use-pseudo-probes",
- cl::desc("Use pseudo probes for profile generation and matching"),
- cl::Hidden, cl::cat(BoltOptCategory));
} // namespace opts
namespace llvm {
diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp
index f74cf60e076d0a..ffbf2388e912fb 100644
--- a/bolt/lib/Profile/YAMLProfileWriter.cpp
+++ b/bolt/lib/Profile/YAMLProfileWriter.cpp
@@ -13,6 +13,7 @@
#include "bolt/Profile/DataAggregator.h"
#include "bolt/Profile/ProfileReaderBase.h"
#include "bolt/Rewrite/RewriteInstance.h"
+#include "bolt/Utils/CommandLineOpts.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
@@ -21,8 +22,12 @@
#define DEBUG_TYPE "bolt-prof"
namespace opts {
-extern llvm::cl::opt<bool> ProfileUseDFS;
-extern llvm::cl::opt<bool> ProfileUsePseudoProbes;
+using namespace llvm;
+extern cl::opt<bool> ProfileUseDFS;
+cl::opt<bool> ProfileWritePseudoProbes(
+ "profile-write-pseudo-probes",
+ cl::desc("Use pseudo probes in profile generation"), cl::Hidden,
+ cl::cat(BoltOptCategory));
} // namespace opts
namespace llvm {
@@ -59,7 +64,7 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
yaml::bolt::BinaryFunctionProfile YamlBF;
const BinaryContext &BC = BF.getBinaryContext();
const MCPseudoProbeDecoder *PseudoProbeDecoder =
- opts::ProfileUsePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
+ opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
const uint16_t LBRProfile = BF.getProfileFlags() & BinaryFunction::PF_LBR;
diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
index 4925b4b385d9b1..fef721167869dd 100644
--- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
+++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
@@ -49,7 +49,7 @@ static cl::opt<PrintPseudoProbesOptions> PrintPseudoProbes(
clEnumValN(PPP_All, "all", "enable all debugging printout")),
cl::Hidden, cl::cat(BoltCategory));
-extern cl::opt<bool> ProfileUsePseudoProbes;
+extern cl::opt<bool> ProfileWritePseudoProbes;
} // namespace opts
namespace {
@@ -90,14 +90,14 @@ class PseudoProbeRewriter final : public MetadataRewriter {
};
Error PseudoProbeRewriter::preCFGInitializer() {
- if (opts::ProfileUsePseudoProbes)
+ if (opts::ProfileWritePseudoProbes)
parsePseudoProbe();
return Error::success();
}
Error PseudoProbeRewriter::postEmitFinalizer() {
- if (!opts::ProfileUsePseudoProbes)
+ if (!opts::ProfileWritePseudoProbes)
parsePseudoProbe();
updatePseudoProbes();
diff --git a/bolt/test/X86/pseudoprobe-decoding-inline.test b/bolt/test/X86/pseudoprobe-decoding-inline.test
index b361551e5711ea..1fdd00c7ef6c4b 100644
--- a/bolt/test/X86/pseudoprobe-decoding-inline.test
+++ b/bolt/test/X86/pseudoprobe-decoding-inline.test
@@ -6,11 +6,11 @@
# PREAGG: B X:0 #main# 1 0
## Check pseudo-probes in regular YAML profile (non-BOLTed binary)
# RUN: link_fdata %s %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin %t.preagg PREAGG
-# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata --profile-use-pseudo-probes
+# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata --profile-write-pseudo-probes
# RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-YAML
## Check pseudo-probes in BAT YAML profile (BOLTed binary)
# RUN: link_fdata %s %t.bolt %t.preagg2 PREAGG
-# RUN: perf2bolt %t.bolt -p %t.preagg2 --pa -w %t.yaml2 -o %t.fdata2 --profile-use-pseudo-probes
+# RUN: perf2bolt %t.bolt -p %t.preagg2 --pa -w %t.yaml2 -o %t.fdata2 --profile-write-pseudo-probes
# RUN: FileCheck --input-file %t.yaml2 %s --check-prefix CHECK-YAML
# CHECK-YAML: name: bar
# CHECK-YAML: - bid: 0
@@ -30,7 +30,7 @@
# CHECK-YAML: guid: 0xDB956436E78DD5FA
# CHECK-YAML: pseudo_probe_desc_hash: 0x10000FFFFFFFF
#
-## Check that without --profile-use-pseudo-probes option, no pseudo probes are
+## Check that without --profile-write-pseudo-probes option, no pseudo probes are
## generated
# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata
# RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-NO-OPT
diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp
index 90d7588407068a..af7fe7edff1e70 100644
--- a/llvm/lib/MC/MCPseudoProbe.cpp
+++ b/llvm/lib/MC/MCPseudoProbe.cpp
@@ -19,8 +19,10 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MD5.h"
+#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -376,6 +378,8 @@ ErrorOr<StringRef> MCPseudoProbeDecoder::readString(uint32_t Size) {
bool MCPseudoProbeDecoder::buildGUID2FuncDescMap(const uint8_t *Start,
std::size_t Size) {
+ Timer T("buildGUID2FDMap", "build GUID to FuncDesc map");
+ T.startTimer();
// The pseudo_probe_desc section has a format like:
// .section .pseudo_probe_desc,"",@progbits
// .quad -5182264717993193164 // GUID
@@ -430,6 +434,12 @@ bool MCPseudoProbeDecoder::buildGUID2FuncDescMap(const uint8_t *Start,
llvm::sort(GUID2FuncDescMap, [](const auto &LHS, const auto &RHS) {
return LHS.FuncGUID < RHS.FuncGUID;
});
+ T.stopTimer();
+ auto TT = T.getTotalTime();
+ T.clear();
+ dbgs() << "func desc ";
+ TT.print(TT, dbgs());
+ dbgs() << '\n';
return true;
}
@@ -623,12 +633,20 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
Data = Start;
End = Data + Size;
bool Discard = false;
+ Timer T("countRecords", "pre-parsing function records");
+ T.startTimer();
while (Data < End) {
if (!countRecords<true>(Discard, ProbeCount, InlinedCount, GuidFilter))
return false;
TopLevelFuncs += !Discard;
}
+ T.stopTimer();
+ auto TT = T.getTotalTime();
+ T.clear();
+ dbgs() << "pre-parsing ";
+ TT.print(TT, dbgs());
assert(Data == End && "Have unprocessed data in pseudo_probe section");
+ T.startTimer();
PseudoProbeVec.reserve(ProbeCount);
InlineTreeVec.reserve(InlinedCount);
@@ -636,6 +654,13 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
InlineTreeVec.resize(TopLevelFuncs);
DummyInlineRoot.getChildren() = MutableArrayRef(InlineTreeVec);
+ T.stopTimer();
+ TT = T.getTotalTime();
+ T.clear();
+ dbgs() << "\nalloc ";
+ TT.print(TT, dbgs());
+
+ T.startTimer();
Data = Start;
End = Data + Size;
uint64_t LastAddr = 0;
@@ -643,12 +668,18 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
while (Data < End)
CurChildIndex += buildAddress2ProbeMap<true>(
&DummyInlineRoot, LastAddr, GuidFilter, FuncStartAddrs, CurChildIndex);
+ T.stopTimer();
+ TT = T.getTotalTime();
+ T.clear();
+ dbgs() << "\nparsing ";
+ TT.print(TT, dbgs());
assert(Data == End && "Have unprocessed data in pseudo_probe section");
assert(PseudoProbeVec.size() == ProbeCount &&
"Mismatching probe count pre- and post-parsing");
assert(InlineTreeVec.size() == InlinedCount &&
"Mismatching function records count pre- and post-parsing");
+ T.startTimer();
std::vector<std::pair<uint64_t, uint32_t>> SortedA2P(ProbeCount);
for (const auto &[I, Probe] : llvm::enumerate(PseudoProbeVec))
SortedA2P[I] = {Probe.getAddress(), I};
@@ -657,6 +688,25 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
for (const uint32_t I : llvm::make_second_range(SortedA2P))
Address2ProbesMap.emplace_back(PseudoProbeVec[I]);
SortedA2P.clear();
+ T.stopTimer();
+ TT = T.getTotalTime();
+ T.clear();
+ dbgs() << "\nsorting ";
+ TT.print(TT, dbgs());
+ dbgs() << '\n';
+ size_t PPVecSize = 32 * PseudoProbeVec.capacity();
+ size_t ITVecSize = 48 * InlineTreeVec.capacity();
+ size_t G2FDMapSize = 32 * GUID2FuncDescMap.capacity();
+ size_t StringSize = FuncNameAllocator.getBytesAllocated();
+ size_t A2PSize = 8 * Address2ProbesMap.capacity();
+ dbgs() << formatv("PPVec size: {0} GiB\n", 1.f * PPVecSize / (1 << 30))
+ << formatv("ITVec size: {0} GiB\n", 1.f * ITVecSize / (1 << 30))
+ << formatv("G2FDMap size: {0} GiB\n", 1.f * G2FDMapSize / (1 << 30))
+ << formatv(" (strings {0} GiB)\n", 1.f * StringSize / (1 << 30))
+ << formatv("A2P size: {0} GiB\n", 1.f * A2PSize / (1 << 30))
+ << formatv("Total size: {0} GiB\n",
+ 1.f * (PPVecSize + ITVecSize + G2FDMapSize + A2PSize) /
+ (1 << 30));
return true;
}
More information about the llvm-commits mailing list