[llvm] [BOLT][NFCI] Strip suffix in getLTOCommonName (PR #106243)

Amir Ayupov via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 2 17:53:48 PDT 2024


https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/106243

>From 1f2e9a33d80616e7acdc68f1956a37f82abecd9e Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Tue, 27 Aug 2024 09:28:19 -0700
Subject: [PATCH 1/4] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4
---
 bolt/include/bolt/Utils/Utils.h | 6 +++++-
 bolt/lib/Utils/Utils.cpp        | 7 ++++---
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/bolt/include/bolt/Utils/Utils.h b/bolt/include/bolt/Utils/Utils.h
index 3886c5f8757c08..63de3c5ee8f123 100644
--- a/bolt/include/bolt/Utils/Utils.h
+++ b/bolt/include/bolt/Utils/Utils.h
@@ -64,8 +64,12 @@ std::string getUnescapedName(const StringRef &Name);
 /// of functions. Later, out of all matching profiles we pick the one with the
 /// best match.
 ///
+static SmallVector<StringRef, 4> LTOSuffixes({".__uniq.", ".lto_priv.",
+                                              ".constprop.", ".llvm."});
 /// Return a common part of LTO name for a given \p Name.
-std::optional<StringRef> getLTOCommonName(const StringRef Name);
+std::optional<StringRef>
+getLTOCommonName(const StringRef Name,
+                 ArrayRef<StringRef> Suffixes = LTOSuffixes);
 
 // Determines which register a given DWARF expression is being assigned to.
 // If the expression is defining the CFA, return std::nullopt.
diff --git a/bolt/lib/Utils/Utils.cpp b/bolt/lib/Utils/Utils.cpp
index 718e97535fd22a..a4b6a6a317e2cb 100644
--- a/bolt/lib/Utils/Utils.cpp
+++ b/bolt/lib/Utils/Utils.cpp
@@ -66,11 +66,12 @@ std::string getUnescapedName(const StringRef &Name) {
   return Output;
 }
 
-std::optional<StringRef> getLTOCommonName(const StringRef Name) {
-  for (StringRef Suffix : {".__uniq.", ".lto_priv.", ".constprop.", ".llvm."}) {
+std::optional<StringRef> getLTOCommonName(const StringRef Name,
+                                          ArrayRef<StringRef> Suffixes) {
+  for (StringRef Suffix : Suffixes) {
     size_t LTOSuffixPos = Name.find(Suffix);
     if (LTOSuffixPos != StringRef::npos)
-      return Name.substr(0, LTOSuffixPos + Suffix.size());
+      return Name.substr(0, LTOSuffixPos);
   }
   return std::nullopt;
 }

>From 6eff5fe988ce505903a3836e86f274c4a1616fb3 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Tue, 27 Aug 2024 15:35:35 -0700
Subject: [PATCH 2/4] define getCommonName

Created using spr 1.3.4
---
 bolt/include/bolt/Utils/Utils.h          | 11 ++++++-----
 bolt/lib/Rewrite/PseudoProbeRewriter.cpp | 11 +++++++++--
 bolt/lib/Utils/Utils.cpp                 | 11 ++++++++---
 3 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/bolt/include/bolt/Utils/Utils.h b/bolt/include/bolt/Utils/Utils.h
index 63de3c5ee8f123..9baee7d94066de 100644
--- a/bolt/include/bolt/Utils/Utils.h
+++ b/bolt/include/bolt/Utils/Utils.h
@@ -41,6 +41,11 @@ std::string getEscapedName(const StringRef &Name);
 /// Return the unescaped name
 std::string getUnescapedName(const StringRef &Name);
 
+/// Return a common part for a given \p Name wrt a given \p Suffixes list.
+/// Preserve the suffix if \p KeepSuffix is set, only dropping characters
+/// following it, otherwise drop the suffix as well.
+std::optional<StringRef> getCommonName(const StringRef Name, bool KeepSuffix,
+                                       ArrayRef<StringRef> Suffixes);
 /// LTO-generated function names take a form:
 ///
 ///   <function_name>.lto_priv.<decimal_number>/...
@@ -64,12 +69,8 @@ std::string getUnescapedName(const StringRef &Name);
 /// of functions. Later, out of all matching profiles we pick the one with the
 /// best match.
 ///
-static SmallVector<StringRef, 4> LTOSuffixes({".__uniq.", ".lto_priv.",
-                                              ".constprop.", ".llvm."});
 /// Return a common part of LTO name for a given \p Name.
-std::optional<StringRef>
-getLTOCommonName(const StringRef Name,
-                 ArrayRef<StringRef> Suffixes = LTOSuffixes);
+std::optional<StringRef> getLTOCommonName(const StringRef Name);
 
 // Determines which register a given DWARF expression is being assigned to.
 // If the expression is defining the CFA, return std::nullopt.
diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
index 4925b4b385d9b1..6e80d9b0014b7b 100644
--- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
+++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
@@ -14,6 +14,7 @@
 #include "bolt/Rewrite/MetadataRewriter.h"
 #include "bolt/Rewrite/MetadataRewriters.h"
 #include "bolt/Utils/CommandLineOpts.h"
+#include "bolt/Utils/Utils.h"
 #include "llvm/IR/Function.h"
 #include "llvm/MC/MCPseudoProbe.h"
 #include "llvm/Support/CommandLine.h"
@@ -133,10 +134,16 @@ void PseudoProbeRewriter::parsePseudoProbe() {
 
   MCPseudoProbeDecoder::Uint64Set GuidFilter;
   MCPseudoProbeDecoder::Uint64Map FuncStartAddrs;
+  SmallVector<StringRef, 3> Suffixes({".llvm.", ".destroy", ".resume"});
   for (const BinaryFunction *F : BC.getAllBinaryFunctions()) {
     for (const MCSymbol *Sym : F->getSymbols()) {
-      FuncStartAddrs[Function::getGUID(NameResolver::restore(Sym->getName()))] =
-          F->getAddress();
+      StringRef SymName = NameResolver::restore(Sym->getName());
+      if (std::optional<StringRef> CommonName =
+              getCommonName(SymName, false, Suffixes)) {
+        SymName = *CommonName;
+      }
+      uint64_t GUID = Function::getGUID(SymName);
+      FuncStartAddrs[GUID] = F->getAddress();
     }
   }
   Contents = PseudoProbeSection->getContents();
diff --git a/bolt/lib/Utils/Utils.cpp b/bolt/lib/Utils/Utils.cpp
index a4b6a6a317e2cb..ecc2f1010a9858 100644
--- a/bolt/lib/Utils/Utils.cpp
+++ b/bolt/lib/Utils/Utils.cpp
@@ -66,16 +66,21 @@ std::string getUnescapedName(const StringRef &Name) {
   return Output;
 }
 
-std::optional<StringRef> getLTOCommonName(const StringRef Name,
-                                          ArrayRef<StringRef> Suffixes) {
+std::optional<StringRef> getCommonName(const StringRef Name, bool KeepSuffix,
+                                       ArrayRef<StringRef> Suffixes) {
   for (StringRef Suffix : Suffixes) {
     size_t LTOSuffixPos = Name.find(Suffix);
     if (LTOSuffixPos != StringRef::npos)
-      return Name.substr(0, LTOSuffixPos);
+      return Name.substr(0, LTOSuffixPos + (KeepSuffix ? Suffix.size() : 0));
   }
   return std::nullopt;
 }
 
+std::optional<StringRef> getLTOCommonName(const StringRef Name) {
+  return getCommonName(Name, true,
+                       {".__uniq.", ".lto_priv.", ".constprop.", ".llvm."});
+}
+
 std::optional<uint8_t> readDWARFExpressionTargetReg(StringRef ExprBytes) {
   uint8_t Opcode = ExprBytes[0];
   if (Opcode == dwarf::DW_CFA_def_cfa_expression)

>From 04102f20172912980c5baf0033459e2086fbe0f2 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Mon, 2 Sep 2024 17:49:19 -0700
Subject: [PATCH 3/4] [𝘀𝗽𝗿] changes introduced through rebase
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4

[skip ci]
---
 bolt/lib/Profile/DataAggregator.cpp           |  4 +-
 bolt/lib/Profile/YAMLProfileReader.cpp        |  5 --
 bolt/lib/Profile/YAMLProfileWriter.cpp        | 11 ++--
 bolt/lib/Rewrite/PseudoProbeRewriter.cpp      |  6 +--
 .../test/X86/pseudoprobe-decoding-inline.test |  6 +--
 llvm/lib/MC/MCPseudoProbe.cpp                 | 50 +++++++++++++++++++
 6 files changed, 66 insertions(+), 16 deletions(-)

diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 813d825f8b570c..10d745cc69824b 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -88,7 +88,7 @@ MaxSamples("max-samples",
   cl::cat(AggregatorCategory));
 
 extern cl::opt<opts::ProfileFormatKind> ProfileFormat;
-extern cl::opt<bool> ProfileUsePseudoProbes;
+extern cl::opt<bool> ProfileWritePseudoProbes;
 extern cl::opt<std::string> SaveProfile;
 
 cl::opt<bool> ReadPreAggregated(
@@ -2300,7 +2300,7 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
   yaml::bolt::BinaryProfile BP;
 
   const MCPseudoProbeDecoder *PseudoProbeDecoder =
-      opts::ProfileUsePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
+      opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
 
   // Fill out the header info.
   BP.Header.Version = 1;
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 3eca5e972fa5ba..604a9fb4813be4 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -49,11 +49,6 @@ llvm::cl::opt<bool>
 llvm::cl::opt<bool> ProfileUseDFS("profile-use-dfs",
                                   cl::desc("use DFS order for YAML profile"),
                                   cl::Hidden, cl::cat(BoltOptCategory));
-
-llvm::cl::opt<bool> ProfileUsePseudoProbes(
-    "profile-use-pseudo-probes",
-    cl::desc("Use pseudo probes for profile generation and matching"),
-    cl::Hidden, cl::cat(BoltOptCategory));
 } // namespace opts
 
 namespace llvm {
diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp
index f74cf60e076d0a..ffbf2388e912fb 100644
--- a/bolt/lib/Profile/YAMLProfileWriter.cpp
+++ b/bolt/lib/Profile/YAMLProfileWriter.cpp
@@ -13,6 +13,7 @@
 #include "bolt/Profile/DataAggregator.h"
 #include "bolt/Profile/ProfileReaderBase.h"
 #include "bolt/Rewrite/RewriteInstance.h"
+#include "bolt/Utils/CommandLineOpts.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/raw_ostream.h"
@@ -21,8 +22,12 @@
 #define DEBUG_TYPE "bolt-prof"
 
 namespace opts {
-extern llvm::cl::opt<bool> ProfileUseDFS;
-extern llvm::cl::opt<bool> ProfileUsePseudoProbes;
+using namespace llvm;
+extern cl::opt<bool> ProfileUseDFS;
+cl::opt<bool> ProfileWritePseudoProbes(
+    "profile-write-pseudo-probes",
+    cl::desc("Use pseudo probes in profile generation"), cl::Hidden,
+    cl::cat(BoltOptCategory));
 } // namespace opts
 
 namespace llvm {
@@ -59,7 +64,7 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
   yaml::bolt::BinaryFunctionProfile YamlBF;
   const BinaryContext &BC = BF.getBinaryContext();
   const MCPseudoProbeDecoder *PseudoProbeDecoder =
-      opts::ProfileUsePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
+      opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
 
   const uint16_t LBRProfile = BF.getProfileFlags() & BinaryFunction::PF_LBR;
 
diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
index 4925b4b385d9b1..fef721167869dd 100644
--- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
+++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
@@ -49,7 +49,7 @@ static cl::opt<PrintPseudoProbesOptions> PrintPseudoProbes(
                clEnumValN(PPP_All, "all", "enable all debugging printout")),
     cl::Hidden, cl::cat(BoltCategory));
 
-extern cl::opt<bool> ProfileUsePseudoProbes;
+extern cl::opt<bool> ProfileWritePseudoProbes;
 } // namespace opts
 
 namespace {
@@ -90,14 +90,14 @@ class PseudoProbeRewriter final : public MetadataRewriter {
 };
 
 Error PseudoProbeRewriter::preCFGInitializer() {
-  if (opts::ProfileUsePseudoProbes)
+  if (opts::ProfileWritePseudoProbes)
     parsePseudoProbe();
 
   return Error::success();
 }
 
 Error PseudoProbeRewriter::postEmitFinalizer() {
-  if (!opts::ProfileUsePseudoProbes)
+  if (!opts::ProfileWritePseudoProbes)
     parsePseudoProbe();
   updatePseudoProbes();
 
diff --git a/bolt/test/X86/pseudoprobe-decoding-inline.test b/bolt/test/X86/pseudoprobe-decoding-inline.test
index b361551e5711ea..1fdd00c7ef6c4b 100644
--- a/bolt/test/X86/pseudoprobe-decoding-inline.test
+++ b/bolt/test/X86/pseudoprobe-decoding-inline.test
@@ -6,11 +6,11 @@
 # PREAGG: B X:0 #main# 1 0
 ## Check pseudo-probes in regular YAML profile (non-BOLTed binary)
 # RUN: link_fdata %s %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin %t.preagg PREAGG
-# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata --profile-use-pseudo-probes
+# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata --profile-write-pseudo-probes
 # RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-YAML
 ## Check pseudo-probes in BAT YAML profile (BOLTed binary)
 # RUN: link_fdata %s %t.bolt %t.preagg2 PREAGG
-# RUN: perf2bolt %t.bolt -p %t.preagg2 --pa -w %t.yaml2 -o %t.fdata2 --profile-use-pseudo-probes
+# RUN: perf2bolt %t.bolt -p %t.preagg2 --pa -w %t.yaml2 -o %t.fdata2 --profile-write-pseudo-probes
 # RUN: FileCheck --input-file %t.yaml2 %s --check-prefix CHECK-YAML
 # CHECK-YAML: name: bar
 # CHECK-YAML: - bid: 0
@@ -30,7 +30,7 @@
 # CHECK-YAML: guid: 0xDB956436E78DD5FA
 # CHECK-YAML: pseudo_probe_desc_hash: 0x10000FFFFFFFF
 #
-## Check that without --profile-use-pseudo-probes option, no pseudo probes are
+## Check that without --profile-write-pseudo-probes option, no pseudo probes are
 ## generated
 # RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata
 # RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-NO-OPT
diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp
index 90d7588407068a..af7fe7edff1e70 100644
--- a/llvm/lib/MC/MCPseudoProbe.cpp
+++ b/llvm/lib/MC/MCPseudoProbe.cpp
@@ -19,8 +19,10 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/MD5.h"
+#include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
@@ -376,6 +378,8 @@ ErrorOr<StringRef> MCPseudoProbeDecoder::readString(uint32_t Size) {
 
 bool MCPseudoProbeDecoder::buildGUID2FuncDescMap(const uint8_t *Start,
                                                  std::size_t Size) {
+  Timer T("buildGUID2FDMap", "build GUID to FuncDesc map");
+  T.startTimer();
   // The pseudo_probe_desc section has a format like:
   // .section .pseudo_probe_desc,"",@progbits
   // .quad -5182264717993193164   // GUID
@@ -430,6 +434,12 @@ bool MCPseudoProbeDecoder::buildGUID2FuncDescMap(const uint8_t *Start,
   llvm::sort(GUID2FuncDescMap, [](const auto &LHS, const auto &RHS) {
     return LHS.FuncGUID < RHS.FuncGUID;
   });
+  T.stopTimer();
+  auto TT = T.getTotalTime();
+  T.clear();
+  dbgs() << "func desc ";
+  TT.print(TT, dbgs());
+  dbgs() << '\n';
   return true;
 }
 
@@ -623,12 +633,20 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
   Data = Start;
   End = Data + Size;
   bool Discard = false;
+  Timer T("countRecords", "pre-parsing function records");
+  T.startTimer();
   while (Data < End) {
     if (!countRecords<true>(Discard, ProbeCount, InlinedCount, GuidFilter))
       return false;
     TopLevelFuncs += !Discard;
   }
+  T.stopTimer();
+  auto TT = T.getTotalTime();
+  T.clear();
+  dbgs() << "pre-parsing ";
+  TT.print(TT, dbgs());
   assert(Data == End && "Have unprocessed data in pseudo_probe section");
+  T.startTimer();
   PseudoProbeVec.reserve(ProbeCount);
   InlineTreeVec.reserve(InlinedCount);
 
@@ -636,6 +654,13 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
   InlineTreeVec.resize(TopLevelFuncs);
   DummyInlineRoot.getChildren() = MutableArrayRef(InlineTreeVec);
 
+  T.stopTimer();
+  TT = T.getTotalTime();
+  T.clear();
+  dbgs() << "\nalloc ";
+  TT.print(TT, dbgs());
+
+  T.startTimer();
   Data = Start;
   End = Data + Size;
   uint64_t LastAddr = 0;
@@ -643,12 +668,18 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
   while (Data < End)
     CurChildIndex += buildAddress2ProbeMap<true>(
         &DummyInlineRoot, LastAddr, GuidFilter, FuncStartAddrs, CurChildIndex);
+  T.stopTimer();
+  TT = T.getTotalTime();
+  T.clear();
+  dbgs() << "\nparsing ";
+  TT.print(TT, dbgs());
   assert(Data == End && "Have unprocessed data in pseudo_probe section");
   assert(PseudoProbeVec.size() == ProbeCount &&
          "Mismatching probe count pre- and post-parsing");
   assert(InlineTreeVec.size() == InlinedCount &&
          "Mismatching function records count pre- and post-parsing");
 
+  T.startTimer();
   std::vector<std::pair<uint64_t, uint32_t>> SortedA2P(ProbeCount);
   for (const auto &[I, Probe] : llvm::enumerate(PseudoProbeVec))
     SortedA2P[I] = {Probe.getAddress(), I};
@@ -657,6 +688,25 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
   for (const uint32_t I : llvm::make_second_range(SortedA2P))
     Address2ProbesMap.emplace_back(PseudoProbeVec[I]);
   SortedA2P.clear();
+  T.stopTimer();
+  TT = T.getTotalTime();
+  T.clear();
+  dbgs() << "\nsorting ";
+  TT.print(TT, dbgs());
+  dbgs() << '\n';
+  size_t PPVecSize = 32 * PseudoProbeVec.capacity();
+  size_t ITVecSize = 48 * InlineTreeVec.capacity();
+  size_t G2FDMapSize = 32 * GUID2FuncDescMap.capacity();
+  size_t StringSize = FuncNameAllocator.getBytesAllocated();
+  size_t A2PSize = 8 * Address2ProbesMap.capacity();
+  dbgs() << formatv("PPVec size: {0} GiB\n", 1.f * PPVecSize / (1 << 30))
+         << formatv("ITVec size: {0} GiB\n", 1.f * ITVecSize / (1 << 30))
+         << formatv("G2FDMap size: {0} GiB\n", 1.f * G2FDMapSize / (1 << 30))
+         << formatv("  (strings {0} GiB)\n", 1.f * StringSize / (1 << 30))
+         << formatv("A2P size: {0} GiB\n", 1.f * A2PSize / (1 << 30))
+         << formatv("Total size: {0} GiB\n",
+                    1.f * (PPVecSize + ITVecSize + G2FDMapSize + A2PSize) /
+                        (1 << 30));
   return true;
 }
 

>From 3f3ab60e5e664df2407d87081e8186b162e5581e Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Mon, 2 Sep 2024 17:50:47 -0700
Subject: [PATCH 4/4] [𝘀𝗽𝗿] changes introduced through rebase
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4

[skip ci]
---
 bolt/lib/Profile/DataAggregator.cpp           |  4 +-
 bolt/lib/Profile/YAMLProfileReader.cpp        |  5 --
 bolt/lib/Profile/YAMLProfileWriter.cpp        | 11 ++--
 bolt/lib/Rewrite/PseudoProbeRewriter.cpp      |  6 +--
 .../test/X86/pseudoprobe-decoding-inline.test |  6 +--
 llvm/lib/MC/MCPseudoProbe.cpp                 | 50 +++++++++++++++++++
 6 files changed, 66 insertions(+), 16 deletions(-)

diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 813d825f8b570c..10d745cc69824b 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -88,7 +88,7 @@ MaxSamples("max-samples",
   cl::cat(AggregatorCategory));
 
 extern cl::opt<opts::ProfileFormatKind> ProfileFormat;
-extern cl::opt<bool> ProfileUsePseudoProbes;
+extern cl::opt<bool> ProfileWritePseudoProbes;
 extern cl::opt<std::string> SaveProfile;
 
 cl::opt<bool> ReadPreAggregated(
@@ -2300,7 +2300,7 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
   yaml::bolt::BinaryProfile BP;
 
   const MCPseudoProbeDecoder *PseudoProbeDecoder =
-      opts::ProfileUsePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
+      opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
 
   // Fill out the header info.
   BP.Header.Version = 1;
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 3eca5e972fa5ba..604a9fb4813be4 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -49,11 +49,6 @@ llvm::cl::opt<bool>
 llvm::cl::opt<bool> ProfileUseDFS("profile-use-dfs",
                                   cl::desc("use DFS order for YAML profile"),
                                   cl::Hidden, cl::cat(BoltOptCategory));
-
-llvm::cl::opt<bool> ProfileUsePseudoProbes(
-    "profile-use-pseudo-probes",
-    cl::desc("Use pseudo probes for profile generation and matching"),
-    cl::Hidden, cl::cat(BoltOptCategory));
 } // namespace opts
 
 namespace llvm {
diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp
index f74cf60e076d0a..ffbf2388e912fb 100644
--- a/bolt/lib/Profile/YAMLProfileWriter.cpp
+++ b/bolt/lib/Profile/YAMLProfileWriter.cpp
@@ -13,6 +13,7 @@
 #include "bolt/Profile/DataAggregator.h"
 #include "bolt/Profile/ProfileReaderBase.h"
 #include "bolt/Rewrite/RewriteInstance.h"
+#include "bolt/Utils/CommandLineOpts.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/raw_ostream.h"
@@ -21,8 +22,12 @@
 #define DEBUG_TYPE "bolt-prof"
 
 namespace opts {
-extern llvm::cl::opt<bool> ProfileUseDFS;
-extern llvm::cl::opt<bool> ProfileUsePseudoProbes;
+using namespace llvm;
+extern cl::opt<bool> ProfileUseDFS;
+cl::opt<bool> ProfileWritePseudoProbes(
+    "profile-write-pseudo-probes",
+    cl::desc("Use pseudo probes in profile generation"), cl::Hidden,
+    cl::cat(BoltOptCategory));
 } // namespace opts
 
 namespace llvm {
@@ -59,7 +64,7 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
   yaml::bolt::BinaryFunctionProfile YamlBF;
   const BinaryContext &BC = BF.getBinaryContext();
   const MCPseudoProbeDecoder *PseudoProbeDecoder =
-      opts::ProfileUsePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
+      opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
 
   const uint16_t LBRProfile = BF.getProfileFlags() & BinaryFunction::PF_LBR;
 
diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
index 4925b4b385d9b1..fef721167869dd 100644
--- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
+++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
@@ -49,7 +49,7 @@ static cl::opt<PrintPseudoProbesOptions> PrintPseudoProbes(
                clEnumValN(PPP_All, "all", "enable all debugging printout")),
     cl::Hidden, cl::cat(BoltCategory));
 
-extern cl::opt<bool> ProfileUsePseudoProbes;
+extern cl::opt<bool> ProfileWritePseudoProbes;
 } // namespace opts
 
 namespace {
@@ -90,14 +90,14 @@ class PseudoProbeRewriter final : public MetadataRewriter {
 };
 
 Error PseudoProbeRewriter::preCFGInitializer() {
-  if (opts::ProfileUsePseudoProbes)
+  if (opts::ProfileWritePseudoProbes)
     parsePseudoProbe();
 
   return Error::success();
 }
 
 Error PseudoProbeRewriter::postEmitFinalizer() {
-  if (!opts::ProfileUsePseudoProbes)
+  if (!opts::ProfileWritePseudoProbes)
     parsePseudoProbe();
   updatePseudoProbes();
 
diff --git a/bolt/test/X86/pseudoprobe-decoding-inline.test b/bolt/test/X86/pseudoprobe-decoding-inline.test
index b361551e5711ea..1fdd00c7ef6c4b 100644
--- a/bolt/test/X86/pseudoprobe-decoding-inline.test
+++ b/bolt/test/X86/pseudoprobe-decoding-inline.test
@@ -6,11 +6,11 @@
 # PREAGG: B X:0 #main# 1 0
 ## Check pseudo-probes in regular YAML profile (non-BOLTed binary)
 # RUN: link_fdata %s %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin %t.preagg PREAGG
-# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata --profile-use-pseudo-probes
+# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata --profile-write-pseudo-probes
 # RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-YAML
 ## Check pseudo-probes in BAT YAML profile (BOLTed binary)
 # RUN: link_fdata %s %t.bolt %t.preagg2 PREAGG
-# RUN: perf2bolt %t.bolt -p %t.preagg2 --pa -w %t.yaml2 -o %t.fdata2 --profile-use-pseudo-probes
+# RUN: perf2bolt %t.bolt -p %t.preagg2 --pa -w %t.yaml2 -o %t.fdata2 --profile-write-pseudo-probes
 # RUN: FileCheck --input-file %t.yaml2 %s --check-prefix CHECK-YAML
 # CHECK-YAML: name: bar
 # CHECK-YAML: - bid: 0
@@ -30,7 +30,7 @@
 # CHECK-YAML: guid: 0xDB956436E78DD5FA
 # CHECK-YAML: pseudo_probe_desc_hash: 0x10000FFFFFFFF
 #
-## Check that without --profile-use-pseudo-probes option, no pseudo probes are
+## Check that without --profile-write-pseudo-probes option, no pseudo probes are
 ## generated
 # RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata
 # RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-NO-OPT
diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp
index 90d7588407068a..af7fe7edff1e70 100644
--- a/llvm/lib/MC/MCPseudoProbe.cpp
+++ b/llvm/lib/MC/MCPseudoProbe.cpp
@@ -19,8 +19,10 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/MD5.h"
+#include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
@@ -376,6 +378,8 @@ ErrorOr<StringRef> MCPseudoProbeDecoder::readString(uint32_t Size) {
 
 bool MCPseudoProbeDecoder::buildGUID2FuncDescMap(const uint8_t *Start,
                                                  std::size_t Size) {
+  Timer T("buildGUID2FDMap", "build GUID to FuncDesc map");
+  T.startTimer();
   // The pseudo_probe_desc section has a format like:
   // .section .pseudo_probe_desc,"",@progbits
   // .quad -5182264717993193164   // GUID
@@ -430,6 +434,12 @@ bool MCPseudoProbeDecoder::buildGUID2FuncDescMap(const uint8_t *Start,
   llvm::sort(GUID2FuncDescMap, [](const auto &LHS, const auto &RHS) {
     return LHS.FuncGUID < RHS.FuncGUID;
   });
+  T.stopTimer();
+  auto TT = T.getTotalTime();
+  T.clear();
+  dbgs() << "func desc ";
+  TT.print(TT, dbgs());
+  dbgs() << '\n';
   return true;
 }
 
@@ -623,12 +633,20 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
   Data = Start;
   End = Data + Size;
   bool Discard = false;
+  Timer T("countRecords", "pre-parsing function records");
+  T.startTimer();
   while (Data < End) {
     if (!countRecords<true>(Discard, ProbeCount, InlinedCount, GuidFilter))
       return false;
     TopLevelFuncs += !Discard;
   }
+  T.stopTimer();
+  auto TT = T.getTotalTime();
+  T.clear();
+  dbgs() << "pre-parsing ";
+  TT.print(TT, dbgs());
   assert(Data == End && "Have unprocessed data in pseudo_probe section");
+  T.startTimer();
   PseudoProbeVec.reserve(ProbeCount);
   InlineTreeVec.reserve(InlinedCount);
 
@@ -636,6 +654,13 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
   InlineTreeVec.resize(TopLevelFuncs);
   DummyInlineRoot.getChildren() = MutableArrayRef(InlineTreeVec);
 
+  T.stopTimer();
+  TT = T.getTotalTime();
+  T.clear();
+  dbgs() << "\nalloc ";
+  TT.print(TT, dbgs());
+
+  T.startTimer();
   Data = Start;
   End = Data + Size;
   uint64_t LastAddr = 0;
@@ -643,12 +668,18 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
   while (Data < End)
     CurChildIndex += buildAddress2ProbeMap<true>(
         &DummyInlineRoot, LastAddr, GuidFilter, FuncStartAddrs, CurChildIndex);
+  T.stopTimer();
+  TT = T.getTotalTime();
+  T.clear();
+  dbgs() << "\nparsing ";
+  TT.print(TT, dbgs());
   assert(Data == End && "Have unprocessed data in pseudo_probe section");
   assert(PseudoProbeVec.size() == ProbeCount &&
          "Mismatching probe count pre- and post-parsing");
   assert(InlineTreeVec.size() == InlinedCount &&
          "Mismatching function records count pre- and post-parsing");
 
+  T.startTimer();
   std::vector<std::pair<uint64_t, uint32_t>> SortedA2P(ProbeCount);
   for (const auto &[I, Probe] : llvm::enumerate(PseudoProbeVec))
     SortedA2P[I] = {Probe.getAddress(), I};
@@ -657,6 +688,25 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
   for (const uint32_t I : llvm::make_second_range(SortedA2P))
     Address2ProbesMap.emplace_back(PseudoProbeVec[I]);
   SortedA2P.clear();
+  T.stopTimer();
+  TT = T.getTotalTime();
+  T.clear();
+  dbgs() << "\nsorting ";
+  TT.print(TT, dbgs());
+  dbgs() << '\n';
+  size_t PPVecSize = 32 * PseudoProbeVec.capacity();
+  size_t ITVecSize = 48 * InlineTreeVec.capacity();
+  size_t G2FDMapSize = 32 * GUID2FuncDescMap.capacity();
+  size_t StringSize = FuncNameAllocator.getBytesAllocated();
+  size_t A2PSize = 8 * Address2ProbesMap.capacity();
+  dbgs() << formatv("PPVec size: {0} GiB\n", 1.f * PPVecSize / (1 << 30))
+         << formatv("ITVec size: {0} GiB\n", 1.f * ITVecSize / (1 << 30))
+         << formatv("G2FDMap size: {0} GiB\n", 1.f * G2FDMapSize / (1 << 30))
+         << formatv("  (strings {0} GiB)\n", 1.f * StringSize / (1 << 30))
+         << formatv("A2P size: {0} GiB\n", 1.f * A2PSize / (1 << 30))
+         << formatv("Total size: {0} GiB\n",
+                    1.f * (PPVecSize + ITVecSize + G2FDMapSize + A2PSize) /
+                        (1 << 30));
   return true;
 }
 



More information about the llvm-commits mailing list