[llvm] c00c62c - [BOLT] Add pseudo probe inline tree to YAML profile

via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 12 20:51:39 PDT 2024


Author: Amir Ayupov
Date: 2024-09-12T20:51:35-07:00
New Revision: c00c62c113d1ac121891d644a9f18f247c24d9b1

URL: https://github.com/llvm/llvm-project/commit/c00c62c113d1ac121891d644a9f18f247c24d9b1
DIFF: https://github.com/llvm/llvm-project/commit/c00c62c113d1ac121891d644a9f18f247c24d9b1.diff

LOG: [BOLT] Add pseudo probe inline tree to YAML profile

Add probe inline tree information to YAML profile, at function level:
- function GUID,
- checksum,
- parent node id,
- call site in the parent.

This information is used for pseudo probe block matching (#99891).

The encoding adds/changes probe information at multiple levels of
the YAML profile:
- BinaryProfile: add pseudo_probe_desc with GUIDs and Hashes, which
  permits deduplication of data:
  - many GUIDs are duplicated, as the same callee is commonly inlined
    into multiple callers,
  - hashes are also very repetitive, especially for functions with
    low block counts.
- FunctionProfile: add inline tree (see above). The top-level function
  is included as the root of the function's inline tree, which makes
  the guid and pseudo_probe_desc_hash fields redundant.
- BlockProfile: densely-encoded block probe information:
  - probes reference their containing inline tree node,
  - separate lists for block, call, indirect call probes,
  - block probe encoding is specialized: ids from 1 to 64 are encoded
    as a bitset in a uint64_t (ids above 64 are listed separately). If
    only the block probe with id=1 is present, it's encoded as an
    implicit entry (id=0, omitted).
  - inline tree nodes with identical probes share one probe
    description, in which the node indices are combined into a list.

On top of #107970, a profile with the new probe encoding has the
following characteristics (measured on the profile for a large binary):

- Profile without probe information: 33MB, 3.8MB compressed (baseline).
- Profile with inline tree information: 92MB, 14MB compressed.

Profile processing time (YAML parsing, inference, attaching steps):
- profile without pseudo probes: 5s,
- profile with pseudo probes, without pseudo probe matching: 11s,
- with pseudo probe matching: 12.5s.

Test Plan: updated pseudoprobe-decoding-inline.test

Reviewers: wlei-llvm, ayermolo, rafaelauler, dcci, maksfb

Reviewed By: wlei-llvm, rafaelauler

Pull Request: https://github.com/llvm/llvm-project/pull/107137

Added: 
    

Modified: 
    bolt/include/bolt/Profile/ProfileYAMLMapping.h
    bolt/include/bolt/Profile/YAMLProfileWriter.h
    bolt/lib/Profile/DataAggregator.cpp
    bolt/lib/Profile/YAMLProfileWriter.cpp
    bolt/test/X86/pseudoprobe-decoding-inline.test
    bolt/test/X86/pseudoprobe-decoding-noinline.test
    llvm/include/llvm/MC/MCPseudoProbe.h

Removed: 
    


################################################################################
diff  --git a/bolt/include/bolt/Profile/ProfileYAMLMapping.h b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
index 2a0514d7d9304b..91955afb186e90 100644
--- a/bolt/include/bolt/Profile/ProfileYAMLMapping.h
+++ b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
@@ -95,24 +95,29 @@ template <> struct MappingTraits<bolt::SuccessorInfo> {
 
 namespace bolt {
 struct PseudoProbeInfo {
-  llvm::yaml::Hex64 GUID;
-  uint64_t Index;
-  uint8_t Type;
+  uint32_t InlineTreeIndex = 0;
+  uint64_t BlockMask = 0;            // bitset with probe indices from 1 to 64
+  std::vector<uint64_t> BlockProbes; // block probes with indices above 64
+  std::vector<uint64_t> CallProbes;
+  std::vector<uint64_t> IndCallProbes;
+  std::vector<uint32_t> InlineTreeNodes;
 
   bool operator==(const PseudoProbeInfo &Other) const {
-    return GUID == Other.GUID && Index == Other.Index;
-  }
-  bool operator!=(const PseudoProbeInfo &Other) const {
-    return !(*this == Other);
+    return InlineTreeIndex == Other.InlineTreeIndex &&
+           BlockProbes == Other.BlockProbes && CallProbes == Other.CallProbes &&
+           IndCallProbes == Other.IndCallProbes;
   }
 };
 } // end namespace bolt
 
 template <> struct MappingTraits<bolt::PseudoProbeInfo> {
   static void mapping(IO &YamlIO, bolt::PseudoProbeInfo &PI) {
-    YamlIO.mapRequired("guid", PI.GUID);
-    YamlIO.mapRequired("id", PI.Index);
-    YamlIO.mapRequired("type", PI.Type);
+    YamlIO.mapOptional("blx", PI.BlockMask, 0);
+    YamlIO.mapOptional("blk", PI.BlockProbes, std::vector<uint64_t>());
+    YamlIO.mapOptional("call", PI.CallProbes, std::vector<uint64_t>());
+    YamlIO.mapOptional("icall", PI.IndCallProbes, std::vector<uint64_t>());
+    YamlIO.mapOptional("id", PI.InlineTreeIndex, 0);
+    YamlIO.mapOptional("ids", PI.InlineTreeNodes, std::vector<uint32_t>());
   }
 
   static const bool flow = true;
@@ -158,15 +163,35 @@ template <> struct MappingTraits<bolt::BinaryBasicBlockProfile> {
                        std::vector<bolt::CallSiteInfo>());
     YamlIO.mapOptional("succ", BBP.Successors,
                        std::vector<bolt::SuccessorInfo>());
-    YamlIO.mapOptional("pseudo_probes", BBP.PseudoProbes,
+    YamlIO.mapOptional("probes", BBP.PseudoProbes,
                        std::vector<bolt::PseudoProbeInfo>());
   }
 };
 
+namespace bolt {
+struct InlineTreeNode {
+  uint32_t ParentIndexDelta;
+  uint32_t CallSiteProbe;
+  // Index in PseudoProbeDesc.GUID, UINT32_MAX for same as previous (omitted)
+  uint32_t GUIDIndex;
+  bool operator==(const InlineTreeNode &) const { return false; }
+};
+} // end namespace bolt
+
+template <> struct MappingTraits<bolt::InlineTreeNode> {
+  static void mapping(IO &YamlIO, bolt::InlineTreeNode &ITI) {
+    YamlIO.mapOptional("g", ITI.GUIDIndex, UINT32_MAX);
+    YamlIO.mapOptional("p", ITI.ParentIndexDelta, 0);
+    YamlIO.mapOptional("cs", ITI.CallSiteProbe, 0);
+  }
+
+  static const bool flow = true;
+};
 } // end namespace yaml
 } // end namespace llvm
 
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::BinaryBasicBlockProfile)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::InlineTreeNode)
 
 namespace llvm {
 namespace yaml {
@@ -179,8 +204,7 @@ struct BinaryFunctionProfile {
   llvm::yaml::Hex64 Hash{0};
   uint64_t ExecCount{0};
   std::vector<BinaryBasicBlockProfile> Blocks;
-  llvm::yaml::Hex64 GUID{0};
-  llvm::yaml::Hex64 PseudoProbeDescHash{0};
+  std::vector<InlineTreeNode> InlineTree;
   bool Used{false};
 };
 } // end namespace bolt
@@ -194,9 +218,8 @@ template <> struct MappingTraits<bolt::BinaryFunctionProfile> {
     YamlIO.mapRequired("nblocks", BFP.NumBasicBlocks);
     YamlIO.mapOptional("blocks", BFP.Blocks,
                        std::vector<bolt::BinaryBasicBlockProfile>());
-    YamlIO.mapOptional("guid", BFP.GUID, (uint64_t)0);
-    YamlIO.mapOptional("pseudo_probe_desc_hash", BFP.PseudoProbeDescHash,
-                       (uint64_t)0);
+    YamlIO.mapOptional("inline_tree", BFP.InlineTree,
+                       std::vector<bolt::InlineTreeNode>());
   }
 };
 
@@ -246,10 +269,33 @@ template <> struct MappingTraits<bolt::BinaryProfileHeader> {
   }
 };
 
+namespace bolt {
+struct PseudoProbeDesc {
+  std::vector<Hex64> GUID;
+  std::vector<Hex64> Hash;
+  std::vector<uint32_t> GUIDHashIdx; // Index of hash for that GUID in Hash
+
+  bool operator==(const PseudoProbeDesc &Other) const {
+    // Only treat empty Desc as equal
+    return GUID.empty() && Other.GUID.empty() && Hash.empty() &&
+           Other.Hash.empty() && GUIDHashIdx.empty() &&
+           Other.GUIDHashIdx.empty();
+  }
+};
+} // end namespace bolt
+
+template <> struct MappingTraits<bolt::PseudoProbeDesc> {
+  static void mapping(IO &YamlIO, bolt::PseudoProbeDesc &PD) {
+    YamlIO.mapRequired("gs", PD.GUID);
+    YamlIO.mapRequired("gh", PD.GUIDHashIdx);
+    YamlIO.mapRequired("hs", PD.Hash);
+  }
+};
 } // end namespace yaml
 } // end namespace llvm
 
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::BinaryFunctionProfile)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::PseudoProbeDesc)
 
 namespace llvm {
 namespace yaml {
@@ -258,6 +304,7 @@ namespace bolt {
 struct BinaryProfile {
   BinaryProfileHeader Header;
   std::vector<BinaryFunctionProfile> Functions;
+  PseudoProbeDesc PseudoProbeDesc;
 };
 } // namespace bolt
 
@@ -265,6 +312,8 @@ template <> struct MappingTraits<bolt::BinaryProfile> {
   static void mapping(IO &YamlIO, bolt::BinaryProfile &BP) {
     YamlIO.mapRequired("header", BP.Header);
     YamlIO.mapRequired("functions", BP.Functions);
+    YamlIO.mapOptional("pseudo_probe_desc", BP.PseudoProbeDesc,
+                       bolt::PseudoProbeDesc());
   }
 };
 

diff  --git a/bolt/include/bolt/Profile/YAMLProfileWriter.h b/bolt/include/bolt/Profile/YAMLProfileWriter.h
index 4a9355dfceac9e..aec6e474847605 100644
--- a/bolt/include/bolt/Profile/YAMLProfileWriter.h
+++ b/bolt/include/bolt/Profile/YAMLProfileWriter.h
@@ -32,8 +32,27 @@ class YAMLProfileWriter {
   /// Save execution profile for that instance.
   std::error_code writeProfile(const RewriteInstance &RI);
 
+  using InlineTreeMapTy =
+      DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t>;
+  struct InlineTreeDesc {
+    template <typename T> using GUIDMapTy = std::unordered_map<uint64_t, T>;
+    using GUIDNodeMap = GUIDMapTy<const MCDecodedPseudoProbeInlineTree *>;
+    using GUIDNumMap = GUIDMapTy<uint32_t>;
+    GUIDNodeMap TopLevelGUIDToInlineTree;
+    GUIDNumMap GUIDIdxMap;
+    GUIDNumMap HashIdxMap;
+  };
+
+  static std::tuple<std::vector<yaml::bolt::InlineTreeNode>, InlineTreeMapTy>
+  convertBFInlineTree(const MCPseudoProbeDecoder &Decoder,
+                      const InlineTreeDesc &InlineTree, uint64_t GUID);
+
+  static std::tuple<yaml::bolt::PseudoProbeDesc, InlineTreeDesc>
+  convertPseudoProbeDesc(const MCPseudoProbeDecoder &PseudoProbeDecoder);
+
   static yaml::bolt::BinaryFunctionProfile
   convert(const BinaryFunction &BF, bool UseDFS,
+          const InlineTreeDesc &InlineTree,
           const BoltAddressTranslation *BAT = nullptr);
 
   /// Set CallSiteInfo destination fields from \p Symbol and return a target
@@ -42,8 +61,39 @@ class YAMLProfileWriter {
   setCSIDestination(const BinaryContext &BC, yaml::bolt::CallSiteInfo &CSI,
                     const MCSymbol *Symbol, const BoltAddressTranslation *BAT,
                     uint32_t Offset = 0);
-};
 
+private:
+  struct InlineTreeNode {
+    const MCDecodedPseudoProbeInlineTree *InlineTree;
+    uint64_t GUID;
+    uint64_t Hash;
+    uint32_t ParentId;
+    uint32_t InlineSite;
+  };
+  static std::vector<InlineTreeNode>
+  collectInlineTree(const MCPseudoProbeDecoder &Decoder,
+                    const MCDecodedPseudoProbeInlineTree &Root);
+
+  // 0 - block probe, 1 - indirect call, 2 - direct call
+  using ProbeList = std::array<SmallVector<uint64_t, 0>, 3>;
+  using NodeIdToProbes = DenseMap<uint32_t, ProbeList>;
+  static std::vector<yaml::bolt::PseudoProbeInfo>
+  convertNodeProbes(NodeIdToProbes &NodeProbes);
+
+public:
+  template <typename T>
+  static std::vector<yaml::bolt::PseudoProbeInfo>
+  writeBlockProbes(T Probes, const InlineTreeMapTy &InlineTreeNodeId) {
+    NodeIdToProbes NodeProbes;
+    for (const MCDecodedPseudoProbe &Probe : Probes) {
+      auto It = InlineTreeNodeId.find(Probe.getInlineTreeNode());
+      if (It == InlineTreeNodeId.end())
+        continue;
+      NodeProbes[It->second][Probe.getType()].emplace_back(Probe.getIndex());
+    }
+    return convertNodeProbes(NodeProbes);
+  }
+};
 } // namespace bolt
 } // namespace llvm
 

diff  --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 4aeeb1daab1b94..fcde6f5f4642c8 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -2321,6 +2321,12 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
   BP.Header.Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
                                            : BinaryFunction::PF_LBR;
 
+  // Add probe inline tree nodes.
+  YAMLProfileWriter::InlineTreeDesc InlineTree;
+  if (PseudoProbeDecoder)
+    std::tie(BP.PseudoProbeDesc, InlineTree) =
+        YAMLProfileWriter::convertPseudoProbeDesc(*PseudoProbeDecoder);
+
   if (!opts::BasicAggregation) {
     // Convert profile for functions not covered by BAT
     for (auto &BFI : BC.getBinaryFunctions()) {
@@ -2329,8 +2335,8 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
         continue;
       if (BAT->isBATFunction(Function.getAddress()))
         continue;
-      BP.Functions.emplace_back(
-          YAMLProfileWriter::convert(Function, /*UseDFS=*/false, BAT));
+      BP.Functions.emplace_back(YAMLProfileWriter::convert(
+          Function, /*UseDFS=*/false, InlineTree, BAT));
     }
 
     for (const auto &KV : NamesToBranches) {
@@ -2403,16 +2409,22 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
         YamlBF.Blocks[BlockIndex].ExecCount += BI.Branches;
       }
       if (PseudoProbeDecoder) {
-        if ((YamlBF.GUID = BF->getGUID())) {
-          const MCPseudoProbeFuncDesc *FuncDesc =
-              PseudoProbeDecoder->getFuncDescForGUID(YamlBF.GUID);
-          YamlBF.PseudoProbeDescHash = FuncDesc->FuncHash;
+        DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t>
+            InlineTreeNodeId;
+        if (BF->getGUID()) {
+          std::tie(YamlBF.InlineTree, InlineTreeNodeId) =
+              YAMLProfileWriter::convertBFInlineTree(*PseudoProbeDecoder,
+                                                     InlineTree, BF->getGUID());
         }
         // Fetch probes belonging to all fragments
         const AddressProbesMap &ProbeMap =
             PseudoProbeDecoder->getAddress2ProbesMap();
         BinaryFunction::FragmentsSetTy Fragments(BF->Fragments);
         Fragments.insert(BF);
+        DenseMap<
+            uint32_t,
+            std::vector<std::reference_wrapper<const MCDecodedPseudoProbe>>>
+            BlockProbes;
         for (const BinaryFunction *F : Fragments) {
           const uint64_t FuncAddr = F->getAddress();
           for (const MCDecodedPseudoProbe &Probe :
@@ -2421,11 +2433,14 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
             const uint32_t InputOffset = BAT->translate(
                 FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true);
             const unsigned BlockIndex = getBlock(InputOffset).second;
-            YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back(
-                yaml::bolt::PseudoProbeInfo{Probe.getGuid(), Probe.getIndex(),
-                                            Probe.getType()});
+            BlockProbes[BlockIndex].emplace_back(Probe);
           }
         }
+
+        for (auto &[Block, Probes] : BlockProbes) {
+          YamlBF.Blocks[Block].PseudoProbes =
+              YAMLProfileWriter::writeBlockProbes(Probes, InlineTreeNodeId);
+        }
       }
       // Skip printing if there's no profile data
       llvm::erase_if(

diff  --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp
index ffbf2388e912fb..44600c3c5d5ef7 100644
--- a/bolt/lib/Profile/YAMLProfileWriter.cpp
+++ b/bolt/lib/Profile/YAMLProfileWriter.cpp
@@ -14,6 +14,7 @@
 #include "bolt/Profile/ProfileReaderBase.h"
 #include "bolt/Rewrite/RewriteInstance.h"
 #include "bolt/Utils/CommandLineOpts.h"
+#include "llvm/MC/MCPseudoProbe.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/raw_ostream.h"
@@ -58,8 +59,158 @@ const BinaryFunction *YAMLProfileWriter::setCSIDestination(
   return nullptr;
 }
 
+std::vector<YAMLProfileWriter::InlineTreeNode>
+YAMLProfileWriter::collectInlineTree(
+    const MCPseudoProbeDecoder &Decoder,
+    const MCDecodedPseudoProbeInlineTree &Root) {
+  auto getHash = [&](const MCDecodedPseudoProbeInlineTree &Node) {
+    return Decoder.getFuncDescForGUID(Node.Guid)->FuncHash;
+  };
+  std::vector<InlineTreeNode> InlineTree(
+      {InlineTreeNode{&Root, Root.Guid, getHash(Root), 0, 0}});
+  uint32_t ParentId = 0;
+  while (ParentId != InlineTree.size()) {
+    const MCDecodedPseudoProbeInlineTree *Cur = InlineTree[ParentId].InlineTree;
+    for (const MCDecodedPseudoProbeInlineTree &Child : Cur->getChildren())
+      InlineTree.emplace_back(
+          InlineTreeNode{&Child, Child.Guid, getHash(Child), ParentId,
+                         std::get<1>(Child.getInlineSite())});
+    ++ParentId;
+  }
+
+  return InlineTree;
+}
+
+std::tuple<yaml::bolt::PseudoProbeDesc, YAMLProfileWriter::InlineTreeDesc>
+YAMLProfileWriter::convertPseudoProbeDesc(const MCPseudoProbeDecoder &Decoder) {
+  yaml::bolt::PseudoProbeDesc Desc;
+  InlineTreeDesc InlineTree;
+
+  for (const MCDecodedPseudoProbeInlineTree &TopLev :
+       Decoder.getDummyInlineRoot().getChildren())
+    InlineTree.TopLevelGUIDToInlineTree[TopLev.Guid] = &TopLev;
+
+  for (const auto &FuncDesc : Decoder.getGUID2FuncDescMap())
+    ++InlineTree.HashIdxMap[FuncDesc.FuncHash];
+
+  InlineTree.GUIDIdxMap.reserve(Decoder.getGUID2FuncDescMap().size());
+  for (const auto &Node : Decoder.getInlineTreeVec())
+    ++InlineTree.GUIDIdxMap[Node.Guid];
+
+  std::vector<std::pair<uint32_t, uint64_t>> GUIDFreqVec;
+  GUIDFreqVec.reserve(InlineTree.GUIDIdxMap.size());
+  for (const auto [GUID, Cnt] : InlineTree.GUIDIdxMap)
+    GUIDFreqVec.emplace_back(Cnt, GUID);
+  llvm::sort(GUIDFreqVec);
+
+  std::vector<std::pair<uint32_t, uint64_t>> HashFreqVec;
+  HashFreqVec.reserve(InlineTree.HashIdxMap.size());
+  for (const auto [Hash, Cnt] : InlineTree.HashIdxMap)
+    HashFreqVec.emplace_back(Cnt, Hash);
+  llvm::sort(HashFreqVec);
+
+  uint32_t Index = 0;
+  Desc.Hash.reserve(HashFreqVec.size());
+  for (uint64_t Hash : llvm::make_second_range(llvm::reverse(HashFreqVec))) {
+    Desc.Hash.emplace_back(Hash);
+    InlineTree.HashIdxMap[Hash] = Index++;
+  }
+
+  Index = 0;
+  Desc.GUID.reserve(GUIDFreqVec.size());
+  for (uint64_t GUID : llvm::make_second_range(llvm::reverse(GUIDFreqVec))) {
+    Desc.GUID.emplace_back(GUID);
+    InlineTree.GUIDIdxMap[GUID] = Index++;
+    uint64_t Hash = Decoder.getFuncDescForGUID(GUID)->FuncHash;
+    Desc.GUIDHashIdx.emplace_back(InlineTree.HashIdxMap[Hash]);
+  }
+
+  return {Desc, InlineTree};
+}
+
+std::vector<yaml::bolt::PseudoProbeInfo>
+YAMLProfileWriter::convertNodeProbes(NodeIdToProbes &NodeProbes) {
+  struct BlockProbeInfoHasher {
+    size_t operator()(const yaml::bolt::PseudoProbeInfo &BPI) const {
+      auto HashCombine = [](auto &Range) {
+        return llvm::hash_combine_range(Range.begin(), Range.end());
+      };
+      return llvm::hash_combine(HashCombine(BPI.BlockProbes),
+                                HashCombine(BPI.CallProbes),
+                                HashCombine(BPI.IndCallProbes));
+    }
+  };
+
+  // Check identical BlockProbeInfo structs and merge them
+  std::unordered_map<yaml::bolt::PseudoProbeInfo, std::vector<uint32_t>,
+                     BlockProbeInfoHasher>
+      BPIToNodes;
+  for (auto &[NodeId, Probes] : NodeProbes) {
+    yaml::bolt::PseudoProbeInfo BPI;
+    BPI.BlockProbes = std::vector(Probes[0].begin(), Probes[0].end());
+    BPI.IndCallProbes = std::vector(Probes[1].begin(), Probes[1].end());
+    BPI.CallProbes = std::vector(Probes[2].begin(), Probes[2].end());
+    BPIToNodes[BPI].push_back(NodeId);
+  }
+
+  auto handleMask = [](const auto &Ids, auto &Vec, auto &Mask) {
+    for (auto Id : Ids)
+      if (Id > 64)
+        Vec.emplace_back(Id);
+      else
+        Mask |= 1ull << (Id - 1);
+  };
+
+  // Add to YAML with merged nodes/block mask optimizations
+  std::vector<yaml::bolt::PseudoProbeInfo> YamlProbes;
+  YamlProbes.reserve(BPIToNodes.size());
+  for (const auto &[BPI, Nodes] : BPIToNodes) {
+    auto &YamlBPI = YamlProbes.emplace_back(yaml::bolt::PseudoProbeInfo());
+    YamlBPI.CallProbes = BPI.CallProbes;
+    YamlBPI.IndCallProbes = BPI.IndCallProbes;
+    if (Nodes.size() == 1)
+      YamlBPI.InlineTreeIndex = Nodes.front();
+    else
+      YamlBPI.InlineTreeNodes = Nodes;
+    handleMask(BPI.BlockProbes, YamlBPI.BlockProbes, YamlBPI.BlockMask);
+  }
+  return YamlProbes;
+}
+
+std::tuple<std::vector<yaml::bolt::InlineTreeNode>,
+           YAMLProfileWriter::InlineTreeMapTy>
+YAMLProfileWriter::convertBFInlineTree(const MCPseudoProbeDecoder &Decoder,
+                                       const InlineTreeDesc &InlineTree,
+                                       uint64_t GUID) {
+  DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
+  std::vector<yaml::bolt::InlineTreeNode> YamlInlineTree;
+  auto It = InlineTree.TopLevelGUIDToInlineTree.find(GUID);
+  if (It == InlineTree.TopLevelGUIDToInlineTree.end())
+    return {YamlInlineTree, InlineTreeNodeId};
+  const MCDecodedPseudoProbeInlineTree *Root = It->second;
+  assert(Root && "Malformed TopLevelGUIDToInlineTree");
+  uint32_t Index = 0;
+  uint32_t PrevParent = 0;
+  uint32_t PrevGUIDIdx = 0;
+  for (const auto &Node : collectInlineTree(Decoder, *Root)) {
+    InlineTreeNodeId[Node.InlineTree] = Index++;
+    auto GUIDIdxIt = InlineTree.GUIDIdxMap.find(Node.GUID);
+    assert(GUIDIdxIt != InlineTree.GUIDIdxMap.end() && "Malformed GUIDIdxMap");
+    uint32_t GUIDIdx = GUIDIdxIt->second;
+    if (GUIDIdx == PrevGUIDIdx)
+      GUIDIdx = UINT32_MAX;
+    else
+      PrevGUIDIdx = GUIDIdx;
+    YamlInlineTree.emplace_back(yaml::bolt::InlineTreeNode{
+        Node.ParentId - PrevParent, Node.InlineSite, GUIDIdx});
+    PrevParent = Node.ParentId;
+  }
+  return {YamlInlineTree, InlineTreeNodeId};
+}
+
 yaml::bolt::BinaryFunctionProfile
 YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
+                           const InlineTreeDesc &InlineTree,
                            const BoltAddressTranslation *BAT) {
   yaml::bolt::BinaryFunctionProfile YamlBF;
   const BinaryContext &BC = BF.getBinaryContext();
@@ -77,12 +228,10 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
   YamlBF.Hash = BF.getHash();
   YamlBF.NumBasicBlocks = BF.size();
   YamlBF.ExecCount = BF.getKnownExecutionCount();
-  if (PseudoProbeDecoder) {
-    if ((YamlBF.GUID = BF.getGUID())) {
-      const MCPseudoProbeFuncDesc *FuncDesc =
-          PseudoProbeDecoder->getFuncDescForGUID(YamlBF.GUID);
-      YamlBF.PseudoProbeDescHash = FuncDesc->FuncHash;
-    }
+  DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
+  if (PseudoProbeDecoder && BF.getGUID()) {
+    std::tie(YamlBF.InlineTree, InlineTreeNodeId) =
+        convertBFInlineTree(*PseudoProbeDecoder, InlineTree, BF.getGUID());
   }
 
   BinaryFunction::BasicBlockOrderType Order;
@@ -198,10 +347,10 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
       const uint64_t FuncAddr = BF.getAddress();
       const std::pair<uint64_t, uint64_t> &BlockRange =
           BB->getInputAddressRange();
-      for (const MCDecodedPseudoProbe &Probe : ProbeMap.find(
-               FuncAddr + BlockRange.first, FuncAddr + BlockRange.second))
-        YamlBB.PseudoProbes.emplace_back(yaml::bolt::PseudoProbeInfo{
-            Probe.getGuid(), Probe.getIndex(), Probe.getType()});
+      const std::pair<uint64_t, uint64_t> BlockAddrRange = {
+          FuncAddr + BlockRange.first, FuncAddr + BlockRange.second};
+      auto Probes = ProbeMap.find(BlockAddrRange.first, BlockAddrRange.second);
+      YamlBB.PseudoProbes = writeBlockProbes(Probes, InlineTreeNodeId);
     }
 
     YamlBF.Blocks.emplace_back(YamlBB);
@@ -256,6 +405,12 @@ std::error_code YAMLProfileWriter::writeProfile(const RewriteInstance &RI) {
   }
   BP.Header.Flags = ProfileFlags;
 
+  // Add probe inline tree nodes.
+  InlineTreeDesc InlineTree;
+  if (const MCPseudoProbeDecoder *Decoder =
+          opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr)
+    std::tie(BP.PseudoProbeDesc, InlineTree) = convertPseudoProbeDesc(*Decoder);
+
   // Add all function objects.
   for (const auto &BFI : Functions) {
     const BinaryFunction &BF = BFI.second;
@@ -263,7 +418,7 @@ std::error_code YAMLProfileWriter::writeProfile(const RewriteInstance &RI) {
       if (!BF.hasValidProfile() && !RI.getProfileReader()->isTrustedSource())
         continue;
 
-      BP.Functions.emplace_back(convert(BF, opts::ProfileUseDFS));
+      BP.Functions.emplace_back(convert(BF, opts::ProfileUseDFS, InlineTree));
     }
   }
 

diff  --git a/bolt/test/X86/pseudoprobe-decoding-inline.test b/bolt/test/X86/pseudoprobe-decoding-inline.test
index 1fdd00c7ef6c4b..e5e8aadc18f9e1 100644
--- a/bolt/test/X86/pseudoprobe-decoding-inline.test
+++ b/bolt/test/X86/pseudoprobe-decoding-inline.test
@@ -14,29 +14,31 @@
 # RUN: FileCheck --input-file %t.yaml2 %s --check-prefix CHECK-YAML
 # CHECK-YAML: name: bar
 # CHECK-YAML: - bid: 0
-# CHECK-YAML:   pseudo_probes: [ { guid: 0xE413754A191DB537, id: 1, type: 0 }, { guid: 0xE413754A191DB537, id: 4, type: 0 } ]
-# CHECK-YAML: guid: 0xE413754A191DB537
-# CHECK-YAML: pseudo_probe_desc_hash: 0x10E852DA94
+# CHECK-YAML:   probes: [ { blx: 9 } ]
+# CHECK-YAML: inline_tree: [ { } ]
 #
 # CHECK-YAML: name: foo
 # CHECK-YAML: - bid: 0
-# CHECK-YAML:   pseudo_probes: [ { guid: 0x5CF8C24CDB18BDAC, id: 1, type: 0 }, { guid: 0x5CF8C24CDB18BDAC, id: 2, type: 0 } ]
-# CHECK-YAML: guid: 0x5CF8C24CDB18BDAC
-# CHECK-YAML: pseudo_probe_desc_hash: 0x200205A19C5B4
+# CHECK-YAML:   probes: [ { blx: 3 } ]
+# CHECK-YAML: inline_tree: [ { g: 1 }, { g: 0, cs: 8 } ]
 #
 # CHECK-YAML: name: main
 # CHECK-YAML: - bid: 0
-# CHECK-YAML:   pseudo_probes: [ { guid: 0xDB956436E78DD5FA, id: 1, type: 0 }, { guid: 0x5CF8C24CDB18BDAC, id: 1, type: 0 }, { guid: 0x5CF8C24CDB18BDAC, id: 2, type: 0 } ]
-# CHECK-YAML: guid: 0xDB956436E78DD5FA
-# CHECK-YAML: pseudo_probe_desc_hash: 0x10000FFFFFFFF
+# CHECK-YAML:   probes: [ { blx: 3, id: 1 }, { blx: 1 } ]
+# CHECK-YAML: inline_tree: [ { g: 2 }, { g: 1, cs: 2 }, { g: 0, p: 1, cs: 8 } ]
+#
+# CHECK-YAML: pseudo_probe_desc:
+# CHECK-YAML-NEXT: gs: [ 0xE413754A191DB537, 0x5CF8C24CDB18BDAC, 0xDB956436E78DD5FA ]
+# CHECK-YAML-NEXT: gh: [ 2, 0, 1 ]
+# CHECK-YAML-NEXT: hs: [ 0x200205A19C5B4, 0x10000FFFFFFFF, 0x10E852DA94 ]
 #
 ## Check that without --profile-write-pseudo-probes option, no pseudo probes are
 ## generated
-# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata
-# RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-NO-OPT
-# CHECK-NO-OPT-NOT: pseudo_probes
-# CHECK-NO-OPT-NOT: guid
-# CHECK-NO-OPT-NOT: pseudo_probe_desc_hash
+# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml3 -o %t.fdata
+# RUN: FileCheck --input-file %t.yaml3 %s --check-prefix CHECK-NO-OPT
+# CHECK-NO-OPT-NOT: probes:
+# CHECK-NO-OPT-NOT: inline_tree:
+# CHECK-NO-OPT-NOT: pseudo_probe_desc:
 
 CHECK: Report of decoding input pseudo probe binaries
 

diff  --git a/bolt/test/X86/pseudoprobe-decoding-noinline.test b/bolt/test/X86/pseudoprobe-decoding-noinline.test
index 5dd6c2e25bcfe9..36a2fab74e8570 100644
--- a/bolt/test/X86/pseudoprobe-decoding-noinline.test
+++ b/bolt/test/X86/pseudoprobe-decoding-noinline.test
@@ -1,6 +1,45 @@
 # REQUIRES: system-linux
-# RUN: llvm-bolt  %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin --print-pseudo-probes=all -o %t.bolt 2>&1 | FileCheck %s
+# RUN: llvm-bolt  %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin --print-pseudo-probes=all -o %t.bolt --lite=0 --enable-bat 2>&1 | FileCheck %s
 
+# PREAGG: B X:0 #foo# 1 0
+# PREAGG: B X:0 #bar# 1 0
+# PREAGG: B X:0 #main# 1 0
+
+## Check pseudo-probes in regular YAML profile (non-BOLTed binary)
+# RUN: link_fdata %s %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin %t.preagg PREAGG
+# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata --profile-write-pseudo-probes
+# RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-YAML
+## Check pseudo-probes in BAT YAML profile (BOLTed binary)
+# RUN: link_fdata %s %t.bolt %t.preagg2 PREAGG
+# RUN: perf2bolt %t.bolt -p %t.preagg2 --pa -w %t.yaml2 -o %t.fdata2 --profile-write-pseudo-probes
+# RUN: FileCheck --input-file %t.yaml2 %s --check-prefix CHECK-YAML
+# CHECK-YAML: name: bar
+# CHECK-YAML: - bid: 0
+# CHECK-YAML:   probes: [ { blx: 9 } ]
+# CHECK-YAML: inline_tree: [ {  } ]
+#
+# CHECK-YAML: name: foo
+# CHECK-YAML: - bid: 0
+# CHECK-YAML:   probes: [ { blx: 3 } ]
+# CHECK-YAML: inline_tree: [ { g: 2 } ]
+#
+# CHECK-YAML: name: main
+# CHECK-YAML: - bid: 0
+# CHECK-YAML:   probes: [ { blx: 1, call: [ 2 ] } ]
+# CHECK-YAML: inline_tree: [ { g: 1 } ]
+#
+# CHECK-YAML: pseudo_probe_desc:
+# CHECK-YAML-NEXT: gs: [ 0xE413754A191DB537, 0xDB956436E78DD5FA, 0x5CF8C24CDB18BDAC ]
+# CHECK-YAML-NEXT: gh: [ 2, 1, 0 ]
+# CHECK-YAML-NEXT: hs: [ 0x200205A19C5B4, 0x10000FFFFFFFF, 0x10E852DA94 ]
+#
+## Check that without --profile-write-pseudo-probes option, no pseudo probes are
+## generated
+# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml3 -o %t.fdata
+# RUN: FileCheck --input-file %t.yaml3 %s --check-prefix CHECK-NO-OPT
+# CHECK-NO-OPT-NOT: probes:
+# CHECK-NO-OPT-NOT: inline_tree:
+# CHECK-NO-OPT-NOT: pseudo_probe_desc:
 ;; Report of decoding input pseudo probe binaries
 
 ; CHECK: GUID: 6699318081062747564 Name: foo

diff  --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h
index 32905c1e9a424a..4bfae9eba1a0aa 100644
--- a/llvm/include/llvm/MC/MCPseudoProbe.h
+++ b/llvm/include/llvm/MC/MCPseudoProbe.h
@@ -507,6 +507,10 @@ class MCPseudoProbeDecoder {
     return iterator_range(It->second);
   }
 
+  const ArrayRef<MCDecodedPseudoProbeInlineTree> getInlineTreeVec() const {
+    return InlineTreeVec;
+  }
+
 private:
   // Recursively parse an inlining tree encoded in pseudo_probe section. Returns
   // whether the the top-level node should be skipped.


        


More information about the llvm-commits mailing list