[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

Amir Ayupov via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Fri Nov 8 15:25:52 PST 2024


https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/99891

>From 36197b175681d07b4704e576fb008cec3cc1e05e Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Wed, 28 Aug 2024 21:10:25 +0200
Subject: [PATCH 1/3] Reworked block probe matching

Use new probe ifaces
Get all function probes at once
Drop ProfileUsePseudoProbes
Unify matchWithBlockPseudoProbes
Distinguish exact and loose probe match
---
 bolt/include/bolt/Core/BinaryContext.h    |  20 +-
 bolt/lib/Passes/BinaryPasses.cpp          |  40 ++-
 bolt/lib/Profile/StaleProfileMatching.cpp | 404 ++++++++++------------
 bolt/lib/Rewrite/PseudoProbeRewriter.cpp  |   8 +-
 4 files changed, 237 insertions(+), 235 deletions(-)

diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 3e20cb607e657b..3f7b2ac0bc6cf9 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -724,14 +724,26 @@ class BinaryContext {
     uint32_t NumStaleBlocks{0};
     ///   the number of exactly matched basic blocks
     uint32_t NumExactMatchedBlocks{0};
-    ///   the number of pseudo probe matched basic blocks
-    uint32_t NumPseudoProbeMatchedBlocks{0};
+    ///   the number of loosely matched basic blocks
+    uint32_t NumLooseMatchedBlocks{0};
+    ///   the number of exactly pseudo probe matched basic blocks
+    uint32_t NumPseudoProbeExactMatchedBlocks{0};
+    ///   the number of loosely pseudo probe matched basic blocks
+    uint32_t NumPseudoProbeLooseMatchedBlocks{0};
+    ///   the number of call matched basic blocks
+    uint32_t NumCallMatchedBlocks{0};
     ///   the total count of samples in the profile
     uint64_t StaleSampleCount{0};
     ///   the count of exactly matched samples
     uint64_t ExactMatchedSampleCount{0};
-    ///   the count of pseudo probe matched samples
-    uint64_t PseudoProbeMatchedSampleCount{0};
+    ///   the count of loosely matched samples
+    uint64_t LooseMatchedSampleCount{0};
+    ///   the count of exactly pseudo probe matched samples
+    uint64_t PseudoProbeExactMatchedSampleCount{0};
+    ///   the count of loosely pseudo probe matched samples
+    uint64_t PseudoProbeLooseMatchedSampleCount{0};
+    ///   the count of call matched samples
+    uint64_t CallMatchedSampleCount{0};
     ///   the number of stale functions that have matching number of blocks in
     ///   the profile
     uint64_t NumStaleFuncsWithEqualBlockCount{0};
diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
index b786f07a6a6651..8edbd58c3ed3de 100644
--- a/bolt/lib/Passes/BinaryPasses.cpp
+++ b/bolt/lib/Passes/BinaryPasses.cpp
@@ -1524,15 +1524,43 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
         100.0 * BC.Stats.ExactMatchedSampleCount / BC.Stats.StaleSampleCount,
         BC.Stats.ExactMatchedSampleCount, BC.Stats.StaleSampleCount);
     BC.outs() << format(
-        "BOLT-INFO: inference found a pseudo probe match for %.2f%% of basic "
+        "BOLT-INFO: inference found an exact pseudo probe match for %.2f%% of "
+        "basic blocks (%zu out of %zu stale) responsible for %.2f%% samples"
+        " (%zu out of %zu stale)\n",
+        100.0 * BC.Stats.NumPseudoProbeExactMatchedBlocks /
+            BC.Stats.NumStaleBlocks,
+        BC.Stats.NumPseudoProbeExactMatchedBlocks, BC.Stats.NumStaleBlocks,
+        100.0 * BC.Stats.PseudoProbeExactMatchedSampleCount /
+            BC.Stats.StaleSampleCount,
+        BC.Stats.PseudoProbeExactMatchedSampleCount, BC.Stats.StaleSampleCount);
+    BC.outs() << format(
+        "BOLT-INFO: inference found a loose pseudo probe match for %.2f%% of "
+        "basic blocks (%zu out of %zu stale) responsible for %.2f%% samples"
+        " (%zu out of %zu stale)\n",
+        100.0 * BC.Stats.NumPseudoProbeLooseMatchedBlocks /
+            BC.Stats.NumStaleBlocks,
+        BC.Stats.NumPseudoProbeLooseMatchedBlocks, BC.Stats.NumStaleBlocks,
+        100.0 * BC.Stats.PseudoProbeLooseMatchedSampleCount /
+            BC.Stats.StaleSampleCount,
+        BC.Stats.PseudoProbeLooseMatchedSampleCount, BC.Stats.StaleSampleCount);
+    BC.outs() << format(
+        "BOLT-INFO: inference found a call match for %.2f%% of basic "
         "blocks"
         " (%zu out of %zu stale) responsible for %.2f%% samples"
         " (%zu out of %zu stale)\n",
-        100.0 * BC.Stats.NumPseudoProbeMatchedBlocks / BC.Stats.NumStaleBlocks,
-        BC.Stats.NumPseudoProbeMatchedBlocks, BC.Stats.NumStaleBlocks,
-        100.0 * BC.Stats.PseudoProbeMatchedSampleCount /
-            BC.Stats.StaleSampleCount,
-        BC.Stats.PseudoProbeMatchedSampleCount, BC.Stats.StaleSampleCount);
+        100.0 * BC.Stats.NumCallMatchedBlocks / BC.Stats.NumStaleBlocks,
+        BC.Stats.NumCallMatchedBlocks, BC.Stats.NumStaleBlocks,
+        100.0 * BC.Stats.CallMatchedSampleCount / BC.Stats.StaleSampleCount,
+        BC.Stats.CallMatchedSampleCount, BC.Stats.StaleSampleCount);
+    BC.outs() << format(
+        "BOLT-INFO: inference found a loose match for %.2f%% of basic "
+        "blocks"
+        " (%zu out of %zu stale) responsible for %.2f%% samples"
+        " (%zu out of %zu stale)\n",
+        100.0 * BC.Stats.NumLooseMatchedBlocks / BC.Stats.NumStaleBlocks,
+        BC.Stats.NumLooseMatchedBlocks, BC.Stats.NumStaleBlocks,
+        100.0 * BC.Stats.LooseMatchedSampleCount / BC.Stats.StaleSampleCount,
+        BC.Stats.LooseMatchedSampleCount, BC.Stats.StaleSampleCount);
   }
 
   if (const uint64_t NumUnusedObjects = BC.getNumUnusedProfiledObjects()) {
diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index ef9320ae168fe7..2ec74ac7549f7c 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -29,6 +29,7 @@
 #include "bolt/Profile/YAMLProfileReader.h"
 #include "llvm/ADT/Bitfields.h"
 #include "llvm/ADT/Hashing.h"
+#include "llvm/MC/MCPseudoProbe.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/xxhash.h"
@@ -46,7 +47,6 @@ namespace opts {
 extern cl::opt<bool> TimeRewrite;
 extern cl::OptionCategory BoltOptCategory;
 extern cl::opt<unsigned> Verbosity;
-extern cl::opt<bool> ProfileUsePseudoProbes;
 
 cl::opt<bool>
     InferStaleProfile("infer-stale-profile",
@@ -198,8 +198,6 @@ struct BlendedBlockHash {
 /// release.
 class StaleMatcher {
 public:
-  StaleMatcher(const uint64_t YamlBFGUID) : YamlBFGUID(YamlBFGUID) {}
-
   /// Initialize stale matcher.
   void init(const std::vector<FlowBlock *> &Blocks,
             const std::vector<BlendedBlockHash> &Hashes,
@@ -217,39 +215,38 @@ class StaleMatcher {
     }
   }
 
-  /// Creates a mapping from a inlined pseudo probe's guid and index to probe.
-  void mapGUIDAndIndexToProbe(uint64_t Guid, uint64_t Index,
-                              const MCDecodedPseudoProbe *Probe) {
-    IndexAndGUIDToInlinedProbes[Guid][Index].push_back(Probe);
-  }
-
-  /// Creates a mapping from a pseudo probe index to pseudo probe.
-  void mapIndexToProbe(uint64_t Index, const MCDecodedPseudoProbe *Probe) {
-    IndexToProbes[Index].push_back(Probe);
-  }
-
   /// Creates a mapping from a pseudo probe to a flow block.
   void mapProbeToBB(const MCDecodedPseudoProbe *Probe, FlowBlock *Block) {
     BBPseudoProbeToBlock[Probe] = Block;
   }
 
+  enum MatchMethod : char {
+    MATCH_EXACT = 0,
+    MATCH_PROBE_EXACT,
+    MATCH_PROBE_LOOSE,
+    MATCH_OPCODE,
+    MATCH_CALL,
+    NO_MATCH
+  };
+
   /// Find the most similar flow block for a profile block given its hashes and
   /// pseudo probe information.
-  const FlowBlock *
+  std::pair<const FlowBlock *, MatchMethod>
   matchBlock(BlendedBlockHash BlendedHash, uint64_t CallHash,
-             const std::vector<yaml::bolt::PseudoProbeInfo> &PseudoProbes) {
-    const FlowBlock *BestBlock = matchWithOpcodes(BlendedHash);
-    if (BestBlock) {
-      ++MatchedWithOpcodes;
-      return BestBlock;
-    }
-    BestBlock = matchWithCalls(BlendedHash, CallHash);
-    if (BestBlock)
-      return BestBlock;
-    BestBlock = matchWithPseudoProbes(BlendedHash, PseudoProbes);
-    if (BestBlock)
-      MatchedWithPseudoProbes.insert(BlendedHash.combine());
-    return BestBlock;
+             const ArrayRef<yaml::bolt::PseudoProbeInfo> PseudoProbes,
+             const ArrayRef<yaml::bolt::InlineTreeInfo> InlineTree) {
+    const auto &[Block, Hash] = matchWithOpcodes(BlendedHash);
+    if (Block && isHighConfidenceMatch(Hash, BlendedHash))
+      return {Block, MATCH_EXACT};
+    const auto &[ProbeBlock, Exact] =
+        matchWithPseudoProbes(PseudoProbes, InlineTree);
+    if (ProbeBlock)
+      return {ProbeBlock, Exact ? MATCH_PROBE_EXACT : MATCH_PROBE_LOOSE};
+    if (const FlowBlock *BestBlock = matchWithCalls(BlendedHash, CallHash))
+      return {BestBlock, MATCH_CALL};
+    if (Block)
+      return {Block, MATCH_OPCODE};
+    return {nullptr, NO_MATCH};
   }
 
   /// Returns true if the two basic blocks (in the binary and in the profile)
@@ -260,48 +257,49 @@ class StaleMatcher {
     return Hash1.InstrHash == Hash2.InstrHash;
   }
 
-  /// Returns true if a profiled block was matched with its pseudo probe.
-  bool isPseudoProbeMatch(BlendedBlockHash YamlBBHash) {
-    return MatchedWithPseudoProbes.find(YamlBBHash.combine()) !=
-           MatchedWithPseudoProbes.end();
+  /// Returns matched InlineTree * for a given profile inline_tree_id.
+  const MCDecodedPseudoProbeInlineTree *
+  getInlineTreeNode(uint32_t ProfileInlineTreeNodeId) const {
+    auto It = InlineTreeNodeMap.find(ProfileInlineTreeNodeId);
+    if (It == InlineTreeNodeMap.end())
+      return nullptr;
+    return It->second;
   }
 
-  /// Returns the number of blocks matched with opcodes.
-  size_t getNumBlocksMatchedWithOpcodes() const { return MatchedWithOpcodes; }
-
-  /// Returns the number of blocks matched with pseudo probes.
-  size_t getNumBlocksMatchedWithPseudoProbes() const {
-    return MatchedWithPseudoProbes.size();
+  void mapInlineTreeNode(uint32_t ProfileNode,
+                         const MCDecodedPseudoProbeInlineTree *BinaryNode) {
+    auto Res = InlineTreeNodeMap.try_emplace(ProfileNode, BinaryNode);
+    assert(Res.second &&
+           "Duplicate mapping from profile node index to binary inline tree");
+    (void)Res;
   }
 
 private:
   using HashBlockPairType = std::pair<BlendedBlockHash, FlowBlock *>;
   std::unordered_map<uint16_t, std::vector<HashBlockPairType>> OpHashToBlocks;
   std::unordered_map<uint64_t, std::vector<HashBlockPairType>> CallHashToBlocks;
-  DenseMap<uint64_t, std::vector<const MCDecodedPseudoProbe *>> IndexToProbes;
-  DenseMap<uint64_t,
-           DenseMap<uint64_t, std::vector<const MCDecodedPseudoProbe *>>>
-      IndexAndGUIDToInlinedProbes;
+  DenseMap<uint32_t, const MCDecodedPseudoProbeInlineTree *> InlineTreeNodeMap;
   DenseMap<const MCDecodedPseudoProbe *, FlowBlock *> BBPseudoProbeToBlock;
-  DenseSet<uint64_t> MatchedWithPseudoProbes;
-  const uint64_t YamlBFGUID{0};
-  uint64_t MatchedWithOpcodes{0};
 
-  // Uses OpcodeHash to find the most similar block for a given hash.
-  const FlowBlock *matchWithOpcodes(BlendedBlockHash BlendedHash) const {
+  // Uses OpcodeHash to find the most similar block (with blended hash) for a
+  // given hash.
+  std::pair<const FlowBlock *, BlendedBlockHash>
+  matchWithOpcodes(BlendedBlockHash BlendedHash) const {
     auto BlockIt = OpHashToBlocks.find(BlendedHash.OpcodeHash);
     if (BlockIt == OpHashToBlocks.end())
-      return nullptr;
+      return {nullptr, BlendedBlockHash(0)};
     FlowBlock *BestBlock = nullptr;
     uint64_t BestDist = std::numeric_limits<uint64_t>::max();
+    BlendedBlockHash BestHash;
     for (const auto &[Hash, Block] : BlockIt->second) {
       uint64_t Dist = Hash.distance(BlendedHash);
       if (BestBlock == nullptr || Dist < BestDist) {
         BestDist = Dist;
         BestBlock = Block;
+        BestHash = Hash;
       }
     }
-    return BestBlock;
+    return {BestBlock, BestHash};
   }
 
   // Uses CallHash to find the most similar block for a given hash.
@@ -326,120 +324,71 @@ class StaleMatcher {
     return BestBlock;
   }
 
-  /// A helper function for logging.
-  static bool LogErrIfExpr(bool Expr, StringRef Message) {
-    if (Expr)
-      errs() << Message;
-    return Expr;
-  }
-
-  /// Matches an inlined profile block with an inlined binary block based on
-  /// pseudo probes.
-  const FlowBlock *matchWithInlinedBlockPseudoProbes(
-      SmallVector<const yaml::bolt::PseudoProbeInfo *>
-          &InlinedBlockPseudoProbes) const {
-    if (opts::Verbosity >= 3)
-      outs() << "BOLT-INFO: attempting to match block with inlined block "
-                "pseudo probes\n";
-
-    size_t NInlinedBlockPseudoProbes = InlinedBlockPseudoProbes.size();
-    if (LogErrIfExpr(NInlinedBlockPseudoProbes == 0,
-                     "BOLT-WARNING: no pseudo probes in profile block\n"))
-      return nullptr;
-    if (LogErrIfExpr(
-            NInlinedBlockPseudoProbes > 1,
-            "BOLT-WARNING: more than 1 pseudo probes in profile block\n"))
-      return nullptr;
-
-    const auto *InlinedPseudoProbe = InlinedBlockPseudoProbes[0];
-    uint64_t Guid = InlinedPseudoProbe->GUID;
-    uint64_t Index = InlinedPseudoProbe->Index;
-
-    auto GuidIt = IndexAndGUIDToInlinedProbes.find(Guid);
-    if (LogErrIfExpr(
-            GuidIt == IndexAndGUIDToInlinedProbes.end(),
-            "BOLT-WARNING: no pseudo probes found within BB at index\n"))
-      return nullptr;
-    auto IndexIt = GuidIt->second.find(Index);
-    if (LogErrIfExpr(
-            IndexIt == GuidIt->second.end(),
-            "BOLT-WARNING: no pseudo probes found within BB at index\n"))
-      return nullptr;
-
-    if (LogErrIfExpr(
-            IndexIt->second.size() > 1,
-            "BOLT-WARNING: more than 1 block pseudo probes in BB at index\n"))
-      return nullptr;
-
-    const MCDecodedPseudoProbe *BinaryPseudoProbe = IndexIt->second[0];
-    auto BinaryPseudoProbeIt = BBPseudoProbeToBlock.find(BinaryPseudoProbe);
-    assert(BinaryPseudoProbeIt != BBPseudoProbeToBlock.end() &&
-           "All binary pseudo probes should belong a binary basic block");
-
-    return BinaryPseudoProbeIt->second;
-  }
-
   /// Matches a profile block with an binary block based on pseudo probes.
-  const FlowBlock *matchWithNonInlinedBlockPseudoProbes(
-      SmallVector<const yaml::bolt::PseudoProbeInfo *> &BlockPseudoProbes)
-      const {
-    if (opts::Verbosity >= 3)
-      outs() << "BOLT-INFO: attempting to match block with inlined block "
-                "pseudo probes\n";
-
-    size_t NBlockPseudoProbes = BlockPseudoProbes.size();
-    if (LogErrIfExpr(NBlockPseudoProbes == 0,
-                     "BOLT-WARNING: no pseudo probes in profile block\n"))
-      return nullptr;
-    if (LogErrIfExpr(
-            NBlockPseudoProbes > 1,
-            "BOLT-WARNING: more than 1 pseudo probes in profile block\n"))
-      return nullptr;
-    uint64_t Index = BlockPseudoProbes[0]->Index;
-    auto It = IndexToProbes.find(Index);
-    if (LogErrIfExpr(
-            It == IndexToProbes.end(),
-            "BOLT-WARNING: no block pseudo probes found within BB at index\n"))
-      return nullptr;
-    if (LogErrIfExpr(
-            It->second.size() > 1,
-            "BOLT-WARNING: more than 1 block pseudo probes in BB at index\n"))
-      return nullptr;
-    const MCDecodedPseudoProbe *BinaryPseudoProbe = It->second[0];
-    auto BinaryPseudoProbeIt = BBPseudoProbeToBlock.find(BinaryPseudoProbe);
-    assert(BinaryPseudoProbeIt != BBPseudoProbeToBlock.end() &&
-           "All binary pseudo probes should belong a binary basic block");
+  /// Returns the best matching block (or nullptr) and whether the match is
+  /// unambiguous (every profile probe voted for the returned block).
+  std::pair<const FlowBlock *, bool> matchWithPseudoProbes(
+      const ArrayRef<yaml::bolt::PseudoProbeInfo> BlockPseudoProbes,
+      const ArrayRef<yaml::bolt::InlineTreeInfo> InlineTree) const {
+    if (!opts::StaleMatchingWithBlockPseudoProbes)
+      return {nullptr, false};
+
+    auto logIf = [](bool Expr, StringRef Message) {
+      LLVM_DEBUG(if (Expr) errs() << Message << '\n');
+      return Expr;
+    };
 
-    return BinaryPseudoProbeIt->second;
-  }
+    DenseMap<const FlowBlock *, uint32_t> FlowBlockMatchCount;
 
-  /// Uses pseudo probe information to attach the profile to the appropriate
-  /// block.
-  const FlowBlock *matchWithPseudoProbes(
-      BlendedBlockHash BlendedHash,
-      const std::vector<yaml::bolt::PseudoProbeInfo> &PseudoProbes) const {
-    if (!opts::StaleMatchingWithBlockPseudoProbes || !YamlBFGUID)
-      return nullptr;
-
-    // Searches for the pseudo probe attached to the matched function's block.
-    SmallVector<const yaml::bolt::PseudoProbeInfo *> BlockPseudoProbes;
-    SmallVector<const yaml::bolt::PseudoProbeInfo *> InlinedBlockPseudoProbes;
-    for (const auto &PseudoProbe : PseudoProbes) {
-      // Skips pseudo probes attached to function calls.
-      if (PseudoProbe.Type != static_cast<uint8_t>(PseudoProbeType::Block))
+    for (const yaml::bolt::PseudoProbeInfo &Probe : BlockPseudoProbes) {
+      const MCDecodedPseudoProbeInlineTree *InlineTreeNode =
+          getInlineTreeNode(Probe.InlineTreeIndex);
+      if (logIf(!InlineTreeNode,
+                formatv("no matching inline tree node for {0} {1}",
+                        Probe.InlineTreeIndex, Probe.Index).str())) {
+        ++FlowBlockMatchCount[nullptr];
         continue;
-      if (PseudoProbe.GUID != YamlBFGUID)
-        InlinedBlockPseudoProbes.push_back(&PseudoProbe);
-      else
-        BlockPseudoProbes.push_back(&PseudoProbe);
+      }
+      const MCDecodedPseudoProbe *BinaryProbe = nullptr;
+      for (const MCDecodedPseudoProbe &FuncProbe :
+           InlineTreeNode->getProbes()) {
+        if (FuncProbe.getIndex() != Probe.Index)
+          continue;
+        BinaryProbe = &FuncProbe;
+        break;
+      }
+      if (logIf(!BinaryProbe, formatv("no matching binary probe for {0} {1}",
+                                      Probe.InlineTreeIndex, Probe.Index)
+                                  .str())) {
+        ++FlowBlockMatchCount[nullptr];
+        continue;
+      }
+      auto It = BBPseudoProbeToBlock.find(BinaryProbe);
+      if (logIf(It == BBPseudoProbeToBlock.end(),
+                formatv("no probe->block for {0} {1}", Probe.InlineTreeIndex,
+                        Probe.Index)
+                    .str())) {
+        ++FlowBlockMatchCount[nullptr];
+        continue;
+      }
+      const FlowBlock *Block = It->second;
+      ++FlowBlockMatchCount[Block];
+    }
+    uint32_t BestMatchCount = 0;
+    uint32_t TotalMatchCount = 0;
+    const FlowBlock *BestMatchBlock = nullptr;
+    for (auto &[Block, Count] : FlowBlockMatchCount) {
+      logIf(true, formatv("block {0} count {1}",
+                          Block ? Block->Index : UINT64_MAX, Count)
+                      .str());
+      TotalMatchCount += Count;
+      if (Count > BestMatchCount ||
+          (Count == BestMatchCount && !BestMatchBlock)) {
+        BestMatchBlock = Block;
+        BestMatchCount = Count;
+      }
     }
-    // Returns nullptr if there is not a 1:1 mapping of the profile block pseudo
-    // probe and a binary block pseudo probe.
-    const FlowBlock *MatchedInlinedBlock =
-        matchWithInlinedBlockPseudoProbes(InlinedBlockPseudoProbes);
-    return MatchedInlinedBlock
-               ? MatchedInlinedBlock
-               : matchWithNonInlinedBlockPseudoProbes(BlockPseudoProbes);
+    return {BestMatchBlock, BestMatchCount == TotalMatchCount};
   }
 };
 
@@ -630,26 +579,7 @@ size_t matchWeightsByHashes(
 
   assert(Func.Blocks.size() == BlockOrder.size() + 2);
 
-  // Sets the YamlBFGUID in the StaleMatcher such that if either the profiled or
-  // binary function dne or they are not equal, to zero, as not to perform
-  // pseudo probe block matching. Otherwise, the YamlBF's GUID is used for
-  // pseudo probe block matching.
-  const MCPseudoProbeDecoder *PseudoProbeDecoder =
-      opts::ProfileUsePseudoProbes && opts::StaleMatchingWithBlockPseudoProbes
-          ? BC.getPseudoProbeDecoder()
-          : nullptr;
-  uint64_t BFPseudoProbeDescHash = 0;
-  if (opts::ProfileUsePseudoProbes &&
-      opts::StaleMatchingWithBlockPseudoProbes && BF.getGUID() != 0) {
-    assert(PseudoProbeDecoder &&
-           "If BF has pseudo probe, BC should have a pseudo probe decoder");
-    auto &GUID2FuncDescMap = PseudoProbeDecoder->getGUID2FuncDescMap();
-    auto It = GUID2FuncDescMap.find(BF.getGUID());
-    if (It != GUID2FuncDescMap.end())
-      BFPseudoProbeDescHash = It->second.FuncHash;
-  }
-
-  StaleMatcher Matcher(YamlBF.GUID);
+  StaleMatcher Matcher;
   std::vector<uint64_t> CallHashes;
   std::vector<FlowBlock *> Blocks;
   std::vector<BlendedBlockHash> BlendedHashes;
@@ -672,38 +602,55 @@ size_t matchWeightsByHashes(
     Blocks.push_back(&Func.Blocks[I + 1]);
     BlendedBlockHash BlendedHash(BB->getHash());
     BlendedHashes.push_back(BlendedHash);
-    // Collects pseudo probes attached to the BB for use in the StaleMatcher.
-    if (opts::ProfileUsePseudoProbes &&
-        opts::StaleMatchingWithBlockPseudoProbes && BFPseudoProbeDescHash &&
-        YamlBF.PseudoProbeDescHash &&
-        BFPseudoProbeDescHash == YamlBF.PseudoProbeDescHash) {
-      assert(PseudoProbeDecoder &&
-             "If pseudo probes are in use, psuedo probe decoder should exist");
-      const AddressProbesMap &ProbeMap =
-          PseudoProbeDecoder->getAddress2ProbesMap();
-      const uint64_t FuncAddr = BF.getAddress();
-      const std::pair<uint64_t, uint64_t> &BlockRange =
-          BB->getInputAddressRange();
-      const auto &BlockProbes =
-          llvm::make_range(ProbeMap.lower_bound(FuncAddr + BlockRange.first),
-                           ProbeMap.lower_bound(FuncAddr + BlockRange.second));
-      for (const auto &[_, Probes] : BlockProbes) {
-        for (const MCDecodedPseudoProbe &Probe : Probes) {
-          if (Probe.getType() != static_cast<uint8_t>(PseudoProbeType::Block))
-            continue;
-          if (Probe.getInlineTreeNode()->hasInlineSite())
-            Matcher.mapGUIDAndIndexToProbe(Probe.getGuid(), Probe.getIndex(),
-                                           &Probe);
-          else
-            Matcher.mapIndexToProbe(Probe.getIndex(), &Probe);
-          Matcher.mapProbeToBB(&Probe, Blocks[I]);
-        }
-      }
-    }
 
     LLVM_DEBUG(dbgs() << "BB with index " << I << " has hash = "
                       << Twine::utohexstr(BB->getHash()) << "\n");
   }
+  // Collects function pseudo probes for use in the StaleMatcher.
+  if (opts::StaleMatchingWithBlockPseudoProbes) {
+    const MCPseudoProbeDecoder *PseudoProbeDecoder = BC.getPseudoProbeDecoder();
+    assert(PseudoProbeDecoder &&
+           "If pseudo probes are in use, pseudo probe decoder should exist");
+    const AddressProbesMap &ProbeMap =
+        PseudoProbeDecoder->getAddress2ProbesMap();
+    const uint64_t FuncAddr = BF.getAddress();
+    for (const MCDecodedPseudoProbe &Probe :
+         ProbeMap.find(FuncAddr, FuncAddr + BF.getSize()))
+      if (const BinaryBasicBlock *BB =
+              BF.getBasicBlockContainingOffset(Probe.getAddress() - FuncAddr))
+        Matcher.mapProbeToBB(&Probe, Blocks[BB->getIndex()]);
+    // Match inline tree nodes by GUID, checksum, parent, and call site.
+    unsigned MatchedNodes = 0;
+    const MCDecodedPseudoProbeInlineTree *DummyInlineRoot =
+        &PseudoProbeDecoder->getDummyInlineRoot();
+    for (const yaml::bolt::InlineTreeInfo &InlineTreeNode : YamlBF.InlineTree) {
+      uint64_t GUID = InlineTreeNode.GUID;
+      uint64_t Hash = InlineTreeNode.Hash;
+      uint32_t InlineTreeNodeId = InlineTreeNode.Index;
+      uint32_t ParentId = InlineTreeNode.ParentIndex;
+      uint32_t CallSiteProbe = InlineTreeNode.CallSiteProbe;
+      const MCDecodedPseudoProbeInlineTree *ParentNode =
+          InlineTreeNodeId ? Matcher.getInlineTreeNode(ParentId)
+                           : DummyInlineRoot;
+      if (!ParentNode)
+        continue;
+      for (const MCDecodedPseudoProbeInlineTree &Child :
+           ParentNode->getChildren()) {
+        if (Child.Guid != GUID ||
+            PseudoProbeDecoder->getFuncDescForGUID(GUID)->FuncHash != Hash)
+          continue;
+        // Check inline site for non-top-level inline tree nodes.
+        if (ParentNode != DummyInlineRoot &&
+            std::get<1>(Child.getInlineSite()) != CallSiteProbe)
+          continue;
+        Matcher.mapInlineTreeNode(InlineTreeNodeId, &Child);
+        ++MatchedNodes;
+        break;
+      }
+    }
+    LLVM_DEBUG(errs() << "matched " << MatchedNodes << "/"
+                      << YamlBF.InlineTree.size() << " inline tree nodes\n");
+  }
   Matcher.init(Blocks, BlendedHashes, CallHashes);
 
   // Index in yaml profile => corresponding (matched) block
@@ -724,7 +671,9 @@ size_t matchWeightsByHashes(
       else
         llvm_unreachable("Unhandled HashFunction");
     }
-    MatchedBlock = Matcher.matchBlock(YamlHash, CallHash, YamlBB.PseudoProbes);
+    StaleMatcher::MatchMethod Method;
+    std::tie(MatchedBlock, Method) = Matcher.matchBlock(
+        YamlHash, CallHash, YamlBB.PseudoProbes, YamlBF.InlineTree);
     if (MatchedBlock == nullptr && YamlBB.Index == 0)
       MatchedBlock = Blocks[0];
     if (MatchedBlock != nullptr) {
@@ -737,16 +686,34 @@ size_t matchWeightsByHashes(
                         << " with hash " << Twine::utohexstr(BinHash.combine())
                         << "\n");
       // Update matching stats accounting for the matched block.
-      if (Matcher.isHighConfidenceMatch(BinHash, YamlHash)) {
+      switch (Method) {
+      case StaleMatcher::MATCH_EXACT:
         ++BC.Stats.NumExactMatchedBlocks;
         BC.Stats.ExactMatchedSampleCount += YamlBB.ExecCount;
-        LLVM_DEBUG(dbgs() << "  exact match\n");
-      } else if (Matcher.isPseudoProbeMatch(YamlHash)) {
-        ++BC.Stats.NumPseudoProbeMatchedBlocks;
-        BC.Stats.PseudoProbeMatchedSampleCount += YamlBB.ExecCount;
-        LLVM_DEBUG(dbgs() << "  pseudo probe match\n");
-      } else {
-        LLVM_DEBUG(dbgs() << "  loose match\n");
+        LLVM_DEBUG(dbgs() << "  exact hash match\n");
+        break;
+      case StaleMatcher::MATCH_PROBE_EXACT:
+        ++BC.Stats.NumPseudoProbeExactMatchedBlocks;
+        BC.Stats.PseudoProbeExactMatchedSampleCount += YamlBB.ExecCount;
+        LLVM_DEBUG(dbgs() << "  exact pseudo probe match\n");
+        break;
+      case StaleMatcher::MATCH_PROBE_LOOSE:
+        ++BC.Stats.NumPseudoProbeLooseMatchedBlocks;
+        BC.Stats.PseudoProbeLooseMatchedSampleCount += YamlBB.ExecCount;
+        LLVM_DEBUG(dbgs() << "  loose pseudo probe match\n");
+        break;
+      case StaleMatcher::MATCH_CALL:
+        ++BC.Stats.NumCallMatchedBlocks;
+        BC.Stats.CallMatchedSampleCount += YamlBB.ExecCount;
+        LLVM_DEBUG(dbgs() << "  call match\n");
+        break;
+      case StaleMatcher::MATCH_OPCODE:
+        ++BC.Stats.NumLooseMatchedBlocks;
+        BC.Stats.LooseMatchedSampleCount += YamlBB.ExecCount;
+        LLVM_DEBUG(dbgs() << "  loose hash match\n");
+        break;
+      case StaleMatcher::NO_MATCH:
+        LLVM_DEBUG(dbgs() << "  no match\n");
       }
       if (YamlBB.NumInstructions == BB->size())
         ++BC.Stats.NumStaleBlocksWithEqualIcount;
@@ -761,13 +728,6 @@ size_t matchWeightsByHashes(
     BC.Stats.StaleSampleCount += YamlBB.ExecCount;
   }
 
-  if (opts::Verbosity >= 2) {
-    outs() << "BOLT-INFO: " << Matcher.getNumBlocksMatchedWithPseudoProbes()
-           << " blocks matched with pseudo probes\n"
-           << "BOLT-INFO: " << Matcher.getNumBlocksMatchedWithOpcodes()
-           << " blocks matched with opcodes\n";
-  }
-
   // Match jumps from the profile to the jumps from CFG
   std::vector<uint64_t> OutWeight(Func.Blocks.size(), 0);
   std::vector<uint64_t> InWeight(Func.Blocks.size(), 0);
diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
index 4b3f9ab4cb64ae..43ab0d9fd63e51 100644
--- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
+++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
@@ -51,6 +51,7 @@ static cl::opt<PrintPseudoProbesOptions> PrintPseudoProbes(
     cl::Hidden, cl::cat(BoltCategory));
 
 extern cl::opt<bool> ProfileWritePseudoProbes;
+extern cl::opt<bool> StaleMatchingWithBlockPseudoProbes;
 } // namespace opts
 
 namespace {
@@ -92,14 +93,15 @@ class PseudoProbeRewriter final : public MetadataRewriter {
 };
 
 Error PseudoProbeRewriter::preCFGInitializer() {
-  if (opts::ProfileWritePseudoProbes)
-    parsePseudoProbe(true);
+  if (opts::ProfileWritePseudoProbes ||
+      opts::StaleMatchingWithBlockPseudoProbes)
+    parsePseudoProbe(opts::ProfileWritePseudoProbes);
 
   return Error::success();
 }
 
 Error PseudoProbeRewriter::postEmitFinalizer() {
-  if (!opts::ProfileWritePseudoProbes)
+  if (!opts::StaleMatchingWithBlockPseudoProbes)
     parsePseudoProbe();
   updatePseudoProbes();
 

>From 8fafc048b46e8076db353ea6a7eea7b2ebae48d7 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Wed, 4 Sep 2024 16:05:10 -0700
Subject: [PATCH 2/3] drop logIf

Created using spr 1.3.4
---
 bolt/lib/Profile/StaleProfileMatching.cpp | 21 +++------------------
 1 file changed, 3 insertions(+), 18 deletions(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 4f637c3e4f2eb2..110769aa31e8e1 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -331,19 +331,12 @@ class StaleMatcher {
     if (!opts::StaleMatchingWithBlockPseudoProbes)
       return {nullptr, false};
 
-    auto logIf = [](bool Expr, StringRef Message) {
-      LLVM_DEBUG(if (Expr) errs() << Message << '\n');
-      return Expr;
-    };
-
     DenseMap<const FlowBlock *, uint32_t> FlowBlockMatchCount;
 
     for (const yaml::bolt::PseudoProbeInfo &Probe : BlockPseudoProbes) {
       const MCDecodedPseudoProbeInlineTree *InlineTreeNode =
           getInlineTreeNode(Probe.InlineTreeIndex);
-      if (logIf(!InlineTreeNode,
-                formatv("no matching inline tree node for {0} {1}",
-                        Probe.InlineTreeIndex, Probe.Index).str())) {
+      if (!InlineTreeNode) {
         ++FlowBlockMatchCount[nullptr];
         continue;
       }
@@ -355,17 +348,12 @@ class StaleMatcher {
         BinaryProbe = &FuncProbe;
         break;
       }
-      if (logIf(!BinaryProbe, formatv("no matching binary probe for {0} {1}",
-                                      Probe.InlineTreeIndex, Probe.Index)
-                                  .str())) {
+      if (!BinaryProbe) {
         ++FlowBlockMatchCount[nullptr];
         continue;
       }
       auto It = BBPseudoProbeToBlock.find(BinaryProbe);
-      if (logIf(It == BBPseudoProbeToBlock.end(),
-                formatv("no probe->block for {0} {1}", Probe.InlineTreeIndex,
-                        Probe.Index)
-                    .str())) {
+      if (It == BBPseudoProbeToBlock.end()) {
         ++FlowBlockMatchCount[nullptr];
         continue;
       }
@@ -376,9 +364,6 @@ class StaleMatcher {
     uint32_t TotalMatchCount = 0;
     const FlowBlock *BestMatchBlock = nullptr;
     for (auto &[Block, Count] : FlowBlockMatchCount) {
-      logIf(true, formatv("block {0} count {1}",
-                          Block ? Block->Index : UINT64_MAX, Count)
-                      .str());
       TotalMatchCount += Count;
       if (Count > BestMatchCount ||
           (Count == BestMatchCount && !BestMatchBlock)) {

>From ebd3acfb23808721059c73defc5a33965fbb57ff Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Thu, 26 Sep 2024 13:50:19 -0700
Subject: [PATCH 3/3] Allow null block participate in majority vote, improves
 run-time performance

Created using spr 1.3.4
---
 bolt/lib/Profile/StaleProfileMatching.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 4c60b9df875c5d..ef70b979bad201 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -372,8 +372,6 @@ class StaleMatcher {
     const FlowBlock *BestMatchBlock = nullptr;
     for (auto &[FlowBlock, Count] : FlowBlockMatchCount) {
       TotalMatchCount += Count;
-      if (!FlowBlock)
-        continue;
       if (Count > BestMatchCount ||
           (Count == BestMatchCount && !BestMatchBlock)) {
         BestMatchBlock = FlowBlock;



More information about the llvm-branch-commits mailing list