[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

Shaw Young via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Jul 23 17:16:32 PDT 2024


https://github.com/shawbyoung updated https://github.com/llvm/llvm-project/pull/99891

>From 0274f697376264c2d77816190f9a434f64e79089 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Mon, 22 Jul 2024 11:56:23 -0700
Subject: [PATCH 01/10] Changed assignment of profiles with pseudo probe index

Created using spr 1.3.4
---
 bolt/lib/Profile/StaleProfileMatching.cpp     | 85 +++++++++++++++----
 .../X86/match-blocks-with-pseudo-probes.test  | 25 ++----
 2 files changed, 78 insertions(+), 32 deletions(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 4105f626fb5b6..c135ee5ff4837 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -195,11 +195,15 @@ class StaleMatcher {
   void init(const std::vector<FlowBlock *> &Blocks,
             const std::vector<BlendedBlockHash> &Hashes,
             const std::vector<uint64_t> &CallHashes,
-            std::optional<uint64_t> YamlBFGUID) {
+            const std::unordered_map<uint64_t,
+                                     std::vector<const MCDecodedPseudoProbe *>>
+                IndexToBinaryPseudoProbes,
+            const std::unordered_map<const MCDecodedPseudoProbe *, FlowBlock *>
+                BinaryPseudoProbeToBlock,
+            const uint64_t YamlBFGUID) {
     assert(Blocks.size() == Hashes.size() &&
            Hashes.size() == CallHashes.size() &&
            "incorrect matcher initialization");
-
     for (size_t I = 0; I < Blocks.size(); I++) {
       FlowBlock *Block = Blocks[I];
       uint16_t OpHash = Hashes[I].OpcodeHash;
@@ -209,6 +213,8 @@ class StaleMatcher {
             std::make_pair(Hashes[I], Block));
       this->Blocks.push_back(Block);
     }
+    this->IndexToBinaryPseudoProbes = IndexToBinaryPseudoProbes;
+    this->BinaryPseudoProbeToBlock = BinaryPseudoProbeToBlock;
     this->YamlBFGUID = YamlBFGUID;
   }
 
@@ -234,10 +240,14 @@ class StaleMatcher {
   using HashBlockPairType = std::pair<BlendedBlockHash, FlowBlock *>;
   std::unordered_map<uint16_t, std::vector<HashBlockPairType>> OpHashToBlocks;
   std::unordered_map<uint64_t, std::vector<HashBlockPairType>> CallHashToBlocks;
-  std::vector<FlowBlock *> Blocks;
+  std::unordered_map<uint64_t, std::vector<const MCDecodedPseudoProbe *>>
+      IndexToBinaryPseudoProbes;
+  std::unordered_map<const MCDecodedPseudoProbe *, FlowBlock *>
+      BinaryPseudoProbeToBlock;
+  std::vector<const FlowBlock *> Blocks;
   // If the pseudo probe checksums of the profiled and binary functions are
   // equal, then the YamlBF's GUID is defined and used to match blocks.
-  std::optional<uint64_t> YamlBFGUID;
+  uint64_t YamlBFGUID;
 
   // Uses OpcodeHash to find the most similar block for a given hash.
   const FlowBlock *matchWithOpcodes(BlendedBlockHash BlendedHash) const {
@@ -284,7 +294,7 @@ class StaleMatcher {
     // Searches for the pseudo probe attached to the matched function's block,
     // ignoring pseudo probes attached to function calls and inlined functions'
     // blocks.
-    outs() << "match with pseudo probes\n";
+    std::vector<const yaml::bolt::PseudoProbeInfo *> BlockPseudoProbes;
     for (const auto &PseudoProbe : PseudoProbes) {
       // Ensures that pseudo probe information belongs to the appropriate
       // function and not an inlined function.
@@ -293,11 +303,30 @@ class StaleMatcher {
       // Skips pseudo probes attached to function calls.
       if (PseudoProbe.Type != static_cast<uint8_t>(PseudoProbeType::Block))
         continue;
-      assert(PseudoProbe.Index < Blocks.size() &&
-             "pseudo probe index out of range");
-      return Blocks[PseudoProbe.Index];
+
+      BlockPseudoProbes.push_back(&PseudoProbe);
     }
-    return nullptr;
+
+    // Returns nullptr if there is not a 1:1 mapping of the yaml block pseudo
+    // probe and binary pseudo probe.
+    if (BlockPseudoProbes.size() == 0 || BlockPseudoProbes.size() > 1)
+      return nullptr;
+
+    uint64_t Index = BlockPseudoProbes[0]->Index;
+    assert(Index < Blocks.size() && "Invalid pseudo probe index");
+
+    auto It = IndexToBinaryPseudoProbes.find(Index);
+    assert(It != IndexToBinaryPseudoProbes.end() &&
+           "All blocks should have a pseudo probe");
+    if (It->second.size() > 1)
+      return nullptr;
+
+    const MCDecodedPseudoProbe *BinaryPseudoProbe = It->second[0];
+    auto BinaryPseudoProbeIt = BinaryPseudoProbeToBlock.find(BinaryPseudoProbe);
+    assert(BinaryPseudoProbeIt != BinaryPseudoProbeToBlock.end() &&
+           "All binary pseudo probes should belong a binary basic block");
+
+    return BinaryPseudoProbeIt->second;
   }
 };
 
@@ -491,6 +520,11 @@ size_t matchWeightsByHashes(
   std::vector<uint64_t> CallHashes;
   std::vector<FlowBlock *> Blocks;
   std::vector<BlendedBlockHash> BlendedHashes;
+  std::unordered_map<uint64_t, std::vector<const MCDecodedPseudoProbe *>>
+      IndexToBinaryPseudoProbes;
+  std::unordered_map<const MCDecodedPseudoProbe *, FlowBlock *>
+      BinaryPseudoProbeToBlock;
+  const MCPseudoProbeDecoder *PseudoProbeDecoder = BC.getPseudoProbeDecoder();
   for (uint64_t I = 0; I < BlockOrder.size(); I++) {
     const BinaryBasicBlock *BB = BlockOrder[I];
     assert(BB->getHash() != 0 && "empty hash of BinaryBasicBlock");
@@ -510,9 +544,27 @@ size_t matchWeightsByHashes(
     Blocks.push_back(&Func.Blocks[I + 1]);
     BlendedBlockHash BlendedHash(BB->getHash());
     BlendedHashes.push_back(BlendedHash);
+    if (PseudoProbeDecoder) {
+      const AddressProbesMap &ProbeMap =
+          PseudoProbeDecoder->getAddress2ProbesMap();
+      const uint64_t FuncAddr = BF.getAddress();
+      const std::pair<uint64_t, uint64_t> &BlockRange =
+          BB->getInputAddressRange();
+      const auto &BlockProbes =
+          llvm::make_range(ProbeMap.lower_bound(FuncAddr + BlockRange.first),
+                           ProbeMap.lower_bound(FuncAddr + BlockRange.second));
+      for (const auto &[_, Probes] : BlockProbes) {
+        for (const MCDecodedPseudoProbe &Probe : Probes) {
+          IndexToBinaryPseudoProbes[Probe.getIndex()].push_back(&Probe);
+          BinaryPseudoProbeToBlock[&Probe] = Blocks[I];
+        }
+      }
+    }
+
     LLVM_DEBUG(dbgs() << "BB with index " << I << " has hash = "
                       << Twine::utohexstr(BB->getHash()) << "\n");
   }
+
   uint64_t BFPseudoProbeDescHash = 0;
   if (BF.hasPseudoProbe()) {
     const MCPseudoProbeDecoder *PseudoProbeDecoder = BC.getPseudoProbeDecoder();
@@ -521,14 +573,15 @@ size_t matchWeightsByHashes(
     BFPseudoProbeDescHash =
         PseudoProbeDecoder->getFuncDescForGUID(BF.getGUID())->FuncHash;
   }
-  bool MatchWithPseudoProbes =
-      BFPseudoProbeDescHash && YamlBF.PseudoProbeDescHash
-          ? BFPseudoProbeDescHash == YamlBF.PseudoProbeDescHash
-          : false;
+  uint64_t YamlBFGUID =
+      BFPseudoProbeDescHash && YamlBF.PseudoProbeDescHash &&
+              BFPseudoProbeDescHash == YamlBF.PseudoProbeDescHash
+          ? static_cast<uint64_t>(YamlBF.GUID)
+          : 0;
+
   StaleMatcher Matcher;
-  Matcher.init(Blocks, BlendedHashes, CallHashes,
-               MatchWithPseudoProbes ? std::make_optional(YamlBF.GUID)
-                                     : std::nullopt);
+  Matcher.init(Blocks, BlendedHashes, CallHashes, IndexToBinaryPseudoProbes,
+               BinaryPseudoProbeToBlock, YamlBFGUID);
 
   // Index in yaml profile => corresponding (matched) block
   DenseMap<uint64_t, const FlowBlock *> MatchedBlocks;
diff --git a/bolt/test/X86/match-blocks-with-pseudo-probes.test b/bolt/test/X86/match-blocks-with-pseudo-probes.test
index e0adb6948e206..1d74b92a11c56 100644
--- a/bolt/test/X86/match-blocks-with-pseudo-probes.test
+++ b/bolt/test/X86/match-blocks-with-pseudo-probes.test
@@ -5,7 +5,7 @@
 # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o
 # RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
 # RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \
-# RUN:   --print-cfg --funcs=main --profile-ignore-hash=0 2>&1 | FileCheck %s
+# RUN:   --print-cfg --funcs=main --profile-ignore-hash=0 --infer-stale-profile 2>&1 | FileCheck %s
 
 # CHECK: BOLT-INFO: matched 0 functions with similar names
 
@@ -47,23 +47,16 @@ header:
   dfs-order:       false
   hash-func:       xxh3
 functions:
-  - name:            main
-    fid:             0
-    hash:            0x0000000000000001
-    exec:            1
-    nblocks:         6
+  - name:                   main
+    fid:                    0
+    hash:                   0x0000000000000001
+    exec:                   1
+    nblocks:                6
+    guid:                   0xDB956436E78DD5FA
+    pseudo_probe_desc_hash: 15822663052811949562    #lookup in code in a second
     blocks:
       - bid:             1
         hash:            0x0000000000000001
         insns:           1
         succ:            [ { bid: 3, cnt: 1} ]
-  - name:            foo
-    fid:             1
-    hash:            0x0000000000000002
-    exec:            1
-    nblocks:         6
-    blocks:
-      - bid:             1
-        hash:            0x0000000000000002
-        insns:           1
-        succ:            [ { bid: 3, cnt: 1} ]
+        pseudo_probes: [ { guid: 0xDB956436E78DD5FA, id: 0, type: 0 } ]

>From 7e3d8d6b171954836c858f0814befc54f70bd3aa Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Mon, 22 Jul 2024 14:27:44 -0700
Subject: [PATCH 02/10] Edit test and assert

Created using spr 1.3.4
---
 bolt/lib/Profile/StaleProfileMatching.cpp          | 2 +-
 bolt/test/X86/match-blocks-with-pseudo-probes.test | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index c135ee5ff4837..71e0579415fc6 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -313,7 +313,7 @@ class StaleMatcher {
       return nullptr;
 
     uint64_t Index = BlockPseudoProbes[0]->Index;
-    assert(Index < Blocks.size() && "Invalid pseudo probe index");
+    assert(Index <= Blocks.size() && "Invalid pseudo probe index");
 
     auto It = IndexToBinaryPseudoProbes.find(Index);
     assert(It != IndexToBinaryPseudoProbes.end() &&
diff --git a/bolt/test/X86/match-blocks-with-pseudo-probes.test b/bolt/test/X86/match-blocks-with-pseudo-probes.test
index 1d74b92a11c56..6dc01eb492eae 100644
--- a/bolt/test/X86/match-blocks-with-pseudo-probes.test
+++ b/bolt/test/X86/match-blocks-with-pseudo-probes.test
@@ -53,10 +53,10 @@ functions:
     exec:                   1
     nblocks:                6
     guid:                   0xDB956436E78DD5FA
-    pseudo_probe_desc_hash: 15822663052811949562    #lookup in code in a second
+    pseudo_probe_desc_hash: 4294967295    #lookup in code in a second
     blocks:
       - bid:             1
         hash:            0x0000000000000001
         insns:           1
         succ:            [ { bid: 3, cnt: 1} ]
-        pseudo_probes: [ { guid: 0xDB956436E78DD5FA, id: 0, type: 0 } ]
+        pseudo_probes: [ { guid: 0xDB956436E78DD5FA, id: 1, type: 0 } ]

>From 780a07ee5a4b2bc3f5bd6e33fb072d67d1113c89 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Tue, 23 Jul 2024 11:37:14 -0700
Subject: [PATCH 03/10] Fixed failing asserts, pruned prospective pseudo probes
 for matching

Created using spr 1.3.4
---
 bolt/lib/Profile/StaleProfileMatching.cpp | 56 ++++++++++++++++-------
 1 file changed, 40 insertions(+), 16 deletions(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 71e0579415fc6..d45066ed66ef2 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -45,6 +45,7 @@ namespace opts {
 
 extern cl::opt<bool> TimeRewrite;
 extern cl::OptionCategory BoltOptCategory;
+extern cl::opt<unsigned> Verbosity;
 
 cl::opt<bool>
     InferStaleProfile("infer-stale-profile",
@@ -197,9 +198,9 @@ class StaleMatcher {
             const std::vector<uint64_t> &CallHashes,
             const std::unordered_map<uint64_t,
                                      std::vector<const MCDecodedPseudoProbe *>>
-                IndexToBinaryPseudoProbes,
+                &IndexToBinaryPseudoProbes,
             const std::unordered_map<const MCDecodedPseudoProbe *, FlowBlock *>
-                BinaryPseudoProbeToBlock,
+                &BinaryPseudoProbeToBlock,
             const uint64_t YamlBFGUID) {
     assert(Blocks.size() == Hashes.size() &&
            Hashes.size() == CallHashes.size() &&
@@ -294,6 +295,9 @@ class StaleMatcher {
     // Searches for the pseudo probe attached to the matched function's block,
     // ignoring pseudo probes attached to function calls and inlined functions'
     // blocks.
+    if (opts::Verbosity >= 2)
+      outs() << "BOLT-INFO: attempting to match block with pseudo probes\n";
+
     std::vector<const yaml::bolt::PseudoProbeInfo *> BlockPseudoProbes;
     for (const auto &PseudoProbe : PseudoProbes) {
       // Ensures that pseudo probe information belongs to the appropriate
@@ -306,26 +310,41 @@ class StaleMatcher {
 
       BlockPseudoProbes.push_back(&PseudoProbe);
     }
-
     // Returns nullptr if there is not a 1:1 mapping of the yaml block pseudo
     // probe and binary pseudo probe.
-    if (BlockPseudoProbes.size() == 0 || BlockPseudoProbes.size() > 1)
+    if (BlockPseudoProbes.size() == 0) {
+      if (opts::Verbosity >= 2)
+        errs() << "BOLT-WARNING: no pseudo probes in profile block\n";
       return nullptr;
-
+    }
+    if (BlockPseudoProbes.size() > 1) {
+      if (opts::Verbosity >= 2)
+        errs() << "BOLT-WARNING: more than 1 pseudo probes in profile block\n";
+      return nullptr;
+    }
     uint64_t Index = BlockPseudoProbes[0]->Index;
-    assert(Index <= Blocks.size() && "Invalid pseudo probe index");
-
+    if (Index > Blocks.size()) {
+      if (opts::Verbosity >= 2)
+        errs() << "BOLT-WARNING: invalid index block pseudo probe index\n";
+      return nullptr;
+    }
     auto It = IndexToBinaryPseudoProbes.find(Index);
-    assert(It != IndexToBinaryPseudoProbes.end() &&
-           "All blocks should have a pseudo probe");
-    if (It->second.size() > 1)
+    if (It == IndexToBinaryPseudoProbes.end()) {
+      if (opts::Verbosity >= 2)
+        errs() << "BOLT-WARNING: no block pseudo probes found within binary "
+                  "block at index\n";
       return nullptr;
-
+    }
+    if (It->second.size() > 1) {
+      if (opts::Verbosity >= 2)
+        errs() << "BOLT-WARNING: more than 1 block pseudo probes in binary "
+                  "block at index\n";
+      return nullptr;
+    }
     const MCDecodedPseudoProbe *BinaryPseudoProbe = It->second[0];
     auto BinaryPseudoProbeIt = BinaryPseudoProbeToBlock.find(BinaryPseudoProbe);
     assert(BinaryPseudoProbeIt != BinaryPseudoProbeToBlock.end() &&
            "All binary pseudo probes should belong a binary basic block");
-
     return BinaryPseudoProbeIt->second;
   }
 };
@@ -555,6 +574,10 @@ size_t matchWeightsByHashes(
                            ProbeMap.lower_bound(FuncAddr + BlockRange.second));
       for (const auto &[_, Probes] : BlockProbes) {
         for (const MCDecodedPseudoProbe &Probe : Probes) {
+          if (Probe.getInlineTreeNode()->hasInlineSite())
+            continue;
+          if (Probe.getType() != static_cast<uint8_t>(PseudoProbeType::Block))
+            continue;
           IndexToBinaryPseudoProbes[Probe.getIndex()].push_back(&Probe);
           BinaryPseudoProbeToBlock[&Probe] = Blocks[I];
         }
@@ -566,12 +589,13 @@ size_t matchWeightsByHashes(
   }
 
   uint64_t BFPseudoProbeDescHash = 0;
-  if (BF.hasPseudoProbe()) {
-    const MCPseudoProbeDecoder *PseudoProbeDecoder = BC.getPseudoProbeDecoder();
+  if (BF.getGUID() != 0) {
     assert(PseudoProbeDecoder &&
            "If BF has pseudo probe, BC should have a pseudo probe decoder");
-    BFPseudoProbeDescHash =
-        PseudoProbeDecoder->getFuncDescForGUID(BF.getGUID())->FuncHash;
+    auto &GUID2FuncDescMap = PseudoProbeDecoder->getGUID2FuncDescMap();
+    auto It = GUID2FuncDescMap.find(BF.getGUID());
+    if (It != GUID2FuncDescMap.end())
+      BFPseudoProbeDescHash = It->second.FuncHash;
   }
   uint64_t YamlBFGUID =
       BFPseudoProbeDescHash && YamlBF.PseudoProbeDescHash &&

>From 1638ac1dacec63d9099ae3c19f2fee7c0797ed71 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Tue, 23 Jul 2024 14:24:02 -0700
Subject: [PATCH 04/10] Added logging for pseudo probe block matching

Created using spr 1.3.4
---
 bolt/include/bolt/Core/BinaryContext.h    | 12 ++++++---
 bolt/lib/Passes/BinaryPasses.cpp          | 18 +++++++++++---
 bolt/lib/Profile/StaleProfileMatching.cpp | 30 +++++++++++++++++------
 3 files changed, 44 insertions(+), 16 deletions(-)

diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index b3cf9f834cc08..39f2ac512d305 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -717,12 +717,16 @@ class BinaryContext {
     /// Stats for stale profile matching:
     ///   the total number of basic blocks in the profile
     uint32_t NumStaleBlocks{0};
-    ///   the number of matched basic blocks
-    uint32_t NumMatchedBlocks{0};
+    ///   the number of exactly matched basic blocks
+    uint32_t NumExactMatchedBlocks{0};
+    ///   the number of pseudo probe matched basic blocks
+    uint32_t NumPseudoProbeMatchedBlocks{0};
     ///   the total count of samples in the profile
     uint64_t StaleSampleCount{0};
-    ///   the count of matched samples
-    uint64_t MatchedSampleCount{0};
+    ///   the count of exactly matched samples
+    uint64_t ExactMatchedSampleCount{0};
+    ///   the count of pseudo probe matched samples
+    uint64_t PseudoProbeMatchedSampleCount{0};
     ///   the number of stale functions that have matching number of blocks in
     ///   the profile
     uint64_t NumStaleFuncsWithEqualBlockCount{0};
diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
index fa95ad7324ac1..b786f07a6a665 100644
--- a/bolt/lib/Passes/BinaryPasses.cpp
+++ b/bolt/lib/Passes/BinaryPasses.cpp
@@ -1519,10 +1519,20 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
         "BOLT-INFO: inference found an exact match for %.2f%% of basic blocks"
         " (%zu out of %zu stale) responsible for %.2f%% samples"
         " (%zu out of %zu stale)\n",
-        100.0 * BC.Stats.NumMatchedBlocks / BC.Stats.NumStaleBlocks,
-        BC.Stats.NumMatchedBlocks, BC.Stats.NumStaleBlocks,
-        100.0 * BC.Stats.MatchedSampleCount / BC.Stats.StaleSampleCount,
-        BC.Stats.MatchedSampleCount, BC.Stats.StaleSampleCount);
+        100.0 * BC.Stats.NumExactMatchedBlocks / BC.Stats.NumStaleBlocks,
+        BC.Stats.NumExactMatchedBlocks, BC.Stats.NumStaleBlocks,
+        100.0 * BC.Stats.ExactMatchedSampleCount / BC.Stats.StaleSampleCount,
+        BC.Stats.ExactMatchedSampleCount, BC.Stats.StaleSampleCount);
+    BC.outs() << format(
+        "BOLT-INFO: inference found a pseudo probe match for %.2f%% of basic "
+        "blocks"
+        " (%zu out of %zu stale) responsible for %.2f%% samples"
+        " (%zu out of %zu stale)\n",
+        100.0 * BC.Stats.NumPseudoProbeMatchedBlocks / BC.Stats.NumStaleBlocks,
+        BC.Stats.NumPseudoProbeMatchedBlocks, BC.Stats.NumStaleBlocks,
+        100.0 * BC.Stats.PseudoProbeMatchedSampleCount /
+            BC.Stats.StaleSampleCount,
+        BC.Stats.PseudoProbeMatchedSampleCount, BC.Stats.StaleSampleCount);
   }
 
   if (const uint64_t NumUnusedObjects = BC.getNumUnusedProfiledObjects()) {
diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index d45066ed66ef2..919f3a732b355 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -220,13 +220,14 @@ class StaleMatcher {
   }
 
   /// Find the most similar block for a given hash.
-  const FlowBlock *matchBlock(
-      BlendedBlockHash BlendedHash, uint64_t CallHash,
-      const std::vector<yaml::bolt::PseudoProbeInfo> &PseudoProbes) const {
+  const FlowBlock *
+  matchBlock(BlendedBlockHash BlendedHash, uint64_t CallHash,
+             const std::vector<yaml::bolt::PseudoProbeInfo> &PseudoProbes) {
     const FlowBlock *BestBlock = matchWithOpcodes(BlendedHash);
     BestBlock = BestBlock ? BestBlock : matchWithCalls(BlendedHash, CallHash);
-    return BestBlock || !YamlBFGUID ? BestBlock
-                                    : matchWithPseudoProbes(PseudoProbes);
+    return BestBlock || !YamlBFGUID
+               ? BestBlock
+               : matchWithPseudoProbes(BlendedHash, PseudoProbes);
   }
 
   /// Returns true if the two basic blocks (in the binary and in the profile)
@@ -237,6 +238,11 @@ class StaleMatcher {
     return Hash1.InstrHash == Hash2.InstrHash;
   }
 
+  bool isPseudoProbeMatch(BlendedBlockHash YamlBBHash) {
+    return MatchedWithPseudoProbes.find(YamlBBHash.combine()) !=
+           MatchedWithPseudoProbes.end();
+  }
+
 private:
   using HashBlockPairType = std::pair<BlendedBlockHash, FlowBlock *>;
   std::unordered_map<uint16_t, std::vector<HashBlockPairType>> OpHashToBlocks;
@@ -245,6 +251,7 @@ class StaleMatcher {
       IndexToBinaryPseudoProbes;
   std::unordered_map<const MCDecodedPseudoProbe *, FlowBlock *>
       BinaryPseudoProbeToBlock;
+  std::unordered_set<uint64_t> MatchedWithPseudoProbes;
   std::vector<const FlowBlock *> Blocks;
   // If the pseudo probe checksums of the profiled and binary functions are
   // equal, then the YamlBF's GUID is defined and used to match blocks.
@@ -291,7 +298,8 @@ class StaleMatcher {
   // Uses pseudo probe information to attach the profile to the appropriate
   // block.
   const FlowBlock *matchWithPseudoProbes(
-      const std::vector<yaml::bolt::PseudoProbeInfo> &PseudoProbes) const {
+      BlendedBlockHash BlendedHash,
+      const std::vector<yaml::bolt::PseudoProbeInfo> &PseudoProbes) {
     // Searches for the pseudo probe attached to the matched function's block,
     // ignoring pseudo probes attached to function calls and inlined functions'
     // blocks.
@@ -345,6 +353,8 @@ class StaleMatcher {
     auto BinaryPseudoProbeIt = BinaryPseudoProbeToBlock.find(BinaryPseudoProbe);
     assert(BinaryPseudoProbeIt != BinaryPseudoProbeToBlock.end() &&
            "All binary pseudo probes should belong a binary basic block");
+
+    MatchedWithPseudoProbes.insert(BlendedHash.combine());
     return BinaryPseudoProbeIt->second;
   }
 };
@@ -639,9 +649,13 @@ size_t matchWeightsByHashes(
                         << "\n");
       // Update matching stats accounting for the matched block.
       if (Matcher.isHighConfidenceMatch(BinHash, YamlHash)) {
-        ++BC.Stats.NumMatchedBlocks;
-        BC.Stats.MatchedSampleCount += YamlBB.ExecCount;
+        ++BC.Stats.NumExactMatchedBlocks;
+        BC.Stats.ExactMatchedSampleCount += YamlBB.ExecCount;
         LLVM_DEBUG(dbgs() << "  exact match\n");
+      } else if (Matcher.isPseudoProbeMatch(YamlHash)) {
+        ++BC.Stats.NumPseudoProbeMatchedBlocks;
+        BC.Stats.PseudoProbeMatchedSampleCount += YamlBB.ExecCount;
+        LLVM_DEBUG(dbgs() << "  pseudo probe match\n");
       } else {
         LLVM_DEBUG(dbgs() << "  loose match\n");
       }

>From 144716be84d2207ee98fb238b88c6495942dec21 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Tue, 23 Jul 2024 15:41:31 -0700
Subject: [PATCH 05/10] Changed pseudo probe matching failure logging to v=3

Created using spr 1.3.4
---
 bolt/lib/Profile/StaleProfileMatching.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 919f3a732b355..2d1a73bd60e8f 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -255,7 +255,7 @@ class StaleMatcher {
   std::vector<const FlowBlock *> Blocks;
   // If the pseudo probe checksums of the profiled and binary functions are
   // equal, then the YamlBF's GUID is defined and used to match blocks.
-  uint64_t YamlBFGUID;
+  uint64_t YamlBFGUID{0};
 
   // Uses OpcodeHash to find the most similar block for a given hash.
   const FlowBlock *matchWithOpcodes(BlendedBlockHash BlendedHash) const {
@@ -321,30 +321,30 @@ class StaleMatcher {
     // Returns nullptr if there is not a 1:1 mapping of the yaml block pseudo
     // probe and binary pseudo probe.
     if (BlockPseudoProbes.size() == 0) {
-      if (opts::Verbosity >= 2)
+      if (opts::Verbosity >= 3)
         errs() << "BOLT-WARNING: no pseudo probes in profile block\n";
       return nullptr;
     }
     if (BlockPseudoProbes.size() > 1) {
-      if (opts::Verbosity >= 2)
+      if (opts::Verbosity >= 3)
         errs() << "BOLT-WARNING: more than 1 pseudo probes in profile block\n";
       return nullptr;
     }
     uint64_t Index = BlockPseudoProbes[0]->Index;
     if (Index > Blocks.size()) {
-      if (opts::Verbosity >= 2)
+      if (opts::Verbosity >= 3)
         errs() << "BOLT-WARNING: invalid index block pseudo probe index\n";
       return nullptr;
     }
     auto It = IndexToBinaryPseudoProbes.find(Index);
     if (It == IndexToBinaryPseudoProbes.end()) {
-      if (opts::Verbosity >= 2)
+      if (opts::Verbosity >= 3)
         errs() << "BOLT-WARNING: no block pseudo probes found within binary "
                   "block at index\n";
       return nullptr;
     }
     if (It->second.size() > 1) {
-      if (opts::Verbosity >= 2)
+      if (opts::Verbosity >= 3)
         errs() << "BOLT-WARNING: more than 1 block pseudo probes in binary "
                   "block at index\n";
       return nullptr;

>From 29347109ada65c82fef3aa0803b18c413d9c4e6b Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Tue, 23 Jul 2024 15:48:14 -0700
Subject: [PATCH 06/10] More logging

Created using spr 1.3.4
---
 bolt/lib/Profile/StaleProfileMatching.cpp | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 2d1a73bd60e8f..3762d91ea9489 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -238,11 +238,17 @@ class StaleMatcher {
     return Hash1.InstrHash == Hash2.InstrHash;
   }
 
+  /// Returns true if a profiled block was matched with its pseudo probe.
   bool isPseudoProbeMatch(BlendedBlockHash YamlBBHash) {
     return MatchedWithPseudoProbes.find(YamlBBHash.combine()) !=
            MatchedWithPseudoProbes.end();
   }
 
+  /// Returns the number of blocks matched with pseudo probes.
+  size_t getNumBlocksMatchedWithPseudoProbes() const {
+    return MatchedWithPseudoProbes.size();
+  } 
+
 private:
   using HashBlockPairType = std::pair<BlendedBlockHash, FlowBlock *>;
   std::unordered_map<uint16_t, std::vector<HashBlockPairType>> OpHashToBlocks;
@@ -303,7 +309,7 @@ class StaleMatcher {
     // Searches for the pseudo probe attached to the matched function's block,
     // ignoring pseudo probes attached to function calls and inlined functions'
     // blocks.
-    if (opts::Verbosity >= 2)
+    if (opts::Verbosity >= 3)
       outs() << "BOLT-INFO: attempting to match block with pseudo probes\n";
 
     std::vector<const yaml::bolt::PseudoProbeInfo *> BlockPseudoProbes;
@@ -672,6 +678,11 @@ size_t matchWeightsByHashes(
     BC.Stats.StaleSampleCount += YamlBB.ExecCount;
   }
 
+  if (opts::Verbosity >= 2)
+    outs() << "BOLT-INFO: " 
+      << StaleMatcher.getNumBlocksMatchedWithPseudoProbes()
+      << " blocks matched with pseudo probes\n";
+
   // Match jumps from the profile to the jumps from CFG
   std::vector<uint64_t> OutWeight(Func.Blocks.size(), 0);
   std::vector<uint64_t> InWeight(Func.Blocks.size(), 0);

>From b74fc8b2f200b776dcf0e51d505e4e43267ef938 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Tue, 23 Jul 2024 16:03:21 -0700
Subject: [PATCH 07/10] Logging blocks matched with opcodes

Created using spr 1.3.4
---
 bolt/lib/Profile/StaleProfileMatching.cpp | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 3762d91ea9489..b31bddd47edf9 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -224,6 +224,8 @@ class StaleMatcher {
   matchBlock(BlendedBlockHash BlendedHash, uint64_t CallHash,
              const std::vector<yaml::bolt::PseudoProbeInfo> &PseudoProbes) {
     const FlowBlock *BestBlock = matchWithOpcodes(BlendedHash);
+    if (BestBlock)
+      ++MatchedWithOpcodes;
     BestBlock = BestBlock ? BestBlock : matchWithCalls(BlendedHash, CallHash);
     return BestBlock || !YamlBFGUID
                ? BestBlock
@@ -247,7 +249,10 @@ class StaleMatcher {
   /// Returns the number of blocks matched with pseudo probes.
   size_t getNumBlocksMatchedWithPseudoProbes() const {
     return MatchedWithPseudoProbes.size();
-  } 
+  }
+
+  /// Returns the number of blocks matched with opcodes.
+  size_t getNumBlocksMatchedWithOpcodes() const { return MatchedWithOpcodes; }
 
 private:
   using HashBlockPairType = std::pair<BlendedBlockHash, FlowBlock *>;
@@ -259,9 +264,8 @@ class StaleMatcher {
       BinaryPseudoProbeToBlock;
   std::unordered_set<uint64_t> MatchedWithPseudoProbes;
   std::vector<const FlowBlock *> Blocks;
-  // If the pseudo probe checksums of the profiled and binary functions are
-  // equal, then the YamlBF's GUID is defined and used to match blocks.
   uint64_t YamlBFGUID{0};
+  uint64_t MatchedWithOpcodes{0};
 
   // Uses OpcodeHash to find the most similar block for a given hash.
   const FlowBlock *matchWithOpcodes(BlendedBlockHash BlendedHash) const {
@@ -678,10 +682,13 @@ size_t matchWeightsByHashes(
     BC.Stats.StaleSampleCount += YamlBB.ExecCount;
   }
 
-  if (opts::Verbosity >= 2)
-    outs() << "BOLT-INFO: " 
-      << StaleMatcher.getNumBlocksMatchedWithPseudoProbes()
-      << " blocks matched with pseudo probes\n";
+  if (opts::Verbosity >= 2) {
+    outs() << "BOLT-INFO: "
+           << StaleMatcher.getNumBlocksMatchedWithPseudoProbes()
+           << " blocks matched with pseudo probes\n"
+           << "BOLT-INFO: " << StaleMatcher.getNumBlocksMatchedWithOpcodes()
+           << " blocks matched with opcodes\n";
+  }
 
   // Match jumps from the profile to the jumps from CFG
   std::vector<uint64_t> OutWeight(Func.Blocks.size(), 0);

>From c38fb98fb287d881ce8162fde0522d60b43da56f Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Tue, 23 Jul 2024 16:10:09 -0700
Subject: [PATCH 08/10] Updated test

Created using spr 1.3.4
---
 bolt/lib/Profile/StaleProfileMatching.cpp          | 4 ++--
 bolt/test/X86/match-blocks-with-pseudo-probes.test | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index b31bddd47edf9..c621c29a0db83 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -684,9 +684,9 @@ size_t matchWeightsByHashes(
 
   if (opts::Verbosity >= 2) {
     outs() << "BOLT-INFO: "
-           << StaleMatcher.getNumBlocksMatchedWithPseudoProbes()
+           << Matcher.getNumBlocksMatchedWithPseudoProbes()
            << " blocks matched with pseudo probes\n"
-           << "BOLT-INFO: " << StaleMatcher.getNumBlocksMatchedWithOpcodes()
+           << "BOLT-INFO: " << Matcher.getNumBlocksMatchedWithOpcodes()
            << " blocks matched with opcodes\n";
   }
 
diff --git a/bolt/test/X86/match-blocks-with-pseudo-probes.test b/bolt/test/X86/match-blocks-with-pseudo-probes.test
index 6dc01eb492eae..83f9c20f31ba6 100644
--- a/bolt/test/X86/match-blocks-with-pseudo-probes.test
+++ b/bolt/test/X86/match-blocks-with-pseudo-probes.test
@@ -7,7 +7,7 @@
 # RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \
 # RUN:   --print-cfg --funcs=main --profile-ignore-hash=0 --infer-stale-profile 2>&1 | FileCheck %s
 
-# CHECK: BOLT-INFO: matched 0 functions with similar names
+# CHECK: BOLT-INFO: inference found a pseudo probe match for 100.00% of basic blocks (1 out of 1 stale) responsible for -nan% samples (0 out of 0 stale)
 
 #--- main.s
  .text

>From b2a3ca7fd532828ae7320da6f888f20a1717bb92 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Tue, 23 Jul 2024 16:14:34 -0700
Subject: [PATCH 09/10] Rename Binary* pseudo probe maps to BB* in preparation
 for inlined block pseudo probe matching

Created using spr 1.3.4
---
 bolt/lib/Profile/StaleProfileMatching.cpp | 35 +++++++++++------------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index c621c29a0db83..4410fddaf0b21 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -198,9 +198,9 @@ class StaleMatcher {
             const std::vector<uint64_t> &CallHashes,
             const std::unordered_map<uint64_t,
                                      std::vector<const MCDecodedPseudoProbe *>>
-                &IndexToBinaryPseudoProbes,
+                &IndexToBBPseudoProbes,
             const std::unordered_map<const MCDecodedPseudoProbe *, FlowBlock *>
-                &BinaryPseudoProbeToBlock,
+                &BBPseudoProbeToBlock,
             const uint64_t YamlBFGUID) {
     assert(Blocks.size() == Hashes.size() &&
            Hashes.size() == CallHashes.size() &&
@@ -214,8 +214,8 @@ class StaleMatcher {
             std::make_pair(Hashes[I], Block));
       this->Blocks.push_back(Block);
     }
-    this->IndexToBinaryPseudoProbes = IndexToBinaryPseudoProbes;
-    this->BinaryPseudoProbeToBlock = BinaryPseudoProbeToBlock;
+    this->IndexToBBPseudoProbes = IndexToBBPseudoProbes;
+    this->BBPseudoProbeToBlock = BBPseudoProbeToBlock;
     this->YamlBFGUID = YamlBFGUID;
   }
 
@@ -259,9 +259,9 @@ class StaleMatcher {
   std::unordered_map<uint16_t, std::vector<HashBlockPairType>> OpHashToBlocks;
   std::unordered_map<uint64_t, std::vector<HashBlockPairType>> CallHashToBlocks;
   std::unordered_map<uint64_t, std::vector<const MCDecodedPseudoProbe *>>
-      IndexToBinaryPseudoProbes;
+      IndexToBBPseudoProbes;
   std::unordered_map<const MCDecodedPseudoProbe *, FlowBlock *>
-      BinaryPseudoProbeToBlock;
+      BBPseudoProbeToBlock;
   std::unordered_set<uint64_t> MatchedWithPseudoProbes;
   std::vector<const FlowBlock *> Blocks;
   uint64_t YamlBFGUID{0};
@@ -346,8 +346,8 @@ class StaleMatcher {
         errs() << "BOLT-WARNING: invalid index block pseudo probe index\n";
       return nullptr;
     }
-    auto It = IndexToBinaryPseudoProbes.find(Index);
-    if (It == IndexToBinaryPseudoProbes.end()) {
+    auto It = IndexToBBPseudoProbes.find(Index);
+    if (It == IndexToBBPseudoProbes.end()) {
       if (opts::Verbosity >= 3)
         errs() << "BOLT-WARNING: no block pseudo probes found within binary "
                   "block at index\n";
@@ -360,8 +360,8 @@ class StaleMatcher {
       return nullptr;
     }
     const MCDecodedPseudoProbe *BinaryPseudoProbe = It->second[0];
-    auto BinaryPseudoProbeIt = BinaryPseudoProbeToBlock.find(BinaryPseudoProbe);
-    assert(BinaryPseudoProbeIt != BinaryPseudoProbeToBlock.end() &&
+    auto BinaryPseudoProbeIt = BBPseudoProbeToBlock.find(BinaryPseudoProbe);
+    assert(BinaryPseudoProbeIt != BBPseudoProbeToBlock.end() &&
            "All binary pseudo probes should belong a binary basic block");
 
     MatchedWithPseudoProbes.insert(BlendedHash.combine());
@@ -560,9 +560,9 @@ size_t matchWeightsByHashes(
   std::vector<FlowBlock *> Blocks;
   std::vector<BlendedBlockHash> BlendedHashes;
   std::unordered_map<uint64_t, std::vector<const MCDecodedPseudoProbe *>>
-      IndexToBinaryPseudoProbes;
+      IndexToBBPseudoProbes;
   std::unordered_map<const MCDecodedPseudoProbe *, FlowBlock *>
-      BinaryPseudoProbeToBlock;
+      BBPseudoProbeToBlock;
   const MCPseudoProbeDecoder *PseudoProbeDecoder = BC.getPseudoProbeDecoder();
   for (uint64_t I = 0; I < BlockOrder.size(); I++) {
     const BinaryBasicBlock *BB = BlockOrder[I];
@@ -598,8 +598,8 @@ size_t matchWeightsByHashes(
             continue;
           if (Probe.getType() != static_cast<uint8_t>(PseudoProbeType::Block))
             continue;
-          IndexToBinaryPseudoProbes[Probe.getIndex()].push_back(&Probe);
-          BinaryPseudoProbeToBlock[&Probe] = Blocks[I];
+          IndexToBBPseudoProbes[Probe.getIndex()].push_back(&Probe);
+          BBPseudoProbeToBlock[&Probe] = Blocks[I];
         }
       }
     }
@@ -624,8 +624,8 @@ size_t matchWeightsByHashes(
           : 0;
 
   StaleMatcher Matcher;
-  Matcher.init(Blocks, BlendedHashes, CallHashes, IndexToBinaryPseudoProbes,
-               BinaryPseudoProbeToBlock, YamlBFGUID);
+  Matcher.init(Blocks, BlendedHashes, CallHashes, IndexToBBPseudoProbes,
+               BBPseudoProbeToBlock, YamlBFGUID);
 
   // Index in yaml profile => corresponding (matched) block
   DenseMap<uint64_t, const FlowBlock *> MatchedBlocks;
@@ -683,8 +683,7 @@ size_t matchWeightsByHashes(
   }
 
   if (opts::Verbosity >= 2) {
-    outs() << "BOLT-INFO: "
-           << Matcher.getNumBlocksMatchedWithPseudoProbes()
+    outs() << "BOLT-INFO: " << Matcher.getNumBlocksMatchedWithPseudoProbes()
            << " blocks matched with pseudo probes\n"
            << "BOLT-INFO: " << Matcher.getNumBlocksMatchedWithOpcodes()
            << " blocks matched with opcodes\n";

>From 2eb7bf2cff7c974a3327879fd46df7348fdb43e3 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Tue, 23 Jul 2024 17:16:19 -0700
Subject: [PATCH 10/10] Remove unnecessary Blocks vector from StaleMatcher

Created using spr 1.3.4
---
 bolt/lib/Profile/StaleProfileMatching.cpp | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 4410fddaf0b21..6ee14ef0194bd 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -212,7 +212,6 @@ class StaleMatcher {
       if (CallHashes[I])
         CallHashToBlocks[CallHashes[I]].push_back(
             std::make_pair(Hashes[I], Block));
-      this->Blocks.push_back(Block);
     }
     this->IndexToBBPseudoProbes = IndexToBBPseudoProbes;
     this->BBPseudoProbeToBlock = BBPseudoProbeToBlock;
@@ -263,7 +262,6 @@ class StaleMatcher {
   std::unordered_map<const MCDecodedPseudoProbe *, FlowBlock *>
       BBPseudoProbeToBlock;
   std::unordered_set<uint64_t> MatchedWithPseudoProbes;
-  std::vector<const FlowBlock *> Blocks;
   uint64_t YamlBFGUID{0};
   uint64_t MatchedWithOpcodes{0};
 
@@ -341,11 +339,6 @@ class StaleMatcher {
       return nullptr;
     }
     uint64_t Index = BlockPseudoProbes[0]->Index;
-    if (Index > Blocks.size()) {
-      if (opts::Verbosity >= 3)
-        errs() << "BOLT-WARNING: invalid index block pseudo probe index\n";
-      return nullptr;
-    }
     auto It = IndexToBBPseudoProbes.find(Index);
     if (It == IndexToBBPseudoProbes.end()) {
       if (opts::Verbosity >= 3)



More information about the llvm-branch-commits mailing list