[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Jul 23 16:11:10 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-bolt

Author: Shaw Young (shawbyoung)

<details>
<summary>Changes</summary>

Implemented pseudo probe block matching. When a matched function's pseudo
probe checksum equals the checksum recorded in the profile, the indices of
block pseudo probes are used to match the blocks that opcode and call hash
block matching left unmatched.

Test Plan: Added match-blocks-with-pseudo-probes.test.


---
Full diff: https://github.com/llvm/llvm-project/pull/99891.diff


4 Files Affected:

- (modified) bolt/include/bolt/Core/BinaryContext.h (+8-4) 
- (modified) bolt/lib/Passes/BinaryPasses.cpp (+14-4) 
- (modified) bolt/lib/Profile/StaleProfileMatching.cpp (+170-15) 
- (added) bolt/test/X86/match-blocks-with-pseudo-probes.test (+62) 


``````````diff
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index b3cf9f834cc08..39f2ac512d305 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -717,12 +717,16 @@ class BinaryContext {
     /// Stats for stale profile matching:
     ///   the total number of basic blocks in the profile
     uint32_t NumStaleBlocks{0};
-    ///   the number of matched basic blocks
-    uint32_t NumMatchedBlocks{0};
+    ///   the number of exactly matched basic blocks
+    uint32_t NumExactMatchedBlocks{0};
+    ///   the number of pseudo probe matched basic blocks
+    uint32_t NumPseudoProbeMatchedBlocks{0};
     ///   the total count of samples in the profile
     uint64_t StaleSampleCount{0};
-    ///   the count of matched samples
-    uint64_t MatchedSampleCount{0};
+    ///   the count of exactly matched samples
+    uint64_t ExactMatchedSampleCount{0};
+    ///   the count of pseudo probe matched samples
+    uint64_t PseudoProbeMatchedSampleCount{0};
     ///   the number of stale functions that have matching number of blocks in
     ///   the profile
     uint64_t NumStaleFuncsWithEqualBlockCount{0};
diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
index fa95ad7324ac1..b786f07a6a665 100644
--- a/bolt/lib/Passes/BinaryPasses.cpp
+++ b/bolt/lib/Passes/BinaryPasses.cpp
@@ -1519,10 +1519,20 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
         "BOLT-INFO: inference found an exact match for %.2f%% of basic blocks"
         " (%zu out of %zu stale) responsible for %.2f%% samples"
         " (%zu out of %zu stale)\n",
-        100.0 * BC.Stats.NumMatchedBlocks / BC.Stats.NumStaleBlocks,
-        BC.Stats.NumMatchedBlocks, BC.Stats.NumStaleBlocks,
-        100.0 * BC.Stats.MatchedSampleCount / BC.Stats.StaleSampleCount,
-        BC.Stats.MatchedSampleCount, BC.Stats.StaleSampleCount);
+        100.0 * BC.Stats.NumExactMatchedBlocks / BC.Stats.NumStaleBlocks,
+        BC.Stats.NumExactMatchedBlocks, BC.Stats.NumStaleBlocks,
+        100.0 * BC.Stats.ExactMatchedSampleCount / BC.Stats.StaleSampleCount,
+        BC.Stats.ExactMatchedSampleCount, BC.Stats.StaleSampleCount);
+    BC.outs() << format(
+        "BOLT-INFO: inference found a pseudo probe match for %.2f%% of basic "
+        "blocks"
+        " (%zu out of %zu stale) responsible for %.2f%% samples"
+        " (%zu out of %zu stale)\n",
+        100.0 * BC.Stats.NumPseudoProbeMatchedBlocks / BC.Stats.NumStaleBlocks,
+        BC.Stats.NumPseudoProbeMatchedBlocks, BC.Stats.NumStaleBlocks,
+        100.0 * BC.Stats.PseudoProbeMatchedSampleCount /
+            BC.Stats.StaleSampleCount,
+        BC.Stats.PseudoProbeMatchedSampleCount, BC.Stats.StaleSampleCount);
   }
 
   if (const uint64_t NumUnusedObjects = BC.getNumUnusedProfiledObjects()) {
diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index cd6e96f7e2cf4..c621c29a0db83 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -45,6 +45,7 @@ namespace opts {
 
 extern cl::opt<bool> TimeRewrite;
 extern cl::OptionCategory BoltOptCategory;
+extern cl::opt<unsigned> Verbosity;
 
 cl::opt<bool>
     InferStaleProfile("infer-stale-profile",
@@ -194,7 +195,13 @@ class StaleMatcher {
   /// Initialize stale matcher.
   void init(const std::vector<FlowBlock *> &Blocks,
             const std::vector<BlendedBlockHash> &Hashes,
-            const std::vector<uint64_t> &CallHashes) {
+            const std::vector<uint64_t> &CallHashes,
+            const std::unordered_map<uint64_t,
+                                     std::vector<const MCDecodedPseudoProbe *>>
+                &IndexToBinaryPseudoProbes,
+            const std::unordered_map<const MCDecodedPseudoProbe *, FlowBlock *>
+                &BinaryPseudoProbeToBlock,
+            const uint64_t YamlBFGUID) {
     assert(Blocks.size() == Hashes.size() &&
            Hashes.size() == CallHashes.size() &&
            "incorrect matcher initialization");
@@ -205,14 +212,24 @@ class StaleMatcher {
       if (CallHashes[I])
         CallHashToBlocks[CallHashes[I]].push_back(
             std::make_pair(Hashes[I], Block));
+      this->Blocks.push_back(Block);
     }
+    this->IndexToBinaryPseudoProbes = IndexToBinaryPseudoProbes;
+    this->BinaryPseudoProbeToBlock = BinaryPseudoProbeToBlock;
+    this->YamlBFGUID = YamlBFGUID;
   }
 
   /// Find the most similar block for a given hash.
-  const FlowBlock *matchBlock(BlendedBlockHash BlendedHash,
-                              uint64_t CallHash) const {
+  const FlowBlock *
+  matchBlock(BlendedBlockHash BlendedHash, uint64_t CallHash,
+             const std::vector<yaml::bolt::PseudoProbeInfo> &PseudoProbes) {
     const FlowBlock *BestBlock = matchWithOpcodes(BlendedHash);
-    return BestBlock ? BestBlock : matchWithCalls(BlendedHash, CallHash);
+    if (BestBlock)
+      ++MatchedWithOpcodes;
+    BestBlock = BestBlock ? BestBlock : matchWithCalls(BlendedHash, CallHash);
+    return BestBlock || !YamlBFGUID
+               ? BestBlock
+               : matchWithPseudoProbes(BlendedHash, PseudoProbes);
   }
 
   /// Returns true if the two basic blocks (in the binary and in the profile)
@@ -223,10 +240,32 @@ class StaleMatcher {
     return Hash1.InstrHash == Hash2.InstrHash;
   }
 
+  /// Returns true if a profiled block was matched with its pseudo probe.
+  bool isPseudoProbeMatch(BlendedBlockHash YamlBBHash) {
+    return MatchedWithPseudoProbes.find(YamlBBHash.combine()) !=
+           MatchedWithPseudoProbes.end();
+  }
+
+  /// Returns the number of blocks matched with pseudo probes.
+  size_t getNumBlocksMatchedWithPseudoProbes() const {
+    return MatchedWithPseudoProbes.size();
+  }
+
+  /// Returns the number of blocks matched with opcodes.
+  size_t getNumBlocksMatchedWithOpcodes() const { return MatchedWithOpcodes; }
+
 private:
   using HashBlockPairType = std::pair<BlendedBlockHash, FlowBlock *>;
   std::unordered_map<uint16_t, std::vector<HashBlockPairType>> OpHashToBlocks;
   std::unordered_map<uint64_t, std::vector<HashBlockPairType>> CallHashToBlocks;
+  std::unordered_map<uint64_t, std::vector<const MCDecodedPseudoProbe *>>
+      IndexToBinaryPseudoProbes;
+  std::unordered_map<const MCDecodedPseudoProbe *, FlowBlock *>
+      BinaryPseudoProbeToBlock;
+  std::unordered_set<uint64_t> MatchedWithPseudoProbes;
+  std::vector<const FlowBlock *> Blocks;
+  uint64_t YamlBFGUID{0};
+  uint64_t MatchedWithOpcodes{0};
 
   // Uses OpcodeHash to find the most similar block for a given hash.
   const FlowBlock *matchWithOpcodes(BlendedBlockHash BlendedHash) const {
@@ -266,6 +305,68 @@ class StaleMatcher {
     }
     return BestBlock;
   }
+  // Uses pseudo probe information to attach the profile to the appropriate
+  // block.
+  const FlowBlock *matchWithPseudoProbes(
+      BlendedBlockHash BlendedHash,
+      const std::vector<yaml::bolt::PseudoProbeInfo> &PseudoProbes) {
+    // Searches for the pseudo probe attached to the matched function's block,
+    // ignoring pseudo probes attached to function calls and inlined functions'
+    // blocks.
+    if (opts::Verbosity >= 3)
+      outs() << "BOLT-INFO: attempting to match block with pseudo probes\n";
+
+    std::vector<const yaml::bolt::PseudoProbeInfo *> BlockPseudoProbes;
+    for (const auto &PseudoProbe : PseudoProbes) {
+      // Ensures that pseudo probe information belongs to the appropriate
+      // function and not an inlined function.
+      if (PseudoProbe.GUID != YamlBFGUID)
+        continue;
+      // Skips pseudo probes attached to function calls.
+      if (PseudoProbe.Type != static_cast<uint8_t>(PseudoProbeType::Block))
+        continue;
+
+      BlockPseudoProbes.push_back(&PseudoProbe);
+    }
+    // Returns nullptr if there is not a 1:1 mapping of the yaml block pseudo
+    // probe and binary pseudo probe.
+    if (BlockPseudoProbes.size() == 0) {
+      if (opts::Verbosity >= 3)
+        errs() << "BOLT-WARNING: no pseudo probes in profile block\n";
+      return nullptr;
+    }
+    if (BlockPseudoProbes.size() > 1) {
+      if (opts::Verbosity >= 3)
+        errs() << "BOLT-WARNING: more than 1 pseudo probes in profile block\n";
+      return nullptr;
+    }
+    uint64_t Index = BlockPseudoProbes[0]->Index;
+    if (Index > Blocks.size()) {
+      if (opts::Verbosity >= 3)
+        errs() << "BOLT-WARNING: invalid index block pseudo probe index\n";
+      return nullptr;
+    }
+    auto It = IndexToBinaryPseudoProbes.find(Index);
+    if (It == IndexToBinaryPseudoProbes.end()) {
+      if (opts::Verbosity >= 3)
+        errs() << "BOLT-WARNING: no block pseudo probes found within binary "
+                  "block at index\n";
+      return nullptr;
+    }
+    if (It->second.size() > 1) {
+      if (opts::Verbosity >= 3)
+        errs() << "BOLT-WARNING: more than 1 block pseudo probes in binary "
+                  "block at index\n";
+      return nullptr;
+    }
+    const MCDecodedPseudoProbe *BinaryPseudoProbe = It->second[0];
+    auto BinaryPseudoProbeIt = BinaryPseudoProbeToBlock.find(BinaryPseudoProbe);
+    assert(BinaryPseudoProbeIt != BinaryPseudoProbeToBlock.end() &&
+           "All binary pseudo probes should belong a binary basic block");
+
+    MatchedWithPseudoProbes.insert(BlendedHash.combine());
+    return BinaryPseudoProbeIt->second;
+  }
 };
 
 void BinaryFunction::computeBlockHashes(HashFunction HashFunction) const {
@@ -447,18 +548,22 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
 /// of the basic blocks in the binary, the count is "matched" to the block.
 /// Similarly, if both the source and the target of a count in the profile are
 /// matched to a jump in the binary, the count is recorded in CFG.
-size_t
-matchWeightsByHashes(BinaryContext &BC,
-                     const BinaryFunction::BasicBlockOrderType &BlockOrder,
-                     const yaml::bolt::BinaryFunctionProfile &YamlBF,
-                     FlowFunction &Func, HashFunction HashFunction,
-                     YAMLProfileReader::ProfileLookupMap &IdToYamlBF) {
+size_t matchWeightsByHashes(
+    BinaryContext &BC, const BinaryFunction::BasicBlockOrderType &BlockOrder,
+    const yaml::bolt::BinaryFunctionProfile &YamlBF, FlowFunction &Func,
+    HashFunction HashFunction, YAMLProfileReader::ProfileLookupMap &IdToYamlBF,
+    const BinaryFunction &BF) {
 
   assert(Func.Blocks.size() == BlockOrder.size() + 2);
 
   std::vector<uint64_t> CallHashes;
   std::vector<FlowBlock *> Blocks;
   std::vector<BlendedBlockHash> BlendedHashes;
+  std::unordered_map<uint64_t, std::vector<const MCDecodedPseudoProbe *>>
+      IndexToBinaryPseudoProbes;
+  std::unordered_map<const MCDecodedPseudoProbe *, FlowBlock *>
+      BinaryPseudoProbeToBlock;
+  const MCPseudoProbeDecoder *PseudoProbeDecoder = BC.getPseudoProbeDecoder();
   for (uint64_t I = 0; I < BlockOrder.size(); I++) {
     const BinaryBasicBlock *BB = BlockOrder[I];
     assert(BB->getHash() != 0 && "empty hash of BinaryBasicBlock");
@@ -478,11 +583,49 @@ matchWeightsByHashes(BinaryContext &BC,
     Blocks.push_back(&Func.Blocks[I + 1]);
     BlendedBlockHash BlendedHash(BB->getHash());
     BlendedHashes.push_back(BlendedHash);
+    if (PseudoProbeDecoder) {
+      const AddressProbesMap &ProbeMap =
+          PseudoProbeDecoder->getAddress2ProbesMap();
+      const uint64_t FuncAddr = BF.getAddress();
+      const std::pair<uint64_t, uint64_t> &BlockRange =
+          BB->getInputAddressRange();
+      const auto &BlockProbes =
+          llvm::make_range(ProbeMap.lower_bound(FuncAddr + BlockRange.first),
+                           ProbeMap.lower_bound(FuncAddr + BlockRange.second));
+      for (const auto &[_, Probes] : BlockProbes) {
+        for (const MCDecodedPseudoProbe &Probe : Probes) {
+          if (Probe.getInlineTreeNode()->hasInlineSite())
+            continue;
+          if (Probe.getType() != static_cast<uint8_t>(PseudoProbeType::Block))
+            continue;
+          IndexToBinaryPseudoProbes[Probe.getIndex()].push_back(&Probe);
+          BinaryPseudoProbeToBlock[&Probe] = Blocks[I];
+        }
+      }
+    }
+
     LLVM_DEBUG(dbgs() << "BB with index " << I << " has hash = "
                       << Twine::utohexstr(BB->getHash()) << "\n");
   }
+
+  uint64_t BFPseudoProbeDescHash = 0;
+  if (BF.getGUID() != 0) {
+    assert(PseudoProbeDecoder &&
+           "If BF has pseudo probe, BC should have a pseudo probe decoder");
+    auto &GUID2FuncDescMap = PseudoProbeDecoder->getGUID2FuncDescMap();
+    auto It = GUID2FuncDescMap.find(BF.getGUID());
+    if (It != GUID2FuncDescMap.end())
+      BFPseudoProbeDescHash = It->second.FuncHash;
+  }
+  uint64_t YamlBFGUID =
+      BFPseudoProbeDescHash && YamlBF.PseudoProbeDescHash &&
+              BFPseudoProbeDescHash == YamlBF.PseudoProbeDescHash
+          ? static_cast<uint64_t>(YamlBF.GUID)
+          : 0;
+
   StaleMatcher Matcher;
-  Matcher.init(Blocks, BlendedHashes, CallHashes);
+  Matcher.init(Blocks, BlendedHashes, CallHashes, IndexToBinaryPseudoProbes,
+               BinaryPseudoProbeToBlock, YamlBFGUID);
 
   // Index in yaml profile => corresponding (matched) block
   DenseMap<uint64_t, const FlowBlock *> MatchedBlocks;
@@ -502,7 +645,7 @@ matchWeightsByHashes(BinaryContext &BC,
       else
         llvm_unreachable("Unhandled HashFunction");
     }
-    MatchedBlock = Matcher.matchBlock(YamlHash, CallHash);
+    MatchedBlock = Matcher.matchBlock(YamlHash, CallHash, YamlBB.PseudoProbes);
     if (MatchedBlock == nullptr && YamlBB.Index == 0)
       MatchedBlock = Blocks[0];
     if (MatchedBlock != nullptr) {
@@ -516,9 +659,13 @@ matchWeightsByHashes(BinaryContext &BC,
                         << "\n");
       // Update matching stats accounting for the matched block.
       if (Matcher.isHighConfidenceMatch(BinHash, YamlHash)) {
-        ++BC.Stats.NumMatchedBlocks;
-        BC.Stats.MatchedSampleCount += YamlBB.ExecCount;
+        ++BC.Stats.NumExactMatchedBlocks;
+        BC.Stats.ExactMatchedSampleCount += YamlBB.ExecCount;
         LLVM_DEBUG(dbgs() << "  exact match\n");
+      } else if (Matcher.isPseudoProbeMatch(YamlHash)) {
+        ++BC.Stats.NumPseudoProbeMatchedBlocks;
+        BC.Stats.PseudoProbeMatchedSampleCount += YamlBB.ExecCount;
+        LLVM_DEBUG(dbgs() << "  pseudo probe match\n");
       } else {
         LLVM_DEBUG(dbgs() << "  loose match\n");
       }
@@ -535,6 +682,14 @@ matchWeightsByHashes(BinaryContext &BC,
     BC.Stats.StaleSampleCount += YamlBB.ExecCount;
   }
 
+  if (opts::Verbosity >= 2) {
+    outs() << "BOLT-INFO: "
+           << Matcher.getNumBlocksMatchedWithPseudoProbes()
+           << " blocks matched with pseudo probes\n"
+           << "BOLT-INFO: " << Matcher.getNumBlocksMatchedWithOpcodes()
+           << " blocks matched with opcodes\n";
+  }
+
   // Match jumps from the profile to the jumps from CFG
   std::vector<uint64_t> OutWeight(Func.Blocks.size(), 0);
   std::vector<uint64_t> InWeight(Func.Blocks.size(), 0);
@@ -828,7 +983,7 @@ bool YAMLProfileReader::inferStaleProfile(
   // Match as many block/jump counts from the stale profile as possible
   size_t MatchedBlocks =
       matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func,
-                           YamlBP.Header.HashFunction, IdToYamLBF);
+                           YamlBP.Header.HashFunction, IdToYamLBF, BF);
 
   // Adjust the flow function by marking unreachable blocks Unlikely so that
   // they don't get any counts assigned.
diff --git a/bolt/test/X86/match-blocks-with-pseudo-probes.test b/bolt/test/X86/match-blocks-with-pseudo-probes.test
new file mode 100644
index 0000000000000..83f9c20f31ba6
--- /dev/null
+++ b/bolt/test/X86/match-blocks-with-pseudo-probes.test
@@ -0,0 +1,62 @@
+## Tests stale block matching with pseudo probes.
+
+# REQUIRES: system-linux
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
+# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \
+# RUN:   --print-cfg --funcs=main --profile-ignore-hash=0 --infer-stale-profile 2>&1 | FileCheck %s
+
+# CHECK: BOLT-INFO: inference found a pseudo probe match for 100.00% of basic blocks (1 out of 1 stale) responsible for -nan% samples (0 out of 0 stale)
+
+#--- main.s
+ .text
+  .globl  main                            # -- Begin function main
+  .p2align        4, 0x90
+  .type   main,@function
+main:                                   # @main
+# %bb.0:
+  pushq   %rbp
+  movq    %rsp, %rbp
+  movl    $0, -4(%rbp)
+  .pseudoprobe    15822663052811949562 1 0 0 main
+  xorl    %eax, %eax
+  popq    %rbp
+  retq
+.Lfunc_end0:
+  .size   main, .Lfunc_end0-main
+                                  # -- End function
+  .section        .pseudo_probe_desc,"",@progbits
+  .quad   -2624081020897602054
+  .quad   4294967295
+  .byte   4
+  .ascii  "main"
+  .ident  "clang version 17.0.6 (CentOS 17.0.6-5.el9)"
+  .section        ".note.GNU-stack","",@progbits
+  .addrsig
+
+#--- yaml
+---
+header:
+  profile-version: 1
+  binary-name:     'match-blocks-with-pseudo-probes.s.tmp.exe'
+  binary-build-id: '<unknown>'
+  profile-flags:   [ lbr ]
+  profile-origin:  branch profile reader
+  profile-events:  ''
+  dfs-order:       false
+  hash-func:       xxh3
+functions:
+  - name:                   main
+    fid:                    0
+    hash:                   0x0000000000000001
+    exec:                   1
+    nblocks:                6
+    guid:                   0xDB956436E78DD5FA
+    pseudo_probe_desc_hash: 4294967295    #lookup in code in a second
+    blocks:
+      - bid:             1
+        hash:            0x0000000000000001
+        insns:           1
+        succ:            [ { bid: 3, cnt: 1} ]
+        pseudo_probes: [ { guid: 0xDB956436E78DD5FA, id: 1, type: 0 } ]

``````````

</details>


https://github.com/llvm/llvm-project/pull/99891


More information about the llvm-branch-commits mailing list