[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
Amir Ayupov via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Nov 8 12:57:18 PST 2024
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/99891
>From 36197b175681d07b4704e576fb008cec3cc1e05e Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Wed, 28 Aug 2024 21:10:25 +0200
Subject: [PATCH 1/3] Reworked block probe matching
Use new probe ifaces
Get all function probes at once
Drop ProfileUsePseudoProbes
Unify matchWithBlockPseudoProbes
Distinguish exact and loose probe match
---
bolt/include/bolt/Core/BinaryContext.h | 20 +-
bolt/lib/Passes/BinaryPasses.cpp | 40 ++-
bolt/lib/Profile/StaleProfileMatching.cpp | 404 ++++++++++------------
bolt/lib/Rewrite/PseudoProbeRewriter.cpp | 8 +-
4 files changed, 237 insertions(+), 235 deletions(-)
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 3e20cb607e657b..3f7b2ac0bc6cf9 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -724,14 +724,26 @@ class BinaryContext {
uint32_t NumStaleBlocks{0};
/// the number of exactly matched basic blocks
uint32_t NumExactMatchedBlocks{0};
- /// the number of pseudo probe matched basic blocks
- uint32_t NumPseudoProbeMatchedBlocks{0};
+ /// the number of loosely matched basic blocks
+ uint32_t NumLooseMatchedBlocks{0};
+ /// the number of exactly pseudo probe matched basic blocks
+ uint32_t NumPseudoProbeExactMatchedBlocks{0};
+ /// the number of loosely pseudo probe matched basic blocks
+ uint32_t NumPseudoProbeLooseMatchedBlocks{0};
+ /// the number of call matched basic blocks
+ uint32_t NumCallMatchedBlocks{0};
/// the total count of samples in the profile
uint64_t StaleSampleCount{0};
/// the count of exactly matched samples
uint64_t ExactMatchedSampleCount{0};
- /// the count of pseudo probe matched samples
- uint64_t PseudoProbeMatchedSampleCount{0};
+ /// the count of exactly matched samples
+ uint64_t LooseMatchedSampleCount{0};
+ /// the count of exactly pseudo probe matched samples
+ uint64_t PseudoProbeExactMatchedSampleCount{0};
+ /// the count of loosely pseudo probe matched samples
+ uint64_t PseudoProbeLooseMatchedSampleCount{0};
+ /// the count of call matched samples
+ uint64_t CallMatchedSampleCount{0};
/// the number of stale functions that have matching number of blocks in
/// the profile
uint64_t NumStaleFuncsWithEqualBlockCount{0};
diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
index b786f07a6a6651..8edbd58c3ed3de 100644
--- a/bolt/lib/Passes/BinaryPasses.cpp
+++ b/bolt/lib/Passes/BinaryPasses.cpp
@@ -1524,15 +1524,43 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
100.0 * BC.Stats.ExactMatchedSampleCount / BC.Stats.StaleSampleCount,
BC.Stats.ExactMatchedSampleCount, BC.Stats.StaleSampleCount);
BC.outs() << format(
- "BOLT-INFO: inference found a pseudo probe match for %.2f%% of basic "
+ "BOLT-INFO: inference found an exact pseudo probe match for %.2f%% of "
+ "basic blocks (%zu out of %zu stale) responsible for %.2f%% samples"
+ " (%zu out of %zu stale)\n",
+ 100.0 * BC.Stats.NumPseudoProbeExactMatchedBlocks /
+ BC.Stats.NumStaleBlocks,
+ BC.Stats.NumPseudoProbeExactMatchedBlocks, BC.Stats.NumStaleBlocks,
+ 100.0 * BC.Stats.PseudoProbeExactMatchedSampleCount /
+ BC.Stats.StaleSampleCount,
+ BC.Stats.PseudoProbeExactMatchedSampleCount, BC.Stats.StaleSampleCount);
+ BC.outs() << format(
+ "BOLT-INFO: inference found a loose pseudo probe match for %.2f%% of "
+ "basic blocks (%zu out of %zu stale) responsible for %.2f%% samples"
+ " (%zu out of %zu stale)\n",
+ 100.0 * BC.Stats.NumPseudoProbeLooseMatchedBlocks /
+ BC.Stats.NumStaleBlocks,
+ BC.Stats.NumPseudoProbeLooseMatchedBlocks, BC.Stats.NumStaleBlocks,
+ 100.0 * BC.Stats.PseudoProbeLooseMatchedSampleCount /
+ BC.Stats.StaleSampleCount,
+ BC.Stats.PseudoProbeLooseMatchedSampleCount, BC.Stats.StaleSampleCount);
+ BC.outs() << format(
+ "BOLT-INFO: inference found a call match for %.2f%% of basic "
"blocks"
" (%zu out of %zu stale) responsible for %.2f%% samples"
" (%zu out of %zu stale)\n",
- 100.0 * BC.Stats.NumPseudoProbeMatchedBlocks / BC.Stats.NumStaleBlocks,
- BC.Stats.NumPseudoProbeMatchedBlocks, BC.Stats.NumStaleBlocks,
- 100.0 * BC.Stats.PseudoProbeMatchedSampleCount /
- BC.Stats.StaleSampleCount,
- BC.Stats.PseudoProbeMatchedSampleCount, BC.Stats.StaleSampleCount);
+ 100.0 * BC.Stats.NumCallMatchedBlocks / BC.Stats.NumStaleBlocks,
+ BC.Stats.NumCallMatchedBlocks, BC.Stats.NumStaleBlocks,
+ 100.0 * BC.Stats.CallMatchedSampleCount / BC.Stats.StaleSampleCount,
+ BC.Stats.CallMatchedSampleCount, BC.Stats.StaleSampleCount);
+ BC.outs() << format(
+ "BOLT-INFO: inference found a loose match for %.2f%% of basic "
+ "blocks"
+ " (%zu out of %zu stale) responsible for %.2f%% samples"
+ " (%zu out of %zu stale)\n",
+ 100.0 * BC.Stats.NumLooseMatchedBlocks / BC.Stats.NumStaleBlocks,
+ BC.Stats.NumLooseMatchedBlocks, BC.Stats.NumStaleBlocks,
+ 100.0 * BC.Stats.LooseMatchedSampleCount / BC.Stats.StaleSampleCount,
+ BC.Stats.LooseMatchedSampleCount, BC.Stats.StaleSampleCount);
}
if (const uint64_t NumUnusedObjects = BC.getNumUnusedProfiledObjects()) {
diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index ef9320ae168fe7..2ec74ac7549f7c 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -29,6 +29,7 @@
#include "bolt/Profile/YAMLProfileReader.h"
#include "llvm/ADT/Bitfields.h"
#include "llvm/ADT/Hashing.h"
+#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/xxhash.h"
@@ -46,7 +47,6 @@ namespace opts {
extern cl::opt<bool> TimeRewrite;
extern cl::OptionCategory BoltOptCategory;
extern cl::opt<unsigned> Verbosity;
-extern cl::opt<bool> ProfileUsePseudoProbes;
cl::opt<bool>
InferStaleProfile("infer-stale-profile",
@@ -198,8 +198,6 @@ struct BlendedBlockHash {
/// release.
class StaleMatcher {
public:
- StaleMatcher(const uint64_t YamlBFGUID) : YamlBFGUID(YamlBFGUID) {}
-
/// Initialize stale matcher.
void init(const std::vector<FlowBlock *> &Blocks,
const std::vector<BlendedBlockHash> &Hashes,
@@ -217,39 +215,38 @@ class StaleMatcher {
}
}
- /// Creates a mapping from a inlined pseudo probe's guid and index to probe.
- void mapGUIDAndIndexToProbe(uint64_t Guid, uint64_t Index,
- const MCDecodedPseudoProbe *Probe) {
- IndexAndGUIDToInlinedProbes[Guid][Index].push_back(Probe);
- }
-
- /// Creates a mapping from a pseudo probe index to pseudo probe.
- void mapIndexToProbe(uint64_t Index, const MCDecodedPseudoProbe *Probe) {
- IndexToProbes[Index].push_back(Probe);
- }
-
/// Creates a mapping from a pseudo probe to a flow block.
void mapProbeToBB(const MCDecodedPseudoProbe *Probe, FlowBlock *Block) {
BBPseudoProbeToBlock[Probe] = Block;
}
+ enum MatchMethod : char {
+ MATCH_EXACT = 0,
+ MATCH_PROBE_EXACT,
+ MATCH_PROBE_LOOSE,
+ MATCH_OPCODE,
+ MATCH_CALL,
+ NO_MATCH
+ };
+
/// Find the most similar flow block for a profile block given its hashes and
/// pseudo probe information.
- const FlowBlock *
+ std::pair<const FlowBlock *, MatchMethod>
matchBlock(BlendedBlockHash BlendedHash, uint64_t CallHash,
- const std::vector<yaml::bolt::PseudoProbeInfo> &PseudoProbes) {
- const FlowBlock *BestBlock = matchWithOpcodes(BlendedHash);
- if (BestBlock) {
- ++MatchedWithOpcodes;
- return BestBlock;
- }
- BestBlock = matchWithCalls(BlendedHash, CallHash);
- if (BestBlock)
- return BestBlock;
- BestBlock = matchWithPseudoProbes(BlendedHash, PseudoProbes);
- if (BestBlock)
- MatchedWithPseudoProbes.insert(BlendedHash.combine());
- return BestBlock;
+ const ArrayRef<yaml::bolt::PseudoProbeInfo> PseudoProbes,
+ const ArrayRef<yaml::bolt::InlineTreeInfo> InlineTree) {
+ const auto &[Block, Hash] = matchWithOpcodes(BlendedHash);
+ if (isHighConfidenceMatch(Hash, BlendedHash))
+ return {Block, MATCH_EXACT};
+ const auto &[ProbeBlock, Exact] =
+ matchWithPseudoProbes(PseudoProbes, InlineTree);
+ if (ProbeBlock)
+ return {ProbeBlock, Exact ? MATCH_PROBE_EXACT : MATCH_PROBE_LOOSE};
+ if (const FlowBlock *BestBlock = matchWithCalls(BlendedHash, CallHash))
+ return {BestBlock, MATCH_CALL};
+ if (Block)
+ return {Block, MATCH_OPCODE};
+ return {nullptr, NO_MATCH};
}
/// Returns true if the two basic blocks (in the binary and in the profile)
@@ -260,48 +257,49 @@ class StaleMatcher {
return Hash1.InstrHash == Hash2.InstrHash;
}
- /// Returns true if a profiled block was matched with its pseudo probe.
- bool isPseudoProbeMatch(BlendedBlockHash YamlBBHash) {
- return MatchedWithPseudoProbes.find(YamlBBHash.combine()) !=
- MatchedWithPseudoProbes.end();
+ /// Returns matched InlineTree * for a given profile inline_tree_id.
+ const MCDecodedPseudoProbeInlineTree *
+ getInlineTreeNode(uint32_t ProfileInlineTreeNodeId) const {
+ auto It = InlineTreeNodeMap.find(ProfileInlineTreeNodeId);
+ if (It == InlineTreeNodeMap.end())
+ return nullptr;
+ return It->second;
}
- /// Returns the number of blocks matched with opcodes.
- size_t getNumBlocksMatchedWithOpcodes() const { return MatchedWithOpcodes; }
-
- /// Returns the number of blocks matched with pseudo probes.
- size_t getNumBlocksMatchedWithPseudoProbes() const {
- return MatchedWithPseudoProbes.size();
+ void mapInlineTreeNode(uint32_t ProfileNode,
+ const MCDecodedPseudoProbeInlineTree *BinaryNode) {
+ auto Res = InlineTreeNodeMap.try_emplace(ProfileNode, BinaryNode);
+ assert(Res.second &&
+ "Duplicate mapping from profile node index to binary inline tree");
+ (void)Res;
}
private:
using HashBlockPairType = std::pair<BlendedBlockHash, FlowBlock *>;
std::unordered_map<uint16_t, std::vector<HashBlockPairType>> OpHashToBlocks;
std::unordered_map<uint64_t, std::vector<HashBlockPairType>> CallHashToBlocks;
- DenseMap<uint64_t, std::vector<const MCDecodedPseudoProbe *>> IndexToProbes;
- DenseMap<uint64_t,
- DenseMap<uint64_t, std::vector<const MCDecodedPseudoProbe *>>>
- IndexAndGUIDToInlinedProbes;
+ DenseMap<uint32_t, const MCDecodedPseudoProbeInlineTree *> InlineTreeNodeMap;
DenseMap<const MCDecodedPseudoProbe *, FlowBlock *> BBPseudoProbeToBlock;
- DenseSet<uint64_t> MatchedWithPseudoProbes;
- const uint64_t YamlBFGUID{0};
- uint64_t MatchedWithOpcodes{0};
- // Uses OpcodeHash to find the most similar block for a given hash.
- const FlowBlock *matchWithOpcodes(BlendedBlockHash BlendedHash) const {
+ // Uses OpcodeHash to find the most similar block (with blended hash) for a
+ // given hash.
+ std::pair<const FlowBlock *, BlendedBlockHash>
+ matchWithOpcodes(BlendedBlockHash BlendedHash) const {
auto BlockIt = OpHashToBlocks.find(BlendedHash.OpcodeHash);
if (BlockIt == OpHashToBlocks.end())
- return nullptr;
+ return {nullptr, BlendedBlockHash(0)};
FlowBlock *BestBlock = nullptr;
uint64_t BestDist = std::numeric_limits<uint64_t>::max();
+ BlendedBlockHash BestHash;
for (const auto &[Hash, Block] : BlockIt->second) {
uint64_t Dist = Hash.distance(BlendedHash);
if (BestBlock == nullptr || Dist < BestDist) {
BestDist = Dist;
BestBlock = Block;
+ BestHash = Hash;
}
}
- return BestBlock;
+ return {BestBlock, BestHash};
}
// Uses CallHash to find the most similar block for a given hash.
@@ -326,120 +324,71 @@ class StaleMatcher {
return BestBlock;
}
- /// A helper function for logging.
- static bool LogErrIfExpr(bool Expr, StringRef Message) {
- if (Expr)
- errs() << Message;
- return Expr;
- }
-
- /// Matches an inlined profile block with an inlined binary block based on
- /// pseudo probes.
- const FlowBlock *matchWithInlinedBlockPseudoProbes(
- SmallVector<const yaml::bolt::PseudoProbeInfo *>
- &InlinedBlockPseudoProbes) const {
- if (opts::Verbosity >= 3)
- outs() << "BOLT-INFO: attempting to match block with inlined block "
- "pseudo probes\n";
-
- size_t NInlinedBlockPseudoProbes = InlinedBlockPseudoProbes.size();
- if (LogErrIfExpr(NInlinedBlockPseudoProbes == 0,
- "BOLT-WARNING: no pseudo probes in profile block\n"))
- return nullptr;
- if (LogErrIfExpr(
- NInlinedBlockPseudoProbes > 1,
- "BOLT-WARNING: more than 1 pseudo probes in profile block\n"))
- return nullptr;
-
- const auto *InlinedPseudoProbe = InlinedBlockPseudoProbes[0];
- uint64_t Guid = InlinedPseudoProbe->GUID;
- uint64_t Index = InlinedPseudoProbe->Index;
-
- auto GuidIt = IndexAndGUIDToInlinedProbes.find(Guid);
- if (LogErrIfExpr(
- GuidIt == IndexAndGUIDToInlinedProbes.end(),
- "BOLT-WARNING: no pseudo probes found within BB at index\n"))
- return nullptr;
- auto IndexIt = GuidIt->second.find(Index);
- if (LogErrIfExpr(
- IndexIt == GuidIt->second.end(),
- "BOLT-WARNING: no pseudo probes found within BB at index\n"))
- return nullptr;
-
- if (LogErrIfExpr(
- IndexIt->second.size() > 1,
- "BOLT-WARNING: more than 1 block pseudo probes in BB at index\n"))
- return nullptr;
-
- const MCDecodedPseudoProbe *BinaryPseudoProbe = IndexIt->second[0];
- auto BinaryPseudoProbeIt = BBPseudoProbeToBlock.find(BinaryPseudoProbe);
- assert(BinaryPseudoProbeIt != BBPseudoProbeToBlock.end() &&
- "All binary pseudo probes should belong a binary basic block");
-
- return BinaryPseudoProbeIt->second;
- }
-
/// Matches a profile block with an binary block based on pseudo probes.
- const FlowBlock *matchWithNonInlinedBlockPseudoProbes(
- SmallVector<const yaml::bolt::PseudoProbeInfo *> &BlockPseudoProbes)
- const {
- if (opts::Verbosity >= 3)
- outs() << "BOLT-INFO: attempting to match block with inlined block "
- "pseudo probes\n";
-
- size_t NBlockPseudoProbes = BlockPseudoProbes.size();
- if (LogErrIfExpr(NBlockPseudoProbes == 0,
- "BOLT-WARNING: no pseudo probes in profile block\n"))
- return nullptr;
- if (LogErrIfExpr(
- NBlockPseudoProbes > 1,
- "BOLT-WARNING: more than 1 pseudo probes in profile block\n"))
- return nullptr;
- uint64_t Index = BlockPseudoProbes[0]->Index;
- auto It = IndexToProbes.find(Index);
- if (LogErrIfExpr(
- It == IndexToProbes.end(),
- "BOLT-WARNING: no block pseudo probes found within BB at index\n"))
- return nullptr;
- if (LogErrIfExpr(
- It->second.size() > 1,
- "BOLT-WARNING: more than 1 block pseudo probes in BB at index\n"))
- return nullptr;
- const MCDecodedPseudoProbe *BinaryPseudoProbe = It->second[0];
- auto BinaryPseudoProbeIt = BBPseudoProbeToBlock.find(BinaryPseudoProbe);
- assert(BinaryPseudoProbeIt != BBPseudoProbeToBlock.end() &&
- "All binary pseudo probes should belong a binary basic block");
+ /// Returns the best matching block (or nullptr) and whether the match is
+ /// unambiguous.
+ std::pair<const FlowBlock *, bool> matchWithPseudoProbes(
+ const ArrayRef<yaml::bolt::PseudoProbeInfo> BlockPseudoProbes,
+ const ArrayRef<yaml::bolt::InlineTreeInfo> InlineTree) const {
+ if (!opts::StaleMatchingWithBlockPseudoProbes)
+ return {nullptr, false};
+
+ auto logIf = [](bool Expr, StringRef Message) {
+ LLVM_DEBUG(if (Expr) errs() << Message << '\n');
+ return Expr;
+ };
- return BinaryPseudoProbeIt->second;
- }
+ DenseMap<const FlowBlock *, uint32_t> FlowBlockMatchCount;
- /// Uses pseudo probe information to attach the profile to the appropriate
- /// block.
- const FlowBlock *matchWithPseudoProbes(
- BlendedBlockHash BlendedHash,
- const std::vector<yaml::bolt::PseudoProbeInfo> &PseudoProbes) const {
- if (!opts::StaleMatchingWithBlockPseudoProbes || !YamlBFGUID)
- return nullptr;
-
- // Searches for the pseudo probe attached to the matched function's block.
- SmallVector<const yaml::bolt::PseudoProbeInfo *> BlockPseudoProbes;
- SmallVector<const yaml::bolt::PseudoProbeInfo *> InlinedBlockPseudoProbes;
- for (const auto &PseudoProbe : PseudoProbes) {
- // Skips pseudo probes attached to function calls.
- if (PseudoProbe.Type != static_cast<uint8_t>(PseudoProbeType::Block))
+ for (const yaml::bolt::PseudoProbeInfo &Probe : BlockPseudoProbes) {
+ const MCDecodedPseudoProbeInlineTree *InlineTreeNode =
+ getInlineTreeNode(Probe.InlineTreeIndex);
+ if (logIf(!InlineTreeNode,
+ formatv("no matching inline tree node for {0} {1}",
+ Probe.InlineTreeIndex, Probe.Index).str())) {
+ ++FlowBlockMatchCount[nullptr];
continue;
- if (PseudoProbe.GUID != YamlBFGUID)
- InlinedBlockPseudoProbes.push_back(&PseudoProbe);
- else
- BlockPseudoProbes.push_back(&PseudoProbe);
+ }
+ const MCDecodedPseudoProbe *BinaryProbe = nullptr;
+ for (const MCDecodedPseudoProbe &FuncProbe :
+ InlineTreeNode->getProbes()) {
+ if (FuncProbe.getIndex() != Probe.Index)
+ continue;
+ BinaryProbe = &FuncProbe;
+ break;
+ }
+ if (logIf(!BinaryProbe, formatv("no matching binary probe for {0} {1}",
+ Probe.InlineTreeIndex, Probe.Index)
+ .str())) {
+ ++FlowBlockMatchCount[nullptr];
+ continue;
+ }
+ auto It = BBPseudoProbeToBlock.find(BinaryProbe);
+ if (logIf(It == BBPseudoProbeToBlock.end(),
+ formatv("no probe->block for {0} {1}", Probe.InlineTreeIndex,
+ Probe.Index)
+ .str())) {
+ ++FlowBlockMatchCount[nullptr];
+ continue;
+ }
+ const FlowBlock *Block = It->second;
+ ++FlowBlockMatchCount[Block];
+ }
+ uint32_t BestMatchCount = 0;
+ uint32_t TotalMatchCount = 0;
+ const FlowBlock *BestMatchBlock = nullptr;
+ for (auto &[Block, Count] : FlowBlockMatchCount) {
+ logIf(true, formatv("block {0} count {1}",
+ Block ? Block->Index : UINT64_MAX, Count)
+ .str());
+ TotalMatchCount += Count;
+ if (Count > BestMatchCount ||
+ (Count == BestMatchCount && !BestMatchBlock)) {
+ BestMatchBlock = Block;
+ BestMatchCount = Count;
+ }
}
- // Returns nullptr if there is not a 1:1 mapping of the profile block pseudo
- // probe and a binary block pseudo probe.
- const FlowBlock *MatchedInlinedBlock =
- matchWithInlinedBlockPseudoProbes(InlinedBlockPseudoProbes);
- return MatchedInlinedBlock
- ? MatchedInlinedBlock
- : matchWithNonInlinedBlockPseudoProbes(BlockPseudoProbes);
+ return {BestMatchBlock, BestMatchCount / TotalMatchCount};
}
};
@@ -630,26 +579,7 @@ size_t matchWeightsByHashes(
assert(Func.Blocks.size() == BlockOrder.size() + 2);
- // Sets the YamlBFGUID in the StaleMatcher such that if either the profiled or
- // binary function dne or they are not equal, to zero, as not to perform
- // pseudo probe block matching. Otherwise, the YamlBF's GUID is used for
- // pseudo probe block matching.
- const MCPseudoProbeDecoder *PseudoProbeDecoder =
- opts::ProfileUsePseudoProbes && opts::StaleMatchingWithBlockPseudoProbes
- ? BC.getPseudoProbeDecoder()
- : nullptr;
- uint64_t BFPseudoProbeDescHash = 0;
- if (opts::ProfileUsePseudoProbes &&
- opts::StaleMatchingWithBlockPseudoProbes && BF.getGUID() != 0) {
- assert(PseudoProbeDecoder &&
- "If BF has pseudo probe, BC should have a pseudo probe decoder");
- auto &GUID2FuncDescMap = PseudoProbeDecoder->getGUID2FuncDescMap();
- auto It = GUID2FuncDescMap.find(BF.getGUID());
- if (It != GUID2FuncDescMap.end())
- BFPseudoProbeDescHash = It->second.FuncHash;
- }
-
- StaleMatcher Matcher(YamlBF.GUID);
+ StaleMatcher Matcher;
std::vector<uint64_t> CallHashes;
std::vector<FlowBlock *> Blocks;
std::vector<BlendedBlockHash> BlendedHashes;
@@ -672,38 +602,55 @@ size_t matchWeightsByHashes(
Blocks.push_back(&Func.Blocks[I + 1]);
BlendedBlockHash BlendedHash(BB->getHash());
BlendedHashes.push_back(BlendedHash);
- // Collects pseudo probes attached to the BB for use in the StaleMatcher.
- if (opts::ProfileUsePseudoProbes &&
- opts::StaleMatchingWithBlockPseudoProbes && BFPseudoProbeDescHash &&
- YamlBF.PseudoProbeDescHash &&
- BFPseudoProbeDescHash == YamlBF.PseudoProbeDescHash) {
- assert(PseudoProbeDecoder &&
- "If pseudo probes are in use, psuedo probe decoder should exist");
- const AddressProbesMap &ProbeMap =
- PseudoProbeDecoder->getAddress2ProbesMap();
- const uint64_t FuncAddr = BF.getAddress();
- const std::pair<uint64_t, uint64_t> &BlockRange =
- BB->getInputAddressRange();
- const auto &BlockProbes =
- llvm::make_range(ProbeMap.lower_bound(FuncAddr + BlockRange.first),
- ProbeMap.lower_bound(FuncAddr + BlockRange.second));
- for (const auto &[_, Probes] : BlockProbes) {
- for (const MCDecodedPseudoProbe &Probe : Probes) {
- if (Probe.getType() != static_cast<uint8_t>(PseudoProbeType::Block))
- continue;
- if (Probe.getInlineTreeNode()->hasInlineSite())
- Matcher.mapGUIDAndIndexToProbe(Probe.getGuid(), Probe.getIndex(),
- &Probe);
- else
- Matcher.mapIndexToProbe(Probe.getIndex(), &Probe);
- Matcher.mapProbeToBB(&Probe, Blocks[I]);
- }
- }
- }
LLVM_DEBUG(dbgs() << "BB with index " << I << " has hash = "
<< Twine::utohexstr(BB->getHash()) << "\n");
}
+ // Collects function pseudo probes for use in the StaleMatcher.
+ if (opts::StaleMatchingWithBlockPseudoProbes) {
+ const MCPseudoProbeDecoder *PseudoProbeDecoder = BC.getPseudoProbeDecoder();
+ assert(PseudoProbeDecoder &&
+ "If pseudo probes are in use, pseudo probe decoder should exist");
+ const AddressProbesMap &ProbeMap =
+ PseudoProbeDecoder->getAddress2ProbesMap();
+ const uint64_t FuncAddr = BF.getAddress();
+ for (const MCDecodedPseudoProbe &Probe :
+ ProbeMap.find(FuncAddr, FuncAddr + BF.getSize()))
+ if (const BinaryBasicBlock *BB =
+ BF.getBasicBlockContainingOffset(Probe.getAddress() - FuncAddr))
+ Matcher.mapProbeToBB(&Probe, Blocks[BB->getIndex()]);
+ // Match inline tree nodes by GUID, checksum, parent, and call site.
+ unsigned MatchedNodes = 0;
+ const MCDecodedPseudoProbeInlineTree *DummyInlineRoot =
+ &PseudoProbeDecoder->getDummyInlineRoot();
+ for (const yaml::bolt::InlineTreeInfo &InlineTreeNode : YamlBF.InlineTree) {
+ uint64_t GUID = InlineTreeNode.GUID;
+ uint64_t Hash = InlineTreeNode.Hash;
+ uint32_t InlineTreeNodeId = InlineTreeNode.Index;
+ uint32_t ParentId = InlineTreeNode.ParentIndex;
+ uint32_t CallSiteProbe = InlineTreeNode.CallSiteProbe;
+ const MCDecodedPseudoProbeInlineTree *ParentNode =
+ InlineTreeNodeId ? Matcher.getInlineTreeNode(ParentId)
+ : DummyInlineRoot;
+ if (!ParentNode)
+ continue;
+ for (const MCDecodedPseudoProbeInlineTree &Child :
+ ParentNode->getChildren()) {
+ if (Child.Guid != GUID ||
+ PseudoProbeDecoder->getFuncDescForGUID(GUID)->FuncHash != Hash)
+ continue;
+ // Check inline site for non-toplev inline tree nodes.
+ if (ParentNode != DummyInlineRoot &&
+ std::get<1>(Child.getInlineSite()) != CallSiteProbe)
+ continue;
+ Matcher.mapInlineTreeNode(InlineTreeNodeId, &Child);
+ ++MatchedNodes;
+ break;
+ }
+ }
+ LLVM_DEBUG(errs() << "matched " << MatchedNodes << "/"
+ << YamlBF.InlineTree.size() << " inline tree nodes\n");
+ }
Matcher.init(Blocks, BlendedHashes, CallHashes);
// Index in yaml profile => corresponding (matched) block
@@ -724,7 +671,9 @@ size_t matchWeightsByHashes(
else
llvm_unreachable("Unhandled HashFunction");
}
- MatchedBlock = Matcher.matchBlock(YamlHash, CallHash, YamlBB.PseudoProbes);
+ StaleMatcher::MatchMethod Method;
+ std::tie(MatchedBlock, Method) = Matcher.matchBlock(
+ YamlHash, CallHash, YamlBB.PseudoProbes, YamlBF.InlineTree);
if (MatchedBlock == nullptr && YamlBB.Index == 0)
MatchedBlock = Blocks[0];
if (MatchedBlock != nullptr) {
@@ -737,16 +686,34 @@ size_t matchWeightsByHashes(
<< " with hash " << Twine::utohexstr(BinHash.combine())
<< "\n");
// Update matching stats accounting for the matched block.
- if (Matcher.isHighConfidenceMatch(BinHash, YamlHash)) {
+ switch (Method) {
+ case StaleMatcher::MATCH_EXACT:
++BC.Stats.NumExactMatchedBlocks;
BC.Stats.ExactMatchedSampleCount += YamlBB.ExecCount;
- LLVM_DEBUG(dbgs() << " exact match\n");
- } else if (Matcher.isPseudoProbeMatch(YamlHash)) {
- ++BC.Stats.NumPseudoProbeMatchedBlocks;
- BC.Stats.PseudoProbeMatchedSampleCount += YamlBB.ExecCount;
- LLVM_DEBUG(dbgs() << " pseudo probe match\n");
- } else {
- LLVM_DEBUG(dbgs() << " loose match\n");
+ LLVM_DEBUG(dbgs() << " exact hash match\n");
+ break;
+ case StaleMatcher::MATCH_PROBE_EXACT:
+ ++BC.Stats.NumPseudoProbeExactMatchedBlocks;
+ BC.Stats.PseudoProbeExactMatchedSampleCount += YamlBB.ExecCount;
+ LLVM_DEBUG(dbgs() << " exact pseudo probe match\n");
+ break;
+ case StaleMatcher::MATCH_PROBE_LOOSE:
+ ++BC.Stats.NumPseudoProbeLooseMatchedBlocks;
+ BC.Stats.PseudoProbeLooseMatchedSampleCount += YamlBB.ExecCount;
+ LLVM_DEBUG(dbgs() << " loose pseudo probe match\n");
+ break;
+ case StaleMatcher::MATCH_CALL:
+ ++BC.Stats.NumCallMatchedBlocks;
+ BC.Stats.CallMatchedSampleCount += YamlBB.ExecCount;
+ LLVM_DEBUG(dbgs() << " call match\n");
+ break;
+ case StaleMatcher::MATCH_OPCODE:
+ ++BC.Stats.NumLooseMatchedBlocks;
+ BC.Stats.LooseMatchedSampleCount += YamlBB.ExecCount;
+ LLVM_DEBUG(dbgs() << " loose hash match\n");
+ break;
+ case StaleMatcher::NO_MATCH:
+ LLVM_DEBUG(dbgs() << " no match\n");
}
if (YamlBB.NumInstructions == BB->size())
++BC.Stats.NumStaleBlocksWithEqualIcount;
@@ -761,13 +728,6 @@ size_t matchWeightsByHashes(
BC.Stats.StaleSampleCount += YamlBB.ExecCount;
}
- if (opts::Verbosity >= 2) {
- outs() << "BOLT-INFO: " << Matcher.getNumBlocksMatchedWithPseudoProbes()
- << " blocks matched with pseudo probes\n"
- << "BOLT-INFO: " << Matcher.getNumBlocksMatchedWithOpcodes()
- << " blocks matched with opcodes\n";
- }
-
// Match jumps from the profile to the jumps from CFG
std::vector<uint64_t> OutWeight(Func.Blocks.size(), 0);
std::vector<uint64_t> InWeight(Func.Blocks.size(), 0);
diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
index 4b3f9ab4cb64ae..43ab0d9fd63e51 100644
--- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
+++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
@@ -51,6 +51,7 @@ static cl::opt<PrintPseudoProbesOptions> PrintPseudoProbes(
cl::Hidden, cl::cat(BoltCategory));
extern cl::opt<bool> ProfileWritePseudoProbes;
+extern cl::opt<bool> StaleMatchingWithBlockPseudoProbes;
} // namespace opts
namespace {
@@ -92,14 +93,15 @@ class PseudoProbeRewriter final : public MetadataRewriter {
};
Error PseudoProbeRewriter::preCFGInitializer() {
- if (opts::ProfileWritePseudoProbes)
- parsePseudoProbe(true);
+ if (opts::ProfileWritePseudoProbes ||
+ opts::StaleMatchingWithBlockPseudoProbes)
+ parsePseudoProbe(opts::ProfileWritePseudoProbes);
return Error::success();
}
Error PseudoProbeRewriter::postEmitFinalizer() {
- if (!opts::ProfileWritePseudoProbes)
+ if (!opts::StaleMatchingWithBlockPseudoProbes)
parsePseudoProbe();
updatePseudoProbes();
>From 8fafc048b46e8076db353ea6a7eea7b2ebae48d7 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Wed, 4 Sep 2024 16:05:10 -0700
Subject: [PATCH 2/3] drop logIf
Created using spr 1.3.4
---
bolt/lib/Profile/StaleProfileMatching.cpp | 21 +++------------------
1 file changed, 3 insertions(+), 18 deletions(-)
diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 4f637c3e4f2eb2..110769aa31e8e1 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -331,19 +331,12 @@ class StaleMatcher {
if (!opts::StaleMatchingWithBlockPseudoProbes)
return {nullptr, false};
- auto logIf = [](bool Expr, StringRef Message) {
- LLVM_DEBUG(if (Expr) errs() << Message << '\n');
- return Expr;
- };
-
DenseMap<const FlowBlock *, uint32_t> FlowBlockMatchCount;
for (const yaml::bolt::PseudoProbeInfo &Probe : BlockPseudoProbes) {
const MCDecodedPseudoProbeInlineTree *InlineTreeNode =
getInlineTreeNode(Probe.InlineTreeIndex);
- if (logIf(!InlineTreeNode,
- formatv("no matching inline tree node for {0} {1}",
- Probe.InlineTreeIndex, Probe.Index).str())) {
+ if (!InlineTreeNode) {
++FlowBlockMatchCount[nullptr];
continue;
}
@@ -355,17 +348,12 @@ class StaleMatcher {
BinaryProbe = &FuncProbe;
break;
}
- if (logIf(!BinaryProbe, formatv("no matching binary probe for {0} {1}",
- Probe.InlineTreeIndex, Probe.Index)
- .str())) {
+ if (!BinaryProbe) {
++FlowBlockMatchCount[nullptr];
continue;
}
auto It = BBPseudoProbeToBlock.find(BinaryProbe);
- if (logIf(It == BBPseudoProbeToBlock.end(),
- formatv("no probe->block for {0} {1}", Probe.InlineTreeIndex,
- Probe.Index)
- .str())) {
+ if (It == BBPseudoProbeToBlock.end()) {
++FlowBlockMatchCount[nullptr];
continue;
}
@@ -376,9 +364,6 @@ class StaleMatcher {
uint32_t TotalMatchCount = 0;
const FlowBlock *BestMatchBlock = nullptr;
for (auto &[Block, Count] : FlowBlockMatchCount) {
- logIf(true, formatv("block {0} count {1}",
- Block ? Block->Index : UINT64_MAX, Count)
- .str());
TotalMatchCount += Count;
if (Count > BestMatchCount ||
(Count == BestMatchCount && !BestMatchBlock)) {
>From ebd3acfb23808721059c73defc5a33965fbb57ff Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Thu, 26 Sep 2024 13:50:19 -0700
Subject: [PATCH 3/3] Allow null block participate in majority vote, improves
run-time performance
Created using spr 1.3.4
---
bolt/lib/Profile/StaleProfileMatching.cpp | 2 --
1 file changed, 2 deletions(-)
diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 4c60b9df875c5d..ef70b979bad201 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -372,8 +372,6 @@ class StaleMatcher {
const FlowBlock *BestMatchBlock = nullptr;
for (auto &[FlowBlock, Count] : FlowBlockMatchCount) {
TotalMatchCount += Count;
- if (!FlowBlock)
- continue;
if (Count > BestMatchCount ||
(Count == BestMatchCount && !BestMatchBlock)) {
BestMatchBlock = FlowBlock;
More information about the llvm-branch-commits
mailing list