[llvm-branch-commits] [BOLT] Add pseudo probe inline tree to YAML profile (PR #107137)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Sep 3 11:17:47 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-bolt
Author: Amir Ayupov (aaupov)
<details>
<summary>Changes</summary>
To be used for pseudo probe function matching (#100446).
---
Full diff: https://github.com/llvm/llvm-project/pull/107137.diff
3 Files Affected:
- (modified) bolt/include/bolt/Profile/ProfileYAMLMapping.h (+38-11)
- (modified) bolt/lib/Profile/DataAggregator.cpp (+46-7)
- (modified) bolt/lib/Profile/YAMLProfileWriter.cpp (+49-10)
``````````diff
diff --git a/bolt/include/bolt/Profile/ProfileYAMLMapping.h b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
index 2a0514d7d9304b..f0cc116ebc6cb0 100644
--- a/bolt/include/bolt/Profile/ProfileYAMLMapping.h
+++ b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
@@ -95,24 +95,28 @@ template <> struct MappingTraits<bolt::SuccessorInfo> {
namespace bolt {
struct PseudoProbeInfo {
- llvm::yaml::Hex64 GUID;
uint64_t Index;
+ uint32_t InlineTreeIndex;
+ llvm::yaml::Hex32 Offset{0};
uint8_t Type;
bool operator==(const PseudoProbeInfo &Other) const {
- return GUID == Other.GUID && Index == Other.Index;
+ return InlineTreeIndex == Other.InlineTreeIndex && Index == Other.Index;
}
- bool operator!=(const PseudoProbeInfo &Other) const {
- return !(*this == Other);
+ bool operator<(const PseudoProbeInfo &Other) const {
+ if (InlineTreeIndex == Other.InlineTreeIndex)
+ return Index < Other.Index;
+ return InlineTreeIndex < Other.InlineTreeIndex;
}
};
} // end namespace bolt
template <> struct MappingTraits<bolt::PseudoProbeInfo> {
static void mapping(IO &YamlIO, bolt::PseudoProbeInfo &PI) {
- YamlIO.mapRequired("guid", PI.GUID);
YamlIO.mapRequired("id", PI.Index);
YamlIO.mapRequired("type", PI.Type);
+ YamlIO.mapOptional("inline_tree_id", PI.InlineTreeIndex, (uint32_t)0);
+ YamlIO.mapOptional("offset", PI.Offset, (uint32_t)0);
}
static const bool flow = true;
@@ -122,7 +126,7 @@ template <> struct MappingTraits<bolt::PseudoProbeInfo> {
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::CallSiteInfo)
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::SuccessorInfo)
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::PseudoProbeInfo)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::PseudoProbeInfo)
namespace llvm {
namespace yaml {
@@ -163,10 +167,35 @@ template <> struct MappingTraits<bolt::BinaryBasicBlockProfile> {
}
};
+namespace bolt {
+struct InlineTreeInfo {
+ uint32_t Index;
+ uint32_t ParentIndex;
+ uint32_t CallSiteProbe;
+ llvm::yaml::Hex64 GUID;
+ llvm::yaml::Hex64 Hash;
+ bool operator==(const InlineTreeInfo &Other) const {
+ return Index == Other.Index;
+ }
+};
+} // end namespace bolt
+
+template <> struct MappingTraits<bolt::InlineTreeInfo> {
+ static void mapping(IO &YamlIO, bolt::InlineTreeInfo &ITI) {
+ YamlIO.mapRequired("guid", ITI.GUID);
+ YamlIO.mapRequired("hash", ITI.Hash);
+ YamlIO.mapRequired("id", ITI.Index);
+ YamlIO.mapOptional("parent", ITI.ParentIndex, (uint32_t)0);
+ YamlIO.mapOptional("callsite", ITI.CallSiteProbe, 0);
+ }
+
+ static const bool flow = true;
+};
} // end namespace yaml
} // end namespace llvm
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::BinaryBasicBlockProfile)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::InlineTreeInfo)
namespace llvm {
namespace yaml {
@@ -179,8 +208,7 @@ struct BinaryFunctionProfile {
llvm::yaml::Hex64 Hash{0};
uint64_t ExecCount{0};
std::vector<BinaryBasicBlockProfile> Blocks;
- llvm::yaml::Hex64 GUID{0};
- llvm::yaml::Hex64 PseudoProbeDescHash{0};
+ std::vector<InlineTreeInfo> InlineTree;
bool Used{false};
};
} // end namespace bolt
@@ -194,9 +222,8 @@ template <> struct MappingTraits<bolt::BinaryFunctionProfile> {
YamlIO.mapRequired("nblocks", BFP.NumBasicBlocks);
YamlIO.mapOptional("blocks", BFP.Blocks,
std::vector<bolt::BinaryBasicBlockProfile>());
- YamlIO.mapOptional("guid", BFP.GUID, (uint64_t)0);
- YamlIO.mapOptional("pseudo_probe_desc_hash", BFP.PseudoProbeDescHash,
- (uint64_t)0);
+ YamlIO.mapOptional("inline_tree", BFP.InlineTree,
+ std::vector<bolt::InlineTreeInfo>());
}
};
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 10d745cc69824b..803cc4725b5702 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -34,6 +34,7 @@
#include "llvm/Support/raw_ostream.h"
#include <map>
#include <optional>
+#include <queue>
#include <unordered_map>
#include <utility>
@@ -2402,12 +2403,43 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
const unsigned BlockIndex = BlockMap.getBBIndex(BI.To.Offset);
YamlBF.Blocks[BlockIndex].ExecCount += BI.Branches;
}
- if (PseudoProbeDecoder) {
- if ((YamlBF.GUID = BF->getGUID())) {
- const MCPseudoProbeFuncDesc *FuncDesc =
- PseudoProbeDecoder->getFuncDescForGUID(YamlBF.GUID);
- YamlBF.PseudoProbeDescHash = FuncDesc->FuncHash;
+ DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t>
+ InlineTreeNodeId;
+ if (PseudoProbeDecoder && BF->getGUID()) {
+ std::queue<const MCDecodedPseudoProbeInlineTree *> ITWorklist;
+ // FIXME: faster inline tree lookup by top-level GUID
+ if (const MCDecodedPseudoProbeInlineTree *InlineTree = llvm::find_if(
+ PseudoProbeDecoder->getDummyInlineRoot().getChildren(),
+ [&](const auto &InlineTree) {
+ return InlineTree.Guid == BF->getGUID();
+ })) {
+ ITWorklist.push(InlineTree);
+ InlineTreeNodeId[InlineTree] = 0;
+ auto Hash =
+ PseudoProbeDecoder->getFuncDescForGUID(BF->getGUID())->FuncHash;
+ YamlBF.InlineTree.emplace_back(
+ yaml::bolt::InlineTreeInfo{0, 0, 0, BF->getGUID(), Hash});
+ }
+ uint32_t ParentId = 0;
+ uint32_t NodeId = 1;
+ while (!ITWorklist.empty()) {
+ const MCDecodedPseudoProbeInlineTree *Cur = ITWorklist.front();
+ for (const MCDecodedPseudoProbeInlineTree &Child :
+ Cur->getChildren()) {
+ InlineTreeNodeId[&Child] = NodeId;
+ auto Hash =
+ PseudoProbeDecoder->getFuncDescForGUID(Child.Guid)->FuncHash;
+ YamlBF.InlineTree.emplace_back(yaml::bolt::InlineTreeInfo{
+ NodeId++, ParentId, std::get<1>(Child.getInlineSite()),
+ Child.Guid, Hash});
+ ITWorklist.push(&Child);
+ }
+ ITWorklist.pop();
+ ++ParentId;
}
+ }
+
+ if (PseudoProbeDecoder) {
// Fetch probes belonging to all fragments
const AddressProbesMap &ProbeMap =
PseudoProbeDecoder->getAddress2ProbesMap();
@@ -2420,12 +2452,19 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
const uint32_t OutputAddress = Probe.getAddress();
const uint32_t InputOffset = BAT->translate(
FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true);
- const unsigned BlockIndex = getBlock(InputOffset).second;
+ const auto [BlockOffset, BlockIndex] = getBlock(InputOffset);
+ uint32_t NodeId = InlineTreeNodeId[Probe.getInlineTreeNode()];
+ uint32_t Offset = InputOffset - BlockOffset;
YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back(
- yaml::bolt::PseudoProbeInfo{Probe.getGuid(), Probe.getIndex(),
+ yaml::bolt::PseudoProbeInfo{Probe.getIndex(), NodeId, Offset,
Probe.getType()});
}
}
+ for (yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
+ llvm::sort(YamlBB.PseudoProbes);
+ YamlBB.PseudoProbes.erase(llvm::unique(YamlBB.PseudoProbes),
+ YamlBB.PseudoProbes.end());
+ }
}
// Drop blocks without a hash, won't be useful for stale matching.
llvm::erase_if(YamlBF.Blocks,
diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp
index ffbf2388e912fb..817689230e2a70 100644
--- a/bolt/lib/Profile/YAMLProfileWriter.cpp
+++ b/bolt/lib/Profile/YAMLProfileWriter.cpp
@@ -12,11 +12,15 @@
#include "bolt/Profile/BoltAddressTranslation.h"
#include "bolt/Profile/DataAggregator.h"
#include "bolt/Profile/ProfileReaderBase.h"
+#include "bolt/Profile/ProfileYAMLMapping.h"
#include "bolt/Rewrite/RewriteInstance.h"
#include "bolt/Utils/CommandLineOpts.h"
+#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
+#include <deque>
+#include <queue>
#undef DEBUG_TYPE
#define DEBUG_TYPE "bolt-prof"
@@ -77,13 +81,6 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
YamlBF.Hash = BF.getHash();
YamlBF.NumBasicBlocks = BF.size();
YamlBF.ExecCount = BF.getKnownExecutionCount();
- if (PseudoProbeDecoder) {
- if ((YamlBF.GUID = BF.getGUID())) {
- const MCPseudoProbeFuncDesc *FuncDesc =
- PseudoProbeDecoder->getFuncDescForGUID(YamlBF.GUID);
- YamlBF.PseudoProbeDescHash = FuncDesc->FuncHash;
- }
- }
BinaryFunction::BasicBlockOrderType Order;
llvm::copy(UseDFS ? BF.dfs() : BF.getLayout().blocks(),
@@ -92,6 +89,40 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
const FunctionLayout Layout = BF.getLayout();
Layout.updateLayoutIndices(Order);
+ DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
+ if (PseudoProbeDecoder && BF.getGUID()) {
+ std::queue<const MCDecodedPseudoProbeInlineTree *> ITWorklist;
+ // FIXME: faster inline tree lookup by top-level GUID
+ if (const MCDecodedPseudoProbeInlineTree *InlineTree = llvm::find_if(
+ PseudoProbeDecoder->getDummyInlineRoot().getChildren(),
+ [&](const auto &InlineTree) {
+ return InlineTree.Guid == BF.getGUID();
+ })) {
+ ITWorklist.push(InlineTree);
+ InlineTreeNodeId[InlineTree] = 0;
+ auto Hash =
+ PseudoProbeDecoder->getFuncDescForGUID(BF.getGUID())->FuncHash;
+ YamlBF.InlineTree.emplace_back(
+ yaml::bolt::InlineTreeInfo{0, 0, 0, BF.getGUID(), Hash});
+ }
+ uint32_t ParentId = 0;
+ uint32_t NodeId = 1;
+ while (!ITWorklist.empty()) {
+ const MCDecodedPseudoProbeInlineTree *Cur = ITWorklist.front();
+ for (const MCDecodedPseudoProbeInlineTree &Child : Cur->getChildren()) {
+ InlineTreeNodeId[&Child] = NodeId;
+ auto Hash =
+ PseudoProbeDecoder->getFuncDescForGUID(Child.Guid)->FuncHash;
+ YamlBF.InlineTree.emplace_back(yaml::bolt::InlineTreeInfo{
+ NodeId++, ParentId, std::get<1>(Child.getInlineSite()), Child.Guid,
+ Hash});
+ ITWorklist.push(&Child);
+ }
+ ITWorklist.pop();
+ ++ParentId;
+ }
+ }
+
for (const BinaryBasicBlock *BB : Order) {
yaml::bolt::BinaryBasicBlockProfile YamlBB;
YamlBB.Index = BB->getLayoutIndex();
@@ -198,10 +229,18 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
const uint64_t FuncAddr = BF.getAddress();
const std::pair<uint64_t, uint64_t> &BlockRange =
BB->getInputAddressRange();
- for (const MCDecodedPseudoProbe &Probe : ProbeMap.find(
- FuncAddr + BlockRange.first, FuncAddr + BlockRange.second))
+ const std::pair<uint64_t, uint64_t> BlockAddrRange = {
+ FuncAddr + BlockRange.first, FuncAddr + BlockRange.second};
+ for (const MCDecodedPseudoProbe &Probe :
+ ProbeMap.find(BlockAddrRange.first, BlockAddrRange.second)) {
+ uint32_t NodeId = InlineTreeNodeId[Probe.getInlineTreeNode()];
+ uint32_t Offset = Probe.getAddress() - BlockAddrRange.first;
YamlBB.PseudoProbes.emplace_back(yaml::bolt::PseudoProbeInfo{
- Probe.getGuid(), Probe.getIndex(), Probe.getType()});
+ Probe.getIndex(), NodeId, Offset, Probe.getType()});
+ }
+ llvm::sort(YamlBB.PseudoProbes);
+ YamlBB.PseudoProbes.erase(llvm::unique(YamlBB.PseudoProbes),
+ YamlBB.PseudoProbes.end());
}
YamlBF.Blocks.emplace_back(YamlBB);
``````````
</details>
https://github.com/llvm/llvm-project/pull/107137
More information about the llvm-branch-commits
mailing list