[llvm] c00c62c - [BOLT] Add pseudo probe inline tree to YAML profile
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 12 20:51:39 PDT 2024
Author: Amir Ayupov
Date: 2024-09-12T20:51:35-07:00
New Revision: c00c62c113d1ac121891d644a9f18f247c24d9b1
URL: https://github.com/llvm/llvm-project/commit/c00c62c113d1ac121891d644a9f18f247c24d9b1
DIFF: https://github.com/llvm/llvm-project/commit/c00c62c113d1ac121891d644a9f18f247c24d9b1.diff
LOG: [BOLT] Add pseudo probe inline tree to YAML profile
Add probe inline tree information to YAML profile, at function level:
- function GUID,
- checksum,
- parent node id,
- call site in the parent.
This information is used for pseudo probe block matching (#99891).
The encoding adds/changes probe information in multiple levels of
YAML profile:
- BinaryProfile: add pseudo_probe_desc with GUIDs and Hashes, which
  permits deduplication of data:
  - many GUIDs are duplicates as the same callee is commonly inlined
    into multiple callers,
  - hashes are also very repetitive, especially for functions with
    low block counts.
- FunctionProfile: add inline tree (see above). Top-level function
  is included as root of function inline tree, which makes guid and
  pseudo_probe_desc_hash fields redundant.
- BlockProfile: densely-encoded block probe information:
  - probes reference their containing inline tree node,
  - separate lists for block, call, indirect call probes,
  - block probe encoding is specialized: ids are encoded as bitset
    in uint64_t. If only block probe with id=1 is present, it's
    encoded as an implicit entry (id=0, omitted).
  - inline tree nodes with identical probes share probe description
    where node indices are combined into a list.
On top of #107970, profile with new probe encoding has the following
characteristics (profile for a large binary):
- Profile without probe information: 33MB, 3.8MB compressed (baseline).
- Profile with inline tree information: 92MB, 14MB compressed.
Profile processing time (YAML parsing, inference, attaching steps):
- profile without pseudo probes: 5s,
- profile with pseudo probes, without pseudo probe matching: 11s,
- with pseudo probe matching: 12.5s.
Test Plan: updated pseudoprobe-decoding-inline.test
Reviewers: wlei-llvm, ayermolo, rafaelauler, dcci, maksfb
Reviewed By: wlei-llvm, rafaelauler
Pull Request: https://github.com/llvm/llvm-project/pull/107137
Added:
Modified:
bolt/include/bolt/Profile/ProfileYAMLMapping.h
bolt/include/bolt/Profile/YAMLProfileWriter.h
bolt/lib/Profile/DataAggregator.cpp
bolt/lib/Profile/YAMLProfileWriter.cpp
bolt/test/X86/pseudoprobe-decoding-inline.test
bolt/test/X86/pseudoprobe-decoding-noinline.test
llvm/include/llvm/MC/MCPseudoProbe.h
Removed:
################################################################################
diff --git a/bolt/include/bolt/Profile/ProfileYAMLMapping.h b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
index 2a0514d7d9304b..91955afb186e90 100644
--- a/bolt/include/bolt/Profile/ProfileYAMLMapping.h
+++ b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
@@ -95,24 +95,29 @@ template <> struct MappingTraits<bolt::SuccessorInfo> {
namespace bolt {
struct PseudoProbeInfo {
- llvm::yaml::Hex64 GUID;
- uint64_t Index;
- uint8_t Type;
+ uint32_t InlineTreeIndex = 0;
+ uint64_t BlockMask = 0; // bitset with probe indices from 1 to 64
+ std::vector<uint64_t> BlockProbes; // block probes with indices above 64
+ std::vector<uint64_t> CallProbes;
+ std::vector<uint64_t> IndCallProbes;
+ std::vector<uint32_t> InlineTreeNodes;
bool operator==(const PseudoProbeInfo &Other) const {
- return GUID == Other.GUID && Index == Other.Index;
- }
- bool operator!=(const PseudoProbeInfo &Other) const {
- return !(*this == Other);
+ return InlineTreeIndex == Other.InlineTreeIndex &&
+ BlockProbes == Other.BlockProbes && CallProbes == Other.CallProbes &&
+ IndCallProbes == Other.IndCallProbes;
}
};
} // end namespace bolt
template <> struct MappingTraits<bolt::PseudoProbeInfo> {
static void mapping(IO &YamlIO, bolt::PseudoProbeInfo &PI) {
- YamlIO.mapRequired("guid", PI.GUID);
- YamlIO.mapRequired("id", PI.Index);
- YamlIO.mapRequired("type", PI.Type);
+ YamlIO.mapOptional("blx", PI.BlockMask, 0);
+ YamlIO.mapOptional("blk", PI.BlockProbes, std::vector<uint64_t>());
+ YamlIO.mapOptional("call", PI.CallProbes, std::vector<uint64_t>());
+ YamlIO.mapOptional("icall", PI.IndCallProbes, std::vector<uint64_t>());
+ YamlIO.mapOptional("id", PI.InlineTreeIndex, 0);
+ YamlIO.mapOptional("ids", PI.InlineTreeNodes, std::vector<uint32_t>());
}
static const bool flow = true;
@@ -158,15 +163,35 @@ template <> struct MappingTraits<bolt::BinaryBasicBlockProfile> {
std::vector<bolt::CallSiteInfo>());
YamlIO.mapOptional("succ", BBP.Successors,
std::vector<bolt::SuccessorInfo>());
- YamlIO.mapOptional("pseudo_probes", BBP.PseudoProbes,
+ YamlIO.mapOptional("probes", BBP.PseudoProbes,
std::vector<bolt::PseudoProbeInfo>());
}
};
+namespace bolt {
+struct InlineTreeNode {
+ uint32_t ParentIndexDelta;
+ uint32_t CallSiteProbe;
+ // Index in PseudoProbeDesc.GUID, UINT32_MAX for same as previous (omitted)
+ uint32_t GUIDIndex;
+ bool operator==(const InlineTreeNode &) const { return false; }
+};
+} // end namespace bolt
+
+template <> struct MappingTraits<bolt::InlineTreeNode> {
+ static void mapping(IO &YamlIO, bolt::InlineTreeNode &ITI) {
+ YamlIO.mapOptional("g", ITI.GUIDIndex, UINT32_MAX);
+ YamlIO.mapOptional("p", ITI.ParentIndexDelta, 0);
+ YamlIO.mapOptional("cs", ITI.CallSiteProbe, 0);
+ }
+
+ static const bool flow = true;
+};
} // end namespace yaml
} // end namespace llvm
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::BinaryBasicBlockProfile)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::InlineTreeNode)
namespace llvm {
namespace yaml {
@@ -179,8 +204,7 @@ struct BinaryFunctionProfile {
llvm::yaml::Hex64 Hash{0};
uint64_t ExecCount{0};
std::vector<BinaryBasicBlockProfile> Blocks;
- llvm::yaml::Hex64 GUID{0};
- llvm::yaml::Hex64 PseudoProbeDescHash{0};
+ std::vector<InlineTreeNode> InlineTree;
bool Used{false};
};
} // end namespace bolt
@@ -194,9 +218,8 @@ template <> struct MappingTraits<bolt::BinaryFunctionProfile> {
YamlIO.mapRequired("nblocks", BFP.NumBasicBlocks);
YamlIO.mapOptional("blocks", BFP.Blocks,
std::vector<bolt::BinaryBasicBlockProfile>());
- YamlIO.mapOptional("guid", BFP.GUID, (uint64_t)0);
- YamlIO.mapOptional("pseudo_probe_desc_hash", BFP.PseudoProbeDescHash,
- (uint64_t)0);
+ YamlIO.mapOptional("inline_tree", BFP.InlineTree,
+ std::vector<bolt::InlineTreeNode>());
}
};
@@ -246,10 +269,33 @@ template <> struct MappingTraits<bolt::BinaryProfileHeader> {
}
};
+namespace bolt {
+struct PseudoProbeDesc {
+ std::vector<Hex64> GUID;
+ std::vector<Hex64> Hash;
+ std::vector<uint32_t> GUIDHashIdx; // Index of hash for that GUID in Hash
+
+ bool operator==(const PseudoProbeDesc &Other) const {
+ // Only treat empty Desc as equal
+ return GUID.empty() && Other.GUID.empty() && Hash.empty() &&
+ Other.Hash.empty() && GUIDHashIdx.empty() &&
+ Other.GUIDHashIdx.empty();
+ }
+};
+} // end namespace bolt
+
+template <> struct MappingTraits<bolt::PseudoProbeDesc> {
+ static void mapping(IO &YamlIO, bolt::PseudoProbeDesc &PD) {
+ YamlIO.mapRequired("gs", PD.GUID);
+ YamlIO.mapRequired("gh", PD.GUIDHashIdx);
+ YamlIO.mapRequired("hs", PD.Hash);
+ }
+};
} // end namespace yaml
} // end namespace llvm
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::BinaryFunctionProfile)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::PseudoProbeDesc)
namespace llvm {
namespace yaml {
@@ -258,6 +304,7 @@ namespace bolt {
struct BinaryProfile {
BinaryProfileHeader Header;
std::vector<BinaryFunctionProfile> Functions;
+ PseudoProbeDesc PseudoProbeDesc;
};
} // namespace bolt
@@ -265,6 +312,8 @@ template <> struct MappingTraits<bolt::BinaryProfile> {
static void mapping(IO &YamlIO, bolt::BinaryProfile &BP) {
YamlIO.mapRequired("header", BP.Header);
YamlIO.mapRequired("functions", BP.Functions);
+ YamlIO.mapOptional("pseudo_probe_desc", BP.PseudoProbeDesc,
+ bolt::PseudoProbeDesc());
}
};
diff --git a/bolt/include/bolt/Profile/YAMLProfileWriter.h b/bolt/include/bolt/Profile/YAMLProfileWriter.h
index 4a9355dfceac9e..aec6e474847605 100644
--- a/bolt/include/bolt/Profile/YAMLProfileWriter.h
+++ b/bolt/include/bolt/Profile/YAMLProfileWriter.h
@@ -32,8 +32,27 @@ class YAMLProfileWriter {
/// Save execution profile for that instance.
std::error_code writeProfile(const RewriteInstance &RI);
+ using InlineTreeMapTy =
+ DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t>;
+ struct InlineTreeDesc {
+ template <typename T> using GUIDMapTy = std::unordered_map<uint64_t, T>;
+ using GUIDNodeMap = GUIDMapTy<const MCDecodedPseudoProbeInlineTree *>;
+ using GUIDNumMap = GUIDMapTy<uint32_t>;
+ GUIDNodeMap TopLevelGUIDToInlineTree;
+ GUIDNumMap GUIDIdxMap;
+ GUIDNumMap HashIdxMap;
+ };
+
+ static std::tuple<std::vector<yaml::bolt::InlineTreeNode>, InlineTreeMapTy>
+ convertBFInlineTree(const MCPseudoProbeDecoder &Decoder,
+ const InlineTreeDesc &InlineTree, uint64_t GUID);
+
+ static std::tuple<yaml::bolt::PseudoProbeDesc, InlineTreeDesc>
+ convertPseudoProbeDesc(const MCPseudoProbeDecoder &PseudoProbeDecoder);
+
static yaml::bolt::BinaryFunctionProfile
convert(const BinaryFunction &BF, bool UseDFS,
+ const InlineTreeDesc &InlineTree,
const BoltAddressTranslation *BAT = nullptr);
/// Set CallSiteInfo destination fields from \p Symbol and return a target
@@ -42,8 +61,39 @@ class YAMLProfileWriter {
setCSIDestination(const BinaryContext &BC, yaml::bolt::CallSiteInfo &CSI,
const MCSymbol *Symbol, const BoltAddressTranslation *BAT,
uint32_t Offset = 0);
-};
+private:
+ struct InlineTreeNode {
+ const MCDecodedPseudoProbeInlineTree *InlineTree;
+ uint64_t GUID;
+ uint64_t Hash;
+ uint32_t ParentId;
+ uint32_t InlineSite;
+ };
+ static std::vector<InlineTreeNode>
+ collectInlineTree(const MCPseudoProbeDecoder &Decoder,
+ const MCDecodedPseudoProbeInlineTree &Root);
+
+ // 0 - block probe, 1 - indirect call, 2 - direct call
+ using ProbeList = std::array<SmallVector<uint64_t, 0>, 3>;
+ using NodeIdToProbes = DenseMap<uint32_t, ProbeList>;
+ static std::vector<yaml::bolt::PseudoProbeInfo>
+ convertNodeProbes(NodeIdToProbes &NodeProbes);
+
+public:
+ template <typename T>
+ static std::vector<yaml::bolt::PseudoProbeInfo>
+ writeBlockProbes(T Probes, const InlineTreeMapTy &InlineTreeNodeId) {
+ NodeIdToProbes NodeProbes;
+ for (const MCDecodedPseudoProbe &Probe : Probes) {
+ auto It = InlineTreeNodeId.find(Probe.getInlineTreeNode());
+ if (It == InlineTreeNodeId.end())
+ continue;
+ NodeProbes[It->second][Probe.getType()].emplace_back(Probe.getIndex());
+ }
+ return convertNodeProbes(NodeProbes);
+ }
+};
} // namespace bolt
} // namespace llvm
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 4aeeb1daab1b94..fcde6f5f4642c8 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -2321,6 +2321,12 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
BP.Header.Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
: BinaryFunction::PF_LBR;
+ // Add probe inline tree nodes.
+ YAMLProfileWriter::InlineTreeDesc InlineTree;
+ if (PseudoProbeDecoder)
+ std::tie(BP.PseudoProbeDesc, InlineTree) =
+ YAMLProfileWriter::convertPseudoProbeDesc(*PseudoProbeDecoder);
+
if (!opts::BasicAggregation) {
// Convert profile for functions not covered by BAT
for (auto &BFI : BC.getBinaryFunctions()) {
@@ -2329,8 +2335,8 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
continue;
if (BAT->isBATFunction(Function.getAddress()))
continue;
- BP.Functions.emplace_back(
- YAMLProfileWriter::convert(Function, /*UseDFS=*/false, BAT));
+ BP.Functions.emplace_back(YAMLProfileWriter::convert(
+ Function, /*UseDFS=*/false, InlineTree, BAT));
}
for (const auto &KV : NamesToBranches) {
@@ -2403,16 +2409,22 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
YamlBF.Blocks[BlockIndex].ExecCount += BI.Branches;
}
if (PseudoProbeDecoder) {
- if ((YamlBF.GUID = BF->getGUID())) {
- const MCPseudoProbeFuncDesc *FuncDesc =
- PseudoProbeDecoder->getFuncDescForGUID(YamlBF.GUID);
- YamlBF.PseudoProbeDescHash = FuncDesc->FuncHash;
+ DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t>
+ InlineTreeNodeId;
+ if (BF->getGUID()) {
+ std::tie(YamlBF.InlineTree, InlineTreeNodeId) =
+ YAMLProfileWriter::convertBFInlineTree(*PseudoProbeDecoder,
+ InlineTree, BF->getGUID());
}
// Fetch probes belonging to all fragments
const AddressProbesMap &ProbeMap =
PseudoProbeDecoder->getAddress2ProbesMap();
BinaryFunction::FragmentsSetTy Fragments(BF->Fragments);
Fragments.insert(BF);
+ DenseMap<
+ uint32_t,
+ std::vector<std::reference_wrapper<const MCDecodedPseudoProbe>>>
+ BlockProbes;
for (const BinaryFunction *F : Fragments) {
const uint64_t FuncAddr = F->getAddress();
for (const MCDecodedPseudoProbe &Probe :
@@ -2421,11 +2433,14 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
const uint32_t InputOffset = BAT->translate(
FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true);
const unsigned BlockIndex = getBlock(InputOffset).second;
- YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back(
- yaml::bolt::PseudoProbeInfo{Probe.getGuid(), Probe.getIndex(),
- Probe.getType()});
+ BlockProbes[BlockIndex].emplace_back(Probe);
}
}
+
+ for (auto &[Block, Probes] : BlockProbes) {
+ YamlBF.Blocks[Block].PseudoProbes =
+ YAMLProfileWriter::writeBlockProbes(Probes, InlineTreeNodeId);
+ }
}
// Skip printing if there's no profile data
llvm::erase_if(
diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp
index ffbf2388e912fb..44600c3c5d5ef7 100644
--- a/bolt/lib/Profile/YAMLProfileWriter.cpp
+++ b/bolt/lib/Profile/YAMLProfileWriter.cpp
@@ -14,6 +14,7 @@
#include "bolt/Profile/ProfileReaderBase.h"
#include "bolt/Rewrite/RewriteInstance.h"
#include "bolt/Utils/CommandLineOpts.h"
+#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
@@ -58,8 +59,158 @@ const BinaryFunction *YAMLProfileWriter::setCSIDestination(
return nullptr;
}
+std::vector<YAMLProfileWriter::InlineTreeNode>
+YAMLProfileWriter::collectInlineTree(
+ const MCPseudoProbeDecoder &Decoder,
+ const MCDecodedPseudoProbeInlineTree &Root) {
+ auto getHash = [&](const MCDecodedPseudoProbeInlineTree &Node) {
+ return Decoder.getFuncDescForGUID(Node.Guid)->FuncHash;
+ };
+ std::vector<InlineTreeNode> InlineTree(
+ {InlineTreeNode{&Root, Root.Guid, getHash(Root), 0, 0}});
+ uint32_t ParentId = 0;
+ while (ParentId != InlineTree.size()) {
+ const MCDecodedPseudoProbeInlineTree *Cur = InlineTree[ParentId].InlineTree;
+ for (const MCDecodedPseudoProbeInlineTree &Child : Cur->getChildren())
+ InlineTree.emplace_back(
+ InlineTreeNode{&Child, Child.Guid, getHash(Child), ParentId,
+ std::get<1>(Child.getInlineSite())});
+ ++ParentId;
+ }
+
+ return InlineTree;
+}
+
+std::tuple<yaml::bolt::PseudoProbeDesc, YAMLProfileWriter::InlineTreeDesc>
+YAMLProfileWriter::convertPseudoProbeDesc(const MCPseudoProbeDecoder &Decoder) {
+ yaml::bolt::PseudoProbeDesc Desc;
+ InlineTreeDesc InlineTree;
+
+ for (const MCDecodedPseudoProbeInlineTree &TopLev :
+ Decoder.getDummyInlineRoot().getChildren())
+ InlineTree.TopLevelGUIDToInlineTree[TopLev.Guid] = &TopLev;
+
+ for (const auto &FuncDesc : Decoder.getGUID2FuncDescMap())
+ ++InlineTree.HashIdxMap[FuncDesc.FuncHash];
+
+ InlineTree.GUIDIdxMap.reserve(Decoder.getGUID2FuncDescMap().size());
+ for (const auto &Node : Decoder.getInlineTreeVec())
+ ++InlineTree.GUIDIdxMap[Node.Guid];
+
+ std::vector<std::pair<uint32_t, uint64_t>> GUIDFreqVec;
+ GUIDFreqVec.reserve(InlineTree.GUIDIdxMap.size());
+ for (const auto [GUID, Cnt] : InlineTree.GUIDIdxMap)
+ GUIDFreqVec.emplace_back(Cnt, GUID);
+ llvm::sort(GUIDFreqVec);
+
+ std::vector<std::pair<uint32_t, uint64_t>> HashFreqVec;
+ HashFreqVec.reserve(InlineTree.HashIdxMap.size());
+ for (const auto [Hash, Cnt] : InlineTree.HashIdxMap)
+ HashFreqVec.emplace_back(Cnt, Hash);
+ llvm::sort(HashFreqVec);
+
+ uint32_t Index = 0;
+ Desc.Hash.reserve(HashFreqVec.size());
+ for (uint64_t Hash : llvm::make_second_range(llvm::reverse(HashFreqVec))) {
+ Desc.Hash.emplace_back(Hash);
+ InlineTree.HashIdxMap[Hash] = Index++;
+ }
+
+ Index = 0;
+ Desc.GUID.reserve(GUIDFreqVec.size());
+ for (uint64_t GUID : llvm::make_second_range(llvm::reverse(GUIDFreqVec))) {
+ Desc.GUID.emplace_back(GUID);
+ InlineTree.GUIDIdxMap[GUID] = Index++;
+ uint64_t Hash = Decoder.getFuncDescForGUID(GUID)->FuncHash;
+ Desc.GUIDHashIdx.emplace_back(InlineTree.HashIdxMap[Hash]);
+ }
+
+ return {Desc, InlineTree};
+}
+
+std::vector<yaml::bolt::PseudoProbeInfo>
+YAMLProfileWriter::convertNodeProbes(NodeIdToProbes &NodeProbes) {
+ struct BlockProbeInfoHasher {
+ size_t operator()(const yaml::bolt::PseudoProbeInfo &BPI) const {
+ auto HashCombine = [](auto &Range) {
+ return llvm::hash_combine_range(Range.begin(), Range.end());
+ };
+ return llvm::hash_combine(HashCombine(BPI.BlockProbes),
+ HashCombine(BPI.CallProbes),
+ HashCombine(BPI.IndCallProbes));
+ }
+ };
+
+ // Check identical BlockProbeInfo structs and merge them
+ std::unordered_map<yaml::bolt::PseudoProbeInfo, std::vector<uint32_t>,
+ BlockProbeInfoHasher>
+ BPIToNodes;
+ for (auto &[NodeId, Probes] : NodeProbes) {
+ yaml::bolt::PseudoProbeInfo BPI;
+ BPI.BlockProbes = std::vector(Probes[0].begin(), Probes[0].end());
+ BPI.IndCallProbes = std::vector(Probes[1].begin(), Probes[1].end());
+ BPI.CallProbes = std::vector(Probes[2].begin(), Probes[2].end());
+ BPIToNodes[BPI].push_back(NodeId);
+ }
+
+ auto handleMask = [](const auto &Ids, auto &Vec, auto &Mask) {
+ for (auto Id : Ids)
+ if (Id > 64)
+ Vec.emplace_back(Id);
+ else
+ Mask |= 1ull << (Id - 1);
+ };
+
+ // Add to YAML with merged nodes/block mask optimizations
+ std::vector<yaml::bolt::PseudoProbeInfo> YamlProbes;
+ YamlProbes.reserve(BPIToNodes.size());
+ for (const auto &[BPI, Nodes] : BPIToNodes) {
+ auto &YamlBPI = YamlProbes.emplace_back(yaml::bolt::PseudoProbeInfo());
+ YamlBPI.CallProbes = BPI.CallProbes;
+ YamlBPI.IndCallProbes = BPI.IndCallProbes;
+ if (Nodes.size() == 1)
+ YamlBPI.InlineTreeIndex = Nodes.front();
+ else
+ YamlBPI.InlineTreeNodes = Nodes;
+ handleMask(BPI.BlockProbes, YamlBPI.BlockProbes, YamlBPI.BlockMask);
+ }
+ return YamlProbes;
+}
+
+std::tuple<std::vector<yaml::bolt::InlineTreeNode>,
+ YAMLProfileWriter::InlineTreeMapTy>
+YAMLProfileWriter::convertBFInlineTree(const MCPseudoProbeDecoder &Decoder,
+ const InlineTreeDesc &InlineTree,
+ uint64_t GUID) {
+ DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
+ std::vector<yaml::bolt::InlineTreeNode> YamlInlineTree;
+ auto It = InlineTree.TopLevelGUIDToInlineTree.find(GUID);
+ if (It == InlineTree.TopLevelGUIDToInlineTree.end())
+ return {YamlInlineTree, InlineTreeNodeId};
+ const MCDecodedPseudoProbeInlineTree *Root = It->second;
+ assert(Root && "Malformed TopLevelGUIDToInlineTree");
+ uint32_t Index = 0;
+ uint32_t PrevParent = 0;
+ uint32_t PrevGUIDIdx = 0;
+ for (const auto &Node : collectInlineTree(Decoder, *Root)) {
+ InlineTreeNodeId[Node.InlineTree] = Index++;
+ auto GUIDIdxIt = InlineTree.GUIDIdxMap.find(Node.GUID);
+ assert(GUIDIdxIt != InlineTree.GUIDIdxMap.end() && "Malformed GUIDIdxMap");
+ uint32_t GUIDIdx = GUIDIdxIt->second;
+ if (GUIDIdx == PrevGUIDIdx)
+ GUIDIdx = UINT32_MAX;
+ else
+ PrevGUIDIdx = GUIDIdx;
+ YamlInlineTree.emplace_back(yaml::bolt::InlineTreeNode{
+ Node.ParentId - PrevParent, Node.InlineSite, GUIDIdx});
+ PrevParent = Node.ParentId;
+ }
+ return {YamlInlineTree, InlineTreeNodeId};
+}
+
yaml::bolt::BinaryFunctionProfile
YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
+ const InlineTreeDesc &InlineTree,
const BoltAddressTranslation *BAT) {
yaml::bolt::BinaryFunctionProfile YamlBF;
const BinaryContext &BC = BF.getBinaryContext();
@@ -77,12 +228,10 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
YamlBF.Hash = BF.getHash();
YamlBF.NumBasicBlocks = BF.size();
YamlBF.ExecCount = BF.getKnownExecutionCount();
- if (PseudoProbeDecoder) {
- if ((YamlBF.GUID = BF.getGUID())) {
- const MCPseudoProbeFuncDesc *FuncDesc =
- PseudoProbeDecoder->getFuncDescForGUID(YamlBF.GUID);
- YamlBF.PseudoProbeDescHash = FuncDesc->FuncHash;
- }
+ DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
+ if (PseudoProbeDecoder && BF.getGUID()) {
+ std::tie(YamlBF.InlineTree, InlineTreeNodeId) =
+ convertBFInlineTree(*PseudoProbeDecoder, InlineTree, BF.getGUID());
}
BinaryFunction::BasicBlockOrderType Order;
@@ -198,10 +347,10 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
const uint64_t FuncAddr = BF.getAddress();
const std::pair<uint64_t, uint64_t> &BlockRange =
BB->getInputAddressRange();
- for (const MCDecodedPseudoProbe &Probe : ProbeMap.find(
- FuncAddr + BlockRange.first, FuncAddr + BlockRange.second))
- YamlBB.PseudoProbes.emplace_back(yaml::bolt::PseudoProbeInfo{
- Probe.getGuid(), Probe.getIndex(), Probe.getType()});
+ const std::pair<uint64_t, uint64_t> BlockAddrRange = {
+ FuncAddr + BlockRange.first, FuncAddr + BlockRange.second};
+ auto Probes = ProbeMap.find(BlockAddrRange.first, BlockAddrRange.second);
+ YamlBB.PseudoProbes = writeBlockProbes(Probes, InlineTreeNodeId);
}
YamlBF.Blocks.emplace_back(YamlBB);
@@ -256,6 +405,12 @@ std::error_code YAMLProfileWriter::writeProfile(const RewriteInstance &RI) {
}
BP.Header.Flags = ProfileFlags;
+ // Add probe inline tree nodes.
+ InlineTreeDesc InlineTree;
+ if (const MCPseudoProbeDecoder *Decoder =
+ opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr)
+ std::tie(BP.PseudoProbeDesc, InlineTree) = convertPseudoProbeDesc(*Decoder);
+
// Add all function objects.
for (const auto &BFI : Functions) {
const BinaryFunction &BF = BFI.second;
@@ -263,7 +418,7 @@ std::error_code YAMLProfileWriter::writeProfile(const RewriteInstance &RI) {
if (!BF.hasValidProfile() && !RI.getProfileReader()->isTrustedSource())
continue;
- BP.Functions.emplace_back(convert(BF, opts::ProfileUseDFS));
+ BP.Functions.emplace_back(convert(BF, opts::ProfileUseDFS, InlineTree));
}
}
diff --git a/bolt/test/X86/pseudoprobe-decoding-inline.test b/bolt/test/X86/pseudoprobe-decoding-inline.test
index 1fdd00c7ef6c4b..e5e8aadc18f9e1 100644
--- a/bolt/test/X86/pseudoprobe-decoding-inline.test
+++ b/bolt/test/X86/pseudoprobe-decoding-inline.test
@@ -14,29 +14,31 @@
# RUN: FileCheck --input-file %t.yaml2 %s --check-prefix CHECK-YAML
# CHECK-YAML: name: bar
# CHECK-YAML: - bid: 0
-# CHECK-YAML: pseudo_probes: [ { guid: 0xE413754A191DB537, id: 1, type: 0 }, { guid: 0xE413754A191DB537, id: 4, type: 0 } ]
-# CHECK-YAML: guid: 0xE413754A191DB537
-# CHECK-YAML: pseudo_probe_desc_hash: 0x10E852DA94
+# CHECK-YAML: probes: [ { blx: 9 } ]
+# CHECK-YAML: inline_tree: [ { } ]
#
# CHECK-YAML: name: foo
# CHECK-YAML: - bid: 0
-# CHECK-YAML: pseudo_probes: [ { guid: 0x5CF8C24CDB18BDAC, id: 1, type: 0 }, { guid: 0x5CF8C24CDB18BDAC, id: 2, type: 0 } ]
-# CHECK-YAML: guid: 0x5CF8C24CDB18BDAC
-# CHECK-YAML: pseudo_probe_desc_hash: 0x200205A19C5B4
+# CHECK-YAML: probes: [ { blx: 3 } ]
+# CHECK-YAML: inline_tree: [ { g: 1 }, { g: 0, cs: 8 } ]
#
# CHECK-YAML: name: main
# CHECK-YAML: - bid: 0
-# CHECK-YAML: pseudo_probes: [ { guid: 0xDB956436E78DD5FA, id: 1, type: 0 }, { guid: 0x5CF8C24CDB18BDAC, id: 1, type: 0 }, { guid: 0x5CF8C24CDB18BDAC, id: 2, type: 0 } ]
-# CHECK-YAML: guid: 0xDB956436E78DD5FA
-# CHECK-YAML: pseudo_probe_desc_hash: 0x10000FFFFFFFF
+# CHECK-YAML: probes: [ { blx: 3, id: 1 }, { blx: 1 } ]
+# CHECK-YAML: inline_tree: [ { g: 2 }, { g: 1, cs: 2 }, { g: 0, p: 1, cs: 8 } ]
+#
+# CHECK-YAML: pseudo_probe_desc:
+# CHECK-YAML-NEXT: gs: [ 0xE413754A191DB537, 0x5CF8C24CDB18BDAC, 0xDB956436E78DD5FA ]
+# CHECK-YAML-NEXT: gh: [ 2, 0, 1 ]
+# CHECK-YAML-NEXT: hs: [ 0x200205A19C5B4, 0x10000FFFFFFFF, 0x10E852DA94 ]
#
## Check that without --profile-write-pseudo-probes option, no pseudo probes are
## generated
-# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata
-# RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-NO-OPT
-# CHECK-NO-OPT-NOT: pseudo_probes
-# CHECK-NO-OPT-NOT: guid
-# CHECK-NO-OPT-NOT: pseudo_probe_desc_hash
+# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml3 -o %t.fdata
+# RUN: FileCheck --input-file %t.yaml3 %s --check-prefix CHECK-NO-OPT
+# CHECK-NO-OPT-NOT: probes:
+# CHECK-NO-OPT-NOT: inline_tree:
+# CHECK-NO-OPT-NOT: pseudo_probe_desc:
CHECK: Report of decoding input pseudo probe binaries
diff --git a/bolt/test/X86/pseudoprobe-decoding-noinline.test b/bolt/test/X86/pseudoprobe-decoding-noinline.test
index 5dd6c2e25bcfe9..36a2fab74e8570 100644
--- a/bolt/test/X86/pseudoprobe-decoding-noinline.test
+++ b/bolt/test/X86/pseudoprobe-decoding-noinline.test
@@ -1,6 +1,45 @@
# REQUIRES: system-linux
-# RUN: llvm-bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin --print-pseudo-probes=all -o %t.bolt 2>&1 | FileCheck %s
+# RUN: llvm-bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin --print-pseudo-probes=all -o %t.bolt --lite=0 --enable-bat 2>&1 | FileCheck %s
+# PREAGG: B X:0 #foo# 1 0
+# PREAGG: B X:0 #bar# 1 0
+# PREAGG: B X:0 #main# 1 0
+
+## Check pseudo-probes in regular YAML profile (non-BOLTed binary)
+# RUN: link_fdata %s %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin %t.preagg PREAGG
+# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata --profile-write-pseudo-probes
+# RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-YAML
+## Check pseudo-probes in BAT YAML profile (BOLTed binary)
+# RUN: link_fdata %s %t.bolt %t.preagg2 PREAGG
+# RUN: perf2bolt %t.bolt -p %t.preagg2 --pa -w %t.yaml2 -o %t.fdata2 --profile-write-pseudo-probes
+# RUN: FileCheck --input-file %t.yaml2 %s --check-prefix CHECK-YAML
+# CHECK-YAML: name: bar
+# CHECK-YAML: - bid: 0
+# CHECK-YAML: probes: [ { blx: 9 } ]
+# CHECK-YAML: inline_tree: [ { } ]
+#
+# CHECK-YAML: name: foo
+# CHECK-YAML: - bid: 0
+# CHECK-YAML: probes: [ { blx: 3 } ]
+# CHECK-YAML: inline_tree: [ { g: 2 } ]
+#
+# CHECK-YAML: name: main
+# CHECK-YAML: - bid: 0
+# CHECK-YAML: probes: [ { blx: 1, call: [ 2 ] } ]
+# CHECK-YAML: inline_tree: [ { g: 1 } ]
+#
+# CHECK-YAML: pseudo_probe_desc:
+# CHECK-YAML-NEXT: gs: [ 0xE413754A191DB537, 0xDB956436E78DD5FA, 0x5CF8C24CDB18BDAC ]
+# CHECK-YAML-NEXT: gh: [ 2, 1, 0 ]
+# CHECK-YAML-NEXT: hs: [ 0x200205A19C5B4, 0x10000FFFFFFFF, 0x10E852DA94 ]
+#
+## Check that without --profile-write-pseudo-probes option, no pseudo probes are
+## generated
+# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml3 -o %t.fdata
+# RUN: FileCheck --input-file %t.yaml3 %s --check-prefix CHECK-NO-OPT
+# CHECK-NO-OPT-NOT: probes:
+# CHECK-NO-OPT-NOT: inline_tree:
+# CHECK-NO-OPT-NOT: pseudo_probe_desc:
;; Report of decoding input pseudo probe binaries
; CHECK: GUID: 6699318081062747564 Name: foo
diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h
index 32905c1e9a424a..4bfae9eba1a0aa 100644
--- a/llvm/include/llvm/MC/MCPseudoProbe.h
+++ b/llvm/include/llvm/MC/MCPseudoProbe.h
@@ -507,6 +507,10 @@ class MCPseudoProbeDecoder {
return iterator_range(It->second);
}
+ const ArrayRef<MCDecodedPseudoProbeInlineTree> getInlineTreeVec() const {
+ return InlineTreeVec;
+ }
+
private:
// Recursively parse an inlining tree encoded in pseudo_probe section. Returns
// whether the the top-level node should be skipped.
More information about the llvm-commits
mailing list