[llvm-branch-commits] [llvm] [BOLT] Add pseudo probe inline tree to YAML profile (PR #107137)
Amir Ayupov via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Sep 12 11:39:16 PDT 2024
================
@@ -58,8 +59,158 @@ const BinaryFunction *YAMLProfileWriter::setCSIDestination(
return nullptr;
}
+std::vector<YAMLProfileWriter::InlineTreeNode>
+YAMLProfileWriter::collectInlineTree(
+ const MCPseudoProbeDecoder &Decoder,
+ const MCDecodedPseudoProbeInlineTree &Root) {
+ auto getHash = [&](const MCDecodedPseudoProbeInlineTree &Node) {
+ return Decoder.getFuncDescForGUID(Node.Guid)->FuncHash;
+ };
+ std::vector<InlineTreeNode> InlineTree(
+ {InlineTreeNode{&Root, Root.Guid, getHash(Root), 0, 0}});
+ uint32_t ParentId = 0;
+ while (ParentId != InlineTree.size()) {
+ const MCDecodedPseudoProbeInlineTree *Cur = InlineTree[ParentId].InlineTree;
+ for (const MCDecodedPseudoProbeInlineTree &Child : Cur->getChildren())
+ InlineTree.emplace_back(
+ InlineTreeNode{&Child, Child.Guid, getHash(Child), ParentId,
+ std::get<1>(Child.getInlineSite())});
+ ++ParentId;
+ }
+
+ return InlineTree;
+}
+
+std::tuple<yaml::bolt::PseudoProbeDesc, YAMLProfileWriter::InlineTreeDesc>
+YAMLProfileWriter::convertPseudoProbeDesc(const MCPseudoProbeDecoder &Decoder) {
+ yaml::bolt::PseudoProbeDesc Desc;
+ InlineTreeDesc InlineTree;
+
+ for (const MCDecodedPseudoProbeInlineTree &TopLev :
+ Decoder.getDummyInlineRoot().getChildren())
+ InlineTree.TopLevelGUIDToInlineTree[TopLev.Guid] = &TopLev;
+
+ for (const auto &FuncDesc : Decoder.getGUID2FuncDescMap())
+ ++InlineTree.HashIdxMap[FuncDesc.FuncHash];
+
+ InlineTree.GUIDIdxMap.reserve(Decoder.getGUID2FuncDescMap().size());
+ for (const auto &Node : Decoder.getInlineTreeVec())
+ ++InlineTree.GUIDIdxMap[Node.Guid];
+
+ std::vector<std::pair<uint32_t, uint64_t>> GUIDFreqVec;
+ GUIDFreqVec.reserve(InlineTree.GUIDIdxMap.size());
+ for (const auto [GUID, Cnt] : InlineTree.GUIDIdxMap)
+ GUIDFreqVec.emplace_back(Cnt, GUID);
+ llvm::sort(GUIDFreqVec);
+
+ std::vector<std::pair<uint32_t, uint64_t>> HashFreqVec;
+ HashFreqVec.reserve(InlineTree.HashIdxMap.size());
+ for (const auto [Hash, Cnt] : InlineTree.HashIdxMap)
+ HashFreqVec.emplace_back(Cnt, Hash);
+ llvm::sort(HashFreqVec);
+
+ uint32_t Index = 0;
+ Desc.Hash.reserve(HashFreqVec.size());
+ for (uint64_t Hash : llvm::make_second_range(llvm::reverse(HashFreqVec))) {
+ Desc.Hash.emplace_back(Hash);
+ InlineTree.HashIdxMap[Hash] = Index++;
+ }
+
+ Index = 0;
+ Desc.GUID.reserve(GUIDFreqVec.size());
+ for (uint64_t GUID : llvm::make_second_range(llvm::reverse(GUIDFreqVec))) {
+ Desc.GUID.emplace_back(GUID);
+ InlineTree.GUIDIdxMap[GUID] = Index++;
+ uint64_t Hash = Decoder.getFuncDescForGUID(GUID)->FuncHash;
+ Desc.GUIDHashIdx.emplace_back(InlineTree.HashIdxMap[Hash]);
+ }
+
+ return {Desc, InlineTree};
+}
+
+std::vector<yaml::bolt::PseudoProbeInfo>
+YAMLProfileWriter::convertNodeProbes(NodeIdToProbes &NodeProbes) {
+ struct BlockProbeInfoHasher {
+ size_t operator()(const yaml::bolt::PseudoProbeInfo &BPI) const {
+ auto HashCombine = [](auto &Range) {
+ return llvm::hash_combine_range(Range.begin(), Range.end());
+ };
+ return llvm::hash_combine(HashCombine(BPI.BlockProbes),
+ HashCombine(BPI.CallProbes),
+ HashCombine(BPI.IndCallProbes));
+ }
+ };
+
+ // Check identical BlockProbeInfo structs and merge them
+ std::unordered_map<yaml::bolt::PseudoProbeInfo, std::vector<uint32_t>,
+ BlockProbeInfoHasher>
+ BPIToNodes;
+ for (auto &[NodeId, Probes] : NodeProbes) {
+ yaml::bolt::PseudoProbeInfo BPI;
+ BPI.BlockProbes = std::vector(Probes[0].begin(), Probes[0].end());
+ BPI.IndCallProbes = std::vector(Probes[1].begin(), Probes[1].end());
+ BPI.CallProbes = std::vector(Probes[2].begin(), Probes[2].end());
+ BPIToNodes[BPI].push_back(NodeId);
+ }
+
+ auto handleMask = [](const auto &Ids, auto &Vec, auto &Mask) {
+ for (auto Id : Ids)
+ if (Id > 64)
+ Vec.emplace_back(Id);
+ else
+ Mask |= 1ull << (Id - 1);
+ };
+
+ // Add to YAML with merged nodes/block mask optimizations
+ std::vector<yaml::bolt::PseudoProbeInfo> YamlProbes;
+ YamlProbes.reserve(BPIToNodes.size());
+ for (const auto &[BPI, Nodes] : BPIToNodes) {
+ auto &YamlBPI = YamlProbes.emplace_back(yaml::bolt::PseudoProbeInfo());
+ YamlBPI.CallProbes = BPI.CallProbes;
+ YamlBPI.IndCallProbes = BPI.IndCallProbes;
+ if (Nodes.size() == 1)
+ YamlBPI.InlineTreeIndex = Nodes.front();
+ else
+ YamlBPI.InlineTreeNodes = Nodes;
+ handleMask(BPI.BlockProbes, YamlBPI.BlockProbes, YamlBPI.BlockMask);
+ }
+ return YamlProbes;
+}
+
+std::tuple<std::vector<yaml::bolt::InlineTreeNode>,
+ YAMLProfileWriter::InlineTreeMapTy>
+YAMLProfileWriter::convertBFInlineTree(const MCPseudoProbeDecoder &Decoder,
+ const InlineTreeDesc &InlineTree,
+ uint64_t GUID) {
+ DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
+ std::vector<yaml::bolt::InlineTreeNode> YamlInlineTree;
+ auto It = InlineTree.TopLevelGUIDToInlineTree.find(GUID);
+ if (It == InlineTree.TopLevelGUIDToInlineTree.end())
+ return {YamlInlineTree, InlineTreeNodeId};
+ const MCDecodedPseudoProbeInlineTree *Root = It->second;
+ assert(Root && "Malformed TopLevelGUIDToInlineTree");
+ uint32_t Index = 0;
+ uint32_t PrevParent = 0;
+ uint32_t PrevGUIDIdx = 0;
----------------
aaupov wrote:
I initially thought the same, but it's a matter of convention: we can assume the implicit value is zero, so if the first node's GUID index is 0 it can be omitted.
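
For illustration, here is a minimal, hypothetical sketch of that implicit-zero convention using llvm::yaml::MappingTraits (not the PR's actual code; the struct name, field names, and YAML keys below are invented for the example). mapOptional with a default of 0 skips the key on output when the value equals 0 and reads a missing key back as 0, which is what lets a leading GUID index of 0 be left out entirely:

// Hypothetical sketch only -- ExampleInlineTreeNode, ParentIndexDelta,
// GUIDIndexDelta, "parent" and "guid" are invented names for illustration.
#include "llvm/Support/YAMLTraits.h"
#include <cstdint>

struct ExampleInlineTreeNode {
  uint32_t ParentIndexDelta = 0; // delta vs. the previous node's parent index
  uint32_t GUIDIndexDelta = 0;   // delta vs. the previous node's GUID index
};

namespace llvm {
namespace yaml {
template <> struct MappingTraits<ExampleInlineTreeNode> {
  static void mapping(IO &YamlIO, ExampleInlineTreeNode &Node) {
    // With an explicit default of 0, a zero value is not emitted on output,
    // and a missing key reads back as zero.
    YamlIO.mapOptional("parent", Node.ParentIndexDelta, 0u);
    YamlIO.mapOptional("guid", Node.GUIDIndexDelta, 0u);
  }
};
} // namespace yaml
} // namespace llvm

The only requirement is that the writer and reader agree that a missing field means zero, which is the convention the delta encoding relies on here.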
https://github.com/llvm/llvm-project/pull/107137