[llvm-branch-commits] [MC][NFC] Statically allocate storage for decoded pseudo probes and function records (PR #102789)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Sat Aug 10 21:34:40 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mc

@llvm/pr-subscribers-bolt

Author: Amir Ayupov (aaupov)

<details>
<summary>Changes</summary>

Use #<!-- -->102774 to allocate storage for decoded probes (`PseudoProbeVec`)
and function records (`InlineTreeVec`). Additionally, probes and inlined
function records are allocated contiguously, which allows to keep track
of them using `ArrayRef`s. This in turn reduces the size of
`MCDecodedPseudoProbeInlineTree`.


---
Full diff: https://github.com/llvm/llvm-project/pull/102789.diff


4 Files Affected:

- (modified) bolt/lib/Rewrite/PseudoProbeRewriter.cpp (+16-7) 
- (modified) llvm/include/llvm/MC/MCPseudoProbe.h (+59-12) 
- (modified) llvm/lib/MC/MCPseudoProbe.cpp (+30-15) 
- (modified) llvm/tools/llvm-profgen/ProfiledBinary.cpp (+5-5) 


``````````diff
diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
index 37a5b937ebcaa..e5ced0f75d187 100644
--- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
+++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
@@ -200,7 +200,9 @@ void PseudoProbeRewriter::updatePseudoProbes() {
     }
 
     unsigned ProbeTrack = AP.second.size();
-    std::list<MCDecodedPseudoProbe>::iterator Probe = AP.second.begin();
+    auto Probe = llvm::map_iterator(
+        AP.second.begin(),
+        [](auto RW) -> MCDecodedPseudoProbe & { return RW.get(); });
     while (ProbeTrack != 0) {
       if (Probe->isBlock()) {
         Probe->setAddress(BlkOutputAddress);
@@ -218,9 +220,7 @@ void PseudoProbeRewriter::updatePseudoProbes() {
         }
 
         while (CallOutputAddress != CallOutputAddresses.second) {
-          AP.second.push_back(*Probe);
-          AP.second.back().setAddress(CallOutputAddress->second);
-          Probe->getInlineTreeNode()->addProbes(&(AP.second.back()));
+          ProbeDecoder.addInjectedProbe(*Probe, CallOutputAddress->second);
           CallOutputAddress = std::next(CallOutputAddress);
         }
       }
@@ -332,7 +332,7 @@ void PseudoProbeRewriter::encodePseudoProbes() {
       ProbeDecoder.getDummyInlineRoot();
   for (auto Child = Root.getChildren().begin();
        Child != Root.getChildren().end(); ++Child)
-    Inlinees[Child->first] = Child->second.get();
+    Inlinees[Child->getInlineSite()] = &*Child;
 
   for (auto Inlinee : Inlinees)
     // INT64_MAX is "placeholder" of unused callsite index field in the pair
@@ -362,7 +362,8 @@ void PseudoProbeRewriter::encodePseudoProbes() {
       if (Probe->getAddress() == INT64_MAX)
         Deleted++;
     LLVM_DEBUG(dbgs() << "Deleted Probes:" << Deleted << "\n");
-    uint64_t ProbesSize = Cur->getProbes().size() - Deleted;
+    size_t InjectedProbes = ProbeDecoder.getNumInjectedProbes(Cur);
+    uint64_t ProbesSize = Cur->Probes.size() + InjectedProbes - Deleted;
     EmitULEB128IntValue(ProbesSize);
     // Emit number of direct inlinees
     EmitULEB128IntValue(Cur->getChildren().size());
@@ -373,10 +374,18 @@ void PseudoProbeRewriter::encodePseudoProbes() {
       EmitDecodedPseudoProbe(Probe);
       LastProbe = Probe;
     }
+    if (InjectedProbes) {
+      for (MCDecodedPseudoProbe *&Probe : ProbeDecoder.getInjectedProbes(Cur)) {
+        if (Probe->getAddress() == INT64_MAX)
+          continue;
+        EmitDecodedPseudoProbe(Probe);
+        LastProbe = Probe;
+      }
+    }
 
     for (auto Child = Cur->getChildren().begin();
          Child != Cur->getChildren().end(); ++Child)
-      Inlinees[Child->first] = Child->second.get();
+      Inlinees[Child->getInlineSite()] = &*Child;
     for (const auto &Inlinee : Inlinees) {
       assert(Cur->Guid != 0 && "non root tree node must have nonzero Guid");
       NextNodes.push_back({std::get<1>(Inlinee.first), Inlinee.second});
diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h
index 5c536a8ec0b55..5dba797be5c30 100644
--- a/llvm/include/llvm/MC/MCPseudoProbe.h
+++ b/llvm/include/llvm/MC/MCPseudoProbe.h
@@ -54,20 +54,21 @@
 #ifndef LLVM_MC_MCPSEUDOPROBE_H
 #define LLVM_MC_MCPSEUDOPROBE_H
 
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator.h"
 #include "llvm/IR/PseudoProbe.h"
 #include "llvm/Support/ErrorOr.h"
-#include <list>
+#include <functional>
 #include <map>
 #include <memory>
 #include <string>
 #include <tuple>
 #include <type_traits>
 #include <unordered_map>
-#include <unordered_set>
 #include <vector>
 
 namespace llvm {
@@ -103,7 +104,9 @@ using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>;
 using GUIDProbeFunctionMap =
     std::unordered_map<uint64_t, MCPseudoProbeFuncDesc>;
 // Address to pseudo probes map.
-using AddressProbesMap = std::map<uint64_t, std::list<MCDecodedPseudoProbe>>;
+using AddressProbesMap =
+    std::map<uint64_t,
+             std::vector<std::reference_wrapper<MCDecodedPseudoProbe>>>;
 
 class MCDecodedPseudoProbeInlineTree;
 
@@ -276,17 +279,31 @@ class MCPseudoProbeInlineTree
 };
 
 // inline tree node for the decoded pseudo probe
-class MCDecodedPseudoProbeInlineTree
-    : public MCPseudoProbeInlineTreeBase<MCDecodedPseudoProbe *,
-                                         MCDecodedPseudoProbeInlineTree> {
+class MCDecodedPseudoProbeInlineTree {
 public:
-  InlineSite ISite;
+  uint64_t Guid = 0;
+  uint32_t ProbeId = 0;
+
+  // Caller node of the inline site
+  MCDecodedPseudoProbeInlineTree *Parent = nullptr;
 
   MCDecodedPseudoProbeInlineTree() = default;
-  MCDecodedPseudoProbeInlineTree(const InlineSite &Site) : ISite(Site){};
+  MCDecodedPseudoProbeInlineTree(uint64_t Guid, uint32_t ProbeId,
+                                 MCDecodedPseudoProbeInlineTree *Parent)
+      : Guid(Guid), ProbeId(ProbeId), Parent(Parent) {}
+
+  // Track children (e.g. inlinees) of current context
+  MutableArrayRef<MCDecodedPseudoProbeInlineTree> Children;
+  // Set of probes that come with the function.
+  MutableArrayRef<MCDecodedPseudoProbe> Probes;
 
+  // Root node has a GUID 0.
+  bool isRoot() const { return Guid == 0; }
   // Return false if it's a dummy inline site
   bool hasInlineSite() const { return !isRoot() && !Parent->isRoot(); }
+  InlineSite getInlineSite() const { return InlineSite(Guid, ProbeId); }
+  auto getChildren() const { return Children; }
+  auto getProbes() const { return llvm::make_pointer_range(Probes); }
 };
 
 /// Instances of this class represent the pseudo probes inserted into a compile
@@ -336,6 +353,15 @@ class MCPseudoProbeTable {
 };
 
 class MCPseudoProbeDecoder {
+  // Decoded pseudo probes vector.
+  std::vector<MCDecodedPseudoProbe> PseudoProbeVec;
+  // Injected pseudo probes, identified by the containing inline tree node.
+  std::unordered_map<const MCDecodedPseudoProbeInlineTree *,
+                     std::vector<MCDecodedPseudoProbe>>
+      InjectedProbeMap;
+  // Decoded inline records vector.
+  std::vector<MCDecodedPseudoProbeInlineTree> InlineTreeVec;
+
   // GUID to PseudoProbeFuncDesc map.
   GUIDProbeFunctionMap GUID2FuncDescMap;
 
@@ -381,10 +407,6 @@ class MCPseudoProbeDecoder {
                              const Uint64Set &GuildFilter,
                              const Uint64Map &FuncStartAddrs);
 
-  bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur,
-                             uint64_t &LastAddr, const Uint64Set &GuildFilter,
-                             const Uint64Map &FuncStartAddrs);
-
   // Print pseudo_probe_desc section info
   void printGUID2FuncDescMap(raw_ostream &OS);
 
@@ -427,6 +449,31 @@ class MCPseudoProbeDecoder {
   const MCDecodedPseudoProbeInlineTree &getDummyInlineRoot() const {
     return DummyInlineRoot;
   }
+
+  void addInjectedProbe(const MCDecodedPseudoProbe &Probe, uint64_t Address) {
+    const MCDecodedPseudoProbeInlineTree *Parent = Probe.getInlineTreeNode();
+    InjectedProbeMap[Parent].emplace_back(Probe).setAddress(Address);
+  }
+
+  size_t
+  getNumInjectedProbes(const MCDecodedPseudoProbeInlineTree *Parent) const {
+    auto It = InjectedProbeMap.find(Parent);
+    if (It == InjectedProbeMap.end())
+      return 0;
+    return It->second.size();
+  }
+
+  auto getInjectedProbes(MCDecodedPseudoProbeInlineTree *Parent) {
+    auto It = InjectedProbeMap.find(Parent);
+    assert(It != InjectedProbeMap.end());
+    return llvm::make_pointer_range(iterator_range(It->second));
+  }
+
+private:
+  void buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur,
+                             uint64_t &LastAddr, const Uint64Set &GuildFilter,
+                             const Uint64Map &FuncStartAddrs,
+                             uint32_t &CurChild);
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp
index e9e391c8eadd7..9be93445e14cf 100644
--- a/llvm/lib/MC/MCPseudoProbe.cpp
+++ b/llvm/lib/MC/MCPseudoProbe.cpp
@@ -290,7 +290,7 @@ void MCDecodedPseudoProbe::getInlineContext(
   while (Cur->hasInlineSite()) {
     StringRef FuncName = getProbeFNameForGUID(GUID2FuncMAP, Cur->Parent->Guid);
     ContextStack.emplace_back(
-        MCPseudoProbeFrameLocation(FuncName, std::get<1>(Cur->ISite)));
+        MCPseudoProbeFrameLocation(FuncName, Cur->ProbeId));
     Cur = static_cast<MCDecodedPseudoProbeInlineTree *>(Cur->Parent);
   }
   // Make the ContextStack in caller-callee order
@@ -417,9 +417,10 @@ bool MCPseudoProbeDecoder::buildGUID2FuncDescMap(const uint8_t *Start,
   return true;
 }
 
-bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
+void MCPseudoProbeDecoder::buildAddress2ProbeMap(
     MCDecodedPseudoProbeInlineTree *Cur, uint64_t &LastAddr,
-    const Uint64Set &GuidFilter, const Uint64Map &FuncStartAddrs) {
+    const Uint64Set &GuidFilter, const Uint64Map &FuncStartAddrs,
+    uint32_t &CurChild) {
   // The pseudo_probe section encodes an inline forest and each tree has a
   // format defined in MCPseudoProbe.h
 
@@ -427,7 +428,7 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
   bool IsTopLevelFunc = Cur == &DummyInlineRoot;
   if (IsTopLevelFunc) {
     // Use a sequential id for top level inliner.
-    Index = Cur->getChildren().size();
+    Index = CurChild;
   } else {
     // Read inline site for inlinees
     Index = cantFail(errorOrToExpected(readUnsignedNumber<uint32_t>()));
@@ -443,17 +444,21 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
   // If the incoming node is null, all its children nodes should be disgarded.
   if (Cur) {
     // Switch/add to a new tree node(inlinee)
-    Cur = Cur->getOrAddNode(std::make_tuple(Guid, Index));
-    Cur->Guid = Guid;
+    Cur->Children[CurChild] = MCDecodedPseudoProbeInlineTree(Guid, Index, Cur);
+    Cur = &Cur->Children[CurChild];
     if (IsTopLevelFunc && !EncodingIsAddrBased) {
       if (auto V = FuncStartAddrs.lookup(Guid))
         LastAddr = V;
     }
   }
+  // Advance CurChild for non-skipped top-level functions and unconditionally
+  // for inlined functions.
+  CurChild += Cur || !IsTopLevelFunc;
 
   // Read number of probes in the current node.
   uint32_t NodeCount =
       cantFail(errorOrToExpected(readUnsignedNumber<uint32_t>()));
+  uint32_t CurrentProbeCount = 0;
   // Read number of direct inlinees
   uint32_t ChildrenToProcess =
       cantFail(errorOrToExpected(readUnsignedNumber<uint32_t>()));
@@ -494,19 +499,22 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
     }
 
     if (Cur && !isSentinelProbe(Attr)) {
-      // Populate Address2ProbesMap
-      auto &Probes = Address2ProbesMap[Addr];
-      Probes.emplace_back(Addr, Cur->Guid, Index, PseudoProbeType(Kind), Attr,
-                          Discriminator, Cur);
-      Cur->addProbes(&Probes.back());
+      PseudoProbeVec.emplace_back(Addr, Cur->Guid, Index, PseudoProbeType(Kind),
+                                  Attr, Discriminator, Cur);
+      Address2ProbesMap[Addr].emplace_back(PseudoProbeVec.back());
+      ++CurrentProbeCount;
     }
     LastAddr = Addr;
   }
 
-  for (uint32_t I = 0; I < ChildrenToProcess; I++) {
-    buildAddress2ProbeMap(Cur, LastAddr, GuidFilter, FuncStartAddrs);
+  if (Cur) {
+    Cur->Probes = MutableArrayRef(PseudoProbeVec).take_back(CurrentProbeCount);
+    InlineTreeVec.resize(InlineTreeVec.size() + ChildrenToProcess);
+    Cur->Children = MutableArrayRef(InlineTreeVec).take_back(ChildrenToProcess);
+  }
+  for (uint32_t I = 0; I < ChildrenToProcess;) {
+    buildAddress2ProbeMap(Cur, LastAddr, GuidFilter, FuncStartAddrs, I);
   }
-  return true;
 }
 
 bool MCPseudoProbeDecoder::countRecords(bool IsTopLevelFunc, bool &Discard,
@@ -605,13 +613,20 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
     TopLevelFuncs += !Discard;
   }
   assert(Data == End && "Have unprocessed data in pseudo_probe section");
+  PseudoProbeVec.reserve(ProbeCount);
+  InlineTreeVec.reserve(InlinedCount);
+
+  // Allocate top-level function records as children of DummyInlineRoot.
+  InlineTreeVec.resize(TopLevelFuncs);
+  DummyInlineRoot.Children = MutableArrayRef(InlineTreeVec);
 
   Data = Start;
   End = Data + Size;
   uint64_t LastAddr = 0;
+  uint32_t Child = 0;
   while (Data < End)
     buildAddress2ProbeMap(&DummyInlineRoot, LastAddr, GuidFilter,
-                          FuncStartAddrs);
+                          FuncStartAddrs, Child);
   assert(Data == End && "Have unprocessed data in pseudo_probe section");
   return true;
 }
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 574a9c9f52bf1..fe7d3ffa476eb 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -132,7 +132,7 @@ void BinarySizeContextTracker::trackInlineesOptimizedAway(
     MCPseudoProbeDecoder &ProbeDecoder) {
   ProbeFrameStack ProbeContext;
   for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren())
-    trackInlineesOptimizedAway(ProbeDecoder, *Child.second, ProbeContext);
+    trackInlineesOptimizedAway(ProbeDecoder, Child, ProbeContext);
 }
 
 void BinarySizeContextTracker::trackInlineesOptimizedAway(
@@ -160,9 +160,9 @@ void BinarySizeContextTracker::trackInlineesOptimizedAway(
 
   // DFS down the probe inline tree
   for (const auto &ChildNode : ProbeNode.getChildren()) {
-    InlineSite Location = ChildNode.first;
+    InlineSite Location = ChildNode.getInlineSite();
     ProbeContext.back().second = std::get<1>(Location);
-    trackInlineesOptimizedAway(ProbeDecoder, *ChildNode.second, ProbeContext);
+    trackInlineesOptimizedAway(ProbeDecoder, ChildNode, ProbeContext);
   }
 
   ProbeContext.pop_back();
@@ -454,8 +454,8 @@ void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) {
   // Build TopLevelProbeFrameMap to track size for optimized inlinees when probe
   // is available
   if (TrackFuncContextSize) {
-    for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) {
-      auto *Frame = Child.second.get();
+    for (auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) {
+      auto *Frame = &Child;
       StringRef FuncName =
           ProbeDecoder.getFuncDescForGUID(Frame->Guid)->FuncName;
       TopLevelProbeFrameMap[FuncName] = Frame;

``````````

</details>


https://github.com/llvm/llvm-project/pull/102789


More information about the llvm-branch-commits mailing list