[llvm-branch-commits] [MC][NFC] Reduce Address2ProbesMap size (PR #102904)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Aug 12 07:04:01 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-bolt

@llvm/pr-subscribers-pgo

Author: Amir Ayupov (aaupov)

<details>
<summary>Changes</summary>

Replace the map from addresses to list of probes with a flat vector
containing probe references sorted by their addresses.

Reduces pseudo probe parsing time from 9.56s to 8.59s and peak RSS from
9.66 GiB to 9.08 GiB as part of perf2bolt processing a large binary.

Test Plan:
```
bin/llvm-lit -sv test/tools/llvm-profgen
```


---
Full diff: https://github.com/llvm/llvm-project/pull/102904.diff


6 Files Affected:

- (modified) bolt/lib/Profile/DataAggregator.cpp (+6-8) 
- (modified) bolt/lib/Profile/YAMLProfileWriter.cpp (+4-7) 
- (modified) bolt/lib/Rewrite/PseudoProbeRewriter.cpp (+34-49) 
- (modified) llvm/include/llvm/MC/MCPseudoProbe.h (+19-5) 
- (modified) llvm/lib/MC/MCPseudoProbe.cpp (+22-21) 
- (modified) llvm/tools/llvm-profgen/ProfileGenerator.cpp (+3-5) 


``````````diff
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index a300e5b2b1dabd..813d825f8b570c 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -2415,17 +2415,15 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
         Fragments.insert(BF);
         for (const BinaryFunction *F : Fragments) {
           const uint64_t FuncAddr = F->getAddress();
-          const auto &FragmentProbes =
-              llvm::make_range(ProbeMap.lower_bound(FuncAddr),
-                               ProbeMap.lower_bound(FuncAddr + F->getSize()));
-          for (const auto &[OutputAddress, Probes] : FragmentProbes) {
+          for (const MCDecodedPseudoProbe &Probe :
+               ProbeMap.find(FuncAddr, FuncAddr + F->getSize())) {
+            const uint32_t OutputAddress = Probe.getAddress();
             const uint32_t InputOffset = BAT->translate(
                 FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true);
             const unsigned BlockIndex = getBlock(InputOffset).second;
-            for (const MCDecodedPseudoProbe &Probe : Probes)
-              YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back(
-                  yaml::bolt::PseudoProbeInfo{Probe.getGuid(), Probe.getIndex(),
-                                              Probe.getType()});
+            YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back(
+                yaml::bolt::PseudoProbeInfo{Probe.getGuid(), Probe.getIndex(),
+                                            Probe.getType()});
           }
         }
       }
diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp
index 84777741d611a3..f74cf60e076d0a 100644
--- a/bolt/lib/Profile/YAMLProfileWriter.cpp
+++ b/bolt/lib/Profile/YAMLProfileWriter.cpp
@@ -193,13 +193,10 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
       const uint64_t FuncAddr = BF.getAddress();
       const std::pair<uint64_t, uint64_t> &BlockRange =
           BB->getInputAddressRange();
-      const auto &BlockProbes =
-          llvm::make_range(ProbeMap.lower_bound(FuncAddr + BlockRange.first),
-                           ProbeMap.lower_bound(FuncAddr + BlockRange.second));
-      for (const auto &[_, Probes] : BlockProbes)
-        for (const MCDecodedPseudoProbe &Probe : Probes)
-          YamlBB.PseudoProbes.emplace_back(yaml::bolt::PseudoProbeInfo{
-              Probe.getGuid(), Probe.getIndex(), Probe.getType()});
+      for (const MCDecodedPseudoProbe &Probe : ProbeMap.find(
+               FuncAddr + BlockRange.first, FuncAddr + BlockRange.second))
+        YamlBB.PseudoProbes.emplace_back(yaml::bolt::PseudoProbeInfo{
+            Probe.getGuid(), Probe.getIndex(), Probe.getType()});
     }
 
     YamlBF.Blocks.emplace_back(YamlBB);
diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
index d99e4280b0f099..95a0d2c1fbe594 100644
--- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
+++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
@@ -173,13 +173,13 @@ void PseudoProbeRewriter::updatePseudoProbes() {
   AddressProbesMap &Address2ProbesMap = ProbeDecoder.getAddress2ProbesMap();
   const GUIDProbeFunctionMap &GUID2Func = ProbeDecoder.getGUID2FuncDescMap();
 
-  for (auto &AP : Address2ProbesMap) {
-    BinaryFunction *F = BC.getBinaryFunctionContainingAddress(AP.first);
+  for (MCDecodedPseudoProbe &Probe : Address2ProbesMap) {
+    uint64_t Address = Probe.getAddress();
+    BinaryFunction *F = BC.getBinaryFunctionContainingAddress(Address);
     // If F is removed, eliminate all probes inside it from inline tree
     // Setting probes' addresses as INT64_MAX means elimination
     if (!F) {
-      for (MCDecodedPseudoProbe &Probe : AP.second)
-        Probe.setAddress(INT64_MAX);
+      Probe.setAddress(INT64_MAX);
       continue;
     }
     // If F is not emitted, the function will remain in the same address as its
@@ -187,45 +187,36 @@ void PseudoProbeRewriter::updatePseudoProbes() {
     if (!F->isEmitted())
       continue;
 
-    uint64_t Offset = AP.first - F->getAddress();
+    uint64_t Offset = Address - F->getAddress();
     const BinaryBasicBlock *BB = F->getBasicBlockContainingOffset(Offset);
     uint64_t BlkOutputAddress = BB->getOutputAddressRange().first;
     // Check if block output address is defined.
     // If not, such block is removed from binary. Then remove the probes from
     // inline tree
     if (BlkOutputAddress == 0) {
-      for (MCDecodedPseudoProbe &Probe : AP.second)
-        Probe.setAddress(INT64_MAX);
+      Probe.setAddress(INT64_MAX);
       continue;
     }
 
-    unsigned ProbeTrack = AP.second.size();
-    auto Probe = llvm::map_iterator(
-        AP.second.begin(),
-        [](auto RW) -> MCDecodedPseudoProbe & { return RW.get(); });
-    while (ProbeTrack != 0) {
-      if (Probe->isBlock()) {
-        Probe->setAddress(BlkOutputAddress);
-      } else if (Probe->isCall()) {
-        // A call probe may be duplicated due to ICP
-        // Go through output of InputOffsetToAddressMap to collect all related
-        // probes
-        auto CallOutputAddresses = BC.getIOAddressMap().lookupAll(AP.first);
-        auto CallOutputAddress = CallOutputAddresses.first;
-        if (CallOutputAddress == CallOutputAddresses.second) {
-          Probe->setAddress(INT64_MAX);
-        } else {
-          Probe->setAddress(CallOutputAddress->second);
-          CallOutputAddress = std::next(CallOutputAddress);
-        }
-
-        while (CallOutputAddress != CallOutputAddresses.second) {
-          ProbeDecoder.addInjectedProbe(*Probe, CallOutputAddress->second);
-          CallOutputAddress = std::next(CallOutputAddress);
-        }
+    if (Probe.isBlock()) {
+      Probe.setAddress(BlkOutputAddress);
+    } else if (Probe.isCall()) {
+      // A call probe may be duplicated due to ICP
+      // Go through output of InputOffsetToAddressMap to collect all related
+      // probes
+      auto CallOutputAddresses = BC.getIOAddressMap().lookupAll(Address);
+      auto CallOutputAddress = CallOutputAddresses.first;
+      if (CallOutputAddress == CallOutputAddresses.second) {
+        Probe.setAddress(INT64_MAX);
+      } else {
+        Probe.setAddress(CallOutputAddress->second);
+        CallOutputAddress = std::next(CallOutputAddress);
+      }
+
+      while (CallOutputAddress != CallOutputAddresses.second) {
+        ProbeDecoder.addInjectedProbe(Probe, CallOutputAddress->second);
+        CallOutputAddress = std::next(CallOutputAddress);
       }
-      Probe = std::next(Probe);
-      ProbeTrack--;
     }
   }
 
@@ -241,22 +232,16 @@ void PseudoProbeRewriter::updatePseudoProbes() {
             BinaryBlock.getName();
 
     // scan all addresses -> correlate probe to block when print out
-    std::vector<uint64_t> Addresses;
-    for (auto &Entry : Address2ProbesMap)
-      Addresses.push_back(Entry.first);
-    llvm::sort(Addresses);
-    for (uint64_t Key : Addresses) {
-      for (MCDecodedPseudoProbe &Probe : Address2ProbesMap[Key]) {
-        if (Probe.getAddress() == INT64_MAX)
-          outs() << "Deleted Probe: ";
-        else
-          outs() << "Address: " << format_hex(Probe.getAddress(), 8) << " ";
-        Probe.print(outs(), GUID2Func, true);
-        // print block name only if the probe is block type and undeleted.
-        if (Probe.isBlock() && Probe.getAddress() != INT64_MAX)
-          outs() << format_hex(Probe.getAddress(), 8) << " Probe is in "
-                 << Addr2BlockNames[Probe.getAddress()] << "\n";
-      }
+    for (MCDecodedPseudoProbe &Probe : Address2ProbesMap) {
+      if (Probe.getAddress() == INT64_MAX)
+        outs() << "Deleted Probe: ";
+      else
+        outs() << "Address: " << format_hex(Probe.getAddress(), 8) << " ";
+      Probe.print(outs(), GUID2Func, true);
+      // print block name only if the probe is block type and undeleted.
+      if (Probe.isBlock() && Probe.getAddress() != INT64_MAX)
+        outs() << format_hex(Probe.getAddress(), 8) << " Probe is in "
+               << Addr2BlockNames[Probe.getAddress()] << "\n";
     }
     outs() << "=======================================\n";
   }
diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h
index a46188e565c7e8..6021dd38e9d9c6 100644
--- a/llvm/include/llvm/MC/MCPseudoProbe.h
+++ b/llvm/include/llvm/MC/MCPseudoProbe.h
@@ -63,7 +63,6 @@
 #include "llvm/IR/PseudoProbe.h"
 #include "llvm/Support/ErrorOr.h"
 #include <functional>
-#include <map>
 #include <memory>
 #include <string>
 #include <tuple>
@@ -103,10 +102,6 @@ using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>;
 // GUID to PseudoProbeFuncDesc map
 using GUIDProbeFunctionMap =
     std::unordered_map<uint64_t, MCPseudoProbeFuncDesc>;
-// Address to pseudo probes map.
-using AddressProbesMap =
-    std::map<uint64_t,
-             std::vector<std::reference_wrapper<MCDecodedPseudoProbe>>>;
 
 class MCDecodedPseudoProbeInlineTree;
 
@@ -213,6 +208,25 @@ class MCDecodedPseudoProbe : public MCPseudoProbeBase {
              bool ShowName) const;
 };
 
+// Address to pseudo probes map.
+class AddressProbesMap
+    : public std::vector<std::reference_wrapper<MCDecodedPseudoProbe>> {
+  auto getIt(uint64_t Addr) const {
+    auto CompareProbe = [](const MCDecodedPseudoProbe &Probe, uint64_t Addr) {
+      return Probe.getAddress() < Addr;
+    };
+    return llvm::lower_bound(*this, Addr, CompareProbe);
+  }
+
+public:
+  // Returns range of probes within [\p From, \p To) address range.
+  auto find(uint64_t From, uint64_t To) const {
+    return llvm::make_range(getIt(From), getIt(To));
+  }
+  // Returns range of probes with given \p Address.
+  auto find(uint64_t Address) const { return find(Address, Address + 1); }
+};
+
 template <typename ProbeType, typename DerivedProbeInlineTreeType>
 class MCPseudoProbeInlineTreeBase {
   struct InlineSiteHash {
diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp
index c4c2dfcec40564..45fe95e176ff24 100644
--- a/llvm/lib/MC/MCPseudoProbe.cpp
+++ b/llvm/lib/MC/MCPseudoProbe.cpp
@@ -506,7 +506,6 @@ void MCPseudoProbeDecoder::buildAddress2ProbeMap(
     if (Cur && !isSentinelProbe(Attr)) {
       PseudoProbeVec.emplace_back(Addr, Index, PseudoProbeType(Kind), Attr,
                                   Discriminator, Cur);
-      Address2ProbesMap[Addr].emplace_back(PseudoProbeVec.back());
       ++CurrentProbeCount;
     }
     LastAddr = Addr;
@@ -635,6 +634,15 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
     buildAddress2ProbeMap<true>(&DummyInlineRoot, LastAddr, GuidFilter,
                                 FuncStartAddrs, Child);
   assert(Data == End && "Have unprocessed data in pseudo_probe section");
+
+  std::vector<std::pair<uint64_t, uint32_t>> SortedA2P(ProbeCount);
+  for (const auto &[I, Probe] : llvm::enumerate(PseudoProbeVec))
+    SortedA2P[I] = {Probe.getAddress(), I};
+  llvm::sort(SortedA2P, llvm::less_first());
+  Address2ProbesMap.reserve(ProbeCount);
+  for (const uint32_t I : llvm::make_second_range(SortedA2P))
+    Address2ProbesMap.emplace_back(PseudoProbeVec[I]);
+  SortedA2P.clear();
   return true;
 }
 
@@ -650,36 +658,29 @@ void MCPseudoProbeDecoder::printGUID2FuncDescMap(raw_ostream &OS) {
 
 void MCPseudoProbeDecoder::printProbeForAddress(raw_ostream &OS,
                                                 uint64_t Address) {
-  auto It = Address2ProbesMap.find(Address);
-  if (It != Address2ProbesMap.end()) {
-    for (const MCDecodedPseudoProbe &Probe : It->second) {
-      OS << " [Probe]:\t";
-      Probe.print(OS, GUID2FuncDescMap, true);
-    }
+  for (const MCDecodedPseudoProbe &Probe : Address2ProbesMap.find(Address)) {
+    OS << " [Probe]:\t";
+    Probe.print(OS, GUID2FuncDescMap, true);
   }
 }
 
 void MCPseudoProbeDecoder::printProbesForAllAddresses(raw_ostream &OS) {
-  auto Entries = make_first_range(Address2ProbesMap);
-  SmallVector<uint64_t, 0> Addresses(Entries.begin(), Entries.end());
-  llvm::sort(Addresses);
-  for (auto K : Addresses) {
-    OS << "Address:\t";
-    OS << K;
-    OS << "\n";
-    printProbeForAddress(OS, K);
+  uint64_t PrevAddress = INT64_MAX;
+  for (MCDecodedPseudoProbe &Probe : Address2ProbesMap) {
+    uint64_t Address = Probe.getAddress();
+    if (Address != PrevAddress) {
+      PrevAddress = Address;
+      OS << "Address:\t" << Address << '\n';
+    }
+    OS << " [Probe]:\t";
+    Probe.print(OS, GUID2FuncDescMap, true);
   }
 }
 
 const MCDecodedPseudoProbe *
 MCPseudoProbeDecoder::getCallProbeForAddr(uint64_t Address) const {
-  auto It = Address2ProbesMap.find(Address);
-  if (It == Address2ProbesMap.end())
-    return nullptr;
-  const auto &Probes = It->second;
-
   const MCDecodedPseudoProbe *CallProbe = nullptr;
-  for (const MCDecodedPseudoProbe &Probe : Probes) {
+  for (const MCDecodedPseudoProbe &Probe : Address2ProbesMap.find(Address)) {
     if (Probe.isCall()) {
       // Disabling the assert and returning first call probe seen so far.
       // Subsequent call probes, if any, are ignored. Due to the the way
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 5094871a1d415d..49b1e9ce86e148 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -1183,11 +1183,9 @@ void ProfileGeneratorBase::extractProbesFromRange(
     do {
       const AddressProbesMap &Address2ProbesMap =
           Binary->getAddress2ProbesMap();
-      auto It = Address2ProbesMap.find(IP.Address);
-      if (It != Address2ProbesMap.end()) {
-        for (const MCDecodedPseudoProbe &Probe : It->second) {
-          ProbeCounter[&Probe] += Count;
-        }
+      for (const MCDecodedPseudoProbe &Probe :
+           Address2ProbesMap.find(IP.Address)) {
+        ProbeCounter[&Probe] += Count;
       }
     } while (IP.advance() && IP.Address <= RangeEnd);
   }

``````````

</details>


https://github.com/llvm/llvm-project/pull/102904


More information about the llvm-branch-commits mailing list