[lld] [lld][InstrProf] Refactor BPSectionOrderer.cpp (PR #107347)
Ellis Hoag via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 5 12:16:30 PDT 2024
https://github.com/ellishg updated https://github.com/llvm/llvm-project/pull/107347
From cca346ff49665f8c751ad1f42a189d017074fc3c Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag at gmail.com>
Date: Wed, 4 Sep 2024 19:42:00 -0700
Subject: [PATCH 1/2] [InstrProf][LLD] Refactor
* Rename `constructNodesForCompression()` -> `getUnsForCompression()`
* Pass `duplicateSectionIdxs` as a pointer so callers can skip the near-duplicate merge step by passing `nullptr` (see the sketch after this list)
* Combine `duplicate{Function,Data}SectionIdxs` into one variable
* Compute all `BPFunctionNode` vectors at the end (like `nodesForStartup`)
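
For context, a small standalone sketch of the resulting calling convention (simplified types and names, not the actual lld code): the duplicates map becomes an optional out-parameter taken by pointer, so a call site can pass nullptr to skip the near-duplicate merge, both compression call sites can share one map, and the returned (sectionIdx, utility nodes) pairs are only turned into node vectors at the end.

  // Standalone sketch of the refactored shape (simplified; not the lld code).
  #include <cstdint>
  #include <map>
  #include <utility>
  #include <vector>

  using UtilityNodeT = uint32_t;
  using UtilityNodes = std::vector<UtilityNodeT>;

  // Returns (sectionIdx, utility nodes) pairs. If duplicateSectionIdxs is
  // non-null, near-identical sections are merged and recorded there instead
  // of being returned; passing nullptr skips that step entirely.
  static std::vector<std::pair<unsigned, UtilityNodes>>
  getUnsForCompressionSketch(
      const std::vector<std::vector<uint64_t>> &sectionHashes,
      std::map<unsigned, std::vector<unsigned>> *duplicateSectionIdxs,
      UtilityNodeT &maxUN) {
    std::vector<std::pair<unsigned, UtilityNodes>> sectionUns;
    std::map<uint64_t, unsigned> wholeHashToSectionIdx;
    for (unsigned sectionIdx = 0; sectionIdx < sectionHashes.size();
         ++sectionIdx) {
      const auto &hashes = sectionHashes[sectionIdx];
      if (duplicateSectionIdxs) {
        uint64_t wholeHash = 0;
        for (uint64_t h : hashes)
          wholeHash ^= h;
        auto [it, inserted] = wholeHashToSectionIdx.insert({wholeHash, sectionIdx});
        if (!inserted) {
          // Record this section as a duplicate of the earlier, kept section.
          (*duplicateSectionIdxs)[it->second].push_back(sectionIdx);
          continue;
        }
      }
      UtilityNodes uns;
      for (size_t i = 0; i < hashes.size(); ++i)
        uns.push_back(++maxUN); // the real code maps filtered hashes to UNs
      sectionUns.emplace_back(sectionIdx, uns);
    }
    return sectionUns;
  }

  int main() {
    std::vector<std::vector<uint64_t>> hashes = {{1, 2}, {1, 2}, {5}};
    std::map<unsigned, std::vector<unsigned>> duplicateSectionIdxs;
    UtilityNodeT maxUN = 0;
    // Both compression call sites can share one duplicates map...
    auto withDedup = getUnsForCompressionSketch(hashes, &duplicateSectionIdxs, maxUN);
    // ...and a caller that does not need deduplication can pass nullptr.
    auto noDedup = getUnsForCompressionSketch(hashes, /*duplicateSectionIdxs=*/nullptr, maxUN);
    return (withDedup.size() == 2 && noDedup.size() == 3) ? 0 : 1;
  }
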
---
lld/MachO/BPSectionOrderer.cpp | 118 +++++++++++++++++----------------
1 file changed, 62 insertions(+), 56 deletions(-)
diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index 568843d72bbb50..3ba844f44fb791 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -60,12 +60,13 @@ getRelocHash(const Reloc &reloc,
return getRelocHash(kind, sectionIdx.value_or(0), 0, reloc.addend);
}
-static void constructNodesForCompression(
- const SmallVector<const InputSection *> &sections,
+static SmallVector<
+ std::pair<unsigned, SmallVector<BPFunctionNode::UtilityNodeT>>>
+getUnsForCompression(
+ ArrayRef<const InputSection *> sections,
const DenseMap<const InputSection *, uint64_t> &sectionToIdx,
- const SmallVector<unsigned> &sectionIdxs,
- std::vector<BPFunctionNode> &nodes,
- DenseMap<unsigned, SmallVector<unsigned>> &duplicateSectionIdxs,
+ ArrayRef<unsigned> sectionIdxs,
+ DenseMap<unsigned, SmallVector<unsigned>> *duplicateSectionIdxs,
BPFunctionNode::UtilityNodeT &maxUN) {
TimeTraceScope timeScope("Build nodes for compression");
@@ -103,49 +104,53 @@ static void constructNodesForCompression(
for (auto hash : hashes)
++hashFrequency[hash];
- // Merge section that are nearly identical
- SmallVector<std::pair<unsigned, SmallVector<uint64_t>>> newSectionHashes;
- DenseMap<uint64_t, unsigned> wholeHashToSectionIdx;
- for (auto &[sectionIdx, hashes] : sectionHashes) {
- uint64_t wholeHash = 0;
- for (auto hash : hashes)
- if (hashFrequency[hash] > 5)
- wholeHash ^= hash;
- auto [it, wasInserted] =
- wholeHashToSectionIdx.insert(std::make_pair(wholeHash, sectionIdx));
- if (wasInserted) {
- newSectionHashes.emplace_back(sectionIdx, hashes);
- } else {
- duplicateSectionIdxs[it->getSecond()].push_back(sectionIdx);
+ if (duplicateSectionIdxs) {
+ // Merge sections that are nearly identical
+ SmallVector<std::pair<unsigned, SmallVector<uint64_t>>> newSectionHashes;
+ DenseMap<uint64_t, unsigned> wholeHashToSectionIdx;
+ for (auto &[sectionIdx, hashes] : sectionHashes) {
+ uint64_t wholeHash = 0;
+ for (auto hash : hashes)
+ if (hashFrequency[hash] > 5)
+ wholeHash ^= hash;
+ auto [it, wasInserted] =
+ wholeHashToSectionIdx.insert(std::make_pair(wholeHash, sectionIdx));
+ if (wasInserted) {
+ newSectionHashes.emplace_back(sectionIdx, hashes);
+ } else {
+ (*duplicateSectionIdxs)[it->getSecond()].push_back(sectionIdx);
+ }
}
- }
- sectionHashes = newSectionHashes;
+ sectionHashes = newSectionHashes;
- // Recompute hash frequencies
- hashFrequency.clear();
- for (auto &[sectionIdx, hashes] : sectionHashes)
- for (auto hash : hashes)
- ++hashFrequency[hash];
+ // Recompute hash frequencies
+ hashFrequency.clear();
+ for (auto &[sectionIdx, hashes] : sectionHashes)
+ for (auto hash : hashes)
+ ++hashFrequency[hash];
+ }
// Filter rare and common hashes and assign each a unique utility node that
// doesn't conflict with the trace utility nodes
DenseMap<uint64_t, BPFunctionNode::UtilityNodeT> hashToUN;
for (auto &[hash, frequency] : hashFrequency) {
- if (frequency <= 1 || frequency * 2 > wholeHashToSectionIdx.size())
+ if (frequency <= 1 || frequency * 2 > sectionHashes.size())
continue;
hashToUN[hash] = ++maxUN;
}
- std::vector<BPFunctionNode::UtilityNodeT> uns;
+ SmallVector<std::pair<unsigned, SmallVector<BPFunctionNode::UtilityNodeT>>>
+ sectionUns;
for (auto &[sectionIdx, hashes] : sectionHashes) {
+ SmallVector<BPFunctionNode::UtilityNodeT> uns;
for (auto &hash : hashes) {
auto it = hashToUN.find(hash);
if (it != hashToUN.end())
uns.push_back(it->second);
}
- nodes.emplace_back(sectionIdx, uns);
- uns.clear();
+ sectionUns.emplace_back(sectionIdx, uns);
}
+ return sectionUns;
}
DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
@@ -185,10 +190,11 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
sectionIdxs.end());
}
- std::vector<BPFunctionNode> nodesForStartup;
BPFunctionNode::UtilityNodeT maxUN = 0;
DenseMap<unsigned, SmallVector<BPFunctionNode::UtilityNodeT>>
startupSectionIdxUNs;
+ // Used to define the initial order for startup functions.
+ DenseMap<unsigned, size_t> sectionIdxToTimestamp;
std::unique_ptr<InstrProfReader> reader;
if (!profilePath.empty()) {
auto fs = vfs::getRealFileSystem();
@@ -202,8 +208,6 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
}
auto &traces = reader->getTemporalProfTraces();
- // Used to define the initial order for startup functions.
- DenseMap<unsigned, size_t> sectionIdxToTimestamp;
DenseMap<unsigned, BPFunctionNode::UtilityNodeT> sectionIdxToFirstUN;
for (size_t traceIdx = 0; traceIdx < traces.size(); traceIdx++) {
uint64_t currentSize = 0, cutoffSize = 1;
@@ -245,15 +249,6 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
++maxUN;
sectionIdxToFirstUN.clear();
}
-
- // These uns should already be sorted without duplicates.
- for (auto &[sectionIdx, uns] : startupSectionIdxUNs)
- nodesForStartup.emplace_back(sectionIdx, uns);
-
- llvm::sort(nodesForStartup, [&sectionIdxToTimestamp](auto &L, auto &R) {
- return std::make_pair(sectionIdxToTimestamp[L.Id], L.Id) <
- std::make_pair(sectionIdxToTimestamp[R.Id], R.Id);
- });
}
SmallVector<unsigned> sectionIdxsForFunctionCompression,
@@ -271,21 +266,32 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
}
}
- std::vector<BPFunctionNode> nodesForFunctionCompression,
- nodesForDataCompression;
// Map a section index (to be ordered for compression) to a list of duplicate
// section indices (not ordered for compression).
- DenseMap<unsigned, SmallVector<unsigned>> duplicateFunctionSectionIdxs,
- duplicateDataSectionIdxs;
- constructNodesForCompression(
+ DenseMap<unsigned, SmallVector<unsigned>> duplicateSectionIdxs;
+ auto unsForFunctionCompression = getUnsForCompression(
sections, sectionToIdx, sectionIdxsForFunctionCompression,
- nodesForFunctionCompression, duplicateFunctionSectionIdxs, maxUN);
- constructNodesForCompression(
+ &duplicateSectionIdxs, maxUN);
+ auto unsForDataCompression = getUnsForCompression(
sections, sectionToIdx, sectionIdxsForDataCompression,
- nodesForDataCompression, duplicateDataSectionIdxs, maxUN);
+ &duplicateSectionIdxs, maxUN);
- // Sort nodes by their Id (which is the section index) because the input
- // linker order tends to be not bad
+ std::vector<BPFunctionNode> nodesForStartup, nodesForFunctionCompression,
+ nodesForDataCompression;
+ for (auto &[sectionIdx, uns] : startupSectionIdxUNs)
+ nodesForStartup.emplace_back(sectionIdx, uns);
+ for (auto &[sectionIdx, uns] : unsForFunctionCompression)
+ nodesForFunctionCompression.emplace_back(sectionIdx, uns);
+ for (auto &[sectionIdx, uns] : unsForDataCompression)
+ nodesForDataCompression.emplace_back(sectionIdx, uns);
+
+ // Use the first timestamp to define the initial order for startup nodes.
+ llvm::sort(nodesForStartup, [&sectionIdxToTimestamp](auto &L, auto &R) {
+ return std::make_pair(sectionIdxToTimestamp[L.Id], L.Id) <
+ std::make_pair(sectionIdxToTimestamp[R.Id], R.Id);
+ });
+ // Sort compression nodes by their Id (which is the section index) because the
+ // input linker order tends to be not bad.
llvm::sort(nodesForFunctionCompression,
[](auto &L, auto &R) { return L.Id < R.Id; });
llvm::sort(nodesForDataCompression,
@@ -318,8 +324,8 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
if (orderedSections.insert(isec))
++numCodeCompressionSections;
- auto It = duplicateFunctionSectionIdxs.find(node.Id);
- if (It == duplicateFunctionSectionIdxs.end())
+ auto It = duplicateSectionIdxs.find(node.Id);
+ if (It == duplicateSectionIdxs.end())
continue;
for (auto dupSecIdx : It->getSecond()) {
const auto *dupIsec = sections[dupSecIdx];
@@ -332,8 +338,8 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
const auto *isec = sections[node.Id];
if (orderedSections.insert(isec))
++numDataCompressionSections;
- auto It = duplicateDataSectionIdxs.find(node.Id);
- if (It == duplicateDataSectionIdxs.end())
+ auto It = duplicateSectionIdxs.find(node.Id);
+ if (It == duplicateSectionIdxs.end())
continue;
for (auto dupSecIdx : It->getSecond()) {
const auto *dupIsec = sections[dupSecIdx];
From 5e15152aef1b3357d54790c4330b80229dd7a561 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag at gmail.com>
Date: Thu, 5 Sep 2024 12:16:09 -0700
Subject: [PATCH 2/2] Add docs to getUnsForCompression()
---
lld/MachO/BPSectionOrderer.cpp | 18 ++++++++++--------
1 file changed, 10 insertions(+), 8 deletions(-)
diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index 3ba844f44fb791..97458c96fd80da 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -21,6 +21,8 @@
using namespace llvm;
using namespace lld::macho;
+using UtilityNodes = SmallVector<BPFunctionNode::UtilityNodeT>;
+
/// Symbols can be appended with "(.__uniq.xxxx)?.llvm.yyyy" where "xxxx" and
/// "yyyy" are numbers that could change between builds. We need to use the root
/// symbol name before this suffix so these symbols can be matched with profiles
@@ -60,9 +62,11 @@ getRelocHash(const Reloc &reloc,
return getRelocHash(kind, sectionIdx.value_or(0), 0, reloc.addend);
}
-static SmallVector<
- std::pair<unsigned, SmallVector<BPFunctionNode::UtilityNodeT>>>
-getUnsForCompression(
+/// Given \p sectionIdxs, a list of section indexes, return a list of utility
+/// nodes for each section index. If \p duplicateSectionIdxs is provided,
+/// populate it with nearly identical sections. Increment \p maxUN to be the
+/// largest utility node we have used so far.
+static SmallVector<std::pair<unsigned, UtilityNodes>> getUnsForCompression(
ArrayRef<const InputSection *> sections,
const DenseMap<const InputSection *, uint64_t> &sectionToIdx,
ArrayRef<unsigned> sectionIdxs,
@@ -139,10 +143,9 @@ getUnsForCompression(
hashToUN[hash] = ++maxUN;
}
- SmallVector<std::pair<unsigned, SmallVector<BPFunctionNode::UtilityNodeT>>>
- sectionUns;
+ SmallVector<std::pair<unsigned, UtilityNodes>> sectionUns;
for (auto &[sectionIdx, hashes] : sectionHashes) {
- SmallVector<BPFunctionNode::UtilityNodeT> uns;
+ UtilityNodes uns;
for (auto &hash : hashes) {
auto it = hashToUN.find(hash);
if (it != hashToUN.end())
@@ -191,8 +194,7 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
}
BPFunctionNode::UtilityNodeT maxUN = 0;
- DenseMap<unsigned, SmallVector<BPFunctionNode::UtilityNodeT>>
- startupSectionIdxUNs;
+ DenseMap<unsigned, UtilityNodes> startupSectionIdxUNs;
// Used to define the initial order for startup functions.
DenseMap<unsigned, size_t> sectionIdxToTimestamp;
std::unique_ptr<InstrProfReader> reader;
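
As a rough illustration of the doc comment added above for getUnsForCompression() (a standalone sketch under simplified types, not the lld implementation): hashes that occur in only one section, or in more than half of them, carry little grouping signal and are skipped, while each remaining hash is mapped to a fresh utility node continuing from the caller's maxUN counter.

  // Sketch only: assign fresh utility nodes to hashes that are neither rare
  // nor ubiquitous, continuing from the caller's maxUN counter.
  #include <cstdint>
  #include <unordered_map>

  using UtilityNodeT = uint32_t;

  std::unordered_map<uint64_t, UtilityNodeT>
  assignUtilityNodes(const std::unordered_map<uint64_t, unsigned> &hashFrequency,
                     size_t numSections, UtilityNodeT &maxUN) {
    std::unordered_map<uint64_t, UtilityNodeT> hashToUN;
    for (const auto &[hash, frequency] : hashFrequency) {
      // A hash seen once says nothing about similarity; a hash seen in more
      // than half of the sections says little about how to group them.
      if (frequency <= 1 || frequency * 2 > numSections)
        continue;
      hashToUN[hash] = ++maxUN;
    }
    return hashToUN;
  }
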