[lld] [lld][ELF] Extend profile guided function ordering to ELF binaries (PR #117514)

via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 6 01:12:02 PST 2024


https://github.com/Colibrow updated https://github.com/llvm/llvm-project/pull/117514

>From 661c46e4c6bc64da15fdddda8a58bcc2f3828aea Mon Sep 17 00:00:00 2001
From: xupengying <xpy66swsry at gmail.com>
Date: Mon, 25 Nov 2024 11:10:04 +0800
Subject: [PATCH 1/7] [lld][ELF] Extend profile guided function ordering to ELF
 binaries

Extend balanced partitioning implementation to support ELF binaries, enabling
the same startup time and compressed size optimizations previously available for
MachO.

This allows ELF binaries to benefit from profile-guided function ordering
and compression-based section ordering.

Add the lld flags `--irpgo-profile-sort=<profile>` and
`--compression-sort={function,data,both}`.

Thanks to ellishg, thevinster, and their team for their work.
---
 lld/Common/CMakeLists.txt                |   1 +
 lld/Common/SectionOrderer.cpp            | 383 +++++++++++++++++++++
 lld/ELF/BPSectionOrderer.cpp             |  67 ++++
 lld/ELF/BPSectionOrderer.h               | 139 ++++++++
 lld/ELF/CMakeLists.txt                   |   1 +
 lld/ELF/Config.h                         |   5 +
 lld/ELF/Driver.cpp                       |  39 +++
 lld/ELF/Options.td                       |  14 +
 lld/ELF/Writer.cpp                       |  12 +-
 lld/MachO/BPSectionOrderer.cpp           | 410 +----------------------
 lld/MachO/BPSectionOrderer.h             | 136 ++++++++
 lld/include/lld/Common/SectionOrderer.h  |  75 +++++
 lld/test/ELF/bp-section-orderer-errs.s   |  44 +++
 lld/test/ELF/bp-section-orderer-stress.s | 104 ++++++
 lld/test/ELF/bp-section-orderer.s        | 154 +++++++++
 15 files changed, 1186 insertions(+), 398 deletions(-)
 create mode 100644 lld/Common/SectionOrderer.cpp
 create mode 100644 lld/ELF/BPSectionOrderer.cpp
 create mode 100644 lld/ELF/BPSectionOrderer.h
 create mode 100644 lld/include/lld/Common/SectionOrderer.h
 create mode 100644 lld/test/ELF/bp-section-orderer-errs.s
 create mode 100644 lld/test/ELF/bp-section-orderer-stress.s
 create mode 100644 lld/test/ELF/bp-section-orderer.s

diff --git a/lld/Common/CMakeLists.txt b/lld/Common/CMakeLists.txt
index 4f503d04f7844f..bd5a40af41c1bc 100644
--- a/lld/Common/CMakeLists.txt
+++ b/lld/Common/CMakeLists.txt
@@ -31,6 +31,7 @@ add_lld_library(lldCommon
   Filesystem.cpp
   Memory.cpp
   Reproduce.cpp
+  SectionOrderer.cpp
   Strings.cpp
   TargetOptionsCommandFlags.cpp
   Timer.cpp
diff --git a/lld/Common/SectionOrderer.cpp b/lld/Common/SectionOrderer.cpp
new file mode 100644
index 00000000000000..64c78030f3427f
--- /dev/null
+++ b/lld/Common/SectionOrderer.cpp
@@ -0,0 +1,383 @@
+//===- SectionOrderer.cpp---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "lld/Common/SectionOrderer.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/Support/BalancedPartitioning.h"
+#include "llvm/Support/TimeProfiler.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/Support/xxhash.h"
+
+#define DEBUG_TYPE "bp-section-orderer"
+using namespace llvm;
+using UtilityNodes = SmallVector<BPFunctionNode::UtilityNodeT>;
+
+namespace lld {
+
+/// Build compression utility nodes for the sections listed in \p sectionIdxs.
+/// Hashes whose frequency is at most 1 or above half the section count are
+/// dropped; each remaining hash gets a new utility node above \p maxUN
+/// (advanced in place). If \p duplicateSectionIdxs is non-null, sections whose
+/// frequent hashes collide are recorded there and left out of the result.
+static SmallVector<std::pair<unsigned, UtilityNodes>> getUnsForCompression(
+    ArrayRef<const BPSectionBase *> sections,
+    const DenseMap<const BPSectionBase *, uint64_t> &sectionToIdx,
+    ArrayRef<unsigned> sectionIdxs,
+    DenseMap<unsigned, SmallVector<unsigned>> *duplicateSectionIdxs,
+    BPFunctionNode::UtilityNodeT &maxUN) {
+  TimeTraceScope timeScope("Build nodes for compression");
+
+  SmallVector<std::pair<unsigned, SmallVector<uint64_t>>> sectionHashes;
+  sectionHashes.reserve(sectionIdxs.size());
+  SmallVector<uint64_t> hashes;
+  for (unsigned sectionIdx : sectionIdxs) {
+    const auto *isec = sections[sectionIdx];
+    isec->getSectionHash(hashes, sectionToIdx);
+    sectionHashes.emplace_back(sectionIdx, std::move(hashes));
+    hashes.clear();
+  }
+
+  DenseMap<uint64_t, unsigned> hashFrequency;
+  for (auto &[sectionIdx, hashes] : sectionHashes)
+    for (auto hash : hashes)
+      ++hashFrequency[hash];
+
+  if (duplicateSectionIdxs) {
+    // Merge sections whose frequent hashes (frequency > 5) XOR to one value.
+    SmallVector<std::pair<unsigned, SmallVector<uint64_t>>> newSectionHashes;
+    DenseMap<uint64_t, unsigned> wholeHashToSectionIdx;
+    for (auto &[sectionIdx, hashes] : sectionHashes) {
+      uint64_t wholeHash = 0;
+      for (auto hash : hashes)
+        if (hashFrequency[hash] > 5)
+          wholeHash ^= hash;
+      auto [it, wasInserted] =
+          wholeHashToSectionIdx.insert(std::make_pair(wholeHash, sectionIdx));
+      // sectionHashes is replaced below, so move the list instead of copying.
+      if (wasInserted)
+        newSectionHashes.emplace_back(sectionIdx, std::move(hashes));
+      else
+        (*duplicateSectionIdxs)[it->getSecond()].push_back(sectionIdx);
+    }
+    sectionHashes = std::move(newSectionHashes);
+
+    // Recompute hash frequencies
+    hashFrequency.clear();
+    for (auto &[sectionIdx, hashes] : sectionHashes)
+      for (auto hash : hashes)
+        ++hashFrequency[hash];
+  }
+
+  // Filter rare and common hashes and assign each a unique utility node that
+  // doesn't conflict with the trace utility nodes
+  DenseMap<uint64_t, BPFunctionNode::UtilityNodeT> hashToUN;
+  for (auto &[hash, frequency] : hashFrequency)
+    if (frequency > 1 && frequency * 2 <= sectionHashes.size())
+      hashToUN[hash] = ++maxUN;
+
+  SmallVector<std::pair<unsigned, UtilityNodes>> sectionUns;
+  for (auto &[sectionIdx, hashes] : sectionHashes) {
+    UtilityNodes uns;
+    for (uint64_t hash : hashes)
+      if (auto it = hashToUN.find(hash); it != hashToUN.end())
+        uns.push_back(it->second);
+    sectionUns.emplace_back(sectionIdx, std::move(uns));
+  }
+  return sectionUns;
+}
+
+llvm::DenseMap<const BPSectionBase *, size_t>
+SectionOrderer::reorderSectionsByBalancedPartitioning(
+    size_t &highestAvailablePriority, llvm::StringRef profilePath,
+    bool forFunctionCompression, bool forDataCompression,
+    bool compressionSortStartupFunctions, bool verbose,
+    SmallVector<BPSectionBase *> inputSections) {
+  TimeTraceScope timeScope("Balanced Partitioning");
+  SmallVector<const BPSectionBase *> sections;
+  DenseMap<const BPSectionBase *, uint64_t> sectionToIdx;
+  StringMap<DenseSet<unsigned>> symbolToSectionIdxs;
+
+  // Process input sections
+  for (const auto *isec : inputSections) {
+    if (!isec->hasValidData())
+      continue;
+
+    unsigned sectionIdx = sections.size();
+    sectionToIdx.try_emplace(isec, sectionIdx);
+    sections.push_back(isec);
+
+    for (auto *sym : isec->getSymbols()) {
+      if (auto *d = sym->asDefinedSymbol())
+        symbolToSectionIdxs[d->getName()].insert(sectionIdx);
+    }
+  }
+  StringMap<DenseSet<unsigned>> rootSymbolToSectionIdxs;
+  for (auto &entry : symbolToSectionIdxs) {
+    StringRef name = entry.getKey();
+    auto &sectionIdxs = entry.getValue();
+    name = BPSectionBase::getRootSymbol(name);
+    rootSymbolToSectionIdxs[name].insert(sectionIdxs.begin(),
+                                         sectionIdxs.end());
+    // Linkage names can be prefixed with "_" or "l_" on Mach-O. See
+    // Mangler::getNameWithPrefix() for details.
+    if (name.consume_front("_") || name.consume_front("l_"))
+      rootSymbolToSectionIdxs[name].insert(sectionIdxs.begin(),
+                                           sectionIdxs.end());
+  }
+
+  BPFunctionNode::UtilityNodeT maxUN = 0;
+  DenseMap<unsigned, UtilityNodes> startupSectionIdxUNs;
+  // Used to define the initial order for startup functions.
+  DenseMap<unsigned, size_t> sectionIdxToTimestamp;
+  std::unique_ptr<InstrProfReader> reader;
+  if (!profilePath.empty()) {
+    auto fs = vfs::getRealFileSystem();
+    auto readerOrErr = InstrProfReader::create(profilePath, *fs);
+    lld::checkError(readerOrErr.takeError());
+
+    reader = std::move(readerOrErr.get());
+    for (auto &entry : *reader) {
+      // Read all entries
+      (void)entry;
+    }
+    auto &traces = reader->getTemporalProfTraces();
+
+    DenseMap<unsigned, BPFunctionNode::UtilityNodeT> sectionIdxToFirstUN;
+    for (size_t traceIdx = 0; traceIdx < traces.size(); traceIdx++) {
+      uint64_t currentSize = 0, cutoffSize = 1;
+      size_t cutoffTimestamp = 1;
+      auto &trace = traces[traceIdx].FunctionNameRefs;
+      for (size_t timestamp = 0; timestamp < trace.size(); timestamp++) {
+        auto [Filename, ParsedFuncName] = getParsedIRPGOName(
+            reader->getSymtab().getFuncOrVarName(trace[timestamp]));
+        ParsedFuncName = BPSectionBase::getRootSymbol(ParsedFuncName);
+
+        auto sectionIdxsIt = rootSymbolToSectionIdxs.find(ParsedFuncName);
+        if (sectionIdxsIt == rootSymbolToSectionIdxs.end())
+          continue;
+        auto &sectionIdxs = sectionIdxsIt->getValue();
+        // If the same symbol is found in multiple sections, they might be
+        // identical, so we arbitrarily use the size from the first section.
+        currentSize += sections[*sectionIdxs.begin()]->getSize();
+
+        // Since BalancedPartitioning is sensitive to the initial order, we need
+        // to explicitly define it to be ordered by earliest timestamp.
+        for (unsigned sectionIdx : sectionIdxs) {
+          auto [it, wasInserted] =
+              sectionIdxToTimestamp.try_emplace(sectionIdx, timestamp);
+          if (!wasInserted)
+            it->getSecond() = std::min<size_t>(it->getSecond(), timestamp);
+        }
+
+        if (timestamp >= cutoffTimestamp || currentSize >= cutoffSize) {
+          ++maxUN;
+          cutoffSize = 2 * currentSize;
+          cutoffTimestamp = 2 * cutoffTimestamp;
+        }
+        for (unsigned sectionIdx : sectionIdxs)
+          sectionIdxToFirstUN.try_emplace(sectionIdx, maxUN);
+      }
+      for (auto &[sectionIdx, firstUN] : sectionIdxToFirstUN)
+        for (auto un = firstUN; un <= maxUN; ++un)
+          startupSectionIdxUNs[sectionIdx].push_back(un);
+      ++maxUN;
+      sectionIdxToFirstUN.clear();
+    }
+  }
+
+  SmallVector<unsigned> sectionIdxsForFunctionCompression,
+      sectionIdxsForDataCompression;
+  for (unsigned sectionIdx = 0; sectionIdx < sections.size(); sectionIdx++) {
+    if (startupSectionIdxUNs.count(sectionIdx))
+      continue;
+    const auto *isec = sections[sectionIdx];
+    if (isec->isCodeSection()) {
+      if (forFunctionCompression)
+        sectionIdxsForFunctionCompression.push_back(sectionIdx);
+    } else {
+      if (forDataCompression)
+        sectionIdxsForDataCompression.push_back(sectionIdx);
+    }
+  }
+
+  if (compressionSortStartupFunctions) {
+    SmallVector<unsigned> startupIdxs;
+    for (auto &[sectionIdx, uns] : startupSectionIdxUNs)
+      startupIdxs.push_back(sectionIdx);
+    auto unsForStartupFunctionCompression =
+        getUnsForCompression(sections, sectionToIdx, startupIdxs,
+                             /*duplicateSectionIdxs=*/nullptr, maxUN);
+    for (auto &[sectionIdx, compressionUns] :
+         unsForStartupFunctionCompression) {
+      auto &uns = startupSectionIdxUNs[sectionIdx];
+      uns.append(compressionUns);
+      llvm::sort(uns);
+      uns.erase(std::unique(uns.begin(), uns.end()), uns.end());
+    }
+  }
+
+  // Map a section index (ordered directly) to a list of duplicate section
+  // indices (not ordered directly).
+  DenseMap<unsigned, SmallVector<unsigned>> duplicateSectionIdxs;
+  auto unsForFunctionCompression = getUnsForCompression(
+      sections, sectionToIdx, sectionIdxsForFunctionCompression,
+      &duplicateSectionIdxs, maxUN);
+  auto unsForDataCompression = getUnsForCompression(
+      sections, sectionToIdx, sectionIdxsForDataCompression,
+      &duplicateSectionIdxs, maxUN);
+
+  std::vector<BPFunctionNode> nodesForStartup, nodesForFunctionCompression,
+      nodesForDataCompression;
+  for (auto &[sectionIdx, uns] : startupSectionIdxUNs)
+    nodesForStartup.emplace_back(sectionIdx, uns);
+  for (auto &[sectionIdx, uns] : unsForFunctionCompression)
+    nodesForFunctionCompression.emplace_back(sectionIdx, uns);
+  for (auto &[sectionIdx, uns] : unsForDataCompression)
+    nodesForDataCompression.emplace_back(sectionIdx, uns);
+
+  // Use the first timestamp to define the initial order for startup nodes.
+  llvm::sort(nodesForStartup, [&sectionIdxToTimestamp](auto &L, auto &R) {
+    return std::make_pair(sectionIdxToTimestamp[L.Id], L.Id) <
+           std::make_pair(sectionIdxToTimestamp[R.Id], R.Id);
+  });
+  // Sort compression nodes by their Id (which is the section index) because the
+  // input linker order tends to be reasonably good.
+  llvm::sort(nodesForFunctionCompression,
+             [](auto &L, auto &R) { return L.Id < R.Id; });
+  llvm::sort(nodesForDataCompression,
+             [](auto &L, auto &R) { return L.Id < R.Id; });
+
+  {
+    TimeTraceScope timeScope("Balanced Partitioning");
+    BalancedPartitioningConfig config;
+    BalancedPartitioning bp(config);
+    bp.run(nodesForStartup);
+    bp.run(nodesForFunctionCompression);
+    bp.run(nodesForDataCompression);
+  }
+
+  unsigned numStartupSections = 0;
+  unsigned numCodeCompressionSections = 0;
+  unsigned numDuplicateCodeSections = 0;
+  unsigned numDataCompressionSections = 0;
+  unsigned numDuplicateDataSections = 0;
+  SetVector<const BPSectionBase *> orderedSections;
+  // Order startup functions,
+  for (auto &node : nodesForStartup) {
+    const auto *isec = sections[node.Id];
+    if (orderedSections.insert(isec))
+      ++numStartupSections;
+  }
+  // then functions for compression,
+  for (auto &node : nodesForFunctionCompression) {
+    const auto *isec = sections[node.Id];
+    if (orderedSections.insert(isec))
+      ++numCodeCompressionSections;
+
+    auto It = duplicateSectionIdxs.find(node.Id);
+    if (It == duplicateSectionIdxs.end())
+      continue;
+    for (auto dupSecIdx : It->getSecond()) {
+      const auto *dupIsec = sections[dupSecIdx];
+      if (orderedSections.insert(dupIsec))
+        ++numDuplicateCodeSections;
+    }
+  }
+  // then data for compression.
+  for (auto &node : nodesForDataCompression) {
+    const auto *isec = sections[node.Id];
+    if (orderedSections.insert(isec))
+      ++numDataCompressionSections;
+    auto It = duplicateSectionIdxs.find(node.Id);
+    if (It == duplicateSectionIdxs.end())
+      continue;
+    for (auto dupSecIdx : It->getSecond()) {
+      const auto *dupIsec = sections[dupSecIdx];
+      if (orderedSections.insert(dupIsec))
+        ++numDuplicateDataSections;
+    }
+  }
+
+  if (verbose) {
+    unsigned numTotalOrderedSections =
+        numStartupSections + numCodeCompressionSections +
+        numDuplicateCodeSections + numDataCompressionSections +
+        numDuplicateDataSections;
+    dbgs()
+        << "Ordered " << numTotalOrderedSections
+        << " sections using balanced partitioning:\n  Functions for startup: "
+        << numStartupSections
+        << "\n  Functions for compression: " << numCodeCompressionSections
+        << "\n  Duplicate functions: " << numDuplicateCodeSections
+        << "\n  Data for compression: " << numDataCompressionSections
+        << "\n  Duplicate data: " << numDuplicateDataSections << "\n";
+
+    if (!profilePath.empty()) {
+      // Evaluate this function order for startup
+      StringMap<std::pair<uint64_t, uint64_t>> symbolToPageNumbers;
+      const uint64_t pageSize = (1 << 14);
+      uint64_t currentAddress = 0;
+      for (const auto *isec : orderedSections) {
+        for (auto *sym : isec->getSymbols()) {
+          if (auto *d = sym->asDefinedSymbol()) {
+            uint64_t startAddress = currentAddress + d->getValue();
+            uint64_t endAddress = startAddress + d->getSize();
+            uint64_t firstPage = startAddress / pageSize;
+            // I think the kernel might pull in a few pages when one is touched,
+            // so it might be more accurate to force lastPage to be aligned by
+            // 4?
+            uint64_t lastPage = endAddress / pageSize;
+            StringRef rootSymbol = d->getName();
+            rootSymbol = BPSectionBase::getRootSymbol(rootSymbol);
+            symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage);
+            if (rootSymbol.consume_front("_") || rootSymbol.consume_front("l_"))
+              symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage);
+          }
+        }
+        currentAddress += isec->getSize();
+      }
+
+      // The area under the curve F where F(t) is the total number of page
+      // faults at step t.
+      unsigned area = 0;
+      for (auto &trace : reader->getTemporalProfTraces()) {
+        SmallSet<uint64_t, 0> touchedPages;
+        for (unsigned step = 0; step < trace.FunctionNameRefs.size(); step++) {
+          auto traceId = trace.FunctionNameRefs[step];
+          auto [Filename, ParsedFuncName] =
+              getParsedIRPGOName(reader->getSymtab().getFuncOrVarName(traceId));
+          ParsedFuncName = BPSectionBase::getRootSymbol(ParsedFuncName);
+          auto it = symbolToPageNumbers.find(ParsedFuncName);
+          if (it != symbolToPageNumbers.end()) {
+            auto &[firstPage, lastPage] = it->getValue();
+            for (uint64_t i = firstPage; i <= lastPage; i++)
+              touchedPages.insert(i);
+          }
+          area += touchedPages.size();
+        }
+      }
+      dbgs() << "Total area under the page fault curve: " << (float)area
+             << "\n";
+    }
+  }
+
+  DenseMap<const BPSectionBase *, size_t> sectionPriorities;
+  for (const auto *isec : orderedSections)
+    sectionPriorities[isec] = --highestAvailablePriority;
+  return sectionPriorities;
+}
+
+} // namespace lld
diff --git a/lld/ELF/BPSectionOrderer.cpp b/lld/ELF/BPSectionOrderer.cpp
new file mode 100644
index 00000000000000..27c772936a52f6
--- /dev/null
+++ b/lld/ELF/BPSectionOrderer.cpp
@@ -0,0 +1,67 @@
+//===- BPSectionOrderer.cpp--------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPSectionOrderer.h"
+#include "Config.h"
+#include "InputFiles.h"
+#include "InputSection.h"
+#include "lld/Common/CommonLinkerContext.h"
+#include "lld/Common/SectionOrderer.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/BalancedPartitioning.h"
+#include "llvm/Support/TimeProfiler.h"
+
+#include "SymbolTable.h"
+#include "Symbols.h"
+
+using namespace llvm;
+using namespace lld::elf;
+
+/// Wrap each Defined symbol that has an InputSectionBase, run the common
+/// balanced-partitioning orderer over the wrappers, and return the priority
+/// assigned to each ordered input section.
+llvm::DenseMap<const lld::elf::InputSectionBase *, int>
+lld::elf::runBalancedPartitioning(Ctx &ctx, llvm::StringRef profilePath,
+                                  bool forFunctionCompression,
+                                  bool forDataCompression,
+                                  bool compressionSortStartupFunctions,
+                                  bool verbose) {
+  size_t highestAvailablePriority = std::numeric_limits<int>::max();
+  SmallVector<lld::BPSectionBase *> sections;
+
+  // One heap-allocated ELFSection/ELFSymbol pair per symbol; freed below.
+  auto addSymbol = [&sections](Symbol *sym) {
+    if (auto *d = dyn_cast<Defined>(sym))
+      if (auto *sec = dyn_cast_or_null<InputSectionBase>(d->section))
+        sections.push_back(new ELFSection(sec, new ELFSymbol(sym)));
+  };
+  for (Symbol *sym : ctx.symtab->getSymbols())
+    addSymbol(sym);
+  for (ELFFileBase *file : ctx.objectFiles)
+    for (Symbol *sym : file->getLocalSymbols())
+      addSymbol(sym);
+
+  auto reorderedSections =
+      lld::SectionOrderer::reorderSectionsByBalancedPartitioning(
+          highestAvailablePriority, profilePath, forFunctionCompression,
+          forDataCompression, compressionSortStartupFunctions, verbose,
+          sections);
+
+  DenseMap<const InputSectionBase *, int> result;
+  for (const auto &[sec, priority] : reorderedSections)
+    if (const auto *elfSection = dyn_cast<ELFSection>(sec))
+      result[elfSection->getSymbol()->getInputSection()] =
+          static_cast<int>(priority);
+  // Free every wrapper, including those the orderer left unprioritized.
+  for (lld::BPSectionBase *sec : sections) {
+    auto *elfSection = static_cast<ELFSection *>(sec);
+    delete elfSection->getSymbol();
+    delete elfSection;
+  }
+  return result;
+}
diff --git a/lld/ELF/BPSectionOrderer.h b/lld/ELF/BPSectionOrderer.h
new file mode 100644
index 00000000000000..2ecf116961568d
--- /dev/null
+++ b/lld/ELF/BPSectionOrderer.h
@@ -0,0 +1,139 @@
+//===- BPSectionOrderer.h ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file uses Balanced Partitioning to order sections to improve startup
+/// time and compressed size.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_ELF_BPSECTION_ORDERER_H
+#define LLD_ELF_BPSECTION_ORDERER_H
+
+#include "InputFiles.h"
+#include "InputSection.h"
+#include "Relocations.h"
+#include "Symbols.h"
+#include "lld/Common/SectionOrderer.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/xxhash.h"
+
+namespace lld::elf {
+
+class InputSection;
+
+/// BPSymbol adapter for an ELF Symbol; non-Defined symbols report 0/null.
+class ELFSymbol : public BPSymbol {
+  const Symbol *sym;
+
+public:
+  explicit ELFSymbol(const Symbol *s) : sym(s) {}
+
+  llvm::StringRef getName() const override { return sym->getName(); }
+
+  BPSymbol *asDefinedSymbol() override {
+    return llvm::isa<Defined>(sym) ? this : nullptr;
+  }
+
+  // Symbol value, or 0 when the symbol is not a Defined.
+  uint64_t getValue() const override {
+    auto *d = llvm::dyn_cast<Defined>(sym);
+    return d ? d->value : 0;
+  }
+
+  // Symbol size, or 0 when the symbol is not a Defined.
+  uint64_t getSize() const override {
+    auto *d = llvm::dyn_cast<Defined>(sym);
+    return d ? d->size : 0;
+  }
+
+  // Input section holding the symbol; null if it is not a Defined or its
+  // section is absent or not an InputSectionBase.
+  InputSectionBase *getInputSection() const {
+    auto *d = llvm::dyn_cast<Defined>(sym);
+    return d ? llvm::dyn_cast_or_null<InputSectionBase>(d->section) : nullptr;
+  }
+
+  const Symbol *getSymbol() const { return sym; }
+};
+
+/// Adapts one (input section, symbol) pair to the BPSectionBase interface
+/// consumed by the common balanced-partitioning orderer.
+class ELFSection : public BPSectionBase {
+  const InputSectionBase *isec;
+  ELFSymbol *symbol;
+  std::vector<BPSymbol *> symbols;
+
+public:
+  explicit ELFSection(const InputSectionBase *sec, ELFSymbol *sym)
+      : isec(sec), symbol(sym), symbols({sym}) {}
+
+  const InputSectionBase *getSection() const { return isec; }
+
+  ELFSymbol *getSymbol() const { return symbol; }
+  llvm::StringRef getName() const override { return isec->name; }
+
+  uint64_t getSize() const override { return isec->getSize(); }
+
+  // Code sections are those with SHF_EXECINSTR set in their flags.
+  bool isCodeSection() const override {
+    return isec->flags & llvm::ELF::SHF_EXECINSTR;
+  }
+
+  // True when a section is attached and its content is non-empty.
+  bool hasValidData() const override {
+    return isec && !isec->content().empty();
+  }
+
+  llvm::ArrayRef<uint8_t> getSectionData() const override {
+    return isec->content();
+  }
+
+  llvm::ArrayRef<BPSymbol *> getSymbols() const override { return symbols; }
+
+  /// Appends to \p hashes one xxHash64 per sliding window of up to
+  /// `windowSize` content bytes (one window per start offset), then sorts
+  /// and deduplicates \p hashes. \p sectionToIdx is currently unused: it is
+  /// only needed for relocation hashes, which are not implemented yet.
+  void getSectionHash(llvm::SmallVectorImpl<uint64_t> &hashes,
+                      const llvm::DenseMap<const BPSectionBase *, uint64_t>
+                          &sectionToIdx) const override {
+    constexpr unsigned windowSize = 4;
+
+    // Calculate content hashes
+    llvm::ArrayRef<uint8_t> data = isec->content();
+    for (size_t i = 0; i < data.size(); i++)
+      hashes.push_back(xxHash64(data.drop_front(i).take_front(windowSize)));
+
+    // TODO: Calculate relocation hashes. ELF relocations are complex and the
+    // results without them are good enough, so they contribute no hashes.
+
+    llvm::sort(hashes);
+    hashes.erase(std::unique(hashes.begin(), hashes.end()), hashes.end());
+  }
+
+  // Always true; assumes only ELFSection objects are passed in (TODO confirm).
+  static bool classof(const BPSectionBase *s) { return true; }
+};
+
+/// Run Balanced Partitioning to find the optimal function and data order to
+/// improve startup time and compressed size.
+///
+/// It is important that -ffunction-sections and -fdata-sections are used so
+/// functions and data are in their own sections and thus can be reordered.
+llvm::DenseMap<const lld::elf::InputSectionBase *, int>
+runBalancedPartitioning(Ctx &ctx, llvm::StringRef profilePath,
+                        bool forFunctionCompression, bool forDataCompression,
+                        bool compressionSortStartupFunctions, bool verbose);
+} // namespace lld::elf
+
+#endif
diff --git a/lld/ELF/CMakeLists.txt b/lld/ELF/CMakeLists.txt
index 83d816ddb0601e..7bf70c3cbbadc5 100644
--- a/lld/ELF/CMakeLists.txt
+++ b/lld/ELF/CMakeLists.txt
@@ -56,6 +56,7 @@ add_lld_library(lldELF
   SymbolTable.cpp
   Symbols.cpp
   SyntheticSections.cpp
+  BPSectionOrderer.cpp
   Target.cpp
   Thunks.cpp
   Writer.cpp
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index a2836733c2715e..10054f01a5fe5c 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -264,6 +264,11 @@ struct Config {
   bool armBe8 = false;
   BsymbolicKind bsymbolic = BsymbolicKind::None;
   CGProfileSortKind callGraphProfileSort;
+  llvm::StringRef irpgoProfileSortProfilePath;
+  bool compressionSortStartupFunctions = false;
+  bool functionOrderForCompression = false;
+  bool dataOrderForCompression = false;
+  bool verboseBpSectionOrderer = false;
   bool checkSections;
   bool checkDynamicRelocs;
   std::optional<llvm::DebugCompressionType> compressDebugSections;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index bc4b967ccbbbb4..87aaedec28b3a0 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1255,6 +1255,45 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
       ctx.arg.bsymbolic = BsymbolicKind::All;
   }
   ctx.arg.callGraphProfileSort = getCGProfileSortKind(ctx, args);
+  ctx.arg.irpgoProfileSortProfilePath =
+      args.getLastArgValue(OPT_irpgo_profile_sort);
+  ctx.arg.compressionSortStartupFunctions =
+      args.hasFlag(OPT_compression_sort_startup_functions,
+                   OPT_no_compression_sort_startup_functions, false);
+  if (!ctx.arg.irpgoProfileSortProfilePath.empty()) {
+    if (args.getLastArg(OPT_call_graph_ordering_file) != nullptr) {
+      ErrAlways(ctx) << "--irpgo-profile-sort is incompatible with "
+                        "--call-graph-ordering-file";
+    }
+  } else {
+    if (ctx.arg.compressionSortStartupFunctions)
+      ErrAlways(ctx)
+          << "--compression-sort-startup-functions must be used with "
+             "--irpgo-profile-sort";
+  }
+
+  if (auto *arg = args.getLastArg(OPT_compression_sort)) {
+    StringRef compressionSortStr = arg->getValue();
+    if (compressionSortStr == "function") {
+      ctx.arg.functionOrderForCompression = true;
+    } else if (compressionSortStr == "data") {
+      ctx.arg.dataOrderForCompression = true;
+    } else if (compressionSortStr == "both") {
+      ctx.arg.functionOrderForCompression = true;
+      ctx.arg.dataOrderForCompression = true;
+    } else if (compressionSortStr != "none") {
+      ErrAlways(ctx) << "unknown value `" + compressionSortStr + "` for " +
+                            arg->getSpelling();
+    }
+    if (ctx.arg.dataOrderForCompression ||
+        ctx.arg.functionOrderForCompression) {
+      if (args.getLastArg(OPT_call_graph_ordering_file) != nullptr) {
+        ErrAlways(ctx) << "--compression-sort is incompatible with "
+                          "--call-graph-ordering-file";
+      }
+    }
+  }
+  ctx.arg.verboseBpSectionOrderer = args.hasArg(OPT_verbose_bp_section_orderer);
   ctx.arg.checkSections =
       args.hasFlag(OPT_check_sections, OPT_no_check_sections, true);
   ctx.arg.chroot = args.getLastArgValue(OPT_chroot);
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index ebe77204264210..0c51bf4e555868 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -141,6 +141,20 @@ def call_graph_profile_sort: JJ<"call-graph-profile-sort=">,
 def : FF<"no-call-graph-profile-sort">, Alias<call_graph_profile_sort>, AliasArgs<["none"]>,
   Flags<[HelpHidden]>;
 
+defm irpgo_profile_sort: Eq<"irpgo-profile-sort",
+    "Read the IRPGO profile at <profile> to order sections to improve startup time">;
+
+defm compression_sort_startup_functions: BB<"compression-sort-startup-functions",
+    "Order startup functions to improve compressed size in addition to startup time",
+    "Do not order startup function for compression">;
+
+def compression_sort: JJ<"compression-sort=">,
+    MetaVarName<"[none,function,data,both]">,
+    HelpText<"Order sections to improve compressed size">;
+
+def verbose_bp_section_orderer: FF<"verbose-bp-section-orderer">,
+    HelpText<"Print information on how many sections were ordered by balanced partitioning and a measure of the expected number of page faults">;
+
 // --chroot doesn't have a help text because it is an internal option.
 def chroot: Separate<["--"], "chroot">;
 
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index f10cc54c05a0ca..860e528964c465 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -9,6 +9,7 @@
 #include "Writer.h"
 #include "AArch64ErrataFix.h"
 #include "ARMErrataFix.h"
+#include "BPSectionOrderer.h"
 #include "CallGraphSort.h"
 #include "Config.h"
 #include "InputFiles.h"
@@ -1078,8 +1079,17 @@ static void maybeShuffle(Ctx &ctx,
 // Builds section order for handling --symbol-ordering-file.
 static DenseMap<const InputSectionBase *, int> buildSectionOrder(Ctx &ctx) {
   DenseMap<const InputSectionBase *, int> sectionOrder;
+  if (!ctx.arg.irpgoProfileSortProfilePath.empty() ||
+      ctx.arg.functionOrderForCompression || ctx.arg.dataOrderForCompression) {
+    TimeTraceScope timeScope("Balanced Partitioning Section Orderer");
+    sectionOrder = runBalancedPartitioning(
+        ctx, ctx.arg.irpgoProfileSortProfilePath,
+        ctx.arg.functionOrderForCompression, ctx.arg.dataOrderForCompression,
+        ctx.arg.compressionSortStartupFunctions,
+        ctx.arg.verboseBpSectionOrderer);
+  }
   // Use the rarely used option --call-graph-ordering-file to sort sections.
-  if (!ctx.arg.callGraphProfile.empty())
+  else if (!ctx.arg.callGraphProfile.empty())
     return computeCallGraphProfileOrder(ctx);
 
   if (ctx.arg.symbolOrderingFile.empty())
diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index 5db2242a35ef28..1edf883209f719 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -11,425 +11,41 @@
 #include "lld/Common/ErrorHandler.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringMap.h"
-#include "llvm/ProfileData/InstrProfReader.h"
 #include "llvm/Support/BalancedPartitioning.h"
 #include "llvm/Support/TimeProfiler.h"
-#include "llvm/Support/VirtualFileSystem.h"
-#include "llvm/Support/xxhash.h"
 
-#define DEBUG_TYPE "bp-section-orderer"
 using namespace llvm;
 using namespace lld::macho;
 
-using UtilityNodes = SmallVector<BPFunctionNode::UtilityNodeT>;
-
-/// Symbols can be appended with "(.__uniq.xxxx)?.llvm.yyyy" where "xxxx" and
-/// "yyyy" are numbers that could change between builds. We need to use the root
-/// symbol name before this suffix so these symbols can be matched with profiles
-/// which may have different suffixes.
-static StringRef getRootSymbol(StringRef Name) {
-  auto [P0, S0] = Name.rsplit(".llvm.");
-  auto [P1, S1] = P0.rsplit(".__uniq.");
-  return P1;
-}
-
-static uint64_t getRelocHash(StringRef kind, uint64_t sectionIdx,
-                             uint64_t offset, uint64_t addend) {
-  return xxHash64((kind + ": " + Twine::utohexstr(sectionIdx) + " + " +
-                   Twine::utohexstr(offset) + " + " + Twine::utohexstr(addend))
-                      .str());
-}
-
-static uint64_t
-getRelocHash(const Reloc &reloc,
-             const DenseMap<const InputSection *, uint64_t> &sectionToIdx) {
-  auto *isec = reloc.getReferentInputSection();
-  std::optional<uint64_t> sectionIdx;
-  auto sectionIdxIt = sectionToIdx.find(isec);
-  if (sectionIdxIt != sectionToIdx.end())
-    sectionIdx = sectionIdxIt->getSecond();
-  std::string kind;
-  if (isec)
-    kind = ("Section " + Twine(static_cast<uint8_t>(isec->kind()))).str();
-  if (auto *sym = reloc.referent.dyn_cast<Symbol *>()) {
-    kind += (" Symbol " + Twine(static_cast<uint8_t>(sym->kind()))).str();
-    if (auto *d = dyn_cast<Defined>(sym))
-      return getRelocHash(kind, sectionIdx.value_or(0), d->value, reloc.addend);
-  }
-  return getRelocHash(kind, sectionIdx.value_or(0), 0, reloc.addend);
-}
-
-/// Given \p sectionIdxs, a list of section indexes, return a list of utility
-/// nodes for each section index. If \p duplicateSectionIdx is provided,
-/// populate it with nearly identical sections. Increment \p maxUN to be the
-/// largest utility node we have used so far.
-static SmallVector<std::pair<unsigned, UtilityNodes>> getUnsForCompression(
-    ArrayRef<const InputSection *> sections,
-    const DenseMap<const InputSection *, uint64_t> &sectionToIdx,
-    ArrayRef<unsigned> sectionIdxs,
-    DenseMap<unsigned, SmallVector<unsigned>> *duplicateSectionIdxs,
-    BPFunctionNode::UtilityNodeT &maxUN) {
-  TimeTraceScope timeScope("Build nodes for compression");
-
-  SmallVector<std::pair<unsigned, SmallVector<uint64_t>>> sectionHashes;
-  sectionHashes.reserve(sectionIdxs.size());
-  SmallVector<uint64_t> hashes;
-  for (unsigned sectionIdx : sectionIdxs) {
-    const auto *isec = sections[sectionIdx];
-    constexpr unsigned windowSize = 4;
-
-    for (size_t i = 0; i < isec->data.size(); i++) {
-      auto window = isec->data.drop_front(i).take_front(windowSize);
-      hashes.push_back(xxHash64(window));
-    }
-    for (const auto &r : isec->relocs) {
-      if (r.length == 0 || r.referent.isNull() || r.offset >= isec->data.size())
-        continue;
-      uint64_t relocHash = getRelocHash(r, sectionToIdx);
-      uint32_t start = (r.offset < windowSize) ? 0 : r.offset - windowSize + 1;
-      for (uint32_t i = start; i < r.offset + r.length; i++) {
-        auto window = isec->data.drop_front(i).take_front(windowSize);
-        hashes.push_back(xxHash64(window) + relocHash);
-      }
-    }
-
-    llvm::sort(hashes);
-    hashes.erase(std::unique(hashes.begin(), hashes.end()), hashes.end());
-
-    sectionHashes.emplace_back(sectionIdx, hashes);
-    hashes.clear();
-  }
-
-  DenseMap<uint64_t, unsigned> hashFrequency;
-  for (auto &[sectionIdx, hashes] : sectionHashes)
-    for (auto hash : hashes)
-      ++hashFrequency[hash];
-
-  if (duplicateSectionIdxs) {
-    // Merge section that are nearly identical
-    SmallVector<std::pair<unsigned, SmallVector<uint64_t>>> newSectionHashes;
-    DenseMap<uint64_t, unsigned> wholeHashToSectionIdx;
-    for (auto &[sectionIdx, hashes] : sectionHashes) {
-      uint64_t wholeHash = 0;
-      for (auto hash : hashes)
-        if (hashFrequency[hash] > 5)
-          wholeHash ^= hash;
-      auto [it, wasInserted] =
-          wholeHashToSectionIdx.insert(std::make_pair(wholeHash, sectionIdx));
-      if (wasInserted) {
-        newSectionHashes.emplace_back(sectionIdx, hashes);
-      } else {
-        (*duplicateSectionIdxs)[it->getSecond()].push_back(sectionIdx);
-      }
-    }
-    sectionHashes = newSectionHashes;
-
-    // Recompute hash frequencies
-    hashFrequency.clear();
-    for (auto &[sectionIdx, hashes] : sectionHashes)
-      for (auto hash : hashes)
-        ++hashFrequency[hash];
-  }
-
-  // Filter rare and common hashes and assign each a unique utility node that
-  // doesn't conflict with the trace utility nodes
-  DenseMap<uint64_t, BPFunctionNode::UtilityNodeT> hashToUN;
-  for (auto &[hash, frequency] : hashFrequency) {
-    if (frequency <= 1 || frequency * 2 > sectionHashes.size())
-      continue;
-    hashToUN[hash] = ++maxUN;
-  }
-
-  SmallVector<std::pair<unsigned, UtilityNodes>> sectionUns;
-  for (auto &[sectionIdx, hashes] : sectionHashes) {
-    UtilityNodes uns;
-    for (auto &hash : hashes) {
-      auto it = hashToUN.find(hash);
-      if (it != hashToUN.end())
-        uns.push_back(it->second);
-    }
-    sectionUns.emplace_back(sectionIdx, uns);
-  }
-  return sectionUns;
-}
-
 DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
     size_t &highestAvailablePriority, StringRef profilePath,
     bool forFunctionCompression, bool forDataCompression,
     bool compressionSortStartupFunctions, bool verbose) {
 
-  SmallVector<const InputSection *> sections;
-  DenseMap<const InputSection *, uint64_t> sectionToIdx;
-  StringMap<DenseSet<unsigned>> symbolToSectionIdxs;
+  SmallVector<BPSectionBase *> sections;
   for (const auto *file : inputFiles) {
     for (auto *sec : file->sections) {
       for (auto &subsec : sec->subsections) {
         auto *isec = subsec.isec;
         if (!isec || isec->data.empty() || !isec->data.data())
           continue;
-        unsigned sectionIdx = sections.size();
-        sectionToIdx.try_emplace(isec, sectionIdx);
-        sections.push_back(isec);
-        for (Symbol *sym : isec->symbols)
-          if (auto *d = dyn_cast_or_null<Defined>(sym))
-            symbolToSectionIdxs[d->getName()].insert(sectionIdx);
+        sections.push_back(new MachoSection(isec));
       }
     }
   }
 
-  StringMap<DenseSet<unsigned>> rootSymbolToSectionIdxs;
-  for (auto &entry : symbolToSectionIdxs) {
-    StringRef name = entry.getKey();
-    auto &sectionIdxs = entry.getValue();
-    name = getRootSymbol(name);
-    rootSymbolToSectionIdxs[name].insert(sectionIdxs.begin(),
-                                         sectionIdxs.end());
-    // Linkage names can be prefixed with "_" or "l_" on Mach-O. See
-    // Mangler::getNameWithPrefix() for details.
-    if (name.consume_front("_") || name.consume_front("l_"))
-      rootSymbolToSectionIdxs[name].insert(sectionIdxs.begin(),
-                                           sectionIdxs.end());
-  }
-
-  BPFunctionNode::UtilityNodeT maxUN = 0;
-  DenseMap<unsigned, UtilityNodes> startupSectionIdxUNs;
-  // Used to define the initial order for startup functions.
-  DenseMap<unsigned, size_t> sectionIdxToTimestamp;
-  std::unique_ptr<InstrProfReader> reader;
-  if (!profilePath.empty()) {
-    auto fs = vfs::getRealFileSystem();
-    auto readerOrErr = InstrProfReader::create(profilePath, *fs);
-    lld::checkError(readerOrErr.takeError());
+  auto reorderedSections =
+      lld::SectionOrderer::reorderSectionsByBalancedPartitioning(
+          highestAvailablePriority, profilePath, forFunctionCompression,
+          forDataCompression, compressionSortStartupFunctions, verbose,
+          sections);
 
-    reader = std::move(readerOrErr.get());
-    for (auto &entry : *reader) {
-      // Read all entries
-      (void)entry;
+  DenseMap<const InputSection *, size_t> result;
+  for (const auto &[sec, priority] : reorderedSections)
+    if (auto *machoSection = dyn_cast<MachoSection>(sec))
+      result[machoSection->getSection()] = priority;
+  for (BPSectionBase *sec : sections) { // Free all wrappers, ordered or not.
+    delete sec;
-    auto &traces = reader->getTemporalProfTraces();
-
-    DenseMap<unsigned, BPFunctionNode::UtilityNodeT> sectionIdxToFirstUN;
-    for (size_t traceIdx = 0; traceIdx < traces.size(); traceIdx++) {
-      uint64_t currentSize = 0, cutoffSize = 1;
-      size_t cutoffTimestamp = 1;
-      auto &trace = traces[traceIdx].FunctionNameRefs;
-      for (size_t timestamp = 0; timestamp < trace.size(); timestamp++) {
-        auto [Filename, ParsedFuncName] = getParsedIRPGOName(
-            reader->getSymtab().getFuncOrVarName(trace[timestamp]));
-        ParsedFuncName = getRootSymbol(ParsedFuncName);
-
-        auto sectionIdxsIt = rootSymbolToSectionIdxs.find(ParsedFuncName);
-        if (sectionIdxsIt == rootSymbolToSectionIdxs.end())
-          continue;
-        auto &sectionIdxs = sectionIdxsIt->getValue();
-        // If the same symbol is found in multiple sections, they might be
-        // identical, so we arbitrarily use the size from the first section.
-        currentSize += sections[*sectionIdxs.begin()]->getSize();
-
-        // Since BalancedPartitioning is sensitive to the initial order, we need
-        // to explicitly define it to be ordered by earliest timestamp.
-        for (unsigned sectionIdx : sectionIdxs) {
-          auto [it, wasInserted] =
-              sectionIdxToTimestamp.try_emplace(sectionIdx, timestamp);
-          if (!wasInserted)
-            it->getSecond() = std::min<size_t>(it->getSecond(), timestamp);
-        }
-
-        if (timestamp >= cutoffTimestamp || currentSize >= cutoffSize) {
-          ++maxUN;
-          cutoffSize = 2 * currentSize;
-          cutoffTimestamp = 2 * cutoffTimestamp;
-        }
-        for (unsigned sectionIdx : sectionIdxs)
-          sectionIdxToFirstUN.try_emplace(sectionIdx, maxUN);
-      }
-      for (auto &[sectionIdx, firstUN] : sectionIdxToFirstUN)
-        for (auto un = firstUN; un <= maxUN; ++un)
-          startupSectionIdxUNs[sectionIdx].push_back(un);
-      ++maxUN;
-      sectionIdxToFirstUN.clear();
-    }
-  }
-
-  SmallVector<unsigned> sectionIdxsForFunctionCompression,
-      sectionIdxsForDataCompression;
-  for (unsigned sectionIdx = 0; sectionIdx < sections.size(); sectionIdx++) {
-    if (startupSectionIdxUNs.count(sectionIdx))
-      continue;
-    const auto *isec = sections[sectionIdx];
-    if (isCodeSection(isec)) {
-      if (forFunctionCompression)
-        sectionIdxsForFunctionCompression.push_back(sectionIdx);
-    } else {
-      if (forDataCompression)
-        sectionIdxsForDataCompression.push_back(sectionIdx);
-    }
-  }
-
-  if (compressionSortStartupFunctions) {
-    SmallVector<unsigned> startupIdxs;
-    for (auto &[sectionIdx, uns] : startupSectionIdxUNs)
-      startupIdxs.push_back(sectionIdx);
-    auto unsForStartupFunctionCompression =
-        getUnsForCompression(sections, sectionToIdx, startupIdxs,
-                             /*duplicateSectionIdxs=*/nullptr, maxUN);
-    for (auto &[sectionIdx, compressionUns] :
-         unsForStartupFunctionCompression) {
-      auto &uns = startupSectionIdxUNs[sectionIdx];
-      uns.append(compressionUns);
-      llvm::sort(uns);
-      uns.erase(std::unique(uns.begin(), uns.end()), uns.end());
-    }
-  }
-
-  // Map a section index (order directly) to a list of duplicate section indices
-  // (not ordered directly).
-  DenseMap<unsigned, SmallVector<unsigned>> duplicateSectionIdxs;
-  auto unsForFunctionCompression = getUnsForCompression(
-      sections, sectionToIdx, sectionIdxsForFunctionCompression,
-      &duplicateSectionIdxs, maxUN);
-  auto unsForDataCompression = getUnsForCompression(
-      sections, sectionToIdx, sectionIdxsForDataCompression,
-      &duplicateSectionIdxs, maxUN);
-
-  std::vector<BPFunctionNode> nodesForStartup, nodesForFunctionCompression,
-      nodesForDataCompression;
-  for (auto &[sectionIdx, uns] : startupSectionIdxUNs)
-    nodesForStartup.emplace_back(sectionIdx, uns);
-  for (auto &[sectionIdx, uns] : unsForFunctionCompression)
-    nodesForFunctionCompression.emplace_back(sectionIdx, uns);
-  for (auto &[sectionIdx, uns] : unsForDataCompression)
-    nodesForDataCompression.emplace_back(sectionIdx, uns);
-
-  // Use the first timestamp to define the initial order for startup nodes.
-  llvm::sort(nodesForStartup, [&sectionIdxToTimestamp](auto &L, auto &R) {
-    return std::make_pair(sectionIdxToTimestamp[L.Id], L.Id) <
-           std::make_pair(sectionIdxToTimestamp[R.Id], R.Id);
-  });
-  // Sort compression nodes by their Id (which is the section index) because the
-  // input linker order tends to be not bad.
-  llvm::sort(nodesForFunctionCompression,
-             [](auto &L, auto &R) { return L.Id < R.Id; });
-  llvm::sort(nodesForDataCompression,
-             [](auto &L, auto &R) { return L.Id < R.Id; });
-
-  {
-    TimeTraceScope timeScope("Balanced Partitioning");
-    BalancedPartitioningConfig config;
-    BalancedPartitioning bp(config);
-    bp.run(nodesForStartup);
-    bp.run(nodesForFunctionCompression);
-    bp.run(nodesForDataCompression);
   }
-
-  unsigned numStartupSections = 0;
-  unsigned numCodeCompressionSections = 0;
-  unsigned numDuplicateCodeSections = 0;
-  unsigned numDataCompressionSections = 0;
-  unsigned numDuplicateDataSections = 0;
-  SetVector<const InputSection *> orderedSections;
-  // Order startup functions,
-  for (auto &node : nodesForStartup) {
-    const auto *isec = sections[node.Id];
-    if (orderedSections.insert(isec))
-      ++numStartupSections;
-  }
-  // then functions for compression,
-  for (auto &node : nodesForFunctionCompression) {
-    const auto *isec = sections[node.Id];
-    if (orderedSections.insert(isec))
-      ++numCodeCompressionSections;
-
-    auto It = duplicateSectionIdxs.find(node.Id);
-    if (It == duplicateSectionIdxs.end())
-      continue;
-    for (auto dupSecIdx : It->getSecond()) {
-      const auto *dupIsec = sections[dupSecIdx];
-      if (orderedSections.insert(dupIsec))
-        ++numDuplicateCodeSections;
-    }
-  }
-  // then data for compression.
-  for (auto &node : nodesForDataCompression) {
-    const auto *isec = sections[node.Id];
-    if (orderedSections.insert(isec))
-      ++numDataCompressionSections;
-    auto It = duplicateSectionIdxs.find(node.Id);
-    if (It == duplicateSectionIdxs.end())
-      continue;
-    for (auto dupSecIdx : It->getSecond()) {
-      const auto *dupIsec = sections[dupSecIdx];
-      if (orderedSections.insert(dupIsec))
-        ++numDuplicateDataSections;
-    }
-  }
-
-  if (verbose) {
-    unsigned numTotalOrderedSections =
-        numStartupSections + numCodeCompressionSections +
-        numDuplicateCodeSections + numDataCompressionSections +
-        numDuplicateDataSections;
-    dbgs()
-        << "Ordered " << numTotalOrderedSections
-        << " sections using balanced partitioning:\n  Functions for startup: "
-        << numStartupSections
-        << "\n  Functions for compression: " << numCodeCompressionSections
-        << "\n  Duplicate functions: " << numDuplicateCodeSections
-        << "\n  Data for compression: " << numDataCompressionSections
-        << "\n  Duplicate data: " << numDuplicateDataSections << "\n";
-
-    if (!profilePath.empty()) {
-      // Evaluate this function order for startup
-      StringMap<std::pair<uint64_t, uint64_t>> symbolToPageNumbers;
-      const uint64_t pageSize = (1 << 14);
-      uint64_t currentAddress = 0;
-      for (const auto *isec : orderedSections) {
-        for (Symbol *sym : isec->symbols) {
-          if (auto *d = dyn_cast_or_null<Defined>(sym)) {
-            uint64_t startAddress = currentAddress + d->value;
-            uint64_t endAddress = startAddress + d->size;
-            uint64_t firstPage = startAddress / pageSize;
-            // I think the kernel might pull in a few pages when one it touched,
-            // so it might be more accurate to force lastPage to be aligned by
-            // 4?
-            uint64_t lastPage = endAddress / pageSize;
-            StringRef rootSymbol = d->getName();
-            rootSymbol = getRootSymbol(rootSymbol);
-            symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage);
-            if (rootSymbol.consume_front("_") || rootSymbol.consume_front("l_"))
-              symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage);
-          }
-        }
-
-        currentAddress += isec->getSize();
-      }
-
-      // The area under the curve F where F(t) is the total number of page
-      // faults at step t.
-      unsigned area = 0;
-      for (auto &trace : reader->getTemporalProfTraces()) {
-        SmallSet<uint64_t, 0> touchedPages;
-        for (unsigned step = 0; step < trace.FunctionNameRefs.size(); step++) {
-          auto traceId = trace.FunctionNameRefs[step];
-          auto [Filename, ParsedFuncName] =
-              getParsedIRPGOName(reader->getSymtab().getFuncOrVarName(traceId));
-          ParsedFuncName = getRootSymbol(ParsedFuncName);
-          auto it = symbolToPageNumbers.find(ParsedFuncName);
-          if (it != symbolToPageNumbers.end()) {
-            auto &[firstPage, lastPage] = it->getValue();
-            for (uint64_t i = firstPage; i <= lastPage; i++)
-              touchedPages.insert(i);
-          }
-          area += touchedPages.size();
-        }
-      }
-      dbgs() << "Total area under the page fault curve: " << (float)area
-             << "\n";
-    }
-  }
-
-  DenseMap<const InputSection *, size_t> sectionPriorities;
-  for (const auto *isec : orderedSections)
-    sectionPriorities[isec] = --highestAvailablePriority;
-  return sectionPriorities;
+  return result;
 }
diff --git a/lld/MachO/BPSectionOrderer.h b/lld/MachO/BPSectionOrderer.h
index cefd0ceb10e561..3f006ed2fb3618 100644
--- a/lld/MachO/BPSectionOrderer.h
+++ b/lld/MachO/BPSectionOrderer.h
@@ -14,13 +14,149 @@
 #ifndef LLD_MACHO_BPSECTION_ORDERER_H
 #define LLD_MACHO_BPSECTION_ORDERER_H
 
+#include "InputSection.h"
+#include "Relocations.h"
+#include "Symbols.h"
+#include "lld/Common/SectionOrderer.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/TinyPtrVector.h"
 
 namespace lld::macho {
 
 class InputSection;
 
+/// Adapts a Mach-O Symbol to the BPSymbol interface.
+class MachoSymbol : public BPSymbol {
+  const Symbol *sym;
+
+public:
+  explicit MachoSymbol(const Symbol *s) : sym(s) {}
+
+  llvm::StringRef getName() const override { return sym->getName(); }
+
+  /// Returns this wrapper if the symbol is a Defined, otherwise nullptr.
+  BPSymbol *asDefinedSymbol() override {
+    return llvm::isa<Defined>(sym) ? this : nullptr;
+  }
+
+  /// Value of the Defined symbol, or 0 for any other symbol kind.
+  uint64_t getValue() const override {
+    const auto *d = llvm::dyn_cast<Defined>(sym);
+    return d ? d->value : 0;
+  }
+
+  /// Size of the Defined symbol, or 0 for any other symbol kind.
+  uint64_t getSize() const override {
+    const auto *d = llvm::dyn_cast<Defined>(sym);
+    return d ? d->size : 0;
+  }
+
+  const Symbol *getSymbol() const { return sym; }
+};
+
+/// Adapts a Mach-O InputSection to the BPSectionBase interface.
+class MachoSection : public BPSectionBase {
+  const InputSection *isec;
+  // Wrappers for isec->symbols plus a parallel array of plain pointers. Both
+  // are filled on the first getSymbols() call and owned by this object.
+  mutable std::vector<std::unique_ptr<MachoSymbol>> symbolCache;
+  mutable std::vector<BPSymbol *> symbolPtrs;
+
+public:
+  explicit MachoSection(const InputSection *sec) : isec(sec) {}
+
+  const InputSection *getSection() const { return isec; }
+  const InputSection *getInputSection() const { return isec; }
+
+  llvm::StringRef getName() const override { return isec->getName(); }
+  uint64_t getSize() const override { return isec->getSize(); }
+  bool isCodeSection() const override { return macho::isCodeSection(isec); }
+
+  bool hasValidData() const override {
+    return isec && !isec->data.empty() && isec->data.data();
+  }
+
+  llvm::ArrayRef<uint8_t> getSectionData() const override { return isec->data; }
+
+  /// Returns one wrapper per entry of isec->symbols. The array is per-section
+  /// state, so it stays valid when getSymbols() is called on other sections.
+  llvm::ArrayRef<BPSymbol *> getSymbols() const override {
+    if (symbolCache.empty()) {
+      for (const auto *sym : isec->symbols) {
+        symbolCache.push_back(std::make_unique<MachoSymbol>(sym));
+        symbolPtrs.push_back(symbolCache.back().get());
+      }
+    }
+    return symbolPtrs;
+  }
+
+  /// Appends a hash per (up to) 4-byte data window, plus reloc-adjusted hashes
+  /// for windows touching a relocation, to \p hashes; then sorts and dedups.
+  void getSectionHash(llvm::SmallVectorImpl<uint64_t> &hashes,
+                      const llvm::DenseMap<const BPSectionBase *, uint64_t>
+                          &sectionToIdx) const override {
+    constexpr unsigned windowSize = 4;
+    // Re-key the index map by InputSection so relocation referents can be
+    // looked up. NOTE(review): rebuilt on every call, i.e. once per section.
+    llvm::DenseMap<const InputSection *, uint64_t> machoSectionToIdx;
+    for (const auto &[sec, idx] : sectionToIdx) {
+      if (auto *machoSec = llvm::dyn_cast<MachoSection>(sec))
+        machoSectionToIdx[machoSec->getInputSection()] = idx;
+    }
+
+    // Calculate content hashes
+    for (size_t i = 0; i < isec->data.size(); i++) {
+      auto window = isec->data.drop_front(i).take_front(windowSize);
+      hashes.push_back(xxHash64(window));
+    }
+
+    // Calculate relocation hashes
+    for (const auto &r : isec->relocs) {
+      if (r.length == 0 || r.referent.isNull() || r.offset >= isec->data.size())
+        continue;
+      uint64_t relocHash = getRelocHash(r, machoSectionToIdx);
+      uint32_t start = (r.offset < windowSize) ? 0 : r.offset - windowSize + 1;
+      for (uint32_t i = start; i < r.offset + r.length; i++) {
+        auto window = isec->data.drop_front(i).take_front(windowSize);
+        hashes.push_back(xxHash64(window) + relocHash);
+      }
+    }
+    llvm::sort(hashes);
+    hashes.erase(std::unique(hashes.begin(), hashes.end()), hashes.end());
+  }
+
+  // Always true: BPSectionBase has no kind tag, so dyn_cast cannot verify.
+  static bool classof(const BPSectionBase *) { return true; }
+private:
+  /// Hashes \p reloc from the kinds of its referent section/symbol, the
+  /// referent's index in \p sectionToIdx (0 if absent), an offset and addend.
+  static uint64_t getRelocHash(
+      const Reloc &reloc,
+      const llvm::DenseMap<const InputSection *, uint64_t> &sectionToIdx) {
+    auto *isec = reloc.getReferentInputSection();
+    std::optional<uint64_t> sectionIdx;
+    auto sectionIdxIt = sectionToIdx.find(isec);
+    if (sectionIdxIt != sectionToIdx.end())
+      sectionIdx = sectionIdxIt->getSecond();
+    std::string kind;
+    if (isec)
+      kind = ("Section " + Twine(isec->kind())).str();
+    if (auto *sym = reloc.referent.dyn_cast<Symbol *>()) {
+      kind += (" Symbol " + Twine(sym->kind())).str();
+      if (auto *d = llvm::dyn_cast<Defined>(sym)) {
+        if (llvm::isa_and_nonnull<CStringInputSection>(isec))
+          return BPSectionBase::getRelocHash(kind, 0, isec->getOffset(d->value),
+                                             reloc.addend);
+        return BPSectionBase::getRelocHash(kind, sectionIdx.value_or(0),
+                                           d->value, reloc.addend);
+      }
+    }
+    return BPSectionBase::getRelocHash(kind, sectionIdx.value_or(0), 0,
+                                       reloc.addend);
+  }
+};
+
 /// Run Balanced Partitioning to find the optimal function and data order to
 /// improve startup time and compressed size.
 ///
diff --git a/lld/include/lld/Common/SectionOrderer.h b/lld/include/lld/Common/SectionOrderer.h
new file mode 100644
index 00000000000000..7edb79c57338d1
--- /dev/null
+++ b/lld/include/lld/Common/SectionOrderer.h
@@ -0,0 +1,75 @@
+//===- SectionOrderer.h ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the common interfaces which may be used by
+// BPSectionOrderer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_COMMON_SECTION_ORDERER_H
+#define LLD_COMMON_SECTION_ORDERER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/xxhash.h"
+
+namespace lld {
+
+class BPSymbol {
+  // Format-independent view of a symbol; implemented per object file format.
+public:
+  virtual ~BPSymbol() = default;
+  virtual llvm::StringRef getName() const = 0;
+  virtual BPSymbol *asDefinedSymbol() = 0;
+  virtual uint64_t getValue() const = 0;
+  virtual uint64_t getSize() const = 0;
+};
+
+/// Format-independent view of an input section for the section orderer.
+class BPSectionBase {
+public:
+  virtual ~BPSectionBase() = default;
+  virtual llvm::StringRef getName() const = 0;
+  virtual uint64_t getSize() const = 0;
+  virtual bool hasValidData() const = 0;
+  virtual bool isCodeSection() const = 0;
+  virtual llvm::ArrayRef<uint8_t> getSectionData() const = 0;
+  virtual llvm::ArrayRef<BPSymbol *> getSymbols() const = 0;
+  virtual void
+  getSectionHash(llvm::SmallVectorImpl<uint64_t> &hashes,
+                 const llvm::DenseMap<const BPSectionBase *, uint64_t>
+                     &sectionToIdx) const = 0;
+  /// Strips a trailing "(.__uniq.xxxx)?.llvm.yyyy" suffix from \p Name.
+  static llvm::StringRef getRootSymbol(llvm::StringRef Name) {
+    return Name.rsplit(".llvm.").first.rsplit(".__uniq.").first;
+  }
+
+  static uint64_t getRelocHash(llvm::StringRef kind, uint64_t sectionIdx,
+                               uint64_t offset, uint64_t addend) {
+    return llvm::xxHash64((kind + ": " + llvm::Twine::utohexstr(sectionIdx) +
+                           " + " + llvm::Twine::utohexstr(offset) + " + " +
+                           llvm::Twine::utohexstr(addend))
+                              .str());
+  }
+};
+
+class SectionOrderer { // Shared balanced partitioning entry point.
+public:
+  static llvm::DenseMap<const BPSectionBase *, size_t>
+  reorderSectionsByBalancedPartitioning(
+      size_t &highestAvailablePriority, llvm::StringRef profilePath,
+      bool forFunctionCompression, bool forDataCompression,
+      bool compressionSortStartupFunctions, bool verbose,
+      llvm::SmallVector<BPSectionBase *> inputSections);
+};
+
+} // namespace lld
+
+#endif
diff --git a/lld/test/ELF/bp-section-orderer-errs.s b/lld/test/ELF/bp-section-orderer-errs.s
new file mode 100644
index 00000000000000..6e41928cc00c05
--- /dev/null
+++ b/lld/test/ELF/bp-section-orderer-errs.s
@@ -0,0 +1,44 @@
+# REQUIRES: aarch64
+
+# RUN: llvm-mc -filetype=obj -triple=aarch64 %s -o %t
+# RUN: echo "A B 5" > %t.call_graph
+# RUN: echo "B C 50" >> %t.call_graph
+# RUN: echo "C D 40" >> %t.call_graph
+# RUN: echo "D B 10" >> %t.call_graph
+# RUN: not ld.lld -o /dev/null %t --irpgo-profile-sort %s --call-graph-ordering-file=%t.call_graph 2>&1 | FileCheck %s --check-prefix=IRPGO-ERR
+# RUN: not ld.lld -o /dev/null %t --irpgo-profile-sort=%s --call-graph-ordering-file=%t.call_graph 2>&1 | FileCheck %s --check-prefix=IRPGO-ERR
+# IRPGO-ERR: --irpgo-profile-sort is incompatible with --call-graph-ordering-file
+
+# RUN: not ld.lld -o /dev/null --compression-sort=function --call-graph-ordering-file %t.call_graph 2>&1 | FileCheck %s --check-prefix=COMPRESSION-ERR
+# COMPRESSION-ERR: --compression-sort is incompatible with --call-graph-ordering-file
+
+# RUN: not ld.lld -o /dev/null --compression-sort=malformed 2>&1 | FileCheck %s --check-prefix=COMPRESSION-MALFORM
+# COMPRESSION-MALFORM: unknown value `malformed` for --compression-sort=
+
+# RUN: not ld.lld -o /dev/null --compression-sort-startup-functions 2>&1 | FileCheck %s --check-prefix=STARTUP
+# STARTUP: --compression-sort-startup-functions must be used with --irpgo-profile-sort
+
+# CHECK: B
+# CHECK-NEXT: C
+# CHECK-NEXT: D
+# CHECK-NEXT: A
+
+.section    .text.A,"ax", at progbits
+.globl  A
+A:
+ nop
+
+.section    .text.B,"ax", at progbits
+.globl  B
+B:
+ nop
+
+.section    .text.C,"ax", at progbits
+.globl  C
+C:
+ nop
+
+.section    .text.D,"ax", at progbits
+.globl  D
+D:
+ nop
diff --git a/lld/test/ELF/bp-section-orderer-stress.s b/lld/test/ELF/bp-section-orderer-stress.s
new file mode 100644
index 00000000000000..a8997269214e48
--- /dev/null
+++ b/lld/test/ELF/bp-section-orderer-stress.s
@@ -0,0 +1,104 @@
+# REQUIRES: aarch64
+
+# Generate a large test case and check that the output is deterministic.
+
+# RUN: %python %s %t.s %t.proftext
+
+# RUN: llvm-mc -filetype=obj -triple=aarch64 %t.s -o %t.o
+# RUN: llvm-profdata merge %t.proftext -o %t.profdata
+
+# RUN: ld.lld -e _main --icf=all -o - %t.o --irpgo-profile-sort=%t.profdata --compression-sort-startup-functions --compression-sort=both | llvm-nm --numeric-sort --format=just-symbols - > %t.order1.txt
+# RUN: ld.lld -e _main --icf=all -o - %t.o --irpgo-profile-sort=%t.profdata --compression-sort-startup-functions --compression-sort=both | llvm-nm --numeric-sort --format=just-symbols - > %t.order2.txt
+# RUN: diff %t.order1.txt %t.order2.txt
+
+import random
+import sys
+# argv[1]/argv[2]: output paths for the generated assembly and text profile.
+assembly_filepath = sys.argv[1]
+proftext_filepath = sys.argv[2]
+# Seed the RNG with a constant so every run generates the same inputs.
+random.seed(1234)
+num_functions = 1000
+num_data = 100
+num_traces = 10
+# Symbol names f0.. and d0..; only the first half of the functions is profiled.
+function_names = [f"f{n}" for n in range(num_functions)]
+data_names = [f"d{n}" for n in range(num_data)]
+profiled_functions = function_names[: int(num_functions / 2)]
+# One small function body per name; the immediates vary with the index.
+function_contents = [
+    f"""
+{name}:
+  add w0, w0, #{i % 4096}
+  add w1, w1, #{i % 10}
+  add w2, w0, #{i % 20}
+  adrp x3, {name}
+  ret
+"""
+    for i, name in enumerate(function_names)
+]
+# One data blob per name: a short string and a .xword holding its own address.
+data_contents = [
+      f"""
+{name}:
+  .ascii "s{i % 2}-{i % 3}-{i % 5}"
+  .xword {name}
+"""
+    for i, name in enumerate(data_names)
+]
+# Each trace lists every profiled function once, in random order, weight 1.
+trace_contents = [
+    f"""
+# Weight
+1
+{", ".join(random.sample(profiled_functions, len(profiled_functions)))}
+"""
+    for i in range(num_traces)
+]
+# One proftext record per profiled function (hash = index, a single counter).
+profile_contents = [
+    f"""
+{name}
+# Func Hash:
+{i}
+# Num Counters:
+1
+# Counter Values:
+1
+"""
+    for i, name in enumerate(profiled_functions)
+]
+# Write the assembly: _main and all functions in .text, all data in .data.
+with open(assembly_filepath, "w") as f:
+    f.write(
+        f"""
+.text
+.globl _main
+
+_main:
+  ret
+
+{"".join(function_contents)}
+
+.data
+{"".join(data_contents)}
+
+"""
+    )
+# Write the IR profile in text form, including the temporal traces.
+with open(proftext_filepath, "w") as f:
+    f.write(
+        f"""
+:ir
+:temporal_prof_traces
+
+# Num Traces
+{num_traces}
+# Trace Stream Size:
+{num_traces}
+
+{"".join(trace_contents)}
+
+{"".join(profile_contents)}
+"""
+    )
diff --git a/lld/test/ELF/bp-section-orderer.s b/lld/test/ELF/bp-section-orderer.s
new file mode 100644
index 00000000000000..bf1b86dc2b893c
--- /dev/null
+++ b/lld/test/ELF/bp-section-orderer.s
@@ -0,0 +1,154 @@
+# REQUIRES: aarch64
+
+# RUN: rm -rf %t && split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/a.s -o %t/a.o
+# RUN: llvm-profdata merge %t/a.proftext -o %t/a.profdata
+
+# RUN: ld.lld -e _main -o %t/a.out %t/a.o --irpgo-profile-sort=%t/a.profdata --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=STARTUP
+# RUN: ld.lld -e _main -o %t/a.out %t/a.o --irpgo-profile-sort=%t/a.profdata --verbose-bp-section-orderer --icf=all --compression-sort=none 2>&1 | FileCheck %s --check-prefix=STARTUP
+
+# STARTUP: Ordered 3 sections using balanced partitioning
+
+# RUN: ld.lld -e _main -o - %t/a.o --irpgo-profile-sort=%t/a.profdata --symbol-ordering-file %t/a.orderfile | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=ORDERFILE
+
+# ORDERFILE: A
+# ORDERFILE: F
+# ORDERFILE: E
+# ORDERFILE: D
+# ORDERFILE-DAG: _main
+# ORDERFILE-DAG: _B
+# ORDERFILE-DAG: l_C
+# ORDERFILE-DAG: s1
+# ORDERFILE-DAG: s2
+# ORDERFILE-DAG: r1
+# ORDERFILE-DAG: r2
+
+# RUN: ld.lld -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=function 2>&1 | FileCheck %s --check-prefix=COMPRESSION-FUNC
+# RUN: ld.lld -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=data 2>&1 | FileCheck %s --check-prefix=COMPRESSION-DATA
+# RUN: ld.lld -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=both 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
+# RUN: ld.lld -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=both --irpgo-profile-sort=%t/a.profdata 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
+
+# COMPRESSION-FUNC: Ordered 14 sections using balanced partitioning
+# COMPRESSION-DATA: Ordered 8 sections using balanced partitioning
+# COMPRESSION-BOTH: Ordered 22 sections using balanced partitioning
+
+#--- a.s
+.text
+.p2align 2
+
+.section .text._main,"ax",@progbits
+.globl _main
+_main:
+  ret
+
+.section .text.A,"ax",@progbits
+.globl A
+A:
+  ret
+
+.section .text._B,"ax",@progbits
+.globl _B
+_B:
+  add w0, w0, #1
+  bl  A
+  ret
+
+.section .text.l_C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222,"ax",@progbits
+.globl l_C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
+l_C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222:
+  add w0, w0, #2
+  bl  A
+  ret
+
+.section .text.D,"ax",@progbits
+.globl D
+D:
+  add w0, w0, #2
+  bl _B
+  ret
+
+.section .text.E,"ax",@progbits
+.globl E
+E:
+  add w0, w0, #2
+  bl l_C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
+  ret
+
+.section .text.F,"ax",@progbits
+.globl F
+F:
+  add w0, w0, #3
+  bl l_C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
+  ret
+
+.data
+.p2align 3
+
+.section .data.s1,"aw",@progbits
+.globl s1
+s1:
+  .ascii "hello world"
+
+.section .data.s2,"aw",@progbits
+.globl s2
+s2:
+  .ascii "i am a string"
+
+.section .data.r1,"aw",@progbits
+.globl r1
+r1:
+  .quad s1
+
+.section .data.r2,"aw",@progbits
+.globl r2
+r2:
+  .quad r1
+
+#--- a.proftext
+:ir
+:temporal_prof_traces
+# Num Traces
+1
+# Trace Stream Size:
+1
+# Weight
+1
+A, B, C.__uniq.555555555555555555555555555555555555555.llvm.6666666666666666666
+
+A
+# Func Hash:
+1111
+# Num Counters:
+1
+# Counter Values:
+1
+
+B
+# Func Hash:
+2222
+# Num Counters:
+1
+# Counter Values:
+1
+
+C.__uniq.555555555555555555555555555555555555555.llvm.6666666666666666666
+# Func Hash:
+3333
+# Num Counters:
+1
+# Counter Values:
+1
+
+D
+# Func Hash:
+4444
+# Num Counters:
+1
+# Counter Values:
+1
+
+#--- a.orderfile
+A
+F
+E
+D

>From b51799d964a98f6444381a621acd04bcc8112f32 Mon Sep 17 00:00:00 2001
From: xupengying <xpy66swsry at gmail.com>
Date: Wed, 27 Nov 2024 14:01:45 +0800
Subject: [PATCH 2/7] docs: clarify .subsections_via_symbols directive usage
 for MachO/ELF

The .subsections_via_symbols directive is primarily used in MachO object
files and may not have the same meaning in ELF format. Update documentation
to reflect that:

- Specify MachO-specific nature of .subsections_via_symbols
- Note that ELF equivalents are -ffunction-sections and -fdata-sections
- Add guidance for assembler usage with ELF format
---
 lld/ELF/BPSectionOrderer.cpp      |   8 +-
 lld/ELF/BPSectionOrderer.h        |   5 +-
 lld/test/ELF/bp-section-orderer.s | 382 ++++++++++++++++++++++--------
 3 files changed, 291 insertions(+), 104 deletions(-)

diff --git a/lld/ELF/BPSectionOrderer.cpp b/lld/ELF/BPSectionOrderer.cpp
index 27c772936a52f6..7cae055883cce6 100644
--- a/lld/ELF/BPSectionOrderer.cpp
+++ b/lld/ELF/BPSectionOrderer.cpp
@@ -34,7 +34,9 @@ lld::elf::runBalancedPartitioning(Ctx &ctx, llvm::StringRef profilePath,
   for (Symbol *sym : ctx.symtab->getSymbols()) {
     if (auto *d = dyn_cast<Defined>(sym)) {
       if (auto *sec = dyn_cast_or_null<InputSectionBase>(d->section)) {
-        sections.push_back(new ELFSection(sec, new ELFSymbol(sym)));
+        if (sym->getSize() > 0) {
+          sections.push_back(new ELFSection(sec, new ELFSymbol(sym)));
+        }
       }
     }
   }
@@ -43,7 +45,9 @@ lld::elf::runBalancedPartitioning(Ctx &ctx, llvm::StringRef profilePath,
     for (Symbol *sym : file->getLocalSymbols()) {
       if (auto *d = dyn_cast<Defined>(sym)) {
         if (auto *sec = dyn_cast_or_null<InputSectionBase>(d->section)) {
-          sections.push_back(new ELFSection(sec, new ELFSymbol(sym)));
+          if (sym->getSize() > 0) {
+            sections.push_back(new ELFSection(sec, new ELFSymbol(sym)));
+          }
         }
       }
     }
diff --git a/lld/ELF/BPSectionOrderer.h b/lld/ELF/BPSectionOrderer.h
index 2ecf116961568d..1d868df287a028 100644
--- a/lld/ELF/BPSectionOrderer.h
+++ b/lld/ELF/BPSectionOrderer.h
@@ -128,8 +128,9 @@ class ELFSection : public BPSectionBase {
 /// Run Balanced Partitioning to find the optimal function and data order to
 /// improve startup time and compressed size.
 ///
-/// It is important that .subsections_via_symbols is used to ensure functions
-/// and data are in their own sections and thus can be reordered.
+/// It is important that -ffunction-sections and -fdata-sections are used to
+/// ensure functions and data are in their own sections and thus can be
+/// reordered.
 llvm::DenseMap<const lld::elf::InputSectionBase *, int>
 runBalancedPartitioning(Ctx &ctx, llvm::StringRef profilePath,
                         bool forFunctionCompression, bool forDataCompression,
diff --git a/lld/test/ELF/bp-section-orderer.s b/lld/test/ELF/bp-section-orderer.s
index bf1b86dc2b893c..6140a54f6a9837 100644
--- a/lld/test/ELF/bp-section-orderer.s
+++ b/lld/test/ELF/bp-section-orderer.s
@@ -4,105 +4,287 @@
 # RUN: llvm-mc -filetype=obj -triple=aarch64 %t/a.s -o %t/a.o
 # RUN: llvm-profdata merge %t/a.proftext -o %t/a.profdata
 
-# RUN: ld.lld -e _main -o %t/a.out %t/a.o --irpgo-profile-sort=%t/a.profdata --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=STARTUP
-# RUN: ld.lld -e _main -o %t/a.out %t/a.o --irpgo-profile-sort=%t/a.profdata --verbose-bp-section-orderer --icf=all --compression-sort=none 2>&1 | FileCheck %s --check-prefix=STARTUP
+# The code below is the reduced version of the output
+# from the following invocation and source:
+#
+# // test.cpp:
+#     const char s1[] = "hello world";
+#     const char s2[] = "i am a string"; 
+#     const char *r1 = s1;
+#     const char **r2 = &r1;
+#
+# void A()
+# {
+#     return;
+# }
+#
+# int B(int a)
+# {
+#     A();
+#     return a + 1;
+# }
+#
+# int C(int a)
+# {
+#     A();
+#     return a + 2;
+# }
+#
+# int D(int a)
+# {
+#     return B(a + 2);
+# }
+#
+# int E(int a)
+# {
+#     return C(a + 2);
+# }
+#
+# int F(int a)
+# {
+#     return C(a + 3);
+# }
+#
+# int main()
+# {
+#     return 0;
+# }
+#
+# clang -target aarch64-linux-gnu -ffunction-sections -fdata-sections test.cpp -o test.s -S
+
+# RUN: ld.lld -e main -o %t/a.out %t/a.o --irpgo-profile-sort=%t/a.profdata --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=STARTUP
+# RUN: ld.lld -e main -o %t/a.out %t/a.o --irpgo-profile-sort=%t/a.profdata --verbose-bp-section-orderer --icf=all --compression-sort=none 2>&1 | FileCheck %s --check-prefix=STARTUP
 
 # STARTUP: Ordered 3 sections using balanced partitioning
 
-# RUN: ld.lld -e _main -o - %t/a.o --irpgo-profile-sort=%t/a.profdata --symbol-ordering-file %t/a.orderfile | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=ORDERFILE
-
-# ORDERFILE: A
-# ORDERFILE: F
-# ORDERFILE: E
-# ORDERFILE: D
-# ORDERFILE-DAG: _main
-# ORDERFILE-DAG: _B
-# ORDERFILE-DAG: l_C
-# ORDERFILE-DAG: s1
-# ORDERFILE-DAG: s2
-# ORDERFILE-DAG: r1
-# ORDERFILE-DAG: r2
-
-# RUN: ld.lld -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=function 2>&1 | FileCheck %s --check-prefix=COMPRESSION-FUNC
-# RUN: ld.lld -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=data 2>&1 | FileCheck %s --check-prefix=COMPRESSION-DATA
-# RUN: ld.lld -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=both 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
-# RUN: ld.lld -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=both --irpgo-profile-sort=%t/a.profdata 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
-
-# COMPRESSION-FUNC: Ordered 14 sections using balanced partitioning
-# COMPRESSION-DATA: Ordered 8 sections using balanced partitioning
-# COMPRESSION-BOTH: Ordered 22 sections using balanced partitioning
+# RUN: ld.lld -e main -o - %t/a.o --irpgo-profile-sort=%t/a.profdata --symbol-ordering-file %t/a.orderfile | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=ORDERFILE
+
+# ORDERFILE: _ZL2s1
+# ORDERFILE: _ZL2s2
+# ORDERFILE: _Z1Av
+# ORDERFILE: _Z1Fi
+# ORDERFILE: _Z1Ei
+# ORDERFILE: _Z1Di
+# ORDERFILE: _Z1Ci
+# ORDERFILE: _Z1Bi
+# ORDERFILE: main
+# ORDERFILE: r1
+# ORDERFILE: r2
+
+# RUN: ld.lld -e main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=function 2>&1 | FileCheck %s --check-prefix=COMPRESSION-FUNC
+# RUN: ld.lld -e main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=data 2>&1 | FileCheck %s --check-prefix=COMPRESSION-DATA
+# RUN: ld.lld -e main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=both 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
+# RUN: ld.lld -e main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=both --irpgo-profile-sort=%t/a.profdata 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
+
+# COMPRESSION-FUNC: Ordered 7 sections using balanced partitioning
+# COMPRESSION-DATA: Ordered 4 sections using balanced partitioning
+# COMPRESSION-BOTH: Ordered 11 sections using balanced partitioning
 
 #--- a.s
-.text
-.p2align 2
-
-.section .text._main,"ax",@progbits
-.globl _main
-_main:
-  ret
-
-.section .text.A,"ax",@progbits
-.globl A
-A:
-  ret
-
-.section .text._B,"ax",@progbits
-.globl _B
-_B:
-  add w0, w0, #1
-  bl  A
-  ret
-
-.section .text.l_C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222,"ax",@progbits
-.globl l_C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
-l_C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222:
-  add w0, w0, #2
-  bl  A
-  ret
-
-.section .text.D,"ax",@progbits
-.globl D
-D:
-  add w0, w0, #2
-  bl _B
-  ret
-
-.section .text.E,"ax",@progbits
-.globl E
-E:
-  add w0, w0, #2
-  bl l_C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
-  ret
-
-.section .text.F,"ax",@progbits
-.globl F
-F:
-  add w0, w0, #3
-  bl l_C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
-  ret
-
-.data
-.p2align 3
-
-.section .data.s1,"aw",@progbits
-.globl s1
-s1:
-  .ascii "hello world"
-
-.section .data.s2,"aw",@progbits
-.globl s2
-s2:
-  .ascii "i am a string"
-
-.section .data.r1,"aw",@progbits
-.globl r1
+	.text
+	.file	"test.cpp"
+	.section	.text._Z1Av,"ax",@progbits
+	.globl	_Z1Av                           // -- Begin function _Z1Av
+	.p2align	2
+	.type	_Z1Av,@function
+_Z1Av:                                  // @_Z1Av
+	.cfi_startproc
+// %bb.0:
+	ret
+.Lfunc_end0:
+	.size	_Z1Av, .Lfunc_end0-_Z1Av
+	.cfi_endproc
+                                        // -- End function
+	.section	.text._Z1Bi,"ax",@progbits
+	.globl	_Z1Bi                           // -- Begin function _Z1Bi
+	.p2align	2
+	.type	_Z1Bi,@function
+_Z1Bi:                                  // @_Z1Bi
+	.cfi_startproc
+// %bb.0:
+	sub	sp, sp, #32
+	.cfi_def_cfa_offset 32
+	stp	x29, x30, [sp, #16]             // 16-byte Folded Spill
+	add	x29, sp, #16
+	.cfi_def_cfa w29, 16
+	.cfi_offset w30, -8
+	.cfi_offset w29, -16
+	stur	w0, [x29, #-4]
+	bl	_Z1Av
+	ldur	w8, [x29, #-4]
+	add	w0, w8, #1
+	.cfi_def_cfa wsp, 32
+	ldp	x29, x30, [sp, #16]             // 16-byte Folded Reload
+	add	sp, sp, #32
+	.cfi_def_cfa_offset 0
+	.cfi_restore w30
+	.cfi_restore w29
+	ret
+.Lfunc_end1:
+	.size	_Z1Bi, .Lfunc_end1-_Z1Bi
+	.cfi_endproc
+                                        // -- End function
+	.section	.text._Z1Ci,"ax",@progbits
+	.globl	_Z1Ci                           // -- Begin function _Z1Ci
+	.p2align	2
+	.type	_Z1Ci,@function
+_Z1Ci:                                  // @_Z1Ci
+	.cfi_startproc
+// %bb.0:
+	sub	sp, sp, #32
+	.cfi_def_cfa_offset 32
+	stp	x29, x30, [sp, #16]             // 16-byte Folded Spill
+	add	x29, sp, #16
+	.cfi_def_cfa w29, 16
+	.cfi_offset w30, -8
+	.cfi_offset w29, -16
+	stur	w0, [x29, #-4]
+	bl	_Z1Av
+	ldur	w8, [x29, #-4]
+	add	w0, w8, #2
+	.cfi_def_cfa wsp, 32
+	ldp	x29, x30, [sp, #16]             // 16-byte Folded Reload
+	add	sp, sp, #32
+	.cfi_def_cfa_offset 0
+	.cfi_restore w30
+	.cfi_restore w29
+	ret
+.Lfunc_end2:
+	.size	_Z1Ci, .Lfunc_end2-_Z1Ci
+	.cfi_endproc
+                                        // -- End function
+	.section	.text._Z1Di,"ax",@progbits
+	.globl	_Z1Di                           // -- Begin function _Z1Di
+	.p2align	2
+	.type	_Z1Di,@function
+_Z1Di:                                  // @_Z1Di
+	.cfi_startproc
+// %bb.0:
+	sub	sp, sp, #32
+	.cfi_def_cfa_offset 32
+	stp	x29, x30, [sp, #16]             // 16-byte Folded Spill
+	add	x29, sp, #16
+	.cfi_def_cfa w29, 16
+	.cfi_offset w30, -8
+	.cfi_offset w29, -16
+	stur	w0, [x29, #-4]
+	ldur	w8, [x29, #-4]
+	add	w0, w8, #2
+	bl	_Z1Bi
+	.cfi_def_cfa wsp, 32
+	ldp	x29, x30, [sp, #16]             // 16-byte Folded Reload
+	add	sp, sp, #32
+	.cfi_def_cfa_offset 0
+	.cfi_restore w30
+	.cfi_restore w29
+	ret
+.Lfunc_end3:
+	.size	_Z1Di, .Lfunc_end3-_Z1Di
+	.cfi_endproc
+                                        // -- End function
+	.section	.text._Z1Ei,"ax",@progbits
+	.globl	_Z1Ei                           // -- Begin function _Z1Ei
+	.p2align	2
+	.type	_Z1Ei,@function
+_Z1Ei:                                  // @_Z1Ei
+	.cfi_startproc
+// %bb.0:
+	sub	sp, sp, #32
+	.cfi_def_cfa_offset 32
+	stp	x29, x30, [sp, #16]             // 16-byte Folded Spill
+	add	x29, sp, #16
+	.cfi_def_cfa w29, 16
+	.cfi_offset w30, -8
+	.cfi_offset w29, -16
+	stur	w0, [x29, #-4]
+	ldur	w8, [x29, #-4]
+	add	w0, w8, #2
+	bl	_Z1Ci
+	.cfi_def_cfa wsp, 32
+	ldp	x29, x30, [sp, #16]             // 16-byte Folded Reload
+	add	sp, sp, #32
+	.cfi_def_cfa_offset 0
+	.cfi_restore w30
+	.cfi_restore w29
+	ret
+.Lfunc_end4:
+	.size	_Z1Ei, .Lfunc_end4-_Z1Ei
+	.cfi_endproc
+                                        // -- End function
+	.section	.text._Z1Fi,"ax",@progbits
+	.globl	_Z1Fi                           // -- Begin function _Z1Fi
+	.p2align	2
+	.type	_Z1Fi,@function
+_Z1Fi:                                  // @_Z1Fi
+	.cfi_startproc
+// %bb.0:
+	sub	sp, sp, #32
+	.cfi_def_cfa_offset 32
+	stp	x29, x30, [sp, #16]             // 16-byte Folded Spill
+	add	x29, sp, #16
+	.cfi_def_cfa w29, 16
+	.cfi_offset w30, -8
+	.cfi_offset w29, -16
+	stur	w0, [x29, #-4]
+	ldur	w8, [x29, #-4]
+	add	w0, w8, #3
+	bl	_Z1Ci
+	.cfi_def_cfa wsp, 32
+	ldp	x29, x30, [sp, #16]             // 16-byte Folded Reload
+	add	sp, sp, #32
+	.cfi_def_cfa_offset 0
+	.cfi_restore w30
+	.cfi_restore w29
+	ret
+.Lfunc_end5:
+	.size	_Z1Fi, .Lfunc_end5-_Z1Fi
+	.cfi_endproc
+                                        // -- End function
+	.section	.text.main,"ax",@progbits
+	.globl	main                            // -- Begin function main
+	.p2align	2
+	.type	main,@function
+main:                                   // @main
+	.cfi_startproc
+// %bb.0:
+	sub	sp, sp, #16
+	.cfi_def_cfa_offset 16
+	mov	w0, wzr
+	str	wzr, [sp, #12]
+	add	sp, sp, #16
+	.cfi_def_cfa_offset 0
+	ret
+.Lfunc_end6:
+	.size	main, .Lfunc_end6-main
+	.cfi_endproc
+                                        // -- End function
+	.type	_ZL2s1,@object                  // @_ZL2s1
+	.section	.rodata._ZL2s1,"a",@progbits
+_ZL2s1:
+	.asciz	"hello world"
+	.size	_ZL2s1, 12
+
+	.type	r1,@object                      // @r1
+	.section	.data.r1,"aw",@progbits
+	.globl	r1
+	.p2align	3, 0x0
 r1:
-  .quad s1
+	.xword	_ZL2s1
+	.size	r1, 8
+
+	.type	_ZL2s2,@object                  // @_ZL2s2
+	.section	.rodata._ZL2s2,"a",@progbits
+_ZL2s2:
+	.asciz	"i am a string"
+	.size	_ZL2s2, 14
 
-.section .data.r2,"aw",@progbits
-.globl r2
+	.type	r2,@object                      // @r2
+	.section	.data.r2,"aw",@progbits
+	.globl	r2
+	.p2align	3, 0x0
 r2:
-  .quad r1
+	.xword	_ZL2s2
+	.size	r2, 8
 
 #--- a.proftext
 :ir
@@ -113,9 +295,9 @@ r2:
 1
 # Weight
 1
-A, B, C.__uniq.555555555555555555555555555555555555555.llvm.6666666666666666666
+_Z1Av, _Z1Bi, _Z1Ci
 
-A
+_Z1Av
 # Func Hash:
 1111
 # Num Counters:
@@ -123,7 +305,7 @@ A
 # Counter Values:
 1
 
-B
+_Z1Bi
 # Func Hash:
 2222
 # Num Counters:
@@ -131,7 +313,7 @@ B
 # Counter Values:
 1
 
-C.__uniq.555555555555555555555555555555555555555.llvm.6666666666666666666
+_Z1Ci
 # Func Hash:
 3333
 # Num Counters:
@@ -139,7 +321,7 @@ C.__uniq.555555555555555555555555555555555555555.llvm.6666666666666666666
 # Counter Values:
 1
 
-D
+_Z1Di
 # Func Hash:
 4444
 # Num Counters:
@@ -148,7 +330,7 @@ D
 1
 
 #--- a.orderfile
-A
-F
-E
-D
+_Z1Av
+_Z1Fi
+_Z1Ei
+_Z1Di

>From c32ec81b9fc433dd34a2d95e35966c01abe9baea Mon Sep 17 00:00:00 2001
From: xupengying <xpy66swsry at gmail.com>
Date: Mon, 2 Dec 2024 17:18:17 +0800
Subject: [PATCH 3/7] fix: improve code readability and code reviews

---
 ...onOrderer.cpp => BPSectionOrdererBase.cpp} |  10 +-
 lld/Common/CMakeLists.txt                     |   2 +-
 lld/ELF/BPSectionOrderer.cpp                  |  42 +-
 lld/ELF/BPSectionOrderer.h                    |  24 +-
 lld/ELF/CMakeLists.txt                        |   2 +-
 lld/ELF/Driver.cpp                            |   4 +-
 lld/MachO/BPSectionOrderer.cpp                |   6 +-
 lld/MachO/BPSectionOrderer.h                  |  22 +-
 ...ectionOrderer.h => BPSectionOrdererBase.h} |  13 +-
 lld/test/ELF/bp-section-orderer-errs.s        |  19 +-
 lld/test/ELF/bp-section-orderer.s             | 748 +++++++++++++++---
 11 files changed, 712 insertions(+), 180 deletions(-)
 rename lld/Common/{SectionOrderer.cpp => BPSectionOrdererBase.cpp} (98%)
 rename lld/include/lld/Common/{SectionOrderer.h => BPSectionOrdererBase.h} (84%)

diff --git a/lld/Common/SectionOrderer.cpp b/lld/Common/BPSectionOrdererBase.cpp
similarity index 98%
rename from lld/Common/SectionOrderer.cpp
rename to lld/Common/BPSectionOrdererBase.cpp
index 64c78030f3427f..2fc39cc777f294 100644
--- a/lld/Common/SectionOrderer.cpp
+++ b/lld/Common/BPSectionOrdererBase.cpp
@@ -1,4 +1,5 @@
-//===- SectionOrderer.cpp---------------------------------------*- C++ -*-===//
+//===- BPSectionOrdererBase.cpp---------------------------------------*- C++
+//-*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "lld/Common/SectionOrderer.h"
+#include "lld/Common/BPSectionOrdererBase.h"
 #include "lld/Common/ErrorHandler.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
@@ -100,7 +101,7 @@ static SmallVector<std::pair<unsigned, UtilityNodes>> getUnsForCompression(
 }
 
 llvm::DenseMap<const BPSectionBase *, size_t>
-SectionOrderer::reorderSectionsByBalancedPartitioning(
+BPSectionOrdererBase::reorderSectionsByBalancedPartitioning(
     size_t &highestAvailablePriority, llvm::StringRef profilePath,
     bool forFunctionCompression, bool forDataCompression,
     bool compressionSortStartupFunctions, bool verbose,
@@ -119,10 +120,9 @@ SectionOrderer::reorderSectionsByBalancedPartitioning(
     sectionToIdx.try_emplace(isec, sectionIdx);
     sections.push_back(isec);
 
-    for (auto *sym : isec->getSymbols()) {
+    for (auto *sym : isec->getSymbols())
       if (auto *d = sym->asDefinedSymbol())
         symbolToSectionIdxs[d->getName()].insert(sectionIdx);
-    }
   }
   StringMap<DenseSet<unsigned>> rootSymbolToSectionIdxs;
   for (auto &entry : symbolToSectionIdxs) {
diff --git a/lld/Common/CMakeLists.txt b/lld/Common/CMakeLists.txt
index bd5a40af41c1bc..2ab5093bf6887d 100644
--- a/lld/Common/CMakeLists.txt
+++ b/lld/Common/CMakeLists.txt
@@ -24,6 +24,7 @@ set_source_files_properties("${version_inc}"
 
 add_lld_library(lldCommon
   Args.cpp
+  BPSectionOrdererBase.cpp
   CommonLinkerContext.cpp
   DriverDispatcher.cpp
   DWARF.cpp
@@ -31,7 +32,6 @@ add_lld_library(lldCommon
   Filesystem.cpp
   Memory.cpp
   Reproduce.cpp
-  SectionOrderer.cpp
   Strings.cpp
   TargetOptionsCommandFlags.cpp
   Timer.cpp
diff --git a/lld/ELF/BPSectionOrderer.cpp b/lld/ELF/BPSectionOrderer.cpp
index 7cae055883cce6..0d9e59e9c1bef2 100644
--- a/lld/ELF/BPSectionOrderer.cpp
+++ b/lld/ELF/BPSectionOrderer.cpp
@@ -10,8 +10,8 @@
 #include "Config.h"
 #include "InputFiles.h"
 #include "InputSection.h"
+#include "lld/Common/BPSectionOrdererBase.h"
 #include "lld/Common/CommonLinkerContext.h"
-#include "lld/Common/SectionOrderer.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/BalancedPartitioning.h"
 #include "llvm/Support/TimeProfiler.h"
@@ -29,42 +29,38 @@ lld::elf::runBalancedPartitioning(Ctx &ctx, llvm::StringRef profilePath,
                                   bool compressionSortStartupFunctions,
                                   bool verbose) {
   size_t highestAvailablePriority = std::numeric_limits<int>::max();
+  // Collect all InputSectionBase objects from symbols and wrap them as
+  // BPSectionBase instances for balanced partitioning, following the same
+  // approach that '--symbol-ordering-file' uses.
   SmallVector<lld::BPSectionBase *> sections;
 
-  for (Symbol *sym : ctx.symtab->getSymbols()) {
-    if (auto *d = dyn_cast<Defined>(sym)) {
-      if (auto *sec = dyn_cast_or_null<InputSectionBase>(d->section)) {
-        if (sym->getSize() > 0) {
-          sections.push_back(new ELFSection(sec, new ELFSymbol(sym)));
-        }
-      }
-    }
-  }
+  for (Symbol *sym : ctx.symtab->getSymbols())
+    if (auto *d = dyn_cast<Defined>(sym))
+      if (auto *sec = dyn_cast_or_null<InputSectionBase>(d->section))
+        if (sym->getSize() > 0)
+          sections.push_back(new BPSectionELF(sec, new BPSymbolELF(sym)));
 
   for (ELFFileBase *file : ctx.objectFiles)
-    for (Symbol *sym : file->getLocalSymbols()) {
-      if (auto *d = dyn_cast<Defined>(sym)) {
-        if (auto *sec = dyn_cast_or_null<InputSectionBase>(d->section)) {
-          if (sym->getSize() > 0) {
-            sections.push_back(new ELFSection(sec, new ELFSymbol(sym)));
-          }
-        }
-      }
-    }
+    for (Symbol *sym : file->getLocalSymbols())
+      if (auto *d = dyn_cast<Defined>(sym))
+        if (auto *sec = dyn_cast_or_null<InputSectionBase>(d->section))
+          if (sym->getSize() > 0)
+            sections.push_back(new BPSectionELF(sec, new BPSymbolELF(sym)));
 
   auto reorderedSections =
-      lld::SectionOrderer::reorderSectionsByBalancedPartitioning(
+      lld::BPSectionOrdererBase::reorderSectionsByBalancedPartitioning(
           highestAvailablePriority, profilePath, forFunctionCompression,
           forDataCompression, compressionSortStartupFunctions, verbose,
           sections);
 
   DenseMap<const InputSectionBase *, int> result;
   for (const auto &[BPSectionBase, priority] : reorderedSections) {
-    if (const ELFSection *elfSection = dyn_cast<ELFSection>(BPSectionBase)) {
+    if (const BPSectionELF *elfSection =
+            dyn_cast<BPSectionELF>(BPSectionBase)) {
       result[elfSection->getSymbol()->getInputSection()] =
           static_cast<int>(priority);
-      delete const_cast<ELFSection *>(elfSection)->getSymbol();
-      delete const_cast<ELFSection *>(elfSection);
+      delete const_cast<BPSectionELF *>(elfSection)->getSymbol();
+      delete const_cast<BPSectionELF *>(elfSection);
     }
   }
   return result;
diff --git a/lld/ELF/BPSectionOrderer.h b/lld/ELF/BPSectionOrderer.h
index 1d868df287a028..12089e86ac1748 100644
--- a/lld/ELF/BPSectionOrderer.h
+++ b/lld/ELF/BPSectionOrderer.h
@@ -18,7 +18,7 @@
 #include "InputSection.h"
 #include "Relocations.h"
 #include "Symbols.h"
-#include "lld/Common/SectionOrderer.h"
+#include "lld/Common/BPSectionOrdererBase.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/STLFunctionalExtras.h"
@@ -31,11 +31,11 @@ namespace lld::elf {
 
 class InputSection;
 
-class ELFSymbol : public BPSymbol {
+class BPSymbolELF : public BPSymbol {
   const Symbol *sym;
 
 public:
-  explicit ELFSymbol(const Symbol *s) : sym(s) {}
+  explicit BPSymbolELF(const Symbol *s) : sym(s) {}
 
   llvm::StringRef getName() const override { return sym->getName(); }
 
@@ -66,18 +66,18 @@ class ELFSymbol : public BPSymbol {
   const Symbol *getSymbol() const { return sym; }
 };
 
-class ELFSection : public BPSectionBase {
+class BPSectionELF : public BPSectionBase {
   const InputSectionBase *isec;
-  ELFSymbol *symbol;
-  std::vector<BPSymbol *> symbols;
+  BPSymbolELF *symbol;
+  llvm::SmallVector<BPSymbol *, 0> symbols;
 
 public:
-  explicit ELFSection(const InputSectionBase *sec, ELFSymbol *sym)
+  explicit BPSectionELF(const InputSectionBase *sec, BPSymbolELF *sym)
       : isec(sec), symbol(sym), symbols({sym}) {}
 
   const InputSectionBase *getSection() const { return isec; }
 
-  ELFSymbol *getSymbol() const { return symbol; }
+  BPSymbolELF *getSymbol() const { return symbol; }
   llvm::StringRef getName() const override { return isec->name; }
 
   uint64_t getSize() const override { return isec->getSize(); }
@@ -103,13 +103,13 @@ class ELFSection : public BPSectionBase {
 
     // Convert BPSectionBase map to InputSection map
     llvm::DenseMap<const InputSectionBase *, uint64_t> elfSectionToIdx;
-    for (const auto &[sec, idx] : sectionToIdx) {
-      if (auto *elfSec = llvm::dyn_cast<ELFSection>(sec))
+    for (const auto &[sec, idx] : sectionToIdx)
+      if (auto *elfSec = llvm::dyn_cast<BPSectionELF>(sec))
         elfSectionToIdx[elfSec->getSection()] = idx;
-    }
 
     // Calculate content hashes
-    for (size_t i = 0; i < isec->content().size(); i++) {
+    size_t size = isec->content().size();
+    for (size_t i = 0; i < size; i++) {
       auto window = isec->content().drop_front(i).take_front(windowSize);
       hashes.push_back(xxHash64(window));
     }
diff --git a/lld/ELF/CMakeLists.txt b/lld/ELF/CMakeLists.txt
index 7bf70c3cbbadc5..298443cd6ea42c 100644
--- a/lld/ELF/CMakeLists.txt
+++ b/lld/ELF/CMakeLists.txt
@@ -37,6 +37,7 @@ add_lld_library(lldELF
   Arch/X86.cpp
   Arch/X86_64.cpp
   ARMErrataFix.cpp
+  BPSectionOrderer.cpp
   CallGraphSort.cpp
   DWARF.cpp
   Driver.cpp
@@ -56,7 +57,6 @@ add_lld_library(lldELF
   SymbolTable.cpp
   Symbols.cpp
   SyntheticSections.cpp
-  BPSectionOrderer.cpp
   Target.cpp
   Thunks.cpp
   Writer.cpp
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 87aaedec28b3a0..007194d4547b53 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1282,8 +1282,8 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
       ctx.arg.functionOrderForCompression = true;
       ctx.arg.dataOrderForCompression = true;
     } else if (compressionSortStr != "none") {
-      ErrAlways(ctx) << "unknown value `" + compressionSortStr + "` for " +
-                            arg->getSpelling();
+      ErrAlways(ctx) << "unknown value '" << compressionSortStr << "' for "
+                     << arg->getSpelling();
     }
     if (ctx.arg.dataOrderForCompression ||
         ctx.arg.functionOrderForCompression) {
diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index 1edf883209f719..40052ff09dcc64 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -29,20 +29,20 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
         auto *isec = subsec.isec;
         if (!isec || isec->data.empty() || !isec->data.data())
           continue;
-        sections.push_back(new MachoSection(isec));
+        sections.push_back(new BPSectionMacho(isec));
       }
     }
   }
 
   auto reorderedSections =
-      lld::SectionOrderer::reorderSectionsByBalancedPartitioning(
+      lld::BPSectionOrdererBase::reorderSectionsByBalancedPartitioning(
           highestAvailablePriority, profilePath, forFunctionCompression,
           forDataCompression, compressionSortStartupFunctions, verbose,
           sections);
 
   DenseMap<const InputSection *, size_t> result;
   for (const auto &[BPSectionBase, priority] : reorderedSections) {
-    if (auto *machoSection = dyn_cast<MachoSection>(BPSectionBase)) {
+    if (auto *machoSection = dyn_cast<BPSectionMacho>(BPSectionBase)) {
       result[machoSection->getSection()] = priority;
       delete machoSection;
     }
diff --git a/lld/MachO/BPSectionOrderer.h b/lld/MachO/BPSectionOrderer.h
index 3f006ed2fb3618..b57e814120a22b 100644
--- a/lld/MachO/BPSectionOrderer.h
+++ b/lld/MachO/BPSectionOrderer.h
@@ -17,7 +17,7 @@
 #include "InputSection.h"
 #include "Relocations.h"
 #include "Symbols.h"
-#include "lld/Common/SectionOrderer.h"
+#include "lld/Common/BPSectionOrdererBase.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/TinyPtrVector.h"
@@ -26,11 +26,11 @@ namespace lld::macho {
 
 class InputSection;
 
-class MachoSymbol : public BPSymbol {
+class BPSymbolMacho : public BPSymbol {
   const Symbol *sym;
 
 public:
-  explicit MachoSymbol(const Symbol *s) : sym(s) {}
+  explicit BPSymbolMacho(const Symbol *s) : sym(s) {}
 
   llvm::StringRef getName() const override { return sym->getName(); }
 
@@ -55,12 +55,12 @@ class MachoSymbol : public BPSymbol {
   const Symbol *getSymbol() const { return sym; }
 };
 
-class MachoSection : public BPSectionBase {
+class BPSectionMacho : public BPSectionBase {
   const InputSection *isec;
-  mutable std::vector<std::unique_ptr<MachoSymbol>> symbolCache;
+  mutable std::vector<std::unique_ptr<BPSymbolMacho>> symbolCache;
 
 public:
-  explicit MachoSection(const InputSection *sec) : isec(sec) {}
+  explicit BPSectionMacho(const InputSection *sec) : isec(sec) {}
 
   const InputSection *getSection() const { return isec; }
 
@@ -80,7 +80,7 @@ class MachoSection : public BPSectionBase {
     // Lazy initialization of symbol cache
     if (symbolCache.empty()) {
       for (const auto *sym : isec->symbols)
-        symbolCache.push_back(std::make_unique<MachoSymbol>(sym));
+        symbolCache.push_back(std::make_unique<BPSymbolMacho>(sym));
     }
     static std::vector<BPSymbol *> result;
     result.clear();
@@ -96,13 +96,13 @@ class MachoSection : public BPSectionBase {
 
     // Convert BPSectionBase map to InputSection map
     llvm::DenseMap<const InputSection *, uint64_t> machoSectionToIdx;
-    for (const auto &[sec, idx] : sectionToIdx) {
-      if (auto *machoSec = llvm::dyn_cast<MachoSection>(sec))
+    for (const auto &[sec, idx] : sectionToIdx)
+      if (auto *machoSec = llvm::dyn_cast<BPSectionMacho>(sec))
         machoSectionToIdx[machoSec->getInputSection()] = idx;
-    }
 
     // Calculate content hashes
-    for (size_t i = 0; i < isec->data.size(); i++) {
+    size_t dataSize = isec->data.size();
+    for (size_t i = 0; i < dataSize; i++) {
       auto window = isec->data.drop_front(i).take_front(windowSize);
       hashes.push_back(xxHash64(window));
     }
diff --git a/lld/include/lld/Common/SectionOrderer.h b/lld/include/lld/Common/BPSectionOrdererBase.h
similarity index 84%
rename from lld/include/lld/Common/SectionOrderer.h
rename to lld/include/lld/Common/BPSectionOrdererBase.h
index 7edb79c57338d1..793fd9a8deee79 100644
--- a/lld/include/lld/Common/SectionOrderer.h
+++ b/lld/include/lld/Common/BPSectionOrdererBase.h
@@ -1,4 +1,5 @@
-//===- SectionOrderer.h ---------------------------------------*- C++ -*-===//
+//===- BPSectionOrdererBase.h ---------------------------------------*- C++
+//-*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -11,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLD_COMMON_SECTION_ORDERER_H
-#define LLD_COMMON_SECTION_ORDERER_H
+#ifndef LLD_COMMON_BP_SECTION_ORDERER_BASE_H
+#define LLD_COMMON_BP_SECTION_ORDERER_BASE_H
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
@@ -59,8 +60,10 @@ class BPSectionBase {
                               .str());
   }
 };
-
-class SectionOrderer {
+/// Base class for Balanced Partitioning section ordering, providing common
+/// functionality for both ELF and MachO formats. This shared implementation
+/// reduces code duplication while handling function and data reordering.
+class BPSectionOrdererBase {
 public:
   static llvm::DenseMap<const BPSectionBase *, size_t>
   reorderSectionsByBalancedPartitioning(
diff --git a/lld/test/ELF/bp-section-orderer-errs.s b/lld/test/ELF/bp-section-orderer-errs.s
index 6e41928cc00c05..626ddb99bf5b49 100644
--- a/lld/test/ELF/bp-section-orderer-errs.s
+++ b/lld/test/ELF/bp-section-orderer-errs.s
@@ -1,28 +1,29 @@
 # REQUIRES: aarch64
 
-# RUN: llvm-mc -filetype=obj -triple=aarch64 %s -o %t
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=aarch64 a.s -o a.o
 # RUN: echo "A B 5" > %t.call_graph
 # RUN: echo "B C 50" >> %t.call_graph
 # RUN: echo "C D 40" >> %t.call_graph
 # RUN: echo "D B 10" >> %t.call_graph
-# RUN: not ld.lld -o /dev/null %t --irpgo-profile-sort %s --call-graph-ordering-file=%t.call_graph 2>&1 | FileCheck %s --check-prefix=IRPGO-ERR
-# RUN: not ld.lld -o /dev/null %t --irpgo-profile-sort=%s --call-graph-ordering-file=%t.call_graph 2>&1 | FileCheck %s --check-prefix=IRPGO-ERR
+# RUN: not ld.lld %t --irpgo-profile-sort %s --call-graph-ordering-file=%t.call_graph 2>&1 | FileCheck %s --check-prefix=IRPGO-ERR
+# RUN: not ld.lld %t --irpgo-profile-sort=%s --call-graph-ordering-file=%t.call_graph 2>&1 | FileCheck %s --check-prefix=IRPGO-ERR
 # IRPGO-ERR: --irpgo-profile-sort is incompatible with --call-graph-ordering-file
 
-# RUN: not ld.lld -o /dev/null --compression-sort=function --call-graph-ordering-file %t.call_graph 2>&1 | FileCheck %s --check-prefix=COMPRESSION-ERR
+# RUN: not ld.lld --compression-sort=function --call-graph-ordering-file %t.call_graph 2>&1 | FileCheck %s --check-prefix=COMPRESSION-ERR
 # COMPRESSION-ERR: --compression-sort is incompatible with --call-graph-ordering-file
 
-# RUN: not ld.lld -o /dev/null --compression-sort=malformed 2>&1 | FileCheck %s --check-prefix=COMPRESSION-MALFORM
-# COMPRESSION-MALFORM: unknown value `malformed` for --compression-sort=
+# RUN: not ld.lld --compression-sort=malformed 2>&1 | FileCheck %s --check-prefix=COMPRESSION-MALFORM
+# COMPRESSION-MALFORM: unknown value 'malformed' for --compression-sort=
 
-# RUN: not ld.lld -o /dev/null --compression-sort-startup-functions 2>&1 | FileCheck %s --check-prefix=STARTUP
+# RUN: not ld.lld --compression-sort-startup-functions 2>&1 | FileCheck %s --check-prefix=STARTUP
 # STARTUP: --compression-sort-startup-functions must be used with --irpgo-profile-sort
 
 # CHECK: B
 # CHECK-NEXT: C
 # CHECK-NEXT: D
 # CHECK-NEXT: A
-
+//--- a.s
 .section    .text.A,"ax",@progbits
 .globl  A
 A:
@@ -41,4 +42,4 @@ C:
 .section    .text.D,"ax",@progbits
 .globl  D
 D:
- nop
+ nop
\ No newline at end of file
diff --git a/lld/test/ELF/bp-section-orderer.s b/lld/test/ELF/bp-section-orderer.s
index 6140a54f6a9837..22154cc7c959a4 100644
--- a/lld/test/ELF/bp-section-orderer.s
+++ b/lld/test/ELF/bp-section-orderer.s
@@ -1,96 +1,64 @@
 # REQUIRES: aarch64
 
-# RUN: rm -rf %t && split-file %s %t
-# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/a.s -o %t/a.o
-# RUN: llvm-profdata merge %t/a.proftext -o %t/a.profdata
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=aarch64 a.s -o a.o
+# RUN: llvm-profdata merge a.proftext -o a.profdata
 
-# The code below is the reduced version of the output
-# from the following invocation and source:
-#
-# // test.cpp:
-#     const char s1[] = "hello world";
-#     const char s2[] = "i am a string"; 
-#     const char *r1 = s1;
-#     const char **r2 = &r1;
-#
-# void A()
-# {
-#     return;
-# }
-#
-# int B(int a)
-# {
-#     A();
-#     return a + 1;
-# }
-#
-# int C(int a)
-# {
-#     A();
-#     return a + 2;
-# }
-#
-# int D(int a)
-# {
-#     return B(a + 2);
-# }
-#
-# int E(int a)
-# {
-#     return C(a + 2);
-# }
-#
-# int F(int a)
-# {
-#     return C(a + 3);
-# }
-#
-# int main()
-# {
-#     return 0;
-# }
-#
-# clang -target aarch64-linux-gnu -ffunction-sections -fdata-sections test.cpp -o test.s -S
+.ifdef GEN
+#--- a.cc
+const char s1[] = "hello world";
+const char s2[] = "i am a string";
+const char* r1 = s1;
+const char** r2 = &r1;
+void A() {
+    return;
+}
 
-# RUN: ld.lld -e main -o %t/a.out %t/a.o --irpgo-profile-sort=%t/a.profdata --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=STARTUP
-# RUN: ld.lld -e main -o %t/a.out %t/a.o --irpgo-profile-sort=%t/a.profdata --verbose-bp-section-orderer --icf=all --compression-sort=none 2>&1 | FileCheck %s --check-prefix=STARTUP
+int B(int a) {
+    A();
+    return a + 1;
+}
 
-# STARTUP: Ordered 3 sections using balanced partitioning
+int C(int a) {
+    A();
+    return a + 2;
+}
 
-# RUN: ld.lld -e main -o - %t/a.o --irpgo-profile-sort=%t/a.profdata --symbol-ordering-file %t/a.orderfile | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=ORDERFILE
+int D(int a) {
+    return B(a + 2);
+}
 
-# ORDERFILE: _ZL2s1
-# ORDERFILE: _ZL2s2
-# ORDERFILE: _Z1Av
-# ORDERFILE: _Z1Fi
-# ORDERFILE: _Z1Ei
-# ORDERFILE: _Z1Di
-# ORDERFILE: _Z1Ci
-# ORDERFILE: _Z1Bi
-# ORDERFILE: main
-# ORDERFILE: r1
-# ORDERFILE: r2
-
-# RUN: ld.lld -e main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=function 2>&1 | FileCheck %s --check-prefix=COMPRESSION-FUNC
-# RUN: ld.lld -e main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=data 2>&1 | FileCheck %s --check-prefix=COMPRESSION-DATA
-# RUN: ld.lld -e main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=both 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
-# RUN: ld.lld -e main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=both --irpgo-profile-sort=%t/a.profdata 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
+int E(int a) {
+    return C(a + 2);
+}
 
-# COMPRESSION-FUNC: Ordered 7 sections using balanced partitioning
-# COMPRESSION-DATA: Ordered 4 sections using balanced partitioning
-# COMPRESSION-BOTH: Ordered 11 sections using balanced partitioning
+int F(int a) {
+    return C(a + 3);
+}
 
+int main() {
+    return 0;
+}
+#--- gen
+echo '#--- a.s'
+clang -target aarch64-linux-gnu -fdebug-compilation-dir='/proc/self/cwd' -ffunction-sections -fdata-sections -fno-exceptions -fno-rtti -fno-asynchronous-unwind-tables -S -g a.cc -o -
+.endif
 #--- a.s
 	.text
-	.file	"test.cpp"
+	.file	"a.cc"
+	.file	0 "/proc/self/cwd" "a.cc" md5 0xd88df55d5eb7769f11cfb15e5857b68c
 	.section	.text._Z1Av,"ax",@progbits
 	.globl	_Z1Av                           // -- Begin function _Z1Av
 	.p2align	2
 	.type	_Z1Av,@function
 _Z1Av:                                  // @_Z1Av
+.Lfunc_begin0:
+	.cfi_sections .debug_frame
 	.cfi_startproc
 // %bb.0:
+	.loc	0 6 5 prologue_end              // a.cc:6:5
 	ret
+.Ltmp0:
 .Lfunc_end0:
 	.size	_Z1Av, .Lfunc_end0-_Z1Av
 	.cfi_endproc
@@ -100,26 +68,29 @@ _Z1Av:                                  // @_Z1Av
 	.p2align	2
 	.type	_Z1Bi,@function
 _Z1Bi:                                  // @_Z1Bi
+.Lfunc_begin1:
+	.loc	0 9 0                           // a.cc:9:0
 	.cfi_startproc
 // %bb.0:
 	sub	sp, sp, #32
-	.cfi_def_cfa_offset 32
 	stp	x29, x30, [sp, #16]             // 16-byte Folded Spill
 	add	x29, sp, #16
 	.cfi_def_cfa w29, 16
 	.cfi_offset w30, -8
 	.cfi_offset w29, -16
 	stur	w0, [x29, #-4]
+.Ltmp1:
+	.loc	0 10 5 prologue_end             // a.cc:10:5
 	bl	_Z1Av
+	.loc	0 11 12                         // a.cc:11:12
 	ldur	w8, [x29, #-4]
+	.loc	0 11 14 is_stmt 0               // a.cc:11:14
 	add	w0, w8, #1
-	.cfi_def_cfa wsp, 32
+	.loc	0 11 5 epilogue_begin           // a.cc:11:5
 	ldp	x29, x30, [sp, #16]             // 16-byte Folded Reload
 	add	sp, sp, #32
-	.cfi_def_cfa_offset 0
-	.cfi_restore w30
-	.cfi_restore w29
 	ret
+.Ltmp2:
 .Lfunc_end1:
 	.size	_Z1Bi, .Lfunc_end1-_Z1Bi
 	.cfi_endproc
@@ -129,26 +100,29 @@ _Z1Bi:                                  // @_Z1Bi
 	.p2align	2
 	.type	_Z1Ci,@function
 _Z1Ci:                                  // @_Z1Ci
+.Lfunc_begin2:
+	.loc	0 14 0 is_stmt 1                // a.cc:14:0
 	.cfi_startproc
 // %bb.0:
 	sub	sp, sp, #32
-	.cfi_def_cfa_offset 32
 	stp	x29, x30, [sp, #16]             // 16-byte Folded Spill
 	add	x29, sp, #16
 	.cfi_def_cfa w29, 16
 	.cfi_offset w30, -8
 	.cfi_offset w29, -16
 	stur	w0, [x29, #-4]
+.Ltmp3:
+	.loc	0 15 5 prologue_end             // a.cc:15:5
 	bl	_Z1Av
+	.loc	0 16 12                         // a.cc:16:12
 	ldur	w8, [x29, #-4]
+	.loc	0 16 14 is_stmt 0               // a.cc:16:14
 	add	w0, w8, #2
-	.cfi_def_cfa wsp, 32
+	.loc	0 16 5 epilogue_begin           // a.cc:16:5
 	ldp	x29, x30, [sp, #16]             // 16-byte Folded Reload
 	add	sp, sp, #32
-	.cfi_def_cfa_offset 0
-	.cfi_restore w30
-	.cfi_restore w29
 	ret
+.Ltmp4:
 .Lfunc_end2:
 	.size	_Z1Ci, .Lfunc_end2-_Z1Ci
 	.cfi_endproc
@@ -158,26 +132,29 @@ _Z1Ci:                                  // @_Z1Ci
 	.p2align	2
 	.type	_Z1Di,@function
 _Z1Di:                                  // @_Z1Di
+.Lfunc_begin3:
+	.loc	0 19 0 is_stmt 1                // a.cc:19:0
 	.cfi_startproc
 // %bb.0:
 	sub	sp, sp, #32
-	.cfi_def_cfa_offset 32
 	stp	x29, x30, [sp, #16]             // 16-byte Folded Spill
 	add	x29, sp, #16
 	.cfi_def_cfa w29, 16
 	.cfi_offset w30, -8
 	.cfi_offset w29, -16
 	stur	w0, [x29, #-4]
+.Ltmp5:
+	.loc	0 20 14 prologue_end            // a.cc:20:14
 	ldur	w8, [x29, #-4]
+	.loc	0 20 16 is_stmt 0               // a.cc:20:16
 	add	w0, w8, #2
+	.loc	0 20 12                         // a.cc:20:12
 	bl	_Z1Bi
-	.cfi_def_cfa wsp, 32
+	.loc	0 20 5 epilogue_begin           // a.cc:20:5
 	ldp	x29, x30, [sp, #16]             // 16-byte Folded Reload
 	add	sp, sp, #32
-	.cfi_def_cfa_offset 0
-	.cfi_restore w30
-	.cfi_restore w29
 	ret
+.Ltmp6:
 .Lfunc_end3:
 	.size	_Z1Di, .Lfunc_end3-_Z1Di
 	.cfi_endproc
@@ -187,26 +164,29 @@ _Z1Di:                                  // @_Z1Di
 	.p2align	2
 	.type	_Z1Ei,@function
 _Z1Ei:                                  // @_Z1Ei
+.Lfunc_begin4:
+	.loc	0 23 0 is_stmt 1                // a.cc:23:0
 	.cfi_startproc
 // %bb.0:
 	sub	sp, sp, #32
-	.cfi_def_cfa_offset 32
 	stp	x29, x30, [sp, #16]             // 16-byte Folded Spill
 	add	x29, sp, #16
 	.cfi_def_cfa w29, 16
 	.cfi_offset w30, -8
 	.cfi_offset w29, -16
 	stur	w0, [x29, #-4]
+.Ltmp7:
+	.loc	0 24 14 prologue_end            // a.cc:24:14
 	ldur	w8, [x29, #-4]
+	.loc	0 24 16 is_stmt 0               // a.cc:24:16
 	add	w0, w8, #2
+	.loc	0 24 12                         // a.cc:24:12
 	bl	_Z1Ci
-	.cfi_def_cfa wsp, 32
+	.loc	0 24 5 epilogue_begin           // a.cc:24:5
 	ldp	x29, x30, [sp, #16]             // 16-byte Folded Reload
 	add	sp, sp, #32
-	.cfi_def_cfa_offset 0
-	.cfi_restore w30
-	.cfi_restore w29
 	ret
+.Ltmp8:
 .Lfunc_end4:
 	.size	_Z1Ei, .Lfunc_end4-_Z1Ei
 	.cfi_endproc
@@ -216,26 +196,29 @@ _Z1Ei:                                  // @_Z1Ei
 	.p2align	2
 	.type	_Z1Fi,@function
 _Z1Fi:                                  // @_Z1Fi
+.Lfunc_begin5:
+	.loc	0 27 0 is_stmt 1                // a.cc:27:0
 	.cfi_startproc
 // %bb.0:
 	sub	sp, sp, #32
-	.cfi_def_cfa_offset 32
 	stp	x29, x30, [sp, #16]             // 16-byte Folded Spill
 	add	x29, sp, #16
 	.cfi_def_cfa w29, 16
 	.cfi_offset w30, -8
 	.cfi_offset w29, -16
 	stur	w0, [x29, #-4]
+.Ltmp9:
+	.loc	0 28 14 prologue_end            // a.cc:28:14
 	ldur	w8, [x29, #-4]
+	.loc	0 28 16 is_stmt 0               // a.cc:28:16
 	add	w0, w8, #3
+	.loc	0 28 12                         // a.cc:28:12
 	bl	_Z1Ci
-	.cfi_def_cfa wsp, 32
+	.loc	0 28 5 epilogue_begin           // a.cc:28:5
 	ldp	x29, x30, [sp, #16]             // 16-byte Folded Reload
 	add	sp, sp, #32
-	.cfi_def_cfa_offset 0
-	.cfi_restore w30
-	.cfi_restore w29
 	ret
+.Ltmp10:
 .Lfunc_end5:
 	.size	_Z1Fi, .Lfunc_end5-_Z1Fi
 	.cfi_endproc
@@ -245,15 +228,19 @@ _Z1Fi:                                  // @_Z1Fi
 	.p2align	2
 	.type	main,@function
 main:                                   // @main
+.Lfunc_begin6:
+	.loc	0 31 0 is_stmt 1                // a.cc:31:0
 	.cfi_startproc
 // %bb.0:
 	sub	sp, sp, #16
 	.cfi_def_cfa_offset 16
 	mov	w0, wzr
 	str	wzr, [sp, #12]
+.Ltmp12:
+	.loc	0 32 5 prologue_end epilogue_begin // a.cc:32:5
 	add	sp, sp, #16
-	.cfi_def_cfa_offset 0
 	ret
+.Ltmp13:
 .Lfunc_end6:
 	.size	main, .Lfunc_end6-main
 	.cfi_endproc
@@ -272,20 +259,565 @@ r1:
 	.xword	_ZL2s1
 	.size	r1, 8
 
-	.type	_ZL2s2,@object                  // @_ZL2s2
-	.section	.rodata._ZL2s2,"a",@progbits
-_ZL2s2:
-	.asciz	"i am a string"
-	.size	_ZL2s2, 14
-
 	.type	r2,@object                      // @r2
 	.section	.data.r2,"aw",@progbits
 	.globl	r2
 	.p2align	3, 0x0
 r2:
-	.xword	_ZL2s2
+	.xword	r1
 	.size	r2, 8
 
+	.section	.debug_abbrev,"",@progbits
+	.byte	1                               // Abbreviation Code
+	.byte	17                              // DW_TAG_compile_unit
+	.byte	1                               // DW_CHILDREN_yes
+	.byte	37                              // DW_AT_producer
+	.byte	37                              // DW_FORM_strx1
+	.byte	19                              // DW_AT_language
+	.byte	5                               // DW_FORM_data2
+	.byte	3                               // DW_AT_name
+	.byte	37                              // DW_FORM_strx1
+	.byte	114                             // DW_AT_str_offsets_base
+	.byte	23                              // DW_FORM_sec_offset
+	.byte	16                              // DW_AT_stmt_list
+	.byte	23                              // DW_FORM_sec_offset
+	.byte	27                              // DW_AT_comp_dir
+	.byte	37                              // DW_FORM_strx1
+	.byte	17                              // DW_AT_low_pc
+	.byte	1                               // DW_FORM_addr
+	.byte	85                              // DW_AT_ranges
+	.byte	35                              // DW_FORM_rnglistx
+	.byte	115                             // DW_AT_addr_base
+	.byte	23                              // DW_FORM_sec_offset
+	.byte	116                             // DW_AT_rnglists_base
+	.byte	23                              // DW_FORM_sec_offset
+	.byte	0                               // EOM(1)
+	.byte	0                               // EOM(2)
+	.byte	2                               // Abbreviation Code
+	.byte	52                              // DW_TAG_variable
+	.byte	0                               // DW_CHILDREN_no
+	.byte	3                               // DW_AT_name
+	.byte	37                              // DW_FORM_strx1
+	.byte	73                              // DW_AT_type
+	.byte	19                              // DW_FORM_ref4
+	.byte	63                              // DW_AT_external
+	.byte	25                              // DW_FORM_flag_present
+	.byte	58                              // DW_AT_decl_file
+	.byte	11                              // DW_FORM_data1
+	.byte	59                              // DW_AT_decl_line
+	.byte	11                              // DW_FORM_data1
+	.byte	2                               // DW_AT_location
+	.byte	24                              // DW_FORM_exprloc
+	.byte	0                               // EOM(1)
+	.byte	0                               // EOM(2)
+	.byte	3                               // Abbreviation Code
+	.byte	15                              // DW_TAG_pointer_type
+	.byte	0                               // DW_CHILDREN_no
+	.byte	73                              // DW_AT_type
+	.byte	19                              // DW_FORM_ref4
+	.byte	0                               // EOM(1)
+	.byte	0                               // EOM(2)
+	.byte	4                               // Abbreviation Code
+	.byte	38                              // DW_TAG_const_type
+	.byte	0                               // DW_CHILDREN_no
+	.byte	73                              // DW_AT_type
+	.byte	19                              // DW_FORM_ref4
+	.byte	0                               // EOM(1)
+	.byte	0                               // EOM(2)
+	.byte	5                               // Abbreviation Code
+	.byte	36                              // DW_TAG_base_type
+	.byte	0                               // DW_CHILDREN_no
+	.byte	3                               // DW_AT_name
+	.byte	37                              // DW_FORM_strx1
+	.byte	62                              // DW_AT_encoding
+	.byte	11                              // DW_FORM_data1
+	.byte	11                              // DW_AT_byte_size
+	.byte	11                              // DW_FORM_data1
+	.byte	0                               // EOM(1)
+	.byte	0                               // EOM(2)
+	.byte	6                               // Abbreviation Code
+	.byte	52                              // DW_TAG_variable
+	.byte	0                               // DW_CHILDREN_no
+	.byte	3                               // DW_AT_name
+	.byte	37                              // DW_FORM_strx1
+	.byte	73                              // DW_AT_type
+	.byte	19                              // DW_FORM_ref4
+	.byte	58                              // DW_AT_decl_file
+	.byte	11                              // DW_FORM_data1
+	.byte	59                              // DW_AT_decl_line
+	.byte	11                              // DW_FORM_data1
+	.byte	2                               // DW_AT_location
+	.byte	24                              // DW_FORM_exprloc
+	.byte	110                             // DW_AT_linkage_name
+	.byte	37                              // DW_FORM_strx1
+	.byte	0                               // EOM(1)
+	.byte	0                               // EOM(2)
+	.byte	7                               // Abbreviation Code
+	.byte	1                               // DW_TAG_array_type
+	.byte	1                               // DW_CHILDREN_yes
+	.byte	73                              // DW_AT_type
+	.byte	19                              // DW_FORM_ref4
+	.byte	0                               // EOM(1)
+	.byte	0                               // EOM(2)
+	.byte	8                               // Abbreviation Code
+	.byte	33                              // DW_TAG_subrange_type
+	.byte	0                               // DW_CHILDREN_no
+	.byte	73                              // DW_AT_type
+	.byte	19                              // DW_FORM_ref4
+	.byte	55                              // DW_AT_count
+	.byte	11                              // DW_FORM_data1
+	.byte	0                               // EOM(1)
+	.byte	0                               // EOM(2)
+	.byte	9                               // Abbreviation Code
+	.byte	36                              // DW_TAG_base_type
+	.byte	0                               // DW_CHILDREN_no
+	.byte	3                               // DW_AT_name
+	.byte	37                              // DW_FORM_strx1
+	.byte	11                              // DW_AT_byte_size
+	.byte	11                              // DW_FORM_data1
+	.byte	62                              // DW_AT_encoding
+	.byte	11                              // DW_FORM_data1
+	.byte	0                               // EOM(1)
+	.byte	0                               // EOM(2)
+	.byte	10                              // Abbreviation Code
+	.byte	46                              // DW_TAG_subprogram
+	.byte	0                               // DW_CHILDREN_no
+	.byte	17                              // DW_AT_low_pc
+	.byte	27                              // DW_FORM_addrx
+	.byte	18                              // DW_AT_high_pc
+	.byte	6                               // DW_FORM_data4
+	.byte	64                              // DW_AT_frame_base
+	.byte	24                              // DW_FORM_exprloc
+	.byte	110                             // DW_AT_linkage_name
+	.byte	37                              // DW_FORM_strx1
+	.byte	3                               // DW_AT_name
+	.byte	37                              // DW_FORM_strx1
+	.byte	58                              // DW_AT_decl_file
+	.byte	11                              // DW_FORM_data1
+	.byte	59                              // DW_AT_decl_line
+	.byte	11                              // DW_FORM_data1
+	.byte	63                              // DW_AT_external
+	.byte	25                              // DW_FORM_flag_present
+	.byte	0                               // EOM(1)
+	.byte	0                               // EOM(2)
+	.byte	11                              // Abbreviation Code
+	.byte	46                              // DW_TAG_subprogram
+	.byte	1                               // DW_CHILDREN_yes
+	.byte	17                              // DW_AT_low_pc
+	.byte	27                              // DW_FORM_addrx
+	.byte	18                              // DW_AT_high_pc
+	.byte	6                               // DW_FORM_data4
+	.byte	64                              // DW_AT_frame_base
+	.byte	24                              // DW_FORM_exprloc
+	.byte	110                             // DW_AT_linkage_name
+	.byte	37                              // DW_FORM_strx1
+	.byte	3                               // DW_AT_name
+	.byte	37                              // DW_FORM_strx1
+	.byte	58                              // DW_AT_decl_file
+	.byte	11                              // DW_FORM_data1
+	.byte	59                              // DW_AT_decl_line
+	.byte	11                              // DW_FORM_data1
+	.byte	73                              // DW_AT_type
+	.byte	19                              // DW_FORM_ref4
+	.byte	63                              // DW_AT_external
+	.byte	25                              // DW_FORM_flag_present
+	.byte	0                               // EOM(1)
+	.byte	0                               // EOM(2)
+	.byte	12                              // Abbreviation Code
+	.byte	5                               // DW_TAG_formal_parameter
+	.byte	0                               // DW_CHILDREN_no
+	.byte	2                               // DW_AT_location
+	.byte	24                              // DW_FORM_exprloc
+	.byte	3                               // DW_AT_name
+	.byte	37                              // DW_FORM_strx1
+	.byte	58                              // DW_AT_decl_file
+	.byte	11                              // DW_FORM_data1
+	.byte	59                              // DW_AT_decl_line
+	.byte	11                              // DW_FORM_data1
+	.byte	73                              // DW_AT_type
+	.byte	19                              // DW_FORM_ref4
+	.byte	0                               // EOM(1)
+	.byte	0                               // EOM(2)
+	.byte	13                              // Abbreviation Code
+	.byte	46                              // DW_TAG_subprogram
+	.byte	0                               // DW_CHILDREN_no
+	.byte	17                              // DW_AT_low_pc
+	.byte	27                              // DW_FORM_addrx
+	.byte	18                              // DW_AT_high_pc
+	.byte	6                               // DW_FORM_data4
+	.byte	64                              // DW_AT_frame_base
+	.byte	24                              // DW_FORM_exprloc
+	.byte	3                               // DW_AT_name
+	.byte	37                              // DW_FORM_strx1
+	.byte	58                              // DW_AT_decl_file
+	.byte	11                              // DW_FORM_data1
+	.byte	59                              // DW_AT_decl_line
+	.byte	11                              // DW_FORM_data1
+	.byte	73                              // DW_AT_type
+	.byte	19                              // DW_FORM_ref4
+	.byte	63                              // DW_AT_external
+	.byte	25                              // DW_FORM_flag_present
+	.byte	0                               // EOM(1)
+	.byte	0                               // EOM(2)
+	.byte	0                               // EOM(3)
+	.section	.debug_info,"",@progbits
+.Lcu_begin0:
+	.word	.Ldebug_info_end0-.Ldebug_info_start0 // Length of Unit
+.Ldebug_info_start0:
+	.hword	5                               // DWARF version number
+	.byte	1                               // DWARF Unit Type
+	.byte	8                               // Address Size (in bytes)
+	.word	.debug_abbrev                   // Offset Into Abbrev. Section
+	.byte	1                               // Abbrev [1] 0xc:0x110 DW_TAG_compile_unit
+	.byte	0                               // DW_AT_producer
+	.hword	4                               // DW_AT_language
+	.byte	1                               // DW_AT_name
+	.word	.Lstr_offsets_base0             // DW_AT_str_offsets_base
+	.word	.Lline_table_start0             // DW_AT_stmt_list
+	.byte	2                               // DW_AT_comp_dir
+	.xword	0                               // DW_AT_low_pc
+	.byte	0                               // DW_AT_ranges
+	.word	.Laddr_table_base0              // DW_AT_addr_base
+	.word	.Lrnglists_table_base0          // DW_AT_rnglists_base
+	.byte	2                               // Abbrev [2] 0x2b:0xb DW_TAG_variable
+	.byte	3                               // DW_AT_name
+	.word	54                              // DW_AT_type
+                                        // DW_AT_external
+	.byte	0                               // DW_AT_decl_file
+	.byte	3                               // DW_AT_decl_line
+	.byte	2                               // DW_AT_location
+	.byte	161
+	.byte	0
+	.byte	3                               // Abbrev [3] 0x36:0x5 DW_TAG_pointer_type
+	.word	59                              // DW_AT_type
+	.byte	4                               // Abbrev [4] 0x3b:0x5 DW_TAG_const_type
+	.word	64                              // DW_AT_type
+	.byte	5                               // Abbrev [5] 0x40:0x4 DW_TAG_base_type
+	.byte	4                               // DW_AT_name
+	.byte	8                               // DW_AT_encoding
+	.byte	1                               // DW_AT_byte_size
+	.byte	2                               // Abbrev [2] 0x44:0xb DW_TAG_variable
+	.byte	5                               // DW_AT_name
+	.word	79                              // DW_AT_type
+                                        // DW_AT_external
+	.byte	0                               // DW_AT_decl_file
+	.byte	4                               // DW_AT_decl_line
+	.byte	2                               // DW_AT_location
+	.byte	161
+	.byte	1
+	.byte	3                               // Abbrev [3] 0x4f:0x5 DW_TAG_pointer_type
+	.word	54                              // DW_AT_type
+	.byte	6                               // Abbrev [6] 0x54:0xc DW_TAG_variable
+	.byte	6                               // DW_AT_name
+	.word	96                              // DW_AT_type
+	.byte	0                               // DW_AT_decl_file
+	.byte	1                               // DW_AT_decl_line
+	.byte	2                               // DW_AT_location
+	.byte	161
+	.byte	2
+	.byte	8                               // DW_AT_linkage_name
+	.byte	7                               // Abbrev [7] 0x60:0xc DW_TAG_array_type
+	.word	59                              // DW_AT_type
+	.byte	8                               // Abbrev [8] 0x65:0x6 DW_TAG_subrange_type
+	.word	108                             // DW_AT_type
+	.byte	12                              // DW_AT_count
+	.byte	0                               // End Of Children Mark
+	.byte	9                               // Abbrev [9] 0x6c:0x4 DW_TAG_base_type
+	.byte	7                               // DW_AT_name
+	.byte	8                               // DW_AT_byte_size
+	.byte	7                               // DW_AT_encoding
+	.byte	10                              // Abbrev [10] 0x70:0xc DW_TAG_subprogram
+	.byte	3                               // DW_AT_low_pc
+	.word	.Lfunc_end0-.Lfunc_begin0       // DW_AT_high_pc
+	.byte	1                               // DW_AT_frame_base
+	.byte	111
+	.byte	9                               // DW_AT_linkage_name
+	.byte	10                              // DW_AT_name
+	.byte	0                               // DW_AT_decl_file
+	.byte	5                               // DW_AT_decl_line
+                                        // DW_AT_external
+	.byte	11                              // Abbrev [11] 0x7c:0x1c DW_TAG_subprogram
+	.byte	4                               // DW_AT_low_pc
+	.word	.Lfunc_end1-.Lfunc_begin1       // DW_AT_high_pc
+	.byte	1                               // DW_AT_frame_base
+	.byte	109
+	.byte	11                              // DW_AT_linkage_name
+	.byte	12                              // DW_AT_name
+	.byte	0                               // DW_AT_decl_file
+	.byte	9                               // DW_AT_decl_line
+	.word	279                             // DW_AT_type
+                                        // DW_AT_external
+	.byte	12                              // Abbrev [12] 0x8c:0xb DW_TAG_formal_parameter
+	.byte	2                               // DW_AT_location
+	.byte	145
+	.byte	124
+	.byte	23                              // DW_AT_name
+	.byte	0                               // DW_AT_decl_file
+	.byte	9                               // DW_AT_decl_line
+	.word	279                             // DW_AT_type
+	.byte	0                               // End Of Children Mark
+	.byte	11                              // Abbrev [11] 0x98:0x1c DW_TAG_subprogram
+	.byte	5                               // DW_AT_low_pc
+	.word	.Lfunc_end2-.Lfunc_begin2       // DW_AT_high_pc
+	.byte	1                               // DW_AT_frame_base
+	.byte	109
+	.byte	14                              // DW_AT_linkage_name
+	.byte	15                              // DW_AT_name
+	.byte	0                               // DW_AT_decl_file
+	.byte	14                              // DW_AT_decl_line
+	.word	279                             // DW_AT_type
+                                        // DW_AT_external
+	.byte	12                              // Abbrev [12] 0xa8:0xb DW_TAG_formal_parameter
+	.byte	2                               // DW_AT_location
+	.byte	145
+	.byte	124
+	.byte	23                              // DW_AT_name
+	.byte	0                               // DW_AT_decl_file
+	.byte	14                              // DW_AT_decl_line
+	.word	279                             // DW_AT_type
+	.byte	0                               // End Of Children Mark
+	.byte	11                              // Abbrev [11] 0xb4:0x1c DW_TAG_subprogram
+	.byte	6                               // DW_AT_low_pc
+	.word	.Lfunc_end3-.Lfunc_begin3       // DW_AT_high_pc
+	.byte	1                               // DW_AT_frame_base
+	.byte	109
+	.byte	16                              // DW_AT_linkage_name
+	.byte	17                              // DW_AT_name
+	.byte	0                               // DW_AT_decl_file
+	.byte	19                              // DW_AT_decl_line
+	.word	279                             // DW_AT_type
+                                        // DW_AT_external
+	.byte	12                              // Abbrev [12] 0xc4:0xb DW_TAG_formal_parameter
+	.byte	2                               // DW_AT_location
+	.byte	145
+	.byte	124
+	.byte	23                              // DW_AT_name
+	.byte	0                               // DW_AT_decl_file
+	.byte	19                              // DW_AT_decl_line
+	.word	279                             // DW_AT_type
+	.byte	0                               // End Of Children Mark
+	.byte	11                              // Abbrev [11] 0xd0:0x1c DW_TAG_subprogram
+	.byte	7                               // DW_AT_low_pc
+	.word	.Lfunc_end4-.Lfunc_begin4       // DW_AT_high_pc
+	.byte	1                               // DW_AT_frame_base
+	.byte	109
+	.byte	18                              // DW_AT_linkage_name
+	.byte	19                              // DW_AT_name
+	.byte	0                               // DW_AT_decl_file
+	.byte	23                              // DW_AT_decl_line
+	.word	279                             // DW_AT_type
+                                        // DW_AT_external
+	.byte	12                              // Abbrev [12] 0xe0:0xb DW_TAG_formal_parameter
+	.byte	2                               // DW_AT_location
+	.byte	145
+	.byte	124
+	.byte	23                              // DW_AT_name
+	.byte	0                               // DW_AT_decl_file
+	.byte	23                              // DW_AT_decl_line
+	.word	279                             // DW_AT_type
+	.byte	0                               // End Of Children Mark
+	.byte	11                              // Abbrev [11] 0xec:0x1c DW_TAG_subprogram
+	.byte	8                               // DW_AT_low_pc
+	.word	.Lfunc_end5-.Lfunc_begin5       // DW_AT_high_pc
+	.byte	1                               // DW_AT_frame_base
+	.byte	109
+	.byte	20                              // DW_AT_linkage_name
+	.byte	21                              // DW_AT_name
+	.byte	0                               // DW_AT_decl_file
+	.byte	27                              // DW_AT_decl_line
+	.word	279                             // DW_AT_type
+                                        // DW_AT_external
+	.byte	12                              // Abbrev [12] 0xfc:0xb DW_TAG_formal_parameter
+	.byte	2                               // DW_AT_location
+	.byte	145
+	.byte	124
+	.byte	23                              // DW_AT_name
+	.byte	0                               // DW_AT_decl_file
+	.byte	27                              // DW_AT_decl_line
+	.word	279                             // DW_AT_type
+	.byte	0                               // End Of Children Mark
+	.byte	13                              // Abbrev [13] 0x108:0xf DW_TAG_subprogram
+	.byte	9                               // DW_AT_low_pc
+	.word	.Lfunc_end6-.Lfunc_begin6       // DW_AT_high_pc
+	.byte	1                               // DW_AT_frame_base
+	.byte	111
+	.byte	22                              // DW_AT_name
+	.byte	0                               // DW_AT_decl_file
+	.byte	31                              // DW_AT_decl_line
+	.word	279                             // DW_AT_type
+                                        // DW_AT_external
+	.byte	5                               // Abbrev [5] 0x117:0x4 DW_TAG_base_type
+	.byte	13                              // DW_AT_name
+	.byte	5                               // DW_AT_encoding
+	.byte	4                               // DW_AT_byte_size
+	.byte	0                               // End Of Children Mark
+.Ldebug_info_end0:
+	.section	.debug_rnglists,"",@progbits
+	.word	.Ldebug_list_header_end0-.Ldebug_list_header_start0 // Length
+.Ldebug_list_header_start0:
+	.hword	5                               // Version
+	.byte	8                               // Address size
+	.byte	0                               // Segment selector size
+	.word	1                               // Offset entry count
+.Lrnglists_table_base0:
+	.word	.Ldebug_ranges0-.Lrnglists_table_base0
+.Ldebug_ranges0:
+	.byte	3                               // DW_RLE_startx_length
+	.byte	3                               //   start index
+	.uleb128 .Lfunc_end0-.Lfunc_begin0      //   length
+	.byte	3                               // DW_RLE_startx_length
+	.byte	4                               //   start index
+	.uleb128 .Lfunc_end1-.Lfunc_begin1      //   length
+	.byte	3                               // DW_RLE_startx_length
+	.byte	5                               //   start index
+	.uleb128 .Lfunc_end2-.Lfunc_begin2      //   length
+	.byte	3                               // DW_RLE_startx_length
+	.byte	6                               //   start index
+	.uleb128 .Lfunc_end3-.Lfunc_begin3      //   length
+	.byte	3                               // DW_RLE_startx_length
+	.byte	7                               //   start index
+	.uleb128 .Lfunc_end4-.Lfunc_begin4      //   length
+	.byte	3                               // DW_RLE_startx_length
+	.byte	8                               //   start index
+	.uleb128 .Lfunc_end5-.Lfunc_begin5      //   length
+	.byte	3                               // DW_RLE_startx_length
+	.byte	9                               //   start index
+	.uleb128 .Lfunc_end6-.Lfunc_begin6      //   length
+	.byte	0                               // DW_RLE_end_of_list
+.Ldebug_list_header_end0:
+	.section	.debug_str_offsets,"",@progbits
+	.word	100                             // Length of String Offsets Set
+	.hword	5
+	.hword	0
+.Lstr_offsets_base0:
+	.section	.debug_str,"MS",@progbits,1
+.Linfo_string0:
+	.byte	0                               // string offset=0
+.Linfo_string1:
+	.asciz	"a.cc"                          // string offset=1
+.Linfo_string2:
+	.asciz	"/proc/self/cwd"                // string offset=6
+.Linfo_string3:
+	.asciz	"r1"                            // string offset=21
+.Linfo_string4:
+	.asciz	"char"                          // string offset=24
+.Linfo_string5:
+	.asciz	"r2"                            // string offset=29
+.Linfo_string6:
+	.asciz	"s1"                            // string offset=32
+.Linfo_string7:
+	.asciz	"__ARRAY_SIZE_TYPE__"           // string offset=35
+.Linfo_string8:
+	.asciz	"_ZL2s1"                        // string offset=55
+.Linfo_string9:
+	.asciz	"_Z1Av"                         // string offset=62
+.Linfo_string10:
+	.asciz	"A"                             // string offset=68
+.Linfo_string11:
+	.asciz	"_Z1Bi"                         // string offset=70
+.Linfo_string12:
+	.asciz	"B"                             // string offset=76
+.Linfo_string13:
+	.asciz	"int"                           // string offset=78
+.Linfo_string14:
+	.asciz	"_Z1Ci"                         // string offset=82
+.Linfo_string15:
+	.asciz	"C"                             // string offset=88
+.Linfo_string16:
+	.asciz	"_Z1Di"                         // string offset=90
+.Linfo_string17:
+	.asciz	"D"                             // string offset=96
+.Linfo_string18:
+	.asciz	"_Z1Ei"                         // string offset=98
+.Linfo_string19:
+	.asciz	"E"                             // string offset=104
+.Linfo_string20:
+	.asciz	"_Z1Fi"                         // string offset=106
+.Linfo_string21:
+	.asciz	"F"                             // string offset=112
+.Linfo_string22:
+	.asciz	"main"                          // string offset=114
+.Linfo_string23:
+	.asciz	"a"                             // string offset=119
+	.section	.debug_str_offsets,"",@progbits
+	.word	.Linfo_string0
+	.word	.Linfo_string1
+	.word	.Linfo_string2
+	.word	.Linfo_string3
+	.word	.Linfo_string4
+	.word	.Linfo_string5
+	.word	.Linfo_string6
+	.word	.Linfo_string7
+	.word	.Linfo_string8
+	.word	.Linfo_string9
+	.word	.Linfo_string10
+	.word	.Linfo_string11
+	.word	.Linfo_string12
+	.word	.Linfo_string13
+	.word	.Linfo_string14
+	.word	.Linfo_string15
+	.word	.Linfo_string16
+	.word	.Linfo_string17
+	.word	.Linfo_string18
+	.word	.Linfo_string19
+	.word	.Linfo_string20
+	.word	.Linfo_string21
+	.word	.Linfo_string22
+	.word	.Linfo_string23
+	.section	.debug_addr,"",@progbits
+	.word	.Ldebug_addr_end0-.Ldebug_addr_start0 // Length of contribution
+.Ldebug_addr_start0:
+	.hword	5                               // DWARF version number
+	.byte	8                               // Address size
+	.byte	0                               // Segment selector size
+.Laddr_table_base0:
+	.xword	r1
+	.xword	r2
+	.xword	_ZL2s1
+	.xword	.Lfunc_begin0
+	.xword	.Lfunc_begin1
+	.xword	.Lfunc_begin2
+	.xword	.Lfunc_begin3
+	.xword	.Lfunc_begin4
+	.xword	.Lfunc_begin5
+	.xword	.Lfunc_begin6
+.Ldebug_addr_end0:
+	.section	".note.GNU-stack","",@progbits
+	.addrsig
+	.addrsig_sym _Z1Av
+	.addrsig_sym _Z1Bi
+	.addrsig_sym _Z1Ci
+	.addrsig_sym _ZL2s1
+	.addrsig_sym r1
+	.section	.debug_line,"",@progbits
+.Lline_table_start0:
+
+# RUN: ld.lld -e main -o a.out a.o --irpgo-profile-sort=a.profdata --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=STARTUP
+# RUN: ld.lld -e main -o a.out a.o --irpgo-profile-sort=a.profdata --verbose-bp-section-orderer --icf=all --compression-sort=none 2>&1 | FileCheck %s --check-prefix=STARTUP
+
+# STARTUP: Ordered 3 sections using balanced partitioning
+
+# RUN: ld.lld -e main -o - a.o --irpgo-profile-sort=a.profdata --symbol-ordering-file a.orderfile | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=ORDERFILE
+
+# ORDERFILE: _Z1Av
+# ORDERFILE: _Z1Fi
+# ORDERFILE: _Z1Ei
+# ORDERFILE: _Z1Di
+# ORDERFILE: _Z1Ci
+# ORDERFILE: _Z1Bi
+# ORDERFILE: main
+# ORDERFILE: r1
+# ORDERFILE: r2
+
+# RUN: ld.lld -e main -o a.out a.o --verbose-bp-section-orderer --compression-sort=function 2>&1 | FileCheck %s --check-prefix=COMPRESSION-FUNC
+# RUN: ld.lld -e main -o a.out a.o --verbose-bp-section-orderer --compression-sort=data 2>&1 | FileCheck %s --check-prefix=COMPRESSION-DATA
+# RUN: ld.lld -e main -o a.out a.o --verbose-bp-section-orderer --compression-sort=both 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
+# RUN: ld.lld -e main -o a.out a.o --verbose-bp-section-orderer --compression-sort=both --irpgo-profile-sort=a.profdata 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
+
+# COMPRESSION-FUNC: Ordered 7 sections using balanced partitioning
+# COMPRESSION-DATA: Ordered 3 sections using balanced partitioning
+# COMPRESSION-BOTH: Ordered 10 sections using balanced partitioning
+
 #--- a.proftext
 :ir
 :temporal_prof_traces
@@ -333,4 +865,4 @@ _Z1Di
 _Z1Av
 _Z1Fi
 _Z1Ei
-_Z1Di
+_Z1Di
\ No newline at end of file

>From ada5834fc71faa251524ad4b21f8edbf0aadc792 Mon Sep 17 00:00:00 2001
From: xupengying <xpy66swsry at gmail.com>
Date: Thu, 5 Dec 2024 17:29:26 +0800
Subject: [PATCH 4/7] fix some codereview problems

---
 lld/Common/BPSectionOrdererBase.cpp           | 20 +++++------
 lld/ELF/BPSectionOrderer.cpp                  | 36 +++++++++----------
 lld/ELF/BPSectionOrderer.h                    | 20 ++++++-----
 lld/ELF/Writer.cpp                            |  2 +-
 lld/MachO/BPSectionOrderer.cpp                | 15 ++++----
 lld/MachO/BPSectionOrderer.h                  | 21 ++++-------
 lld/include/lld/Common/BPSectionOrdererBase.h |  7 ++--
 7 files changed, 56 insertions(+), 65 deletions(-)

diff --git a/lld/Common/BPSectionOrdererBase.cpp b/lld/Common/BPSectionOrdererBase.cpp
index 2fc39cc777f294..234338226b2921 100644
--- a/lld/Common/BPSectionOrdererBase.cpp
+++ b/lld/Common/BPSectionOrdererBase.cpp
@@ -1,5 +1,4 @@
-//===- BPSectionOrdererBase.cpp---------------------------------------*- C++
-//-*-===//
+//===- BPSectionOrdererBase.cpp -------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -53,7 +52,7 @@ static SmallVector<std::pair<unsigned, UtilityNodes>> getUnsForCompression(
       ++hashFrequency[hash];
 
   if (duplicateSectionIdxs) {
-    // Merge section that are nearly identical
+    // Merge sections that are nearly identical
     SmallVector<std::pair<unsigned, SmallVector<uint64_t>>> newSectionHashes;
     DenseMap<uint64_t, unsigned> wholeHashToSectionIdx;
     for (auto &[sectionIdx, hashes] : sectionHashes) {
@@ -105,22 +104,21 @@ BPSectionOrdererBase::reorderSectionsByBalancedPartitioning(
     size_t &highestAvailablePriority, llvm::StringRef profilePath,
     bool forFunctionCompression, bool forDataCompression,
     bool compressionSortStartupFunctions, bool verbose,
-    SmallVector<BPSectionBase *> inputSections) {
-  TimeTraceScope timeScope("Balanced Partitioning");
+    SmallVector<std::unique_ptr<BPSectionBase>> &inputSections) {
+  TimeTraceScope timeScope("Setup Balanced Partitioning");
   SmallVector<const BPSectionBase *> sections;
   DenseMap<const BPSectionBase *, uint64_t> sectionToIdx;
   StringMap<DenseSet<unsigned>> symbolToSectionIdxs;
 
   // Process input sections
-  for (const auto *isec : inputSections) {
+  for (const auto &isec : inputSections) {
     if (!isec->hasValidData())
       continue;
 
     unsigned sectionIdx = sections.size();
-    sectionToIdx.try_emplace(isec, sectionIdx);
-    sections.push_back(isec);
-
-    for (auto *sym : isec->getSymbols())
+    sectionToIdx.try_emplace(isec.get(), sectionIdx);
+    sections.emplace_back(isec.get());
+    for (auto &sym : isec->getSymbols())
       if (auto *d = sym->asDefinedSymbol())
         symbolToSectionIdxs[d->getName()].insert(sectionIdx);
   }
@@ -331,7 +329,7 @@ BPSectionOrdererBase::reorderSectionsByBalancedPartitioning(
       const uint64_t pageSize = (1 << 14);
       uint64_t currentAddress = 0;
       for (const auto *isec : orderedSections) {
-        for (auto *sym : isec->getSymbols()) {
+        for (auto &sym : isec->getSymbols()) {
           if (auto *d = sym->asDefinedSymbol()) {
             uint64_t startAddress = currentAddress + d->getValue();
             uint64_t endAddress = startAddress + d->getSize();
diff --git a/lld/ELF/BPSectionOrderer.cpp b/lld/ELF/BPSectionOrderer.cpp
index 0d9e59e9c1bef2..5d9f28a9af7402 100644
--- a/lld/ELF/BPSectionOrderer.cpp
+++ b/lld/ELF/BPSectionOrderer.cpp
@@ -1,4 +1,4 @@
-//===- BPSectionOrderer.cpp--------------------------------------*- C++ -*-===//
+//===- BPSectionOrderer.cpp------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -30,22 +30,24 @@ lld::elf::runBalancedPartitioning(Ctx &ctx, llvm::StringRef profilePath,
                                   bool verbose) {
   size_t highestAvailablePriority = std::numeric_limits<int>::max();
   // Collect all InputSectionBase objects from symbols and wrap them as
-  // BPSectionBase instances for balanced partitioning which follow the way
+  // BPSectionELF instances for balanced partitioning which follow the way
   // '--symbol-ordering-file' does.
-  SmallVector<lld::BPSectionBase *> sections;
+  SmallVector<std::unique_ptr<BPSectionBase>> sections;
 
   for (Symbol *sym : ctx.symtab->getSymbols())
-    if (auto *d = dyn_cast<Defined>(sym))
-      if (auto *sec = dyn_cast_or_null<InputSectionBase>(d->section))
-        if (sym->getSize() > 0)
-          sections.push_back(new BPSectionELF(sec, new BPSymbolELF(sym)));
+    if (sym->getSize() > 0)
+      if (auto *d = dyn_cast<Defined>(sym))
+        if (auto *sec = dyn_cast_or_null<InputSectionBase>(d->section))
+          sections.emplace_back(std::make_unique<BPSectionELF>(
+              sec, std::make_unique<BPSymbolELF>(sym)));
 
   for (ELFFileBase *file : ctx.objectFiles)
     for (Symbol *sym : file->getLocalSymbols())
-      if (auto *d = dyn_cast<Defined>(sym))
-        if (auto *sec = dyn_cast_or_null<InputSectionBase>(d->section))
-          if (sym->getSize() > 0)
-            sections.push_back(new BPSectionELF(sec, new BPSymbolELF(sym)));
+      if (sym->getSize() > 0)
+        if (auto *d = dyn_cast<Defined>(sym))
+          if (auto *sec = dyn_cast_or_null<InputSectionBase>(d->section))
+            sections.emplace_back(std::make_unique<BPSectionELF>(
+                sec, std::make_unique<BPSymbolELF>(sym)));
 
   auto reorderedSections =
       lld::BPSectionOrdererBase::reorderSectionsByBalancedPartitioning(
@@ -54,14 +56,10 @@ lld::elf::runBalancedPartitioning(Ctx &ctx, llvm::StringRef profilePath,
           sections);
 
   DenseMap<const InputSectionBase *, int> result;
-  for (const auto &[BPSectionBase, priority] : reorderedSections) {
-    if (const BPSectionELF *elfSection =
-            dyn_cast<BPSectionELF>(BPSectionBase)) {
-      result[elfSection->getSymbol()->getInputSection()] =
-          static_cast<int>(priority);
-      delete const_cast<BPSectionELF *>(elfSection)->getSymbol();
-      delete const_cast<BPSectionELF *>(elfSection);
-    }
+  for (const auto &[sec, priority] : reorderedSections) {
+    auto *elfSection = cast<BPSectionELF>(sec);
+    result.try_emplace(elfSection->getSymbol()->getInputSection(),
+                       static_cast<int>(priority));
   }
   return result;
 }
diff --git a/lld/ELF/BPSectionOrderer.h b/lld/ELF/BPSectionOrderer.h
index 12089e86ac1748..a9a977782a86c4 100644
--- a/lld/ELF/BPSectionOrderer.h
+++ b/lld/ELF/BPSectionOrderer.h
@@ -1,4 +1,4 @@
-//===- BPSectionOrderer.h ---------------------------------------*- C++ -*-===//
+//===- BPSectionOrderer.h -------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -40,8 +40,9 @@ class BPSymbolELF : public BPSymbol {
   llvm::StringRef getName() const override { return sym->getName(); }
 
   BPSymbol *asDefinedSymbol() override {
-    if (auto *d = llvm::dyn_cast<Defined>(sym))
+    if (auto *d = llvm::dyn_cast<Defined>(sym)) {
       return this;
+    }
     return nullptr;
   }
 
@@ -68,16 +69,16 @@ class BPSymbolELF : public BPSymbol {
 
 class BPSectionELF : public BPSectionBase {
   const InputSectionBase *isec;
-  BPSymbolELF *symbol;
-  llvm::SmallVector<BPSymbol *, 0> symbols;
+  std::unique_ptr<BPSymbolELF> symbol;
 
 public:
-  explicit BPSectionELF(const InputSectionBase *sec, BPSymbolELF *sym)
-      : isec(sec), symbol(sym), symbols({sym}) {}
+  explicit BPSectionELF(const InputSectionBase *sec,
+                        std::unique_ptr<BPSymbolELF> sym)
+      : isec(sec), symbol(std::move(sym)) {}
 
   const InputSectionBase *getSection() const { return isec; }
 
-  BPSymbolELF *getSymbol() const { return symbol; }
+  BPSymbolELF *getSymbol() const { return symbol.get(); }
   llvm::StringRef getName() const override { return isec->name; }
 
   uint64_t getSize() const override { return isec->getSize(); }
@@ -94,7 +95,10 @@ class BPSectionELF : public BPSectionBase {
     return isec->content();
   }
 
-  llvm::ArrayRef<BPSymbol *> getSymbols() const override { return symbols; }
+  llvm::ArrayRef<std::unique_ptr<BPSymbol>> getSymbols() const override {
+    return llvm::ArrayRef<std::unique_ptr<BPSymbol>>(
+        reinterpret_cast<const std::unique_ptr<BPSymbol> *>(&symbol), 1);
+  }
 
   void getSectionHash(llvm::SmallVectorImpl<uint64_t> &hashes,
                       const llvm::DenseMap<const BPSectionBase *, uint64_t>
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 860e528964c465..434f754671cabf 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -1089,7 +1089,7 @@ static DenseMap<const InputSectionBase *, int> buildSectionOrder(Ctx &ctx) {
         ctx.arg.verboseBpSectionOrderer);
   }
   // Use the rarely used option --call-graph-ordering-file to sort sections.
-  else if (!ctx.arg.callGraphProfile.empty())
+  if (!ctx.arg.callGraphProfile.empty())
     return computeCallGraphProfileOrder(ctx);
 
   if (ctx.arg.symbolOrderingFile.empty())
diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index 40052ff09dcc64..be7fa04a552153 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -1,4 +1,4 @@
-//===- BPSectionOrderer.cpp--------------------------------------*- C++ -*-===//
+//===- BPSectionOrderer.cpp -----------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -14,6 +14,8 @@
 #include "llvm/Support/BalancedPartitioning.h"
 #include "llvm/Support/TimeProfiler.h"
 
+#define DEBUG_TYPE "bp-section-orderer"
+
 using namespace llvm;
 using namespace lld::macho;
 
@@ -22,14 +24,14 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
     bool forFunctionCompression, bool forDataCompression,
     bool compressionSortStartupFunctions, bool verbose) {
 
-  SmallVector<BPSectionBase *> sections;
+  SmallVector<std::unique_ptr<BPSectionBase>> sections;
   for (const auto *file : inputFiles) {
     for (auto *sec : file->sections) {
       for (auto &subsec : sec->subsections) {
         auto *isec = subsec.isec;
         if (!isec || isec->data.empty() || !isec->data.data())
           continue;
-        sections.push_back(new BPSectionMacho(isec));
+        sections.emplace_back(std::make_unique<BPSectionMacho>(isec));
       }
     }
   }
@@ -41,10 +43,9 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
           sections);
 
   DenseMap<const InputSection *, size_t> result;
-  for (const auto &[BPSectionBase, priority] : reorderedSections) {
-    if (auto *machoSection = dyn_cast<BPSectionMacho>(BPSectionBase)) {
-      result[machoSection->getSection()] = priority;
-      delete machoSection;
+  for (const auto &[sec, priority] : reorderedSections) {
+    if (auto *machoSection = dyn_cast<BPSectionMacho>(sec)) {
+      result.try_emplace(machoSection->getSection(), priority);
     }
   }
   return result;
diff --git a/lld/MachO/BPSectionOrderer.h b/lld/MachO/BPSectionOrderer.h
index b57e814120a22b..e689eefca95985 100644
--- a/lld/MachO/BPSectionOrderer.h
+++ b/lld/MachO/BPSectionOrderer.h
@@ -1,4 +1,4 @@
-//===- BPSectionOrderer.h ---------------------------------------*- C++ -*-===//
+//===- BPSectionOrderer.h -------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -57,7 +57,7 @@ class BPSymbolMacho : public BPSymbol {
 
 class BPSectionMacho : public BPSectionBase {
   const InputSection *isec;
-  mutable std::vector<std::unique_ptr<BPSymbolMacho>> symbolCache;
+  mutable std::vector<std::unique_ptr<BPSymbol>> symbols;
 
 public:
   explicit BPSectionMacho(const InputSection *sec) : isec(sec) {}
@@ -76,17 +76,11 @@ class BPSectionMacho : public BPSectionBase {
 
   llvm::ArrayRef<uint8_t> getSectionData() const override { return isec->data; }
 
-  llvm::ArrayRef<BPSymbol *> getSymbols() const override {
-    // Lazy initialization of symbol cache
-    if (symbolCache.empty()) {
-      for (const auto *sym : isec->symbols)
-        symbolCache.push_back(std::make_unique<BPSymbolMacho>(sym));
+  llvm::ArrayRef<std::unique_ptr<BPSymbol>> getSymbols() const override {
+    for (auto *d : isec->symbols) {
+      symbols.emplace_back(std::make_unique<BPSymbolMacho>(d));
     }
-    static std::vector<BPSymbol *> result;
-    result.clear();
-    for (const auto &sym : symbolCache)
-      result.push_back(sym.get());
-    return result;
+    return symbols;
   }
 
   void getSectionHash(llvm::SmallVectorImpl<uint64_t> &hashes,
@@ -145,9 +139,6 @@ class BPSectionMacho : public BPSectionBase {
     if (auto *sym = reloc.referent.dyn_cast<Symbol *>()) {
       kind += (" Symbol " + Twine(sym->kind())).str();
       if (auto *d = llvm::dyn_cast<Defined>(sym)) {
-        if (llvm::isa_and_nonnull<CStringInputSection>(isec))
-          return BPSectionBase::getRelocHash(kind, 0, isec->getOffset(d->value),
-                                             reloc.addend);
         return BPSectionBase::getRelocHash(kind, sectionIdx.value_or(0),
                                            d->value, reloc.addend);
       }
diff --git a/lld/include/lld/Common/BPSectionOrdererBase.h b/lld/include/lld/Common/BPSectionOrdererBase.h
index 793fd9a8deee79..b08d78a151304b 100644
--- a/lld/include/lld/Common/BPSectionOrdererBase.h
+++ b/lld/include/lld/Common/BPSectionOrdererBase.h
@@ -1,5 +1,4 @@
-//===- BPSectionOrdererBase.h ---------------------------------------*- C++
-//-*-===//
+//===- BPSectionOrdererBase.h ---------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -41,7 +40,7 @@ class BPSectionBase {
   virtual bool hasValidData() const = 0;
   virtual bool isCodeSection() const = 0;
   virtual llvm::ArrayRef<uint8_t> getSectionData() const = 0;
-  virtual llvm::ArrayRef<BPSymbol *> getSymbols() const = 0;
+  virtual llvm::ArrayRef<std::unique_ptr<BPSymbol>> getSymbols() const = 0;
   virtual void
   getSectionHash(llvm::SmallVectorImpl<uint64_t> &hashes,
                  const llvm::DenseMap<const BPSectionBase *, uint64_t>
@@ -70,7 +69,7 @@ class BPSectionOrdererBase {
       size_t &highestAvailablePriority, llvm::StringRef profilePath,
       bool forFunctionCompression, bool forDataCompression,
       bool compressionSortStartupFunctions, bool verbose,
-      llvm::SmallVector<BPSectionBase *> inputSections);
+      llvm::SmallVector<std::unique_ptr<BPSectionBase>> &inputSections);
 };
 
 } // namespace lld

>From cb47def73c30400c2e41b5d619556aa8f94a9b0e Mon Sep 17 00:00:00 2001
From: xupengying <xpy66swsry at gmail.com>
Date: Thu, 5 Dec 2024 20:05:09 +0800
Subject: [PATCH 5/7] fix: ELF bp-section-orderer.s test

---
 lld/test/ELF/bp-section-orderer.s | 751 +++---------------------------
 1 file changed, 76 insertions(+), 675 deletions(-)

diff --git a/lld/test/ELF/bp-section-orderer.s b/lld/test/ELF/bp-section-orderer.s
index 22154cc7c959a4..1e96880c565583 100644
--- a/lld/test/ELF/bp-section-orderer.s
+++ b/lld/test/ELF/bp-section-orderer.s
@@ -3,8 +3,81 @@
 # RUN: rm -rf %t && split-file %s %t && cd %t
 # RUN: llvm-mc -filetype=obj -triple=aarch64 a.s -o a.o
 # RUN: llvm-profdata merge a.proftext -o a.profdata
+# RUN: ld.lld -e main -o a.out a.o --irpgo-profile-sort=a.profdata --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=STARTUP
+# RUN: ld.lld -e main -o a.out a.o --irpgo-profile-sort=a.profdata --verbose-bp-section-orderer --icf=all --compression-sort=none 2>&1 | FileCheck %s --check-prefix=STARTUP
+
+# STARTUP: Ordered 3 sections using balanced partitioning
+
+# RUN: ld.lld -e main -o - a.o --irpgo-profile-sort=a.profdata --symbol-ordering-file a.orderfile | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=ORDERFILE
+
+# ORDERFILE: _Z1Av
+# ORDERFILE: _Z1Fi
+# ORDERFILE: _Z1Ei
+# ORDERFILE: _Z1Di
+# ORDERFILE: _Z1Ci
+# ORDERFILE: _Z1Bi
+# ORDERFILE: main
+# ORDERFILE: r1
+# ORDERFILE: r2
+
+# RUN: ld.lld -e main -o a.out a.o --verbose-bp-section-orderer --compression-sort=function 2>&1 | FileCheck %s --check-prefix=COMPRESSION-FUNC
+# RUN: ld.lld -e main -o a.out a.o --verbose-bp-section-orderer --compression-sort=data 2>&1 | FileCheck %s --check-prefix=COMPRESSION-DATA
+# RUN: ld.lld -e main -o a.out a.o --verbose-bp-section-orderer --compression-sort=both 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
+# RUN: ld.lld -e main -o a.out a.o --verbose-bp-section-orderer --compression-sort=both --irpgo-profile-sort=a.profdata 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
+
+# COMPRESSION-FUNC: Ordered 7 sections using balanced partitioning
+# COMPRESSION-DATA: Ordered 3 sections using balanced partitioning
+# COMPRESSION-BOTH: Ordered 10 sections using balanced partitioning
+
+#--- a.proftext
+:ir
+:temporal_prof_traces
+# Num Traces
+1
+# Trace Stream Size:
+1
+# Weight
+1
+_Z1Av, _Z1Bi, _Z1Ci
+
+_Z1Av
+# Func Hash:
+1111
+# Num Counters:
+1
+# Counter Values:
+1
+
+_Z1Bi
+# Func Hash:
+2222
+# Num Counters:
+1
+# Counter Values:
+1
+
+_Z1Ci
+# Func Hash:
+3333
+# Num Counters:
+1
+# Counter Values:
+1
+
+_Z1Di
+# Func Hash:
+4444
+# Num Counters:
+1
+# Counter Values:
+1
+
+#--- a.orderfile
+_Z1Av
+_Z1Fi
+_Z1Ei
+_Z1Di
 
-.ifdef GEN
 #--- a.cc
 const char s1[] = "hello world";
 const char s2[] = "i am a string";
@@ -41,209 +114,129 @@ int main() {
 }
 #--- gen
 echo '#--- a.s'
-clang -target aarch64-linux-gnu -fdebug-compilation-dir='/proc/self/cwd' -ffunction-sections -fdata-sections -fno-exceptions -fno-rtti -fno-asynchronous-unwind-tables -S -g a.cc -o -
-.endif
+clang --target=aarch64-linux-gnu -fdebug-compilation-dir='/proc/self/cwd' -ffunction-sections -fdata-sections -fno-exceptions -fno-rtti -fno-asynchronous-unwind-tables -S a.cc -o -
+;--- a.ll
 #--- a.s
 	.text
 	.file	"a.cc"
-	.file	0 "/proc/self/cwd" "a.cc" md5 0xd88df55d5eb7769f11cfb15e5857b68c
 	.section	.text._Z1Av,"ax",@progbits
 	.globl	_Z1Av                           // -- Begin function _Z1Av
 	.p2align	2
 	.type	_Z1Av,@function
 _Z1Av:                                  // @_Z1Av
-.Lfunc_begin0:
-	.cfi_sections .debug_frame
-	.cfi_startproc
 // %bb.0:
-	.loc	0 6 5 prologue_end              // a.cc:6:5
 	ret
-.Ltmp0:
 .Lfunc_end0:
 	.size	_Z1Av, .Lfunc_end0-_Z1Av
-	.cfi_endproc
                                         // -- End function
 	.section	.text._Z1Bi,"ax",@progbits
 	.globl	_Z1Bi                           // -- Begin function _Z1Bi
 	.p2align	2
 	.type	_Z1Bi,@function
 _Z1Bi:                                  // @_Z1Bi
-.Lfunc_begin1:
-	.loc	0 9 0                           // a.cc:9:0
-	.cfi_startproc
 // %bb.0:
 	sub	sp, sp, #32
 	stp	x29, x30, [sp, #16]             // 16-byte Folded Spill
 	add	x29, sp, #16
-	.cfi_def_cfa w29, 16
-	.cfi_offset w30, -8
-	.cfi_offset w29, -16
 	stur	w0, [x29, #-4]
-.Ltmp1:
-	.loc	0 10 5 prologue_end             // a.cc:10:5
 	bl	_Z1Av
-	.loc	0 11 12                         // a.cc:11:12
 	ldur	w8, [x29, #-4]
-	.loc	0 11 14 is_stmt 0               // a.cc:11:14
 	add	w0, w8, #1
-	.loc	0 11 5 epilogue_begin           // a.cc:11:5
 	ldp	x29, x30, [sp, #16]             // 16-byte Folded Reload
 	add	sp, sp, #32
 	ret
-.Ltmp2:
 .Lfunc_end1:
 	.size	_Z1Bi, .Lfunc_end1-_Z1Bi
-	.cfi_endproc
                                         // -- End function
 	.section	.text._Z1Ci,"ax",@progbits
 	.globl	_Z1Ci                           // -- Begin function _Z1Ci
 	.p2align	2
 	.type	_Z1Ci,@function
 _Z1Ci:                                  // @_Z1Ci
-.Lfunc_begin2:
-	.loc	0 14 0 is_stmt 1                // a.cc:14:0
-	.cfi_startproc
 // %bb.0:
 	sub	sp, sp, #32
 	stp	x29, x30, [sp, #16]             // 16-byte Folded Spill
 	add	x29, sp, #16
-	.cfi_def_cfa w29, 16
-	.cfi_offset w30, -8
-	.cfi_offset w29, -16
 	stur	w0, [x29, #-4]
-.Ltmp3:
-	.loc	0 15 5 prologue_end             // a.cc:15:5
 	bl	_Z1Av
-	.loc	0 16 12                         // a.cc:16:12
 	ldur	w8, [x29, #-4]
-	.loc	0 16 14 is_stmt 0               // a.cc:16:14
 	add	w0, w8, #2
-	.loc	0 16 5 epilogue_begin           // a.cc:16:5
 	ldp	x29, x30, [sp, #16]             // 16-byte Folded Reload
 	add	sp, sp, #32
 	ret
-.Ltmp4:
 .Lfunc_end2:
 	.size	_Z1Ci, .Lfunc_end2-_Z1Ci
-	.cfi_endproc
                                         // -- End function
 	.section	.text._Z1Di,"ax",@progbits
 	.globl	_Z1Di                           // -- Begin function _Z1Di
 	.p2align	2
 	.type	_Z1Di,@function
 _Z1Di:                                  // @_Z1Di
-.Lfunc_begin3:
-	.loc	0 19 0 is_stmt 1                // a.cc:19:0
-	.cfi_startproc
 // %bb.0:
 	sub	sp, sp, #32
 	stp	x29, x30, [sp, #16]             // 16-byte Folded Spill
 	add	x29, sp, #16
-	.cfi_def_cfa w29, 16
-	.cfi_offset w30, -8
-	.cfi_offset w29, -16
 	stur	w0, [x29, #-4]
-.Ltmp5:
-	.loc	0 20 14 prologue_end            // a.cc:20:14
 	ldur	w8, [x29, #-4]
-	.loc	0 20 16 is_stmt 0               // a.cc:20:16
 	add	w0, w8, #2
-	.loc	0 20 12                         // a.cc:20:12
 	bl	_Z1Bi
-	.loc	0 20 5 epilogue_begin           // a.cc:20:5
 	ldp	x29, x30, [sp, #16]             // 16-byte Folded Reload
 	add	sp, sp, #32
 	ret
-.Ltmp6:
 .Lfunc_end3:
 	.size	_Z1Di, .Lfunc_end3-_Z1Di
-	.cfi_endproc
                                         // -- End function
 	.section	.text._Z1Ei,"ax",@progbits
 	.globl	_Z1Ei                           // -- Begin function _Z1Ei
 	.p2align	2
 	.type	_Z1Ei,@function
 _Z1Ei:                                  // @_Z1Ei
-.Lfunc_begin4:
-	.loc	0 23 0 is_stmt 1                // a.cc:23:0
-	.cfi_startproc
 // %bb.0:
 	sub	sp, sp, #32
 	stp	x29, x30, [sp, #16]             // 16-byte Folded Spill
 	add	x29, sp, #16
-	.cfi_def_cfa w29, 16
-	.cfi_offset w30, -8
-	.cfi_offset w29, -16
 	stur	w0, [x29, #-4]
-.Ltmp7:
-	.loc	0 24 14 prologue_end            // a.cc:24:14
 	ldur	w8, [x29, #-4]
-	.loc	0 24 16 is_stmt 0               // a.cc:24:16
 	add	w0, w8, #2
-	.loc	0 24 12                         // a.cc:24:12
 	bl	_Z1Ci
-	.loc	0 24 5 epilogue_begin           // a.cc:24:5
 	ldp	x29, x30, [sp, #16]             // 16-byte Folded Reload
 	add	sp, sp, #32
 	ret
-.Ltmp8:
 .Lfunc_end4:
 	.size	_Z1Ei, .Lfunc_end4-_Z1Ei
-	.cfi_endproc
                                         // -- End function
 	.section	.text._Z1Fi,"ax",@progbits
 	.globl	_Z1Fi                           // -- Begin function _Z1Fi
 	.p2align	2
 	.type	_Z1Fi,@function
 _Z1Fi:                                  // @_Z1Fi
-.Lfunc_begin5:
-	.loc	0 27 0 is_stmt 1                // a.cc:27:0
-	.cfi_startproc
 // %bb.0:
 	sub	sp, sp, #32
 	stp	x29, x30, [sp, #16]             // 16-byte Folded Spill
 	add	x29, sp, #16
-	.cfi_def_cfa w29, 16
-	.cfi_offset w30, -8
-	.cfi_offset w29, -16
 	stur	w0, [x29, #-4]
-.Ltmp9:
-	.loc	0 28 14 prologue_end            // a.cc:28:14
 	ldur	w8, [x29, #-4]
-	.loc	0 28 16 is_stmt 0               // a.cc:28:16
 	add	w0, w8, #3
-	.loc	0 28 12                         // a.cc:28:12
 	bl	_Z1Ci
-	.loc	0 28 5 epilogue_begin           // a.cc:28:5
 	ldp	x29, x30, [sp, #16]             // 16-byte Folded Reload
 	add	sp, sp, #32
 	ret
-.Ltmp10:
 .Lfunc_end5:
 	.size	_Z1Fi, .Lfunc_end5-_Z1Fi
-	.cfi_endproc
                                         // -- End function
 	.section	.text.main,"ax",@progbits
 	.globl	main                            // -- Begin function main
 	.p2align	2
 	.type	main,@function
 main:                                   // @main
-.Lfunc_begin6:
-	.loc	0 31 0 is_stmt 1                // a.cc:31:0
-	.cfi_startproc
 // %bb.0:
 	sub	sp, sp, #16
-	.cfi_def_cfa_offset 16
 	mov	w0, wzr
 	str	wzr, [sp, #12]
-.Ltmp12:
-	.loc	0 32 5 prologue_end epilogue_begin // a.cc:32:5
 	add	sp, sp, #16
 	ret
-.Ltmp13:
 .Lfunc_end6:
 	.size	main, .Lfunc_end6-main
-	.cfi_endproc
                                         // -- End function
 	.type	_ZL2s1,@object                  // @_ZL2s1
 	.section	.rodata._ZL2s1,"a",@progbits
@@ -267,521 +260,6 @@ r2:
 	.xword	r1
 	.size	r2, 8
 
-	.section	.debug_abbrev,"",@progbits
-	.byte	1                               // Abbreviation Code
-	.byte	17                              // DW_TAG_compile_unit
-	.byte	1                               // DW_CHILDREN_yes
-	.byte	37                              // DW_AT_producer
-	.byte	37                              // DW_FORM_strx1
-	.byte	19                              // DW_AT_language
-	.byte	5                               // DW_FORM_data2
-	.byte	3                               // DW_AT_name
-	.byte	37                              // DW_FORM_strx1
-	.byte	114                             // DW_AT_str_offsets_base
-	.byte	23                              // DW_FORM_sec_offset
-	.byte	16                              // DW_AT_stmt_list
-	.byte	23                              // DW_FORM_sec_offset
-	.byte	27                              // DW_AT_comp_dir
-	.byte	37                              // DW_FORM_strx1
-	.byte	17                              // DW_AT_low_pc
-	.byte	1                               // DW_FORM_addr
-	.byte	85                              // DW_AT_ranges
-	.byte	35                              // DW_FORM_rnglistx
-	.byte	115                             // DW_AT_addr_base
-	.byte	23                              // DW_FORM_sec_offset
-	.byte	116                             // DW_AT_rnglists_base
-	.byte	23                              // DW_FORM_sec_offset
-	.byte	0                               // EOM(1)
-	.byte	0                               // EOM(2)
-	.byte	2                               // Abbreviation Code
-	.byte	52                              // DW_TAG_variable
-	.byte	0                               // DW_CHILDREN_no
-	.byte	3                               // DW_AT_name
-	.byte	37                              // DW_FORM_strx1
-	.byte	73                              // DW_AT_type
-	.byte	19                              // DW_FORM_ref4
-	.byte	63                              // DW_AT_external
-	.byte	25                              // DW_FORM_flag_present
-	.byte	58                              // DW_AT_decl_file
-	.byte	11                              // DW_FORM_data1
-	.byte	59                              // DW_AT_decl_line
-	.byte	11                              // DW_FORM_data1
-	.byte	2                               // DW_AT_location
-	.byte	24                              // DW_FORM_exprloc
-	.byte	0                               // EOM(1)
-	.byte	0                               // EOM(2)
-	.byte	3                               // Abbreviation Code
-	.byte	15                              // DW_TAG_pointer_type
-	.byte	0                               // DW_CHILDREN_no
-	.byte	73                              // DW_AT_type
-	.byte	19                              // DW_FORM_ref4
-	.byte	0                               // EOM(1)
-	.byte	0                               // EOM(2)
-	.byte	4                               // Abbreviation Code
-	.byte	38                              // DW_TAG_const_type
-	.byte	0                               // DW_CHILDREN_no
-	.byte	73                              // DW_AT_type
-	.byte	19                              // DW_FORM_ref4
-	.byte	0                               // EOM(1)
-	.byte	0                               // EOM(2)
-	.byte	5                               // Abbreviation Code
-	.byte	36                              // DW_TAG_base_type
-	.byte	0                               // DW_CHILDREN_no
-	.byte	3                               // DW_AT_name
-	.byte	37                              // DW_FORM_strx1
-	.byte	62                              // DW_AT_encoding
-	.byte	11                              // DW_FORM_data1
-	.byte	11                              // DW_AT_byte_size
-	.byte	11                              // DW_FORM_data1
-	.byte	0                               // EOM(1)
-	.byte	0                               // EOM(2)
-	.byte	6                               // Abbreviation Code
-	.byte	52                              // DW_TAG_variable
-	.byte	0                               // DW_CHILDREN_no
-	.byte	3                               // DW_AT_name
-	.byte	37                              // DW_FORM_strx1
-	.byte	73                              // DW_AT_type
-	.byte	19                              // DW_FORM_ref4
-	.byte	58                              // DW_AT_decl_file
-	.byte	11                              // DW_FORM_data1
-	.byte	59                              // DW_AT_decl_line
-	.byte	11                              // DW_FORM_data1
-	.byte	2                               // DW_AT_location
-	.byte	24                              // DW_FORM_exprloc
-	.byte	110                             // DW_AT_linkage_name
-	.byte	37                              // DW_FORM_strx1
-	.byte	0                               // EOM(1)
-	.byte	0                               // EOM(2)
-	.byte	7                               // Abbreviation Code
-	.byte	1                               // DW_TAG_array_type
-	.byte	1                               // DW_CHILDREN_yes
-	.byte	73                              // DW_AT_type
-	.byte	19                              // DW_FORM_ref4
-	.byte	0                               // EOM(1)
-	.byte	0                               // EOM(2)
-	.byte	8                               // Abbreviation Code
-	.byte	33                              // DW_TAG_subrange_type
-	.byte	0                               // DW_CHILDREN_no
-	.byte	73                              // DW_AT_type
-	.byte	19                              // DW_FORM_ref4
-	.byte	55                              // DW_AT_count
-	.byte	11                              // DW_FORM_data1
-	.byte	0                               // EOM(1)
-	.byte	0                               // EOM(2)
-	.byte	9                               // Abbreviation Code
-	.byte	36                              // DW_TAG_base_type
-	.byte	0                               // DW_CHILDREN_no
-	.byte	3                               // DW_AT_name
-	.byte	37                              // DW_FORM_strx1
-	.byte	11                              // DW_AT_byte_size
-	.byte	11                              // DW_FORM_data1
-	.byte	62                              // DW_AT_encoding
-	.byte	11                              // DW_FORM_data1
-	.byte	0                               // EOM(1)
-	.byte	0                               // EOM(2)
-	.byte	10                              // Abbreviation Code
-	.byte	46                              // DW_TAG_subprogram
-	.byte	0                               // DW_CHILDREN_no
-	.byte	17                              // DW_AT_low_pc
-	.byte	27                              // DW_FORM_addrx
-	.byte	18                              // DW_AT_high_pc
-	.byte	6                               // DW_FORM_data4
-	.byte	64                              // DW_AT_frame_base
-	.byte	24                              // DW_FORM_exprloc
-	.byte	110                             // DW_AT_linkage_name
-	.byte	37                              // DW_FORM_strx1
-	.byte	3                               // DW_AT_name
-	.byte	37                              // DW_FORM_strx1
-	.byte	58                              // DW_AT_decl_file
-	.byte	11                              // DW_FORM_data1
-	.byte	59                              // DW_AT_decl_line
-	.byte	11                              // DW_FORM_data1
-	.byte	63                              // DW_AT_external
-	.byte	25                              // DW_FORM_flag_present
-	.byte	0                               // EOM(1)
-	.byte	0                               // EOM(2)
-	.byte	11                              // Abbreviation Code
-	.byte	46                              // DW_TAG_subprogram
-	.byte	1                               // DW_CHILDREN_yes
-	.byte	17                              // DW_AT_low_pc
-	.byte	27                              // DW_FORM_addrx
-	.byte	18                              // DW_AT_high_pc
-	.byte	6                               // DW_FORM_data4
-	.byte	64                              // DW_AT_frame_base
-	.byte	24                              // DW_FORM_exprloc
-	.byte	110                             // DW_AT_linkage_name
-	.byte	37                              // DW_FORM_strx1
-	.byte	3                               // DW_AT_name
-	.byte	37                              // DW_FORM_strx1
-	.byte	58                              // DW_AT_decl_file
-	.byte	11                              // DW_FORM_data1
-	.byte	59                              // DW_AT_decl_line
-	.byte	11                              // DW_FORM_data1
-	.byte	73                              // DW_AT_type
-	.byte	19                              // DW_FORM_ref4
-	.byte	63                              // DW_AT_external
-	.byte	25                              // DW_FORM_flag_present
-	.byte	0                               // EOM(1)
-	.byte	0                               // EOM(2)
-	.byte	12                              // Abbreviation Code
-	.byte	5                               // DW_TAG_formal_parameter
-	.byte	0                               // DW_CHILDREN_no
-	.byte	2                               // DW_AT_location
-	.byte	24                              // DW_FORM_exprloc
-	.byte	3                               // DW_AT_name
-	.byte	37                              // DW_FORM_strx1
-	.byte	58                              // DW_AT_decl_file
-	.byte	11                              // DW_FORM_data1
-	.byte	59                              // DW_AT_decl_line
-	.byte	11                              // DW_FORM_data1
-	.byte	73                              // DW_AT_type
-	.byte	19                              // DW_FORM_ref4
-	.byte	0                               // EOM(1)
-	.byte	0                               // EOM(2)
-	.byte	13                              // Abbreviation Code
-	.byte	46                              // DW_TAG_subprogram
-	.byte	0                               // DW_CHILDREN_no
-	.byte	17                              // DW_AT_low_pc
-	.byte	27                              // DW_FORM_addrx
-	.byte	18                              // DW_AT_high_pc
-	.byte	6                               // DW_FORM_data4
-	.byte	64                              // DW_AT_frame_base
-	.byte	24                              // DW_FORM_exprloc
-	.byte	3                               // DW_AT_name
-	.byte	37                              // DW_FORM_strx1
-	.byte	58                              // DW_AT_decl_file
-	.byte	11                              // DW_FORM_data1
-	.byte	59                              // DW_AT_decl_line
-	.byte	11                              // DW_FORM_data1
-	.byte	73                              // DW_AT_type
-	.byte	19                              // DW_FORM_ref4
-	.byte	63                              // DW_AT_external
-	.byte	25                              // DW_FORM_flag_present
-	.byte	0                               // EOM(1)
-	.byte	0                               // EOM(2)
-	.byte	0                               // EOM(3)
-	.section	.debug_info,"",@progbits
-.Lcu_begin0:
-	.word	.Ldebug_info_end0-.Ldebug_info_start0 // Length of Unit
-.Ldebug_info_start0:
-	.hword	5                               // DWARF version number
-	.byte	1                               // DWARF Unit Type
-	.byte	8                               // Address Size (in bytes)
-	.word	.debug_abbrev                   // Offset Into Abbrev. Section
-	.byte	1                               // Abbrev [1] 0xc:0x110 DW_TAG_compile_unit
-	.byte	0                               // DW_AT_producer
-	.hword	4                               // DW_AT_language
-	.byte	1                               // DW_AT_name
-	.word	.Lstr_offsets_base0             // DW_AT_str_offsets_base
-	.word	.Lline_table_start0             // DW_AT_stmt_list
-	.byte	2                               // DW_AT_comp_dir
-	.xword	0                               // DW_AT_low_pc
-	.byte	0                               // DW_AT_ranges
-	.word	.Laddr_table_base0              // DW_AT_addr_base
-	.word	.Lrnglists_table_base0          // DW_AT_rnglists_base
-	.byte	2                               // Abbrev [2] 0x2b:0xb DW_TAG_variable
-	.byte	3                               // DW_AT_name
-	.word	54                              // DW_AT_type
-                                        // DW_AT_external
-	.byte	0                               // DW_AT_decl_file
-	.byte	3                               // DW_AT_decl_line
-	.byte	2                               // DW_AT_location
-	.byte	161
-	.byte	0
-	.byte	3                               // Abbrev [3] 0x36:0x5 DW_TAG_pointer_type
-	.word	59                              // DW_AT_type
-	.byte	4                               // Abbrev [4] 0x3b:0x5 DW_TAG_const_type
-	.word	64                              // DW_AT_type
-	.byte	5                               // Abbrev [5] 0x40:0x4 DW_TAG_base_type
-	.byte	4                               // DW_AT_name
-	.byte	8                               // DW_AT_encoding
-	.byte	1                               // DW_AT_byte_size
-	.byte	2                               // Abbrev [2] 0x44:0xb DW_TAG_variable
-	.byte	5                               // DW_AT_name
-	.word	79                              // DW_AT_type
-                                        // DW_AT_external
-	.byte	0                               // DW_AT_decl_file
-	.byte	4                               // DW_AT_decl_line
-	.byte	2                               // DW_AT_location
-	.byte	161
-	.byte	1
-	.byte	3                               // Abbrev [3] 0x4f:0x5 DW_TAG_pointer_type
-	.word	54                              // DW_AT_type
-	.byte	6                               // Abbrev [6] 0x54:0xc DW_TAG_variable
-	.byte	6                               // DW_AT_name
-	.word	96                              // DW_AT_type
-	.byte	0                               // DW_AT_decl_file
-	.byte	1                               // DW_AT_decl_line
-	.byte	2                               // DW_AT_location
-	.byte	161
-	.byte	2
-	.byte	8                               // DW_AT_linkage_name
-	.byte	7                               // Abbrev [7] 0x60:0xc DW_TAG_array_type
-	.word	59                              // DW_AT_type
-	.byte	8                               // Abbrev [8] 0x65:0x6 DW_TAG_subrange_type
-	.word	108                             // DW_AT_type
-	.byte	12                              // DW_AT_count
-	.byte	0                               // End Of Children Mark
-	.byte	9                               // Abbrev [9] 0x6c:0x4 DW_TAG_base_type
-	.byte	7                               // DW_AT_name
-	.byte	8                               // DW_AT_byte_size
-	.byte	7                               // DW_AT_encoding
-	.byte	10                              // Abbrev [10] 0x70:0xc DW_TAG_subprogram
-	.byte	3                               // DW_AT_low_pc
-	.word	.Lfunc_end0-.Lfunc_begin0       // DW_AT_high_pc
-	.byte	1                               // DW_AT_frame_base
-	.byte	111
-	.byte	9                               // DW_AT_linkage_name
-	.byte	10                              // DW_AT_name
-	.byte	0                               // DW_AT_decl_file
-	.byte	5                               // DW_AT_decl_line
-                                        // DW_AT_external
-	.byte	11                              // Abbrev [11] 0x7c:0x1c DW_TAG_subprogram
-	.byte	4                               // DW_AT_low_pc
-	.word	.Lfunc_end1-.Lfunc_begin1       // DW_AT_high_pc
-	.byte	1                               // DW_AT_frame_base
-	.byte	109
-	.byte	11                              // DW_AT_linkage_name
-	.byte	12                              // DW_AT_name
-	.byte	0                               // DW_AT_decl_file
-	.byte	9                               // DW_AT_decl_line
-	.word	279                             // DW_AT_type
-                                        // DW_AT_external
-	.byte	12                              // Abbrev [12] 0x8c:0xb DW_TAG_formal_parameter
-	.byte	2                               // DW_AT_location
-	.byte	145
-	.byte	124
-	.byte	23                              // DW_AT_name
-	.byte	0                               // DW_AT_decl_file
-	.byte	9                               // DW_AT_decl_line
-	.word	279                             // DW_AT_type
-	.byte	0                               // End Of Children Mark
-	.byte	11                              // Abbrev [11] 0x98:0x1c DW_TAG_subprogram
-	.byte	5                               // DW_AT_low_pc
-	.word	.Lfunc_end2-.Lfunc_begin2       // DW_AT_high_pc
-	.byte	1                               // DW_AT_frame_base
-	.byte	109
-	.byte	14                              // DW_AT_linkage_name
-	.byte	15                              // DW_AT_name
-	.byte	0                               // DW_AT_decl_file
-	.byte	14                              // DW_AT_decl_line
-	.word	279                             // DW_AT_type
-                                        // DW_AT_external
-	.byte	12                              // Abbrev [12] 0xa8:0xb DW_TAG_formal_parameter
-	.byte	2                               // DW_AT_location
-	.byte	145
-	.byte	124
-	.byte	23                              // DW_AT_name
-	.byte	0                               // DW_AT_decl_file
-	.byte	14                              // DW_AT_decl_line
-	.word	279                             // DW_AT_type
-	.byte	0                               // End Of Children Mark
-	.byte	11                              // Abbrev [11] 0xb4:0x1c DW_TAG_subprogram
-	.byte	6                               // DW_AT_low_pc
-	.word	.Lfunc_end3-.Lfunc_begin3       // DW_AT_high_pc
-	.byte	1                               // DW_AT_frame_base
-	.byte	109
-	.byte	16                              // DW_AT_linkage_name
-	.byte	17                              // DW_AT_name
-	.byte	0                               // DW_AT_decl_file
-	.byte	19                              // DW_AT_decl_line
-	.word	279                             // DW_AT_type
-                                        // DW_AT_external
-	.byte	12                              // Abbrev [12] 0xc4:0xb DW_TAG_formal_parameter
-	.byte	2                               // DW_AT_location
-	.byte	145
-	.byte	124
-	.byte	23                              // DW_AT_name
-	.byte	0                               // DW_AT_decl_file
-	.byte	19                              // DW_AT_decl_line
-	.word	279                             // DW_AT_type
-	.byte	0                               // End Of Children Mark
-	.byte	11                              // Abbrev [11] 0xd0:0x1c DW_TAG_subprogram
-	.byte	7                               // DW_AT_low_pc
-	.word	.Lfunc_end4-.Lfunc_begin4       // DW_AT_high_pc
-	.byte	1                               // DW_AT_frame_base
-	.byte	109
-	.byte	18                              // DW_AT_linkage_name
-	.byte	19                              // DW_AT_name
-	.byte	0                               // DW_AT_decl_file
-	.byte	23                              // DW_AT_decl_line
-	.word	279                             // DW_AT_type
-                                        // DW_AT_external
-	.byte	12                              // Abbrev [12] 0xe0:0xb DW_TAG_formal_parameter
-	.byte	2                               // DW_AT_location
-	.byte	145
-	.byte	124
-	.byte	23                              // DW_AT_name
-	.byte	0                               // DW_AT_decl_file
-	.byte	23                              // DW_AT_decl_line
-	.word	279                             // DW_AT_type
-	.byte	0                               // End Of Children Mark
-	.byte	11                              // Abbrev [11] 0xec:0x1c DW_TAG_subprogram
-	.byte	8                               // DW_AT_low_pc
-	.word	.Lfunc_end5-.Lfunc_begin5       // DW_AT_high_pc
-	.byte	1                               // DW_AT_frame_base
-	.byte	109
-	.byte	20                              // DW_AT_linkage_name
-	.byte	21                              // DW_AT_name
-	.byte	0                               // DW_AT_decl_file
-	.byte	27                              // DW_AT_decl_line
-	.word	279                             // DW_AT_type
-                                        // DW_AT_external
-	.byte	12                              // Abbrev [12] 0xfc:0xb DW_TAG_formal_parameter
-	.byte	2                               // DW_AT_location
-	.byte	145
-	.byte	124
-	.byte	23                              // DW_AT_name
-	.byte	0                               // DW_AT_decl_file
-	.byte	27                              // DW_AT_decl_line
-	.word	279                             // DW_AT_type
-	.byte	0                               // End Of Children Mark
-	.byte	13                              // Abbrev [13] 0x108:0xf DW_TAG_subprogram
-	.byte	9                               // DW_AT_low_pc
-	.word	.Lfunc_end6-.Lfunc_begin6       // DW_AT_high_pc
-	.byte	1                               // DW_AT_frame_base
-	.byte	111
-	.byte	22                              // DW_AT_name
-	.byte	0                               // DW_AT_decl_file
-	.byte	31                              // DW_AT_decl_line
-	.word	279                             // DW_AT_type
-                                        // DW_AT_external
-	.byte	5                               // Abbrev [5] 0x117:0x4 DW_TAG_base_type
-	.byte	13                              // DW_AT_name
-	.byte	5                               // DW_AT_encoding
-	.byte	4                               // DW_AT_byte_size
-	.byte	0                               // End Of Children Mark
-.Ldebug_info_end0:
-	.section	.debug_rnglists,"",@progbits
-	.word	.Ldebug_list_header_end0-.Ldebug_list_header_start0 // Length
-.Ldebug_list_header_start0:
-	.hword	5                               // Version
-	.byte	8                               // Address size
-	.byte	0                               // Segment selector size
-	.word	1                               // Offset entry count
-.Lrnglists_table_base0:
-	.word	.Ldebug_ranges0-.Lrnglists_table_base0
-.Ldebug_ranges0:
-	.byte	3                               // DW_RLE_startx_length
-	.byte	3                               //   start index
-	.uleb128 .Lfunc_end0-.Lfunc_begin0      //   length
-	.byte	3                               // DW_RLE_startx_length
-	.byte	4                               //   start index
-	.uleb128 .Lfunc_end1-.Lfunc_begin1      //   length
-	.byte	3                               // DW_RLE_startx_length
-	.byte	5                               //   start index
-	.uleb128 .Lfunc_end2-.Lfunc_begin2      //   length
-	.byte	3                               // DW_RLE_startx_length
-	.byte	6                               //   start index
-	.uleb128 .Lfunc_end3-.Lfunc_begin3      //   length
-	.byte	3                               // DW_RLE_startx_length
-	.byte	7                               //   start index
-	.uleb128 .Lfunc_end4-.Lfunc_begin4      //   length
-	.byte	3                               // DW_RLE_startx_length
-	.byte	8                               //   start index
-	.uleb128 .Lfunc_end5-.Lfunc_begin5      //   length
-	.byte	3                               // DW_RLE_startx_length
-	.byte	9                               //   start index
-	.uleb128 .Lfunc_end6-.Lfunc_begin6      //   length
-	.byte	0                               // DW_RLE_end_of_list
-.Ldebug_list_header_end0:
-	.section	.debug_str_offsets,"",@progbits
-	.word	100                             // Length of String Offsets Set
-	.hword	5
-	.hword	0
-.Lstr_offsets_base0:
-	.section	.debug_str,"MS",@progbits,1
-.Linfo_string0:
-	.byte	0                               // string offset=0
-.Linfo_string1:
-	.asciz	"a.cc"                          // string offset=1
-.Linfo_string2:
-	.asciz	"/proc/self/cwd"                // string offset=6
-.Linfo_string3:
-	.asciz	"r1"                            // string offset=21
-.Linfo_string4:
-	.asciz	"char"                          // string offset=24
-.Linfo_string5:
-	.asciz	"r2"                            // string offset=29
-.Linfo_string6:
-	.asciz	"s1"                            // string offset=32
-.Linfo_string7:
-	.asciz	"__ARRAY_SIZE_TYPE__"           // string offset=35
-.Linfo_string8:
-	.asciz	"_ZL2s1"                        // string offset=55
-.Linfo_string9:
-	.asciz	"_Z1Av"                         // string offset=62
-.Linfo_string10:
-	.asciz	"A"                             // string offset=68
-.Linfo_string11:
-	.asciz	"_Z1Bi"                         // string offset=70
-.Linfo_string12:
-	.asciz	"B"                             // string offset=76
-.Linfo_string13:
-	.asciz	"int"                           // string offset=78
-.Linfo_string14:
-	.asciz	"_Z1Ci"                         // string offset=82
-.Linfo_string15:
-	.asciz	"C"                             // string offset=88
-.Linfo_string16:
-	.asciz	"_Z1Di"                         // string offset=90
-.Linfo_string17:
-	.asciz	"D"                             // string offset=96
-.Linfo_string18:
-	.asciz	"_Z1Ei"                         // string offset=98
-.Linfo_string19:
-	.asciz	"E"                             // string offset=104
-.Linfo_string20:
-	.asciz	"_Z1Fi"                         // string offset=106
-.Linfo_string21:
-	.asciz	"F"                             // string offset=112
-.Linfo_string22:
-	.asciz	"main"                          // string offset=114
-.Linfo_string23:
-	.asciz	"a"                             // string offset=119
-	.section	.debug_str_offsets,"",@progbits
-	.word	.Linfo_string0
-	.word	.Linfo_string1
-	.word	.Linfo_string2
-	.word	.Linfo_string3
-	.word	.Linfo_string4
-	.word	.Linfo_string5
-	.word	.Linfo_string6
-	.word	.Linfo_string7
-	.word	.Linfo_string8
-	.word	.Linfo_string9
-	.word	.Linfo_string10
-	.word	.Linfo_string11
-	.word	.Linfo_string12
-	.word	.Linfo_string13
-	.word	.Linfo_string14
-	.word	.Linfo_string15
-	.word	.Linfo_string16
-	.word	.Linfo_string17
-	.word	.Linfo_string18
-	.word	.Linfo_string19
-	.word	.Linfo_string20
-	.word	.Linfo_string21
-	.word	.Linfo_string22
-	.word	.Linfo_string23
-	.section	.debug_addr,"",@progbits
-	.word	.Ldebug_addr_end0-.Ldebug_addr_start0 // Length of contribution
-.Ldebug_addr_start0:
-	.hword	5                               // DWARF version number
-	.byte	8                               // Address size
-	.byte	0                               // Segment selector size
-.Laddr_table_base0:
-	.xword	r1
-	.xword	r2
-	.xword	_ZL2s1
-	.xword	.Lfunc_begin0
-	.xword	.Lfunc_begin1
-	.xword	.Lfunc_begin2
-	.xword	.Lfunc_begin3
-	.xword	.Lfunc_begin4
-	.xword	.Lfunc_begin5
-	.xword	.Lfunc_begin6
-.Ldebug_addr_end0:
 	.section	".note.GNU-stack","",@progbits
 	.addrsig
 	.addrsig_sym _Z1Av
@@ -789,80 +267,3 @@ r2:
 	.addrsig_sym _Z1Ci
 	.addrsig_sym _ZL2s1
 	.addrsig_sym r1
-	.section	.debug_line,"",@progbits
-.Lline_table_start0:
-
-# RUN: ld.lld -e main -o a.out a.o --irpgo-profile-sort=a.profdata --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=STARTUP
-# RUN: ld.lld -e main -o a.out a.o --irpgo-profile-sort=a.profdata --verbose-bp-section-orderer --icf=all --compression-sort=none 2>&1 | FileCheck %s --check-prefix=STARTUP
-
-# STARTUP: Ordered 3 sections using balanced partitioning
-
-# RUN: ld.lld -e main -o - a.o --irpgo-profile-sort=a.profdata --symbol-ordering-file a.orderfile | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=ORDERFILE
-
-# ORDERFILE: _Z1Av
-# ORDERFILE: _Z1Fi
-# ORDERFILE: _Z1Ei
-# ORDERFILE: _Z1Di
-# ORDERFILE: _Z1Ci
-# ORDERFILE: _Z1Bi
-# ORDERFILE: main
-# ORDERFILE: r1
-# ORDERFILE: r2
-
-# RUN: ld.lld -e main -o a.out a.o --verbose-bp-section-orderer --compression-sort=function 2>&1 | FileCheck %s --check-prefix=COMPRESSION-FUNC
-# RUN: ld.lld -e main -o a.out a.o --verbose-bp-section-orderer --compression-sort=data 2>&1 | FileCheck %s --check-prefix=COMPRESSION-DATA
-# RUN: ld.lld -e main -o a.out a.o --verbose-bp-section-orderer --compression-sort=both 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
-# RUN: ld.lld -e main -o a.out a.o --verbose-bp-section-orderer --compression-sort=both --irpgo-profile-sort=a.profdata 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
-
-# COMPRESSION-FUNC: Ordered 7 sections using balanced partitioning
-# COMPRESSION-DATA: Ordered 3 sections using balanced partitioning
-# COMPRESSION-BOTH: Ordered 10 sections using balanced partitioning
-
-#--- a.proftext
-:ir
-:temporal_prof_traces
-# Num Traces
-1
-# Trace Stream Size:
-1
-# Weight
-1
-_Z1Av, _Z1Bi, _Z1Ci
-
-_Z1Av
-# Func Hash:
-1111
-# Num Counters:
-1
-# Counter Values:
-1
-
-_Z1Bi
-# Func Hash:
-2222
-# Num Counters:
-1
-# Counter Values:
-1
-
-_Z1Ci
-# Func Hash:
-3333
-# Num Counters:
-1
-# Counter Values:
-1
-
-_Z1Di
-# Func Hash:
-4444
-# Num Counters:
-1
-# Counter Values:
-1
-
-#--- a.orderfile
-_Z1Av
-_Z1Fi
-_Z1Ei
-_Z1Di
\ No newline at end of file

>From 03ea0240f8f91e4ef7791cf40b1a01d6aca22084 Mon Sep 17 00:00:00 2001
From: xupengying <xpy66swsry at gmail.com>
Date: Thu, 5 Dec 2024 20:30:45 +0800
Subject: [PATCH 6/7] fix: make linkage name resolution format-specific

---
 lld/Common/BPSectionOrdererBase.cpp           | 6 ++----
 lld/ELF/BPSectionOrderer.h                    | 4 ++++
 lld/MachO/BPSectionOrderer.h                  | 6 ++++++
 lld/include/lld/Common/BPSectionOrdererBase.h | 1 +
 4 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/lld/Common/BPSectionOrdererBase.cpp b/lld/Common/BPSectionOrdererBase.cpp
index 234338226b2921..322343572855df 100644
--- a/lld/Common/BPSectionOrdererBase.cpp
+++ b/lld/Common/BPSectionOrdererBase.cpp
@@ -129,9 +129,7 @@ BPSectionOrdererBase::reorderSectionsByBalancedPartitioning(
     name = BPSectionBase::getRootSymbol(name);
     rootSymbolToSectionIdxs[name].insert(sectionIdxs.begin(),
                                          sectionIdxs.end());
-    // Linkage names can be prefixed with "_" or "l_" on Mach-O. See
-    // Mangler::getNameWithPrefix() for details.
-    if (name.consume_front("_") || name.consume_front("l_"))
+    if (sections[*sectionIdxs.begin()]->needResolveLinkageName(name))
       rootSymbolToSectionIdxs[name].insert(sectionIdxs.begin(),
                                            sectionIdxs.end());
   }
@@ -341,7 +339,7 @@ BPSectionOrdererBase::reorderSectionsByBalancedPartitioning(
             StringRef rootSymbol = d->getName();
             rootSymbol = BPSectionBase::getRootSymbol(rootSymbol);
             symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage);
-            if (rootSymbol.consume_front("_") || rootSymbol.consume_front("l_"))
+            if (isec->needResolveLinkageName(rootSymbol))
               symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage);
           }
         }
diff --git a/lld/ELF/BPSectionOrderer.h b/lld/ELF/BPSectionOrderer.h
index a9a977782a86c4..c5628eddb7fa43 100644
--- a/lld/ELF/BPSectionOrderer.h
+++ b/lld/ELF/BPSectionOrderer.h
@@ -100,6 +100,10 @@ class BPSectionELF : public BPSectionBase {
         reinterpret_cast<const std::unique_ptr<BPSymbol> *>(&symbol), 1);
   }
 
+  bool needResolveLinkageName(llvm::StringRef &name) const override {
+    return false;
+  }
+
   void getSectionHash(llvm::SmallVectorImpl<uint64_t> &hashes,
                       const llvm::DenseMap<const BPSectionBase *, uint64_t>
                           &sectionToIdx) const override {
diff --git a/lld/MachO/BPSectionOrderer.h b/lld/MachO/BPSectionOrderer.h
index e689eefca95985..c97d6800a77b90 100644
--- a/lld/MachO/BPSectionOrderer.h
+++ b/lld/MachO/BPSectionOrderer.h
@@ -83,6 +83,12 @@ class BPSectionMacho : public BPSectionBase {
     return symbols;
   }
 
+  // Linkage names can be prefixed with "_" or "l_" on Mach-O. See
+  // Mangler::getNameWithPrefix() for details.
+  bool needResolveLinkageName(llvm::StringRef &name) const override {
+    return (name.consume_front("_") || name.consume_front("l_"));
+  }
+
   void getSectionHash(llvm::SmallVectorImpl<uint64_t> &hashes,
                       const llvm::DenseMap<const BPSectionBase *, uint64_t>
                           &sectionToIdx) const override {
diff --git a/lld/include/lld/Common/BPSectionOrdererBase.h b/lld/include/lld/Common/BPSectionOrdererBase.h
index b08d78a151304b..7feed554823f09 100644
--- a/lld/include/lld/Common/BPSectionOrdererBase.h
+++ b/lld/include/lld/Common/BPSectionOrdererBase.h
@@ -45,6 +45,7 @@ class BPSectionBase {
   getSectionHash(llvm::SmallVectorImpl<uint64_t> &hashes,
                  const llvm::DenseMap<const BPSectionBase *, uint64_t>
                      &sectionToIdx) const = 0;
+  virtual bool needResolveLinkageName(llvm::StringRef &name) const = 0;
   static llvm::StringRef getRootSymbol(llvm::StringRef Name) {
     auto [P0, S0] = Name.rsplit(".llvm.");
     auto [P1, S1] = P0.rsplit(".__uniq.");

>From 4baa3d241be1274c8eb643fa4f63bcffd4b3ca6a Mon Sep 17 00:00:00 2001
From: xupengying <xpy66swsry at gmail.com>
Date: Fri, 6 Dec 2024 17:11:32 +0800
Subject: [PATCH 7/7] fix: move sectionIdx into BPSection* to avoid passing
 sectionIdx every time

---
 lld/Common/BPSectionOrdererBase.cpp           |  2 +-
 lld/ELF/BPSectionOrderer.h                    | 10 +------
 lld/MachO/BPSectionOrderer.cpp                |  3 +-
 lld/MachO/BPSectionOrderer.h                  | 28 ++++++++-----------
 lld/include/lld/Common/BPSectionOrdererBase.h |  4 +--
 5 files changed, 16 insertions(+), 31 deletions(-)

diff --git a/lld/Common/BPSectionOrdererBase.cpp b/lld/Common/BPSectionOrdererBase.cpp
index 322343572855df..a038a11e418158 100644
--- a/lld/Common/BPSectionOrdererBase.cpp
+++ b/lld/Common/BPSectionOrdererBase.cpp
@@ -41,7 +41,7 @@ static SmallVector<std::pair<unsigned, UtilityNodes>> getUnsForCompression(
 
   for (unsigned sectionIdx : sectionIdxs) {
     const auto *isec = sections[sectionIdx];
-    isec->getSectionHash(hashes, sectionToIdx);
+    isec->getSectionHash(hashes);
     sectionHashes.emplace_back(sectionIdx, std::move(hashes));
     hashes.clear();
   }
diff --git a/lld/ELF/BPSectionOrderer.h b/lld/ELF/BPSectionOrderer.h
index c5628eddb7fa43..a2cc4b2d8f966f 100644
--- a/lld/ELF/BPSectionOrderer.h
+++ b/lld/ELF/BPSectionOrderer.h
@@ -104,17 +104,9 @@ class BPSectionELF : public BPSectionBase {
     return false;
   }
 
-  void getSectionHash(llvm::SmallVectorImpl<uint64_t> &hashes,
-                      const llvm::DenseMap<const BPSectionBase *, uint64_t>
-                          &sectionToIdx) const override {
+  void getSectionHash(llvm::SmallVectorImpl<uint64_t> &hashes) const override {
     constexpr unsigned windowSize = 4;
 
-    // Convert BPSectionBase map to InputSection map
-    llvm::DenseMap<const InputSectionBase *, uint64_t> elfSectionToIdx;
-    for (const auto &[sec, idx] : sectionToIdx)
-      if (auto *elfSec = llvm::dyn_cast<BPSectionELF>(sec))
-        elfSectionToIdx[elfSec->getSection()] = idx;
-
     // Calculate content hashes
     size_t size = isec->content().size();
     for (size_t i = 0; i < size; i++) {
diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index be7fa04a552153..721770f4b4a27f 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -31,7 +31,8 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
         auto *isec = subsec.isec;
         if (!isec || isec->data.empty() || !isec->data.data())
           continue;
-        sections.emplace_back(std::make_unique<BPSectionMacho>(isec));
+        sections.emplace_back(
+            std::make_unique<BPSectionMacho>(isec, sections.size()));
       }
     }
   }
diff --git a/lld/MachO/BPSectionOrderer.h b/lld/MachO/BPSectionOrderer.h
index c97d6800a77b90..b5aa1eafbcc136 100644
--- a/lld/MachO/BPSectionOrderer.h
+++ b/lld/MachO/BPSectionOrderer.h
@@ -57,10 +57,12 @@ class BPSymbolMacho : public BPSymbol {
 
 class BPSectionMacho : public BPSectionBase {
   const InputSection *isec;
+  uint64_t sectionIdx;
   mutable std::vector<std::unique_ptr<BPSymbol>> symbols;
 
 public:
-  explicit BPSectionMacho(const InputSection *sec) : isec(sec) {}
+  explicit BPSectionMacho(const InputSection *sec, uint64_t sectionIdx)
+      : isec(sec), sectionIdx(sectionIdx) {}
 
   const InputSection *getSection() const { return isec; }
 
@@ -68,6 +70,8 @@ class BPSectionMacho : public BPSectionBase {
 
   uint64_t getSize() const override { return isec->getSize(); }
 
+  uint64_t getSectionIdx() const { return sectionIdx; }
+
   bool isCodeSection() const override { return macho::isCodeSection(isec); }
 
   bool hasValidData() const override {
@@ -89,17 +93,9 @@ class BPSectionMacho : public BPSectionBase {
     return (name.consume_front("_") || name.consume_front("l_"));
   }
 
-  void getSectionHash(llvm::SmallVectorImpl<uint64_t> &hashes,
-                      const llvm::DenseMap<const BPSectionBase *, uint64_t>
-                          &sectionToIdx) const override {
+  void getSectionHash(llvm::SmallVectorImpl<uint64_t> &hashes) const override {
     constexpr unsigned windowSize = 4;
 
-    // Convert BPSectionBase map to InputSection map
-    llvm::DenseMap<const InputSection *, uint64_t> machoSectionToIdx;
-    for (const auto &[sec, idx] : sectionToIdx)
-      if (auto *machoSec = llvm::dyn_cast<BPSectionMacho>(sec))
-        machoSectionToIdx[machoSec->getInputSection()] = idx;
-
     // Calculate content hashes
     size_t dataSize = isec->data.size();
     for (size_t i = 0; i < dataSize; i++) {
@@ -112,7 +108,7 @@ class BPSectionMacho : public BPSectionBase {
       if (r.length == 0 || r.referent.isNull() || r.offset >= isec->data.size())
         continue;
 
-      uint64_t relocHash = getRelocHash(r, machoSectionToIdx);
+      uint64_t relocHash = getRelocHash(r, this);
       uint32_t start = (r.offset < windowSize) ? 0 : r.offset - windowSize + 1;
       for (uint32_t i = start; i < r.offset + r.length; i++) {
         auto window = isec->data.drop_front(i).take_front(windowSize);
@@ -129,14 +125,12 @@ class BPSectionMacho : public BPSectionBase {
   static bool classof(const BPSectionBase *s) { return true; }
 
 private:
-  static uint64_t getRelocHash(
-      const Reloc &reloc,
-      const llvm::DenseMap<const InputSection *, uint64_t> &sectionToIdx) {
+  static uint64_t getRelocHash(const Reloc &reloc,
+                               const BPSectionMacho *section) {
     auto *isec = reloc.getReferentInputSection();
     std::optional<uint64_t> sectionIdx;
-    auto sectionIdxIt = sectionToIdx.find(isec);
-    if (sectionIdxIt != sectionToIdx.end())
-      sectionIdx = sectionIdxIt->getSecond();
+    if (isec && isec == section->getSection())
+      sectionIdx = section->getSectionIdx();
 
     std::string kind;
     if (isec)
diff --git a/lld/include/lld/Common/BPSectionOrdererBase.h b/lld/include/lld/Common/BPSectionOrdererBase.h
index 7feed554823f09..11dad55973dd09 100644
--- a/lld/include/lld/Common/BPSectionOrdererBase.h
+++ b/lld/include/lld/Common/BPSectionOrdererBase.h
@@ -42,9 +42,7 @@ class BPSectionBase {
   virtual llvm::ArrayRef<uint8_t> getSectionData() const = 0;
   virtual llvm::ArrayRef<std::unique_ptr<BPSymbol>> getSymbols() const = 0;
   virtual void
-  getSectionHash(llvm::SmallVectorImpl<uint64_t> &hashes,
-                 const llvm::DenseMap<const BPSectionBase *, uint64_t>
-                     &sectionToIdx) const = 0;
+  getSectionHash(llvm::SmallVectorImpl<uint64_t> &hashes) const = 0;
   virtual bool needResolveLinkageName(llvm::StringRef &name) const = 0;
   static llvm::StringRef getRootSymbol(llvm::StringRef Name) {
     auto [P0, S0] = Name.rsplit(".llvm.");



More information about the llvm-commits mailing list