[lld] [lld][InstrProf] Profile guided function order (PR #96268)

Ellis Hoag via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 19 13:28:21 PDT 2024


https://github.com/ellishg updated https://github.com/llvm/llvm-project/pull/96268

>From 367785047c4832220a448bf2bc9d5421fd690df9 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag at gmail.com>
Date: Thu, 20 Jun 2024 15:18:12 -0700
Subject: [PATCH 1/8] [lld][InstrProf] Profile guided function order

Add the lld flags `--profile-guided-function-order=<profile>`, `--function-order-for-compression`, and `--data-order-for-compression`, which respectively order functions to improve startup time, order functions to improve compressed size, and order data to improve compressed size.

We use Balanced Partitioning to determine the best section order using traces from IRPGO profiles (see https://discourse.llvm.org/t/rfc-temporal-profiling-extension-for-irpgo/68068 for details) to improve startup time and hashes of section contents to improve compressed size.

In my recent LLVM talk, I showed that we can reduce page faults during startup by 40% on a large iOS app and we can reduce compressed size by 0.8-3%.

More details can be found in https://dl.acm.org/doi/10.1145/3660635
---
 lld/MachO/BPSectionOrderer.cpp             | 420 +++++++++++++++++++++
 lld/MachO/BPSectionOrderer.h               |  35 ++
 lld/MachO/CMakeLists.txt                   |   2 +
 lld/MachO/Config.h                         |   3 +
 lld/MachO/Driver.cpp                       |   5 +
 lld/MachO/Options.td                       |   8 +
 lld/MachO/SectionPriorities.cpp            |   9 +-
 lld/test/MachO/bp-section-orderer-stress.s | 105 ++++++
 lld/test/MachO/bp-section-orderer.s        | 115 ++++++
 9 files changed, 701 insertions(+), 1 deletion(-)
 create mode 100644 lld/MachO/BPSectionOrderer.cpp
 create mode 100644 lld/MachO/BPSectionOrderer.h
 create mode 100644 lld/test/MachO/bp-section-orderer-stress.s
 create mode 100644 lld/test/MachO/bp-section-orderer.s

diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
new file mode 100644
index 0000000000000..c2259aefecdf0
--- /dev/null
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -0,0 +1,420 @@
+//===- BPSectionOrderer.cpp--------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPSectionOrderer.h"
+#include "InputSection.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/Support/BalancedPartitioning.h"
+#include "llvm/Support/TimeProfiler.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/Support/xxhash.h"
+
+#define DEBUG_TYPE "bp-section-orderer"
+using namespace llvm;
+using namespace lld::macho;
+
+// True iff S is non-empty and only decimal digits. TODO: Move to StringRef.h
+static bool isNumber(StringRef S) {
+  return !S.empty() && S.find_first_not_of("0123456789") == StringRef::npos;
+}
+
+/// Symbols can be appended with "(.__uniq.xxxx)?.llvm.yyyy" where "xxxx" and
+/// "yyyy" are numbers that could change between builds. We need to use the root
+/// symbol name before this suffix so these symbols can be matched with profiles
+/// which may have different suffixes.
+static StringRef getRootSymbol(StringRef Name) {
+  auto [P0, S0] = Name.rsplit(".llvm.");
+  if (isNumber(S0)) // Only strip ".llvm.yyyy" when "yyyy" is all digits.
+    Name = P0;
+  auto [P1, S1] = Name.rsplit(".__uniq."); // Applied after the ".llvm." strip.
+  if (isNumber(S1))
+    return P1;
+  return Name; // No numeric ".__uniq." suffix: keep what we have so far.
+}
+
+static uint64_t getRelocHash(StringRef kind, uint64_t sectionIdx,
+                             uint64_t offset, uint64_t addend) {
+  return xxHash64((kind + ": " + Twine::utohexstr(sectionIdx) + " + " +
+                   Twine::utohexstr(offset) + " + " + Twine::utohexstr(addend))
+                      .str()); // key: "<kind>: <idx> + <off> + <addend>" (hex)
+}
+
+static uint64_t
+getRelocHash(const Reloc &reloc,
+             const DenseMap<const InputSection *, uint64_t> &sectionToIdx) {
+  auto *isec = reloc.getReferentInputSection();
+  std::optional<uint64_t> sectionIdx; // Stays empty if isec is not in the map.
+  auto sectionIdxIt = sectionToIdx.find(isec);
+  if (sectionIdxIt != sectionToIdx.end())
+    sectionIdx = sectionIdxIt->getSecond();
+  std::string kind; // Textual tag built from the section and symbol kinds.
+  if (isec)
+    kind = ("Section " + Twine(isec->kind())).str();
+  if (auto *sym = reloc.referent.dyn_cast<Symbol *>()) {
+    kind += (" Symbol " + Twine(sym->kind())).str();
+    if (auto *d = dyn_cast<Defined>(sym)) {
+      if (isa_and_nonnull<CStringInputSection>(isec)) // Index forced to 0.
+        return getRelocHash(kind, 0, isec->getOffset(d->value), reloc.addend);
+      return getRelocHash(kind, sectionIdx.value_or(0), d->value, reloc.addend);
+    }
+  } // Otherwise (referent is not a Defined symbol) hash with offset 0.
+  return getRelocHash(kind, sectionIdx.value_or(0), 0, reloc.addend);
+}
+
+static void constructNodesForCompression(
+    const SmallVector<const InputSection *> &sections,
+    const DenseMap<const InputSection *, uint64_t> &sectionToIdx,
+    const SmallVector<unsigned> &sectionIdxs,
+    std::vector<BPFunctionNode> &nodes,
+    DenseMap<unsigned, SmallVector<unsigned>> &duplicateSectionIdxs,
+    BPFunctionNode::UtilityNodeT &maxUN) {
+
+  SmallVector<std::pair<unsigned, SmallVector<uint64_t>>> sectionHashes;
+  sectionHashes.reserve(sectionIdxs.size());
+  SmallVector<uint64_t> hashes; // Scratch buffer, cleared after each section.
+  for (unsigned sectionIdx : sectionIdxs) {
+    const auto *isec = sections[sectionIdx];
+    constexpr unsigned windowSize = 4; // Bytes per hashed window.
+
+    for (size_t i = 0; i < isec->data.size(); i++) { // Window at every offset.
+      auto window = isec->data.drop_front(i).take_front(windowSize);
+      hashes.push_back(xxHash64(window));
+    }
+    for (const auto &r : isec->relocs) {
+      if (r.length == 0 || r.referent.isNull() || r.offset >= isec->data.size())
+        continue;
+      uint64_t relocHash = getRelocHash(r, sectionToIdx);
+      uint32_t start = (r.offset < windowSize) ? 0 : r.offset - windowSize + 1;
+      for (uint32_t i = start; i < r.offset + r.length; i++) {
+        auto window = isec->data.drop_front(i).take_front(windowSize);
+        hashes.push_back(xxHash64(window) + relocHash); // Mix in the target.
+      }
+    }
+
+    llvm::sort(hashes); // Sort so std::unique can drop repeated hashes.
+    hashes.erase(std::unique(hashes.begin(), hashes.end()), hashes.end());
+
+    sectionHashes.emplace_back(sectionIdx, hashes);
+    hashes.clear();
+  }
+
+  DenseMap<uint64_t, unsigned> hashFrequency; // Hash -> #sections with it.
+  for (auto &[sectionIdx, hashes] : sectionHashes)
+    for (auto hash : hashes)
+      ++hashFrequency[hash];
+
+  // Merge nearly identical sections: same XOR of hashes seen in >5 sections.
+  SmallVector<std::pair<unsigned, SmallVector<uint64_t>>> newSectionHashes;
+  DenseMap<uint64_t, unsigned> wholeHashToSectionIdx;
+  for (auto &[sectionIdx, hashes] : sectionHashes) {
+    uint64_t wholeHash = 0;
+    for (auto hash : hashes)
+      if (hashFrequency[hash] > 5)
+        wholeHash ^= hash;
+    auto [it, wasInserted] =
+        wholeHashToSectionIdx.insert(std::make_pair(wholeHash, sectionIdx));
+    if (wasInserted) {
+      newSectionHashes.emplace_back(sectionIdx, hashes);
+    } else {
+      duplicateSectionIdxs[it->getSecond()].push_back(sectionIdx);
+    }
+  }
+  sectionHashes = newSectionHashes; // Duplicates are excluded from here on.
+
+  // Recompute hash frequencies over the remaining (deduplicated) sections
+  hashFrequency.clear();
+  for (auto &[sectionIdx, hashes] : sectionHashes)
+    for (auto hash : hashes)
+      ++hashFrequency[hash];
+
+  // Filter rare and common hashes and assign each a unique utility node that
+  // doesn't conflict with the trace utility nodes
+  DenseMap<uint64_t, BPFunctionNode::UtilityNodeT> hashToUN;
+  for (auto &[hash, frequency] : hashFrequency) {
+    if (frequency <= 1 || frequency * 2 > wholeHashToSectionIdx.size())
+      continue; // In only one section, or in more than half of them.
+    hashToUN[hash] = ++maxUN;
+  }
+
+  std::vector<BPFunctionNode::UtilityNodeT> uns; // Scratch, reused per node.
+  for (auto &[sectionIdx, hashes] : sectionHashes) {
+    for (auto &hash : hashes) {
+      auto it = hashToUN.find(hash);
+      if (it != hashToUN.end())
+        uns.push_back(it->second);
+    }
+    nodes.emplace_back(sectionIdx, uns);
+    uns.clear();
+  }
+}
+
+DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
+    size_t &highestAvailablePriority, StringRef profilePath,
+    bool forFunctionCompression, bool forDataCompression) {
+  // Steps: index sections, build BP nodes, run BP, then assign priorities.
+  SmallVector<const InputSection *> sections;
+  DenseMap<const InputSection *, uint64_t> sectionToIdx;
+  StringMap<DenseSet<unsigned>> symbolToSectionIdxs;
+  for (const auto *file : inputFiles) {
+    for (auto *sec : file->sections) {
+      for (auto &subsec : sec->subsections) {
+        auto *isec = subsec.isec;
+        if (!isec || isec->data.empty() || !isec->data.data())
+          continue; // Sections without contents are not ordered here.
+        unsigned sectionIdx = sections.size();
+        sectionToIdx.try_emplace(isec, sectionIdx);
+        sections.push_back(isec);
+        for (Symbol *sym : isec->symbols)
+          if (auto *d = dyn_cast_or_null<Defined>(sym))
+            symbolToSectionIdxs[d->getName()].insert(sectionIdx);
+      }
+    }
+  }
+
+  StringMap<DenseSet<unsigned>> rootSymbolToSectionIdxs;
+  for (auto &[name, sectionIdxs] : symbolToSectionIdxs) {
+    name = getRootSymbol(name);
+    rootSymbolToSectionIdxs[name].insert(sectionIdxs.begin(),
+                                         sectionIdxs.end());
+    // Linkage names can be prefixed with "_" or "l_" on Mach-O. See
+    // Mangler::getNameWithPrefix() for details.
+    if (name.consume_front("_") || name.consume_front("l_"))
+      rootSymbolToSectionIdxs[name].insert(sectionIdxs.begin(),
+                                           sectionIdxs.end());
+  }
+
+  std::vector<BPFunctionNode> nodesForStartup;
+  BPFunctionNode::UtilityNodeT maxUN = 0; // Last utility node id handed out.
+  DenseMap<unsigned, SmallVector<BPFunctionNode::UtilityNodeT>>
+      startupSectionIdxUNs;
+  std::unique_ptr<InstrProfReader> reader;
+  if (!profilePath.empty()) {
+    auto fs = vfs::getRealFileSystem();
+    auto readerOrErr = InstrProfReader::create(profilePath, *fs);
+    if (!readerOrErr) // Never call get() on an Expected holding an error.
+      lld::fatal(profilePath + ": " + toString(readerOrErr.takeError()));
+    reader = std::move(readerOrErr.get());
+    for (auto &entry : *reader) {
+      // Read all entries
+      (void)entry;
+    }
+    auto &traces = reader->getTemporalProfTraces();
+
+    // Used to define the initial order for startup functions.
+    DenseMap<unsigned, size_t> sectionIdxToTimestamp;
+    DenseMap<unsigned, BPFunctionNode::UtilityNodeT> sectionIdxToFirstUN;
+    for (size_t traceIdx = 0; traceIdx < traces.size(); traceIdx++) {
+      uint64_t currentSize = 0, cutoffSize = 1;
+      size_t cutoffTimestamp = 1;
+      auto &trace = traces[traceIdx].FunctionNameRefs;
+      for (size_t timestamp = 0; timestamp < trace.size(); timestamp++) {
+        auto [Filename, ParsedFuncName] = getParsedIRPGOName(
+            reader->getSymtab().getFuncOrVarName(trace[timestamp]));
+        ParsedFuncName = getRootSymbol(ParsedFuncName);
+
+        auto sectionIdxsIt = rootSymbolToSectionIdxs.find(ParsedFuncName);
+        if (sectionIdxsIt == rootSymbolToSectionIdxs.end())
+          continue; // Traced function has no matching section in this link.
+        auto &sectionIdxs = sectionIdxsIt->getValue();
+        // If the same symbol is found in multiple sections, they might be
+        // identical, so we arbitrarily use the size from the first section.
+        currentSize += sections[*sectionIdxs.begin()]->getSize();
+
+        // Since BalancedPartitioning is sensitive to the initial order, we need
+        // to explicitly define it to be ordered by earliest timestamp.
+        for (unsigned sectionIdx : sectionIdxs) {
+          auto [it, wasInserted] =
+              sectionIdxToTimestamp.try_emplace(sectionIdx, timestamp);
+          if (!wasInserted)
+            it->getSecond() = std::min<size_t>(it->getSecond(), timestamp);
+        }
+
+        if (timestamp >= cutoffTimestamp || currentSize >= cutoffSize) {
+          ++maxUN; // Open a new utility node and raise both cutoffs.
+          cutoffSize = 2 * currentSize;
+          cutoffTimestamp = 2 * cutoffTimestamp;
+        }
+        for (unsigned sectionIdx : sectionIdxs)
+          sectionIdxToFirstUN.try_emplace(sectionIdx, maxUN);
+      }
+      for (auto &[sectionIdx, firstUN] : sectionIdxToFirstUN)
+        for (auto un = firstUN; un <= maxUN; ++un)
+          startupSectionIdxUNs[sectionIdx].push_back(un);
+      ++maxUN; // The next trace starts from a fresh utility node id.
+      sectionIdxToFirstUN.clear();
+    }
+
+    // These uns should already be sorted without duplicates.
+    for (auto &[sectionIdx, uns] : startupSectionIdxUNs)
+      nodesForStartup.emplace_back(sectionIdx, uns);
+
+    llvm::sort(nodesForStartup, [&sectionIdxToTimestamp](auto &L, auto &R) {
+      return std::make_pair(sectionIdxToTimestamp[L.Id], L.Id) <
+             std::make_pair(sectionIdxToTimestamp[R.Id], R.Id);
+    });
+  }
+
+  SmallVector<unsigned> sectionIdxsForFunctionCompression,
+      sectionIdxsForDataCompression;
+  for (unsigned sectionIdx = 0; sectionIdx < sections.size(); sectionIdx++) {
+    if (startupSectionIdxUNs.count(sectionIdx))
+      continue;
+    const auto *isec = sections[sectionIdx];
+    if (isCodeSection(isec)) {
+      sectionIdxsForFunctionCompression.push_back(sectionIdx);
+    } else {
+      sectionIdxsForDataCompression.push_back(sectionIdx);
+    }
+  }
+
+  std::vector<BPFunctionNode> nodesForFunctionCompression,
+      nodesForDataCompression;
+  // Map a section index (to be ordered for compression) to a list of duplicate
+  // section indices (not ordered for compression).
+  DenseMap<unsigned, SmallVector<unsigned>> duplicateFunctionSectionIdxs,
+      duplicateDataSectionIdxs;
+  if (forFunctionCompression) {
+    TimeTraceScope timeScope("Build nodes for function compression");
+    constructNodesForCompression(
+        sections, sectionToIdx, sectionIdxsForFunctionCompression,
+        nodesForFunctionCompression, duplicateFunctionSectionIdxs, maxUN);
+  }
+  if (forDataCompression) {
+    TimeTraceScope timeScope("Build nodes for data compression");
+    constructNodesForCompression(
+        sections, sectionToIdx, sectionIdxsForDataCompression,
+        nodesForDataCompression, duplicateDataSectionIdxs, maxUN);
+  }
+
+  // Sort nodes by their Id (which is the section index) because the input
+  // linker order tends to be not bad
+  llvm::sort(nodesForFunctionCompression,
+             [](auto &L, auto &R) { return L.Id < R.Id; });
+  llvm::sort(nodesForDataCompression,
+             [](auto &L, auto &R) { return L.Id < R.Id; });
+
+  {
+    TimeTraceScope timeScope("Balanced Partitioning");
+    BalancedPartitioningConfig config;
+    BalancedPartitioning bp(config);
+    bp.run(nodesForStartup);
+    bp.run(nodesForFunctionCompression);
+    bp.run(nodesForDataCompression);
+  }
+
+  unsigned numStartupSections = 0;
+  unsigned numCodeCompressionSections = 0;
+  unsigned numDuplicateCodeSections = 0;
+  unsigned numDataCompressionSections = 0;
+  unsigned numDuplicateDataSections = 0;
+  SetVector<const InputSection *> orderedSections;
+  // Order startup functions,
+  for (auto &node : nodesForStartup) {
+    const auto *isec = sections[node.Id];
+    if (orderedSections.insert(isec))
+      ++numStartupSections;
+  }
+  // then functions for compression,
+  for (auto &node : nodesForFunctionCompression) {
+    const auto *isec = sections[node.Id];
+    if (orderedSections.insert(isec))
+      ++numCodeCompressionSections;
+
+    auto It = duplicateFunctionSectionIdxs.find(node.Id);
+    if (It == duplicateFunctionSectionIdxs.end())
+      continue;
+    for (auto dupSecIdx : It->getSecond()) { // Duplicates follow their original.
+      const auto *dupIsec = sections[dupSecIdx];
+      if (orderedSections.insert(dupIsec))
+        ++numDuplicateCodeSections;
+    }
+  }
+  // then data for compression.
+  for (auto &node : nodesForDataCompression) {
+    const auto *isec = sections[node.Id];
+    if (orderedSections.insert(isec))
+      ++numDataCompressionSections;
+    auto It = duplicateDataSectionIdxs.find(node.Id);
+    if (It == duplicateDataSectionIdxs.end())
+      continue;
+    for (auto dupSecIdx : It->getSecond()) { // Duplicates follow their original.
+      const auto *dupIsec = sections[dupSecIdx];
+      if (orderedSections.insert(dupIsec))
+        ++numDuplicateDataSections;
+    }
+  }
+
+#ifndef NDEBUG
+  unsigned numTotalOrderedSections =
+      numStartupSections + numCodeCompressionSections +
+      numDuplicateCodeSections + numDataCompressionSections +
+      numDuplicateDataSections;
+  dbgs() << "Ordered " << numTotalOrderedSections
+         << " sections using balanced partitioning:\n  Functions for startup: "
+         << numStartupSections
+         << "\n  Functions for compression: " << numCodeCompressionSections
+         << "\n  Duplicate functions: " << numDuplicateCodeSections
+         << "\n  Data for compression: " << numDataCompressionSections
+         << "\n  Duplicate data: " << numDuplicateDataSections << "\n";
+
+  if (!profilePath.empty()) {
+    // Evaluate this function order for startup
+    StringMap<std::pair<uint64_t, uint64_t>> symbolToPageNumbers;
+    const uint64_t pageSize = (1 << 14);
+    uint64_t currentAddress = 0;
+    for (const auto *isec : orderedSections) {
+      for (Symbol *sym : isec->symbols) {
+        if (auto *d = dyn_cast_or_null<Defined>(sym)) {
+          uint64_t startAddress = currentAddress + d->value;
+          uint64_t endAddress = startAddress + d->size;
+          uint64_t firstPage = startAddress / pageSize;
+          // I think the kernel might pull in a few pages when one is touched,
+          // so it might be more accurate to force lastPage to be aligned by 4?
+          uint64_t lastPage = endAddress / pageSize;
+          StringRef rootSymbol = d->getName();
+          rootSymbol = getRootSymbol(rootSymbol);
+          symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage);
+          if (rootSymbol.consume_front("_") || rootSymbol.consume_front("l_"))
+            symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage);
+        }
+      }
+
+      currentAddress += isec->getSize();
+    }
+
+    // The area under the curve F where F(t) is the total number of page faults
+    // at step t.
+    unsigned area = 0;
+    for (auto &trace : reader->getTemporalProfTraces()) {
+      SmallSet<uint64_t, 0> touchedPages;
+      for (unsigned step = 0; step < trace.FunctionNameRefs.size(); step++) {
+        auto traceId = trace.FunctionNameRefs[step];
+        auto [Filename, ParsedFuncName] =
+            getParsedIRPGOName(reader->getSymtab().getFuncOrVarName(traceId));
+        ParsedFuncName = getRootSymbol(ParsedFuncName);
+        auto it = symbolToPageNumbers.find(ParsedFuncName);
+        if (it != symbolToPageNumbers.end()) {
+          auto &[firstPage, lastPage] = it->getValue();
+          for (uint64_t i = firstPage; i <= lastPage; i++)
+            touchedPages.insert(i);
+        }
+        area += touchedPages.size();
+      }
+    }
+    dbgs() << "Total area under the page fault curve: " << (float)area << "\n";
+  }
+#endif
+
+  DenseMap<const InputSection *, size_t> sectionPriorities;
+  for (const auto *isec : orderedSections) // Earlier sections get larger values.
+    sectionPriorities[isec] = --highestAvailablePriority;
+  return sectionPriorities;
+}
diff --git a/lld/MachO/BPSectionOrderer.h b/lld/MachO/BPSectionOrderer.h
new file mode 100644
index 0000000000000..9d8302b441a7c
--- /dev/null
+++ b/lld/MachO/BPSectionOrderer.h
@@ -0,0 +1,35 @@
+//===- BPSectionOrderer.h ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file uses Balanced Partitioning to order sections to improve startup
+/// time and compressed size.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_MACHO_BPSECTION_ORDERER_H
+#define LLD_MACHO_BPSECTION_ORDERER_H
+
+#include "llvm/ADT/DenseMap.h"
+
+namespace lld::macho {
+
+class InputSection;
+
+/// Run Balanced Partitioning to find the optimial function and data order to
+/// improve startup time and compressed size.
+///
+/// It is important that .subsections_via_symbols is used to ensure functions
+/// and data are in their own sections and thus can be reordered.
+llvm::DenseMap<const lld::macho::InputSection *, size_t>
+runBalancedPartitioning(size_t &highestAvailablePriority,
+                        llvm::StringRef profilePath,
+                        bool forFunctionCompression, bool forDataCompression);
+
+} // namespace lld::macho
+
+#endif
diff --git a/lld/MachO/CMakeLists.txt b/lld/MachO/CMakeLists.txt
index 0b92488b00bea..8b7183e4ec496 100644
--- a/lld/MachO/CMakeLists.txt
+++ b/lld/MachO/CMakeLists.txt
@@ -25,6 +25,7 @@ add_lld_library(lldMachO
   OutputSection.cpp
   OutputSegment.cpp
   Relocations.cpp
+  BPSectionOrderer.cpp
   SectionPriorities.cpp
   SymbolTable.cpp
   Symbols.cpp
@@ -47,6 +48,7 @@ add_lld_library(lldMachO
   Object
   Option
   Passes
+  ProfileData
   Support
   TargetParser
   TextAPI
diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index e79812b16ec12..ac860438e65f1 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -213,6 +213,9 @@ struct Configuration {
   llvm::StringRef csProfilePath;
   bool pgoWarnMismatch;
   bool warnThinArchiveMissingMembers;
+  llvm::StringRef profileGuidedFunctionOrderPath;
+  bool functionOrderForCompression;
+  bool dataOrderForCompression;
 
   bool callGraphProfileSort = false;
   llvm::StringRef printSymbolOrder;
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index dc9d635b48ec4..efa2e368e6f62 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1735,6 +1735,11 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
       args.hasFlag(OPT_warn_thin_archive_missing_members,
                    OPT_no_warn_thin_archive_missing_members, true);
   config->generateUuid = !args.hasArg(OPT_no_uuid);
+  config->profileGuidedFunctionOrderPath =
+      args.getLastArgValue(OPT_profile_guided_function_order);
+  config->functionOrderForCompression =
+      args.hasArg(OPT_function_order_for_compression);
+  config->dataOrderForCompression = args.hasArg(OPT_data_order_for_compression);
 
   for (const Arg *arg : args.filtered(OPT_alias)) {
     config->aliasedSymbols.push_back(
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index bbd8bf70c3a0c..71f403855deb9 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -156,6 +156,14 @@ defm pgo_warn_mismatch: BB<"pgo-warn-mismatch",
 defm warn_thin_archive_missing_members : BB<"warn-thin-archive-missing-members",
   "Warn on missing object files referenced by thin archives (default)",
   "Do not warn on missing object files referenced by thin archives">, Group<grp_lld>;
+def profile_guided_function_order: Joined<["--"], "profile-guided-function-order=">,
+    MetaVarName<"<profile>">, 
+    HelpText<"Read traces from <profile> to order functions to improve startup time">,
+    Group<grp_lld>;
+def function_order_for_compression: Flag<["--"], "function-order-for-compression">,
+    HelpText<"Order functions to improve compressed size">, Group<grp_lld>;
+def data_order_for_compression: Flag<["--"], "data-order-for-compression">,
+    HelpText<"Order data to improve compressed size">, Group<grp_lld>;
 
 // This is a complete Options.td compiled from Apple's ld(1) manpage
 // dated 2018-03-07 and cross checked with ld64 source code in repo
diff --git a/lld/MachO/SectionPriorities.cpp b/lld/MachO/SectionPriorities.cpp
index 907aee29d2386..f4a7e5fdb9cb1 100644
--- a/lld/MachO/SectionPriorities.cpp
+++ b/lld/MachO/SectionPriorities.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "SectionPriorities.h"
+#include "BPSectionOrderer.h"
 #include "Config.h"
 #include "InputFiles.h"
 #include "Symbols.h"
@@ -352,7 +353,13 @@ void macho::PriorityBuilder::parseOrderFile(StringRef path) {
 DenseMap<const InputSection *, size_t>
 macho::PriorityBuilder::buildInputSectionPriorities() {
   DenseMap<const InputSection *, size_t> sectionPriorities;
-  if (config->callGraphProfileSort) {
+  if (!config->profileGuidedFunctionOrderPath.empty() ||
+      config->functionOrderForCompression || config->dataOrderForCompression) {
+    TimeTraceScope timeScope("Balanced Partitioning Section Orderer");
+    sectionPriorities = runBalancedPartitioning(
+        highestAvailablePriority, config->profileGuidedFunctionOrderPath,
+        config->functionOrderForCompression, config->dataOrderForCompression);
+  } else if (config->callGraphProfileSort) {
     // Sort sections by the profile data provided by __LLVM,__cg_profile
     // sections.
     //
diff --git a/lld/test/MachO/bp-section-orderer-stress.s b/lld/test/MachO/bp-section-orderer-stress.s
new file mode 100644
index 0000000000000..a536c79d45021
--- /dev/null
+++ b/lld/test/MachO/bp-section-orderer-stress.s
@@ -0,0 +1,105 @@
+# REQUIRES: aarch64
+
+# Generate a large test case and check that the output is deterministic.
+
+# RUN: %python %s %t.s %t.proftext
+
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t.s -o %t.o
+# RUN: llvm-profdata merge %t.proftext -o %t.profdata
+
+# RUN: %lld -arch arm64 -lSystem -e _main --icf=all -o - %t.o --profile-guided-function-order=%t.profdata --function-order-for-compression --data-order-for-compression | llvm-nm --numeric-sort --format=just-symbols - > %t.order1.txt
+# RUN: %lld -arch arm64 -lSystem -e _main --icf=all -o - %t.o --profile-guided-function-order=%t.profdata --function-order-for-compression --data-order-for-compression | llvm-nm --numeric-sort --format=just-symbols - > %t.order2.txt
+# RUN: diff %t.order1.txt %t.order2.txt
+
+import random
+import sys
+
+assembly_filepath = sys.argv[1]  # first argument: assembly file to write
+proftext_filepath = sys.argv[2]  # second argument: text profile to write
+
+random.seed(1234)  # fixed seed, so random.sample() below is repeatable
+num_functions = 1000
+num_data = 100
+num_traces = 10
+
+function_names = [f"f{n}" for n in range(num_functions)]
+data_names = [f"d{n}" for n in range(num_data)]
+profiled_functions = function_names[: int(num_functions / 2)]  # first half
+
+function_contents = [
+    f"""
+{name}:
+  add w0, w0, #{i % 4096}
+  add w1, w1, #{i % 10}
+  add w2, w0, #{i % 20}
+  adrp x3, {name}@PAGE
+  ret
+"""
+    for i, name in enumerate(function_names)
+]
+
+data_contents = [
+      f"""
+{name}:
+  .ascii "s{i % 2}-{i % 3}-{i % 5}"
+  .xword {name}
+"""
+    for i, name in enumerate(data_names)
+]
+
+trace_contents = [
+    f"""
+# Weight
+1
+{", ".join(random.sample(profiled_functions, len(profiled_functions)))}
+"""
+    for i in range(num_traces)  # each trace is a shuffle of profiled_functions
+]
+
+profile_contents = [
+    f"""
+{name}
+# Func Hash:
+{i}
+# Num Counters:
+1
+# Counter Values:
+1
+"""
+    for i, name in enumerate(profiled_functions)
+]
+
+with open(assembly_filepath, "w") as f:  # _main, then all functions and data
+    f.write(
+        f"""
+.text
+.globl _main
+
+_main:
+  ret
+
+{"".join(function_contents)}
+
+.data
+{"".join(data_contents)}
+
+.subsections_via_symbols
+"""
+    )
+
+with open(proftext_filepath, "w") as f:  # header, traces, then one record each
+    f.write(
+        f"""
+:ir
+:temporal_prof_traces
+
+# Num Traces
+{num_traces}
+# Trace Stream Size:
+{num_traces}
+
+{"".join(trace_contents)}
+
+{"".join(profile_contents)}
+"""
+    )
diff --git a/lld/test/MachO/bp-section-orderer.s b/lld/test/MachO/bp-section-orderer.s
new file mode 100644
index 0000000000000..e7614406edf22
--- /dev/null
+++ b/lld/test/MachO/bp-section-orderer.s
@@ -0,0 +1,115 @@
+# REQUIRES: aarch64, asserts
+
+# RUN: rm -rf %t && split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o
+# RUN: llvm-profdata merge %t/a.proftext -o %t/a.profdata
+
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --profile-guided-function-order=%t/a.profdata 2>&1 | FileCheck %s --check-prefix=STARTUP
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --profile-guided-function-order=%t/a.profdata --icf=all 2>&1 | FileCheck %s --check-prefix=STARTUP
+
+# RUN: %lld -arch arm64 -lSystem -e _main -o - %t/a.o --profile-guided-function-order=%t/a.profdata -order_file %t/a.orderfile | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=ORDERFILE
+
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --function-order-for-compression --data-order-for-compression 2>&1 | FileCheck %s --check-prefix=COMPRESSION
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --profile-guided-function-order=%t/a.profdata --function-order-for-compression --data-order-for-compression 2>&1 | FileCheck %s --check-prefix=COMPRESSION
+
+
+# STARTUP: Ordered 3 sections using balanced partitioning
+
+# ORDERFILE: A
+# ORDERFILE: F
+# ORDERFILE: E
+# ORDERFILE: D
+# ORDERFILE-DAG: _B
+# ORDERFILE-DAG: l_C
+
+# COMPRESSION: Ordered 11 sections using balanced partitioning
+
+#--- a.s
+.text
+.globl _main, A, _B, l_C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
+
+_main:
+  ret
+A:
+  ret
+_B:
+  add w0, w0, #1
+  bl  A
+  ret
+l_C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222:
+  add w0, w0, #2
+  bl  A
+  ret
+D:
+  add w0, w0, #2
+  bl _B
+  ret
+E:
+  add w0, w0, #2
+  bl l_C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
+  ret
+F:
+  add w0, w0, #3
+  bl l_C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
+  ret
+
+.data
+s1:
+  .ascii "hello world"
+s2:
+  .ascii "i am a string"
+r1:
+  .quad s1
+r2:
+  .quad r1
+
+.subsections_via_symbols
+
+#--- a.proftext
+:ir
+:temporal_prof_traces
+# Num Traces
+1
+# Trace Stream Size:
+1
+# Weight
+1
+A, B, C.__uniq.555555555555555555555555555555555555555.llvm.6666666666666666666
+
+A
+# Func Hash:
+1111
+# Num Counters:
+1
+# Counter Values:
+1
+
+B
+# Func Hash:
+2222
+# Num Counters:
+1
+# Counter Values:
+1
+
+C.__uniq.555555555555555555555555555555555555555.llvm.6666666666666666666
+# Func Hash:
+3333
+# Num Counters:
+1
+# Counter Values:
+1
+
+D
+# Func Hash:
+4444
+# Num Counters:
+1
+# Counter Values:
+1
+
+#--- a.orderfile
+A
+F
+E
+D

>From 4b4bccdcbb58730ee50bd6a3a15140fb1c84a253 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag at gmail.com>
Date: Fri, 21 Jun 2024 09:29:11 -0700
Subject: [PATCH 2/8] Try to fix windows build

---
 lld/MachO/BPSectionOrderer.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index c2259aefecdf0..ff95ec7c8878e 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -180,7 +180,9 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
   }
 
   StringMap<DenseSet<unsigned>> rootSymbolToSectionIdxs;
-  for (auto &[name, sectionIdxs] : symbolToSectionIdxs) {
+  for (auto &entry : symbolToSectionIdxs) {
+    StringRef name = entry.getKey();
+    auto &sectionIdxs = entry.getValue();
     name = getRootSymbol(name);
     rootSymbolToSectionIdxs[name].insert(sectionIdxs.begin(),
                                          sectionIdxs.end());

>From 1d9b2b4e90e99463517d53be3b822dfb4e44f6ee Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag at gmail.com>
Date: Tue, 25 Jun 2024 15:41:38 -0700
Subject: [PATCH 3/8] Fix typo

Co-authored-by: Vincent Lee <thevinster at users.noreply.github.com>
---
 lld/MachO/BPSectionOrderer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lld/MachO/BPSectionOrderer.h b/lld/MachO/BPSectionOrderer.h
index 9d8302b441a7c..c9643e9f5ef53 100644
--- a/lld/MachO/BPSectionOrderer.h
+++ b/lld/MachO/BPSectionOrderer.h
@@ -20,7 +20,7 @@ namespace lld::macho {
 
 class InputSection;
 
-/// Run Balanced Partitioning to find the optimial function and data order to
+/// Run Balanced Partitioning to find the optimal function and data order to
 /// improve startup time and compressed size.
 ///
 /// It is important that .subsections_via_symbols is used to ensure functions

>From 28501e508912413d82b2e2fc4e2378dd9cf7d5e9 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag at gmail.com>
Date: Wed, 26 Jun 2024 10:29:29 -0700
Subject: [PATCH 4/8] Check forFunctionCompression earlier

---
 lld/MachO/BPSectionOrderer.cpp | 25 +++++++++++--------------
 lld/MachO/Config.h             |  4 ++--
 2 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index ff95ec7c8878e..688d78bf68889 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -76,6 +76,7 @@ static void constructNodesForCompression(
     std::vector<BPFunctionNode> &nodes,
     DenseMap<unsigned, SmallVector<unsigned>> &duplicateSectionIdxs,
     BPFunctionNode::UtilityNodeT &maxUN) {
+  TimeTraceScope timeScope("Build nodes for compression");
 
   SmallVector<std::pair<unsigned, SmallVector<uint64_t>>> sectionHashes;
   sectionHashes.reserve(sectionIdxs.size());
@@ -271,9 +272,11 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
       continue;
     const auto *isec = sections[sectionIdx];
     if (isCodeSection(isec)) {
-      sectionIdxsForFunctionCompression.push_back(sectionIdx);
+      if (forFunctionCompression)
+        sectionIdxsForFunctionCompression.push_back(sectionIdx);
     } else {
-      sectionIdxsForDataCompression.push_back(sectionIdx);
+      if (forDataCompression)
+        sectionIdxsForDataCompression.push_back(sectionIdx);
     }
   }
 
@@ -283,18 +286,12 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
   // section indices (not ordered for compression).
   DenseMap<unsigned, SmallVector<unsigned>> duplicateFunctionSectionIdxs,
       duplicateDataSectionIdxs;
-  if (forFunctionCompression) {
-    TimeTraceScope timeScope("Build nodes for function compression");
-    constructNodesForCompression(
-        sections, sectionToIdx, sectionIdxsForFunctionCompression,
-        nodesForFunctionCompression, duplicateFunctionSectionIdxs, maxUN);
-  }
-  if (forDataCompression) {
-    TimeTraceScope timeScope("Build nodes for data compression");
-    constructNodesForCompression(
-        sections, sectionToIdx, sectionIdxsForDataCompression,
-        nodesForDataCompression, duplicateDataSectionIdxs, maxUN);
-  }
+  constructNodesForCompression(
+      sections, sectionToIdx, sectionIdxsForFunctionCompression,
+      nodesForFunctionCompression, duplicateFunctionSectionIdxs, maxUN);
+  constructNodesForCompression(
+      sections, sectionToIdx, sectionIdxsForDataCompression,
+      nodesForDataCompression, duplicateDataSectionIdxs, maxUN);
 
   // Sort nodes by their Id (which is the section index) because the input
   // linker order tends to be not bad
diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index ac860438e65f1..3e1de28900007 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -214,8 +214,8 @@ struct Configuration {
   bool pgoWarnMismatch;
   bool warnThinArchiveMissingMembers;
   llvm::StringRef profileGuidedFunctionOrderPath;
-  bool functionOrderForCompression;
-  bool dataOrderForCompression;
+  bool functionOrderForCompression = false;
+  bool dataOrderForCompression = false;
 
   bool callGraphProfileSort = false;
   llvm::StringRef printSymbolOrder;

>From e8a9a46ecff509806713d07b08e8a36240fca977 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag at gmail.com>
Date: Fri, 5 Jul 2024 09:36:02 -0700
Subject: [PATCH 5/8] Add flag --verbose-bp-section-orderer

---
 lld/MachO/BPSectionOrderer.cpp      | 109 ++++++++++++++--------------
 lld/MachO/BPSectionOrderer.h        |   4 +-
 lld/MachO/Config.h                  |   1 +
 lld/MachO/Driver.cpp                |   1 +
 lld/MachO/Options.td                |   3 +
 lld/MachO/SectionPriorities.cpp     |   3 +-
 lld/test/MachO/bp-section-orderer.s |  10 +--
 7 files changed, 71 insertions(+), 60 deletions(-)

diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index 688d78bf68889..022cae5b38f30 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -159,7 +159,7 @@ static void constructNodesForCompression(
 
 DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
     size_t &highestAvailablePriority, StringRef profilePath,
-    bool forFunctionCompression, bool forDataCompression) {
+    bool forFunctionCompression, bool forDataCompression, bool verbose) {
 
   SmallVector<const InputSection *> sections;
   DenseMap<const InputSection *, uint64_t> sectionToIdx;
@@ -351,66 +351,69 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
     }
   }
 
-#ifndef NDEBUG
-  unsigned numTotalOrderedSections =
-      numStartupSections + numCodeCompressionSections +
-      numDuplicateCodeSections + numDataCompressionSections +
-      numDuplicateDataSections;
-  dbgs() << "Ordered " << numTotalOrderedSections
-         << " sections using balanced partitioning:\n  Functions for startup: "
-         << numStartupSections
-         << "\n  Functions for compression: " << numCodeCompressionSections
-         << "\n  Duplicate functions: " << numDuplicateCodeSections
-         << "\n  Data for compression: " << numDataCompressionSections
-         << "\n  Duplicate data: " << numDuplicateDataSections << "\n";
-
-  if (!profilePath.empty()) {
-    // Evaluate this function order for startup
-    StringMap<std::pair<uint64_t, uint64_t>> symbolToPageNumbers;
-    const uint64_t pageSize = (1 << 14);
-    uint64_t currentAddress = 0;
-    for (const auto *isec : orderedSections) {
-      for (Symbol *sym : isec->symbols) {
-        if (auto *d = dyn_cast_or_null<Defined>(sym)) {
-          uint64_t startAddress = currentAddress + d->value;
-          uint64_t endAddress = startAddress + d->size;
-          uint64_t firstPage = startAddress / pageSize;
-          // I think the kernel might pull in a few pages when one it touched,
-          // so it might be more accurate to force lastPage to be aligned by 4?
-          uint64_t lastPage = endAddress / pageSize;
-          StringRef rootSymbol = d->getName();
-          rootSymbol = getRootSymbol(rootSymbol);
-          symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage);
-          if (rootSymbol.consume_front("_") || rootSymbol.consume_front("l_"))
+  if (verbose) {
+    unsigned numTotalOrderedSections =
+        numStartupSections + numCodeCompressionSections +
+        numDuplicateCodeSections + numDataCompressionSections +
+        numDuplicateDataSections;
+    dbgs()
+        << "Ordered " << numTotalOrderedSections
+        << " sections using balanced partitioning:\n  Functions for startup: "
+        << numStartupSections
+        << "\n  Functions for compression: " << numCodeCompressionSections
+        << "\n  Duplicate functions: " << numDuplicateCodeSections
+        << "\n  Data for compression: " << numDataCompressionSections
+        << "\n  Duplicate data: " << numDuplicateDataSections << "\n";
+
+    if (!profilePath.empty()) {
+      // Evaluate this function order for startup
+      StringMap<std::pair<uint64_t, uint64_t>> symbolToPageNumbers;
+      const uint64_t pageSize = (1 << 14);
+      uint64_t currentAddress = 0;
+      for (const auto *isec : orderedSections) {
+        for (Symbol *sym : isec->symbols) {
+          if (auto *d = dyn_cast_or_null<Defined>(sym)) {
+            uint64_t startAddress = currentAddress + d->value;
+            uint64_t endAddress = startAddress + d->size;
+            uint64_t firstPage = startAddress / pageSize;
+            // I think the kernel might pull in a few pages when one is touched,
+            // so it might be more accurate to force lastPage to be aligned by
+            // 4?
+            uint64_t lastPage = endAddress / pageSize;
+            StringRef rootSymbol = d->getName();
+            rootSymbol = getRootSymbol(rootSymbol);
             symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage);
+            if (rootSymbol.consume_front("_") || rootSymbol.consume_front("l_"))
+              symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage);
+          }
         }
-      }
 
-      currentAddress += isec->getSize();
-    }
+        currentAddress += isec->getSize();
+      }
 
-    // The area under the curve F where F(t) is the total number of page faults
-    // at step t.
-    unsigned area = 0;
-    for (auto &trace : reader->getTemporalProfTraces()) {
-      SmallSet<uint64_t, 0> touchedPages;
-      for (unsigned step = 0; step < trace.FunctionNameRefs.size(); step++) {
-        auto traceId = trace.FunctionNameRefs[step];
-        auto [Filename, ParsedFuncName] =
-            getParsedIRPGOName(reader->getSymtab().getFuncOrVarName(traceId));
-        ParsedFuncName = getRootSymbol(ParsedFuncName);
-        auto it = symbolToPageNumbers.find(ParsedFuncName);
-        if (it != symbolToPageNumbers.end()) {
-          auto &[firstPage, lastPage] = it->getValue();
-          for (uint64_t i = firstPage; i <= lastPage; i++)
-            touchedPages.insert(i);
+      // The area under the curve F where F(t) is the total number of page
+      // faults at step t.
+      unsigned area = 0;
+      for (auto &trace : reader->getTemporalProfTraces()) {
+        SmallSet<uint64_t, 0> touchedPages;
+        for (unsigned step = 0; step < trace.FunctionNameRefs.size(); step++) {
+          auto traceId = trace.FunctionNameRefs[step];
+          auto [Filename, ParsedFuncName] =
+              getParsedIRPGOName(reader->getSymtab().getFuncOrVarName(traceId));
+          ParsedFuncName = getRootSymbol(ParsedFuncName);
+          auto it = symbolToPageNumbers.find(ParsedFuncName);
+          if (it != symbolToPageNumbers.end()) {
+            auto &[firstPage, lastPage] = it->getValue();
+            for (uint64_t i = firstPage; i <= lastPage; i++)
+              touchedPages.insert(i);
+          }
+          area += touchedPages.size();
         }
-        area += touchedPages.size();
       }
+      dbgs() << "Total area under the page fault curve: " << (float)area
+             << "\n";
     }
-    dbgs() << "Total area under the page fault curve: " << (float)area << "\n";
   }
-#endif
 
   DenseMap<const InputSection *, size_t> sectionPriorities;
   for (const auto *isec : orderedSections)
diff --git a/lld/MachO/BPSectionOrderer.h b/lld/MachO/BPSectionOrderer.h
index c9643e9f5ef53..6f9eefd5d82be 100644
--- a/lld/MachO/BPSectionOrderer.h
+++ b/lld/MachO/BPSectionOrderer.h
@@ -15,6 +15,7 @@
 #define LLD_MACHO_BPSECTION_ORDERER_H
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
 
 namespace lld::macho {
 
@@ -28,7 +29,8 @@ class InputSection;
 llvm::DenseMap<const lld::macho::InputSection *, size_t>
 runBalancedPartitioning(size_t &highestAvailablePriority,
                         llvm::StringRef profilePath,
-                        bool forFunctionCompression, bool forDataCompression);
+                        bool forFunctionCompression, bool forDataCompression,
+                        bool verbose);
 
 } // namespace lld::macho
 
diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index 3e1de28900007..d0d97b1d91a87 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -216,6 +216,7 @@ struct Configuration {
   llvm::StringRef profileGuidedFunctionOrderPath;
   bool functionOrderForCompression = false;
   bool dataOrderForCompression = false;
+  bool verboseBpSectionOrderer = false;
 
   bool callGraphProfileSort = false;
   llvm::StringRef printSymbolOrder;
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index efa2e368e6f62..5b4aa5f4f9917 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1740,6 +1740,7 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
   config->functionOrderForCompression =
       args.hasArg(OPT_function_order_for_compression);
   config->dataOrderForCompression = args.hasArg(OPT_data_order_for_compression);
+  config->verboseBpSectionOrderer = args.hasArg(OPT_verbose_bp_section_orderer);
 
   for (const Arg *arg : args.filtered(OPT_alias)) {
     config->aliasedSymbols.push_back(
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index 71f403855deb9..655fdc190a2b6 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -164,6 +164,9 @@ def function_order_for_compression: Flag<["--"], "function-order-for-compression
     HelpText<"Order functions to improve compressed size">, Group<grp_lld>;
 def data_order_for_compression: Flag<["--"], "data-order-for-compression">,
     HelpText<"Order data to improve compressed size">, Group<grp_lld>;
+def verbose_bp_section_orderer : Flag<["--"], "verbose-bp-section-orderer">,
+    HelpText<"Print information on how many sections were ordered by balanced partitioning and a measure of the expected number of page faults">,
+    Group<grp_lld>;
 
 // This is a complete Options.td compiled from Apple's ld(1) manpage
 // dated 2018-03-07 and cross checked with ld64 source code in repo
diff --git a/lld/MachO/SectionPriorities.cpp b/lld/MachO/SectionPriorities.cpp
index f4a7e5fdb9cb1..cb2dcdc8248bc 100644
--- a/lld/MachO/SectionPriorities.cpp
+++ b/lld/MachO/SectionPriorities.cpp
@@ -358,7 +358,8 @@ macho::PriorityBuilder::buildInputSectionPriorities() {
     TimeTraceScope timeScope("Balanced Partitioning Section Orderer");
     sectionPriorities = runBalancedPartitioning(
         highestAvailablePriority, config->profileGuidedFunctionOrderPath,
-        config->functionOrderForCompression, config->dataOrderForCompression);
+        config->functionOrderForCompression, config->dataOrderForCompression,
+        config->verboseBpSectionOrderer);
   } else if (config->callGraphProfileSort) {
     // Sort sections by the profile data provided by __LLVM,__cg_profile
     // sections.
diff --git a/lld/test/MachO/bp-section-orderer.s b/lld/test/MachO/bp-section-orderer.s
index e7614406edf22..3edab566f3f45 100644
--- a/lld/test/MachO/bp-section-orderer.s
+++ b/lld/test/MachO/bp-section-orderer.s
@@ -1,16 +1,16 @@
-# REQUIRES: aarch64, asserts
+# REQUIRES: aarch64
 
 # RUN: rm -rf %t && split-file %s %t
 # RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o
 # RUN: llvm-profdata merge %t/a.proftext -o %t/a.profdata
 
-# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --profile-guided-function-order=%t/a.profdata 2>&1 | FileCheck %s --check-prefix=STARTUP
-# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --profile-guided-function-order=%t/a.profdata --icf=all 2>&1 | FileCheck %s --check-prefix=STARTUP
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --profile-guided-function-order=%t/a.profdata --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=STARTUP
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --profile-guided-function-order=%t/a.profdata --verbose-bp-section-orderer --icf=all 2>&1 | FileCheck %s --check-prefix=STARTUP
 
 # RUN: %lld -arch arm64 -lSystem -e _main -o - %t/a.o --profile-guided-function-order=%t/a.profdata -order_file %t/a.orderfile | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=ORDERFILE
 
-# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --function-order-for-compression --data-order-for-compression 2>&1 | FileCheck %s --check-prefix=COMPRESSION
-# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --profile-guided-function-order=%t/a.profdata --function-order-for-compression --data-order-for-compression 2>&1 | FileCheck %s --check-prefix=COMPRESSION
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --function-order-for-compression --data-order-for-compression --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=COMPRESSION
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --profile-guided-function-order=%t/a.profdata --function-order-for-compression --data-order-for-compression --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=COMPRESSION
 
 
 # STARTUP: Ordered 3 sections using balanced partitioning

>From 010514fbd9fcb096ad7c3cd027c08c14f298bf49 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag at gmail.com>
Date: Fri, 19 Jul 2024 11:17:06 -0700
Subject: [PATCH 6/8] Remove isNumber checks

---
 lld/MachO/BPSectionOrderer.cpp | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index 022cae5b38f30..26d4e0cb3987d 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -21,23 +21,14 @@
 using namespace llvm;
 using namespace lld::macho;
 
-// TODO: Move to StringRef.h
-static bool isNumber(StringRef S) {
-  return !S.empty() && S.find_first_not_of("0123456789") == StringRef::npos;
-}
-
 /// Symbols can be appended with "(.__uniq.xxxx)?.llvm.yyyy" where "xxxx" and
 /// "yyyy" are numbers that could change between builds. We need to use the root
 /// symbol name before this suffix so these symbols can be matched with profiles
 /// which may have different suffixes.
 static StringRef getRootSymbol(StringRef Name) {
   auto [P0, S0] = Name.rsplit(".llvm.");
-  if (isNumber(S0))
-    Name = P0;
-  auto [P1, S1] = Name.rsplit(".__uniq.");
-  if (isNumber(S1))
-    return P1;
-  return Name;
+  auto [P1, S1] = P0.rsplit(".__uniq.");
+  return P1;
 }
 
 static uint64_t getRelocHash(StringRef kind, uint64_t sectionIdx,

>From 503fb68bdcbe10757cbd57f9fb3a57d64d4a1dae Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag at gmail.com>
Date: Fri, 19 Jul 2024 12:17:05 -0700
Subject: [PATCH 7/8] Rename flags

---
 lld/MachO/Config.h                         |  9 +++++----
 lld/MachO/Driver.cpp                       | 21 ++++++++++++++++-----
 lld/MachO/Options.td                       | 21 ++++++++++-----------
 lld/MachO/SectionPriorities.cpp            |  4 ++--
 lld/test/MachO/bp-section-orderer-stress.s |  4 ++--
 lld/test/MachO/bp-section-orderer.s        | 19 +++++++++++++------
 6 files changed, 48 insertions(+), 30 deletions(-)

diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index d0d97b1d91a87..5beb0662ba727 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -213,14 +213,15 @@ struct Configuration {
   llvm::StringRef csProfilePath;
   bool pgoWarnMismatch;
   bool warnThinArchiveMissingMembers;
-  llvm::StringRef profileGuidedFunctionOrderPath;
-  bool functionOrderForCompression = false;
-  bool dataOrderForCompression = false;
-  bool verboseBpSectionOrderer = false;
 
   bool callGraphProfileSort = false;
   llvm::StringRef printSymbolOrder;
 
+  llvm::StringRef irpgoProfileSortProfilePath;
+  bool functionOrderForCompression = false;
+  bool dataOrderForCompression = false;
+  bool verboseBpSectionOrderer = false;
+
   SectionRenameMap sectionRenameMap;
   SegmentRenameMap segmentRenameMap;
 
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 5b4aa5f4f9917..41a0bb7751575 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1735,11 +1735,22 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
       args.hasFlag(OPT_warn_thin_archive_missing_members,
                    OPT_no_warn_thin_archive_missing_members, true);
   config->generateUuid = !args.hasArg(OPT_no_uuid);
-  config->profileGuidedFunctionOrderPath =
-      args.getLastArgValue(OPT_profile_guided_function_order);
-  config->functionOrderForCompression =
-      args.hasArg(OPT_function_order_for_compression);
-  config->dataOrderForCompression = args.hasArg(OPT_data_order_for_compression);
+  config->irpgoProfileSortProfilePath =
+      args.getLastArgValue(OPT_irpgo_profile_sort);
+  if (const Arg *arg = args.getLastArg(OPT_compression_sort)) {
+    StringRef compressionSortStr = arg->getValue();
+    if (compressionSortStr == "function") {
+      config->functionOrderForCompression = true;
+    } else if (compressionSortStr == "data") {
+      config->dataOrderForCompression = true;
+    } else if (compressionSortStr == "both") {
+      config->functionOrderForCompression = true;
+      config->dataOrderForCompression = true;
+    } else if (compressionSortStr != "none") {
+      error("unknown value `" + compressionSortStr + "` for " +
+            arg->getSpelling());
+    }
+  }
   config->verboseBpSectionOrderer = args.hasArg(OPT_verbose_bp_section_orderer);
 
   for (const Arg *arg : args.filtered(OPT_alias)) {
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index 655fdc190a2b6..75bfaed9e4c08 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -126,6 +126,16 @@ def no_call_graph_profile_sort : Flag<["--"], "no-call-graph-profile-sort">,
 def print_symbol_order_eq: Joined<["--"], "print-symbol-order=">,
     HelpText<"Print a symbol order specified by --call-graph-profile-sort into the specified file">,
     Group<grp_lld>;
+def irpgo_profile_sort: Joined<["--"], "irpgo-profile-sort=">,
+    MetaVarName<"<profile>">,
+    HelpText<"Read the IRPGO profile at <profile> to order sections to improve startup time">,
+    Group<grp_lld>;
+def compression_sort: Joined<["--"], "compression-sort=">,
+    MetaVarName<"[none,function,data,both]">,
+    HelpText<"Order sections to improve compressed size">, Group<grp_lld>;
+def verbose_bp_section_orderer: Flag<["--"], "verbose-bp-section-orderer">,
+    HelpText<"Print information on how many sections were ordered by balanced partitioning and a measure of the expected number of page faults">,
+    Group<grp_lld>;
 def ignore_auto_link_option : Separate<["--"], "ignore-auto-link-option">,
     Group<grp_lld>;
 def ignore_auto_link_option_eq : Joined<["--"], "ignore-auto-link-option=">,
@@ -156,17 +166,6 @@ defm pgo_warn_mismatch: BB<"pgo-warn-mismatch",
 defm warn_thin_archive_missing_members : BB<"warn-thin-archive-missing-members",
   "Warn on missing object files referenced by thin archives (default)",
   "Do not warn on missing object files referenced by thin archives">, Group<grp_lld>;
-def profile_guided_function_order: Joined<["--"], "profile-guided-function-order=">,
-    MetaVarName<"<profile>">, 
-    HelpText<"Read traces from <profile> to order functions to improve startup time">,
-    Group<grp_lld>;
-def function_order_for_compression: Flag<["--"], "function-order-for-compression">,
-    HelpText<"Order functions to improve compressed size">, Group<grp_lld>;
-def data_order_for_compression: Flag<["--"], "data-order-for-compression">,
-    HelpText<"Order data to improve compressed size">, Group<grp_lld>;
-def verbose_bp_section_orderer : Flag<["--"], "verbose-bp-section-orderer">,
-    HelpText<"Print information on how many sections were ordered by balanced partitioning and a measure of the expected number of page faults">,
-    Group<grp_lld>;
 
 // This is a complete Options.td compiled from Apple's ld(1) manpage
 // dated 2018-03-07 and cross checked with ld64 source code in repo
diff --git a/lld/MachO/SectionPriorities.cpp b/lld/MachO/SectionPriorities.cpp
index cb2dcdc8248bc..69c301d8ff8a7 100644
--- a/lld/MachO/SectionPriorities.cpp
+++ b/lld/MachO/SectionPriorities.cpp
@@ -353,11 +353,11 @@ void macho::PriorityBuilder::parseOrderFile(StringRef path) {
 DenseMap<const InputSection *, size_t>
 macho::PriorityBuilder::buildInputSectionPriorities() {
   DenseMap<const InputSection *, size_t> sectionPriorities;
-  if (!config->profileGuidedFunctionOrderPath.empty() ||
+  if (!config->irpgoProfileSortProfilePath.empty() ||
       config->functionOrderForCompression || config->dataOrderForCompression) {
     TimeTraceScope timeScope("Balanced Partitioning Section Orderer");
     sectionPriorities = runBalancedPartitioning(
-        highestAvailablePriority, config->profileGuidedFunctionOrderPath,
+        highestAvailablePriority, config->irpgoProfileSortProfilePath,
         config->functionOrderForCompression, config->dataOrderForCompression,
         config->verboseBpSectionOrderer);
   } else if (config->callGraphProfileSort) {
diff --git a/lld/test/MachO/bp-section-orderer-stress.s b/lld/test/MachO/bp-section-orderer-stress.s
index a536c79d45021..fdc6a20e2655b 100644
--- a/lld/test/MachO/bp-section-orderer-stress.s
+++ b/lld/test/MachO/bp-section-orderer-stress.s
@@ -7,8 +7,8 @@
 # RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t.s -o %t.o
 # RUN: llvm-profdata merge %t.proftext -o %t.profdata
 
-# RUN: %lld -arch arm64 -lSystem -e _main --icf=all -o - %t.o --profile-guided-function-order=%t.profdata --function-order-for-compression --data-order-for-compression | llvm-nm --numeric-sort --format=just-symbols - > %t.order1.txt
-# RUN: %lld -arch arm64 -lSystem -e _main --icf=all -o - %t.o --profile-guided-function-order=%t.profdata --function-order-for-compression --data-order-for-compression | llvm-nm --numeric-sort --format=just-symbols - > %t.order2.txt
+# RUN: %lld -arch arm64 -lSystem -e _main --icf=all -o - %t.o --irpgo-profile-sort=%t.profdata --compression-sort=both | llvm-nm --numeric-sort --format=just-symbols - > %t.order1.txt
+# RUN: %lld -arch arm64 -lSystem -e _main --icf=all -o - %t.o --irpgo-profile-sort=%t.profdata --compression-sort=both | llvm-nm --numeric-sort --format=just-symbols - > %t.order2.txt
 # RUN: diff %t.order1.txt %t.order2.txt
 
 import random
diff --git a/lld/test/MachO/bp-section-orderer.s b/lld/test/MachO/bp-section-orderer.s
index 3edab566f3f45..ff13877284901 100644
--- a/lld/test/MachO/bp-section-orderer.s
+++ b/lld/test/MachO/bp-section-orderer.s
@@ -4,13 +4,18 @@
 # RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o
 # RUN: llvm-profdata merge %t/a.proftext -o %t/a.profdata
 
-# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --profile-guided-function-order=%t/a.profdata --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=STARTUP
-# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --profile-guided-function-order=%t/a.profdata --verbose-bp-section-orderer --icf=all 2>&1 | FileCheck %s --check-prefix=STARTUP
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --irpgo-profile-sort=%t/a.profdata --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=STARTUP
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --irpgo-profile-sort=%t/a.profdata --verbose-bp-section-orderer --icf=all --compression-sort=none 2>&1 | FileCheck %s --check-prefix=STARTUP
 
-# RUN: %lld -arch arm64 -lSystem -e _main -o - %t/a.o --profile-guided-function-order=%t/a.profdata -order_file %t/a.orderfile | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=ORDERFILE
+# RUN: %lld -arch arm64 -lSystem -e _main -o - %t/a.o --irpgo-profile-sort=%t/a.profdata -order_file %t/a.orderfile | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=ORDERFILE
 
-# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --function-order-for-compression --data-order-for-compression --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=COMPRESSION
-# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --profile-guided-function-order=%t/a.profdata --function-order-for-compression --data-order-for-compression --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=COMPRESSION
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=function 2>&1 | FileCheck %s --check-prefix=COMPRESSION-FUNC
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=data 2>&1 | FileCheck %s --check-prefix=COMPRESSION-DATA
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=both 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=both --irpgo-profile-sort=%t/a.profdata 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
+
+# RUN: not %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --compression-sort=malformed 2>&1 | FileCheck %s --check-prefix=COMPRESSION-ERR
+# COMPRESSION-ERR: unknown value `malformed` for --compression-sort=
 
 
 # STARTUP: Ordered 3 sections using balanced partitioning
@@ -22,7 +27,9 @@
 # ORDERFILE-DAG: _B
 # ORDERFILE-DAG: l_C
 
-# COMPRESSION: Ordered 11 sections using balanced partitioning
+# COMPRESSION-FUNC: Ordered 7 sections using balanced partitioning
+# COMPRESSION-DATA: Ordered 4 sections using balanced partitioning
+# COMPRESSION-BOTH: Ordered 11 sections using balanced partitioning
 
 #--- a.s
 .text

>From 5e29239a083cfb04977167b74eec221b2f9715d1 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag at gmail.com>
Date: Fri, 19 Jul 2024 13:27:54 -0700
Subject: [PATCH 8/8] Throw error when used with --call-graph-profile-sort

---
 lld/MachO/Driver.cpp                     | 15 +++++++++++++--
 lld/test/MachO/bp-section-orderer-errs.s |  8 ++++++++
 lld/test/MachO/bp-section-orderer.s      | 23 ++++++++++++-----------
 3 files changed, 33 insertions(+), 13 deletions(-)
 create mode 100644 lld/test/MachO/bp-section-orderer-errs.s

diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 41a0bb7751575..c238fd52218a0 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1735,8 +1735,17 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
       args.hasFlag(OPT_warn_thin_archive_missing_members,
                    OPT_no_warn_thin_archive_missing_members, true);
   config->generateUuid = !args.hasArg(OPT_no_uuid);
-  config->irpgoProfileSortProfilePath =
-      args.getLastArgValue(OPT_irpgo_profile_sort);
+
+  auto IncompatWithCGSort = [&](StringRef firstArgStr) {
+    // Throw an error only if --call-graph-profile-sort is explicitly specified
+    if (config->callGraphProfileSort)
+      if (const Arg *arg = args.getLastArgNoClaim(OPT_call_graph_profile_sort))
+        error(firstArgStr + " is incompatible with " + arg->getSpelling());
+  };
+  if (const Arg *arg = args.getLastArg(OPT_irpgo_profile_sort)) {
+    config->irpgoProfileSortProfilePath = arg->getValue();
+    IncompatWithCGSort(arg->getSpelling());
+  }
   if (const Arg *arg = args.getLastArg(OPT_compression_sort)) {
     StringRef compressionSortStr = arg->getValue();
     if (compressionSortStr == "function") {
@@ -1750,6 +1759,8 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
       error("unknown value `" + compressionSortStr + "` for " +
             arg->getSpelling());
     }
+    if (compressionSortStr != "none")
+      IncompatWithCGSort(arg->getSpelling());
   }
   config->verboseBpSectionOrderer = args.hasArg(OPT_verbose_bp_section_orderer);
 
diff --git a/lld/test/MachO/bp-section-orderer-errs.s b/lld/test/MachO/bp-section-orderer-errs.s
new file mode 100644
index 0000000000000..f248b860ce5dc
--- /dev/null
+++ b/lld/test/MachO/bp-section-orderer-errs.s
@@ -0,0 +1,8 @@
+# RUN: not %lld -o /dev/null --irpgo-profile-sort=%s --call-graph-profile-sort 2>&1 | FileCheck %s --check-prefix=IRPGO-ERR
+# IRPGO-ERR: --irpgo-profile-sort= is incompatible with --call-graph-profile-sort
+
+# RUN: not %lld -o /dev/null --compression-sort=function --call-graph-profile-sort %s 2>&1 | FileCheck %s --check-prefix=COMPRESSION-ERR
+# COMPRESSION-ERR: --compression-sort= is incompatible with --call-graph-profile-sort
+
+# RUN: not %lld -o /dev/null --compression-sort=malformed 2>&1 | FileCheck %s --check-prefix=COMPRESSION-MALFORM
+# COMPRESSION-MALFORM: unknown value `malformed` for --compression-sort=
diff --git a/lld/test/MachO/bp-section-orderer.s b/lld/test/MachO/bp-section-orderer.s
index ff13877284901..407787025150d 100644
--- a/lld/test/MachO/bp-section-orderer.s
+++ b/lld/test/MachO/bp-section-orderer.s
@@ -7,25 +7,26 @@
 # RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --irpgo-profile-sort=%t/a.profdata --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=STARTUP
 # RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --irpgo-profile-sort=%t/a.profdata --verbose-bp-section-orderer --icf=all --compression-sort=none 2>&1 | FileCheck %s --check-prefix=STARTUP
 
-# RUN: %lld -arch arm64 -lSystem -e _main -o - %t/a.o --irpgo-profile-sort=%t/a.profdata -order_file %t/a.orderfile | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=ORDERFILE
-
-# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=function 2>&1 | FileCheck %s --check-prefix=COMPRESSION-FUNC
-# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=data 2>&1 | FileCheck %s --check-prefix=COMPRESSION-DATA
-# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=both 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
-# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=both --irpgo-profile-sort=%t/a.profdata 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
-
-# RUN: not %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --compression-sort=malformed 2>&1 | FileCheck %s --check-prefix=COMPRESSION-ERR
-# COMPRESSION-ERR: unknown value `malformed` for --compression-sort=
-
-
 # STARTUP: Ordered 3 sections using balanced partitioning
 
+# RUN: %lld -arch arm64 -lSystem -e _main -o - %t/a.o --irpgo-profile-sort=%t/a.profdata -order_file %t/a.orderfile | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=ORDERFILE
+
 # ORDERFILE: A
 # ORDERFILE: F
 # ORDERFILE: E
 # ORDERFILE: D
+# ORDERFILE-DAG: _main
 # ORDERFILE-DAG: _B
 # ORDERFILE-DAG: l_C
+# ORDERFILE-DAG: s1
+# ORDERFILE-DAG: s2
+# ORDERFILE-DAG: r1
+# ORDERFILE-DAG: r2
+
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=function 2>&1 | FileCheck %s --check-prefix=COMPRESSION-FUNC
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=data 2>&1 | FileCheck %s --check-prefix=COMPRESSION-DATA
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=both 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=both --irpgo-profile-sort=%t/a.profdata 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
 
 # COMPRESSION-FUNC: Ordered 7 sections using balanced partitioning
 # COMPRESSION-DATA: Ordered 4 sections using balanced partitioning



More information about the llvm-commits mailing list