[lld] [LLD][ELF] add bp-* options in ELF (PR #120514)

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Sun Feb 2 13:55:19 PST 2025


https://github.com/MaskRay updated https://github.com/llvm/llvm-project/pull/120514

>From bbae02a7dbc586ed180694e1bb635ff3e360ea9a Mon Sep 17 00:00:00 2001
From: Pengying Xu <xpy66swsry at gmail.com>
Date: Wed, 15 Jan 2025 15:01:01 +0800
Subject: [PATCH 01/10] Add new ELF linker options for order section layout

---
 lld/ELF/BPSectionOrderer.cpp             |  68 +++++
 lld/ELF/BPSectionOrderer.h               | 109 ++++++++
 lld/ELF/CMakeLists.txt                   |   1 +
 lld/ELF/Config.h                         |   6 +
 lld/ELF/Driver.cpp                       |  49 ++++
 lld/ELF/Options.td                       |  18 ++
 lld/ELF/Writer.cpp                       |  11 +
 lld/test/ELF/bp-section-orderer-stress.s | 104 +++++++
 lld/test/ELF/bp-section-orderer.s        | 328 +++++++++++++++++++++++
 9 files changed, 694 insertions(+)
 create mode 100644 lld/ELF/BPSectionOrderer.cpp
 create mode 100644 lld/ELF/BPSectionOrderer.h
 create mode 100644 lld/test/ELF/bp-section-orderer-stress.s
 create mode 100644 lld/test/ELF/bp-section-orderer.s

diff --git a/lld/ELF/BPSectionOrderer.cpp b/lld/ELF/BPSectionOrderer.cpp
new file mode 100644
index 00000000000000..b4be4a9aa3b60d
--- /dev/null
+++ b/lld/ELF/BPSectionOrderer.cpp
@@ -0,0 +1,68 @@
+//===- BPSectionOrderer.cpp------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPSectionOrderer.h"
+#include "llvm/Support/xxhash.h"
+
+using namespace llvm;
+using namespace lld::elf;
+
+void BPSectionELF::getSectionHashes(
+    llvm::SmallVectorImpl<uint64_t> &hashes,
+    const llvm::DenseMap<const void *, uint64_t> &sectionToIdx) const {
+  constexpr unsigned windowSize = 4;
+
+  size_t size = isec->content().size();
+  for (size_t i = 0; i != size; ++i) {
+    auto window = isec->content().drop_front(i).take_front(windowSize);
+    hashes.push_back(xxHash64(window));
+  }
+
+  llvm::sort(hashes);
+  hashes.erase(std::unique(hashes.begin(), hashes.end()), hashes.end());
+}
+
+llvm::DenseMap<const lld::elf::InputSectionBase *, int>
+lld::elf::runBalancedPartitioning(Ctx &ctx, llvm::StringRef profilePath,
+                                  bool forFunctionCompression,
+                                  bool forDataCompression,
+                                  bool compressionSortStartupFunctions,
+                                  bool verbose) {
+  // Collect sections from symbols and wrap as BPSectionELF instances.
+  // Deduplicates sections referenced by multiple symbols.
+  SmallVector<std::unique_ptr<BPSectionBase>> sections;
+  DenseSet<const InputSectionBase *> seenSections;
+
+  for (Symbol *sym : ctx.symtab->getSymbols())
+    if (sym->getSize() > 0)
+      if (auto *d = dyn_cast<Defined>(sym))
+        if (auto *sec = dyn_cast_or_null<InputSectionBase>(d->section))
+          if (seenSections.insert(sec).second)
+            sections.emplace_back(std::make_unique<BPSectionELF>(sec));
+
+  for (ELFFileBase *file : ctx.objectFiles)
+    for (Symbol *sym : file->getLocalSymbols())
+      if (sym->getSize() > 0)
+        if (auto *d = dyn_cast<Defined>(sym))
+          if (auto *sec = dyn_cast_or_null<InputSectionBase>(d->section))
+            if (seenSections.insert(sec).second)
+              sections.emplace_back(std::make_unique<BPSectionELF>(sec));
+
+  auto reorderedSections = BPSectionBase::reorderSectionsByBalancedPartitioning(
+      profilePath, forFunctionCompression, forDataCompression,
+      compressionSortStartupFunctions, verbose, sections);
+
+  DenseMap<const InputSectionBase *, int> result;
+  for (const auto [sec, priority] : reorderedSections) {
+    auto *elfSection = cast<BPSectionELF>(sec);
+    result.try_emplace(
+        static_cast<const InputSectionBase *>(elfSection->getSection()),
+        static_cast<int>(priority));
+  }
+  return result;
+}
diff --git a/lld/ELF/BPSectionOrderer.h b/lld/ELF/BPSectionOrderer.h
new file mode 100644
index 00000000000000..6e31e08bdf9a6a
--- /dev/null
+++ b/lld/ELF/BPSectionOrderer.h
@@ -0,0 +1,109 @@
+//===- BPSectionOrderer.h -------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file uses Balanced Partitioning to order sections to improve startup
+/// time and compressed size.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_ELF_BPSECTION_ORDERER_H
+#define LLD_ELF_BPSECTION_ORDERER_H
+
+#include "InputFiles.h"
+#include "InputSection.h"
+#include "SymbolTable.h"
+#include "lld/Common/BPSectionOrdererBase.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/ELF.h"
+
+namespace lld::elf {
+
+class InputSection;
+
+class BPSymbolELF : public BPSymbol {
+  const Symbol *sym;
+
+public:
+  explicit BPSymbolELF(const Symbol *s) : sym(s) {}
+
+  llvm::StringRef getName() const override { return sym->getName(); }
+
+  const Defined *asDefined() const { return llvm::dyn_cast<Defined>(sym); }
+
+  std::optional<uint64_t> getValue() const override {
+    if (auto *d = asDefined())
+      return d->value;
+    return {};
+  }
+
+  std::optional<uint64_t> getSize() const override {
+    if (auto *d = asDefined())
+      return d->size;
+    return {};
+  }
+
+  InputSectionBase *getInputSection() const {
+    if (auto *d = asDefined())
+      return llvm::dyn_cast_or_null<InputSectionBase>(d->section);
+    return nullptr;
+  }
+
+  const Symbol *getSymbol() const { return sym; }
+};
+
+class BPSectionELF : public BPSectionBase {
+  const InputSectionBase *isec;
+
+public:
+  explicit BPSectionELF(const InputSectionBase *sec) : isec(sec) {}
+
+  const void *getSection() const override { return isec; }
+
+  uint64_t getSize() const override { return isec->getSize(); }
+
+  bool isCodeSection() const override {
+    return isec->flags & llvm::ELF::SHF_EXECINSTR;
+  }
+
+  SmallVector<std::unique_ptr<BPSymbol>> getSymbols() const override {
+    SmallVector<std::unique_ptr<BPSymbol>> symbols;
+    for (Symbol *sym : isec->file->getSymbols())
+      if (auto *d = dyn_cast<Defined>(sym))
+        if (d->size > 0 && d->section == isec)
+          symbols.emplace_back(std::make_unique<BPSymbolELF>(sym));
+
+    return symbols;
+  }
+
+  std::optional<StringRef>
+  getResolvedLinkageName(llvm::StringRef name) const override {
+    return {};
+  }
+
+  void getSectionHashes(llvm::SmallVectorImpl<uint64_t> &hashes,
+                        const llvm::DenseMap<const void *, uint64_t>
+                            &sectionToIdx) const override;
+
+  static bool classof(const BPSectionBase *s) { return true; }
+};
+
+/// Run Balanced Partitioning to find the optimal function and data order to
+/// improve startup time and compressed size.
+///
+/// It is important that -ffunction-sections and -fdata-sections are used to
+/// ensure functions and data are in their own sections and thus can be
+/// reordered.
+llvm::DenseMap<const InputSectionBase *, int>
+runBalancedPartitioning(Ctx &ctx, llvm::StringRef profilePath,
+                        bool forFunctionCompression, bool forDataCompression,
+                        bool compressionSortStartupFunctions, bool verbose);
+} // namespace lld::elf
+
+#endif
diff --git a/lld/ELF/CMakeLists.txt b/lld/ELF/CMakeLists.txt
index 83d816ddb0601e..298443cd6ea42c 100644
--- a/lld/ELF/CMakeLists.txt
+++ b/lld/ELF/CMakeLists.txt
@@ -37,6 +37,7 @@ add_lld_library(lldELF
   Arch/X86.cpp
   Arch/X86_64.cpp
   ARMErrataFix.cpp
+  BPSectionOrderer.cpp
   CallGraphSort.cpp
   DWARF.cpp
   Driver.cpp
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index df262fdc811b09..3cdb400e423fd9 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -264,6 +264,12 @@ struct Config {
   bool armBe8 = false;
   BsymbolicKind bsymbolic = BsymbolicKind::None;
   CGProfileSortKind callGraphProfileSort;
+  llvm::StringRef irpgoProfilePath;
+  bool bpStartupFunctionSort = false;
+  bool bpCompressionSortStartupFunctions = false;
+  bool bpFunctionOrderForCompression = false;
+  bool bpDataOrderForCompression = false;
+  bool bpVerboseSectionOrderer = false;
   bool checkSections;
   bool checkDynamicRelocs;
   std::optional<llvm::DebugCompressionType> compressDebugSections;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index d92338608b059d..6275549a13a2be 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1259,6 +1259,55 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
       ctx.arg.bsymbolic = BsymbolicKind::All;
   }
   ctx.arg.callGraphProfileSort = getCGProfileSortKind(ctx, args);
+  ctx.arg.irpgoProfilePath = args.getLastArgValue(OPT_irpgo_profile);
+  ctx.arg.bpCompressionSortStartupFunctions =
+      args.hasFlag(OPT_bp_compression_sort_startup_functions,
+                   OPT_no_bp_compression_sort_startup_functions, false);
+  if (auto *arg = args.getLastArg(OPT_bp_startup_sort)) {
+    StringRef startupSortStr = arg->getValue();
+    if (startupSortStr == "function") {
+      ctx.arg.bpStartupFunctionSort = true;
+    } else if (startupSortStr != "none") {
+      ErrAlways(ctx) << "unknown value '" + startupSortStr + "' for " +
+                            arg->getSpelling();
+    }
+    if (startupSortStr != "none")
+      if (args.hasArg(OPT_call_graph_ordering_file))
+        ErrAlways(ctx) << "--bp-startup-sort=function is incompatible with "
+                          "--call-graph-ordering-file";
+  }
+  if (ctx.arg.irpgoProfilePath.empty()) {
+    if (ctx.arg.bpStartupFunctionSort)
+      ErrAlways(ctx) << "--bp-startup-sort=function must be used with "
+                        "--irpgo-profile";
+    if (ctx.arg.bpCompressionSortStartupFunctions)
+      ErrAlways(ctx)
+          << "--bp-compression-sort-startup-functions must be used with "
+             "--irpgo-profile";
+  }
+
+  if (auto *arg = args.getLastArg(OPT_bp_compression_sort)) {
+    StringRef compressionSortStr = arg->getValue();
+    if (compressionSortStr == "function") {
+      ctx.arg.bpFunctionOrderForCompression = true;
+    } else if (compressionSortStr == "data") {
+      ctx.arg.bpDataOrderForCompression = true;
+    } else if (compressionSortStr == "both") {
+      ctx.arg.bpFunctionOrderForCompression = true;
+      ctx.arg.bpDataOrderForCompression = true;
+    } else if (compressionSortStr != "none") {
+      ErrAlways(ctx) << "unknown value '" + compressionSortStr + "' for " +
+                            arg->getSpelling();
+    }
+    if (ctx.arg.bpDataOrderForCompression ||
+        ctx.arg.bpFunctionOrderForCompression) {
+      if (args.getLastArg(OPT_call_graph_ordering_file) != nullptr) {
+        ErrAlways(ctx) << "--bp-compression-sort is incompatible with "
+                          "--call-graph-ordering-file";
+      }
+    }
+  }
+  ctx.arg.bpVerboseSectionOrderer = args.hasArg(OPT_verbose_bp_section_orderer);
   ctx.arg.checkSections =
       args.hasFlag(OPT_check_sections, OPT_no_check_sections, true);
   ctx.arg.chroot = args.getLastArgValue(OPT_chroot);
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index c31875305952fb..1948a5a524170c 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -141,6 +141,24 @@ def call_graph_profile_sort: JJ<"call-graph-profile-sort=">,
 def : FF<"no-call-graph-profile-sort">, Alias<call_graph_profile_sort>, AliasArgs<["none"]>,
   Flags<[HelpHidden]>;
 
+defm irpgo_profile: Eq<"irpgo-profile",
+    "Read the IRPGO profile for use with -bp-startup-sort and other profile-guided optimizations">;
+
+def bp_startup_sort: JJ<"bp-startup-sort=">,
+    MetaVarName<"[none,function]">,
+    HelpText<"Order sections based on profile data to improve startup time">;
+
+defm bp_compression_sort_startup_functions: BB<"bp-compression-sort-startup-functions",
+    "Order startup functions by balanced partition to improve compressed size in addition to startup time",
+    "Do not order startup function for compression">;
+    
+def bp_compression_sort: JJ<"bp-compression-sort=">,
+    MetaVarName<"[none,function,data,both]">,
+    HelpText<"Order sections by balanced partition to improve compressed size">;
+
+def verbose_bp_section_orderer: FF<"verbose-bp-section-orderer">,
+    HelpText<"Print information on how many sections were ordered by balanced partitioning and a measure of the expected number of page faults">;
+    
 // --chroot doesn't have a help text because it is an internal option.
 def chroot: Separate<["--"], "chroot">;
 
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 487fb119a966b1..43ddff32dfe394 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -9,6 +9,7 @@
 #include "Writer.h"
 #include "AArch64ErrataFix.h"
 #include "ARMErrataFix.h"
+#include "BPSectionOrderer.h"
 #include "CallGraphSort.h"
 #include "Config.h"
 #include "InputFiles.h"
@@ -1082,6 +1083,16 @@ static void maybeShuffle(Ctx &ctx,
 // that don't appear in the order file.
 static DenseMap<const InputSectionBase *, int> buildSectionOrder(Ctx &ctx) {
   DenseMap<const InputSectionBase *, int> sectionOrder;
+  if (ctx.arg.bpStartupFunctionSort || ctx.arg.bpFunctionOrderForCompression ||
+      ctx.arg.bpDataOrderForCompression) {
+    TimeTraceScope timeScope("Balanced Partitioning Section Orderer");
+    sectionOrder = runBalancedPartitioning(
+        ctx, ctx.arg.bpStartupFunctionSort ? ctx.arg.irpgoProfilePath : "",
+        ctx.arg.bpFunctionOrderForCompression,
+        ctx.arg.bpDataOrderForCompression,
+        ctx.arg.bpCompressionSortStartupFunctions,
+        ctx.arg.bpVerboseSectionOrderer);
+  }
   if (!ctx.arg.callGraphProfile.empty())
     sectionOrder = computeCallGraphProfileOrder(ctx);
 
diff --git a/lld/test/ELF/bp-section-orderer-stress.s b/lld/test/ELF/bp-section-orderer-stress.s
new file mode 100644
index 00000000000000..ff10a24e381c25
--- /dev/null
+++ b/lld/test/ELF/bp-section-orderer-stress.s
@@ -0,0 +1,104 @@
+# REQUIRES: aarch64
+
+## Generate a large test case and check that the output is deterministic.
+
+# RUN: %python %s %t.s %t.proftext
+
+# RUN: llvm-mc -filetype=obj -triple=aarch64 %t.s -o %t.o
+# RUN: llvm-profdata merge %t.proftext -o %t.profdata
+
+# RUN: ld.lld --icf=all -o - %t.o --irpgo-profile=%t.profdata --bp-startup-sort=function --bp-compression-sort-startup-functions --bp-compression-sort=both | llvm-nm --numeric-sort --format=just-symbols - > %t.order1.txt
+# RUN: ld.lld --icf=all -o - %t.o --irpgo-profile=%t.profdata --bp-startup-sort=function --bp-compression-sort-startup-functions --bp-compression-sort=both | llvm-nm --numeric-sort --format=just-symbols - > %t.order2.txt
+# RUN: diff %t.order1.txt %t.order2.txt
+
+import random
+import sys
+
+assembly_filepath = sys.argv[1]
+proftext_filepath = sys.argv[2]
+
+random.seed(1234)
+num_functions = 1000
+num_data = 100
+num_traces = 10
+
+function_names = [f"f{n}" for n in range(num_functions)]
+data_names = [f"d{n}" for n in range(num_data)]
+profiled_functions = function_names[: int(num_functions / 2)]
+
+function_contents = [
+    f"""
+{name}:
+  add w0, w0, #{i % 4096}
+  add w1, w1, #{i % 10}
+  add w2, w0, #{i % 20}
+  adrp x3, {name}
+  ret
+"""
+    for i, name in enumerate(function_names)
+]
+
+data_contents = [
+      f"""
+{name}:
+  .ascii "s{i % 2}-{i % 3}-{i % 5}"
+  .xword {name}
+"""
+    for i, name in enumerate(data_names)
+]
+
+trace_contents = [
+    f"""
+# Weight
+1
+{", ".join(random.sample(profiled_functions, len(profiled_functions)))}
+"""
+    for i in range(num_traces)
+]
+
+profile_contents = [
+    f"""
+{name}
+# Func Hash:
+{i}
+# Num Counters:
+1
+# Counter Values:
+1
+"""
+    for i, name in enumerate(profiled_functions)
+]
+
+with open(assembly_filepath, "w") as f:
+    f.write(
+        f"""
+.text
+.globl _start
+
+_start:
+  ret
+
+{"".join(function_contents)}
+
+.data
+{"".join(data_contents)}
+
+"""
+    )
+
+with open(proftext_filepath, "w") as f:
+    f.write(
+        f"""
+:ir
+:temporal_prof_traces
+
+# Num Traces
+{num_traces}
+# Trace Stream Size:
+{num_traces}
+
+{"".join(trace_contents)}
+
+{"".join(profile_contents)}
+"""
+    )
diff --git a/lld/test/ELF/bp-section-orderer.s b/lld/test/ELF/bp-section-orderer.s
new file mode 100644
index 00000000000000..a7130d9b382d9f
--- /dev/null
+++ b/lld/test/ELF/bp-section-orderer.s
@@ -0,0 +1,328 @@
+# REQUIRES: aarch64
+
+# RUN: rm -rf %t && split-file %s %t && cd %t
+
+## Check for incompatible cases
+# RUN: not ld.lld -o /dev/null %t --irpgo-profile=/dev/null --bp-startup-sort=function --call-graph-ordering-file=/dev/null 2>&1 | FileCheck %s --check-prefix=BP-STARTUP-CALLGRAPH-ERR
+# RUN: not ld.lld -o /dev/null --bp-compression-sort=function --call-graph-ordering-file /dev/null 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-CALLGRAPH-ERR
+# RUN: not ld.lld --bp-startup-sort=function 2>&1 | FileCheck %s --check-prefix=BP-STARTUP-ERR
+# RUN: not ld.lld --bp-compression-sort-startup-functions 2>&1 | FileCheck %s --check-prefix=BP-STARTUP-COMPRESSION-ERR
+# RUN: not ld.lld --bp-compression-sort=malformed 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-MALFORM
+
+# BP-STARTUP-CALLGRAPH-ERR: error: --bp-startup-sort=function is incompatible with --call-graph-ordering-file
+# BP-COMPRESSION-CALLGRAPH-ERR: error: --bp-compression-sort is incompatible with --call-graph-ordering-file
+# BP-STARTUP-ERR: error: --bp-startup-sort=function must be used with --irpgo-profile
+# BP-STARTUP-COMPRESSION-ERR: error: --bp-compression-sort-startup-functions must be used with --irpgo-profile
+# BP-COMPRESSION-MALFORM: error: unknown value 'malformed' for --bp-compression-sort=
+
+# RUN: llvm-mc -filetype=obj -triple=aarch64 a.s -o a.o
+# RUN: llvm-profdata merge a.proftext -o a.profdata
+# RUN: ld.lld -o a.out a.o --irpgo-profile=a.profdata --bp-startup-sort=function --verbose-bp-section-orderer --icf=all 2>&1 | FileCheck %s --check-prefix=STARTUP-FUNC-ORDER
+
+# STARTUP-FUNC-ORDER: Ordered 3 sections using balanced partitioning
+
+# RUN: ld.lld -o - a.o --symbol-ordering-file a.orderfile --irpgo-profile=a.profdata --bp-startup-sort=function | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=ORDERFILE
+# RUN: ld.lld -o - a.o --symbol-ordering-file a.orderfile --bp-compression-sort=both | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=ORDERFILE
+
+## Rodata
+# ORDERFILE: s2
+# ORDERFILE: s1
+# ORDERFILE-DAG: s3
+
+## Functions
+# ORDERFILE: A
+# ORDERFILE: F
+# ORDERFILE: E
+# ORDERFILE: D
+# ORDERFILE-DAG: _start
+# ORDERFILE-DAG: B
+# ORDERFILE-DAG: C
+
+## Data
+# ORDERFILE: r3
+# ORDERFILE: r2
+# ORDERFILE-DAG: r1
+# ORDERFILE-DAG: r4
+
+# RUN: ld.lld -o a.out a.o --verbose-bp-section-orderer --bp-compression-sort=function 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-FUNC
+# RUN: ld.lld -o a.out a.o --verbose-bp-section-orderer --bp-compression-sort=data 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-DATA
+# RUN: ld.lld -o a.out a.o --verbose-bp-section-orderer --bp-compression-sort=both 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-BOTH
+# RUN: ld.lld -o a.out a.o --verbose-bp-section-orderer --bp-compression-sort=both --irpgo-profile=a.profdata --bp-startup-sort=function 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-BOTH
+
+# BP-COMPRESSION-FUNC: Ordered 7 sections using balanced partitioning
+# BP-COMPRESSION-DATA: Ordered 7 sections using balanced partitioning
+# BP-COMPRESSION-BOTH: Ordered 14 sections using balanced partitioning
+
+#--- a.proftext
+:ir
+:temporal_prof_traces
+# Num Traces
+1
+# Trace Stream Size:
+1
+# Weight
+1
+A, B, C
+
+A
+# Func Hash:
+1111
+# Num Counters:
+1
+# Counter Values:
+1
+
+B
+# Func Hash:
+2222
+# Num Counters:
+1
+# Counter Values:
+1
+
+C
+# Func Hash:
+3333
+# Num Counters:
+1
+# Counter Values:
+1
+
+D
+# Func Hash:
+4444
+# Num Counters:
+1
+# Counter Values:
+1
+
+#--- a.orderfile
+A
+F
+E
+D
+s2
+s1
+r3
+r2
+
+#--- a.c
+const char s1[] = "hello world";
+const char s2[] = "i am a string";
+const char s3[] = "this is s3";
+const char* r1 = s1;
+const char** r2 = &r1;
+const char*** r3 = &r2;
+const char* r4 = s2;
+void A() {
+    return;
+}
+
+int B(int a) {
+    A();
+    return a + 1;
+}
+
+int C(int a) {
+    A();
+    return a + 2;
+}
+
+int D(int a) {
+    return B(a + 2);
+}
+
+int E(int a) {
+    return C(a + 2);
+}
+
+int F(int a) {
+    return C(a + 3);
+}
+
+int _start() {
+    return 0;
+}
+#--- gen
+clang --target=aarch64-linux-gnu -O0 -ffunction-sections -fdata-sections -fno-exceptions -fno-rtti -fno-asynchronous-unwind-tables -S a.c -o -
+;--- a.s
+	.text
+	.file	"a.c"
+	.section	.text.A,"ax",@progbits
+	.globl	A                               // -- Begin function A
+	.p2align	2
+	.type	A,@function
+A:                                      // @A
+// %bb.0:
+	ret
+.Lfunc_end0:
+	.size	A, .Lfunc_end0-A
+                                        // -- End function
+	.section	.text.B,"ax",@progbits
+	.globl	B                               // -- Begin function B
+	.p2align	2
+	.type	B,@function
+B:                                      // @B
+// %bb.0:
+	sub	sp, sp, #32
+	stp	x29, x30, [sp, #16]             // 16-byte Folded Spill
+	add	x29, sp, #16
+	stur	w0, [x29, #-4]
+	bl	A
+	ldur	w8, [x29, #-4]
+	add	w0, w8, #1
+	ldp	x29, x30, [sp, #16]             // 16-byte Folded Reload
+	add	sp, sp, #32
+	ret
+.Lfunc_end1:
+	.size	B, .Lfunc_end1-B
+                                        // -- End function
+	.section	.text.C,"ax",@progbits
+	.globl	C                               // -- Begin function C
+	.p2align	2
+	.type	C,@function
+C:                                      // @C
+// %bb.0:
+	sub	sp, sp, #32
+	stp	x29, x30, [sp, #16]             // 16-byte Folded Spill
+	add	x29, sp, #16
+	stur	w0, [x29, #-4]
+	bl	A
+	ldur	w8, [x29, #-4]
+	add	w0, w8, #2
+	ldp	x29, x30, [sp, #16]             // 16-byte Folded Reload
+	add	sp, sp, #32
+	ret
+.Lfunc_end2:
+	.size	C, .Lfunc_end2-C
+                                        // -- End function
+	.section	.text.D,"ax",@progbits
+	.globl	D                               // -- Begin function D
+	.p2align	2
+	.type	D,@function
+D:                                      // @D
+// %bb.0:
+	sub	sp, sp, #32
+	stp	x29, x30, [sp, #16]             // 16-byte Folded Spill
+	add	x29, sp, #16
+	stur	w0, [x29, #-4]
+	ldur	w8, [x29, #-4]
+	add	w0, w8, #2
+	bl	B
+	ldp	x29, x30, [sp, #16]             // 16-byte Folded Reload
+	add	sp, sp, #32
+	ret
+.Lfunc_end3:
+	.size	D, .Lfunc_end3-D
+                                        // -- End function
+	.section	.text.E,"ax",@progbits
+	.globl	E                               // -- Begin function E
+	.p2align	2
+	.type	E,@function
+E:                                      // @E
+// %bb.0:
+	sub	sp, sp, #32
+	stp	x29, x30, [sp, #16]             // 16-byte Folded Spill
+	add	x29, sp, #16
+	stur	w0, [x29, #-4]
+	ldur	w8, [x29, #-4]
+	add	w0, w8, #2
+	bl	C
+	ldp	x29, x30, [sp, #16]             // 16-byte Folded Reload
+	add	sp, sp, #32
+	ret
+.Lfunc_end4:
+	.size	E, .Lfunc_end4-E
+                                        // -- End function
+	.section	.text.F,"ax",@progbits
+	.globl	F                               // -- Begin function F
+	.p2align	2
+	.type	F,@function
+F:                                      // @F
+// %bb.0:
+	sub	sp, sp, #32
+	stp	x29, x30, [sp, #16]             // 16-byte Folded Spill
+	add	x29, sp, #16
+	stur	w0, [x29, #-4]
+	ldur	w8, [x29, #-4]
+	add	w0, w8, #3
+	bl	C
+	ldp	x29, x30, [sp, #16]             // 16-byte Folded Reload
+	add	sp, sp, #32
+	ret
+.Lfunc_end5:
+	.size	F, .Lfunc_end5-F
+                                        // -- End function
+	.section	.text._start,"ax",@progbits
+	.globl	_start                          // -- Begin function _start
+	.p2align	2
+	.type	_start,@function
+_start:                                 // @_start
+// %bb.0:
+	mov	w0, wzr
+	ret
+.Lfunc_end6:
+	.size	_start, .Lfunc_end6-_start
+                                        // -- End function
+	.type	s1,@object                      // @s1
+	.section	.rodata.s1,"a",@progbits
+	.globl	s1
+s1:
+	.asciz	"hello world"
+	.size	s1, 12
+
+	.type	s2,@object                      // @s2
+	.section	.rodata.s2,"a",@progbits
+	.globl	s2
+s2:
+	.asciz	"i am a string"
+	.size	s2, 14
+
+	.type	s3,@object                      // @s3
+	.section	.rodata.s3,"a",@progbits
+	.globl	s3
+s3:
+	.asciz	"this is s3"
+	.size	s3, 11
+
+	.type	r1,@object                      // @r1
+	.section	.data.r1,"aw",@progbits
+	.globl	r1
+	.p2align	3, 0x0
+r1:
+	.xword	s1
+	.size	r1, 8
+
+	.type	r2,@object                      // @r2
+	.section	.data.r2,"aw",@progbits
+	.globl	r2
+	.p2align	3, 0x0
+r2:
+	.xword	r1
+	.size	r2, 8
+
+	.type	r3,@object                      // @r3
+	.section	.data.r3,"aw",@progbits
+	.globl	r3
+	.p2align	3, 0x0
+r3:
+	.xword	r2
+	.size	r3, 8
+
+	.type	r4,@object                      // @r4
+	.section	.data.r4,"aw",@progbits
+	.globl	r4
+	.p2align	3, 0x0
+r4:
+	.xword	s2
+	.size	r4, 8
+
+	.section	".note.GNU-stack","",@progbits
+	.addrsig
+	.addrsig_sym A
+	.addrsig_sym B
+	.addrsig_sym C
+	.addrsig_sym s1
+	.addrsig_sym s2
+	.addrsig_sym r1
+	.addrsig_sym r2

>From 6d0fcc9c27165d26e882a35a7ca021933059e2e1 Mon Sep 17 00:00:00 2001
From: Pengying Xu <xpy66swsry at gmail.com>
Date: Wed, 15 Jan 2025 15:10:41 +0800
Subject: [PATCH 02/10] try to refine symbol/section

---
 lld/ELF/BPSectionOrderer.cpp | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/lld/ELF/BPSectionOrderer.cpp b/lld/ELF/BPSectionOrderer.cpp
index b4be4a9aa3b60d..a7905a8a919c1e 100644
--- a/lld/ELF/BPSectionOrderer.cpp
+++ b/lld/ELF/BPSectionOrderer.cpp
@@ -38,20 +38,21 @@ lld::elf::runBalancedPartitioning(Ctx &ctx, llvm::StringRef profilePath,
   SmallVector<std::unique_ptr<BPSectionBase>> sections;
   DenseSet<const InputSectionBase *> seenSections;
 
+  auto addSection = [&](Symbol &sym) {
+    if (sym.getSize() == 0)
+      return;
+    if (auto *d = dyn_cast<Defined>(&sym))
+      if (auto *sec = dyn_cast_or_null<InputSectionBase>(d->section))
+        if (seenSections.insert(sec).second)
+          sections.emplace_back(std::make_unique<BPSectionELF>(sec));
+  };
+
   for (Symbol *sym : ctx.symtab->getSymbols())
-    if (sym->getSize() > 0)
-      if (auto *d = dyn_cast<Defined>(sym))
-        if (auto *sec = dyn_cast_or_null<InputSectionBase>(d->section))
-          if (seenSections.insert(sec).second)
-            sections.emplace_back(std::make_unique<BPSectionELF>(sec));
+    addSection(*sym);
 
   for (ELFFileBase *file : ctx.objectFiles)
     for (Symbol *sym : file->getLocalSymbols())
-      if (sym->getSize() > 0)
-        if (auto *d = dyn_cast<Defined>(sym))
-          if (auto *sec = dyn_cast_or_null<InputSectionBase>(d->section))
-            if (seenSections.insert(sec).second)
-              sections.emplace_back(std::make_unique<BPSectionELF>(sec));
+      addSection(*sym);
 
   auto reorderedSections = BPSectionBase::reorderSectionsByBalancedPartitioning(
       profilePath, forFunctionCompression, forDataCompression,

>From d7055267138da638ffea05ff47d3a24a1af503d1 Mon Sep 17 00:00:00 2001
From: Pengying Xu <xpy66swsry at gmail.com>
Date: Fri, 17 Jan 2025 02:01:26 +0800
Subject: [PATCH 03/10] change content hash as #121729

---
 lld/ELF/BPSectionOrderer.cpp | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/lld/ELF/BPSectionOrderer.cpp b/lld/ELF/BPSectionOrderer.cpp
index a7905a8a919c1e..1236dd28119a24 100644
--- a/lld/ELF/BPSectionOrderer.cpp
+++ b/lld/ELF/BPSectionOrderer.cpp
@@ -17,11 +17,13 @@ void BPSectionELF::getSectionHashes(
     const llvm::DenseMap<const void *, uint64_t> &sectionToIdx) const {
   constexpr unsigned windowSize = 4;
 
-  size_t size = isec->content().size();
-  for (size_t i = 0; i != size; ++i) {
-    auto window = isec->content().drop_front(i).take_front(windowSize);
-    hashes.push_back(xxHash64(window));
-  }
+  // Calculate content hashes: k-mers and the last k-1 bytes.
+  ArrayRef<uint8_t> data = isec->content();
+  if (data.size() >= windowSize)
+    for (size_t i = 0; i <= data.size() - windowSize; ++i)
+      hashes.push_back(llvm::support::endian::read32le(data.data() + i));
+  for (uint8_t byte : data.take_back(windowSize - 1))
+    hashes.push_back(byte);
 
   llvm::sort(hashes);
   hashes.erase(std::unique(hashes.begin(), hashes.end()), hashes.end());

>From cc3febaf5df9ccebaa7aa6ec5892d674d0dfcfaa Mon Sep 17 00:00:00 2001
From: Pengying Xu <xpy66swsry at gmail.com>
Date: Thu, 23 Jan 2025 18:02:52 +0800
Subject: [PATCH 04/10] remove useless header

---
 lld/ELF/BPSectionOrderer.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lld/ELF/BPSectionOrderer.cpp b/lld/ELF/BPSectionOrderer.cpp
index 1236dd28119a24..5c3618cc7193c6 100644
--- a/lld/ELF/BPSectionOrderer.cpp
+++ b/lld/ELF/BPSectionOrderer.cpp
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "BPSectionOrderer.h"
-#include "llvm/Support/xxhash.h"
 
 using namespace llvm;
 using namespace lld::elf;

>From dad8b8b57617ccd2d93cd32f664aa25845dccd24 Mon Sep 17 00:00:00 2001
From: Pengying Xu <xpy66swsry at gmail.com>
Date: Tue, 28 Jan 2025 14:54:32 +0800
Subject: [PATCH 05/10] omit -o /dev/null

---
 lld/test/ELF/bp-section-orderer.s | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lld/test/ELF/bp-section-orderer.s b/lld/test/ELF/bp-section-orderer.s
index a7130d9b382d9f..bbb74ad4aa6f4c 100644
--- a/lld/test/ELF/bp-section-orderer.s
+++ b/lld/test/ELF/bp-section-orderer.s
@@ -3,8 +3,8 @@
 # RUN: rm -rf %t && split-file %s %t && cd %t
 
 ## Check for incompatible cases
-# RUN: not ld.lld -o /dev/null %t --irpgo-profile=/dev/null --bp-startup-sort=function --call-graph-ordering-file=/dev/null 2>&1 | FileCheck %s --check-prefix=BP-STARTUP-CALLGRAPH-ERR
-# RUN: not ld.lld -o /dev/null --bp-compression-sort=function --call-graph-ordering-file /dev/null 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-CALLGRAPH-ERR
+# RUN: not ld.lld %t --irpgo-profile=/dev/null --bp-startup-sort=function --call-graph-ordering-file=/dev/null 2>&1 | FileCheck %s --check-prefix=BP-STARTUP-CALLGRAPH-ERR
+# RUN: not ld.lld --bp-compression-sort=function --call-graph-ordering-file /dev/null 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-CALLGRAPH-ERR
 # RUN: not ld.lld --bp-startup-sort=function 2>&1 | FileCheck %s --check-prefix=BP-STARTUP-ERR
 # RUN: not ld.lld --bp-compression-sort-startup-functions 2>&1 | FileCheck %s --check-prefix=BP-STARTUP-COMPRESSION-ERR
 # RUN: not ld.lld --bp-compression-sort=malformed 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-MALFORM

>From dac737ee926315620d5b4ae981f80c17be3462c1 Mon Sep 17 00:00:00 2001
From: Pengying Xu <xpy66swsry at gmail.com>
Date: Tue, 28 Jan 2025 15:47:33 +0800
Subject: [PATCH 06/10] rewrite BPSectionOrder in ELF following #124482

---
 lld/ELF/BPSectionOrderer.cpp | 121 ++++++++++++++++++++++++-----------
 lld/ELF/BPSectionOrderer.h   |  83 ++----------------------
 2 files changed, 88 insertions(+), 116 deletions(-)

diff --git a/lld/ELF/BPSectionOrderer.cpp b/lld/ELF/BPSectionOrderer.cpp
index 5c3618cc7193c6..07da7869a77ff1 100644
--- a/lld/ELF/BPSectionOrderer.cpp
+++ b/lld/ELF/BPSectionOrderer.cpp
@@ -1,4 +1,4 @@
-//===- BPSectionOrderer.cpp------------------------------------------------===//
+//===- BPSectionOrderer.cpp -----------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -7,36 +7,82 @@
 //===----------------------------------------------------------------------===//
 
 #include "BPSectionOrderer.h"
+#include "InputFiles.h"
+#include "InputSection.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "lld/Common/BPSectionOrdererBase.inc"
+#include "llvm/Support/Endian.h"
+
+#define DEBUG_TYPE "bp-section-orderer"
 
 using namespace llvm;
 using namespace lld::elf;
 
-void BPSectionELF::getSectionHashes(
-    llvm::SmallVectorImpl<uint64_t> &hashes,
-    const llvm::DenseMap<const void *, uint64_t> &sectionToIdx) const {
-  constexpr unsigned windowSize = 4;
+namespace {
+struct BPOrdererELF;
+}
+template <> struct lld::BPOrdererTraits<struct BPOrdererELF> {
+  using Section = elf::InputSectionBase;
+  using Symbol = elf::Symbol;
+};
+namespace {
+struct BPOrdererELF : lld::BPOrderer<BPOrdererELF> {
+  static uint64_t getSize(const Section &sec) { return sec.getSize(); }
+  static bool isCodeSection(const Section &sec) {
+    return sec.flags & llvm::ELF::SHF_EXECINSTR;
+  }
+  static SmallVector<Symbol *, 0> getSymbols(const Section &sec) {
+    SmallVector<Symbol *, 0> symbols;
+    for (auto *sym : sec.file->getSymbols())
+      if (auto *d = llvm::dyn_cast_or_null<Defined>(sym))
+        if (d->size > 0 && d->section == &sec)
+          symbols.emplace_back(d);
+    return symbols;
+  }
+
+  std::optional<StringRef> static getResolvedLinkageName(llvm::StringRef name) {
+    return name;
+  }
+
+  static void
+  getSectionHashes(const Section &sec, llvm::SmallVectorImpl<uint64_t> &hashes,
+                   const llvm::DenseMap<const void *, uint64_t> &sectionToIdx) {
+    constexpr unsigned windowSize = 4;
 
-  // Calculate content hashes: k-mers and the last k-1 bytes.
-  ArrayRef<uint8_t> data = isec->content();
-  if (data.size() >= windowSize)
-    for (size_t i = 0; i <= data.size() - windowSize; ++i)
-      hashes.push_back(llvm::support::endian::read32le(data.data() + i));
-  for (uint8_t byte : data.take_back(windowSize - 1))
-    hashes.push_back(byte);
+    // Calculate content hashes: k-mers and the last k-1 bytes.
+    ArrayRef<uint8_t> data = sec.content();
+    if (data.size() >= windowSize)
+      for (size_t i = 0; i <= data.size() - windowSize; ++i)
+        hashes.push_back(llvm::support::endian::read32le(data.data() + i));
+    for (uint8_t byte : data.take_back(windowSize - 1))
+      hashes.push_back(byte);
 
-  llvm::sort(hashes);
-  hashes.erase(std::unique(hashes.begin(), hashes.end()), hashes.end());
-}
+    llvm::sort(hashes);
+    hashes.erase(std::unique(hashes.begin(), hashes.end()), hashes.end());
+  }
+
+  static llvm::StringRef getSymName(const Symbol &sym) { return sym.getName(); }
+  static uint64_t getSymValue(const Symbol &sym) {
+    if (auto *d = dyn_cast<Defined>(&sym))
+      return d->value;
+    return 0;
+  }
+  static uint64_t getSymSize(const Symbol &sym) {
+    if (auto *d = dyn_cast<Defined>(&sym))
+      return d->size;
+    return 0;
+  }
+};
+} // namespace
 
-llvm::DenseMap<const lld::elf::InputSectionBase *, int>
-lld::elf::runBalancedPartitioning(Ctx &ctx, llvm::StringRef profilePath,
-                                  bool forFunctionCompression,
-                                  bool forDataCompression,
-                                  bool compressionSortStartupFunctions,
-                                  bool verbose) {
-  // Collect sections from symbols and wrap as BPSectionELF instances.
-  // Deduplicates sections referenced by multiple symbols.
-  SmallVector<std::unique_ptr<BPSectionBase>> sections;
+DenseMap<const InputSectionBase *, int> lld::elf::runBalancedPartitioning(
+    Ctx &ctx, StringRef profilePath, bool forFunctionCompression,
+    bool forDataCompression, bool compressionSortStartupFunctions,
+    bool verbose) {
+  // Collect candidate sections and associated symbols.
+  SmallVector<InputSectionBase *> sections;
+  DenseMap<CachedHashStringRef, DenseSet<unsigned>> rootSymbolToSectionIdxs;
   DenseSet<const InputSectionBase *> seenSections;
 
   auto addSection = [&](Symbol &sym) {
@@ -44,8 +90,15 @@ lld::elf::runBalancedPartitioning(Ctx &ctx, llvm::StringRef profilePath,
       return;
     if (auto *d = dyn_cast<Defined>(&sym))
       if (auto *sec = dyn_cast_or_null<InputSectionBase>(d->section))
-        if (seenSections.insert(sec).second)
-          sections.emplace_back(std::make_unique<BPSectionELF>(sec));
+        if (seenSections.insert(sec).second) {
+          size_t idx = sections.size();
+          sections.emplace_back(sec);
+          auto rootName = getRootSymbol(sym.getName());
+          rootSymbolToSectionIdxs[CachedHashStringRef(rootName)].insert(idx);
+          if (auto linkageName = BPOrdererELF::getResolvedLinkageName(rootName))
+            rootSymbolToSectionIdxs[CachedHashStringRef(*linkageName)].insert(
+                idx);
+        }
   };
 
   for (Symbol *sym : ctx.symtab->getSymbols())
@@ -55,16 +108,8 @@ lld::elf::runBalancedPartitioning(Ctx &ctx, llvm::StringRef profilePath,
     for (Symbol *sym : file->getLocalSymbols())
       addSection(*sym);
 
-  auto reorderedSections = BPSectionBase::reorderSectionsByBalancedPartitioning(
-      profilePath, forFunctionCompression, forDataCompression,
-      compressionSortStartupFunctions, verbose, sections);
-
-  DenseMap<const InputSectionBase *, int> result;
-  for (const auto [sec, priority] : reorderedSections) {
-    auto *elfSection = cast<BPSectionELF>(sec);
-    result.try_emplace(
-        static_cast<const InputSectionBase *>(elfSection->getSection()),
-        static_cast<int>(priority));
-  }
-  return result;
+  return BPOrdererELF::computeOrder(profilePath, forFunctionCompression,
+                                    forDataCompression,
+                                    compressionSortStartupFunctions, verbose,
+                                    sections, rootSymbolToSectionIdxs);
 }
diff --git a/lld/ELF/BPSectionOrderer.h b/lld/ELF/BPSectionOrderer.h
index 6e31e08bdf9a6a..e9a58b85b1b0d3 100644
--- a/lld/ELF/BPSectionOrderer.h
+++ b/lld/ELF/BPSectionOrderer.h
@@ -14,96 +14,23 @@
 #ifndef LLD_ELF_BPSECTION_ORDERER_H
 #define LLD_ELF_BPSECTION_ORDERER_H
 
-#include "InputFiles.h"
-#include "InputSection.h"
-#include "SymbolTable.h"
-#include "lld/Common/BPSectionOrdererBase.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/BinaryFormat/ELF.h"
 
 namespace lld::elf {
-
-class InputSection;
-
-class BPSymbolELF : public BPSymbol {
-  const Symbol *sym;
-
-public:
-  explicit BPSymbolELF(const Symbol *s) : sym(s) {}
-
-  llvm::StringRef getName() const override { return sym->getName(); }
-
-  const Defined *asDefined() const { return llvm::dyn_cast<Defined>(sym); }
-
-  std::optional<uint64_t> getValue() const override {
-    if (auto *d = asDefined())
-      return d->value;
-    return {};
-  }
-
-  std::optional<uint64_t> getSize() const override {
-    if (auto *d = asDefined())
-      return d->size;
-    return {};
-  }
-
-  InputSectionBase *getInputSection() const {
-    if (auto *d = asDefined())
-      return llvm::dyn_cast_or_null<InputSectionBase>(d->section);
-    return nullptr;
-  }
-
-  const Symbol *getSymbol() const { return sym; }
-};
-
-class BPSectionELF : public BPSectionBase {
-  const InputSectionBase *isec;
-
-public:
-  explicit BPSectionELF(const InputSectionBase *sec) : isec(sec) {}
-
-  const void *getSection() const override { return isec; }
-
-  uint64_t getSize() const override { return isec->getSize(); }
-
-  bool isCodeSection() const override {
-    return isec->flags & llvm::ELF::SHF_EXECINSTR;
-  }
-
-  SmallVector<std::unique_ptr<BPSymbol>> getSymbols() const override {
-    SmallVector<std::unique_ptr<BPSymbol>> symbols;
-    for (Symbol *sym : isec->file->getSymbols())
-      if (auto *d = dyn_cast<Defined>(sym))
-        if (d->size > 0 && d->section == isec)
-          symbols.emplace_back(std::make_unique<BPSymbolELF>(sym));
-
-    return symbols;
-  }
-
-  std::optional<StringRef>
-  getResolvedLinkageName(llvm::StringRef name) const override {
-    return {};
-  }
-
-  void getSectionHashes(llvm::SmallVectorImpl<uint64_t> &hashes,
-                        const llvm::DenseMap<const void *, uint64_t>
-                            &sectionToIdx) const override;
-
-  static bool classof(const BPSectionBase *s) { return true; }
-};
+struct Ctx;
+class InputSectionBase;
 
 /// Run Balanced Partitioning to find the optimal function and data order to
 /// improve startup time and compressed size.
 ///
-/// It is important that -ffunction-sections and -fdata-sections are used to
-/// ensure functions and data are in their own sections and thus can be
-/// reordered.
+/// It is important that .subsections_via_symbols is used to ensure functions
+/// and data are in their own sections and thus can be reordered.
 llvm::DenseMap<const InputSectionBase *, int>
 runBalancedPartitioning(Ctx &ctx, llvm::StringRef profilePath,
                         bool forFunctionCompression, bool forDataCompression,
                         bool compressionSortStartupFunctions, bool verbose);
+
 } // namespace lld::elf
 
 #endif

>From d01984f3f86ab4a0d9b502bd33af73aadee6fe99 Mon Sep 17 00:00:00 2001
From: Pengying Xu <xpy66swsry at gmail.com>
Date: Wed, 29 Jan 2025 16:24:28 +0800
Subject: [PATCH 07/10] fix: address code review feedback

---
 lld/ELF/BPSectionOrderer.cpp             |  2 +-
 lld/ELF/BPSectionOrderer.h               |  5 ++--
 lld/ELF/Options.td                       |  2 +-
 lld/test/ELF/bp-section-orderer-stress.s |  6 ++--
 lld/test/ELF/bp-section-orderer.s        | 37 +++++++++---------------
 5 files changed, 22 insertions(+), 30 deletions(-)

diff --git a/lld/ELF/BPSectionOrderer.cpp b/lld/ELF/BPSectionOrderer.cpp
index 07da7869a77ff1..a75e38dba414bf 100644
--- a/lld/ELF/BPSectionOrderer.cpp
+++ b/lld/ELF/BPSectionOrderer.cpp
@@ -42,7 +42,7 @@ struct BPOrdererELF : lld::BPOrderer<BPOrdererELF> {
   }
 
   std::optional<StringRef> static getResolvedLinkageName(llvm::StringRef name) {
-    return name;
+    return {};
   }
 
   static void
diff --git a/lld/ELF/BPSectionOrderer.h b/lld/ELF/BPSectionOrderer.h
index e9a58b85b1b0d3..a0cb1360005a6b 100644
--- a/lld/ELF/BPSectionOrderer.h
+++ b/lld/ELF/BPSectionOrderer.h
@@ -24,8 +24,9 @@ class InputSectionBase;
 /// Run Balanced Partitioning to find the optimal function and data order to
 /// improve startup time and compressed size.
 ///
-/// It is important that .subsections_via_symbols is used to ensure functions
-/// and data are in their own sections and thus can be reordered.
+/// It is important that -ffunction-sections and -fdata-sections compiler flags
+/// are used to ensure functions and data are in their own sections and thus
+/// can be reordered.
 llvm::DenseMap<const InputSectionBase *, int>
 runBalancedPartitioning(Ctx &ctx, llvm::StringRef profilePath,
                         bool forFunctionCompression, bool forDataCompression,
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index 1948a5a524170c..93852fe0890eb3 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -141,7 +141,7 @@ def call_graph_profile_sort: JJ<"call-graph-profile-sort=">,
 def : FF<"no-call-graph-profile-sort">, Alias<call_graph_profile_sort>, AliasArgs<["none"]>,
   Flags<[HelpHidden]>;
 
-defm irpgo_profile: Eq<"irpgo-profile",
+defm irpgo_profile: EEq<"irpgo-profile",
     "Read the IRPGO profile for use with -bp-startup-sort and other profile-guided optimizations">;
 
 def bp_startup_sort: JJ<"bp-startup-sort=">,
diff --git a/lld/test/ELF/bp-section-orderer-stress.s b/lld/test/ELF/bp-section-orderer-stress.s
index ff10a24e381c25..da9670933949f9 100644
--- a/lld/test/ELF/bp-section-orderer-stress.s
+++ b/lld/test/ELF/bp-section-orderer-stress.s
@@ -7,9 +7,9 @@
 # RUN: llvm-mc -filetype=obj -triple=aarch64 %t.s -o %t.o
 # RUN: llvm-profdata merge %t.proftext -o %t.profdata
 
-# RUN: ld.lld --icf=all -o - %t.o --irpgo-profile=%t.profdata --bp-startup-sort=function --bp-compression-sort-startup-functions --bp-compression-sort=both | llvm-nm --numeric-sort --format=just-symbols - > %t.order1.txt
-# RUN: ld.lld --icf=all -o - %t.o --irpgo-profile=%t.profdata --bp-startup-sort=function --bp-compression-sort-startup-functions --bp-compression-sort=both | llvm-nm --numeric-sort --format=just-symbols - > %t.order2.txt
-# RUN: diff %t.order1.txt %t.order2.txt
+# RUN: ld.lld --icf=all -o %t1.o %t.o --irpgo-profile=%t.profdata --bp-startup-sort=function --bp-compression-sort-startup-functions --bp-compression-sort=both
+# RUN: ld.lld --icf=all -o %t2.o %t.o --irpgo-profile=%t.profdata --bp-startup-sort=function --bp-compression-sort-startup-functions --bp-compression-sort=both
+# RUN: cmp %t1.o %t2.o
 
 import random
 import sys
diff --git a/lld/test/ELF/bp-section-orderer.s b/lld/test/ELF/bp-section-orderer.s
index bbb74ad4aa6f4c..eef05e8c7ea44b 100644
--- a/lld/test/ELF/bp-section-orderer.s
+++ b/lld/test/ELF/bp-section-orderer.s
@@ -110,39 +110,30 @@ r2
 const char s1[] = "hello world";
 const char s2[] = "i am a string";
 const char s3[] = "this is s3";
-const char* r1 = s1;
-const char** r2 = &r1;
-const char*** r3 = &r2;
-const char* r4 = s2;
-void A() {
-    return;
-}
+const char *r1 = s1;
+const char **r2 = &r1;
+const char ***r3 = &r2;
+const char *r4 = s2;
+void A() { return; }
 
 int B(int a) {
-    A();
-    return a + 1;
+  A();
+  return a + 1;
 }
 
 int C(int a) {
-    A();
-    return a + 2;
+  A();
+  return a + 2;
 }
 
-int D(int a) {
-    return B(a + 2);
-}
+int D(int a) { return B(a + 2); }
 
-int E(int a) {
-    return C(a + 2);
-}
+int E(int a) { return C(a + 2); }
 
-int F(int a) {
-    return C(a + 3);
-}
+int F(int a) { return C(a + 3); }
+
+int _start() { return 0; }
 
-int _start() {
-    return 0;
-}
 #--- gen
 clang --target=aarch64-linux-gnu -O0 -ffunction-sections -fdata-sections -fno-exceptions -fno-rtti -fno-asynchronous-unwind-tables -S a.c -o -
 ;--- a.s

>From 4dd0768932923d0fab711d6610a383d298b6ed19 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Sun, 2 Feb 2025 13:04:31 -0800
Subject: [PATCH 08/10] [cmake] Add ProfileData to fix BUILD_SHARED_LIBS=on
 builds

---
 lld/ELF/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lld/ELF/CMakeLists.txt b/lld/ELF/CMakeLists.txt
index 298443cd6ea42c..ec3f6382282b1f 100644
--- a/lld/ELF/CMakeLists.txt
+++ b/lld/ELF/CMakeLists.txt
@@ -73,6 +73,7 @@ add_lld_library(lldELF
   Object
   Option
   Passes
+  ProfileData
   Support
   TargetParser
   TransformUtils

>From 91fe6b3466e7f46ad11d6361f140552a834c03ed Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Sun, 2 Feb 2025 13:08:25 -0800
Subject: [PATCH 09/10] Simplify

---
 lld/ELF/BPSectionOrderer.cpp      | 23 +++-----
 lld/ELF/Driver.cpp                | 97 +++++++++++++++----------------
 lld/ELF/Options.td                | 23 +++-----
 lld/test/ELF/bp-section-orderer.s |  6 +-
 4 files changed, 69 insertions(+), 80 deletions(-)

diff --git a/lld/ELF/BPSectionOrderer.cpp b/lld/ELF/BPSectionOrderer.cpp
index a75e38dba414bf..9a791a603327cc 100644
--- a/lld/ELF/BPSectionOrderer.cpp
+++ b/lld/ELF/BPSectionOrderer.cpp
@@ -14,8 +14,6 @@
 #include "lld/Common/BPSectionOrdererBase.inc"
 #include "llvm/Support/Endian.h"
 
-#define DEBUG_TYPE "bp-section-orderer"
-
 using namespace llvm;
 using namespace lld::elf;
 
@@ -86,24 +84,19 @@ DenseMap<const InputSectionBase *, int> lld::elf::runBalancedPartitioning(
   DenseSet<const InputSectionBase *> seenSections;
 
   auto addSection = [&](Symbol &sym) {
-    if (sym.getSize() == 0)
+    auto *d = dyn_cast<Defined>(&sym);
+    if (!d || d->size == 0)
       return;
-    if (auto *d = dyn_cast<Defined>(&sym))
-      if (auto *sec = dyn_cast_or_null<InputSectionBase>(d->section))
-        if (seenSections.insert(sec).second) {
-          size_t idx = sections.size();
-          sections.emplace_back(sec);
-          auto rootName = getRootSymbol(sym.getName());
-          rootSymbolToSectionIdxs[CachedHashStringRef(rootName)].insert(idx);
-          if (auto linkageName = BPOrdererELF::getResolvedLinkageName(rootName))
-            rootSymbolToSectionIdxs[CachedHashStringRef(*linkageName)].insert(
-                idx);
-        }
+    auto *sec = dyn_cast_or_null<InputSectionBase>(d->section);
+    if (sec && seenSections.insert(sec).second) {
+      rootSymbolToSectionIdxs[CachedHashStringRef(getRootSymbol(sym.getName()))]
+          .insert(sections.size());
+      sections.emplace_back(sec);
+    }
   };
 
   for (Symbol *sym : ctx.symtab->getSymbols())
     addSection(*sym);
-
   for (ELFFileBase *file : ctx.objectFiles)
     for (Symbol *sym : file->getLocalSymbols())
       addSection(*sym);
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 6275549a13a2be..2835b86d05e9cc 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1118,6 +1118,53 @@ static CGProfileSortKind getCGProfileSortKind(Ctx &ctx,
   return CGProfileSortKind::None;
 }
 
+static void parseBPOrdererOptions(Ctx &ctx, opt::InputArgList &args) {
+  if (auto *arg = args.getLastArg(OPT_bp_compression_sort)) {
+    StringRef s = arg->getValue();
+    if (s == "function") {
+      ctx.arg.bpFunctionOrderForCompression = true;
+    } else if (s == "data") {
+      ctx.arg.bpDataOrderForCompression = true;
+    } else if (s == "both") {
+      ctx.arg.bpFunctionOrderForCompression = true;
+      ctx.arg.bpDataOrderForCompression = true;
+    } else if (s != "none") {
+      ErrAlways(ctx) << arg->getSpelling()
+                     << ": expected [none|function|data|both]";
+    }
+    if (s != "none" && args.hasArg(OPT_call_graph_ordering_file))
+      ErrAlways(ctx) << "--bp-compression-sort is incompatible with "
+                        "--call-graph-ordering-file";
+  }
+  if (auto *arg = args.getLastArg(OPT_bp_startup_sort)) {
+    StringRef s = arg->getValue();
+    if (s == "function") {
+      ctx.arg.bpStartupFunctionSort = true;
+    } else if (s != "none") {
+      ErrAlways(ctx) << arg->getSpelling() << ": expected [none|function]";
+    }
+    if (s != "none" && args.hasArg(OPT_call_graph_ordering_file))
+      ErrAlways(ctx) << "--bp-startup-sort=function is incompatible with "
+                        "--call-graph-ordering-file";
+  }
+
+  ctx.arg.bpCompressionSortStartupFunctions =
+      args.hasFlag(OPT_bp_compression_sort_startup_functions,
+                   OPT_no_bp_compression_sort_startup_functions, false);
+  ctx.arg.bpVerboseSectionOrderer = args.hasArg(OPT_verbose_bp_section_orderer);
+
+  ctx.arg.irpgoProfilePath = args.getLastArgValue(OPT_irpgo_profile);
+  if (ctx.arg.irpgoProfilePath.empty()) {
+    if (ctx.arg.bpStartupFunctionSort)
+      ErrAlways(ctx) << "--bp-startup-sort=function must be used with "
+                        "--irpgo-profile";
+    if (ctx.arg.bpCompressionSortStartupFunctions)
+      ErrAlways(ctx)
+          << "--bp-compression-sort-startup-functions must be used with "
+             "--irpgo-profile";
+  }
+}
+
 static DebugCompressionType getCompressionType(Ctx &ctx, StringRef s,
                                                StringRef option) {
   DebugCompressionType type = StringSwitch<DebugCompressionType>(s)
@@ -1259,55 +1306,7 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
       ctx.arg.bsymbolic = BsymbolicKind::All;
   }
   ctx.arg.callGraphProfileSort = getCGProfileSortKind(ctx, args);
-  ctx.arg.irpgoProfilePath = args.getLastArgValue(OPT_irpgo_profile);
-  ctx.arg.bpCompressionSortStartupFunctions =
-      args.hasFlag(OPT_bp_compression_sort_startup_functions,
-                   OPT_no_bp_compression_sort_startup_functions, false);
-  if (auto *arg = args.getLastArg(OPT_bp_startup_sort)) {
-    StringRef startupSortStr = arg->getValue();
-    if (startupSortStr == "function") {
-      ctx.arg.bpStartupFunctionSort = true;
-    } else if (startupSortStr != "none") {
-      ErrAlways(ctx) << "unknown value '" + startupSortStr + "' for " +
-                            arg->getSpelling();
-    }
-    if (startupSortStr != "none")
-      if (args.hasArg(OPT_call_graph_ordering_file))
-        ErrAlways(ctx) << "--bp-startup-sort=function is incompatible with "
-                          "--call-graph-ordering-file";
-  }
-  if (ctx.arg.irpgoProfilePath.empty()) {
-    if (ctx.arg.bpStartupFunctionSort)
-      ErrAlways(ctx) << "--bp-startup-sort=function must be used with "
-                        "--irpgo-profile";
-    if (ctx.arg.bpCompressionSortStartupFunctions)
-      ErrAlways(ctx)
-          << "--bp-compression-sort-startup-functions must be used with "
-             "--irpgo-profile";
-  }
-
-  if (auto *arg = args.getLastArg(OPT_bp_compression_sort)) {
-    StringRef compressionSortStr = arg->getValue();
-    if (compressionSortStr == "function") {
-      ctx.arg.bpFunctionOrderForCompression = true;
-    } else if (compressionSortStr == "data") {
-      ctx.arg.bpDataOrderForCompression = true;
-    } else if (compressionSortStr == "both") {
-      ctx.arg.bpFunctionOrderForCompression = true;
-      ctx.arg.bpDataOrderForCompression = true;
-    } else if (compressionSortStr != "none") {
-      ErrAlways(ctx) << "unknown value '" + compressionSortStr + "' for " +
-                            arg->getSpelling();
-    }
-    if (ctx.arg.bpDataOrderForCompression ||
-        ctx.arg.bpFunctionOrderForCompression) {
-      if (args.getLastArg(OPT_call_graph_ordering_file) != nullptr) {
-        ErrAlways(ctx) << "--bp-compression-sort is incompatible with "
-                          "--call-graph-ordering-file";
-      }
-    }
-  }
-  ctx.arg.bpVerboseSectionOrderer = args.hasArg(OPT_verbose_bp_section_orderer);
+  parseBPOrdererOptions(ctx, args);
   ctx.arg.checkSections =
       args.hasFlag(OPT_check_sections, OPT_no_check_sections, true);
   ctx.arg.chroot = args.getLastArgValue(OPT_chroot);
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index 93852fe0890eb3..80032490da0de4 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -142,23 +142,18 @@ def : FF<"no-call-graph-profile-sort">, Alias<call_graph_profile_sort>, AliasArg
   Flags<[HelpHidden]>;
 
 defm irpgo_profile: EEq<"irpgo-profile",
-    "Read the IRPGO profile for use with -bp-startup-sort and other profile-guided optimizations">;
-
-def bp_startup_sort: JJ<"bp-startup-sort=">,
-    MetaVarName<"[none,function]">,
-    HelpText<"Order sections based on profile data to improve startup time">;
+  "Read a temporal profile file for use with --bp-startup-sort=">;
+def bp_compression_sort: JJ<"bp-compression-sort=">, MetaVarName<"[none,function,data,both]">,
+  HelpText<"Improve Lempel-Ziv compression by grouping similar sections together, resulting in a smaller compressed app size">;
+def bp_startup_sort: JJ<"bp-startup-sort=">, MetaVarName<"[none,function]">,
+  HelpText<"Utilize a temporal profile file to reduce page faults during program startup">;
 
+// Auxiliary options related to balanced partition
 defm bp_compression_sort_startup_functions: BB<"bp-compression-sort-startup-functions",
-    "Order startup functions by balanced partition to improve compressed size in addition to startup time",
-    "Do not order startup function for compression">;
-    
-def bp_compression_sort: JJ<"bp-compression-sort=">,
-    MetaVarName<"[none,function,data,both]">,
-    HelpText<"Order sections by balanced partition to improve compressed size">;
-
+  "When --irpgo-profile is specified, prioritize function similarity for compression in addition to startup time", "">;
 def verbose_bp_section_orderer: FF<"verbose-bp-section-orderer">,
-    HelpText<"Print information on how many sections were ordered by balanced partitioning and a measure of the expected number of page faults">;
-    
+  HelpText<"Print information on balanced partitioning">;
+
 // --chroot doesn't have a help text because it is an internal option.
 def chroot: Separate<["--"], "chroot">;
 
diff --git a/lld/test/ELF/bp-section-orderer.s b/lld/test/ELF/bp-section-orderer.s
index eef05e8c7ea44b..76fcad32f9639c 100644
--- a/lld/test/ELF/bp-section-orderer.s
+++ b/lld/test/ELF/bp-section-orderer.s
@@ -7,13 +7,15 @@
 # RUN: not ld.lld --bp-compression-sort=function --call-graph-ordering-file /dev/null 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-CALLGRAPH-ERR
 # RUN: not ld.lld --bp-startup-sort=function 2>&1 | FileCheck %s --check-prefix=BP-STARTUP-ERR
 # RUN: not ld.lld --bp-compression-sort-startup-functions 2>&1 | FileCheck %s --check-prefix=BP-STARTUP-COMPRESSION-ERR
-# RUN: not ld.lld --bp-compression-sort=malformed 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-MALFORM
+# RUN: not ld.lld --bp-startup-sort=invalid --bp-compression-sort=invalid 2>&1 | FileCheck %s --check-prefix=BP-INVALID
 
 # BP-STARTUP-CALLGRAPH-ERR: error: --bp-startup-sort=function is incompatible with --call-graph-ordering-file
 # BP-COMPRESSION-CALLGRAPH-ERR: error: --bp-compression-sort is incompatible with --call-graph-ordering-file
 # BP-STARTUP-ERR: error: --bp-startup-sort=function must be used with --irpgo-profile
 # BP-STARTUP-COMPRESSION-ERR: error: --bp-compression-sort-startup-functions must be used with --irpgo-profile
-# BP-COMPRESSION-MALFORM: error: unknown value 'malformed' for --bp-compression-sort=
+
+# BP-INVALID: error: --bp-compression-sort=: expected [none|function|data|both]
+# BP-INVALID: error: --bp-startup-sort=: expected [none|function]
 
 # RUN: llvm-mc -filetype=obj -triple=aarch64 a.s -o a.o
 # RUN: llvm-profdata merge a.proftext -o a.profdata

>From 1d3f56816be73c5ababf15d8f75e488bfb1a54b7 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Sun, 2 Feb 2025 13:55:01 -0800
Subject: [PATCH 10/10] Ignore callGraphProfile when BP orderer is used

---
 lld/ELF/BPSectionOrderer.cpp | 2 +-
 lld/ELF/Writer.cpp           | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/lld/ELF/BPSectionOrderer.cpp b/lld/ELF/BPSectionOrderer.cpp
index 9a791a603327cc..40ddd4825ae6e0 100644
--- a/lld/ELF/BPSectionOrderer.cpp
+++ b/lld/ELF/BPSectionOrderer.cpp
@@ -74,7 +74,7 @@ struct BPOrdererELF : lld::BPOrderer<BPOrdererELF> {
 };
 } // namespace
 
-DenseMap<const InputSectionBase *, int> lld::elf::runBalancedPartitioning(
+DenseMap<const InputSectionBase *, int> elf::runBalancedPartitioning(
     Ctx &ctx, StringRef profilePath, bool forFunctionCompression,
     bool forDataCompression, bool compressionSortStartupFunctions,
     bool verbose) {
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 43ddff32dfe394..7e3a920df19ec3 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -1092,9 +1092,9 @@ static DenseMap<const InputSectionBase *, int> buildSectionOrder(Ctx &ctx) {
         ctx.arg.bpDataOrderForCompression,
         ctx.arg.bpCompressionSortStartupFunctions,
         ctx.arg.bpVerboseSectionOrderer);
-  }
-  if (!ctx.arg.callGraphProfile.empty())
+  } else if (!ctx.arg.callGraphProfile.empty()) {
     sectionOrder = computeCallGraphProfileOrder(ctx);
+  }
 
   if (ctx.arg.symbolOrderingFile.empty())
     return sectionOrder;



More information about the llvm-commits mailing list