[lld] 0154dce - [ELF] Add BPSectionOrderer options (#120514)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 2 17:33:23 PST 2025
Author: Pengying Xu
Date: 2025-02-02T17:33:19-08:00
New Revision: 0154dce8d39d2688b09f4e073fe601099a399365
URL: https://github.com/llvm/llvm-project/commit/0154dce8d39d2688b09f4e073fe601099a399365
DIFF: https://github.com/llvm/llvm-project/commit/0154dce8d39d2688b09f4e073fe601099a399365.diff
LOG: [ELF] Add BPSectionOrderer options (#120514)
Add new ELF linker options for profile-guided section ordering
optimizations:
- `--irpgo-profile=<file>`: Read IRPGO profile data for use with startup
and compression optimizations
- `--bp-startup-sort={none,function}`: Order sections based on profile
data to improve startup time
- `--bp-compression-sort={none,function,data,both}`: Order sections
using balanced partitioning to improve compressed size
- `--bp-compression-sort-startup-functions`: Additionally optimize
startup functions for compression
- `--verbose-bp-section-orderer`: Print statistics about balanced
partitioning section ordering
Thanks to @ellishg, @thevinster, and their team for their work.
---------
Co-authored-by: Fangrui Song <i at maskray.me>
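As a rough usage sketch (file names are placeholders; the complete invocations appear in the new tests below), inputs should be compiled with -ffunction-sections and -fdata-sections so sections can be reordered, and the temporal profile is merged with llvm-profdata before linking:

  clang --target=aarch64-linux-gnu -ffunction-sections -fdata-sections -c a.c -o a.o
  llvm-profdata merge a.proftext -o a.profdata
  ld.lld a.o -o a.out --irpgo-profile=a.profdata --bp-startup-sort=function \
      --bp-compression-sort=both --verbose-bp-section-orderer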
Added:
lld/ELF/BPSectionOrderer.cpp
lld/ELF/BPSectionOrderer.h
lld/test/ELF/bp-section-orderer-stress.s
lld/test/ELF/bp-section-orderer.s
Modified:
lld/ELF/CMakeLists.txt
lld/ELF/Config.h
lld/ELF/Driver.cpp
lld/ELF/Options.td
lld/ELF/Writer.cpp
lld/include/lld/Common/BPSectionOrdererBase.inc
Removed:
################################################################################
diff --git a/lld/ELF/BPSectionOrderer.cpp b/lld/ELF/BPSectionOrderer.cpp
new file mode 100644
index 00000000000000..743fff07017da4
--- /dev/null
+++ b/lld/ELF/BPSectionOrderer.cpp
@@ -0,0 +1,95 @@
+//===- BPSectionOrderer.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPSectionOrderer.h"
+#include "InputFiles.h"
+#include "InputSection.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "lld/Common/BPSectionOrdererBase.inc"
+#include "llvm/Support/Endian.h"
+
+using namespace llvm;
+using namespace lld::elf;
+
+namespace {
+struct BPOrdererELF;
+}
+template <> struct lld::BPOrdererTraits<struct BPOrdererELF> {
+ using Section = elf::InputSectionBase;
+ using Defined = elf::Defined;
+};
+namespace {
+struct BPOrdererELF : lld::BPOrderer<BPOrdererELF> {
+ DenseMap<const InputSectionBase *, Defined *> secToSym;
+
+ static uint64_t getSize(const Section &sec) { return sec.getSize(); }
+ static bool isCodeSection(const Section &sec) {
+ return sec.flags & llvm::ELF::SHF_EXECINSTR;
+ }
+ ArrayRef<Defined *> getSymbols(const Section &sec) {
+ auto it = secToSym.find(&sec);
+ if (it == secToSym.end())
+ return {};
+ return ArrayRef(it->second);
+ }
+
+ static void
+ getSectionHashes(const Section &sec, llvm::SmallVectorImpl<uint64_t> &hashes,
+ const llvm::DenseMap<const void *, uint64_t> &sectionToIdx) {
+ constexpr unsigned windowSize = 4;
+
+ // Calculate content hashes: k-mers and the last k-1 bytes.
+ ArrayRef<uint8_t> data = sec.content();
+ if (data.size() >= windowSize)
+ for (size_t i = 0; i <= data.size() - windowSize; ++i)
+ hashes.push_back(llvm::support::endian::read32le(data.data() + i));
+ for (uint8_t byte : data.take_back(windowSize - 1))
+ hashes.push_back(byte);
+
+ llvm::sort(hashes);
+ hashes.erase(std::unique(hashes.begin(), hashes.end()), hashes.end());
+ }
+
+ static StringRef getSymName(const Defined &sym) { return sym.getName(); }
+ static uint64_t getSymValue(const Defined &sym) { return sym.value; }
+ static uint64_t getSymSize(const Defined &sym) { return sym.size; }
+};
+} // namespace
+
+DenseMap<const InputSectionBase *, int> elf::runBalancedPartitioning(
+ Ctx &ctx, StringRef profilePath, bool forFunctionCompression,
+ bool forDataCompression, bool compressionSortStartupFunctions,
+ bool verbose) {
+ // Collect candidate sections and associated symbols.
+ SmallVector<InputSectionBase *> sections;
+ DenseMap<CachedHashStringRef, DenseSet<unsigned>> rootSymbolToSectionIdxs;
+ BPOrdererELF orderer;
+
+ auto addSection = [&](Symbol &sym) {
+ auto *d = dyn_cast<Defined>(&sym);
+ if (!d)
+ return;
+ auto *sec = dyn_cast_or_null<InputSectionBase>(d->section);
+ if (!sec || sec->size == 0 || !orderer.secToSym.try_emplace(sec, d).second)
+ return;
+ rootSymbolToSectionIdxs[CachedHashStringRef(getRootSymbol(sym.getName()))]
+ .insert(sections.size());
+ sections.emplace_back(sec);
+ };
+
+ for (Symbol *sym : ctx.symtab->getSymbols())
+ addSection(*sym);
+ for (ELFFileBase *file : ctx.objectFiles)
+ for (Symbol *sym : file->getLocalSymbols())
+ addSection(*sym);
+ return orderer.computeOrder(profilePath, forFunctionCompression,
+ forDataCompression,
+ compressionSortStartupFunctions, verbose,
+ sections, rootSymbolToSectionIdxs);
+}
diff --git a/lld/ELF/BPSectionOrderer.h b/lld/ELF/BPSectionOrderer.h
new file mode 100644
index 00000000000000..a0cb1360005a6b
--- /dev/null
+++ b/lld/ELF/BPSectionOrderer.h
@@ -0,0 +1,37 @@
+//===- BPSectionOrderer.h -------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file uses Balanced Partitioning to order sections to improve startup
+/// time and compressed size.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_ELF_BPSECTION_ORDERER_H
+#define LLD_ELF_BPSECTION_ORDERER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace lld::elf {
+struct Ctx;
+class InputSectionBase;
+
+/// Run Balanced Partitioning to find the optimal function and data order to
+/// improve startup time and compressed size.
+///
+/// It is important that -ffunction-sections and -fdata-sections compiler flags
+/// are used to ensure functions and data are in their own sections and thus
+/// can be reordered.
+llvm::DenseMap<const InputSectionBase *, int>
+runBalancedPartitioning(Ctx &ctx, llvm::StringRef profilePath,
+ bool forFunctionCompression, bool forDataCompression,
+ bool compressionSortStartupFunctions, bool verbose);
+
+} // namespace lld::elf
+
+#endif
diff --git a/lld/ELF/CMakeLists.txt b/lld/ELF/CMakeLists.txt
index 83d816ddb0601e..ec3f6382282b1f 100644
--- a/lld/ELF/CMakeLists.txt
+++ b/lld/ELF/CMakeLists.txt
@@ -37,6 +37,7 @@ add_lld_library(lldELF
Arch/X86.cpp
Arch/X86_64.cpp
ARMErrataFix.cpp
+ BPSectionOrderer.cpp
CallGraphSort.cpp
DWARF.cpp
Driver.cpp
@@ -72,6 +73,7 @@ add_lld_library(lldELF
Object
Option
Passes
+ ProfileData
Support
TargetParser
TransformUtils
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index df262fdc811b09..3cdb400e423fd9 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -264,6 +264,12 @@ struct Config {
bool armBe8 = false;
BsymbolicKind bsymbolic = BsymbolicKind::None;
CGProfileSortKind callGraphProfileSort;
+ llvm::StringRef irpgoProfilePath;
+ bool bpStartupFunctionSort = false;
+ bool bpCompressionSortStartupFunctions = false;
+ bool bpFunctionOrderForCompression = false;
+ bool bpDataOrderForCompression = false;
+ bool bpVerboseSectionOrderer = false;
bool checkSections;
bool checkDynamicRelocs;
std::optional<llvm::DebugCompressionType> compressDebugSections;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index d92338608b059d..2835b86d05e9cc 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1118,6 +1118,53 @@ static CGProfileSortKind getCGProfileSortKind(Ctx &ctx,
return CGProfileSortKind::None;
}
+static void parseBPOrdererOptions(Ctx &ctx, opt::InputArgList &args) {
+ if (auto *arg = args.getLastArg(OPT_bp_compression_sort)) {
+ StringRef s = arg->getValue();
+ if (s == "function") {
+ ctx.arg.bpFunctionOrderForCompression = true;
+ } else if (s == "data") {
+ ctx.arg.bpDataOrderForCompression = true;
+ } else if (s == "both") {
+ ctx.arg.bpFunctionOrderForCompression = true;
+ ctx.arg.bpDataOrderForCompression = true;
+ } else if (s != "none") {
+ ErrAlways(ctx) << arg->getSpelling()
+ << ": expected [none|function|data|both]";
+ }
+ if (s != "none" && args.hasArg(OPT_call_graph_ordering_file))
+ ErrAlways(ctx) << "--bp-compression-sort is incompatible with "
+ "--call-graph-ordering-file";
+ }
+ if (auto *arg = args.getLastArg(OPT_bp_startup_sort)) {
+ StringRef s = arg->getValue();
+ if (s == "function") {
+ ctx.arg.bpStartupFunctionSort = true;
+ } else if (s != "none") {
+ ErrAlways(ctx) << arg->getSpelling() << ": expected [none|function]";
+ }
+ if (s != "none" && args.hasArg(OPT_call_graph_ordering_file))
+ ErrAlways(ctx) << "--bp-startup-sort=function is incompatible with "
+ "--call-graph-ordering-file";
+ }
+
+ ctx.arg.bpCompressionSortStartupFunctions =
+ args.hasFlag(OPT_bp_compression_sort_startup_functions,
+ OPT_no_bp_compression_sort_startup_functions, false);
+ ctx.arg.bpVerboseSectionOrderer = args.hasArg(OPT_verbose_bp_section_orderer);
+
+ ctx.arg.irpgoProfilePath = args.getLastArgValue(OPT_irpgo_profile);
+ if (ctx.arg.irpgoProfilePath.empty()) {
+ if (ctx.arg.bpStartupFunctionSort)
+ ErrAlways(ctx) << "--bp-startup-sort=function must be used with "
+ "--irpgo-profile";
+ if (ctx.arg.bpCompressionSortStartupFunctions)
+ ErrAlways(ctx)
+ << "--bp-compression-sort-startup-functions must be used with "
+ "--irpgo-profile";
+ }
+}
+
static DebugCompressionType getCompressionType(Ctx &ctx, StringRef s,
StringRef option) {
DebugCompressionType type = StringSwitch<DebugCompressionType>(s)
@@ -1259,6 +1306,7 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
ctx.arg.bsymbolic = BsymbolicKind::All;
}
ctx.arg.callGraphProfileSort = getCGProfileSortKind(ctx, args);
+ parseBPOrdererOptions(ctx, args);
ctx.arg.checkSections =
args.hasFlag(OPT_check_sections, OPT_no_check_sections, true);
ctx.arg.chroot = args.getLastArgValue(OPT_chroot);
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index c31875305952fb..80032490da0de4 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -141,6 +141,19 @@ def call_graph_profile_sort: JJ<"call-graph-profile-sort=">,
def : FF<"no-call-graph-profile-sort">, Alias<call_graph_profile_sort>, AliasArgs<["none"]>,
Flags<[HelpHidden]>;
+defm irpgo_profile: EEq<"irpgo-profile",
+ "Read a temporary profile file for use with --bp-startup-sort=">;
+def bp_compression_sort: JJ<"bp-compression-sort=">, MetaVarName<"[none,function,data,both]">,
+ HelpText<"Improve Lempel-Ziv compression by grouping similar sections together, resulting in a smaller compressed app size">;
+def bp_startup_sort: JJ<"bp-startup-sort=">, MetaVarName<"[none,function]">,
+ HelpText<"Utilize a temporal profile file to reduce page faults during program startup">;
+
+// Auxiliary options related to balanced partitioning
+defm bp_compression_sort_startup_functions: BB<"bp-compression-sort-startup-functions",
+ "When --irpgo-profile is pecified, prioritize function similarity for compression in addition to startup time", "">;
+def verbose_bp_section_orderer: FF<"verbose-bp-section-orderer">,
+ HelpText<"Print information on balanced partitioning">;
+
// --chroot doesn't have a help text because it is an internal option.
def chroot: Separate<["--"], "chroot">;
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 487fb119a966b1..7e3a920df19ec3 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -9,6 +9,7 @@
#include "Writer.h"
#include "AArch64ErrataFix.h"
#include "ARMErrataFix.h"
+#include "BPSectionOrderer.h"
#include "CallGraphSort.h"
#include "Config.h"
#include "InputFiles.h"
@@ -1082,8 +1083,18 @@ static void maybeShuffle(Ctx &ctx,
// that don't appear in the order file.
static DenseMap<const InputSectionBase *, int> buildSectionOrder(Ctx &ctx) {
DenseMap<const InputSectionBase *, int> sectionOrder;
- if (!ctx.arg.callGraphProfile.empty())
+ if (ctx.arg.bpStartupFunctionSort || ctx.arg.bpFunctionOrderForCompression ||
+ ctx.arg.bpDataOrderForCompression) {
+ TimeTraceScope timeScope("Balanced Partitioning Section Orderer");
+ sectionOrder = runBalancedPartitioning(
+ ctx, ctx.arg.bpStartupFunctionSort ? ctx.arg.irpgoProfilePath : "",
+ ctx.arg.bpFunctionOrderForCompression,
+ ctx.arg.bpDataOrderForCompression,
+ ctx.arg.bpCompressionSortStartupFunctions,
+ ctx.arg.bpVerboseSectionOrderer);
+ } else if (!ctx.arg.callGraphProfile.empty()) {
sectionOrder = computeCallGraphProfileOrder(ctx);
+ }
if (ctx.arg.symbolOrderingFile.empty())
return sectionOrder;
diff --git a/lld/include/lld/Common/BPSectionOrdererBase.inc b/lld/include/lld/Common/BPSectionOrdererBase.inc
index cb6e7ebcbd96b7..ed0e0826ec057f 100644
--- a/lld/include/lld/Common/BPSectionOrdererBase.inc
+++ b/lld/include/lld/Common/BPSectionOrdererBase.inc
@@ -63,6 +63,10 @@ template <class D> struct BPOrderer {
const DenseMap<CachedHashStringRef, DenseSet<unsigned>>
&rootSymbolToSectionIdxs)
-> llvm::DenseMap<const Section *, int>;
+
+ std::optional<StringRef> static getResolvedLinkageName(llvm::StringRef name) {
+ return {};
+ }
};
} // namespace lld
@@ -97,10 +101,11 @@ static SmallVector<std::pair<unsigned, UtilityNodes>> getUnsForCompression(
// Merge sections that are nearly identical
SmallVector<std::pair<unsigned, SmallVector<uint64_t>>> newSectionHashes;
DenseMap<uint64_t, unsigned> wholeHashToSectionIdx;
+ unsigned threshold = sectionHashes.size() > 10000 ? 5 : 0;
for (auto &[sectionIdx, hashes] : sectionHashes) {
uint64_t wholeHash = 0;
for (auto hash : hashes)
- if (hashFrequency[hash] > 5)
+ if (hashFrequency[hash] > threshold)
wholeHash ^= hash;
auto [it, wasInserted] =
wholeHashToSectionIdx.insert(std::make_pair(wholeHash, sectionIdx));
diff --git a/lld/test/ELF/bp-section-orderer-stress.s b/lld/test/ELF/bp-section-orderer-stress.s
new file mode 100644
index 00000000000000..da9670933949f9
--- /dev/null
+++ b/lld/test/ELF/bp-section-orderer-stress.s
@@ -0,0 +1,104 @@
+# REQUIRES: aarch64
+
+## Generate a large test case and check that the output is deterministic.
+
+# RUN: %python %s %t.s %t.proftext
+
+# RUN: llvm-mc -filetype=obj -triple=aarch64 %t.s -o %t.o
+# RUN: llvm-profdata merge %t.proftext -o %t.profdata
+
+# RUN: ld.lld --icf=all -o %t1.o %t.o --irpgo-profile=%t.profdata --bp-startup-sort=function --bp-compression-sort-startup-functions --bp-compression-sort=both
+# RUN: ld.lld --icf=all -o %t2.o %t.o --irpgo-profile=%t.profdata --bp-startup-sort=function --bp-compression-sort-startup-functions --bp-compression-sort=both
+# RUN: cmp %t1.o %t2.o
+
+import random
+import sys
+
+assembly_filepath = sys.argv[1]
+proftext_filepath = sys.argv[2]
+
+random.seed(1234)
+num_functions = 1000
+num_data = 100
+num_traces = 10
+
+function_names = [f"f{n}" for n in range(num_functions)]
+data_names = [f"d{n}" for n in range(num_data)]
+profiled_functions = function_names[: int(num_functions / 2)]
+
+function_contents = [
+ f"""
+{name}:
+ add w0, w0, #{i % 4096}
+ add w1, w1, #{i % 10}
+ add w2, w0, #{i % 20}
+ adrp x3, {name}
+ ret
+"""
+ for i, name in enumerate(function_names)
+]
+
+data_contents = [
+ f"""
+{name}:
+ .ascii "s{i % 2}-{i % 3}-{i % 5}"
+ .xword {name}
+"""
+ for i, name in enumerate(data_names)
+]
+
+trace_contents = [
+ f"""
+# Weight
+1
+{", ".join(random.sample(profiled_functions, len(profiled_functions)))}
+"""
+ for i in range(num_traces)
+]
+
+profile_contents = [
+ f"""
+{name}
+# Func Hash:
+{i}
+# Num Counters:
+1
+# Counter Values:
+1
+"""
+ for i, name in enumerate(profiled_functions)
+]
+
+with open(assembly_filepath, "w") as f:
+ f.write(
+ f"""
+.text
+.globl _start
+
+_start:
+ ret
+
+{"".join(function_contents)}
+
+.data
+{"".join(data_contents)}
+
+"""
+ )
+
+with open(proftext_filepath, "w") as f:
+ f.write(
+ f"""
+:ir
+:temporal_prof_traces
+
+# Num Traces
+{num_traces}
+# Trace Stream Size:
+{num_traces}
+
+{"".join(trace_contents)}
+
+{"".join(profile_contents)}
+"""
+ )
diff --git a/lld/test/ELF/bp-section-orderer.s b/lld/test/ELF/bp-section-orderer.s
new file mode 100644
index 00000000000000..1f3776280eae0e
--- /dev/null
+++ b/lld/test/ELF/bp-section-orderer.s
@@ -0,0 +1,335 @@
+# REQUIRES: aarch64
+# RUN: rm -rf %t && split-file %s %t && cd %t
+
+## Check for incompatible cases
+# RUN: not ld.lld %t --irpgo-profile=/dev/null --bp-startup-sort=function --call-graph-ordering-file=/dev/null 2>&1 | FileCheck %s --check-prefix=BP-STARTUP-CALLGRAPH-ERR
+# RUN: not ld.lld --bp-compression-sort=function --call-graph-ordering-file /dev/null 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-CALLGRAPH-ERR
+# RUN: not ld.lld --bp-startup-sort=function 2>&1 | FileCheck %s --check-prefix=BP-STARTUP-ERR
+# RUN: not ld.lld --bp-compression-sort-startup-functions 2>&1 | FileCheck %s --check-prefix=BP-STARTUP-COMPRESSION-ERR
+# RUN: not ld.lld --bp-startup-sort=invalid --bp-compression-sort=invalid 2>&1 | FileCheck %s --check-prefix=BP-INVALID
+
+# BP-STARTUP-CALLGRAPH-ERR: error: --bp-startup-sort=function is incompatible with --call-graph-ordering-file
+# BP-COMPRESSION-CALLGRAPH-ERR: error: --bp-compression-sort is incompatible with --call-graph-ordering-file
+# BP-STARTUP-ERR: error: --bp-startup-sort=function must be used with --irpgo-profile
+# BP-STARTUP-COMPRESSION-ERR: error: --bp-compression-sort-startup-functions must be used with --irpgo-profile
+
+# BP-INVALID: error: --bp-compression-sort=: expected [none|function|data|both]
+# BP-INVALID: error: --bp-startup-sort=: expected [none|function]
+
+# RUN: llvm-mc -filetype=obj -triple=aarch64 a.s -o a.o
+# RUN: llvm-profdata merge a.proftext -o a.profdata
+# RUN: ld.lld a.o --irpgo-profile=a.profdata --bp-startup-sort=function --verbose-bp-section-orderer --icf=all 2>&1 | FileCheck %s --check-prefix=STARTUP-FUNC-ORDER
+
+# STARTUP-FUNC-ORDER: Ordered 3 sections using balanced partitioning
+# STARTUP-FUNC-ORDER: Total area under the page fault curve: 3.
+
+# RUN: ld.lld -o out.s a.o --irpgo-profile=a.profdata --bp-startup-sort=function
+# RUN: llvm-nm -jn out.s | tr '\n' , | FileCheck %s --check-prefix=STARTUP
+# STARTUP: s5,s4,s3,s2,s1,A,B,C,F,E,D,_start,d4,d3,d2,d1,
+
+# RUN: ld.lld -o out.os a.o --irpgo-profile=a.profdata --bp-startup-sort=function --symbol-ordering-file a.txt
+# RUN: llvm-nm -jn out.os | tr '\n' , | FileCheck %s --check-prefix=ORDER-STARTUP
+# ORDER-STARTUP: s2,s1,s5,s4,s3,A,F,E,D,B,C,_start,d4,d3,d2,d1,
+
+# RUN: ld.lld -o out.cf a.o --verbose-bp-section-orderer --bp-compression-sort=function 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-FUNC
+# RUN: llvm-nm -jn out.cf | tr '\n' , | FileCheck %s --check-prefix=CFUNC
+# CFUNC: s5,s4,s3,s2,s1,F,C,E,D,B,A,_start,d4,d3,d2,d1,
+
+# RUN: ld.lld -o out.cd a.o --verbose-bp-section-orderer --bp-compression-sort=data 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-DATA
+# RUN: llvm-nm -jn out.cd | tr '\n' , | FileCheck %s --check-prefix=CDATA
+# CDATA: s4,s2,s1,s5,s3,F,C,E,D,B,A,_start,d4,d1,d3,d2,
+
+# RUN: ld.lld -o out.cb a.o --verbose-bp-section-orderer --bp-compression-sort=both 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-BOTH
+# RUN: llvm-nm -jn out.cb | tr '\n' , | FileCheck %s --check-prefix=CDATA
+
+# RUN: ld.lld -o out.cbs a.o --verbose-bp-section-orderer --bp-compression-sort=both --irpgo-profile=a.profdata --bp-startup-sort=function 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-BOTH
+# RUN: llvm-nm -jn out.cbs | tr '\n' , | FileCheck %s --check-prefix=CBOTH-STARTUP
+# CBOTH-STARTUP: s4,s2,s1,s5,s3,A,B,C,F,E,D,_start,d4,d1,d3,d2,
+
+# BP-COMPRESSION-FUNC: Ordered 7 sections using balanced partitioning
+# BP-COMPRESSION-DATA: Ordered 9 sections using balanced partitioning
+# BP-COMPRESSION-BOTH: Ordered 16 sections using balanced partitioning
+
+#--- a.proftext
+:ir
+:temporal_prof_traces
+# Num Traces
+1
+# Trace Stream Size:
+1
+# Weight
+1
+A, B, C
+
+A
+# Func Hash:
+1111
+# Num Counters:
+1
+# Counter Values:
+1
+
+B
+# Func Hash:
+2222
+# Num Counters:
+1
+# Counter Values:
+1
+
+C
+# Func Hash:
+3333
+# Num Counters:
+1
+# Counter Values:
+1
+
+D
+# Func Hash:
+4444
+# Num Counters:
+1
+# Counter Values:
+1
+
+#--- a.txt
+A
+F
+E
+D
+s2
+s1
+r3
+r2
+
+#--- a.c
+const char s5[] = "engineering";
+const char s4[] = "computer program";
+const char s3[] = "hardware engineer";
+const char s2[] = "computer software";
+const char s1[] = "hello world program";
+int d4[] = {1,2,3,4,5,6};
+int d3[] = {5,6,7,8};
+int d2[] = {7,8,9,10};
+int d1[] = {3,4,5,6};
+
+int C(int a);
+int B(int a);
+void A();
+
+int F(int a) { return C(a + 3); }
+int E(int a) { return C(a + 2); }
+int D(int a) { return B(a + 2); }
+int C(int a) { A(); return a + 2; }
+int B(int a) { A(); return a + 1; }
+void A() {}
+
+int _start() { return 0; }
+
+#--- gen
+clang --target=aarch64-linux-gnu -O0 -ffunction-sections -fdata-sections -fno-asynchronous-unwind-tables -S a.c -o -
+;--- a.s
+ .file "a.c"
+ .section .text.F,"ax", at progbits
+ .globl F // -- Begin function F
+ .p2align 2
+ .type F, at function
+F: // @F
+// %bb.0: // %entry
+ sub sp, sp, #32
+ stp x29, x30, [sp, #16] // 16-byte Folded Spill
+ add x29, sp, #16
+ stur w0, [x29, #-4]
+ ldur w8, [x29, #-4]
+ add w0, w8, #3
+ bl C
+ ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+ add sp, sp, #32
+ ret
+.Lfunc_end0:
+ .size F, .Lfunc_end0-F
+ // -- End function
+ .section .text.C,"ax", at progbits
+ .globl C // -- Begin function C
+ .p2align 2
+ .type C, at function
+C: // @C
+// %bb.0: // %entry
+ sub sp, sp, #32
+ stp x29, x30, [sp, #16] // 16-byte Folded Spill
+ add x29, sp, #16
+ stur w0, [x29, #-4]
+ bl A
+ ldur w8, [x29, #-4]
+ add w0, w8, #2
+ ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+ add sp, sp, #32
+ ret
+.Lfunc_end1:
+ .size C, .Lfunc_end1-C
+ // -- End function
+ .section .text.E,"ax", at progbits
+ .globl E // -- Begin function E
+ .p2align 2
+ .type E, at function
+E: // @E
+// %bb.0: // %entry
+ sub sp, sp, #32
+ stp x29, x30, [sp, #16] // 16-byte Folded Spill
+ add x29, sp, #16
+ stur w0, [x29, #-4]
+ ldur w8, [x29, #-4]
+ add w0, w8, #2
+ bl C
+ ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+ add sp, sp, #32
+ ret
+.Lfunc_end2:
+ .size E, .Lfunc_end2-E
+ // -- End function
+ .section .text.D,"ax", at progbits
+ .globl D // -- Begin function D
+ .p2align 2
+ .type D, at function
+D: // @D
+// %bb.0: // %entry
+ sub sp, sp, #32
+ stp x29, x30, [sp, #16] // 16-byte Folded Spill
+ add x29, sp, #16
+ stur w0, [x29, #-4]
+ ldur w8, [x29, #-4]
+ add w0, w8, #2
+ bl B
+ ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+ add sp, sp, #32
+ ret
+.Lfunc_end3:
+ .size D, .Lfunc_end3-D
+ // -- End function
+ .section .text.B,"ax", at progbits
+ .globl B // -- Begin function B
+ .p2align 2
+ .type B, at function
+B: // @B
+// %bb.0: // %entry
+ sub sp, sp, #32
+ stp x29, x30, [sp, #16] // 16-byte Folded Spill
+ add x29, sp, #16
+ stur w0, [x29, #-4]
+ bl A
+ ldur w8, [x29, #-4]
+ add w0, w8, #1
+ ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+ add sp, sp, #32
+ ret
+.Lfunc_end4:
+ .size B, .Lfunc_end4-B
+ // -- End function
+ .section .text.A,"ax", at progbits
+ .globl A // -- Begin function A
+ .p2align 2
+ .type A, at function
+A: // @A
+// %bb.0: // %entry
+ ret
+.Lfunc_end5:
+ .size A, .Lfunc_end5-A
+ // -- End function
+ .section .text._start,"ax", at progbits
+ .globl _start // -- Begin function _start
+ .p2align 2
+ .type _start, at function
+_start: // @_start
+// %bb.0: // %entry
+ mov w0, wzr
+ ret
+.Lfunc_end6:
+ .size _start, .Lfunc_end6-_start
+ // -- End function
+ .type s5,@object // @s5
+ .section .rodata.s5,"a",@progbits
+ .globl s5
+s5:
+ .asciz "engineering"
+ .size s5, 12
+
+ .type s4,@object // @s4
+ .section .rodata.s4,"a",@progbits
+ .globl s4
+s4:
+ .asciz "computer program"
+ .size s4, 17
+
+ .type s3,@object // @s3
+ .section .rodata.s3,"a",@progbits
+ .globl s3
+s3:
+ .asciz "hardware engineer"
+ .size s3, 18
+
+ .type s2,@object // @s2
+ .section .rodata.s2,"a",@progbits
+ .globl s2
+s2:
+ .asciz "computer software"
+ .size s2, 18
+
+ .type s1,@object // @s1
+ .section .rodata.s1,"a",@progbits
+ .globl s1
+s1:
+ .asciz "hello world program"
+ .size s1, 20
+
+ .type d4,@object // @d4
+ .section .data.d4,"aw",@progbits
+ .globl d4
+ .p2align 2, 0x0
+d4:
+ .word 1 // 0x1
+ .word 2 // 0x2
+ .word 3 // 0x3
+ .word 4 // 0x4
+ .word 5 // 0x5
+ .word 6 // 0x6
+ .size d4, 24
+
+ .type d3,@object // @d3
+ .section .data.d3,"aw",@progbits
+ .globl d3
+ .p2align 2, 0x0
+d3:
+ .word 5 // 0x5
+ .word 6 // 0x6
+ .word 7 // 0x7
+ .word 8 // 0x8
+ .size d3, 16
+
+ .type d2,@object // @d2
+ .section .data.d2,"aw",@progbits
+ .globl d2
+ .p2align 2, 0x0
+d2:
+ .word 7 // 0x7
+ .word 8 // 0x8
+ .word 9 // 0x9
+ .word 10 // 0xa
+ .size d2, 16
+
+ .type d1,@object // @d1
+ .section .data.d1,"aw",@progbits
+ .globl d1
+ .p2align 2, 0x0
+d1:
+ .word 3 // 0x3
+ .word 4 // 0x4
+ .word 5 // 0x5
+ .word 6 // 0x6
+ .size d1, 16
+
+ .section ".note.GNU-stack","", at progbits
+ .addrsig
+ .addrsig_sym C
+ .addrsig_sym B
+ .addrsig_sym A