[lld] [lld][InstrProf] Sort startup functions for compression (PR #107348)
Ellis Hoag via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 5 10:08:40 PDT 2024
https://github.com/ellishg updated https://github.com/llvm/llvm-project/pull/107348
>From cca346ff49665f8c751ad1f42a189d017074fc3c Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag at gmail.com>
Date: Wed, 4 Sep 2024 19:42:00 -0700
Subject: [PATCH 1/3] [InstrProf][LLD] Refactor
* Rename `constructNodesForCompression()` -> `getUnsForCompression()`
* Pass `duplicateSectionIdxs` as a pointer to make it possible to skip
* Combine `duplicate{Function,Data}SectionIdxs` into one variable
* Compute all `BPFunctionNode` vectors at the end (like `nodesForStartup`)
---
lld/MachO/BPSectionOrderer.cpp | 118 +++++++++++++++++----------------
1 file changed, 62 insertions(+), 56 deletions(-)
diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index 568843d72bbb50..3ba844f44fb791 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -60,12 +60,13 @@ getRelocHash(const Reloc &reloc,
return getRelocHash(kind, sectionIdx.value_or(0), 0, reloc.addend);
}
-static void constructNodesForCompression(
- const SmallVector<const InputSection *> §ions,
+static SmallVector<
+ std::pair<unsigned, SmallVector<BPFunctionNode::UtilityNodeT>>>
+getUnsForCompression(
+ ArrayRef<const InputSection *> sections,
const DenseMap<const InputSection *, uint64_t> §ionToIdx,
- const SmallVector<unsigned> §ionIdxs,
- std::vector<BPFunctionNode> &nodes,
- DenseMap<unsigned, SmallVector<unsigned>> &duplicateSectionIdxs,
+ ArrayRef<unsigned> sectionIdxs,
+ DenseMap<unsigned, SmallVector<unsigned>> *duplicateSectionIdxs,
BPFunctionNode::UtilityNodeT &maxUN) {
TimeTraceScope timeScope("Build nodes for compression");
@@ -103,49 +104,53 @@ static void constructNodesForCompression(
for (auto hash : hashes)
++hashFrequency[hash];
- // Merge section that are nearly identical
- SmallVector<std::pair<unsigned, SmallVector<uint64_t>>> newSectionHashes;
- DenseMap<uint64_t, unsigned> wholeHashToSectionIdx;
- for (auto &[sectionIdx, hashes] : sectionHashes) {
- uint64_t wholeHash = 0;
- for (auto hash : hashes)
- if (hashFrequency[hash] > 5)
- wholeHash ^= hash;
- auto [it, wasInserted] =
- wholeHashToSectionIdx.insert(std::make_pair(wholeHash, sectionIdx));
- if (wasInserted) {
- newSectionHashes.emplace_back(sectionIdx, hashes);
- } else {
- duplicateSectionIdxs[it->getSecond()].push_back(sectionIdx);
+ if (duplicateSectionIdxs) {
+ // Merge section that are nearly identical
+ SmallVector<std::pair<unsigned, SmallVector<uint64_t>>> newSectionHashes;
+ DenseMap<uint64_t, unsigned> wholeHashToSectionIdx;
+ for (auto &[sectionIdx, hashes] : sectionHashes) {
+ uint64_t wholeHash = 0;
+ for (auto hash : hashes)
+ if (hashFrequency[hash] > 5)
+ wholeHash ^= hash;
+ auto [it, wasInserted] =
+ wholeHashToSectionIdx.insert(std::make_pair(wholeHash, sectionIdx));
+ if (wasInserted) {
+ newSectionHashes.emplace_back(sectionIdx, hashes);
+ } else {
+ (*duplicateSectionIdxs)[it->getSecond()].push_back(sectionIdx);
+ }
}
- }
- sectionHashes = newSectionHashes;
+ sectionHashes = newSectionHashes;
- // Recompute hash frequencies
- hashFrequency.clear();
- for (auto &[sectionIdx, hashes] : sectionHashes)
- for (auto hash : hashes)
- ++hashFrequency[hash];
+ // Recompute hash frequencies
+ hashFrequency.clear();
+ for (auto &[sectionIdx, hashes] : sectionHashes)
+ for (auto hash : hashes)
+ ++hashFrequency[hash];
+ }
// Filter rare and common hashes and assign each a unique utility node that
// doesn't conflict with the trace utility nodes
DenseMap<uint64_t, BPFunctionNode::UtilityNodeT> hashToUN;
for (auto &[hash, frequency] : hashFrequency) {
- if (frequency <= 1 || frequency * 2 > wholeHashToSectionIdx.size())
+ if (frequency <= 1 || frequency * 2 > sectionHashes.size())
continue;
hashToUN[hash] = ++maxUN;
}
- std::vector<BPFunctionNode::UtilityNodeT> uns;
+ SmallVector<std::pair<unsigned, SmallVector<BPFunctionNode::UtilityNodeT>>>
+ sectionUns;
for (auto &[sectionIdx, hashes] : sectionHashes) {
+ SmallVector<BPFunctionNode::UtilityNodeT> uns;
for (auto &hash : hashes) {
auto it = hashToUN.find(hash);
if (it != hashToUN.end())
uns.push_back(it->second);
}
- nodes.emplace_back(sectionIdx, uns);
- uns.clear();
+ sectionUns.emplace_back(sectionIdx, uns);
}
+ return sectionUns;
}
DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
@@ -185,10 +190,11 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
sectionIdxs.end());
}
- std::vector<BPFunctionNode> nodesForStartup;
BPFunctionNode::UtilityNodeT maxUN = 0;
DenseMap<unsigned, SmallVector<BPFunctionNode::UtilityNodeT>>
startupSectionIdxUNs;
+ // Used to define the initial order for startup functions.
+ DenseMap<unsigned, size_t> sectionIdxToTimestamp;
std::unique_ptr<InstrProfReader> reader;
if (!profilePath.empty()) {
auto fs = vfs::getRealFileSystem();
@@ -202,8 +208,6 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
}
auto &traces = reader->getTemporalProfTraces();
- // Used to define the initial order for startup functions.
- DenseMap<unsigned, size_t> sectionIdxToTimestamp;
DenseMap<unsigned, BPFunctionNode::UtilityNodeT> sectionIdxToFirstUN;
for (size_t traceIdx = 0; traceIdx < traces.size(); traceIdx++) {
uint64_t currentSize = 0, cutoffSize = 1;
@@ -245,15 +249,6 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
++maxUN;
sectionIdxToFirstUN.clear();
}
-
- // These uns should already be sorted without duplicates.
- for (auto &[sectionIdx, uns] : startupSectionIdxUNs)
- nodesForStartup.emplace_back(sectionIdx, uns);
-
- llvm::sort(nodesForStartup, [§ionIdxToTimestamp](auto &L, auto &R) {
- return std::make_pair(sectionIdxToTimestamp[L.Id], L.Id) <
- std::make_pair(sectionIdxToTimestamp[R.Id], R.Id);
- });
}
SmallVector<unsigned> sectionIdxsForFunctionCompression,
@@ -271,21 +266,32 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
}
}
- std::vector<BPFunctionNode> nodesForFunctionCompression,
- nodesForDataCompression;
// Map a section index (to be ordered for compression) to a list of duplicate
// section indices (not ordered for compression).
- DenseMap<unsigned, SmallVector<unsigned>> duplicateFunctionSectionIdxs,
- duplicateDataSectionIdxs;
- constructNodesForCompression(
+ DenseMap<unsigned, SmallVector<unsigned>> duplicateSectionIdxs;
+ auto unsForFunctionCompression = getUnsForCompression(
sections, sectionToIdx, sectionIdxsForFunctionCompression,
- nodesForFunctionCompression, duplicateFunctionSectionIdxs, maxUN);
- constructNodesForCompression(
+ &duplicateSectionIdxs, maxUN);
+ auto unsForDataCompression = getUnsForCompression(
sections, sectionToIdx, sectionIdxsForDataCompression,
- nodesForDataCompression, duplicateDataSectionIdxs, maxUN);
+ &duplicateSectionIdxs, maxUN);
- // Sort nodes by their Id (which is the section index) because the input
- // linker order tends to be not bad
+ std::vector<BPFunctionNode> nodesForStartup, nodesForFunctionCompression,
+ nodesForDataCompression;
+ for (auto &[sectionIdx, uns] : startupSectionIdxUNs)
+ nodesForStartup.emplace_back(sectionIdx, uns);
+ for (auto &[sectionIdx, uns] : unsForFunctionCompression)
+ nodesForFunctionCompression.emplace_back(sectionIdx, uns);
+ for (auto &[sectionIdx, uns] : unsForDataCompression)
+ nodesForDataCompression.emplace_back(sectionIdx, uns);
+
+ // Use the first timestamp to define the initial order for startup nodes.
+ llvm::sort(nodesForStartup, [§ionIdxToTimestamp](auto &L, auto &R) {
+ return std::make_pair(sectionIdxToTimestamp[L.Id], L.Id) <
+ std::make_pair(sectionIdxToTimestamp[R.Id], R.Id);
+ });
+ // Sort compression nodes by their Id (which is the section index) because the
+ // input linker order tends to be not bad.
llvm::sort(nodesForFunctionCompression,
[](auto &L, auto &R) { return L.Id < R.Id; });
llvm::sort(nodesForDataCompression,
@@ -318,8 +324,8 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
if (orderedSections.insert(isec))
++numCodeCompressionSections;
- auto It = duplicateFunctionSectionIdxs.find(node.Id);
- if (It == duplicateFunctionSectionIdxs.end())
+ auto It = duplicateSectionIdxs.find(node.Id);
+ if (It == duplicateSectionIdxs.end())
continue;
for (auto dupSecIdx : It->getSecond()) {
const auto *dupIsec = sections[dupSecIdx];
@@ -332,8 +338,8 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
const auto *isec = sections[node.Id];
if (orderedSections.insert(isec))
++numDataCompressionSections;
- auto It = duplicateDataSectionIdxs.find(node.Id);
- if (It == duplicateDataSectionIdxs.end())
+ auto It = duplicateSectionIdxs.find(node.Id);
+ if (It == duplicateSectionIdxs.end())
continue;
for (auto dupSecIdx : It->getSecond()) {
const auto *dupIsec = sections[dupSecIdx];
>From 29604b3f150e1017732eff68a0e05298c49797e9 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag at gmail.com>
Date: Wed, 4 Sep 2024 20:31:55 -0700
Subject: [PATCH 2/3] [InstrProf][LLD] Sort startup functions for compression
| | Page Fault Area | Compressed Size |
| --------------------- | --------------- | --------------- |
| Base | 1.045659e+09 | 25098246 |
| Startup Compression | 1.430330e+09 | 24740192 |
| Remove Startup Hashes | 2.272126e+09 | 24574929 |
---
lld/MachO/BPSectionOrderer.cpp | 20 +++++++++++++++++---
lld/MachO/BPSectionOrderer.h | 2 +-
lld/MachO/Config.h | 1 +
lld/MachO/Driver.cpp | 3 +++
lld/MachO/Options.td | 2 ++
lld/MachO/SectionPriorities.cpp | 1 +
lld/test/MachO/bp-section-orderer-stress.s | 4 ++--
7 files changed, 27 insertions(+), 6 deletions(-)
diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index 3ba844f44fb791..71758070ddc8ae 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -155,7 +155,8 @@ getUnsForCompression(
DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
size_t &highestAvailablePriority, StringRef profilePath,
- bool forFunctionCompression, bool forDataCompression, bool verbose) {
+ bool forFunctionCompression, bool forDataCompression,
+ bool compressionSortStartupFunctions, bool verbose) {
SmallVector<const InputSection *> sections;
DenseMap<const InputSection *, uint64_t> sectionToIdx;
@@ -266,8 +267,15 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
}
}
- // Map a section index (to be ordered for compression) to a list of duplicate
- // section indices (not ordered for compression).
+ SmallVector<unsigned> startupIdxs;
+ if (compressionSortStartupFunctions)
+ for (auto &[sectionIdx, uns] : startupSectionIdxUNs)
+ startupIdxs.push_back(sectionIdx);
+ auto unsForStartupFunctionCompression =
+ getUnsForCompression(sections, sectionToIdx, startupIdxs, nullptr, maxUN);
+
+ // Map a section index (order directly) to a list of duplicate section indices
+ // (not ordered directly).
DenseMap<unsigned, SmallVector<unsigned>> duplicateSectionIdxs;
auto unsForFunctionCompression = getUnsForCompression(
sections, sectionToIdx, sectionIdxsForFunctionCompression,
@@ -276,6 +284,12 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
sections, sectionToIdx, sectionIdxsForDataCompression,
&duplicateSectionIdxs, maxUN);
+ for (auto &[sectionIdx, compressionUns] : unsForStartupFunctionCompression) {
+ auto &uns = startupSectionIdxUNs[sectionIdx];
+ uns.append(compressionUns);
+ llvm::sort(uns);
+ uns.erase(std::unique(uns.begin(), uns.end()), uns.end());
+ }
std::vector<BPFunctionNode> nodesForStartup, nodesForFunctionCompression,
nodesForDataCompression;
for (auto &[sectionIdx, uns] : startupSectionIdxUNs)
diff --git a/lld/MachO/BPSectionOrderer.h b/lld/MachO/BPSectionOrderer.h
index 6f9eefd5d82beb..cefd0ceb10e561 100644
--- a/lld/MachO/BPSectionOrderer.h
+++ b/lld/MachO/BPSectionOrderer.h
@@ -30,7 +30,7 @@ llvm::DenseMap<const lld::macho::InputSection *, size_t>
runBalancedPartitioning(size_t &highestAvailablePriority,
llvm::StringRef profilePath,
bool forFunctionCompression, bool forDataCompression,
- bool verbose);
+ bool compressionSortStartupFunctions, bool verbose);
} // namespace lld::macho
diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index 5beb0662ba7274..17259ee75aa4db 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -218,6 +218,7 @@ struct Configuration {
llvm::StringRef printSymbolOrder;
llvm::StringRef irpgoProfileSortProfilePath;
+ bool compressionSortStartupFunctions = false;
bool functionOrderForCompression = false;
bool dataOrderForCompression = false;
bool verboseBpSectionOrderer = false;
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 6a1ff96ed65697..514ee1933f2d1a 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1772,6 +1772,9 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
config->irpgoProfileSortProfilePath = arg->getValue();
IncompatWithCGSort(arg->getSpelling());
}
+ config->compressionSortStartupFunctions =
+ args.hasFlag(OPT_compression_sort_startup_functions,
+ OPT_no_compression_sort_startup_functions, false);
if (const Arg *arg = args.getLastArg(OPT_compression_sort)) {
StringRef compressionSortStr = arg->getValue();
if (compressionSortStr == "function") {
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index 9c9570cdbeb05c..4ac1e2080de5cc 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -131,6 +131,8 @@ def irpgo_profile_sort_eq: Joined<["--"], "irpgo-profile-sort=">,
Alias<!cast<Separate>(irpgo_profile_sort)>, MetaVarName<"<profile>">,
HelpText<"Read the IRPGO profile at <profile> to order sections to improve startup time">,
Group<grp_lld>;
+def compression_sort_startup_functions: Flag<["--"], "compression-sort-startup-functions">, HelpText<"TODO">, Group<grp_lld>;
+def no_compression_sort_startup_functions: Flag<["--"], "no-compression-sort-startup-functions">, HelpText<"TODO">, Group<grp_lld>;
def compression_sort: Joined<["--"], "compression-sort=">,
MetaVarName<"[none,function,data,both]">,
HelpText<"Order sections to improve compressed size">, Group<grp_lld>;
diff --git a/lld/MachO/SectionPriorities.cpp b/lld/MachO/SectionPriorities.cpp
index 69c301d8ff8a71..1e7fb5973b8086 100644
--- a/lld/MachO/SectionPriorities.cpp
+++ b/lld/MachO/SectionPriorities.cpp
@@ -359,6 +359,7 @@ macho::PriorityBuilder::buildInputSectionPriorities() {
sectionPriorities = runBalancedPartitioning(
highestAvailablePriority, config->irpgoProfileSortProfilePath,
config->functionOrderForCompression, config->dataOrderForCompression,
+ config->compressionSortStartupFunctions,
config->verboseBpSectionOrderer);
} else if (config->callGraphProfileSort) {
// Sort sections by the profile data provided by __LLVM,__cg_profile
diff --git a/lld/test/MachO/bp-section-orderer-stress.s b/lld/test/MachO/bp-section-orderer-stress.s
index fdc6a20e2655b9..986e2d8fd1069b 100644
--- a/lld/test/MachO/bp-section-orderer-stress.s
+++ b/lld/test/MachO/bp-section-orderer-stress.s
@@ -7,8 +7,8 @@
# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t.s -o %t.o
# RUN: llvm-profdata merge %t.proftext -o %t.profdata
-# RUN: %lld -arch arm64 -lSystem -e _main --icf=all -o - %t.o --irpgo-profile-sort=%t.profdata --compression-sort=both | llvm-nm --numeric-sort --format=just-symbols - > %t.order1.txt
-# RUN: %lld -arch arm64 -lSystem -e _main --icf=all -o - %t.o --irpgo-profile-sort=%t.profdata --compression-sort=both | llvm-nm --numeric-sort --format=just-symbols - > %t.order2.txt
+# RUN: %lld -arch arm64 -lSystem -e _main --icf=all -o - %t.o --irpgo-profile-sort=%t.profdata --compression-sort-startup-functions --compression-sort=both | llvm-nm --numeric-sort --format=just-symbols - > %t.order1.txt
+# RUN: %lld -arch arm64 -lSystem -e _main --icf=all -o - %t.o --irpgo-profile-sort=%t.profdata --compression-sort-startup-functions --compression-sort=both | llvm-nm --numeric-sort --format=just-symbols - > %t.order2.txt
# RUN: diff %t.order1.txt %t.order2.txt
import random
>From a9d9531fb72d695cc618cd9be63daee1f3e56607 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag at gmail.com>
Date: Thu, 5 Sep 2024 10:08:21 -0700
Subject: [PATCH 3/3] Add help message
---
lld/MachO/Driver.cpp | 4 ++++
lld/MachO/Options.td | 7 +++++--
lld/test/MachO/bp-section-orderer-errs.s | 3 +++
3 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 514ee1933f2d1a..12dcceecb56e14 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1775,6 +1775,10 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
config->compressionSortStartupFunctions =
args.hasFlag(OPT_compression_sort_startup_functions,
OPT_no_compression_sort_startup_functions, false);
+ if (config->irpgoProfileSortProfilePath.empty() &&
+ config->compressionSortStartupFunctions)
+ error("--compression-sort-startup-functions must be used with "
+ "--irpgo-profile-sort");
if (const Arg *arg = args.getLastArg(OPT_compression_sort)) {
StringRef compressionSortStr = arg->getValue();
if (compressionSortStr == "function") {
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index 4ac1e2080de5cc..f264c040c91794 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -131,8 +131,11 @@ def irpgo_profile_sort_eq: Joined<["--"], "irpgo-profile-sort=">,
Alias<!cast<Separate>(irpgo_profile_sort)>, MetaVarName<"<profile>">,
HelpText<"Read the IRPGO profile at <profile> to order sections to improve startup time">,
Group<grp_lld>;
-def compression_sort_startup_functions: Flag<["--"], "compression-sort-startup-functions">, HelpText<"TODO">, Group<grp_lld>;
-def no_compression_sort_startup_functions: Flag<["--"], "no-compression-sort-startup-functions">, HelpText<"TODO">, Group<grp_lld>;
+def compression_sort_startup_functions: Flag<["--"], "compression-sort-startup-functions">,
+ HelpText<"Order startup functions to improve compressed size in addition to startup time">,
+ Group<grp_lld>;
+def no_compression_sort_startup_functions: Flag<["--"], "no-compression-sort-startup-functions">,
+ HelpText<"Do not order startup function for compression">, Group<grp_lld>;
def compression_sort: Joined<["--"], "compression-sort=">,
MetaVarName<"[none,function,data,both]">,
HelpText<"Order sections to improve compressed size">, Group<grp_lld>;
diff --git a/lld/test/MachO/bp-section-orderer-errs.s b/lld/test/MachO/bp-section-orderer-errs.s
index 8392b88508481f..682eb0c08bf1f9 100644
--- a/lld/test/MachO/bp-section-orderer-errs.s
+++ b/lld/test/MachO/bp-section-orderer-errs.s
@@ -7,3 +7,6 @@
# RUN: not %lld -o /dev/null --compression-sort=malformed 2>&1 | FileCheck %s --check-prefix=COMPRESSION-MALFORM
# COMPRESSION-MALFORM: unknown value `malformed` for --compression-sort=
+
+# RUN: not %lld -o /dev/null --compression-sort-startup-functions 2>&1 | FileCheck %s --check-prefix=STARTUP
+# STARTUP: --compression-sort-startup-functions must be used with --irpgo-profile-sort
More information about the llvm-commits
mailing list