[lld] [lld][ELF] Extend profile guided function ordering to ELF binaries (PR #117514)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 13 20:46:35 PST 2024
https://github.com/Colibrow updated https://github.com/llvm/llvm-project/pull/117514
>From 7355aae2de8480ab233d2059bb83426612451bdf Mon Sep 17 00:00:00 2001
From: xupengying <xpy66swsry at gmail.com>
Date: Mon, 25 Nov 2024 11:10:04 +0800
Subject: [PATCH 1/9] [lld][ELF] Extend profile guided function ordering to ELF
binaries
Extend balanced partitioning implementation to support ELF binaries, enabling
the same startup time and compressed size optimizations previously available for
MachO.
This allows ELF binaries to benefit from profile-guided function ordering
and compression-based section ordering.
Add the lld flags `--irpgo-profile=<profile>`, `--bp-startup-sort={none,function}`,
and `--bp-compression-sort={none,function,data,both}`.
Thanks to ellishg, thevinster, and their team for their work.
---
lld/Common/BPSectionOrdererBase.cpp | 379 ++++++++++++++++
lld/Common/CMakeLists.txt | 1 +
lld/ELF/BPSectionOrderer.cpp | 65 +++
lld/ELF/BPSectionOrderer.h | 139 ++++++
lld/ELF/CMakeLists.txt | 1 +
lld/ELF/Config.h | 6 +
lld/ELF/Driver.cpp | 49 +++
lld/ELF/Options.td | 18 +
lld/ELF/Writer.cpp | 10 +
lld/MachO/BPSectionOrderer.cpp | 412 +-----------------
lld/MachO/BPSectionOrderer.h | 129 +++++-
lld/include/lld/Common/BPSectionOrdererBase.h | 76 ++++
lld/test/ELF/bp-section-orderer-stress.s | 104 +++++
lld/test/ELF/bp-section-orderer.s | 269 ++++++++++++
lld/test/ELF/incompatible.s | 13 +
15 files changed, 1273 insertions(+), 398 deletions(-)
create mode 100644 lld/Common/BPSectionOrdererBase.cpp
create mode 100644 lld/ELF/BPSectionOrderer.cpp
create mode 100644 lld/ELF/BPSectionOrderer.h
create mode 100644 lld/include/lld/Common/BPSectionOrdererBase.h
create mode 100644 lld/test/ELF/bp-section-orderer-stress.s
create mode 100644 lld/test/ELF/bp-section-orderer.s
diff --git a/lld/Common/BPSectionOrdererBase.cpp b/lld/Common/BPSectionOrdererBase.cpp
new file mode 100644
index 00000000000000..e65ce99375cd49
--- /dev/null
+++ b/lld/Common/BPSectionOrdererBase.cpp
@@ -0,0 +1,379 @@
+//===- BPSectionOrdererBase.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "lld/Common/BPSectionOrdererBase.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/Support/BalancedPartitioning.h"
+#include "llvm/Support/TimeProfiler.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/Support/xxhash.h"
+
+#define DEBUG_TYPE "bp-section-orderer"
+using namespace llvm;
+using UtilityNodes = SmallVector<BPFunctionNode::UtilityNodeT>;
+
+namespace lld {
+
+static SmallVector<std::pair<unsigned, UtilityNodes>> getUnsForCompression(
+ ArrayRef<const BPSectionBase *> sections,
+ const DenseMap<const BPSectionBase *, uint64_t> &sectionToIdx,
+ ArrayRef<unsigned> sectionIdxs,
+ DenseMap<unsigned, SmallVector<unsigned>> *duplicateSectionIdxs,
+ BPFunctionNode::UtilityNodeT &maxUN) {
+ TimeTraceScope timeScope("Build nodes for compression");
+
+ SmallVector<std::pair<unsigned, SmallVector<uint64_t>>> sectionHashes;
+ sectionHashes.reserve(sectionIdxs.size());
+ SmallVector<uint64_t> hashes;
+
+ for (unsigned sectionIdx : sectionIdxs) {
+ const auto *isec = sections[sectionIdx];
+ isec->getSectionHash(hashes);
+ sectionHashes.emplace_back(sectionIdx, std::move(hashes));
+ hashes.clear();
+ }
+
+ DenseMap<uint64_t, unsigned> hashFrequency;
+ for (auto &[sectionIdx, hashes] : sectionHashes)
+ for (auto hash : hashes)
+ ++hashFrequency[hash];
+
+ if (duplicateSectionIdxs) {
+ // Merge sections that are nearly identical
+ SmallVector<std::pair<unsigned, SmallVector<uint64_t>>> newSectionHashes;
+ DenseMap<uint64_t, unsigned> wholeHashToSectionIdx;
+ for (auto &[sectionIdx, hashes] : sectionHashes) {
+ uint64_t wholeHash = 0;
+ for (auto hash : hashes)
+ if (hashFrequency[hash] > 5)
+ wholeHash ^= hash;
+ auto [it, wasInserted] =
+ wholeHashToSectionIdx.insert(std::make_pair(wholeHash, sectionIdx));
+ if (wasInserted) {
+ newSectionHashes.emplace_back(sectionIdx, hashes);
+ } else {
+ (*duplicateSectionIdxs)[it->getSecond()].push_back(sectionIdx);
+ }
+ }
+ sectionHashes = newSectionHashes;
+
+ // Recompute hash frequencies
+ hashFrequency.clear();
+ for (auto &[sectionIdx, hashes] : sectionHashes)
+ for (auto hash : hashes)
+ ++hashFrequency[hash];
+ }
+
+ // Filter rare and common hashes and assign each a unique utility node that
+ // doesn't conflict with the trace utility nodes
+ DenseMap<uint64_t, BPFunctionNode::UtilityNodeT> hashToUN;
+ for (auto &[hash, frequency] : hashFrequency) {
+ if (frequency <= 1 || frequency * 2 > sectionHashes.size())
+ continue;
+ hashToUN[hash] = ++maxUN;
+ }
+
+ SmallVector<std::pair<unsigned, UtilityNodes>> sectionUns;
+ for (auto &[sectionIdx, hashes] : sectionHashes) {
+ UtilityNodes uns;
+ for (auto &hash : hashes) {
+ auto it = hashToUN.find(hash);
+ if (it != hashToUN.end())
+ uns.push_back(it->second);
+ }
+ sectionUns.emplace_back(sectionIdx, uns);
+ }
+ return sectionUns;
+}
+
+llvm::DenseMap<const BPSectionBase *, size_t>
+BPSectionOrdererBase::reorderSectionsByBalancedPartitioning(
+ size_t &highestAvailablePriority, llvm::StringRef profilePath,
+ bool forFunctionCompression, bool forDataCompression,
+ bool compressionSortStartupFunctions, bool verbose,
+ SmallVector<std::unique_ptr<BPSectionBase>> &inputSections) {
+ TimeTraceScope timeScope("Setup Balanced Partitioning");
+ SmallVector<const BPSectionBase *> sections;
+ DenseMap<const BPSectionBase *, uint64_t> sectionToIdx;
+ StringMap<DenseSet<unsigned>> symbolToSectionIdxs;
+
+ // Process input sections
+ for (const auto &isec : inputSections) {
+ if (!isec->hasValidData())
+ continue;
+
+ unsigned sectionIdx = sections.size();
+ sectionToIdx.try_emplace(isec.get(), sectionIdx);
+ sections.emplace_back(isec.get());
+ for (auto &sym : isec->getSymbols())
+ if (auto *d = sym->asDefinedSymbol())
+ symbolToSectionIdxs[d->getName()].insert(sectionIdx);
+ }
+ StringMap<DenseSet<unsigned>> rootSymbolToSectionIdxs;
+ for (auto &entry : symbolToSectionIdxs) {
+ StringRef name = entry.getKey();
+ auto &sectionIdxs = entry.getValue();
+ name = BPSectionBase::getRootSymbol(name);
+ rootSymbolToSectionIdxs[name].insert(sectionIdxs.begin(),
+ sectionIdxs.end());
+ if (sections[*sectionIdxs.begin()]->needResolveLinkageName(name))
+ rootSymbolToSectionIdxs[name].insert(sectionIdxs.begin(),
+ sectionIdxs.end());
+ }
+
+ BPFunctionNode::UtilityNodeT maxUN = 0;
+ DenseMap<unsigned, UtilityNodes> startupSectionIdxUNs;
+ // Used to define the initial order for startup functions.
+ DenseMap<unsigned, size_t> sectionIdxToTimestamp;
+ std::unique_ptr<InstrProfReader> reader;
+ if (!profilePath.empty()) {
+ auto fs = vfs::getRealFileSystem();
+ auto readerOrErr = InstrProfReader::create(profilePath, *fs);
+ lld::checkError(readerOrErr.takeError());
+
+ reader = std::move(readerOrErr.get());
+ for (auto &entry : *reader) {
+ // Read all entries
+ (void)entry;
+ }
+ auto &traces = reader->getTemporalProfTraces();
+
+ DenseMap<unsigned, BPFunctionNode::UtilityNodeT> sectionIdxToFirstUN;
+ for (size_t traceIdx = 0; traceIdx < traces.size(); traceIdx++) {
+ uint64_t currentSize = 0, cutoffSize = 1;
+ size_t cutoffTimestamp = 1;
+ auto &trace = traces[traceIdx].FunctionNameRefs;
+ for (size_t timestamp = 0; timestamp < trace.size(); timestamp++) {
+ auto [Filename, ParsedFuncName] = getParsedIRPGOName(
+ reader->getSymtab().getFuncOrVarName(trace[timestamp]));
+ ParsedFuncName = BPSectionBase::getRootSymbol(ParsedFuncName);
+
+ auto sectionIdxsIt = rootSymbolToSectionIdxs.find(ParsedFuncName);
+ if (sectionIdxsIt == rootSymbolToSectionIdxs.end())
+ continue;
+ auto &sectionIdxs = sectionIdxsIt->getValue();
+ // If the same symbol is found in multiple sections, they might be
+ // identical, so we arbitrarily use the size from the first section.
+ currentSize += sections[*sectionIdxs.begin()]->getSize();
+
+ // Since BalancedPartitioning is sensitive to the initial order, we need
+ // to explicitly define it to be ordered by earliest timestamp.
+ for (unsigned sectionIdx : sectionIdxs) {
+ auto [it, wasInserted] =
+ sectionIdxToTimestamp.try_emplace(sectionIdx, timestamp);
+ if (!wasInserted)
+ it->getSecond() = std::min<size_t>(it->getSecond(), timestamp);
+ }
+
+ if (timestamp >= cutoffTimestamp || currentSize >= cutoffSize) {
+ ++maxUN;
+ cutoffSize = 2 * currentSize;
+ cutoffTimestamp = 2 * cutoffTimestamp;
+ }
+ for (unsigned sectionIdx : sectionIdxs)
+ sectionIdxToFirstUN.try_emplace(sectionIdx, maxUN);
+ }
+ for (auto &[sectionIdx, firstUN] : sectionIdxToFirstUN)
+ for (auto un = firstUN; un <= maxUN; ++un)
+ startupSectionIdxUNs[sectionIdx].push_back(un);
+ ++maxUN;
+ sectionIdxToFirstUN.clear();
+ }
+ }
+
+ SmallVector<unsigned> sectionIdxsForFunctionCompression,
+ sectionIdxsForDataCompression;
+ for (unsigned sectionIdx = 0; sectionIdx < sections.size(); sectionIdx++) {
+ if (startupSectionIdxUNs.count(sectionIdx))
+ continue;
+ const auto *isec = sections[sectionIdx];
+ if (isec->isCodeSection()) {
+ if (forFunctionCompression)
+ sectionIdxsForFunctionCompression.push_back(sectionIdx);
+ } else {
+ if (forDataCompression)
+ sectionIdxsForDataCompression.push_back(sectionIdx);
+ }
+ }
+
+ if (compressionSortStartupFunctions) {
+ SmallVector<unsigned> startupIdxs;
+ for (auto &[sectionIdx, uns] : startupSectionIdxUNs)
+ startupIdxs.push_back(sectionIdx);
+ auto unsForStartupFunctionCompression =
+ getUnsForCompression(sections, sectionToIdx, startupIdxs,
+ /*duplicateSectionIdxs=*/nullptr, maxUN);
+ for (auto &[sectionIdx, compressionUns] :
+ unsForStartupFunctionCompression) {
+ auto &uns = startupSectionIdxUNs[sectionIdx];
+ uns.append(compressionUns);
+ llvm::sort(uns);
+ uns.erase(std::unique(uns.begin(), uns.end()), uns.end());
+ }
+ }
+
+ // Map a section index (ordered directly) to a list of duplicate section indices
+ // (not ordered directly).
+ DenseMap<unsigned, SmallVector<unsigned>> duplicateSectionIdxs;
+ auto unsForFunctionCompression = getUnsForCompression(
+ sections, sectionToIdx, sectionIdxsForFunctionCompression,
+ &duplicateSectionIdxs, maxUN);
+ auto unsForDataCompression = getUnsForCompression(
+ sections, sectionToIdx, sectionIdxsForDataCompression,
+ &duplicateSectionIdxs, maxUN);
+
+ std::vector<BPFunctionNode> nodesForStartup, nodesForFunctionCompression,
+ nodesForDataCompression;
+ for (auto &[sectionIdx, uns] : startupSectionIdxUNs)
+ nodesForStartup.emplace_back(sectionIdx, uns);
+ for (auto &[sectionIdx, uns] : unsForFunctionCompression)
+ nodesForFunctionCompression.emplace_back(sectionIdx, uns);
+ for (auto &[sectionIdx, uns] : unsForDataCompression)
+ nodesForDataCompression.emplace_back(sectionIdx, uns);
+
+ // Use the first timestamp to define the initial order for startup nodes.
+ llvm::sort(nodesForStartup, [&sectionIdxToTimestamp](auto &L, auto &R) {
+ return std::make_pair(sectionIdxToTimestamp[L.Id], L.Id) <
+ std::make_pair(sectionIdxToTimestamp[R.Id], R.Id);
+ });
+ // Sort compression nodes by their Id (which is the section index) because the
+ // input linker order tends to be not bad.
+ llvm::sort(nodesForFunctionCompression,
+ [](auto &L, auto &R) { return L.Id < R.Id; });
+ llvm::sort(nodesForDataCompression,
+ [](auto &L, auto &R) { return L.Id < R.Id; });
+
+ {
+ TimeTraceScope timeScope("Balanced Partitioning");
+ BalancedPartitioningConfig config;
+ BalancedPartitioning bp(config);
+ bp.run(nodesForStartup);
+ bp.run(nodesForFunctionCompression);
+ bp.run(nodesForDataCompression);
+ }
+
+ unsigned numStartupSections = 0;
+ unsigned numCodeCompressionSections = 0;
+ unsigned numDuplicateCodeSections = 0;
+ unsigned numDataCompressionSections = 0;
+ unsigned numDuplicateDataSections = 0;
+ SetVector<const BPSectionBase *> orderedSections;
+ // Order startup functions,
+ for (auto &node : nodesForStartup) {
+ const auto *isec = sections[node.Id];
+ if (orderedSections.insert(isec))
+ ++numStartupSections;
+ }
+ // then functions for compression,
+ for (auto &node : nodesForFunctionCompression) {
+ const auto *isec = sections[node.Id];
+ if (orderedSections.insert(isec))
+ ++numCodeCompressionSections;
+
+ auto It = duplicateSectionIdxs.find(node.Id);
+ if (It == duplicateSectionIdxs.end())
+ continue;
+ for (auto dupSecIdx : It->getSecond()) {
+ const auto *dupIsec = sections[dupSecIdx];
+ if (orderedSections.insert(dupIsec))
+ ++numDuplicateCodeSections;
+ }
+ }
+ // then data for compression.
+ for (auto &node : nodesForDataCompression) {
+ const auto *isec = sections[node.Id];
+ if (orderedSections.insert(isec))
+ ++numDataCompressionSections;
+ auto It = duplicateSectionIdxs.find(node.Id);
+ if (It == duplicateSectionIdxs.end())
+ continue;
+ for (auto dupSecIdx : It->getSecond()) {
+ const auto *dupIsec = sections[dupSecIdx];
+ if (orderedSections.insert(dupIsec))
+ ++numDuplicateDataSections;
+ }
+ }
+
+ if (verbose) {
+ unsigned numTotalOrderedSections =
+ numStartupSections + numCodeCompressionSections +
+ numDuplicateCodeSections + numDataCompressionSections +
+ numDuplicateDataSections;
+ dbgs()
+ << "Ordered " << numTotalOrderedSections
+ << " sections using balanced partitioning:\n Functions for startup: "
+ << numStartupSections
+ << "\n Functions for compression: " << numCodeCompressionSections
+ << "\n Duplicate functions: " << numDuplicateCodeSections
+ << "\n Data for compression: " << numDataCompressionSections
+ << "\n Duplicate data: " << numDuplicateDataSections << "\n";
+
+ if (!profilePath.empty()) {
+ // Evaluate this function order for startup
+ StringMap<std::pair<uint64_t, uint64_t>> symbolToPageNumbers;
+ const uint64_t pageSize = (1 << 14);
+ uint64_t currentAddress = 0;
+ for (const auto *isec : orderedSections) {
+ for (auto &sym : isec->getSymbols()) {
+ if (auto *d = sym->asDefinedSymbol()) {
+ uint64_t startAddress = currentAddress + d->getValue().value_or(0);
+ uint64_t endAddress = startAddress + d->getSize().value_or(0);
+ uint64_t firstPage = startAddress / pageSize;
+ // I think the kernel might pull in a few pages when one is touched,
+ // so it might be more accurate to force lastPage to be aligned by
+ // 4?
+ uint64_t lastPage = endAddress / pageSize;
+ StringRef rootSymbol = d->getName();
+ rootSymbol = BPSectionBase::getRootSymbol(rootSymbol);
+ symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage);
+ if (isec->needResolveLinkageName(rootSymbol))
+ symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage);
+ }
+ }
+ currentAddress += isec->getSize();
+ }
+
+ // The area under the curve F where F(t) is the total number of page
+ // faults at step t.
+ unsigned area = 0;
+ for (auto &trace : reader->getTemporalProfTraces()) {
+ SmallSet<uint64_t, 0> touchedPages;
+ for (unsigned step = 0; step < trace.FunctionNameRefs.size(); step++) {
+ auto traceId = trace.FunctionNameRefs[step];
+ auto [Filename, ParsedFuncName] =
+ getParsedIRPGOName(reader->getSymtab().getFuncOrVarName(traceId));
+ ParsedFuncName = BPSectionBase::getRootSymbol(ParsedFuncName);
+ auto it = symbolToPageNumbers.find(ParsedFuncName);
+ if (it != symbolToPageNumbers.end()) {
+ auto &[firstPage, lastPage] = it->getValue();
+ for (uint64_t i = firstPage; i <= lastPage; i++)
+ touchedPages.insert(i);
+ }
+ area += touchedPages.size();
+ }
+ }
+ dbgs() << "Total area under the page fault curve: " << (float)area
+ << "\n";
+ }
+ }
+
+ DenseMap<const BPSectionBase *, size_t> sectionPriorities;
+ for (const auto *isec : orderedSections)
+ sectionPriorities[isec] = --highestAvailablePriority;
+ return sectionPriorities;
+}
+
+} // namespace lld
diff --git a/lld/Common/CMakeLists.txt b/lld/Common/CMakeLists.txt
index 4f503d04f7844f..2ab5093bf6887d 100644
--- a/lld/Common/CMakeLists.txt
+++ b/lld/Common/CMakeLists.txt
@@ -24,6 +24,7 @@ set_source_files_properties("${version_inc}"
add_lld_library(lldCommon
Args.cpp
+ BPSectionOrdererBase.cpp
CommonLinkerContext.cpp
DriverDispatcher.cpp
DWARF.cpp
diff --git a/lld/ELF/BPSectionOrderer.cpp b/lld/ELF/BPSectionOrderer.cpp
new file mode 100644
index 00000000000000..5d9f28a9af7402
--- /dev/null
+++ b/lld/ELF/BPSectionOrderer.cpp
@@ -0,0 +1,65 @@
+//===- BPSectionOrderer.cpp------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPSectionOrderer.h"
+#include "Config.h"
+#include "InputFiles.h"
+#include "InputSection.h"
+#include "lld/Common/BPSectionOrdererBase.h"
+#include "lld/Common/CommonLinkerContext.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/BalancedPartitioning.h"
+#include "llvm/Support/TimeProfiler.h"
+
+#include "SymbolTable.h"
+#include "Symbols.h"
+
+using namespace llvm;
+using namespace lld::elf;
+
+llvm::DenseMap<const lld::elf::InputSectionBase *, int>
+lld::elf::runBalancedPartitioning(Ctx &ctx, llvm::StringRef profilePath,
+ bool forFunctionCompression,
+ bool forDataCompression,
+ bool compressionSortStartupFunctions,
+ bool verbose) {
+ size_t highestAvailablePriority = std::numeric_limits<int>::max();
+ // Collect all InputSectionBase objects from symbols and wrap them as
+ // BPSectionELF instances for balanced partitioning, following the same
+ // approach that '--symbol-ordering-file' uses.
+ SmallVector<std::unique_ptr<BPSectionBase>> sections;
+
+ for (Symbol *sym : ctx.symtab->getSymbols())
+ if (sym->getSize() > 0)
+ if (auto *d = dyn_cast<Defined>(sym))
+ if (auto *sec = dyn_cast_or_null<InputSectionBase>(d->section))
+ sections.emplace_back(std::make_unique<BPSectionELF>(
+ sec, std::make_unique<BPSymbolELF>(sym)));
+
+ for (ELFFileBase *file : ctx.objectFiles)
+ for (Symbol *sym : file->getLocalSymbols())
+ if (sym->getSize() > 0)
+ if (auto *d = dyn_cast<Defined>(sym))
+ if (auto *sec = dyn_cast_or_null<InputSectionBase>(d->section))
+ sections.emplace_back(std::make_unique<BPSectionELF>(
+ sec, std::make_unique<BPSymbolELF>(sym)));
+
+ auto reorderedSections =
+ lld::BPSectionOrdererBase::reorderSectionsByBalancedPartitioning(
+ highestAvailablePriority, profilePath, forFunctionCompression,
+ forDataCompression, compressionSortStartupFunctions, verbose,
+ sections);
+
+ DenseMap<const InputSectionBase *, int> result;
+ for (const auto &[sec, priority] : reorderedSections) {
+ auto *elfSection = cast<BPSectionELF>(sec);
+ result.try_emplace(elfSection->getSymbol()->getInputSection(),
+ static_cast<int>(priority));
+ }
+ return result;
+}
diff --git a/lld/ELF/BPSectionOrderer.h b/lld/ELF/BPSectionOrderer.h
new file mode 100644
index 00000000000000..73a44833ae003e
--- /dev/null
+++ b/lld/ELF/BPSectionOrderer.h
@@ -0,0 +1,139 @@
+//===- BPSectionOrderer.h -------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file uses Balanced Partitioning to order sections to improve startup
+/// time and compressed size.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_ELF_BPSECTION_ORDERER_H
+#define LLD_ELF_BPSECTION_ORDERER_H
+
+#include "InputFiles.h"
+#include "InputSection.h"
+#include "Relocations.h"
+#include "Symbols.h"
+#include "lld/Common/BPSectionOrdererBase.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/xxhash.h"
+
+namespace lld::elf {
+
+class InputSection;
+
+class BPSymbolELF : public BPSymbol {
+ const Symbol *sym;
+
+public:
+ explicit BPSymbolELF(const Symbol *s) : sym(s) {}
+
+ llvm::StringRef getName() const override { return sym->getName(); }
+
+ const Defined *asDefined() const {
+ return llvm::dyn_cast_or_null<Defined>(sym);
+ }
+
+ BPSymbol *asDefinedSymbol() override { return asDefined() ? this : nullptr; }
+
+ std::optional<uint64_t> getValue() const override {
+ if (auto *d = asDefined())
+ return d->value;
+ return {};
+ }
+
+ std::optional<uint64_t> getSize() const override {
+ if (auto *d = asDefined())
+ return d->size;
+ return {};
+ }
+
+ InputSectionBase *getInputSection() const {
+ if (auto *d = llvm::dyn_cast<Defined>(sym))
+ return llvm::dyn_cast_or_null<InputSectionBase>(d->section);
+ return nullptr;
+ }
+
+ const Symbol *getSymbol() const { return sym; }
+};
+
+class BPSectionELF : public BPSectionBase {
+ const InputSectionBase *isec;
+ std::unique_ptr<BPSymbolELF> symbol;
+
+public:
+ explicit BPSectionELF(const InputSectionBase *sec,
+ std::unique_ptr<BPSymbolELF> sym)
+ : isec(sec), symbol(std::move(sym)) {}
+
+ const InputSectionBase *getSection() const { return isec; }
+
+ BPSymbolELF *getSymbol() const { return symbol.get(); }
+ llvm::StringRef getName() const override { return isec->name; }
+
+ uint64_t getSize() const override { return isec->getSize(); }
+
+ bool isCodeSection() const override {
+ return isec->flags & llvm::ELF::SHF_EXECINSTR;
+ }
+
+ bool hasValidData() const override {
+ return isec && !isec->content().empty();
+ }
+
+ llvm::ArrayRef<uint8_t> getSectionData() const override {
+ return isec->content();
+ }
+
+ llvm::ArrayRef<std::unique_ptr<BPSymbol>> getSymbols() const override {
+ return llvm::ArrayRef<std::unique_ptr<BPSymbol>>(
+ reinterpret_cast<const std::unique_ptr<BPSymbol> *>(&symbol), 1);
+ }
+
+ bool needResolveLinkageName(llvm::StringRef &name) const override {
+ return false;
+ }
+
+ void getSectionHash(llvm::SmallVectorImpl<uint64_t> &hashes) const override {
+ constexpr unsigned windowSize = 4;
+
+ // Calculate content hashes
+ size_t size = isec->content().size();
+ for (size_t i = 0; i < size; i++) {
+ auto window = isec->content().drop_front(i).take_front(windowSize);
+ hashes.push_back(xxHash64(window));
+ }
+
+ // TODO: Calculate relocation hashes.
+ // Relocations in ELF are complex, and the results without them are good
+ // enough, so we just use 0 as their hash.
+
+ llvm::sort(hashes);
+ hashes.erase(std::unique(hashes.begin(), hashes.end()), hashes.end());
+ }
+
+ static bool classof(const BPSectionBase *s) { return true; }
+};
+
+/// Run Balanced Partitioning to find the optimal function and data order to
+/// improve startup time and compressed size.
+///
+/// It is important that -ffunction-sections and -fdata-sections are used to
+/// ensure functions and data are in their own sections and thus can be
+/// reordered.
+llvm::DenseMap<const lld::elf::InputSectionBase *, int>
+runBalancedPartitioning(Ctx &ctx, llvm::StringRef profilePath,
+ bool forFunctionCompression, bool forDataCompression,
+ bool compressionSortStartupFunctions, bool verbose);
+} // namespace lld::elf
+
+#endif
diff --git a/lld/ELF/CMakeLists.txt b/lld/ELF/CMakeLists.txt
index 83d816ddb0601e..298443cd6ea42c 100644
--- a/lld/ELF/CMakeLists.txt
+++ b/lld/ELF/CMakeLists.txt
@@ -37,6 +37,7 @@ add_lld_library(lldELF
Arch/X86.cpp
Arch/X86_64.cpp
ARMErrataFix.cpp
+ BPSectionOrderer.cpp
CallGraphSort.cpp
DWARF.cpp
Driver.cpp
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index a2836733c2715e..d2766d1bda921d 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -264,6 +264,12 @@ struct Config {
bool armBe8 = false;
BsymbolicKind bsymbolic = BsymbolicKind::None;
CGProfileSortKind callGraphProfileSort;
+ llvm::StringRef irpgoProfilePath;
+ bool bpStartupFunctionSort = false;
+ bool bpCompressionSortStartupFunctions = false;
+ bool bpFunctionOrderForCompression = false;
+ bool bpDataOrderForCompression = false;
+ bool bpVerboseSectionOrderer = false;
bool checkSections;
bool checkDynamicRelocs;
std::optional<llvm::DebugCompressionType> compressDebugSections;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 3c553e5043180b..8bc2598e93f276 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1255,6 +1255,55 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
ctx.arg.bsymbolic = BsymbolicKind::All;
}
ctx.arg.callGraphProfileSort = getCGProfileSortKind(ctx, args);
+ ctx.arg.irpgoProfilePath = args.getLastArgValue(OPT_irpgo_profile);
+ ctx.arg.bpCompressionSortStartupFunctions =
+ args.hasFlag(OPT_bp_compression_sort_startup_functions,
+ OPT_no_bp_compression_sort_startup_functions, false);
+ if (auto *arg = args.getLastArg(OPT_bp_startup_sort)) {
+ StringRef startupSortStr = arg->getValue();
+ if (startupSortStr == "function") {
+ ctx.arg.bpStartupFunctionSort = true;
+ } else if (startupSortStr != "none") {
+ ErrAlways(ctx) << "unknown value '" + startupSortStr + "' for " +
+ arg->getSpelling();
+ }
+ if (startupSortStr != "none")
+ if (args.hasArg(OPT_call_graph_ordering_file))
+ ErrAlways(ctx) << "--bp-startup-sort=function is incompatible with "
+ "--call-graph-ordering-file";
+ }
+ if (ctx.arg.irpgoProfilePath.empty()) {
+ if (ctx.arg.bpStartupFunctionSort)
+ ErrAlways(ctx) << "--bp-startup-sort=function must be used with "
+ "--irpgo-profile";
+ if (ctx.arg.bpCompressionSortStartupFunctions)
+ ErrAlways(ctx)
+ << "--bp-compression-sort-startup-functions must be used with "
+ "--irpgo-profile";
+ }
+
+ if (auto *arg = args.getLastArg(OPT_bp_compression_sort)) {
+ StringRef compressionSortStr = arg->getValue();
+ if (compressionSortStr == "function") {
+ ctx.arg.bpFunctionOrderForCompression = true;
+ } else if (compressionSortStr == "data") {
+ ctx.arg.bpDataOrderForCompression = true;
+ } else if (compressionSortStr == "both") {
+ ctx.arg.bpFunctionOrderForCompression = true;
+ ctx.arg.bpDataOrderForCompression = true;
+ } else if (compressionSortStr != "none") {
+ ErrAlways(ctx) << "unknown value '" + compressionSortStr + "' for " +
+ arg->getSpelling();
+ }
+ if (ctx.arg.bpDataOrderForCompression ||
+ ctx.arg.bpFunctionOrderForCompression) {
+ if (args.getLastArg(OPT_call_graph_ordering_file) != nullptr) {
+ ErrAlways(ctx) << "--bp-compression-sort is incompatible with "
+ "--call-graph-ordering-file";
+ }
+ }
+ }
+ ctx.arg.bpVerboseSectionOrderer = args.hasArg(OPT_verbose_bp_section_orderer);
ctx.arg.checkSections =
args.hasFlag(OPT_check_sections, OPT_no_check_sections, true);
ctx.arg.chroot = args.getLastArgValue(OPT_chroot);
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index ebe77204264210..eb769b31e290cb 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -141,6 +141,24 @@ def call_graph_profile_sort: JJ<"call-graph-profile-sort=">,
def : FF<"no-call-graph-profile-sort">, Alias<call_graph_profile_sort>, AliasArgs<["none"]>,
Flags<[HelpHidden]>;
+defm irpgo_profile: Eq<"irpgo-profile",
+ "Read the IRPGO profile for use with -bp-startup-sort and other profile-guided optimizations">;
+
+def bp_startup_sort: JJ<"bp-startup-sort=">,
+ MetaVarName<"[none,function]">,
+ HelpText<"Order sections based on profile data to improve startup time">;
+
+defm bp_compression_sort_startup_functions: BB<"bp-compression-sort-startup-functions",
+ "Order startup functions by balanced partition to improve compressed size in addition to startup time",
+ "Do not order startup function for compression">;
+
+def bp_compression_sort: JJ<"bp-compression-sort=">,
+ MetaVarName<"[none,function,data,both]">,
+ HelpText<"Order sections by balanced partition to improve compressed size">;
+
+def verbose_bp_section_orderer: FF<"verbose-bp-section-orderer">,
+ HelpText<"Print information on how many sections were ordered by balanced partitioning and a measure of the expected number of page faults">;
+
// --chroot doesn't have a help text because it is an internal option.
def chroot: Separate<["--"], "chroot">;
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 6c16549bfa6c04..6ac7af93b78861 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -9,6 +9,7 @@
#include "Writer.h"
#include "AArch64ErrataFix.h"
#include "ARMErrataFix.h"
+#include "BPSectionOrderer.h"
#include "CallGraphSort.h"
#include "Config.h"
#include "InputFiles.h"
@@ -1083,6 +1084,15 @@ static void maybeShuffle(Ctx &ctx,
// Builds section order for handling --symbol-ordering-file.
static DenseMap<const InputSectionBase *, int> buildSectionOrder(Ctx &ctx) {
DenseMap<const InputSectionBase *, int> sectionOrder;
+ if (ctx.arg.bpStartupFunctionSort || ctx.arg.bpFunctionOrderForCompression ||
+ ctx.arg.bpDataOrderForCompression) {
+ TimeTraceScope timeScope("Balanced Partitioning Section Orderer");
+ sectionOrder = runBalancedPartitioning(
+ ctx, ctx.arg.irpgoProfilePath, ctx.arg.bpFunctionOrderForCompression,
+ ctx.arg.bpDataOrderForCompression,
+ ctx.arg.bpCompressionSortStartupFunctions,
+ ctx.arg.bpVerboseSectionOrderer);
+ }
// Use the rarely used option --call-graph-ordering-file to sort sections.
if (!ctx.arg.callGraphProfile.empty())
return computeCallGraphProfileOrder(ctx);
diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index 5db2242a35ef28..721770f4b4a27f 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -1,4 +1,4 @@
-//===- BPSectionOrderer.cpp--------------------------------------*- C++ -*-===//
+//===- BPSectionOrderer.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -11,425 +11,43 @@
#include "lld/Common/ErrorHandler.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/BalancedPartitioning.h"
#include "llvm/Support/TimeProfiler.h"
-#include "llvm/Support/VirtualFileSystem.h"
-#include "llvm/Support/xxhash.h"
#define DEBUG_TYPE "bp-section-orderer"
+
using namespace llvm;
using namespace lld::macho;
-using UtilityNodes = SmallVector<BPFunctionNode::UtilityNodeT>;
-
-/// Symbols can be appended with "(.__uniq.xxxx)?.llvm.yyyy" where "xxxx" and
-/// "yyyy" are numbers that could change between builds. We need to use the root
-/// symbol name before this suffix so these symbols can be matched with profiles
-/// which may have different suffixes.
-static StringRef getRootSymbol(StringRef Name) {
- auto [P0, S0] = Name.rsplit(".llvm.");
- auto [P1, S1] = P0.rsplit(".__uniq.");
- return P1;
-}
-
-static uint64_t getRelocHash(StringRef kind, uint64_t sectionIdx,
- uint64_t offset, uint64_t addend) {
- return xxHash64((kind + ": " + Twine::utohexstr(sectionIdx) + " + " +
- Twine::utohexstr(offset) + " + " + Twine::utohexstr(addend))
- .str());
-}
-
-static uint64_t
-getRelocHash(const Reloc &reloc,
- const DenseMap<const InputSection *, uint64_t> &sectionToIdx) {
- auto *isec = reloc.getReferentInputSection();
- std::optional<uint64_t> sectionIdx;
- auto sectionIdxIt = sectionToIdx.find(isec);
- if (sectionIdxIt != sectionToIdx.end())
- sectionIdx = sectionIdxIt->getSecond();
- std::string kind;
- if (isec)
- kind = ("Section " + Twine(static_cast<uint8_t>(isec->kind()))).str();
- if (auto *sym = reloc.referent.dyn_cast<Symbol *>()) {
- kind += (" Symbol " + Twine(static_cast<uint8_t>(sym->kind()))).str();
- if (auto *d = dyn_cast<Defined>(sym))
- return getRelocHash(kind, sectionIdx.value_or(0), d->value, reloc.addend);
- }
- return getRelocHash(kind, sectionIdx.value_or(0), 0, reloc.addend);
-}
-
-/// Given \p sectionIdxs, a list of section indexes, return a list of utility
-/// nodes for each section index. If \p duplicateSectionIdx is provided,
-/// populate it with nearly identical sections. Increment \p maxUN to be the
-/// largest utility node we have used so far.
-static SmallVector<std::pair<unsigned, UtilityNodes>> getUnsForCompression(
- ArrayRef<const InputSection *> sections,
- const DenseMap<const InputSection *, uint64_t> &sectionToIdx,
- ArrayRef<unsigned> sectionIdxs,
- DenseMap<unsigned, SmallVector<unsigned>> *duplicateSectionIdxs,
- BPFunctionNode::UtilityNodeT &maxUN) {
- TimeTraceScope timeScope("Build nodes for compression");
-
- SmallVector<std::pair<unsigned, SmallVector<uint64_t>>> sectionHashes;
- sectionHashes.reserve(sectionIdxs.size());
- SmallVector<uint64_t> hashes;
- for (unsigned sectionIdx : sectionIdxs) {
- const auto *isec = sections[sectionIdx];
- constexpr unsigned windowSize = 4;
-
- for (size_t i = 0; i < isec->data.size(); i++) {
- auto window = isec->data.drop_front(i).take_front(windowSize);
- hashes.push_back(xxHash64(window));
- }
- for (const auto &r : isec->relocs) {
- if (r.length == 0 || r.referent.isNull() || r.offset >= isec->data.size())
- continue;
- uint64_t relocHash = getRelocHash(r, sectionToIdx);
- uint32_t start = (r.offset < windowSize) ? 0 : r.offset - windowSize + 1;
- for (uint32_t i = start; i < r.offset + r.length; i++) {
- auto window = isec->data.drop_front(i).take_front(windowSize);
- hashes.push_back(xxHash64(window) + relocHash);
- }
- }
-
- llvm::sort(hashes);
- hashes.erase(std::unique(hashes.begin(), hashes.end()), hashes.end());
-
- sectionHashes.emplace_back(sectionIdx, hashes);
- hashes.clear();
- }
-
- DenseMap<uint64_t, unsigned> hashFrequency;
- for (auto &[sectionIdx, hashes] : sectionHashes)
- for (auto hash : hashes)
- ++hashFrequency[hash];
-
- if (duplicateSectionIdxs) {
- // Merge section that are nearly identical
- SmallVector<std::pair<unsigned, SmallVector<uint64_t>>> newSectionHashes;
- DenseMap<uint64_t, unsigned> wholeHashToSectionIdx;
- for (auto &[sectionIdx, hashes] : sectionHashes) {
- uint64_t wholeHash = 0;
- for (auto hash : hashes)
- if (hashFrequency[hash] > 5)
- wholeHash ^= hash;
- auto [it, wasInserted] =
- wholeHashToSectionIdx.insert(std::make_pair(wholeHash, sectionIdx));
- if (wasInserted) {
- newSectionHashes.emplace_back(sectionIdx, hashes);
- } else {
- (*duplicateSectionIdxs)[it->getSecond()].push_back(sectionIdx);
- }
- }
- sectionHashes = newSectionHashes;
-
- // Recompute hash frequencies
- hashFrequency.clear();
- for (auto &[sectionIdx, hashes] : sectionHashes)
- for (auto hash : hashes)
- ++hashFrequency[hash];
- }
-
- // Filter rare and common hashes and assign each a unique utility node that
- // doesn't conflict with the trace utility nodes
- DenseMap<uint64_t, BPFunctionNode::UtilityNodeT> hashToUN;
- for (auto &[hash, frequency] : hashFrequency) {
- if (frequency <= 1 || frequency * 2 > sectionHashes.size())
- continue;
- hashToUN[hash] = ++maxUN;
- }
-
- SmallVector<std::pair<unsigned, UtilityNodes>> sectionUns;
- for (auto &[sectionIdx, hashes] : sectionHashes) {
- UtilityNodes uns;
- for (auto &hash : hashes) {
- auto it = hashToUN.find(hash);
- if (it != hashToUN.end())
- uns.push_back(it->second);
- }
- sectionUns.emplace_back(sectionIdx, uns);
- }
- return sectionUns;
-}
-
DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
size_t &highestAvailablePriority, StringRef profilePath,
bool forFunctionCompression, bool forDataCompression,
bool compressionSortStartupFunctions, bool verbose) {
- SmallVector<const InputSection *> sections;
- DenseMap<const InputSection *, uint64_t> sectionToIdx;
- StringMap<DenseSet<unsigned>> symbolToSectionIdxs;
+ SmallVector<std::unique_ptr<BPSectionBase>> sections;
for (const auto *file : inputFiles) {
for (auto *sec : file->sections) {
for (auto &subsec : sec->subsections) {
auto *isec = subsec.isec;
if (!isec || isec->data.empty() || !isec->data.data())
continue;
- unsigned sectionIdx = sections.size();
- sectionToIdx.try_emplace(isec, sectionIdx);
- sections.push_back(isec);
- for (Symbol *sym : isec->symbols)
- if (auto *d = dyn_cast_or_null<Defined>(sym))
- symbolToSectionIdxs[d->getName()].insert(sectionIdx);
- }
- }
- }
-
- StringMap<DenseSet<unsigned>> rootSymbolToSectionIdxs;
- for (auto &entry : symbolToSectionIdxs) {
- StringRef name = entry.getKey();
- auto &sectionIdxs = entry.getValue();
- name = getRootSymbol(name);
- rootSymbolToSectionIdxs[name].insert(sectionIdxs.begin(),
- sectionIdxs.end());
- // Linkage names can be prefixed with "_" or "l_" on Mach-O. See
- // Mangler::getNameWithPrefix() for details.
- if (name.consume_front("_") || name.consume_front("l_"))
- rootSymbolToSectionIdxs[name].insert(sectionIdxs.begin(),
- sectionIdxs.end());
- }
-
- BPFunctionNode::UtilityNodeT maxUN = 0;
- DenseMap<unsigned, UtilityNodes> startupSectionIdxUNs;
- // Used to define the initial order for startup functions.
- DenseMap<unsigned, size_t> sectionIdxToTimestamp;
- std::unique_ptr<InstrProfReader> reader;
- if (!profilePath.empty()) {
- auto fs = vfs::getRealFileSystem();
- auto readerOrErr = InstrProfReader::create(profilePath, *fs);
- lld::checkError(readerOrErr.takeError());
-
- reader = std::move(readerOrErr.get());
- for (auto &entry : *reader) {
- // Read all entries
- (void)entry;
- }
- auto &traces = reader->getTemporalProfTraces();
-
- DenseMap<unsigned, BPFunctionNode::UtilityNodeT> sectionIdxToFirstUN;
- for (size_t traceIdx = 0; traceIdx < traces.size(); traceIdx++) {
- uint64_t currentSize = 0, cutoffSize = 1;
- size_t cutoffTimestamp = 1;
- auto &trace = traces[traceIdx].FunctionNameRefs;
- for (size_t timestamp = 0; timestamp < trace.size(); timestamp++) {
- auto [Filename, ParsedFuncName] = getParsedIRPGOName(
- reader->getSymtab().getFuncOrVarName(trace[timestamp]));
- ParsedFuncName = getRootSymbol(ParsedFuncName);
-
- auto sectionIdxsIt = rootSymbolToSectionIdxs.find(ParsedFuncName);
- if (sectionIdxsIt == rootSymbolToSectionIdxs.end())
- continue;
- auto &sectionIdxs = sectionIdxsIt->getValue();
- // If the same symbol is found in multiple sections, they might be
- // identical, so we arbitrarily use the size from the first section.
- currentSize += sections[*sectionIdxs.begin()]->getSize();
-
- // Since BalancedPartitioning is sensitive to the initial order, we need
- // to explicitly define it to be ordered by earliest timestamp.
- for (unsigned sectionIdx : sectionIdxs) {
- auto [it, wasInserted] =
- sectionIdxToTimestamp.try_emplace(sectionIdx, timestamp);
- if (!wasInserted)
- it->getSecond() = std::min<size_t>(it->getSecond(), timestamp);
- }
-
- if (timestamp >= cutoffTimestamp || currentSize >= cutoffSize) {
- ++maxUN;
- cutoffSize = 2 * currentSize;
- cutoffTimestamp = 2 * cutoffTimestamp;
- }
- for (unsigned sectionIdx : sectionIdxs)
- sectionIdxToFirstUN.try_emplace(sectionIdx, maxUN);
+ sections.emplace_back(
+ std::make_unique<BPSectionMacho>(isec, sections.size()));
}
- for (auto &[sectionIdx, firstUN] : sectionIdxToFirstUN)
- for (auto un = firstUN; un <= maxUN; ++un)
- startupSectionIdxUNs[sectionIdx].push_back(un);
- ++maxUN;
- sectionIdxToFirstUN.clear();
}
}
- SmallVector<unsigned> sectionIdxsForFunctionCompression,
- sectionIdxsForDataCompression;
- for (unsigned sectionIdx = 0; sectionIdx < sections.size(); sectionIdx++) {
- if (startupSectionIdxUNs.count(sectionIdx))
- continue;
- const auto *isec = sections[sectionIdx];
- if (isCodeSection(isec)) {
- if (forFunctionCompression)
- sectionIdxsForFunctionCompression.push_back(sectionIdx);
- } else {
- if (forDataCompression)
- sectionIdxsForDataCompression.push_back(sectionIdx);
- }
- }
+ auto reorderedSections =
+ lld::BPSectionOrdererBase::reorderSectionsByBalancedPartitioning(
+ highestAvailablePriority, profilePath, forFunctionCompression,
+ forDataCompression, compressionSortStartupFunctions, verbose,
+ sections);
- if (compressionSortStartupFunctions) {
- SmallVector<unsigned> startupIdxs;
- for (auto &[sectionIdx, uns] : startupSectionIdxUNs)
- startupIdxs.push_back(sectionIdx);
- auto unsForStartupFunctionCompression =
- getUnsForCompression(sections, sectionToIdx, startupIdxs,
- /*duplicateSectionIdxs=*/nullptr, maxUN);
- for (auto &[sectionIdx, compressionUns] :
- unsForStartupFunctionCompression) {
- auto &uns = startupSectionIdxUNs[sectionIdx];
- uns.append(compressionUns);
- llvm::sort(uns);
- uns.erase(std::unique(uns.begin(), uns.end()), uns.end());
+ DenseMap<const InputSection *, size_t> result;
+ for (const auto &[sec, priority] : reorderedSections) {
+ if (auto *machoSection = dyn_cast<BPSectionMacho>(sec)) {
+ result.try_emplace(machoSection->getSection(), priority);
}
}
-
- // Map a section index (order directly) to a list of duplicate section indices
- // (not ordered directly).
- DenseMap<unsigned, SmallVector<unsigned>> duplicateSectionIdxs;
- auto unsForFunctionCompression = getUnsForCompression(
- sections, sectionToIdx, sectionIdxsForFunctionCompression,
- &duplicateSectionIdxs, maxUN);
- auto unsForDataCompression = getUnsForCompression(
- sections, sectionToIdx, sectionIdxsForDataCompression,
- &duplicateSectionIdxs, maxUN);
-
- std::vector<BPFunctionNode> nodesForStartup, nodesForFunctionCompression,
- nodesForDataCompression;
- for (auto &[sectionIdx, uns] : startupSectionIdxUNs)
- nodesForStartup.emplace_back(sectionIdx, uns);
- for (auto &[sectionIdx, uns] : unsForFunctionCompression)
- nodesForFunctionCompression.emplace_back(sectionIdx, uns);
- for (auto &[sectionIdx, uns] : unsForDataCompression)
- nodesForDataCompression.emplace_back(sectionIdx, uns);
-
- // Use the first timestamp to define the initial order for startup nodes.
- llvm::sort(nodesForStartup, [&sectionIdxToTimestamp](auto &L, auto &R) {
- return std::make_pair(sectionIdxToTimestamp[L.Id], L.Id) <
- std::make_pair(sectionIdxToTimestamp[R.Id], R.Id);
- });
- // Sort compression nodes by their Id (which is the section index) because the
- // input linker order tends to be not bad.
- llvm::sort(nodesForFunctionCompression,
- [](auto &L, auto &R) { return L.Id < R.Id; });
- llvm::sort(nodesForDataCompression,
- [](auto &L, auto &R) { return L.Id < R.Id; });
-
- {
- TimeTraceScope timeScope("Balanced Partitioning");
- BalancedPartitioningConfig config;
- BalancedPartitioning bp(config);
- bp.run(nodesForStartup);
- bp.run(nodesForFunctionCompression);
- bp.run(nodesForDataCompression);
- }
-
- unsigned numStartupSections = 0;
- unsigned numCodeCompressionSections = 0;
- unsigned numDuplicateCodeSections = 0;
- unsigned numDataCompressionSections = 0;
- unsigned numDuplicateDataSections = 0;
- SetVector<const InputSection *> orderedSections;
- // Order startup functions,
- for (auto &node : nodesForStartup) {
- const auto *isec = sections[node.Id];
- if (orderedSections.insert(isec))
- ++numStartupSections;
- }
- // then functions for compression,
- for (auto &node : nodesForFunctionCompression) {
- const auto *isec = sections[node.Id];
- if (orderedSections.insert(isec))
- ++numCodeCompressionSections;
-
- auto It = duplicateSectionIdxs.find(node.Id);
- if (It == duplicateSectionIdxs.end())
- continue;
- for (auto dupSecIdx : It->getSecond()) {
- const auto *dupIsec = sections[dupSecIdx];
- if (orderedSections.insert(dupIsec))
- ++numDuplicateCodeSections;
- }
- }
- // then data for compression.
- for (auto &node : nodesForDataCompression) {
- const auto *isec = sections[node.Id];
- if (orderedSections.insert(isec))
- ++numDataCompressionSections;
- auto It = duplicateSectionIdxs.find(node.Id);
- if (It == duplicateSectionIdxs.end())
- continue;
- for (auto dupSecIdx : It->getSecond()) {
- const auto *dupIsec = sections[dupSecIdx];
- if (orderedSections.insert(dupIsec))
- ++numDuplicateDataSections;
- }
- }
-
- if (verbose) {
- unsigned numTotalOrderedSections =
- numStartupSections + numCodeCompressionSections +
- numDuplicateCodeSections + numDataCompressionSections +
- numDuplicateDataSections;
- dbgs()
- << "Ordered " << numTotalOrderedSections
- << " sections using balanced partitioning:\n Functions for startup: "
- << numStartupSections
- << "\n Functions for compression: " << numCodeCompressionSections
- << "\n Duplicate functions: " << numDuplicateCodeSections
- << "\n Data for compression: " << numDataCompressionSections
- << "\n Duplicate data: " << numDuplicateDataSections << "\n";
-
- if (!profilePath.empty()) {
- // Evaluate this function order for startup
- StringMap<std::pair<uint64_t, uint64_t>> symbolToPageNumbers;
- const uint64_t pageSize = (1 << 14);
- uint64_t currentAddress = 0;
- for (const auto *isec : orderedSections) {
- for (Symbol *sym : isec->symbols) {
- if (auto *d = dyn_cast_or_null<Defined>(sym)) {
- uint64_t startAddress = currentAddress + d->value;
- uint64_t endAddress = startAddress + d->size;
- uint64_t firstPage = startAddress / pageSize;
- // I think the kernel might pull in a few pages when one it touched,
- // so it might be more accurate to force lastPage to be aligned by
- // 4?
- uint64_t lastPage = endAddress / pageSize;
- StringRef rootSymbol = d->getName();
- rootSymbol = getRootSymbol(rootSymbol);
- symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage);
- if (rootSymbol.consume_front("_") || rootSymbol.consume_front("l_"))
- symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage);
- }
- }
-
- currentAddress += isec->getSize();
- }
-
- // The area under the curve F where F(t) is the total number of page
- // faults at step t.
- unsigned area = 0;
- for (auto &trace : reader->getTemporalProfTraces()) {
- SmallSet<uint64_t, 0> touchedPages;
- for (unsigned step = 0; step < trace.FunctionNameRefs.size(); step++) {
- auto traceId = trace.FunctionNameRefs[step];
- auto [Filename, ParsedFuncName] =
- getParsedIRPGOName(reader->getSymtab().getFuncOrVarName(traceId));
- ParsedFuncName = getRootSymbol(ParsedFuncName);
- auto it = symbolToPageNumbers.find(ParsedFuncName);
- if (it != symbolToPageNumbers.end()) {
- auto &[firstPage, lastPage] = it->getValue();
- for (uint64_t i = firstPage; i <= lastPage; i++)
- touchedPages.insert(i);
- }
- area += touchedPages.size();
- }
- }
- dbgs() << "Total area under the page fault curve: " << (float)area
- << "\n";
- }
- }
-
- DenseMap<const InputSection *, size_t> sectionPriorities;
- for (const auto *isec : orderedSections)
- sectionPriorities[isec] = --highestAvailablePriority;
- return sectionPriorities;
+ return result;
}
diff --git a/lld/MachO/BPSectionOrderer.h b/lld/MachO/BPSectionOrderer.h
index cefd0ceb10e561..95f0e2aac73863 100644
--- a/lld/MachO/BPSectionOrderer.h
+++ b/lld/MachO/BPSectionOrderer.h
@@ -1,4 +1,4 @@
-//===- BPSectionOrderer.h ---------------------------------------*- C++ -*-===//
+//===- BPSectionOrderer.h -------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -14,13 +14,140 @@
#ifndef LLD_MACHO_BPSECTION_ORDERER_H
#define LLD_MACHO_BPSECTION_ORDERER_H
+#include "InputSection.h"
+#include "Relocations.h"
+#include "Symbols.h"
+#include "lld/Common/BPSectionOrdererBase.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/TinyPtrVector.h"
namespace lld::macho {
class InputSection;
+/// Mach-O implementation of BPSymbol: a non-owning view over a linker Symbol.
+class BPSymbolMacho : public BPSymbol {
+  const Symbol *sym;
+
+public:
+  explicit BPSymbolMacho(const Symbol *s) : sym(s) {}
+
+  /// Name of the wrapped symbol, as reported by Symbol::getName().
+  llvm::StringRef getName() const override { return sym->getName(); }
+
+  /// The wrapped symbol viewed as a Defined, or nullptr when it is null or
+  /// of a different symbol kind.
+  const Defined *asDefined() const {
+    return llvm::dyn_cast_or_null<Defined>(sym);
+  }
+
+  /// Returns this wrapper when the wrapped symbol is a Defined, else nullptr.
+  BPSymbol *asDefinedSymbol() override {
+    if (!asDefined())
+      return nullptr;
+    return this;
+  }
+
+  /// Defined::value of the wrapped symbol; empty for non-Defined symbols.
+  std::optional<uint64_t> getValue() const override {
+    const Defined *definedSym = asDefined();
+    if (!definedSym)
+      return std::nullopt;
+    return definedSym->value;
+  }
+
+  /// Defined::size of the wrapped symbol; empty for non-Defined symbols.
+  std::optional<uint64_t> getSize() const override {
+    const Defined *definedSym = asDefined();
+    if (!definedSym)
+      return std::nullopt;
+    return definedSym->size;
+  }
+
+  /// Direct access to the wrapped linker symbol.
+  const Symbol *getSymbol() const { return sym; }
+};
+
+/// Mach-O adapter that presents an InputSection to the format-independent
+/// balanced-partitioning code through the BPSectionBase interface.
+class BPSectionMacho : public BPSectionBase {
+  const InputSection *isec;
+  uint64_t sectionIdx;
+  // Symbol wrappers handed out by getSymbols(). Mutable because they are
+  // created lazily from a const accessor.
+  mutable std::vector<std::unique_ptr<BPSymbol>> symbols;
+
+public:
+  explicit BPSectionMacho(const InputSection *sec, uint64_t sectionIdx)
+      : isec(sec), sectionIdx(sectionIdx) {}
+
+  /// The wrapped Mach-O input section.
+  const InputSection *getSection() const { return isec; }
+
+  llvm::StringRef getName() const override { return isec->getName(); }
+
+  uint64_t getSize() const override { return isec->getSize(); }
+
+  /// Index that was passed to the constructor for this section.
+  uint64_t getSectionIdx() const { return sectionIdx; }
+
+  bool isCodeSection() const override { return macho::isCodeSection(isec); }
+
+  /// True when the section exists and has a non-empty, non-null data buffer.
+  bool hasValidData() const override {
+    return isec && !isec->data.empty() && isec->data.data();
+  }
+
+  llvm::ArrayRef<uint8_t> getSectionData() const override { return isec->data; }
+
+  /// Returns one BPSymbolMacho wrapper per entry of isec->symbols.
+  ///
+  /// The wrappers are created on the first call and reused afterwards.
+  /// Appending a fresh batch on every call would hand duplicated symbols to
+  /// repeat callers and could reallocate the storage behind an ArrayRef that
+  /// an earlier call returned.
+  llvm::ArrayRef<std::unique_ptr<BPSymbol>> getSymbols() const override {
+    if (symbols.empty()) {
+      symbols.reserve(isec->symbols.size());
+      for (auto *d : isec->symbols)
+        symbols.emplace_back(std::make_unique<BPSymbolMacho>(d));
+    }
+    return symbols;
+  }
+
+  // Linkage names can be prefixed with "_" or "l_" on Mach-O. See
+  // Mangler::getNameWithPrefix() for details.
+  // Strips one such prefix from \p name in place and reports whether it did.
+  bool needResolveLinkageName(llvm::StringRef &name) const override {
+    return (name.consume_front("_") || name.consume_front("l_"));
+  }
+
+  /// Appends content and relocation hashes for this section to \p hashes,
+  /// then sorts and de-duplicates the whole vector (including anything the
+  /// caller had already stored in it).
+  void getSectionHash(llvm::SmallVectorImpl<uint64_t> &hashes) const override {
+    constexpr unsigned windowSize = 4;
+
+    // Content hashes: one hash per byte offset over a sliding window of up to
+    // windowSize bytes (shorter near the end of the data).
+    size_t dataSize = isec->data.size();
+    for (size_t i = 0; i < dataSize; i++) {
+      auto window = isec->data.drop_front(i).take_front(windowSize);
+      hashes.push_back(xxHash64(window));
+    }
+
+    // Relocation hashes: re-hash the windows around each relocation, mixed
+    // with a hash describing the relocation itself.
+    for (const auto &r : isec->relocs) {
+      if (r.length == 0 || r.referent.isNull() || r.offset >= isec->data.size())
+        continue;
+
+      uint64_t relocHash = getRelocHash(r, this);
+      // First window start whose windowSize bytes can still include r.offset.
+      uint32_t start = (r.offset < windowSize) ? 0 : r.offset - windowSize + 1;
+      for (uint32_t i = start; i < r.offset + r.length; i++) {
+        auto window = isec->data.drop_front(i).take_front(windowSize);
+        hashes.push_back(xxHash64(window) + relocHash);
+      }
+    }
+
+    llvm::sort(hashes);
+    hashes.erase(std::unique(hashes.begin(), hashes.end()), hashes.end());
+  }
+
+  /// Same as getSection().
+  const InputSection *getInputSection() const { return isec; }
+
+  // NOTE(review): always true, so isa<>/dyn_cast<BPSectionMacho> on a
+  // BPSectionBase never fails. BPSectionBase carries no kind tag to test
+  // against; revisit if another subclass can reach the same cast.
+  static bool classof(const BPSectionBase *s) { return true; }
+
+private:
+  /// Hashes a description of \p reloc: the kinds of the referent section and
+  /// symbol, a section index, the Defined symbol's value (or 0) and the
+  /// addend.
+  ///
+  /// The section index is only known when the referent is \p section itself;
+  /// every other referent is hashed with index 0.
+  static uint64_t getRelocHash(const Reloc &reloc,
+                               const BPSectionMacho *section) {
+    auto *isec = reloc.getReferentInputSection();
+    std::optional<uint64_t> sectionIdx;
+    if (isec && isec == section->getSection())
+      sectionIdx = section->getSectionIdx();
+
+    std::string kind;
+    if (isec)
+      kind = ("Section " + Twine(isec->kind())).str();
+
+    if (auto *sym = reloc.referent.dyn_cast<Symbol *>()) {
+      kind += (" Symbol " + Twine(sym->kind())).str();
+      if (auto *d = llvm::dyn_cast<Defined>(sym)) {
+        return BPSectionBase::getRelocHash(kind, sectionIdx.value_or(0),
+                                           d->value, reloc.addend);
+      }
+    }
+    return BPSectionBase::getRelocHash(kind, sectionIdx.value_or(0), 0,
+                                       reloc.addend);
+  }
+};
+
/// Run Balanced Partitioning to find the optimal function and data order to
/// improve startup time and compressed size.
///
diff --git a/lld/include/lld/Common/BPSectionOrdererBase.h b/lld/include/lld/Common/BPSectionOrdererBase.h
new file mode 100644
index 00000000000000..e4cff1020d1dc1
--- /dev/null
+++ b/lld/include/lld/Common/BPSectionOrdererBase.h
@@ -0,0 +1,76 @@
+//===- BPSectionOrdererBase.h ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the common interfaces which may be used by
+// BPSectionOrderer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_COMMON_BP_SECTION_ORDERER_BASE_H
+#define LLD_COMMON_BP_SECTION_ORDERER_BASE_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/xxhash.h"
+#include <memory>
+#include <optional>
+
+namespace lld {
+
+/// Abstract, linker-format-independent view of a symbol. Each linker port
+/// supplies a subclass that wraps its own symbol type.
+class BPSymbol {
+
+public:
+ virtual ~BPSymbol() = default;
+ /// Name of the symbol.
+ virtual llvm::StringRef getName() const = 0;
+ /// Returns a pointer for symbols the port considers defined, nullptr
+ /// otherwise.
+ virtual BPSymbol *asDefinedSymbol() = 0;
+ /// Value of the symbol, or an empty optional when it has none.
+ virtual std::optional<uint64_t> getValue() const = 0;
+ /// Size of the symbol, or an empty optional when it has none.
+ virtual std::optional<uint64_t> getSize() const = 0;
+};
+
+/// Abstract, linker-format-independent view of an input section, plus two
+/// static helpers shared by every port.
+class BPSectionBase {
+public:
+  virtual ~BPSectionBase() = default;
+  virtual llvm::StringRef getName() const = 0;
+  virtual uint64_t getSize() const = 0;
+  virtual bool hasValidData() const = 0;
+  virtual bool isCodeSection() const = 0;
+  virtual llvm::ArrayRef<uint8_t> getSectionData() const = 0;
+  virtual llvm::ArrayRef<std::unique_ptr<BPSymbol>> getSymbols() const = 0;
+  virtual void
+  getSectionHash(llvm::SmallVectorImpl<uint64_t> &hashes) const = 0;
+  virtual bool needResolveLinkageName(llvm::StringRef &name) const = 0;
+
+  /// Returns \p Name with everything from the last ".llvm." onwards removed,
+  /// and then everything from the last ".__uniq." onwards removed. A name
+  /// containing neither marker is returned unchanged.
+  static llvm::StringRef getRootSymbol(llvm::StringRef Name) {
+    llvm::StringRef withoutLlvmSuffix = Name.rsplit(".llvm.").first;
+    return withoutLlvmSuffix.rsplit(".__uniq.").first;
+  }
+
+  /// Hashes the text "<kind>: <sectionIdx> + <offset> + <addend>", with the
+  /// three numbers rendered in hexadecimal.
+  static uint64_t getRelocHash(llvm::StringRef kind, uint64_t sectionIdx,
+                               uint64_t offset, uint64_t addend) {
+    // Materialise the Twine inside one full expression so none of its
+    // temporaries are referenced after they die.
+    const auto description =
+        (kind + ": " + llvm::Twine::utohexstr(sectionIdx) + " + " +
+         llvm::Twine::utohexstr(offset) + " + " +
+         llvm::Twine::utohexstr(addend))
+            .str();
+    return llvm::xxHash64(description);
+  }
+};
+/// Base class for Balanced Partitioning section ordering, providing common
+/// functionality for both ELF and MachO formats. This shared implementation
+/// reduces code duplication while handling function and data reordering.
+class BPSectionOrdererBase {
+public:
+ /// Orders \p inputSections and returns a map from section to a size_t that
+ /// the Mach-O caller stores as the section's priority.
+ ///
+ /// \param highestAvailablePriority taken by non-const reference.
+ ///        NOTE(review): presumably decremented once per ordered section, as
+ ///        the former Mach-O-only implementation did; confirm against
+ ///        BPSectionOrdererBase.cpp.
+ /// \param profilePath path of the profile to read; the former Mach-O-only
+ ///        implementation skipped profile loading when this was empty.
+ /// \param forFunctionCompression order code sections for compression.
+ /// \param forDataCompression order non-code sections for compression.
+ /// \param compressionSortStartupFunctions also apply compression ordering
+ ///        to startup functions.
+ /// \param verbose print statistics about the resulting order.
+ /// \param inputSections candidate sections. The returned map's keys are
+ ///        BPSectionBase pointers, so this container presumably has to
+ ///        outlive the result; confirm against the implementation.
+ static llvm::DenseMap<const BPSectionBase *, size_t>
+ reorderSectionsByBalancedPartitioning(
+ size_t &highestAvailablePriority, llvm::StringRef profilePath,
+ bool forFunctionCompression, bool forDataCompression,
+ bool compressionSortStartupFunctions, bool verbose,
+ llvm::SmallVector<std::unique_ptr<BPSectionBase>> &inputSections);
+};
+
+} // namespace lld
+
+#endif
diff --git a/lld/test/ELF/bp-section-orderer-stress.s b/lld/test/ELF/bp-section-orderer-stress.s
new file mode 100644
index 00000000000000..a25afafdadca28
--- /dev/null
+++ b/lld/test/ELF/bp-section-orderer-stress.s
@@ -0,0 +1,104 @@
+# REQUIRES: aarch64
+
+# Generate a large test case and check that the output is deterministic.
+
+# RUN: %python %s %t.s %t.proftext
+
+# RUN: llvm-mc -filetype=obj -triple=aarch64 %t.s -o %t.o
+# RUN: llvm-profdata merge %t.proftext -o %t.profdata
+
+# RUN: ld.lld -e _main --icf=all -o - %t.o --irpgo-profile=%t.profdata --bp-startup-sort=function --bp-compression-sort-startup-functions --bp-compression-sort=both | llvm-nm --numeric-sort --format=just-symbols - > %t.order1.txt
+# RUN: ld.lld -e _main --icf=all -o - %t.o --irpgo-profile=%t.profdata --bp-startup-sort=function --bp-compression-sort-startup-functions --bp-compression-sort=both | llvm-nm --numeric-sort --format=just-symbols - > %t.order2.txt
+# RUN: diff %t.order1.txt %t.order2.txt
+
+# Generator half of this test: writes an assembly file and a text-format
+# profile whose paths are given on the command line.
+import random
+import sys
+
+# Output paths: argv[1] receives the assembly, argv[2] the profile text.
+assembly_filepath = sys.argv[1]
+proftext_filepath = sys.argv[2]
+
+# Fixed seed so the shuffled traces below are identical on every invocation.
+random.seed(1234)
+num_functions = 1000
+num_data = 100
+num_traces = 10
+
+function_names = [f"f{n}" for n in range(num_functions)]
+data_names = [f"d{n}" for n in range(num_data)]
+# Only the first half of the functions appear in the profile.
+profiled_functions = function_names[: int(num_functions / 2)]
+
+# One small function body per name; the immediates vary with the index so
+# bodies repeat at different periods, and each body references its own label.
+function_contents = [
+ f"""
+{name}:
+ add w0, w0, #{i % 4096}
+ add w1, w1, #{i % 10}
+ add w2, w0, #{i % 20}
+ adrp x3, {name}
+ ret
+"""
+ for i, name in enumerate(function_names)
+]
+
+# One data object per name: a short string whose digits cycle with the index,
+# followed by a pointer-sized reference to the object's own label.
+data_contents = [
+ f"""
+{name}:
+ .ascii "s{i % 2}-{i % 3}-{i % 5}"
+ .xword {name}
+"""
+ for i, name in enumerate(data_names)
+]
+
+# Each trace has weight 1 and lists every profiled function once, in an order
+# drawn from the seeded generator.
+trace_contents = [
+ f"""
+# Weight
+1
+{", ".join(random.sample(profiled_functions, len(profiled_functions)))}
+"""
+ for i in range(num_traces)
+]
+
+# One profile record per profiled function: the index is used as the function
+# hash, with a single counter whose value is 1.
+profile_contents = [
+ f"""
+{name}
+# Func Hash:
+{i}
+# Num Counters:
+1
+# Counter Values:
+1
+"""
+ for i, name in enumerate(profiled_functions)
+]
+
+# Emit the assembly: _main, then every function in .text, then the data
+# objects in .data.
+with open(assembly_filepath, "w") as f:
+ f.write(
+ f"""
+.text
+.globl _main
+
+_main:
+ ret
+
+{"".join(function_contents)}
+
+.data
+{"".join(data_contents)}
+
+"""
+ )
+
+# Emit the profile: header flags, the trace section, then the per-function
+# records.
+with open(proftext_filepath, "w") as f:
+ f.write(
+ f"""
+:ir
+:temporal_prof_traces
+
+# Num Traces
+{num_traces}
+# Trace Stream Size:
+{num_traces}
+
+{"".join(trace_contents)}
+
+{"".join(profile_contents)}
+"""
+ )
diff --git a/lld/test/ELF/bp-section-orderer.s b/lld/test/ELF/bp-section-orderer.s
new file mode 100644
index 00000000000000..3164f9b5b4b18b
--- /dev/null
+++ b/lld/test/ELF/bp-section-orderer.s
@@ -0,0 +1,269 @@
+# REQUIRES: aarch64
+
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=aarch64 a.s -o a.o
+# RUN: llvm-profdata merge a.proftext -o a.profdata
+# RUN: ld.lld -e main -o a.out a.o --irpgo-profile=a.profdata --bp-startup-sort=function --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=STARTUP
+# RUN: ld.lld -e main -o a.out a.o --irpgo-profile=a.profdata --bp-startup-sort=function --verbose-bp-section-orderer --icf=all --bp-compression-sort=none 2>&1 | FileCheck %s --check-prefix=STARTUP
+
+# STARTUP: Ordered 3 sections using balanced partitioning
+
+# RUN: ld.lld -e main -o - a.o --irpgo-profile=a.profdata --bp-startup-sort=function --symbol-ordering-file a.orderfile | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=ORDERFILE
+
+# ORDERFILE: _Z1Av
+# ORDERFILE: _Z1Fi
+# ORDERFILE: _Z1Ei
+# ORDERFILE: _Z1Di
+# ORDERFILE: _Z1Ci
+# ORDERFILE: _Z1Bi
+# ORDERFILE: main
+# ORDERFILE: r1
+# ORDERFILE: r2
+
+# RUN: ld.lld -e main -o a.out a.o --verbose-bp-section-orderer --bp-compression-sort=function 2>&1 | FileCheck %s --check-prefix=COMPRESSION-FUNC
+# RUN: ld.lld -e main -o a.out a.o --verbose-bp-section-orderer --bp-compression-sort=data 2>&1 | FileCheck %s --check-prefix=COMPRESSION-DATA
+# RUN: ld.lld -e main -o a.out a.o --verbose-bp-section-orderer --bp-compression-sort=both 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
+# RUN: ld.lld -e main -o a.out a.o --verbose-bp-section-orderer --bp-compression-sort=both --irpgo-profile=a.profdata --bp-startup-sort=function 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
+
+# COMPRESSION-FUNC: Ordered 7 sections using balanced partitioning
+# COMPRESSION-DATA: Ordered 3 sections using balanced partitioning
+# COMPRESSION-BOTH: Ordered 10 sections using balanced partitioning
+
+#--- a.proftext
+:ir
+:temporal_prof_traces
+# Num Traces
+1
+# Trace Stream Size:
+1
+# Weight
+1
+_Z1Av, _Z1Bi, _Z1Ci
+
+_Z1Av
+# Func Hash:
+1111
+# Num Counters:
+1
+# Counter Values:
+1
+
+_Z1Bi
+# Func Hash:
+2222
+# Num Counters:
+1
+# Counter Values:
+1
+
+_Z1Ci
+# Func Hash:
+3333
+# Num Counters:
+1
+# Counter Values:
+1
+
+_Z1Di
+# Func Hash:
+4444
+# Num Counters:
+1
+# Counter Values:
+1
+
+#--- a.orderfile
+_Z1Av
+_Z1Fi
+_Z1Ei
+_Z1Di
+
+#--- a.cc
+const char s1[] = "hello world";
+const char s2[] = "i am a string";
+const char* r1 = s1;
+const char** r2 = &r1;
+void A() {
+ return;
+}
+
+int B(int a) {
+ A();
+ return a + 1;
+}
+
+int C(int a) {
+ A();
+ return a + 2;
+}
+
+int D(int a) {
+ return B(a + 2);
+}
+
+int E(int a) {
+ return C(a + 2);
+}
+
+int F(int a) {
+ return C(a + 3);
+}
+
+int main() {
+ return 0;
+}
+#--- gen
+echo '#--- a.s'
+clang --target=aarch64-linux-gnu -fdebug-compilation-dir='/proc/self/cwd' -ffunction-sections -fdata-sections -fno-exceptions -fno-rtti -fno-asynchronous-unwind-tables -S a.cc -o -
+;--- a.ll
+#--- a.s
+ .text
+ .file "a.cc"
+ .section .text._Z1Av,"ax",@progbits
+ .globl _Z1Av // -- Begin function _Z1Av
+ .p2align 2
+ .type _Z1Av,@function
+_Z1Av: // @_Z1Av
+// %bb.0:
+ ret
+.Lfunc_end0:
+ .size _Z1Av, .Lfunc_end0-_Z1Av
+ // -- End function
+ .section .text._Z1Bi,"ax",@progbits
+ .globl _Z1Bi // -- Begin function _Z1Bi
+ .p2align 2
+ .type _Z1Bi,@function
+_Z1Bi: // @_Z1Bi
+// %bb.0:
+ sub sp, sp, #32
+ stp x29, x30, [sp, #16] // 16-byte Folded Spill
+ add x29, sp, #16
+ stur w0, [x29, #-4]
+ bl _Z1Av
+ ldur w8, [x29, #-4]
+ add w0, w8, #1
+ ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+ add sp, sp, #32
+ ret
+.Lfunc_end1:
+ .size _Z1Bi, .Lfunc_end1-_Z1Bi
+ // -- End function
+ .section .text._Z1Ci,"ax",@progbits
+ .globl _Z1Ci // -- Begin function _Z1Ci
+ .p2align 2
+ .type _Z1Ci,@function
+_Z1Ci: // @_Z1Ci
+// %bb.0:
+ sub sp, sp, #32
+ stp x29, x30, [sp, #16] // 16-byte Folded Spill
+ add x29, sp, #16
+ stur w0, [x29, #-4]
+ bl _Z1Av
+ ldur w8, [x29, #-4]
+ add w0, w8, #2
+ ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+ add sp, sp, #32
+ ret
+.Lfunc_end2:
+ .size _Z1Ci, .Lfunc_end2-_Z1Ci
+ // -- End function
+ .section .text._Z1Di,"ax",@progbits
+ .globl _Z1Di // -- Begin function _Z1Di
+ .p2align 2
+ .type _Z1Di,@function
+_Z1Di: // @_Z1Di
+// %bb.0:
+ sub sp, sp, #32
+ stp x29, x30, [sp, #16] // 16-byte Folded Spill
+ add x29, sp, #16
+ stur w0, [x29, #-4]
+ ldur w8, [x29, #-4]
+ add w0, w8, #2
+ bl _Z1Bi
+ ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+ add sp, sp, #32
+ ret
+.Lfunc_end3:
+ .size _Z1Di, .Lfunc_end3-_Z1Di
+ // -- End function
+ .section .text._Z1Ei,"ax",@progbits
+ .globl _Z1Ei // -- Begin function _Z1Ei
+ .p2align 2
+ .type _Z1Ei,@function
+_Z1Ei: // @_Z1Ei
+// %bb.0:
+ sub sp, sp, #32
+ stp x29, x30, [sp, #16] // 16-byte Folded Spill
+ add x29, sp, #16
+ stur w0, [x29, #-4]
+ ldur w8, [x29, #-4]
+ add w0, w8, #2
+ bl _Z1Ci
+ ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+ add sp, sp, #32
+ ret
+.Lfunc_end4:
+ .size _Z1Ei, .Lfunc_end4-_Z1Ei
+ // -- End function
+ .section .text._Z1Fi,"ax",@progbits
+ .globl _Z1Fi // -- Begin function _Z1Fi
+ .p2align 2
+ .type _Z1Fi,@function
+_Z1Fi: // @_Z1Fi
+// %bb.0:
+ sub sp, sp, #32
+ stp x29, x30, [sp, #16] // 16-byte Folded Spill
+ add x29, sp, #16
+ stur w0, [x29, #-4]
+ ldur w8, [x29, #-4]
+ add w0, w8, #3
+ bl _Z1Ci
+ ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+ add sp, sp, #32
+ ret
+.Lfunc_end5:
+ .size _Z1Fi, .Lfunc_end5-_Z1Fi
+ // -- End function
+ .section .text.main,"ax",@progbits
+ .globl main // -- Begin function main
+ .p2align 2
+ .type main,@function
+main: // @main
+// %bb.0:
+ sub sp, sp, #16
+ mov w0, wzr
+ str wzr, [sp, #12]
+ add sp, sp, #16
+ ret
+.Lfunc_end6:
+ .size main, .Lfunc_end6-main
+ // -- End function
+ .type _ZL2s1,@object // @_ZL2s1
+ .section .rodata._ZL2s1,"a",@progbits
+_ZL2s1:
+ .asciz "hello world"
+ .size _ZL2s1, 12
+
+ .type r1,@object // @r1
+ .section .data.r1,"aw",@progbits
+ .globl r1
+ .p2align 3, 0x0
+r1:
+ .xword _ZL2s1
+ .size r1, 8
+
+ .type r2,@object // @r2
+ .section .data.r2,"aw",@progbits
+ .globl r2
+ .p2align 3, 0x0
+r2:
+ .xword r1
+ .size r2, 8
+
+ .section ".note.GNU-stack","",@progbits
+ .addrsig
+ .addrsig_sym _Z1Av
+ .addrsig_sym _Z1Bi
+ .addrsig_sym _Z1Ci
+ .addrsig_sym _ZL2s1
+ .addrsig_sym r1
diff --git a/lld/test/ELF/incompatible.s b/lld/test/ELF/incompatible.s
index 0d25acd857610b..05edc69cda7c24 100644
--- a/lld/test/ELF/incompatible.s
+++ b/lld/test/ELF/incompatible.s
@@ -1,3 +1,16 @@
+# REQUIRES: aarch64
+# RUN: rm -rf %t
+# RUN: not ld.lld -o /dev/null %t --irpgo-profile=/dev/null --bp-startup-sort=function --call-graph-ordering-file=/dev/null 2>&1 | FileCheck %s --check-prefix=IRPGO-ERR
+# IRPGO-ERR: --bp-startup-sort=function is incompatible with --call-graph-ordering-file
+# RUN: not ld.lld -o /dev/null --bp-compression-sort=function --call-graph-ordering-file /dev/null 2>&1 | FileCheck %s --check-prefix=COMPRESSION-ERR
+# COMPRESSION-ERR: --bp-compression-sort is incompatible with --call-graph-ordering-file
+# RUN: not ld.lld -o /dev/null --bp-compression-sort=malformed 2>&1 | FileCheck %s --check-prefix=COMPRESSION-MALFORM
+# COMPRESSION-MALFORM: unknown value 'malformed' for --bp-compression-sort=
+# RUN: not ld.lld -o /dev/null --bp-startup-sort=function 2>&1 | FileCheck %s --check-prefix=STARTUP
+# STARTUP: --bp-startup-sort=function must be used with --irpgo-profile
+# RUN: not ld.lld -o /dev/null --bp-compression-sort-startup-functions 2>&1 | FileCheck %s --check-prefix=STARTUP-COMPRESSION
+# STARTUP-COMPRESSION: --bp-compression-sort-startup-functions must be used with --irpgo-profile
+
// REQUIRES: x86,aarch64
// RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %ta.o
// RUN: llvm-mc -filetype=obj -triple=i686-unknown-linux %s -o %tb.o
>From 53f5e6f510fa3dbe86e0fce77ce46be9fe99b26f Mon Sep 17 00:00:00 2001
From: xupengying <xpy66swsry at gmail.com>
Date: Wed, 11 Dec 2024 13:00:57 +0800
Subject: [PATCH 2/9] fixup! [lld][ELF] Extend profile guided function ordering
to ELF binaries
---
lld/test/ELF/bp-section-orderer.s | 192 +++++++++++++++++++-----------
1 file changed, 120 insertions(+), 72 deletions(-)
diff --git a/lld/test/ELF/bp-section-orderer.s b/lld/test/ELF/bp-section-orderer.s
index 3164f9b5b4b18b..470c01f6bce5f2 100644
--- a/lld/test/ELF/bp-section-orderer.s
+++ b/lld/test/ELF/bp-section-orderer.s
@@ -3,22 +3,32 @@
# RUN: rm -rf %t && split-file %s %t && cd %t
# RUN: llvm-mc -filetype=obj -triple=aarch64 a.s -o a.o
# RUN: llvm-profdata merge a.proftext -o a.profdata
-# RUN: ld.lld -e main -o a.out a.o --irpgo-profile=a.profdata --bp-startup-sort=function --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=STARTUP
-# RUN: ld.lld -e main -o a.out a.o --irpgo-profile=a.profdata --bp-startup-sort=function --verbose-bp-section-orderer --icf=all --bp-compression-sort=none 2>&1 | FileCheck %s --check-prefix=STARTUP
+# RUN: ld.lld -e main -o a.out a.o --irpgo-profile=a.profdata --bp-startup-sort=function --verbose-bp-section-orderer --icf=all 2>&1 | FileCheck %s --check-prefix=STARTUP
# STARTUP: Ordered 3 sections using balanced partitioning
-# RUN: ld.lld -e main -o - a.o --irpgo-profile=a.profdata --bp-startup-sort=function --symbol-ordering-file a.orderfile | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=ORDERFILE
+# RUN: ld.lld -e main -o - a.o --symbol-ordering-file a.orderfile --irpgo-profile=a.profdata --bp-startup-sort=function | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=ORDERFILE
+# RUN: ld.lld -e main -o - a.o --symbol-ordering-file a.orderfile --bp-compression-sort=both | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=ORDERFILE
-# ORDERFILE: _Z1Av
-# ORDERFILE: _Z1Fi
-# ORDERFILE: _Z1Ei
-# ORDERFILE: _Z1Di
-# ORDERFILE: _Z1Ci
-# ORDERFILE: _Z1Bi
-# ORDERFILE: main
-# ORDERFILE: r1
+# Rodata
+# ORDERFILE: s2
+# ORDERFILE: s1
+# ORDERFILE-DAG: s3
+
+# Functions
+# ORDERFILE: A
+# ORDERFILE: F
+# ORDERFILE: E
+# ORDERFILE: D
+# ORDERFILE-DAG: main
+# ORDERFILE-DAG: B
+# ORDERFILE-DAG: C
+
+# Data
+# ORDERFILE: r3
# ORDERFILE: r2
+# ORDERFILE-DAG: r1
+# ORDERFILE-DAG: r4
# RUN: ld.lld -e main -o a.out a.o --verbose-bp-section-orderer --bp-compression-sort=function 2>&1 | FileCheck %s --check-prefix=COMPRESSION-FUNC
# RUN: ld.lld -e main -o a.out a.o --verbose-bp-section-orderer --bp-compression-sort=data 2>&1 | FileCheck %s --check-prefix=COMPRESSION-DATA
@@ -26,8 +36,8 @@
# RUN: ld.lld -e main -o a.out a.o --verbose-bp-section-orderer --bp-compression-sort=both --irpgo-profile=a.profdata --bp-startup-sort=function 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
# COMPRESSION-FUNC: Ordered 7 sections using balanced partitioning
-# COMPRESSION-DATA: Ordered 3 sections using balanced partitioning
-# COMPRESSION-BOTH: Ordered 10 sections using balanced partitioning
+# COMPRESSION-DATA: Ordered 7 sections using balanced partitioning
+# COMPRESSION-BOTH: Ordered 14 sections using balanced partitioning
#--- a.proftext
:ir
@@ -38,9 +48,9 @@
1
# Weight
1
-_Z1Av, _Z1Bi, _Z1Ci
+A, B, C
-_Z1Av
+A
# Func Hash:
1111
# Num Counters:
@@ -48,7 +58,7 @@ _Z1Av
# Counter Values:
1
-_Z1Bi
+B
# Func Hash:
2222
# Num Counters:
@@ -56,7 +66,7 @@ _Z1Bi
# Counter Values:
1
-_Z1Ci
+C
# Func Hash:
3333
# Num Counters:
@@ -64,7 +74,7 @@ _Z1Ci
# Counter Values:
1
-_Z1Di
+D
# Func Hash:
4444
# Num Counters:
@@ -73,16 +83,23 @@ _Z1Di
1
#--- a.orderfile
-_Z1Av
-_Z1Fi
-_Z1Ei
-_Z1Di
+A
+F
+E
+D
+s2
+s1
+r3
+r2
-#--- a.cc
+#--- a.c
const char s1[] = "hello world";
const char s2[] = "i am a string";
+const char s3[] = "this is s3";
const char* r1 = s1;
const char** r2 = &r1;
+const char*** r3 = &r2;
+const char* r4 = s2;
void A() {
return;
}
@@ -113,65 +130,63 @@ int main() {
return 0;
}
#--- gen
-echo '#--- a.s'
-clang --target=aarch64-linux-gnu -fdebug-compilation-dir='/proc/self/cwd' -ffunction-sections -fdata-sections -fno-exceptions -fno-rtti -fno-asynchronous-unwind-tables -S a.cc -o -
-;--- a.ll
-#--- a.s
+clang --target=aarch64-linux-gnu -O0 -ffunction-sections -fdata-sections -fno-exceptions -fno-rtti -fno-asynchronous-unwind-tables -S a.c -o -
+;--- a.s
.text
- .file "a.cc"
- .section .text._Z1Av,"ax",@progbits
- .globl _Z1Av // -- Begin function _Z1Av
+ .file "a.c"
+ .section .text.A,"ax",@progbits
+ .globl A // -- Begin function A
.p2align 2
- .type _Z1Av,@function
-_Z1Av: // @_Z1Av
+ .type A,@function
+A: // @A
// %bb.0:
ret
.Lfunc_end0:
- .size _Z1Av, .Lfunc_end0-_Z1Av
+ .size A, .Lfunc_end0-A
// -- End function
- .section .text._Z1Bi,"ax",@progbits
- .globl _Z1Bi // -- Begin function _Z1Bi
+ .section .text.B,"ax",@progbits
+ .globl B // -- Begin function B
.p2align 2
- .type _Z1Bi,@function
-_Z1Bi: // @_Z1Bi
+ .type B,@function
+B: // @B
// %bb.0:
sub sp, sp, #32
stp x29, x30, [sp, #16] // 16-byte Folded Spill
add x29, sp, #16
stur w0, [x29, #-4]
- bl _Z1Av
+ bl A
ldur w8, [x29, #-4]
add w0, w8, #1
ldp x29, x30, [sp, #16] // 16-byte Folded Reload
add sp, sp, #32
ret
.Lfunc_end1:
- .size _Z1Bi, .Lfunc_end1-_Z1Bi
+ .size B, .Lfunc_end1-B
// -- End function
- .section .text._Z1Ci,"ax",@progbits
- .globl _Z1Ci // -- Begin function _Z1Ci
+ .section .text.C,"ax",@progbits
+ .globl C // -- Begin function C
.p2align 2
- .type _Z1Ci,@function
-_Z1Ci: // @_Z1Ci
+ .type C,@function
+C: // @C
// %bb.0:
sub sp, sp, #32
stp x29, x30, [sp, #16] // 16-byte Folded Spill
add x29, sp, #16
stur w0, [x29, #-4]
- bl _Z1Av
+ bl A
ldur w8, [x29, #-4]
add w0, w8, #2
ldp x29, x30, [sp, #16] // 16-byte Folded Reload
add sp, sp, #32
ret
.Lfunc_end2:
- .size _Z1Ci, .Lfunc_end2-_Z1Ci
+ .size C, .Lfunc_end2-C
// -- End function
- .section .text._Z1Di,"ax",@progbits
- .globl _Z1Di // -- Begin function _Z1Di
+ .section .text.D,"ax",@progbits
+ .globl D // -- Begin function D
.p2align 2
- .type _Z1Di,@function
-_Z1Di: // @_Z1Di
+ .type D,@function
+D: // @D
// %bb.0:
sub sp, sp, #32
stp x29, x30, [sp, #16] // 16-byte Folded Spill
@@ -179,18 +194,18 @@ _Z1Di: // @_Z1Di
stur w0, [x29, #-4]
ldur w8, [x29, #-4]
add w0, w8, #2
- bl _Z1Bi
+ bl B
ldp x29, x30, [sp, #16] // 16-byte Folded Reload
add sp, sp, #32
ret
.Lfunc_end3:
- .size _Z1Di, .Lfunc_end3-_Z1Di
+ .size D, .Lfunc_end3-D
// -- End function
- .section .text._Z1Ei,"ax",@progbits
- .globl _Z1Ei // -- Begin function _Z1Ei
+ .section .text.E,"ax",@progbits
+ .globl E // -- Begin function E
.p2align 2
- .type _Z1Ei,@function
-_Z1Ei: // @_Z1Ei
+ .type E,@function
+E: // @E
// %bb.0:
sub sp, sp, #32
stp x29, x30, [sp, #16] // 16-byte Folded Spill
@@ -198,18 +213,18 @@ _Z1Ei: // @_Z1Ei
stur w0, [x29, #-4]
ldur w8, [x29, #-4]
add w0, w8, #2
- bl _Z1Ci
+ bl C
ldp x29, x30, [sp, #16] // 16-byte Folded Reload
add sp, sp, #32
ret
.Lfunc_end4:
- .size _Z1Ei, .Lfunc_end4-_Z1Ei
+ .size E, .Lfunc_end4-E
// -- End function
- .section .text._Z1Fi,"ax",@progbits
- .globl _Z1Fi // -- Begin function _Z1Fi
+ .section .text.F,"ax",@progbits
+ .globl F // -- Begin function F
.p2align 2
- .type _Z1Fi,@function
-_Z1Fi: // @_Z1Fi
+ .type F,@function
+F: // @F
// %bb.0:
sub sp, sp, #32
stp x29, x30, [sp, #16] // 16-byte Folded Spill
@@ -217,12 +232,12 @@ _Z1Fi: // @_Z1Fi
stur w0, [x29, #-4]
ldur w8, [x29, #-4]
add w0, w8, #3
- bl _Z1Ci
+ bl C
ldp x29, x30, [sp, #16] // 16-byte Folded Reload
add sp, sp, #32
ret
.Lfunc_end5:
- .size _Z1Fi, .Lfunc_end5-_Z1Fi
+ .size F, .Lfunc_end5-F
// -- End function
.section .text.main,"ax",@progbits
.globl main // -- Begin function main
@@ -238,18 +253,33 @@ main: // @main
.Lfunc_end6:
.size main, .Lfunc_end6-main
// -- End function
- .type _ZL2s1,@object // @_ZL2s1
- .section .rodata._ZL2s1,"a",@progbits
-_ZL2s1:
+ .type s1,@object // @s1
+ .section .rodata.s1,"a",@progbits
+ .globl s1
+s1:
.asciz "hello world"
- .size _ZL2s1, 12
+ .size s1, 12
+
+ .type s2,@object // @s2
+ .section .rodata.s2,"a",@progbits
+ .globl s2
+s2:
+ .asciz "i am a string"
+ .size s2, 14
+
+ .type s3,@object // @s3
+ .section .rodata.s3,"a",@progbits
+ .globl s3
+s3:
+ .asciz "this is s3"
+ .size s3, 11
.type r1,@object // @r1
.section .data.r1,"aw",@progbits
.globl r1
.p2align 3, 0x0
r1:
- .xword _ZL2s1
+ .xword s1
.size r1, 8
.type r2,@object // @r2
@@ -260,10 +290,28 @@ r2:
.xword r1
.size r2, 8
+ .type r3,@object // @r3
+ .section .data.r3,"aw",@progbits
+ .globl r3
+ .p2align 3, 0x0
+r3:
+ .xword r2
+ .size r3, 8
+
+ .type r4,@object // @r4
+ .section .data.r4,"aw",@progbits
+ .globl r4
+ .p2align 3, 0x0
+r4:
+ .xword s2
+ .size r4, 8
+
.section ".note.GNU-stack","",@progbits
.addrsig
- .addrsig_sym _Z1Av
- .addrsig_sym _Z1Bi
- .addrsig_sym _Z1Ci
- .addrsig_sym _ZL2s1
+ .addrsig_sym A
+ .addrsig_sym B
+ .addrsig_sym C
+ .addrsig_sym s1
+ .addrsig_sym s2
.addrsig_sym r1
+ .addrsig_sym r2
>From 9c22ea6d0a0b9d4dfc9075e0e9e3c6a8717372a8 Mon Sep 17 00:00:00 2001
From: xupengying <xpy66swsry at gmail.com>
Date: Wed, 11 Dec 2024 16:03:08 +0800
Subject: [PATCH 3/9] fixup! [lld][ELF] Extend profile guided function ordering
to ELF binaries
---
lld/Common/BPSectionOrdererBase.cpp | 17 ++++++-----
lld/ELF/BPSectionOrderer.cpp | 17 ++++-------
lld/ELF/BPSectionOrderer.h | 29 ++++++++-----------
lld/MachO/BPSectionOrderer.cpp | 8 ++---
lld/MachO/BPSectionOrderer.h | 18 ++++++------
lld/include/lld/Common/BPSectionOrdererBase.h | 23 ++++++++-------
6 files changed, 52 insertions(+), 60 deletions(-)
diff --git a/lld/Common/BPSectionOrdererBase.cpp b/lld/Common/BPSectionOrdererBase.cpp
index e65ce99375cd49..f569a34491e1da 100644
--- a/lld/Common/BPSectionOrdererBase.cpp
+++ b/lld/Common/BPSectionOrdererBase.cpp
@@ -41,7 +41,7 @@ static SmallVector<std::pair<unsigned, UtilityNodes>> getUnsForCompression(
for (unsigned sectionIdx : sectionIdxs) {
const auto *isec = sections[sectionIdx];
- isec->getSectionHash(hashes);
+ isec->getSectionHashes(hashes);
sectionHashes.emplace_back(sectionIdx, std::move(hashes));
hashes.clear();
}
@@ -100,7 +100,7 @@ static SmallVector<std::pair<unsigned, UtilityNodes>> getUnsForCompression(
}
llvm::DenseMap<const BPSectionBase *, size_t>
-BPSectionOrdererBase::reorderSectionsByBalancedPartitioning(
+BPSectionBase::reorderSectionsByBalancedPartitioning(
size_t &highestAvailablePriority, llvm::StringRef profilePath,
bool forFunctionCompression, bool forDataCompression,
bool compressionSortStartupFunctions, bool verbose,
@@ -129,9 +129,10 @@ BPSectionOrdererBase::reorderSectionsByBalancedPartitioning(
name = BPSectionBase::getRootSymbol(name);
rootSymbolToSectionIdxs[name].insert(sectionIdxs.begin(),
sectionIdxs.end());
- if (sections[*sectionIdxs.begin()]->needResolveLinkageName(name))
- rootSymbolToSectionIdxs[name].insert(sectionIdxs.begin(),
- sectionIdxs.end());
+ if (auto resolvedLinkageName =
+ sections[*sectionIdxs.begin()]->getResolvedLinkageName(name))
+ rootSymbolToSectionIdxs[resolvedLinkageName.value()].insert(
+ sectionIdxs.begin(), sectionIdxs.end());
}
BPFunctionNode::UtilityNodeT maxUN = 0;
@@ -339,8 +340,10 @@ BPSectionOrdererBase::reorderSectionsByBalancedPartitioning(
StringRef rootSymbol = d->getName();
rootSymbol = BPSectionBase::getRootSymbol(rootSymbol);
symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage);
- if (isec->needResolveLinkageName(rootSymbol))
- symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage);
+ if (auto resolvedLinkageName =
+ isec->getResolvedLinkageName(rootSymbol))
+ symbolToPageNumbers.try_emplace(resolvedLinkageName.value(),
+ firstPage, lastPage);
}
}
currentAddress += isec->getSize();
diff --git a/lld/ELF/BPSectionOrderer.cpp b/lld/ELF/BPSectionOrderer.cpp
index 5d9f28a9af7402..bc1dafbdb1ca1a 100644
--- a/lld/ELF/BPSectionOrderer.cpp
+++ b/lld/ELF/BPSectionOrderer.cpp
@@ -38,28 +38,23 @@ lld::elf::runBalancedPartitioning(Ctx &ctx, llvm::StringRef profilePath,
if (sym->getSize() > 0)
if (auto *d = dyn_cast<Defined>(sym))
if (auto *sec = dyn_cast_or_null<InputSectionBase>(d->section))
- sections.emplace_back(std::make_unique<BPSectionELF>(
- sec, std::make_unique<BPSymbolELF>(sym)));
+ sections.emplace_back(std::make_unique<BPSectionELF>(sec, sym));
for (ELFFileBase *file : ctx.objectFiles)
for (Symbol *sym : file->getLocalSymbols())
if (sym->getSize() > 0)
if (auto *d = dyn_cast<Defined>(sym))
if (auto *sec = dyn_cast_or_null<InputSectionBase>(d->section))
- sections.emplace_back(std::make_unique<BPSectionELF>(
- sec, std::make_unique<BPSymbolELF>(sym)));
+ sections.emplace_back(std::make_unique<BPSectionELF>(sec, sym));
- auto reorderedSections =
- lld::BPSectionOrdererBase::reorderSectionsByBalancedPartitioning(
- highestAvailablePriority, profilePath, forFunctionCompression,
- forDataCompression, compressionSortStartupFunctions, verbose,
- sections);
+ auto reorderedSections = BPSectionBase::reorderSectionsByBalancedPartitioning(
+ highestAvailablePriority, profilePath, forFunctionCompression,
+ forDataCompression, compressionSortStartupFunctions, verbose, sections);
DenseMap<const InputSectionBase *, int> result;
for (const auto &[sec, priority] : reorderedSections) {
auto *elfSection = cast<BPSectionELF>(sec);
- result.try_emplace(elfSection->getSymbol()->getInputSection(),
- static_cast<int>(priority));
+ result.try_emplace(elfSection->getSection(), static_cast<int>(priority));
}
return result;
}
diff --git a/lld/ELF/BPSectionOrderer.h b/lld/ELF/BPSectionOrderer.h
index 73a44833ae003e..daabeeef091f70 100644
--- a/lld/ELF/BPSectionOrderer.h
+++ b/lld/ELF/BPSectionOrderer.h
@@ -68,18 +68,14 @@ class BPSymbolELF : public BPSymbol {
class BPSectionELF : public BPSectionBase {
const InputSectionBase *isec;
- std::unique_ptr<BPSymbolELF> symbol;
+ Symbol *symbol;
public:
- explicit BPSectionELF(const InputSectionBase *sec,
- std::unique_ptr<BPSymbolELF> sym)
- : isec(sec), symbol(std::move(sym)) {}
+ explicit BPSectionELF(const InputSectionBase *sec, Symbol *sym)
+ : isec(sec), symbol(sym) {}
const InputSectionBase *getSection() const { return isec; }
- BPSymbolELF *getSymbol() const { return symbol.get(); }
- llvm::StringRef getName() const override { return isec->name; }
-
uint64_t getSize() const override { return isec->getSize(); }
bool isCodeSection() const override {
@@ -90,20 +86,19 @@ class BPSectionELF : public BPSectionBase {
return isec && !isec->content().empty();
}
- llvm::ArrayRef<uint8_t> getSectionData() const override {
- return isec->content();
- }
-
- llvm::ArrayRef<std::unique_ptr<BPSymbol>> getSymbols() const override {
- return llvm::ArrayRef<std::unique_ptr<BPSymbol>>(
- reinterpret_cast<const std::unique_ptr<BPSymbol> *>(&symbol), 1);
+ SmallVector<std::unique_ptr<BPSymbol>> getSymbols() const override {
+ SmallVector<std::unique_ptr<BPSymbol>> symbols;
+ symbols.emplace_back(std::make_unique<BPSymbolELF>(symbol));
+ return symbols;
}
- bool needResolveLinkageName(llvm::StringRef &name) const override {
- return false;
+ std::optional<StringRef>
+ getResolvedLinkageName(llvm::StringRef name) const override {
+ return {};
}
- void getSectionHash(llvm::SmallVectorImpl<uint64_t> &hashes) const override {
+ void
+ getSectionHashes(llvm::SmallVectorImpl<uint64_t> &hashes) const override {
constexpr unsigned windowSize = 4;
// Calculate content hashes
diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index 721770f4b4a27f..19f2afdc4b1d03 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -37,11 +37,9 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
}
}
- auto reorderedSections =
- lld::BPSectionOrdererBase::reorderSectionsByBalancedPartitioning(
- highestAvailablePriority, profilePath, forFunctionCompression,
- forDataCompression, compressionSortStartupFunctions, verbose,
- sections);
+ auto reorderedSections = BPSectionBase::reorderSectionsByBalancedPartitioning(
+ highestAvailablePriority, profilePath, forFunctionCompression,
+ forDataCompression, compressionSortStartupFunctions, verbose, sections);
DenseMap<const InputSection *, size_t> result;
for (const auto &[sec, priority] : reorderedSections) {
diff --git a/lld/MachO/BPSectionOrderer.h b/lld/MachO/BPSectionOrderer.h
index 95f0e2aac73863..b57ec341e24942 100644
--- a/lld/MachO/BPSectionOrderer.h
+++ b/lld/MachO/BPSectionOrderer.h
@@ -58,7 +58,6 @@ class BPSymbolMacho : public BPSymbol {
class BPSectionMacho : public BPSectionBase {
const InputSection *isec;
uint64_t sectionIdx;
- mutable std::vector<std::unique_ptr<BPSymbol>> symbols;
public:
explicit BPSectionMacho(const InputSection *sec, uint64_t sectionIdx)
@@ -66,8 +65,6 @@ class BPSectionMacho : public BPSectionBase {
const InputSection *getSection() const { return isec; }
- llvm::StringRef getName() const override { return isec->getName(); }
-
uint64_t getSize() const override { return isec->getSize(); }
uint64_t getSectionIdx() const { return sectionIdx; }
@@ -78,9 +75,8 @@ class BPSectionMacho : public BPSectionBase {
return isec && !isec->data.empty() && isec->data.data();
}
- llvm::ArrayRef<uint8_t> getSectionData() const override { return isec->data; }
-
- llvm::ArrayRef<std::unique_ptr<BPSymbol>> getSymbols() const override {
+ SmallVector<std::unique_ptr<BPSymbol>> getSymbols() const override {
+ SmallVector<std::unique_ptr<BPSymbol>> symbols;
for (auto *d : isec->symbols) {
symbols.emplace_back(std::make_unique<BPSymbolMacho>(d));
}
@@ -89,11 +85,15 @@ class BPSectionMacho : public BPSectionBase {
// Linkage names can be prefixed with "_" or "l_" on Mach-O. See
// Mangler::getNameWithPrefix() for details.
- bool needResolveLinkageName(llvm::StringRef &name) const override {
- return (name.consume_front("_") || name.consume_front("l_"));
+ std::optional<StringRef>
+ getResolvedLinkageName(llvm::StringRef name) const override {
+ if (name.consume_front("_") || name.consume_front("l_"))
+ return name;
+ return {};
}
- void getSectionHash(llvm::SmallVectorImpl<uint64_t> &hashes) const override {
+ void
+ getSectionHashes(llvm::SmallVectorImpl<uint64_t> &hashes) const override {
constexpr unsigned windowSize = 4;
// Calculate content hashes
diff --git a/lld/include/lld/Common/BPSectionOrdererBase.h b/lld/include/lld/Common/BPSectionOrdererBase.h
index e4cff1020d1dc1..32db82c2f0141b 100644
--- a/lld/include/lld/Common/BPSectionOrdererBase.h
+++ b/lld/include/lld/Common/BPSectionOrdererBase.h
@@ -35,15 +35,19 @@ class BPSymbol {
class BPSectionBase {
public:
virtual ~BPSectionBase() = default;
- virtual llvm::StringRef getName() const = 0;
virtual uint64_t getSize() const = 0;
virtual bool hasValidData() const = 0;
virtual bool isCodeSection() const = 0;
- virtual llvm::ArrayRef<uint8_t> getSectionData() const = 0;
- virtual llvm::ArrayRef<std::unique_ptr<BPSymbol>> getSymbols() const = 0;
+ virtual llvm::SmallVector<std::unique_ptr<BPSymbol>> getSymbols() const = 0;
virtual void
- getSectionHash(llvm::SmallVectorImpl<uint64_t> &hashes) const = 0;
- virtual bool needResolveLinkageName(llvm::StringRef &name) const = 0;
+ getSectionHashes(llvm::SmallVectorImpl<uint64_t> &hashes) const = 0;
+ virtual std::optional<llvm::StringRef>
+ getResolvedLinkageName(llvm::StringRef name) const = 0;
+
+ /// Symbols can be appended with "(.__uniq.xxxx)?.llvm.yyyy" where "xxxx" and
+ /// "yyyy" are numbers that could change between builds. We need to use the
+ /// root symbol name before this suffix so these symbols can be matched with
+ /// profiles which may have different suffixes.
static llvm::StringRef getRootSymbol(llvm::StringRef Name) {
auto [P0, S0] = Name.rsplit(".llvm.");
auto [P1, S1] = P0.rsplit(".__uniq.");
@@ -57,12 +61,9 @@ class BPSectionBase {
llvm::Twine::utohexstr(addend))
.str());
}
-};
-/// Base class for Balanced Partitioning section ordering, providing common
-/// functionality for both ELF and MachO formats. This shared implementation
-/// reduces code duplication while handling function and data reordering.
-class BPSectionOrdererBase {
-public:
+
+ /// Reorders sections using balanced partitioning algorithm based on profile
+ /// data.
static llvm::DenseMap<const BPSectionBase *, size_t>
reorderSectionsByBalancedPartitioning(
size_t &highestAvailablePriority, llvm::StringRef profilePath,
>From a1b4666347f07a78ebdebfc8009f87aac15b207a Mon Sep 17 00:00:00 2001
From: xupengying <xpy66swsry at gmail.com>
Date: Wed, 11 Dec 2024 16:27:47 +0800
Subject: [PATCH 4/9] fixup! [lld][ELF] Extend profile guided function ordering
to ELF binaries
---
lld/Common/BPSectionOrdererBase.cpp | 33 +++++++++----------
lld/ELF/BPSectionOrderer.h | 5 ++-
lld/MachO/BPSectionOrderer.h | 8 ++---
lld/include/lld/Common/BPSectionOrdererBase.h | 1 -
4 files changed, 20 insertions(+), 27 deletions(-)
diff --git a/lld/Common/BPSectionOrdererBase.cpp b/lld/Common/BPSectionOrdererBase.cpp
index f569a34491e1da..c645c71467e8e5 100644
--- a/lld/Common/BPSectionOrdererBase.cpp
+++ b/lld/Common/BPSectionOrdererBase.cpp
@@ -119,8 +119,7 @@ BPSectionBase::reorderSectionsByBalancedPartitioning(
sectionToIdx.try_emplace(isec.get(), sectionIdx);
sections.emplace_back(isec.get());
for (auto &sym : isec->getSymbols())
- if (auto *d = sym->asDefinedSymbol())
- symbolToSectionIdxs[d->getName()].insert(sectionIdx);
+ symbolToSectionIdxs[sym->getName()].insert(sectionIdx);
}
StringMap<DenseSet<unsigned>> rootSymbolToSectionIdxs;
for (auto &entry : symbolToSectionIdxs) {
@@ -329,22 +328,20 @@ BPSectionBase::reorderSectionsByBalancedPartitioning(
uint64_t currentAddress = 0;
for (const auto *isec : orderedSections) {
for (auto &sym : isec->getSymbols()) {
- if (auto *d = sym->asDefinedSymbol()) {
- uint64_t startAddress = currentAddress + d->getValue().value_or(0);
- uint64_t endAddress = startAddress + d->getSize().value_or(0);
- uint64_t firstPage = startAddress / pageSize;
- // I think the kernel might pull in a few pages when one it touched,
- // so it might be more accurate to force lastPage to be aligned by
- // 4?
- uint64_t lastPage = endAddress / pageSize;
- StringRef rootSymbol = d->getName();
- rootSymbol = BPSectionBase::getRootSymbol(rootSymbol);
- symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage);
- if (auto resolvedLinkageName =
- isec->getResolvedLinkageName(rootSymbol))
- symbolToPageNumbers.try_emplace(resolvedLinkageName.value(),
- firstPage, lastPage);
- }
+ uint64_t startAddress = currentAddress + sym->getValue().value_or(0);
+ uint64_t endAddress = startAddress + sym->getSize().value_or(0);
+ uint64_t firstPage = startAddress / pageSize;
+ // I think the kernel might pull in a few pages when one is touched,
+ // so it might be more accurate to force lastPage to be aligned by
+ // 4?
+ uint64_t lastPage = endAddress / pageSize;
+ StringRef rootSymbol = sym->getName();
+ rootSymbol = BPSectionBase::getRootSymbol(rootSymbol);
+ symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage);
+ if (auto resolvedLinkageName =
+ isec->getResolvedLinkageName(rootSymbol))
+ symbolToPageNumbers.try_emplace(resolvedLinkageName.value(),
+ firstPage, lastPage);
}
currentAddress += isec->getSize();
}
diff --git a/lld/ELF/BPSectionOrderer.h b/lld/ELF/BPSectionOrderer.h
index daabeeef091f70..e0be4b882996ff 100644
--- a/lld/ELF/BPSectionOrderer.h
+++ b/lld/ELF/BPSectionOrderer.h
@@ -43,8 +43,6 @@ class BPSymbolELF : public BPSymbol {
return llvm::dyn_cast_or_null<Defined>(sym);
}
- BPSymbol *asDefinedSymbol() override { return asDefined() ? this : nullptr; }
-
std::optional<uint64_t> getValue() const override {
if (auto *d = asDefined())
return d->value;
@@ -88,7 +86,8 @@ class BPSectionELF : public BPSectionBase {
SmallVector<std::unique_ptr<BPSymbol>> getSymbols() const override {
SmallVector<std::unique_ptr<BPSymbol>> symbols;
- symbols.emplace_back(std::make_unique<BPSymbolELF>(symbol));
+ if (auto *d = llvm::dyn_cast_or_null<Defined>(symbol))
+ symbols.emplace_back(std::make_unique<BPSymbolELF>(d));
return symbols;
}
diff --git a/lld/MachO/BPSectionOrderer.h b/lld/MachO/BPSectionOrderer.h
index b57ec341e24942..1ed92f4d9fd2f4 100644
--- a/lld/MachO/BPSectionOrderer.h
+++ b/lld/MachO/BPSectionOrderer.h
@@ -38,8 +38,6 @@ class BPSymbolMacho : public BPSymbol {
return llvm::dyn_cast_or_null<Defined>(sym);
}
- BPSymbol *asDefinedSymbol() override { return asDefined() ? this : nullptr; }
-
std::optional<uint64_t> getValue() const override {
if (auto *d = asDefined())
return d->value;
@@ -77,9 +75,9 @@ class BPSectionMacho : public BPSectionBase {
SmallVector<std::unique_ptr<BPSymbol>> getSymbols() const override {
SmallVector<std::unique_ptr<BPSymbol>> symbols;
- for (auto *d : isec->symbols) {
- symbols.emplace_back(std::make_unique<BPSymbolMacho>(d));
- }
+ for (auto *sym : isec->symbols)
+ if (auto *d = llvm::dyn_cast_or_null<Defined>(sym))
+ symbols.emplace_back(std::make_unique<BPSymbolMacho>(d));
return symbols;
}
diff --git a/lld/include/lld/Common/BPSectionOrdererBase.h b/lld/include/lld/Common/BPSectionOrdererBase.h
index 32db82c2f0141b..b8fe11a571626b 100644
--- a/lld/include/lld/Common/BPSectionOrdererBase.h
+++ b/lld/include/lld/Common/BPSectionOrdererBase.h
@@ -27,7 +27,6 @@ class BPSymbol {
public:
virtual ~BPSymbol() = default;
virtual llvm::StringRef getName() const = 0;
- virtual BPSymbol *asDefinedSymbol() = 0;
virtual std::optional<uint64_t> getValue() const = 0;
virtual std::optional<uint64_t> getSize() const = 0;
};
>From cdd9a1d615a03e02c249e21e01e513b319d4158b Mon Sep 17 00:00:00 2001
From: xupengying <xpy66swsry at gmail.com>
Date: Thu, 12 Dec 2024 14:37:28 +0800
Subject: [PATCH 5/9] fixup! [lld][ELF] Extend profile guided function ordering
to ELF binaries
---
lld/Common/BPSectionOrdererBase.cpp | 2 +-
lld/ELF/BPSectionOrderer.cpp | 6 ++++--
lld/ELF/BPSectionOrderer.h | 16 +++++++-------
lld/ELF/Writer.cpp | 3 ++-
lld/MachO/BPSectionOrderer.h | 21 ++++++++++++-------
lld/include/lld/Common/BPSectionOrdererBase.h | 4 +++-
6 files changed, 32 insertions(+), 20 deletions(-)
diff --git a/lld/Common/BPSectionOrdererBase.cpp b/lld/Common/BPSectionOrdererBase.cpp
index c645c71467e8e5..957217de34f963 100644
--- a/lld/Common/BPSectionOrdererBase.cpp
+++ b/lld/Common/BPSectionOrdererBase.cpp
@@ -41,7 +41,7 @@ static SmallVector<std::pair<unsigned, UtilityNodes>> getUnsForCompression(
for (unsigned sectionIdx : sectionIdxs) {
const auto *isec = sections[sectionIdx];
- isec->getSectionHashes(hashes);
+ isec->getSectionHashes(hashes, sectionToIdx);
sectionHashes.emplace_back(sectionIdx, std::move(hashes));
hashes.clear();
}
diff --git a/lld/ELF/BPSectionOrderer.cpp b/lld/ELF/BPSectionOrderer.cpp
index bc1dafbdb1ca1a..e98c0470273889 100644
--- a/lld/ELF/BPSectionOrderer.cpp
+++ b/lld/ELF/BPSectionOrderer.cpp
@@ -38,14 +38,16 @@ lld::elf::runBalancedPartitioning(Ctx &ctx, llvm::StringRef profilePath,
if (sym->getSize() > 0)
if (auto *d = dyn_cast<Defined>(sym))
if (auto *sec = dyn_cast_or_null<InputSectionBase>(d->section))
- sections.emplace_back(std::make_unique<BPSectionELF>(sec, sym));
+ sections.emplace_back(std::make_unique<BPSectionELF>(
+ sec, std::make_unique<BPSymbolELF>(d)));
for (ELFFileBase *file : ctx.objectFiles)
for (Symbol *sym : file->getLocalSymbols())
if (sym->getSize() > 0)
if (auto *d = dyn_cast<Defined>(sym))
if (auto *sec = dyn_cast_or_null<InputSectionBase>(d->section))
- sections.emplace_back(std::make_unique<BPSectionELF>(sec, sym));
+ sections.emplace_back(std::make_unique<BPSectionELF>(
+ sec, std::make_unique<BPSymbolELF>(d)));
auto reorderedSections = BPSectionBase::reorderSectionsByBalancedPartitioning(
highestAvailablePriority, profilePath, forFunctionCompression,
diff --git a/lld/ELF/BPSectionOrderer.h b/lld/ELF/BPSectionOrderer.h
index e0be4b882996ff..7b26e9da1d08e4 100644
--- a/lld/ELF/BPSectionOrderer.h
+++ b/lld/ELF/BPSectionOrderer.h
@@ -56,7 +56,7 @@ class BPSymbolELF : public BPSymbol {
}
InputSectionBase *getInputSection() const {
- if (auto *d = llvm::dyn_cast<Defined>(sym))
+ if (auto *d = asDefined())
return llvm::dyn_cast_or_null<InputSectionBase>(d->section);
return nullptr;
}
@@ -66,11 +66,12 @@ class BPSymbolELF : public BPSymbol {
class BPSectionELF : public BPSectionBase {
const InputSectionBase *isec;
- Symbol *symbol;
+ std::unique_ptr<BPSymbolELF> symbol;
public:
- explicit BPSectionELF(const InputSectionBase *sec, Symbol *sym)
- : isec(sec), symbol(sym) {}
+ explicit BPSectionELF(const InputSectionBase *sec,
+ std::unique_ptr<BPSymbolELF> sym)
+ : isec(sec), symbol(std::move(sym)) {}
const InputSectionBase *getSection() const { return isec; }
@@ -86,7 +87,7 @@ class BPSectionELF : public BPSectionBase {
SmallVector<std::unique_ptr<BPSymbol>> getSymbols() const override {
SmallVector<std::unique_ptr<BPSymbol>> symbols;
- if (auto *d = llvm::dyn_cast_or_null<Defined>(symbol))
+ if (auto *d = symbol->asDefined())
symbols.emplace_back(std::make_unique<BPSymbolELF>(d));
return symbols;
}
@@ -96,8 +97,9 @@ class BPSectionELF : public BPSectionBase {
return {};
}
- void
- getSectionHashes(llvm::SmallVectorImpl<uint64_t> &hashes) const override {
+ void getSectionHashes(llvm::SmallVectorImpl<uint64_t> &hashes,
+ const llvm::DenseMap<const BPSectionBase *, uint64_t>
+ &sectionToIdx) const override {
constexpr unsigned windowSize = 4;
// Calculate content hashes
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 6ac7af93b78861..a2a31f75ff02a1 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -1088,7 +1088,8 @@ static DenseMap<const InputSectionBase *, int> buildSectionOrder(Ctx &ctx) {
ctx.arg.bpDataOrderForCompression) {
TimeTraceScope timeScope("Balanced Partitioning Section Orderer");
sectionOrder = runBalancedPartitioning(
- ctx, ctx.arg.irpgoProfilePath, ctx.arg.bpFunctionOrderForCompression,
+ ctx, ctx.arg.bpStartupFunctionSort ? ctx.arg.irpgoProfilePath : "",
+ ctx.arg.bpFunctionOrderForCompression,
ctx.arg.bpDataOrderForCompression,
ctx.arg.bpCompressionSortStartupFunctions,
ctx.arg.bpVerboseSectionOrderer);
diff --git a/lld/MachO/BPSectionOrderer.h b/lld/MachO/BPSectionOrderer.h
index 1ed92f4d9fd2f4..dc03975189e85c 100644
--- a/lld/MachO/BPSectionOrderer.h
+++ b/lld/MachO/BPSectionOrderer.h
@@ -90,8 +90,9 @@ class BPSectionMacho : public BPSectionBase {
return {};
}
- void
- getSectionHashes(llvm::SmallVectorImpl<uint64_t> &hashes) const override {
+ void getSectionHashes(llvm::SmallVectorImpl<uint64_t> &hashes,
+ const llvm::DenseMap<const BPSectionBase *, uint64_t>
+ &sectionToIdx) const override {
constexpr unsigned windowSize = 4;
// Calculate content hashes
@@ -106,7 +107,7 @@ class BPSectionMacho : public BPSectionBase {
if (r.length == 0 || r.referent.isNull() || r.offset >= isec->data.size())
continue;
- uint64_t relocHash = getRelocHash(r, this);
+ uint64_t relocHash = getRelocHash(r, sectionToIdx);
uint32_t start = (r.offset < windowSize) ? 0 : r.offset - windowSize + 1;
for (uint32_t i = start; i < r.offset + r.length; i++) {
auto window = isec->data.drop_front(i).take_front(windowSize);
@@ -123,13 +124,17 @@ class BPSectionMacho : public BPSectionBase {
static bool classof(const BPSectionBase *s) { return true; }
private:
- static uint64_t getRelocHash(const Reloc &reloc,
- const BPSectionMacho *section) {
+ static uint64_t getRelocHash(
+ const Reloc &reloc,
+ const llvm::DenseMap<const BPSectionBase *, uint64_t> &sectionToIdx) {
auto *isec = reloc.getReferentInputSection();
std::optional<uint64_t> sectionIdx;
- if (isec && isec == section->getSection())
- sectionIdx = section->getSectionIdx();
-
+ for (const auto &entry : sectionToIdx)
+ if (const auto *bpSection = llvm::dyn_cast<BPSectionMacho>(entry.first))
+ if (bpSection->getSection() == isec) {
+ sectionIdx = entry.second;
+ break;
+ }
std::string kind;
if (isec)
kind = ("Section " + Twine(isec->kind())).str();
diff --git a/lld/include/lld/Common/BPSectionOrdererBase.h b/lld/include/lld/Common/BPSectionOrdererBase.h
index b8fe11a571626b..1cc069d826f5df 100644
--- a/lld/include/lld/Common/BPSectionOrdererBase.h
+++ b/lld/include/lld/Common/BPSectionOrdererBase.h
@@ -39,7 +39,9 @@ class BPSectionBase {
virtual bool isCodeSection() const = 0;
virtual llvm::SmallVector<std::unique_ptr<BPSymbol>> getSymbols() const = 0;
virtual void
- getSectionHashes(llvm::SmallVectorImpl<uint64_t> &hashes) const = 0;
+ getSectionHashes(llvm::SmallVectorImpl<uint64_t> &hashes,
+ const llvm::DenseMap<const BPSectionBase *, uint64_t>
+ &sectionToIdx) const = 0;
virtual std::optional<llvm::StringRef>
getResolvedLinkageName(llvm::StringRef name) const = 0;
>From bf92d57e81d79618319d08e5877f9855ec0a777e Mon Sep 17 00:00:00 2001
From: xupengying <xpy66swsry at gmail.com>
Date: Fri, 13 Dec 2024 02:53:23 +0800
Subject: [PATCH 6/9] fixup! [lld][ELF] Extend profile guided function ordering
to ELF binaries
---
lld/MachO/BPSectionOrderer.h | 18 ++++++++++--------
1 file changed, 10 insertions(+), 8 deletions(-)
diff --git a/lld/MachO/BPSectionOrderer.h b/lld/MachO/BPSectionOrderer.h
index dc03975189e85c..11f962a8206372 100644
--- a/lld/MachO/BPSectionOrderer.h
+++ b/lld/MachO/BPSectionOrderer.h
@@ -95,6 +95,12 @@ class BPSectionMacho : public BPSectionBase {
&sectionToIdx) const override {
constexpr unsigned windowSize = 4;
+ // Create a map from BPSectionBase* to InputSection* for lookup
+ llvm::DenseMap<const InputSection *, uint64_t> isecToIdx;
+ for (const auto &[section, idx] : sectionToIdx)
+ if (auto *machoSection = llvm::dyn_cast<BPSectionMacho>(section))
+ isecToIdx[machoSection->getSection()] = idx;
+
// Calculate content hashes
size_t dataSize = isec->data.size();
for (size_t i = 0; i < dataSize; i++) {
@@ -107,7 +113,7 @@ class BPSectionMacho : public BPSectionBase {
if (r.length == 0 || r.referent.isNull() || r.offset >= isec->data.size())
continue;
- uint64_t relocHash = getRelocHash(r, sectionToIdx);
+ uint64_t relocHash = getRelocHash(r, isecToIdx);
uint32_t start = (r.offset < windowSize) ? 0 : r.offset - windowSize + 1;
for (uint32_t i = start; i < r.offset + r.length; i++) {
auto window = isec->data.drop_front(i).take_front(windowSize);
@@ -126,15 +132,11 @@ class BPSectionMacho : public BPSectionBase {
private:
static uint64_t getRelocHash(
const Reloc &reloc,
- const llvm::DenseMap<const BPSectionBase *, uint64_t> &sectionToIdx) {
+ const llvm::DenseMap<const InputSection *, uint64_t> &isecToIdx) {
auto *isec = reloc.getReferentInputSection();
std::optional<uint64_t> sectionIdx;
- for (const auto &entry : sectionToIdx)
- if (const auto *bpSection = llvm::dyn_cast<BPSectionMacho>(entry.first))
- if (bpSection->getSection() == isec) {
- sectionIdx = entry.second;
- break;
- }
+ if (auto it = isecToIdx.find(isec); it != isecToIdx.end())
+ sectionIdx = it->second;
std::string kind;
if (isec)
kind = ("Section " + Twine(isec->kind())).str();
>From bba4a5d4fbc471af21cca4126431c7a4b561b00d Mon Sep 17 00:00:00 2001
From: xupengying <xpy66swsry at gmail.com>
Date: Fri, 13 Dec 2024 03:33:02 +0800
Subject: [PATCH 7/9] fixup! [lld][ELF] Extend profile guided function ordering
to ELF binaries
---
lld/Common/BPSectionOrdererBase.cpp | 6 ++---
lld/ELF/BPSectionOrderer.cpp | 4 +++-
lld/ELF/BPSectionOrderer.h | 4 ++--
lld/MachO/BPSectionOrderer.cpp | 4 +++-
lld/MachO/BPSectionOrderer.h | 22 ++++++-------------
lld/include/lld/Common/BPSectionOrdererBase.h | 8 +++----
6 files changed, 22 insertions(+), 26 deletions(-)
diff --git a/lld/Common/BPSectionOrdererBase.cpp b/lld/Common/BPSectionOrdererBase.cpp
index 957217de34f963..f7b460d51a1620 100644
--- a/lld/Common/BPSectionOrdererBase.cpp
+++ b/lld/Common/BPSectionOrdererBase.cpp
@@ -29,7 +29,7 @@ namespace lld {
static SmallVector<std::pair<unsigned, UtilityNodes>> getUnsForCompression(
ArrayRef<const BPSectionBase *> sections,
- const DenseMap<const BPSectionBase *, uint64_t> &sectionToIdx,
+ const DenseMap<const void *, uint64_t> &sectionToIdx,
ArrayRef<unsigned> sectionIdxs,
DenseMap<unsigned, SmallVector<unsigned>> *duplicateSectionIdxs,
BPFunctionNode::UtilityNodeT &maxUN) {
@@ -107,7 +107,7 @@ BPSectionBase::reorderSectionsByBalancedPartitioning(
SmallVector<std::unique_ptr<BPSectionBase>> &inputSections) {
TimeTraceScope timeScope("Setup Balanced Partitioning");
SmallVector<const BPSectionBase *> sections;
- DenseMap<const BPSectionBase *, uint64_t> sectionToIdx;
+ DenseMap<const void *, uint64_t> sectionToIdx;
StringMap<DenseSet<unsigned>> symbolToSectionIdxs;
// Process input sections
@@ -116,7 +116,7 @@ BPSectionBase::reorderSectionsByBalancedPartitioning(
continue;
unsigned sectionIdx = sections.size();
- sectionToIdx.try_emplace(isec.get(), sectionIdx);
+ sectionToIdx.try_emplace(isec->getSection(), sectionIdx);
sections.emplace_back(isec.get());
for (auto &sym : isec->getSymbols())
symbolToSectionIdxs[sym->getName()].insert(sectionIdx);
diff --git a/lld/ELF/BPSectionOrderer.cpp b/lld/ELF/BPSectionOrderer.cpp
index e98c0470273889..86605b1a762634 100644
--- a/lld/ELF/BPSectionOrderer.cpp
+++ b/lld/ELF/BPSectionOrderer.cpp
@@ -56,7 +56,9 @@ lld::elf::runBalancedPartitioning(Ctx &ctx, llvm::StringRef profilePath,
DenseMap<const InputSectionBase *, int> result;
for (const auto &[sec, priority] : reorderedSections) {
auto *elfSection = cast<BPSectionELF>(sec);
- result.try_emplace(elfSection->getSection(), static_cast<int>(priority));
+ result.try_emplace(
+ static_cast<const InputSectionBase *>(elfSection->getSection()),
+ static_cast<int>(priority));
}
return result;
}
diff --git a/lld/ELF/BPSectionOrderer.h b/lld/ELF/BPSectionOrderer.h
index 7b26e9da1d08e4..30d5636c95fe34 100644
--- a/lld/ELF/BPSectionOrderer.h
+++ b/lld/ELF/BPSectionOrderer.h
@@ -73,7 +73,7 @@ class BPSectionELF : public BPSectionBase {
std::unique_ptr<BPSymbolELF> sym)
: isec(sec), symbol(std::move(sym)) {}
- const InputSectionBase *getSection() const { return isec; }
+ const void *getSection() const override { return isec; }
uint64_t getSize() const override { return isec->getSize(); }
@@ -98,7 +98,7 @@ class BPSectionELF : public BPSectionBase {
}
void getSectionHashes(llvm::SmallVectorImpl<uint64_t> &hashes,
- const llvm::DenseMap<const BPSectionBase *, uint64_t>
+ const llvm::DenseMap<const void *, uint64_t>
&sectionToIdx) const override {
constexpr unsigned windowSize = 4;
diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index 19f2afdc4b1d03..5e9fd5248b2ef0 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -44,7 +44,9 @@ DenseMap<const InputSection *, size_t> lld::macho::runBalancedPartitioning(
DenseMap<const InputSection *, size_t> result;
for (const auto &[sec, priority] : reorderedSections) {
if (auto *machoSection = dyn_cast<BPSectionMacho>(sec)) {
- result.try_emplace(machoSection->getSection(), priority);
+ result.try_emplace(
+ static_cast<const InputSection *>(machoSection->getSection()),
+ priority);
}
}
return result;
diff --git a/lld/MachO/BPSectionOrderer.h b/lld/MachO/BPSectionOrderer.h
index 11f962a8206372..fa71f739ecb3b8 100644
--- a/lld/MachO/BPSectionOrderer.h
+++ b/lld/MachO/BPSectionOrderer.h
@@ -61,7 +61,7 @@ class BPSectionMacho : public BPSectionBase {
explicit BPSectionMacho(const InputSection *sec, uint64_t sectionIdx)
: isec(sec), sectionIdx(sectionIdx) {}
- const InputSection *getSection() const { return isec; }
+ const void *getSection() const override { return isec; }
uint64_t getSize() const override { return isec->getSize(); }
@@ -91,16 +91,10 @@ class BPSectionMacho : public BPSectionBase {
}
void getSectionHashes(llvm::SmallVectorImpl<uint64_t> &hashes,
- const llvm::DenseMap<const BPSectionBase *, uint64_t>
+ const llvm::DenseMap<const void *, uint64_t>
&sectionToIdx) const override {
constexpr unsigned windowSize = 4;
- // Create a map from BPSectionBase* to InputSection* for lookup
- llvm::DenseMap<const InputSection *, uint64_t> isecToIdx;
- for (const auto &[section, idx] : sectionToIdx)
- if (auto *machoSection = llvm::dyn_cast<BPSectionMacho>(section))
- isecToIdx[machoSection->getSection()] = idx;
-
// Calculate content hashes
size_t dataSize = isec->data.size();
for (size_t i = 0; i < dataSize; i++) {
@@ -113,7 +107,7 @@ class BPSectionMacho : public BPSectionBase {
if (r.length == 0 || r.referent.isNull() || r.offset >= isec->data.size())
continue;
- uint64_t relocHash = getRelocHash(r, isecToIdx);
+ uint64_t relocHash = getRelocHash(r, sectionToIdx);
uint32_t start = (r.offset < windowSize) ? 0 : r.offset - windowSize + 1;
for (uint32_t i = start; i < r.offset + r.length; i++) {
auto window = isec->data.drop_front(i).take_front(windowSize);
@@ -125,17 +119,15 @@ class BPSectionMacho : public BPSectionBase {
hashes.erase(std::unique(hashes.begin(), hashes.end()), hashes.end());
}
- const InputSection *getInputSection() const { return isec; }
-
static bool classof(const BPSectionBase *s) { return true; }
private:
- static uint64_t getRelocHash(
- const Reloc &reloc,
- const llvm::DenseMap<const InputSection *, uint64_t> &isecToIdx) {
+ static uint64_t
+ getRelocHash(const Reloc &reloc,
+ const llvm::DenseMap<const void *, uint64_t> &sectionToIdx) {
auto *isec = reloc.getReferentInputSection();
std::optional<uint64_t> sectionIdx;
- if (auto it = isecToIdx.find(isec); it != isecToIdx.end())
+ if (auto it = sectionToIdx.find(isec); it != sectionToIdx.end())
sectionIdx = it->second;
std::string kind;
if (isec)
diff --git a/lld/include/lld/Common/BPSectionOrdererBase.h b/lld/include/lld/Common/BPSectionOrdererBase.h
index 1cc069d826f5df..78c51cd9786b12 100644
--- a/lld/include/lld/Common/BPSectionOrdererBase.h
+++ b/lld/include/lld/Common/BPSectionOrdererBase.h
@@ -38,10 +38,10 @@ class BPSectionBase {
virtual bool hasValidData() const = 0;
virtual bool isCodeSection() const = 0;
virtual llvm::SmallVector<std::unique_ptr<BPSymbol>> getSymbols() const = 0;
- virtual void
- getSectionHashes(llvm::SmallVectorImpl<uint64_t> &hashes,
- const llvm::DenseMap<const BPSectionBase *, uint64_t>
- &sectionToIdx) const = 0;
+ virtual const void *getSection() const = 0;
+ virtual void getSectionHashes(
+ llvm::SmallVectorImpl<uint64_t> &hashes,
+ const llvm::DenseMap<const void *, uint64_t> &sectionToIdx) const = 0;
virtual std::optional<llvm::StringRef>
getResolvedLinkageName(llvm::StringRef name) const = 0;
>From 3c8548a6d881d12a90b8a0768614692181af59c5 Mon Sep 17 00:00:00 2001
From: xupengying <xpy66swsry at gmail.com>
Date: Fri, 13 Dec 2024 18:31:54 +0800
Subject: [PATCH 8/9] fix: add relocations hashes to BPSectionELF
---
lld/ELF/BPSectionOrderer.h | 33 +++++++++++++++++++++++++++++----
1 file changed, 29 insertions(+), 4 deletions(-)
diff --git a/lld/ELF/BPSectionOrderer.h b/lld/ELF/BPSectionOrderer.h
index 30d5636c95fe34..7f32c26bdd3e7c 100644
--- a/lld/ELF/BPSectionOrderer.h
+++ b/lld/ELF/BPSectionOrderer.h
@@ -102,16 +102,41 @@ class BPSectionELF : public BPSectionBase {
&sectionToIdx) const override {
constexpr unsigned windowSize = 4;
- // Calculate content hashes
size_t size = isec->content().size();
for (size_t i = 0; i < size; i++) {
auto window = isec->content().drop_front(i).take_front(windowSize);
hashes.push_back(xxHash64(window));
}
- // TODO: Calculate relocation hashes.
- // Since in ELF, relocations are complex, but the effect without them are
- // good enough, we just use 0 as their hash.
+ for (const auto &r : isec->relocations) {
+ if (r.sym == nullptr || r.sym->getSize() == 0)
+ continue;
+ std::optional<uint64_t> sectionIdx;
+ std::string kind;
+ uint64_t relocHash;
+ kind = (" Symbol " + Twine(r.sym->kind())).str();
+ if (auto *sym = llvm::dyn_cast<Defined>(r.sym)) {
+ if (auto *relocSec =
+ llvm::dyn_cast_or_null<InputSectionBase>(sym->section)) {
+ kind += ("Section " + Twine(relocSec->kind())).str();
+ if (auto it = sectionToIdx.find(relocSec); it != sectionToIdx.end())
+ sectionIdx = it->second;
+ relocHash = BPSectionBase::getRelocHash(kind, sectionIdx.value_or(0),
+ sym->value, r.addend);
+ size_t relocSize = relocSec->content().size();
+ uint32_t start =
+ (r.offset < windowSize) ? 0 : r.offset - windowSize + 1;
+ for (uint32_t i = start; i < relocSize; i++) {
+ auto window =
+ relocSec->content().drop_front(i).take_front(windowSize);
+ hashes.push_back(xxHash64(window) + relocHash);
+ }
+ }
+ } else {
+ hashes.push_back(BPSectionBase::getRelocHash(
+ kind, sectionIdx.value_or(0), 0, r.addend));
+ }
+ }
llvm::sort(hashes);
hashes.erase(std::unique(hashes.begin(), hashes.end()), hashes.end());
>From 7c4231e4ca8664667bdbf7f35d7dcf0db1f3545b Mon Sep 17 00:00:00 2001
From: xupengying <xpy66swsry at gmail.com>
Date: Sat, 14 Dec 2024 12:46:10 +0800
Subject: [PATCH 9/9] remove useless import and fix local compilation
---
lld/Common/BPSectionOrdererBase.cpp | 1 -
lld/Common/CMakeLists.txt | 1 +
lld/ELF/BPSectionOrderer.cpp | 3 ---
lld/ELF/BPSectionOrderer.h | 1 -
lld/ELF/CMakeLists.txt | 1 +
lld/MachO/BPSectionOrderer.cpp | 4 ----
lld/MachO/BPSectionOrderer.h | 1 -
lld/include/lld/Common/BPSectionOrdererBase.h | 1 -
8 files changed, 2 insertions(+), 11 deletions(-)
diff --git a/lld/Common/BPSectionOrdererBase.cpp b/lld/Common/BPSectionOrdererBase.cpp
index f7b460d51a1620..51d56606ec92b4 100644
--- a/lld/Common/BPSectionOrdererBase.cpp
+++ b/lld/Common/BPSectionOrdererBase.cpp
@@ -19,7 +19,6 @@
#include "llvm/Support/BalancedPartitioning.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/VirtualFileSystem.h"
-#include "llvm/Support/xxhash.h"
#define DEBUG_TYPE "bp-section-orderer"
using namespace llvm;
diff --git a/lld/Common/CMakeLists.txt b/lld/Common/CMakeLists.txt
index 2ab5093bf6887d..43e91b85821dbf 100644
--- a/lld/Common/CMakeLists.txt
+++ b/lld/Common/CMakeLists.txt
@@ -48,6 +48,7 @@ add_lld_library(lldCommon
Demangle
MC
Option
+ ProfileData
Support
Target
TargetParser
diff --git a/lld/ELF/BPSectionOrderer.cpp b/lld/ELF/BPSectionOrderer.cpp
index 86605b1a762634..1fc6036e5dd9df 100644
--- a/lld/ELF/BPSectionOrderer.cpp
+++ b/lld/ELF/BPSectionOrderer.cpp
@@ -11,10 +11,7 @@
#include "InputFiles.h"
#include "InputSection.h"
#include "lld/Common/BPSectionOrdererBase.h"
-#include "lld/Common/CommonLinkerContext.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/Support/BalancedPartitioning.h"
-#include "llvm/Support/TimeProfiler.h"
#include "SymbolTable.h"
#include "Symbols.h"
diff --git a/lld/ELF/BPSectionOrderer.h b/lld/ELF/BPSectionOrderer.h
index 7f32c26bdd3e7c..502b7cfbdd2ffd 100644
--- a/lld/ELF/BPSectionOrderer.h
+++ b/lld/ELF/BPSectionOrderer.h
@@ -21,7 +21,6 @@
#include "lld/Common/BPSectionOrdererBase.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/lld/ELF/CMakeLists.txt b/lld/ELF/CMakeLists.txt
index 298443cd6ea42c..ec3f6382282b1f 100644
--- a/lld/ELF/CMakeLists.txt
+++ b/lld/ELF/CMakeLists.txt
@@ -73,6 +73,7 @@ add_lld_library(lldELF
Object
Option
Passes
+ ProfileData
Support
TargetParser
TransformUtils
diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index 5e9fd5248b2ef0..0ffbf16007fdad 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -8,11 +8,7 @@
#include "BPSectionOrderer.h"
#include "InputSection.h"
-#include "lld/Common/ErrorHandler.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/Support/BalancedPartitioning.h"
-#include "llvm/Support/TimeProfiler.h"
#define DEBUG_TYPE "bp-section-orderer"
diff --git a/lld/MachO/BPSectionOrderer.h b/lld/MachO/BPSectionOrderer.h
index fa71f739ecb3b8..29b20c781c6b0d 100644
--- a/lld/MachO/BPSectionOrderer.h
+++ b/lld/MachO/BPSectionOrderer.h
@@ -20,7 +20,6 @@
#include "lld/Common/BPSectionOrdererBase.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/TinyPtrVector.h"
namespace lld::macho {
diff --git a/lld/include/lld/Common/BPSectionOrdererBase.h b/lld/include/lld/Common/BPSectionOrdererBase.h
index 78c51cd9786b12..a22bfcafd79c9e 100644
--- a/lld/include/lld/Common/BPSectionOrdererBase.h
+++ b/lld/include/lld/Common/BPSectionOrdererBase.h
@@ -14,7 +14,6 @@
#ifndef LLD_COMMON_BP_SECTION_ORDERER_BASE_H
#define LLD_COMMON_BP_SECTION_ORDERER_BASE_H
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
More information about the llvm-commits
mailing list