[lld] [LLD] Add CLASS syntax to SECTIONS (PR #95323)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 12 15:45:49 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-lld-elf
Author: Daniel Thornburgh (mysterymath)
<details>
<summary>Changes</summary>
This allows the input section matching algorithm to be separated from output section descriptions. As a result, a group of sections can be assigned to multiple output sections, providing an explicit version of the spilling performed by --enable-non-contiguous-regions without requiring a flag that alters global linker script matching behavior. It also makes the linker script language more expressive even if spilling is not intended, since input sections can be matched in a different order than they are placed in an output section.
The implementation reuses the backend mechanism provided by --enable-non-contiguous-regions, so it has roughly similar semantics and limitations. In particular, sections cannot be spilled into or out of INSERT, OVERWRITE_SECTIONS, or /DISCARD/. The restrictions on the first two (INSERT and OVERWRITE_SECTIONS) aren't intrinsic, so it may be possible to relax them later.
---
Patch is 37.79 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/95323.diff
10 Files Affected:
- (modified) lld/ELF/InputSection.cpp (+2)
- (modified) lld/ELF/InputSection.h (+4-2)
- (modified) lld/ELF/LinkerScript.cpp (+161-78)
- (modified) lld/ELF/LinkerScript.h (+19-4)
- (modified) lld/ELF/MapFile.cpp (+2)
- (modified) lld/ELF/OutputSections.h (+21)
- (modified) lld/ELF/ScriptParser.cpp (+54-5)
- (modified) lld/docs/ELF/linker_script.rst (+46-7)
- (modified) lld/docs/ReleaseNotes.rst (+12-6)
- (added) lld/test/ELF/linkerscript/section-class.test (+379)
``````````diff
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index e6c5996c0b392..d9338611f8d5e 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -159,6 +159,8 @@ uint64_t SectionBase::getOffset(uint64_t offset) const {
// For output sections we treat offset -1 as the end of the section.
return offset == uint64_t(-1) ? os->size : offset;
}
+ case Class:
+ llvm_unreachable("section classes do not have offsets");
case Regular:
case Synthetic:
case Spill:
diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
index 58e5306fd6dcd..1c8550ba19baf 100644
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -48,7 +48,7 @@ template <class ELFT> struct RelsOrRelas {
// sections.
class SectionBase {
public:
- enum Kind { Regular, Synthetic, Spill, EHFrame, Merge, Output };
+ enum Kind { Regular, Synthetic, Spill, EHFrame, Merge, Output, Class };
Kind kind() const { return (Kind)sectionKind; }
@@ -132,7 +132,9 @@ class InputSectionBase : public SectionBase {
uint32_t addralign, ArrayRef<uint8_t> data, StringRef name,
Kind sectionKind);
- static bool classof(const SectionBase *s) { return s->kind() != Output; }
+ static bool classof(const SectionBase *s) {
+ return s->kind() != Output && s->kind() != Class;
+ }
// The file which contains this section. Its dynamic type is usually
// ObjFile<ELFT>, but may be an InputFile of InternalKind (for a synthetic
diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index 3ba59c112b8a8..aa3cafb050259 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -276,6 +276,8 @@ getSymbolAssignmentValues(ArrayRef<SectionCommand *> sectionCommands) {
assign->sym->value));
continue;
}
+ if (isa<SectionClassDesc>(cmd))
+ continue;
for (SectionCommand *subCmd : cast<OutputDesc>(cmd)->osec.commands)
if (auto *assign = dyn_cast<SymbolAssignment>(subCmd))
if (assign->sym)
@@ -347,6 +349,8 @@ void LinkerScript::declareSymbols() {
declareSymbol(assign);
continue;
}
+ if (isa<SectionClassDesc>(cmd))
+ continue;
// If the output section directive has constraints,
// we can't say for sure if it is going to be included or not.
@@ -490,99 +494,130 @@ static void sortInputSections(MutableArrayRef<InputSectionBase *> vec,
SmallVector<InputSectionBase *, 0>
LinkerScript::computeInputSections(const InputSectionDescription *cmd,
ArrayRef<InputSectionBase *> sections,
- const OutputSection &outCmd) {
+ const SectionBase &outCmd) {
SmallVector<InputSectionBase *, 0> ret;
- SmallVector<size_t, 0> indexes;
- DenseSet<size_t> seen;
DenseSet<InputSectionBase *> spills;
- auto sortByPositionThenCommandLine = [&](size_t begin, size_t end) {
- llvm::sort(MutableArrayRef<size_t>(indexes).slice(begin, end - begin));
- for (size_t i = begin; i != end; ++i)
- ret[i] = sections[indexes[i]];
- sortInputSections(
- MutableArrayRef<InputSectionBase *>(ret).slice(begin, end - begin),
- config->sortSection, SortSectionPolicy::None);
+
+ const auto flagsMatch = [cmd](InputSectionBase *sec) {
+ return (sec->flags & cmd->withFlags) == cmd->withFlags &&
+ (sec->flags & cmd->withoutFlags) == 0;
};
// Collects all sections that satisfy constraints of Cmd.
- size_t sizeAfterPrevSort = 0;
- for (const SectionPattern &pat : cmd->sectionPatterns) {
- size_t sizeBeforeCurrPat = ret.size();
-
- for (size_t i = 0, e = sections.size(); i != e; ++i) {
- // Skip if the section is dead or has been matched by a previous pattern
- // in this input section description.
- InputSectionBase *sec = sections[i];
- if (!sec->isLive() || seen.contains(i))
- continue;
-
- // For --emit-relocs we have to ignore entries like
- // .rela.dyn : { *(.rela.data) }
- // which are common because they are in the default bfd script.
- // We do not ignore SHT_REL[A] linker-synthesized sections here because
- // want to support scripts that do custom layout for them.
- if (isa<InputSection>(sec) &&
- cast<InputSection>(sec)->getRelocatedSection())
- continue;
-
- // Check the name early to improve performance in the common case.
- if (!pat.sectionPat.match(sec->name))
- continue;
-
- if (!cmd->matchesFile(sec->file) || pat.excludesFile(sec->file) ||
- (sec->flags & cmd->withFlags) != cmd->withFlags ||
- (sec->flags & cmd->withoutFlags) != 0)
- continue;
-
- if (sec->parent) {
- // Skip if not allowing multiple matches.
- if (!config->enableNonContiguousRegions)
+ if (cmd->className.empty()) {
+ DenseSet<size_t> seen;
+ size_t sizeAfterPrevSort = 0;
+ SmallVector<size_t, 0> indexes;
+ auto sortByPositionThenCommandLine = [&](size_t begin, size_t end) {
+ llvm::sort(MutableArrayRef<size_t>(indexes).slice(begin, end - begin));
+ for (size_t i = begin; i != end; ++i)
+ ret[i] = sections[indexes[i]];
+ sortInputSections(
+ MutableArrayRef<InputSectionBase *>(ret).slice(begin, end - begin),
+ config->sortSection, SortSectionPolicy::None);
+ };
+
+ for (const SectionPattern &pat : cmd->sectionPatterns) {
+ size_t sizeBeforeCurrPat = ret.size();
+
+ for (size_t i = 0, e = sections.size(); i != e; ++i) {
+ // Skip if the section is dead or has been matched by a previous pattern
+ // in this input section description.
+ InputSectionBase *sec = sections[i];
+ if (!sec->isLive() || seen.contains(i))
continue;
- // Disallow spilling into /DISCARD/; special handling would be needed
- // for this in address assignment, and the semantics are nebulous.
- if (outCmd.name == "/DISCARD/")
+ // For --emit-relocs we have to ignore entries like
+ // .rela.dyn : { *(.rela.data) }
+ // which are common because they are in the default bfd script.
+ // We do not ignore SHT_REL[A] linker-synthesized sections here because
+ // want to support scripts that do custom layout for them.
+ if (isa<InputSection>(sec) &&
+ cast<InputSection>(sec)->getRelocatedSection())
continue;
- // Skip if the section's first match was /DISCARD/; such sections are
- // always discarded.
- if (sec->parent->name == "/DISCARD/")
+ // Check the name early to improve performance in the common case.
+ if (!pat.sectionPat.match(sec->name))
continue;
- // Skip if the section was already matched by a different input section
- // description within this output section.
- if (sec->parent == &outCmd)
+ if (!cmd->matchesFile(sec->file) || pat.excludesFile(sec->file) ||
+ !flagsMatch(sec))
continue;
- spills.insert(sec);
+ if (sec->parent) {
+ // Skip if not allowing multiple matches.
+ if (!config->enableNonContiguousRegions)
+ continue;
+
+ // Disallow spilling out of or into section classes; that's already a
+ // mechanism for spilling.
+ if (isa<SectionClass>(sec->parent) || isa<SectionClass>(outCmd))
+ continue;
+
+ // Disallow spilling into /DISCARD/; special handling would be needed
+ // for this in address assignment, and the semantics are nebulous.
+ if (outCmd.name == "/DISCARD/")
+ continue;
+
+ // Skip if the section was already matched by a different input section
+ // description within this output section or class.
+ if (sec->parent == &outCmd)
+ continue;
+
+ spills.insert(sec);
+ }
+
+ ret.push_back(sec);
+ indexes.push_back(i);
+ seen.insert(i);
}
- ret.push_back(sec);
- indexes.push_back(i);
- seen.insert(i);
+ if (pat.sortOuter == SortSectionPolicy::Default)
+ continue;
+
+ // Matched sections are ordered by radix sort with the keys being (SORT*,
+ // --sort-section, input order), where SORT* (if present) is most
+ // significant.
+ //
+ // Matched sections between the previous SORT* and this SORT* are sorted by
+ // (--sort-alignment, input order).
+ sortByPositionThenCommandLine(sizeAfterPrevSort, sizeBeforeCurrPat);
+ // Matched sections by this SORT* pattern are sorted using all 3 keys.
+ // ret[sizeBeforeCurrPat,ret.size()) are already in the input order, so we
+ // just sort by sortOuter and sortInner.
+ sortInputSections(
+ MutableArrayRef<InputSectionBase *>(ret).slice(sizeBeforeCurrPat),
+ pat.sortOuter, pat.sortInner);
+ sizeAfterPrevSort = ret.size();
}
- if (pat.sortOuter == SortSectionPolicy::Default)
- continue;
+ // Matched sections after the last SORT* are sorted by (--sort-alignment,
+ // input order).
+ sortByPositionThenCommandLine(sizeAfterPrevSort, ret.size());
+ } else {
+ SectionClassDesc *scd = script->sectionClasses.lookup(cmd->className);
+ if (!scd) {
+ error("undefined section class '" + cmd->className + "'");
+ return ret;
+ }
+ if (!scd->sc.assigned) {
+ error("section class '" + cmd->className + "' used before assigned");
+ return ret;
+ }
- // Matched sections are ordered by radix sort with the keys being (SORT*,
- // --sort-section, input order), where SORT* (if present) is most
- // significant.
- //
- // Matched sections between the previous SORT* and this SORT* are sorted by
- // (--sort-alignment, input order).
- sortByPositionThenCommandLine(sizeAfterPrevSort, sizeBeforeCurrPat);
- // Matched sections by this SORT* pattern are sorted using all 3 keys.
- // ret[sizeBeforeCurrPat,ret.size()) are already in the input order, so we
- // just sort by sortOuter and sortInner.
- sortInputSections(
- MutableArrayRef<InputSectionBase *>(ret).slice(sizeBeforeCurrPat),
- pat.sortOuter, pat.sortInner);
- sizeAfterPrevSort = ret.size();
+ for (InputSectionDescription *isd : scd->sc.commands) {
+ for (InputSectionBase *sec : isd->sectionBases) {
+ if (sec->parent == &outCmd || !flagsMatch(sec))
+ continue;
+ bool isSpill = sec->parent && isa<OutputSection>(sec->parent);
+ if (!sec->parent || (isSpill && outCmd.name == "/DISCARD/"))
+ error("section '" + sec->name + "' cannot spill from/to /DISCARD/");
+ if (isSpill)
+ spills.insert(sec);
+ ret.push_back(sec);
+ }
+ }
}
- // Matched sections after the last SORT* are sorted by (--sort-alignment,
- // input order).
- sortByPositionThenCommandLine(sizeAfterPrevSort, ret.size());
// The flag --enable-non-contiguous-regions may cause sections to match an
// InputSectionDescription in more than one OutputSection. Matches after the
@@ -707,7 +742,7 @@ void LinkerScript::processSectionCommands() {
!map.try_emplace(CachedHashStringRef(osec->name), osd).second)
warn("OVERWRITE_SECTIONS specifies duplicate " + osec->name);
}
- for (SectionCommand *&base : sectionCommands)
+ for (SectionCommand *&base : sectionCommands) {
if (auto *osd = dyn_cast<OutputDesc>(base)) {
OutputSection *osec = &osd->osec;
if (OutputDesc *overwrite = map.lookup(CachedHashStringRef(osec->name))) {
@@ -717,7 +752,44 @@ void LinkerScript::processSectionCommands() {
} else if (process(osec)) {
osec->sectionIndex = i++;
}
+ } else if (auto *sc = dyn_cast<SectionClassDesc>(base)) {
+ for (InputSectionDescription *isd : sc->sc.commands) {
+ isd->sectionBases =
+ computeInputSections(isd, ctx.inputSections, sc->sc);
+ for (InputSectionBase *s : isd->sectionBases)
+ s->parent = &sc->sc;
+ }
+ sc->sc.assigned = true;
}
+ }
+
+ // Check that input sections cannot spill into or out of INSERT,
+ // since the semantics are nebulous. This is also true for OVERWRITE_SECTIONS,
+ // but no check is needed, since the order of processing ensures they cannot
+ // legally reference classes.
+ if (!potentialSpillLists.empty()) {
+ DenseSet<StringRef> insertNames;
+ for (InsertCommand &ic : insertCommands)
+ insertNames.insert(ic.names.begin(), ic.names.end());
+ for (SectionCommand *&base : sectionCommands) {
+ auto *osd = dyn_cast<OutputDesc>(base);
+ if (!osd)
+ continue;
+ OutputSection *os = &osd->osec;
+ if (!insertNames.contains(os->name))
+ continue;
+ for (SectionCommand *sc : os->commands) {
+ auto *isd = dyn_cast<InputSectionDescription>(sc);
+ if (!isd)
+ continue;
+ for (InputSectionBase *isec : isd->sectionBases)
+ if (isa<PotentialSpillSection>(isec) ||
+ potentialSpillLists.contains(isec))
+ error("section '" + isec->name +
+ "' cannot spill from/to INSERT section '" + os->name + "'");
+ }
+ }
+ }
// If an OVERWRITE_SECTIONS specified output section is not in
// sectionCommands, append it to the end. The section will be inserted by
@@ -725,6 +797,15 @@ void LinkerScript::processSectionCommands() {
for (OutputDesc *osd : overwriteSections)
if (osd->osec.partition == 1 && osd->osec.sectionIndex == UINT32_MAX)
sectionCommands.push_back(osd);
+
+ // Input sections cannot have a section class parent past this point; they
+ // must have been assigned to an output section.
+ for (const auto &[_, sc] : sectionClasses)
+ for (InputSectionDescription *isd : sc->sc.commands)
+ for (InputSectionBase *sec : isd->sectionBases)
+ if (sec->parent && isa<SectionClass>(sec->parent))
+ error("section '" + sec->name + "' assigned to class '" +
+ sec->parent->name + "' but to no output section");
}
void LinkerScript::processSymbolAssignments() {
@@ -745,8 +826,8 @@ void LinkerScript::processSymbolAssignments() {
for (SectionCommand *cmd : sectionCommands) {
if (auto *assign = dyn_cast<SymbolAssignment>(cmd))
addSymbol(assign);
- else
- for (SectionCommand *subCmd : cast<OutputDesc>(cmd)->osec.commands)
+ else if (auto *od = dyn_cast<OutputDesc>(cmd))
+ for (SectionCommand *subCmd : od->osec.commands)
if (auto *assign = dyn_cast<SymbolAssignment>(subCmd))
addSymbol(assign);
}
@@ -1416,6 +1497,8 @@ const Defined *LinkerScript::assignAddresses() {
assign->size = dot - assign->addr;
continue;
}
+ if (isa<SectionClassDesc>(cmd))
+ continue;
assignOffsets(&cast<OutputDesc>(cmd)->osec);
}
@@ -1435,7 +1518,7 @@ static bool hasRegionOverflowed(MemoryRegion *mr) {
// Under-estimates may cause unnecessary spills, but over-estimates can always
// be corrected on the next pass.
bool LinkerScript::spillSections() {
- if (!config->enableNonContiguousRegions)
+ if (potentialSpillLists.empty())
return false;
bool spilled = false;
diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h
index 734d4e7498aa2..5da3bf3ab582d 100644
--- a/lld/ELF/LinkerScript.h
+++ b/lld/ELF/LinkerScript.h
@@ -35,6 +35,8 @@ class OutputSection;
class SectionBase;
class ThunkSection;
struct OutputDesc;
+struct SectionClass;
+struct SectionClassDesc;
// This represents an r-value in the linker script.
struct ExprValue {
@@ -78,7 +80,8 @@ enum SectionsCommandKind {
AssignmentKind, // . = expr or <sym> = expr
OutputSectionKind,
InputSectionKind,
- ByteKind // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr)
+ ByteKind, // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr)
+ ClassKind,
};
struct SectionCommand {
@@ -198,9 +201,12 @@ class InputSectionDescription : public SectionCommand {
public:
InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0,
- uint64_t withoutFlags = 0)
+ uint64_t withoutFlags = 0, StringRef className = {})
: SectionCommand(InputSectionKind), filePat(filePattern),
- withFlags(withFlags), withoutFlags(withoutFlags) {}
+ withFlags(withFlags), withoutFlags(withoutFlags), className(className) {
+ assert((filePattern.empty() || className.empty()) &&
+ "file pattern and class name are mutually exclusive");
+ }
static bool classof(const SectionCommand *c) {
return c->kind == InputSectionKind;
@@ -228,6 +234,10 @@ class InputSectionDescription : public SectionCommand {
// SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command.
uint64_t withFlags;
uint64_t withoutFlags;
+
+ // If present, input section matching uses class membership instead of file
+ // and section patterns (mutually exclusive).
+ StringRef className;
};
// Represents BYTE(), SHORT(), LONG(), or QUAD().
@@ -289,7 +299,7 @@ class LinkerScript final {
SmallVector<InputSectionBase *, 0>
computeInputSections(const InputSectionDescription *,
ArrayRef<InputSectionBase *>,
- const OutputSection &outCmd);
+ const SectionBase &outCmd);
SmallVector<InputSectionBase *, 0> createInputSectionList(OutputSection &cmd);
@@ -413,6 +423,11 @@ class LinkerScript final {
PotentialSpillSection *tail;
};
llvm::DenseMap<InputSectionBase *, PotentialSpillList> potentialSpillLists;
+
+ // Named lists of input sections that can be collectively referenced in output
+ // section descriptions. Multiple references allow for sections to spill from
+ // one output section to another.
+ llvm::StringMap<SectionClassDesc*> sectionClasses;
};
struct ScriptWrapper {
diff --git a/lld/ELF/MapFile.cpp b/lld/ELF/MapFile.cpp
index c4f3fdde30f36..1bad529b40329 100644
--- a/lld/ELF/MapFile.cpp
+++ b/lld/ELF/MapFile.cpp
@@ -167,6 +167,8 @@ static void writeMapFile(raw_fd_ostream &os) {
os << assign->commandString << '\n';
continue;
}
+ if (isa<SectionClassDesc>(cmd))
+ continue;
osec = &cast<OutputDesc>(cmd)->osec;
writeHeader(os, osec->addr, osec->getLMA(), osec->size, osec->addralign);
diff --git a/lld/ELF/OutputSections.h b/lld/ELF/OutputSections.h
index 78fede48a23f2..e77f774556fa0 100644
--- a/lld/ELF/OutputSections.h
+++ b/lld/ELF/OutputSections.h
@@ -137,6 +137,27 @@ struct OutputDesc final : SectionCommand {
}
};
+// A list of input sections that can be referenced in output descriptions.
+// Multiple references allow sections to spill from one output section to the
+// next.
+struct SectionClass final : public SectionBase {
+ SmallVector<InputSectionDescription *, 0> commands;
+ bool assigned = false;
+
+ SectionClass(StringRef name) : SectionBase(Class, name, 0, 0, 0, 0, 0, 0) {}
+ static bool classof(const SectionBase *s) { return s->kind() == Class; }
+};
+
+struct SectionClassDesc : SectionCommand {
+ SectionClass sc;
+
+ SectionClassDesc(StringRef name) : SectionCommand(ClassKind), sc(name) {}
+
+ static bool classof(const SectionCommand *c) {
+ return c->kind == ClassKind;
+ }
+};
+
int getPriority(StringRef s);
InputSection *getFirstInputSection(const OutputSection *os);
diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp
index f90ce6fa74075..422bb63b8c03a 100644
--- a/lld/ELF/ScriptParser.cpp
+++ b/lld/ELF/ScriptParser.cpp
@@ -96,6 +96,8 @@ class ScriptParser final : ScriptLexer {
OutputDesc *readOverlaySectionDescription();
OutputDesc *readOutputSectionDescription(StringRef outSec);
SmallVector<SectionCommand *, 0> readOverlay();
+ SectionClassDesc *readSectionClassDescription();
+ StringRef readSectionClassName();
SmallVector<StringRef, 0> readOutputSectionPhdrs();
std::pair<uint64_t, uint64_t> readInputSectionFlags();
InputSectionDescription *readInputSectionDescription(StringRef tok);
@@ -585,6 +587,35 @@ SmallVector<SectionCommand *, 0> ScriptParser::readOverlay() {
return v;
}
+SectionClassDesc *ScriptParser::readSectionClassDescription() {
+ std::string loc = getCurrentLocation();
+ StringRef name = readSectionClassName();
+ SectionClassDesc *desc = make<Sectio...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/95323
More information about the llvm-commits
mailing list