[lld] [LLD] Add CLASS syntax to SECTIONS (PR #95323)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 12 15:45:49 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-lld-elf

Author: Daniel Thornburgh (mysterymath)

<details>
<summary>Changes</summary>

This allows the input section matching algorithm to be separated from output section descriptions. As a result, a group of sections can be assigned to multiple output sections, providing an explicit version of the spilling performed by --enable-non-contiguous-regions that doesn't require altering global linker script matching behavior with a flag. It also makes the linker script language more expressive even if spilling is not intended, since input section matching can be done in a different order than the one in which sections are placed in an output section.

The implementation reuses the backend mechanism provided by --enable-non-contiguous-regions, so it has roughly similar semantics and limitations. In particular, sections cannot be spilled into or out of INSERT, OVERWRITE_SECTIONS, or /DISCARD/. The restrictions on the first two (INSERT and OVERWRITE_SECTIONS) aren't intrinsic, so it may be possible to relax them later.

---

Patch is 37.79 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/95323.diff


10 Files Affected:

- (modified) lld/ELF/InputSection.cpp (+2) 
- (modified) lld/ELF/InputSection.h (+4-2) 
- (modified) lld/ELF/LinkerScript.cpp (+161-78) 
- (modified) lld/ELF/LinkerScript.h (+19-4) 
- (modified) lld/ELF/MapFile.cpp (+2) 
- (modified) lld/ELF/OutputSections.h (+21) 
- (modified) lld/ELF/ScriptParser.cpp (+54-5) 
- (modified) lld/docs/ELF/linker_script.rst (+46-7) 
- (modified) lld/docs/ReleaseNotes.rst (+12-6) 
- (added) lld/test/ELF/linkerscript/section-class.test (+379) 


``````````diff
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index e6c5996c0b392..d9338611f8d5e 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -159,6 +159,8 @@ uint64_t SectionBase::getOffset(uint64_t offset) const {
     // For output sections we treat offset -1 as the end of the section.
     return offset == uint64_t(-1) ? os->size : offset;
   }
+  case Class:
+    llvm_unreachable("section classes do not have offsets");
   case Regular:
   case Synthetic:
   case Spill:
diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
index 58e5306fd6dcd..1c8550ba19baf 100644
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -48,7 +48,7 @@ template <class ELFT> struct RelsOrRelas {
 // sections.
 class SectionBase {
 public:
-  enum Kind { Regular, Synthetic, Spill, EHFrame, Merge, Output };
+  enum Kind { Regular, Synthetic, Spill, EHFrame, Merge, Output, Class };
 
   Kind kind() const { return (Kind)sectionKind; }
 
@@ -132,7 +132,9 @@ class InputSectionBase : public SectionBase {
                    uint32_t addralign, ArrayRef<uint8_t> data, StringRef name,
                    Kind sectionKind);
 
-  static bool classof(const SectionBase *s) { return s->kind() != Output; }
+  static bool classof(const SectionBase *s) {
+    return s->kind() != Output && s->kind() != Class;
+  }
 
   // The file which contains this section. Its dynamic type is usually
   // ObjFile<ELFT>, but may be an InputFile of InternalKind (for a synthetic
diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index 3ba59c112b8a8..aa3cafb050259 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -276,6 +276,8 @@ getSymbolAssignmentValues(ArrayRef<SectionCommand *> sectionCommands) {
                                                     assign->sym->value));
       continue;
     }
+    if (isa<SectionClassDesc>(cmd))
+      continue;
     for (SectionCommand *subCmd : cast<OutputDesc>(cmd)->osec.commands)
       if (auto *assign = dyn_cast<SymbolAssignment>(subCmd))
         if (assign->sym)
@@ -347,6 +349,8 @@ void LinkerScript::declareSymbols() {
       declareSymbol(assign);
       continue;
     }
+    if (isa<SectionClassDesc>(cmd))
+      continue;
 
     // If the output section directive has constraints,
     // we can't say for sure if it is going to be included or not.
@@ -490,99 +494,130 @@ static void sortInputSections(MutableArrayRef<InputSectionBase *> vec,
 SmallVector<InputSectionBase *, 0>
 LinkerScript::computeInputSections(const InputSectionDescription *cmd,
                                    ArrayRef<InputSectionBase *> sections,
-                                   const OutputSection &outCmd) {
+                                   const SectionBase &outCmd) {
   SmallVector<InputSectionBase *, 0> ret;
-  SmallVector<size_t, 0> indexes;
-  DenseSet<size_t> seen;
   DenseSet<InputSectionBase *> spills;
-  auto sortByPositionThenCommandLine = [&](size_t begin, size_t end) {
-    llvm::sort(MutableArrayRef<size_t>(indexes).slice(begin, end - begin));
-    for (size_t i = begin; i != end; ++i)
-      ret[i] = sections[indexes[i]];
-    sortInputSections(
-        MutableArrayRef<InputSectionBase *>(ret).slice(begin, end - begin),
-        config->sortSection, SortSectionPolicy::None);
+
+  const auto flagsMatch = [cmd](InputSectionBase *sec) {
+    return (sec->flags & cmd->withFlags) == cmd->withFlags &&
+           (sec->flags & cmd->withoutFlags) == 0;
   };
 
   // Collects all sections that satisfy constraints of Cmd.
-  size_t sizeAfterPrevSort = 0;
-  for (const SectionPattern &pat : cmd->sectionPatterns) {
-    size_t sizeBeforeCurrPat = ret.size();
-
-    for (size_t i = 0, e = sections.size(); i != e; ++i) {
-      // Skip if the section is dead or has been matched by a previous pattern
-      // in this input section description.
-      InputSectionBase *sec = sections[i];
-      if (!sec->isLive() || seen.contains(i))
-        continue;
-
-      // For --emit-relocs we have to ignore entries like
-      //   .rela.dyn : { *(.rela.data) }
-      // which are common because they are in the default bfd script.
-      // We do not ignore SHT_REL[A] linker-synthesized sections here because
-      // want to support scripts that do custom layout for them.
-      if (isa<InputSection>(sec) &&
-          cast<InputSection>(sec)->getRelocatedSection())
-        continue;
-
-      // Check the name early to improve performance in the common case.
-      if (!pat.sectionPat.match(sec->name))
-        continue;
-
-      if (!cmd->matchesFile(sec->file) || pat.excludesFile(sec->file) ||
-          (sec->flags & cmd->withFlags) != cmd->withFlags ||
-          (sec->flags & cmd->withoutFlags) != 0)
-        continue;
-
-      if (sec->parent) {
-        // Skip if not allowing multiple matches.
-        if (!config->enableNonContiguousRegions)
+  if (cmd->className.empty()) {
+    DenseSet<size_t> seen;
+    size_t sizeAfterPrevSort = 0;
+    SmallVector<size_t, 0> indexes;
+    auto sortByPositionThenCommandLine = [&](size_t begin, size_t end) {
+      llvm::sort(MutableArrayRef<size_t>(indexes).slice(begin, end - begin));
+      for (size_t i = begin; i != end; ++i)
+        ret[i] = sections[indexes[i]];
+      sortInputSections(
+          MutableArrayRef<InputSectionBase *>(ret).slice(begin, end - begin),
+          config->sortSection, SortSectionPolicy::None);
+    };
+
+    for (const SectionPattern &pat : cmd->sectionPatterns) {
+      size_t sizeBeforeCurrPat = ret.size();
+
+      for (size_t i = 0, e = sections.size(); i != e; ++i) {
+        // Skip if the section is dead or has been matched by a previous pattern
+        // in this input section description.
+        InputSectionBase *sec = sections[i];
+        if (!sec->isLive() || seen.contains(i))
           continue;
 
-        // Disallow spilling into /DISCARD/; special handling would be needed
-        // for this in address assignment, and the semantics are nebulous.
-        if (outCmd.name == "/DISCARD/")
+        // For --emit-relocs we have to ignore entries like
+        //   .rela.dyn : { *(.rela.data) }
+        // which are common because they are in the default bfd script.
+        // We do not ignore SHT_REL[A] linker-synthesized sections here because
+        // want to support scripts that do custom layout for them.
+        if (isa<InputSection>(sec) &&
+            cast<InputSection>(sec)->getRelocatedSection())
           continue;
 
-        // Skip if the section's first match was /DISCARD/; such sections are
-        // always discarded.
-        if (sec->parent->name == "/DISCARD/")
+        // Check the name early to improve performance in the common case.
+        if (!pat.sectionPat.match(sec->name))
           continue;
 
-        // Skip if the section was already matched by a different input section
-        // description within this output section.
-        if (sec->parent == &outCmd)
+        if (!cmd->matchesFile(sec->file) || pat.excludesFile(sec->file) ||
+            !flagsMatch(sec))
           continue;
 
-        spills.insert(sec);
+        if (sec->parent) {
+          // Skip if not allowing multiple matches.
+          if (!config->enableNonContiguousRegions)
+            continue;
+
+          // Disallow spilling out of or into section classes; that's already a
+          // mechanism for spilling.
+          if (isa<SectionClass>(sec->parent) || isa<SectionClass>(outCmd))
+            continue;
+
+          // Disallow spilling into /DISCARD/; special handling would be needed
+          // for this in address assignment, and the semantics are nebulous.
+          if (outCmd.name == "/DISCARD/")
+            continue;
+
+          // Skip if the section was already matched by a different input section
+          // description within this output section or class.
+          if (sec->parent == &outCmd)
+            continue;
+
+          spills.insert(sec);
+        }
+
+        ret.push_back(sec);
+        indexes.push_back(i);
+        seen.insert(i);
       }
 
-      ret.push_back(sec);
-      indexes.push_back(i);
-      seen.insert(i);
+      if (pat.sortOuter == SortSectionPolicy::Default)
+        continue;
+
+      // Matched sections are ordered by radix sort with the keys being (SORT*,
+      // --sort-section, input order), where SORT* (if present) is most
+      // significant.
+      //
+      // Matched sections between the previous SORT* and this SORT* are sorted by
+      // (--sort-alignment, input order).
+      sortByPositionThenCommandLine(sizeAfterPrevSort, sizeBeforeCurrPat);
+      // Matched sections by this SORT* pattern are sorted using all 3 keys.
+      // ret[sizeBeforeCurrPat,ret.size()) are already in the input order, so we
+      // just sort by sortOuter and sortInner.
+      sortInputSections(
+          MutableArrayRef<InputSectionBase *>(ret).slice(sizeBeforeCurrPat),
+          pat.sortOuter, pat.sortInner);
+      sizeAfterPrevSort = ret.size();
     }
 
-    if (pat.sortOuter == SortSectionPolicy::Default)
-      continue;
+    // Matched sections after the last SORT* are sorted by (--sort-alignment,
+    // input order).
+    sortByPositionThenCommandLine(sizeAfterPrevSort, ret.size());
+  } else {
+    SectionClassDesc *scd = script->sectionClasses.lookup(cmd->className);
+    if (!scd) {
+      error("undefined section class '" + cmd->className + "'");
+      return ret;
+    }
+    if (!scd->sc.assigned) {
+      error("section class '" + cmd->className + "' used before assigned");
+      return ret;
+    }
 
-    // Matched sections are ordered by radix sort with the keys being (SORT*,
-    // --sort-section, input order), where SORT* (if present) is most
-    // significant.
-    //
-    // Matched sections between the previous SORT* and this SORT* are sorted by
-    // (--sort-alignment, input order).
-    sortByPositionThenCommandLine(sizeAfterPrevSort, sizeBeforeCurrPat);
-    // Matched sections by this SORT* pattern are sorted using all 3 keys.
-    // ret[sizeBeforeCurrPat,ret.size()) are already in the input order, so we
-    // just sort by sortOuter and sortInner.
-    sortInputSections(
-        MutableArrayRef<InputSectionBase *>(ret).slice(sizeBeforeCurrPat),
-        pat.sortOuter, pat.sortInner);
-    sizeAfterPrevSort = ret.size();
+    for (InputSectionDescription *isd : scd->sc.commands) {
+      for (InputSectionBase *sec : isd->sectionBases) {
+        if (sec->parent == &outCmd || !flagsMatch(sec))
+          continue;
+        bool isSpill = sec->parent && isa<OutputSection>(sec->parent);
+        if (!sec->parent || (isSpill && outCmd.name == "/DISCARD/"))
+          error("section '" + sec->name + "' cannot spill from/to /DISCARD/");
+        if (isSpill)
+          spills.insert(sec);
+        ret.push_back(sec);
+      }
+    }
   }
-  // Matched sections after the last SORT* are sorted by (--sort-alignment,
-  // input order).
-  sortByPositionThenCommandLine(sizeAfterPrevSort, ret.size());
 
   // The flag --enable-non-contiguous-regions may cause sections to match an
   // InputSectionDescription in more than one OutputSection. Matches after the
@@ -707,7 +742,7 @@ void LinkerScript::processSectionCommands() {
         !map.try_emplace(CachedHashStringRef(osec->name), osd).second)
       warn("OVERWRITE_SECTIONS specifies duplicate " + osec->name);
   }
-  for (SectionCommand *&base : sectionCommands)
+  for (SectionCommand *&base : sectionCommands) {
     if (auto *osd = dyn_cast<OutputDesc>(base)) {
       OutputSection *osec = &osd->osec;
       if (OutputDesc *overwrite = map.lookup(CachedHashStringRef(osec->name))) {
@@ -717,7 +752,44 @@ void LinkerScript::processSectionCommands() {
       } else if (process(osec)) {
         osec->sectionIndex = i++;
       }
+    } else if (auto *sc = dyn_cast<SectionClassDesc>(base)) {
+      for (InputSectionDescription *isd : sc->sc.commands) {
+        isd->sectionBases =
+            computeInputSections(isd, ctx.inputSections, sc->sc);
+        for (InputSectionBase *s : isd->sectionBases)
+          s->parent = &sc->sc;
+      }
+      sc->sc.assigned = true;
     }
+  }
+
+  // Check that input sections cannot spill into or out of INSERT,
+  // since the semantics are nebulous. This is also true for OVERWRITE_SECTIONS,
+  // but no check is needed, since the order of processing ensures they cannot
+  // legally reference classes.
+  if (!potentialSpillLists.empty()) {
+    DenseSet<StringRef> insertNames;
+    for (InsertCommand &ic : insertCommands)
+      insertNames.insert(ic.names.begin(), ic.names.end());
+    for (SectionCommand *&base : sectionCommands) {
+      auto *osd = dyn_cast<OutputDesc>(base);
+      if (!osd)
+        continue;
+      OutputSection *os = &osd->osec;
+      if (!insertNames.contains(os->name))
+        continue;
+      for (SectionCommand *sc : os->commands) {
+        auto *isd = dyn_cast<InputSectionDescription>(sc);
+        if (!isd)
+          continue;
+        for (InputSectionBase *isec : isd->sectionBases)
+          if (isa<PotentialSpillSection>(isec) ||
+              potentialSpillLists.contains(isec))
+            error("section '" + isec->name +
+                  "' cannot spill from/to INSERT section '" + os->name + "'");
+      }
+    }
+  }
 
   // If an OVERWRITE_SECTIONS specified output section is not in
   // sectionCommands, append it to the end. The section will be inserted by
@@ -725,6 +797,15 @@ void LinkerScript::processSectionCommands() {
   for (OutputDesc *osd : overwriteSections)
     if (osd->osec.partition == 1 && osd->osec.sectionIndex == UINT32_MAX)
       sectionCommands.push_back(osd);
+
+  // Input sections cannot have a section class parent past this point; they
+  // must have been assigned to an output section.
+  for (const auto &[_, sc] : sectionClasses)
+    for (InputSectionDescription *isd : sc->sc.commands)
+      for (InputSectionBase *sec : isd->sectionBases)
+        if (sec->parent && isa<SectionClass>(sec->parent))
+          error("section '" + sec->name + "' assigned to class '" +
+                sec->parent->name + "' but to no output section");
 }
 
 void LinkerScript::processSymbolAssignments() {
@@ -745,8 +826,8 @@ void LinkerScript::processSymbolAssignments() {
   for (SectionCommand *cmd : sectionCommands) {
     if (auto *assign = dyn_cast<SymbolAssignment>(cmd))
       addSymbol(assign);
-    else
-      for (SectionCommand *subCmd : cast<OutputDesc>(cmd)->osec.commands)
+    else if (auto *od = dyn_cast<OutputDesc>(cmd))
+      for (SectionCommand *subCmd : od->osec.commands)
         if (auto *assign = dyn_cast<SymbolAssignment>(subCmd))
           addSymbol(assign);
   }
@@ -1416,6 +1497,8 @@ const Defined *LinkerScript::assignAddresses() {
       assign->size = dot - assign->addr;
       continue;
     }
+    if (isa<SectionClassDesc>(cmd))
+      continue;
     assignOffsets(&cast<OutputDesc>(cmd)->osec);
   }
 
@@ -1435,7 +1518,7 @@ static bool hasRegionOverflowed(MemoryRegion *mr) {
 // Under-estimates may cause unnecessary spills, but over-estimates can always
 // be corrected on the next pass.
 bool LinkerScript::spillSections() {
-  if (!config->enableNonContiguousRegions)
+  if (potentialSpillLists.empty())
     return false;
 
   bool spilled = false;
diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h
index 734d4e7498aa2..5da3bf3ab582d 100644
--- a/lld/ELF/LinkerScript.h
+++ b/lld/ELF/LinkerScript.h
@@ -35,6 +35,8 @@ class OutputSection;
 class SectionBase;
 class ThunkSection;
 struct OutputDesc;
+struct SectionClass;
+struct SectionClassDesc;
 
 // This represents an r-value in the linker script.
 struct ExprValue {
@@ -78,7 +80,8 @@ enum SectionsCommandKind {
   AssignmentKind, // . = expr or <sym> = expr
   OutputSectionKind,
   InputSectionKind,
-  ByteKind    // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr)
+  ByteKind,   // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr)
+  ClassKind,
 };
 
 struct SectionCommand {
@@ -198,9 +201,12 @@ class InputSectionDescription : public SectionCommand {
 
 public:
   InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0,
-                          uint64_t withoutFlags = 0)
+                          uint64_t withoutFlags = 0, StringRef className = {})
       : SectionCommand(InputSectionKind), filePat(filePattern),
-        withFlags(withFlags), withoutFlags(withoutFlags) {}
+        withFlags(withFlags), withoutFlags(withoutFlags), className(className) {
+    assert((filePattern.empty() || className.empty()) &&
+           "file pattern and class name are mutually exclusive");
+  }
 
   static bool classof(const SectionCommand *c) {
     return c->kind == InputSectionKind;
@@ -228,6 +234,10 @@ class InputSectionDescription : public SectionCommand {
   // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command.
   uint64_t withFlags;
   uint64_t withoutFlags;
+
+  // If present, input section matching uses class membership instead of file
+  // and section patterns (mutually exclusive).
+  StringRef className;
 };
 
 // Represents BYTE(), SHORT(), LONG(), or QUAD().
@@ -289,7 +299,7 @@ class LinkerScript final {
   SmallVector<InputSectionBase *, 0>
   computeInputSections(const InputSectionDescription *,
                        ArrayRef<InputSectionBase *>,
-                       const OutputSection &outCmd);
+                       const SectionBase &outCmd);
 
   SmallVector<InputSectionBase *, 0> createInputSectionList(OutputSection &cmd);
 
@@ -413,6 +423,11 @@ class LinkerScript final {
     PotentialSpillSection *tail;
   };
   llvm::DenseMap<InputSectionBase *, PotentialSpillList> potentialSpillLists;
+
+  // Named lists of input sections that can be collectively referenced in output
+  // section descriptions. Multiple references allow for sections to spill from
+  // one output section to another.
+  llvm::StringMap<SectionClassDesc*> sectionClasses;
 };
 
 struct ScriptWrapper {
diff --git a/lld/ELF/MapFile.cpp b/lld/ELF/MapFile.cpp
index c4f3fdde30f36..1bad529b40329 100644
--- a/lld/ELF/MapFile.cpp
+++ b/lld/ELF/MapFile.cpp
@@ -167,6 +167,8 @@ static void writeMapFile(raw_fd_ostream &os) {
       os << assign->commandString << '\n';
       continue;
     }
+    if (isa<SectionClassDesc>(cmd))
+      continue;
 
     osec = &cast<OutputDesc>(cmd)->osec;
     writeHeader(os, osec->addr, osec->getLMA(), osec->size, osec->addralign);
diff --git a/lld/ELF/OutputSections.h b/lld/ELF/OutputSections.h
index 78fede48a23f2..e77f774556fa0 100644
--- a/lld/ELF/OutputSections.h
+++ b/lld/ELF/OutputSections.h
@@ -137,6 +137,27 @@ struct OutputDesc final : SectionCommand {
   }
 };
 
+// A list of input sections that can be referenced in output descriptions.
+// Multiple references allow sections to spill from one output section to the
+// next.
+struct SectionClass final : public SectionBase {
+  SmallVector<InputSectionDescription *, 0> commands;
+  bool assigned = false;
+
+  SectionClass(StringRef name) : SectionBase(Class, name, 0, 0, 0, 0, 0, 0) {}
+  static bool classof(const SectionBase *s) { return s->kind() == Class; }
+};
+
+struct SectionClassDesc : SectionCommand {
+  SectionClass sc;
+
+  SectionClassDesc(StringRef name) : SectionCommand(ClassKind), sc(name) {}
+
+  static bool classof(const SectionCommand *c) {
+    return c->kind == ClassKind;
+  }
+};
+
 int getPriority(StringRef s);
 
 InputSection *getFirstInputSection(const OutputSection *os);
diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp
index f90ce6fa74075..422bb63b8c03a 100644
--- a/lld/ELF/ScriptParser.cpp
+++ b/lld/ELF/ScriptParser.cpp
@@ -96,6 +96,8 @@ class ScriptParser final : ScriptLexer {
   OutputDesc *readOverlaySectionDescription();
   OutputDesc *readOutputSectionDescription(StringRef outSec);
   SmallVector<SectionCommand *, 0> readOverlay();
+  SectionClassDesc *readSectionClassDescription();
+  StringRef readSectionClassName();
   SmallVector<StringRef, 0> readOutputSectionPhdrs();
   std::pair<uint64_t, uint64_t> readInputSectionFlags();
   InputSectionDescription *readInputSectionDescription(StringRef tok);
@@ -585,6 +587,35 @@ SmallVector<SectionCommand *, 0> ScriptParser::readOverlay() {
   return v;
 }
 
+SectionClassDesc *ScriptParser::readSectionClassDescription() {
+  std::string loc = getCurrentLocation();
+  StringRef name = readSectionClassName();
+  SectionClassDesc *desc = make<Sectio...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/95323


More information about the llvm-commits mailing list