[lld] d2dd36b - [ELF] Better resemble GNU ld when placing orphan sections into memory regions

Igor Kudrin via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 11 00:08:18 PST 2021


Author: Igor Kudrin
Date: 2021-11-11T15:07:38+07:00
New Revision: d2dd36bbbe508ba97ab0adc5a834306f6fdc3a78

URL: https://github.com/llvm/llvm-project/commit/d2dd36bbbe508ba97ab0adc5a834306f6fdc3a78
DIFF: https://github.com/llvm/llvm-project/commit/d2dd36bbbe508ba97ab0adc5a834306f6fdc3a78.diff

LOG: [ELF] Better resemble GNU ld when placing orphan sections into memory regions

An orphan section should be placed in the same memory region as its
anchor section if the latter specifies the memory region explicitly.
If there is no explicit assignment for the anchor section in the linker
script, its memory region is selected by matching attributes, and the
same should be done for the orphan section.

Before the patch, some scripts that were handled smoothly in GNU ld
caused an "error: no memory region specified for section" in lld.

Differential Revision: https://reviews.llvm.org/D112925

Added: 
    lld/test/ELF/linkerscript/orphan-memory.test

Modified: 
    lld/ELF/LinkerScript.cpp
    lld/ELF/LinkerScript.h
    lld/ELF/Writer.cpp

Removed: 
    


################################################################################
diff  --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index 189a703fb5083..1810d8acaa568 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -882,34 +882,41 @@ void LinkerScript::switchTo(OutputSection *sec) {
 
 // This function searches for a memory region to place the given output
 // section in. If found, a pointer to the appropriate memory region is
-// returned. Otherwise, a nullptr is returned.
-MemoryRegion *LinkerScript::findMemoryRegion(OutputSection *sec) {
+// returned in the first member of the pair. Otherwise, a nullptr is returned.
+// The second member of the pair is a hint that should be passed to the
+// subsequent call of this method.
+std::pair<MemoryRegion *, MemoryRegion *>
+LinkerScript::findMemoryRegion(OutputSection *sec, MemoryRegion *hint) {
   // If a memory region name was specified in the output section command,
   // then try to find that region first.
   if (!sec->memoryRegionName.empty()) {
     if (MemoryRegion *m = memoryRegions.lookup(sec->memoryRegionName))
-      return m;
+      return {m, m};
     error("memory region '" + sec->memoryRegionName + "' not declared");
-    return nullptr;
+    return {nullptr, nullptr};
   }
 
   // If at least one memory region is defined, all sections must
   // belong to some memory region. Otherwise, we don't need to do
   // anything for memory regions.
   if (memoryRegions.empty())
-    return nullptr;
+    return {nullptr, nullptr};
+
+  // An allocatable orphan section should continue the previous memory region.
+  if (sec->sectionIndex == UINT32_MAX && (sec->flags & SHF_ALLOC) && hint)
+    return {hint, hint};
 
   // See if a region can be found by matching section flags.
   for (auto &pair : memoryRegions) {
     MemoryRegion *m = pair.second;
     if ((m->flags & sec->flags) && (m->negFlags & sec->flags) == 0)
-      return m;
+      return {m, nullptr};
   }
 
   // Otherwise, no suitable region was found.
   if (sec->flags & SHF_ALLOC)
     error("no memory region specified for section '" + sec->name + "'");
-  return nullptr;
+  return {nullptr, nullptr};
 }
 
 static OutputSection *findFirstSection(PhdrEntry *load) {
@@ -1132,6 +1139,7 @@ void LinkerScript::adjustSectionsBeforeSorting() {
 
 void LinkerScript::adjustSectionsAfterSorting() {
   // Try and find an appropriate memory region to assign offsets in.
+  MemoryRegion *hint = nullptr;
   for (BaseCommand *base : sectionCommands) {
     if (auto *sec = dyn_cast<OutputSection>(base)) {
       if (!sec->lmaRegionName.empty()) {
@@ -1140,7 +1148,7 @@ void LinkerScript::adjustSectionsAfterSorting() {
         else
           error("memory region '" + sec->lmaRegionName + "' not declared");
       }
-      sec->memRegion = findMemoryRegion(sec);
+      std::tie(sec->memRegion, hint) = findMemoryRegion(sec, hint);
     }
   }
 

diff  --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h
index d2487ae0f9d28..e6fec026d7eb4 100644
--- a/lld/ELF/LinkerScript.h
+++ b/lld/ELF/LinkerScript.h
@@ -272,7 +272,8 @@ class LinkerScript final {
 
   std::vector<size_t> getPhdrIndices(OutputSection *sec);
 
-  MemoryRegion *findMemoryRegion(OutputSection *sec);
+  std::pair<MemoryRegion *, MemoryRegion *>
+  findMemoryRegion(OutputSection *sec, MemoryRegion *hint);
 
   void switchTo(OutputSection *sec);
   uint64_t advance(uint64_t size, unsigned align);

diff  --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index ff16922d80fd6..19a71ad9c1195 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -1256,10 +1256,13 @@ findOrphanPos(std::vector<BaseCommand *>::iterator b,
   // Consider all existing sections with the same proximity.
   int proximity = getRankProximity(sec, *i);
   unsigned sortRank = sec->sortRank;
-  if (script->hasPhdrsCommands())
-    // Prevent the orphan section to be placed before the found section because
-    // that can result in adding it to a previous segment and changing flags of
-    // that segment, for example, making a read-only segment writable.
+  if (script->hasPhdrsCommands() || !script->memoryRegions.empty())
+    // Prevent the orphan section from being placed before the found section. If
+    // custom program headers are defined, that helps to avoid adding it to a
+    // previous segment and changing flags of that segment, for example, making
+    // a read-only segment writable. If memory regions are defined, an orphan
+    // section should continue the same region as the found section to better
+    // resemble the behavior of GNU ld.
     sortRank = std::max(sortRank, foundSec->sortRank);
   for (; i != e; ++i) {
     auto *curSec = dyn_cast<OutputSection>(*i);

diff  --git a/lld/test/ELF/linkerscript/orphan-memory.test b/lld/test/ELF/linkerscript/orphan-memory.test
new file mode 100644
index 0000000000000..77c0326f342fe
--- /dev/null
+++ b/lld/test/ELF/linkerscript/orphan-memory.test
@@ -0,0 +1,118 @@
+REQUIRES: x86
+
+RUN: split-file %s %ts
+RUN: llvm-mc -filetype=obj -triple=x86_64 %ts/s -o %t.o
+
+## Check that despite having a lower sort rank, an orphan section '.init_array'
+## is placed after '.data' and '.data2' and in the same memory region.
+
+## Also check that a non-SHF_ALLOC orphan section '.nonalloc' is not placed in
+## a memory region. Both defined memory regions are exhausted after all expected
+## sections are added, thus, trying to put any unexpected section would lead to
+## an error.
+
+RUN: ld.lld -o %t -T %ts/t %t.o
+RUN: llvm-readelf -S %t | FileCheck %s
+
+CHECK: Name        Type       Address          Off           Size
+CHECK: .text       PROGBITS   0000000000008000 {{[0-9a-f]+}} 000004
+CHECK: .data       PROGBITS   0000000000009000 {{[0-9a-f]+}} 000008
+CHECK: .data2      PROGBITS   0000000000009008 {{[0-9a-f]+}} 00000c
+CHECK: .init_array INIT_ARRAY 0000000000009014 {{[0-9a-f]+}} 000010
+CHECK: .nonalloc   PROGBITS   0000000000000000 {{[0-9a-f]+}} 000010
+
+## Check that attributes of memory regions are ignored for orphan sections when
+## the anchor section specifies the memory region explicitly. This seems to
+## contradict https://sourceware.org/binutils/docs/ld/MEMORY.html, but better
+## resembles the way GNU ld actually works.
+
+RUN: ld.lld -o %t2 -T %ts/t2 %t.o
+RUN: llvm-readelf -S %t2 | FileCheck %s
+
+## Same as the previous case, but now properties of sections conflict with
+## memory region attributes. Still, orphan sections are placed in the same
+## regions as their anchors.
+
+RUN: ld.lld -o %t3 -T %ts/t3 %t.o
+RUN: llvm-readelf -S %t3 | FileCheck %s
+
+## Check that when memory regions for anchor sections are not specified
+## explicitly and are selected by attributes, orphan sections are also assigned
+## to memory regions by matching properties.
+
+RUN: ld.lld -o %t4 -T %ts/t4 %t.o
+RUN: llvm-readelf -S %t4 | FileCheck %s --check-prefix=CHECK4
+
+CHECK4: Name        Type       Address          Off           Size
+CHECK4: .text       PROGBITS   0000000000008000 {{[0-9a-f]+}} 000004
+CHECK4: .init_array INIT_ARRAY 0000000000009000 {{[0-9a-f]+}} 000010
+CHECK4: .data       PROGBITS   0000000000009010 {{[0-9a-f]+}} 000008
+CHECK4: .data2      PROGBITS   0000000000009018 {{[0-9a-f]+}} 00000c
+CHECK4: .nonalloc   PROGBITS   0000000000000000 {{[0-9a-f]+}} 000010
+
+#--- s
+  .text
+  .zero 4
+
+  .data
+  .zero 8
+
+  .section .data2,"aw",@progbits
+  .zero 0xc
+
+  .section .init_array,"aw",@init_array
+  .zero 0x10
+
+  .section .nonalloc,""
+  .zero 0x10
+
+#--- t
+MEMORY
+{
+  TEXT : ORIGIN = 0x8000, LENGTH = 0x4
+  DATA : ORIGIN = 0x9000, LENGTH = 0x24
+}
+
+SECTIONS
+{
+  .text : { *(.text) } > TEXT
+  .data : { *(.data) } > DATA
+}
+
+#--- t2
+MEMORY
+{
+  TEXT (rwx) : ORIGIN = 0x8000, LENGTH = 0x4
+  DATA (rwx) : ORIGIN = 0x9000, LENGTH = 0x24
+}
+
+SECTIONS
+{
+  .text : { *(.text) } > TEXT
+  .data : { *(.data) } > DATA
+}
+
+#--- t3
+MEMORY
+{
+  TEXT (!w) : ORIGIN = 0x8000, LENGTH = 0x4
+  DATA (!w) : ORIGIN = 0x9000, LENGTH = 0x24
+}
+
+SECTIONS
+{
+  .text : { *(.text) } > TEXT
+  .data : { *(.data) } > DATA
+}
+
+#--- t4
+MEMORY
+{
+  TEXT (rx)  : ORIGIN = 0x8000, LENGTH = 0x4
+  DATA (w!x) : ORIGIN = 0x9000, LENGTH = 0x24
+}
+
+SECTIONS
+{
+  .text : { *(.text) }
+}


        


More information about the llvm-commits mailing list