[lld] ec29538 - [ELF] Assign file offsets of non-SHF_ALLOC after SHF_ALLOC and set sh_addr=0 to non-SHF_ALLOC

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 18 09:03:09 PDT 2020


Author: Fangrui Song
Date: 2020-08-18T09:03:01-07:00
New Revision: ec29538af2e0886a65f479d6a533956a1c478132

URL: https://github.com/llvm/llvm-project/commit/ec29538af2e0886a65f479d6a533956a1c478132
DIFF: https://github.com/llvm/llvm-project/commit/ec29538af2e0886a65f479d6a533956a1c478132.diff

LOG: [ELF] Assign file offsets of non-SHF_ALLOC after SHF_ALLOC and set sh_addr=0 to non-SHF_ALLOC

* GNU ld places non-SHF_ALLOC sections after SHF_ALLOC sections. This has the
  advantage that the file offsets of a non-SHF_ALLOC section cannot be
  contained in a PT_LOAD segment. This patch matches that behavior.
* For non-SHF_ALLOC non-orphan sections, GNU ld may assign a non-zero sh_addr
  and treat them similarly to SHT_NOBITS sections (not advancing the location
  counter). This is an alternative approach to what we did in D85100.
  By placing non-SHF_ALLOC sections at the end, we can drop the special
  cases in createSection and findOrphanPos added by D85100.

  Unlike GNU ld, we set sh_addr to 0 for non-SHF_ALLOC sections. 0 is
  arguably better because non-SHF_ALLOC sections don't appear in the memory
  image.

ELF spec says:

> sh_addr - If the section will appear in the memory image of a process, this
> member gives the address at which the section's first byte should
> reside. Otherwise, the member contains 0.

D85100 appears to have taken a detour. Viewing D85100 and this patch together,
the overall complexity increases slightly (one more 3-line loop) and
compatibility with GNU ld improves.

The behavior we don't want to match is the special treatment of .symtab,
.shstrtab and .strtab: these sections can be matched in LLD but not in GNU ld.

Reviewed By: jhenderson, psmith

Differential Revision: https://reviews.llvm.org/D85867

Added: 
    lld/test/ELF/linkerscript/sections-nonalloc.s

Modified: 
    lld/ELF/LinkerScript.cpp
    lld/ELF/Writer.cpp
    lld/test/ELF/linkerscript/memory-region-alignment.test
    lld/test/ELF/linkerscript/sections.s
    lld/test/ELF/linkerscript/symbols-non-alloc.test

Removed: 
    


################################################################################
diff  --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index a187aa1eb05a..7e97576923c9 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -586,8 +586,6 @@ static OutputSection *findByName(ArrayRef<BaseCommand *> vec,
 static OutputSection *createSection(InputSectionBase *isec,
                                     StringRef outsecName) {
   OutputSection *sec = script->createOutputSection(outsecName, "<internal>");
-  if (!(isec->flags & SHF_ALLOC))
-    sec->addrExpr = [] { return 0; };
   sec->recordSection(isec);
   return sec;
 }
@@ -852,21 +850,27 @@ static OutputSection *findFirstSection(PhdrEntry *load) {
 void LinkerScript::assignOffsets(OutputSection *sec) {
   const bool sameMemRegion = ctx->memRegion == sec->memRegion;
   const bool prevLMARegionIsDefault = ctx->lmaRegion == nullptr;
+  const uint64_t savedDot = dot;
   ctx->memRegion = sec->memRegion;
   ctx->lmaRegion = sec->lmaRegion;
-  if (ctx->memRegion)
-    dot = ctx->memRegion->curPos;
-
-  if (sec->addrExpr)
-    setDot(sec->addrExpr, sec->location, false);
 
-  // If the address of the section has been moved forward by an explicit
-  // expression so that it now starts past the current curPos of the enclosing
-  // region, we need to expand the current region to account for the space
-  // between the previous section, if any, and the start of this section.
-  if (ctx->memRegion && ctx->memRegion->curPos < dot)
-    expandMemoryRegion(ctx->memRegion, dot - ctx->memRegion->curPos,
-                       ctx->memRegion->name, sec->name);
+  if (sec->flags & SHF_ALLOC) {
+    if (ctx->memRegion)
+      dot = ctx->memRegion->curPos;
+    if (sec->addrExpr)
+      setDot(sec->addrExpr, sec->location, false);
+
+    // If the address of the section has been moved forward by an explicit
+    // expression so that it now starts past the current curPos of the enclosing
+    // region, we need to expand the current region to account for the space
+    // between the previous section, if any, and the start of this section.
+    if (ctx->memRegion && ctx->memRegion->curPos < dot)
+      expandMemoryRegion(ctx->memRegion, dot - ctx->memRegion->curPos,
+                         ctx->memRegion->name, sec->name);
+  } else {
+    // Non-SHF_ALLOC sections have zero addresses.
+    dot = 0;
+  }
 
   switchTo(sec);
 
@@ -918,6 +922,11 @@ void LinkerScript::assignOffsets(OutputSection *sec) {
     for (InputSection *sec : cast<InputSectionDescription>(base)->sections)
       output(sec);
   }
+
+  // Non-SHF_ALLOC sections do not affect the addresses of other OutputSections
+  // as they are not part of the process image.
+  if (!(sec->flags & SHF_ALLOC))
+    dot = savedDot;
 }
 
 static bool isDiscardable(OutputSection &sec) {

diff  --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index cffdce0d6c31..b26817b66e27 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -1234,13 +1234,7 @@ static bool shouldSkip(BaseCommand *cmd) {
 static std::vector<BaseCommand *>::iterator
 findOrphanPos(std::vector<BaseCommand *>::iterator b,
               std::vector<BaseCommand *>::iterator e) {
-  // OutputSections without the SHF_ALLOC flag are not part of the memory image
-  // and their addresses usually don't matter. Place any orphan sections without
-  // the SHF_ALLOC flag at the end so that these do not affect the address
-  // assignment of OutputSections with the SHF_ALLOC flag.
   OutputSection *sec = cast<OutputSection>(*e);
-  if (!(sec->flags & SHF_ALLOC))
-    return e;
 
   // Find the first element that has as close a rank as possible.
   auto i = std::max_element(b, e, [=](BaseCommand *a, BaseCommand *b) {
@@ -2589,7 +2583,11 @@ template <class ELFT> void Writer<ELFT>::assignFileOffsets() {
       if (p->p_type == PT_LOAD && (p->p_flags & PF_X))
         lastRX = p;
 
+  // Layout SHF_ALLOC sections before non-SHF_ALLOC sections. A non-SHF_ALLOC
+  // will not occupy file offsets contained by a PT_LOAD.
   for (OutputSection *sec : outputSections) {
+    if (!(sec->flags & SHF_ALLOC))
+      continue;
     off = setFileOffset(sec, off);
 
     // If this is a last section of the last executable segment and that
@@ -2599,6 +2597,9 @@ template <class ELFT> void Writer<ELFT>::assignFileOffsets() {
         lastRX->lastSec == sec)
       off = alignTo(off, config->commonPageSize);
   }
+  for (OutputSection *sec : outputSections)
+    if (!(sec->flags & SHF_ALLOC))
+      off = setFileOffset(sec, off);
 
   sectionHeaderOff = alignTo(off, config->wordsize);
   fileSize = sectionHeaderOff + (outputSections.size() + 1) * sizeof(Elf_Shdr);

diff  --git a/lld/test/ELF/linkerscript/memory-region-alignment.test b/lld/test/ELF/linkerscript/memory-region-alignment.test
index f0540a7f11a7..ea858299a7eb 100644
--- a/lld/test/ELF/linkerscript/memory-region-alignment.test
+++ b/lld/test/ELF/linkerscript/memory-region-alignment.test
@@ -1,5 +1,5 @@
 # REQUIRES: x86
-# RUN: echo '.section .foo,"a"; .quad 0; .section .zed,"M",@progbits,1; .byte 0' > %t.s
+# RUN: echo '.section .foo,"a"; .quad 0; .section .zed,"aM",@progbits,1; .byte 0' > %t.s
 # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %t.s -o %t.o
 
 MEMORY {
@@ -28,24 +28,25 @@ SECTIONS {
 # CHECK-NEXT:   Offset: 0x1008
 # CHECK-NEXT:   Size: 8
 
-# CHECK:        Name: .text
+# CHECK:        Name: .zed
 # CHECK-NEXT:   Type: SHT_PROGBITS
 # CHECK-NEXT:   Flags [
 # CHECK-NEXT:     SHF_ALLOC
-# CHECK-NEXT:     SHF_EXECINSTR
+# CHECK-NEXT:     SHF_MERGE
 # CHECK-NEXT:   ]
 # CHECK-NEXT:   Address: 0x10
 # CHECK-NEXT:   Offset: 0x1010
-# CHECK-NEXT:   Size: 0
+# CHECK-NEXT:   Size: 1
 
-# CHECK:        Name: .zed
+# CHECK:        Name: .text
 # CHECK-NEXT:   Type: SHT_PROGBITS
 # CHECK-NEXT:   Flags [
-# CHECK-NEXT:     SHF_MERGE
+# CHECK-NEXT:     SHF_ALLOC
+# CHECK-NEXT:     SHF_EXECINSTR
 # CHECK-NEXT:   ]
-# CHECK-NEXT:   Address: 0x10
-# CHECK-NEXT:   Offset: 0x1010
-# CHECK-NEXT:   Size: 1
+# CHECK-NEXT:   Address: 0x14
+# CHECK-NEXT:   Offset: 0x1014
+# CHECK-NEXT:   Size: 0
 
 # CHECK:        Name: .comment
 # CHECK-NEXT:   Type: SHT_PROGBITS
@@ -54,5 +55,5 @@ SECTIONS {
 # CHECK-NEXT:     SHF_STRINGS
 # CHECK-NEXT:   ]
 # CHECK-NEXT:   Address: 0x0
-# CHECK-NEXT:   Offset: 0x1011
+# CHECK-NEXT:   Offset: 0x1014
 # CHECK-NEXT:   Size: 8

diff  --git a/lld/test/ELF/linkerscript/sections-nonalloc.s b/lld/test/ELF/linkerscript/sections-nonalloc.s
new file mode 100644
index 000000000000..a0669f701d8c
--- /dev/null
+++ b/lld/test/ELF/linkerscript/sections-nonalloc.s
@@ -0,0 +1,90 @@
+# REQUIRES: x86
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %t/main.s -o %t.o
+
+## Non-SHF_ALLOC sections are placed after all SHF_ALLOC sections. They will
+## thus not be contained in a PT_LOAD segment. data2 has a PT_LOAD segment,
+## even if it is preceded by a non-SHF_ALLOC section. Non-SHF_ALLOC orphan
+## sections have zero addresses.
+## NOTE: GNU ld assigns non-zero addresses to non-SHF_ALLOC non-orphan sections.
+# RUN: ld.lld -T %t/a.lds %t.o -o %ta
+# RUN: llvm-readelf -S -l %ta | FileCheck %s
+
+# CHECK:       [Nr] Name      Type     Address          Off    Size   ES Flg Lk
+# CHECK-NEXT:  [ 0]           NULL     0000000000000000 000000 000000 00      0
+# CHECK-NEXT:  [ 1] .bss      NOBITS   0000000000000000 001000 000001 00  WA  0
+# CHECK-NEXT:  [ 2] data1     PROGBITS 0000000000000001 001001 000001 00  WA  0
+# CHECK-NEXT:  [ 3] data3     PROGBITS 0000000000000002 001002 000001 00  WA  0
+# CHECK-NEXT:  [ 4] other1    PROGBITS 0000000000000000 001008 000001 00      0
+# CHECK-NEXT:  [ 5] other2    PROGBITS 0000000000000000 001010 000001 00      0
+## Orphan placement places other3, .symtab, .shstrtab and .strtab after other2.
+# CHECK-NEXT:  [ 6] other3    PROGBITS 0000000000000000 001020 000001 00      0
+# CHECK-NEXT:  [ 7] .symtab   SYMTAB   0000000000000000 001028 000030 18      9
+# CHECK-NEXT:  [ 8] .shstrtab STRTAB   0000000000000000 001058 00004d 00      0
+# CHECK-NEXT:  [ 9] .strtab   STRTAB   0000000000000000 0010a5 000008 00      0
+# CHECK-NEXT:  [10] data2     PROGBITS 0000000000000003 001003 000001 00  WA  0
+# CHECK-NEXT:  [11] .text     PROGBITS 0000000000000004 001004 000001 00  AX  0
+
+# CHECK:       Type       Offset   VirtAddr           PhysAddr           FileSiz  MemSiz   Flg Align
+# CHECK-NEXT:  LOAD       0x001000 0x0000000000000000 0x0000000000000000 0x000004 0x000004 RW  0x1000
+# CHECK-NEXT:  LOAD       0x001004 0x0000000000000004 0x0000000000000004 0x000001 0x000001 R E 0x1000
+# CHECK-NEXT:  GNU_STACK  0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW  0
+
+# RUN: ld.lld -T %t/b.lds %t.o -o %tb
+# RUN: llvm-readelf -S -l %tb | FileCheck %s --check-prefix=CHECK1
+
+# CHECK1:      [Nr] Name      Type     Address          Off    Size   ES Flg Lk
+# CHECK1-NEXT: [ 0]           NULL     0000000000000000 000000 000000 00      0
+# CHECK1-NEXT: [ 1] .text     PROGBITS 00000000000000b0 0000b0 000001 00  AX  0
+# CHECK1-NEXT: [ 2] .bss      NOBITS   00000000000000b1 0000b1 000001 00  WA  0
+# CHECK1-NEXT: [ 3] data1     PROGBITS 00000000000000b2 0000b2 000001 00  WA  0
+# CHECK1-NEXT: [ 4] data3     PROGBITS 00000000000000b3 0000b3 000001 00  WA  0
+# CHECK1-NEXT: [ 5] other1    PROGBITS 0000000000000000 0000b8 000001 00      0
+# CHECK1-NEXT: [ 6] other2    PROGBITS 0000000000000000 0000c0 000001 00      0
+# CHECK1-NEXT: [ 7] other3    PROGBITS 0000000000000000 0000d0 000001 00      0
+# CHECK1-NEXT: [ 8] .symtab   SYMTAB   0000000000000000 0000d8 000030 18     10
+# CHECK1-NEXT: [ 9] .shstrtab STRTAB   0000000000000000 000108 00004d 00      0
+# CHECK1-NEXT: [10] .strtab   STRTAB   0000000000000000 000155 000008 00      0
+# CHECK1-NEXT: [11] data2     PROGBITS 00000000000000b4 0000b4 000001 00  WA  0
+# CHECK1:      Type       Offset   VirtAddr           PhysAddr           FileSiz  MemSiz   Flg Align
+# CHECK1-NEXT: LOAD       0x000000 0x0000000000000000 0x0000000000000000 0x0000b5 0x0000b5 RWE 0x1000
+# CHECK1-NEXT: 0x60000000 0x0000b8 0x0000000000000000 0x0000000000000000 0x000009 0x000001     0x8
+
+#--- a.lds
+SECTIONS {
+  .bss : { *(.bss) }
+  data1 : { *(data1) }
+  other1 : { *(other1) }
+  other2 : { *(other2) }
+  data2 : { *(data2) }
+  .text : { *(.text) }
+  /DISCARD/ : { *(.comment) }
+}
+
+#--- b.lds
+PHDRS {
+  text PT_LOAD FILEHDR PHDRS;
+  foo 0x60000000 FLAGS (0);
+}
+SECTIONS {
+  . = SIZEOF_HEADERS;
+  .text : { *(.text) } : text
+  .bss : { *(.bss) } : text
+  data1 : { *(data1) } : text
+  other1 : { *(other1) } : foo
+  other2 : { *(other2) } : foo
+  data2 : { *(data1) } : text
+  /DISCARD/ : { *(.comment) }
+}
+
+#--- main.s
+.globl _start
+_start: nop
+.section data1,"aw"; .byte 0
+.section data2,"aw"; .byte 0
+.section data3,"aw"; .byte 0
+.bss; .byte 0
+
+.section other1; .p2align 2; .byte 0
+.section other2; .p2align 3; .byte 0
+.section other3; .p2align 4; .byte 0

diff  --git a/lld/test/ELF/linkerscript/sections.s b/lld/test/ELF/linkerscript/sections.s
index fa346406b743..539aa9c17058 100644
--- a/lld/test/ELF/linkerscript/sections.s
+++ b/lld/test/ELF/linkerscript/sections.s
@@ -25,39 +25,6 @@
 # SEC-DEFAULT: 7 .shstrtab     0000003b {{[0-9a-f]*}}
 # SEC-DEFAULT: 8 .strtab       00000008 {{[0-9a-f]*}}
 
-## Sections are placed in the order specified by the linker script. .data has
-## a PT_LOAD segment, even if it is preceded by a non-alloc section. To
-## allow this, place non-alloc orphan sections at the end and advance
-## location counters for non-alloc non-orphan sections.
-# RUN: echo "SECTIONS { \
-# RUN:          .bss : { *(.bss) } \
-# RUN:          other : { *(other) } \
-# RUN:          .shstrtab : { *(.shstrtab) } \
-# RUN:          .symtab : { *(.symtab) } \
-# RUN:          .strtab : { *(.strtab) } \
-# RUN:          .data : { *(.data) } \
-# RUN:          .text : { *(.text) } }" > %t3.lds
-# RUN: ld.lld -o %t3a -T %t3.lds %t
-# RUN: llvm-readelf -S -l %t3a | FileCheck --check-prefix=SEC-ORDER %s
-# RUN: ld.lld -o %t3b -T %t3.lds --unique %t
-# RUN: llvm-readelf -S -l %t3b | FileCheck --check-prefix=SEC-ORDER %s
-
-# SEC-ORDER:       [Nr] Name      Type     Address          Off    Size   ES Flg
-# SEC-ORDER:       [ 0]           NULL     0000000000000000 000000 000000 00
-# SEC-ORDER-NEXT:  [ 1] .bss      NOBITS   0000000000000000 001000 000002 00  WA
-# SEC-ORDER-NEXT:  [ 2] other     PROGBITS 0000000000000002 001002 000003 00  WA
-# SEC-ORDER-NEXT:  [ 3] .shstrtab STRTAB   0000000000000005 001005 00003b 00
-# SEC-ORDER-NEXT:  [ 4] .symtab   SYMTAB   0000000000000040 001040 000030 18
-# SEC-ORDER-NEXT:  [ 5] .strtab   STRTAB   0000000000000070 001070 000008 00
-# SEC-ORDER-NEXT:  [ 6] .data     PROGBITS 0000000000000078 001078 000020 00  WA
-# SEC-ORDER-NEXT:  [ 7] .text     PROGBITS 0000000000000098 001098 00000e 00  AX
-# SEC-ORDER-NEXT:  [ 8] .comment  PROGBITS 0000000000000000 0010a6 000008 01  MS
-
-# SEC-ORDER:        Type      Offset   VirtAddr           PhysAddr           FileSiz  MemSiz   Flg Align
-# SEC-ORDER-NEXT:   LOAD      0x001000 0x0000000000000000 0x0000000000000000 0x000098 0x000098 RW  0x1000
-# SEC-ORDER-NEXT:   LOAD      0x001098 0x0000000000000098 0x0000000000000098 0x00000e 0x00000e R E 0x1000
-# SEC-ORDER-NEXT:   GNU_STACK 0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW  0
-
 # .text and .data have swapped names but proper sizes and types.
 # RUN: echo "SECTIONS { \
 # RUN:          .data : { *(.text) } \
@@ -112,12 +79,12 @@
 # SEP-BY-NONALLOC:      [ 1] .text     PROGBITS 0000000000000000 001000 00000e 00  AX
 # SEP-BY-NONALLOC-NEXT: [ 2] .data     PROGBITS 000000000000000e 00100e 000020 00  WA
 # SEP-BY-NONALLOC-NEXT: [ 3] .bss      NOBITS   000000000000002e 00102e 000002 00  WA
-# SEP-BY-NONALLOC-NEXT: [ 4] .comment  PROGBITS 0000000000000030 00102e 000008 01  MS
-# SEP-BY-NONALLOC-NEXT: [ 5] other     PROGBITS 0000000000000038 001038 000003 00  WA
+# SEP-BY-NONALLOC-NEXT: [ 4] .comment  PROGBITS 0000000000000000 001033 000008 01  MS
+# SEP-BY-NONALLOC:      [ 8] other     PROGBITS 0000000000000030 001030 000003 00  WA
 
 # SEP-BY-NONALLOC:      Type      Offset   VirtAddr           PhysAddr           FileSiz  MemSiz   Flg Align
 # SEP-BY-NONALLOC-NEXT: LOAD      0x001000 0x0000000000000000 0x0000000000000000 0x00000e 0x00000e R E 0x1000
-# SEP-BY-NONALLOC-NEXT: LOAD      0x00100e 0x000000000000000e 0x000000000000000e 0x00002d 0x00002d RW  0x1000
+# SEP-BY-NONALLOC-NEXT: LOAD      0x00100e 0x000000000000000e 0x000000000000000e 0x000025 0x000025 RW  0x1000
 # SEP-BY-NONALLOC-NEXT: GNU_STACK 0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW  0
 
 # Input section pattern contains additional semicolon.

diff  --git a/lld/test/ELF/linkerscript/symbols-non-alloc.test b/lld/test/ELF/linkerscript/symbols-non-alloc.test
index 2bd6fc84df46..ca47b2bfbcac 100644
--- a/lld/test/ELF/linkerscript/symbols-non-alloc.test
+++ b/lld/test/ELF/linkerscript/symbols-non-alloc.test
@@ -1,6 +1,6 @@
 # REQUIRES: x86
 ## The address of a symbol assignment after a non-SHF_ALLOC section equals the
-## end address of the section.
+## end address of the last SHF_ALLOC section.
 
 # RUN: echo '.section .nonalloc,""; .quad 0' \
 # RUN:   | llvm-mc -filetype=obj -triple=x86_64-unknown-linux - -o %t
@@ -8,10 +8,11 @@
 # RUN: llvm-objdump --section-headers -t %t2 | FileCheck %s
 
 # CHECK: Sections:
-# CHECK:  .nonalloc     00000008 0000000000000120
+# CHECK:  .text         00000000 0000000000000120
+# CHECK:  .nonalloc     00000008 0000000000000000
 
 # CHECK: SYMBOL TABLE:
-# CHECK:  0000000000000128 g .nonalloc 0000000000000000 Sym
+# CHECK:  0000000000000120 g .nonalloc 0000000000000000 Sym
 
 SECTIONS {
   . = SIZEOF_HEADERS;


        


More information about the llvm-commits mailing list