[PATCH] D120389: [ELF] Check COMMON symbols for PROVIDE and don't redefine COMMON symbols edata/end/etext

Fangrui Song via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 22 23:29:14 PST 2022


MaskRay created this revision.
MaskRay added reviewers: pcc, peter.smith, rprichard.
Herald added subscribers: arichardson, emaste.
MaskRay requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

The GNU ld definitions precedence: regular symbol assignment > relocatable object definition > `PROVIDE` symbol assignment.

GNU ld's internal linker scripts define the non-reserved (by C and C++)
edata/end/etext with `PROVIDE` so the relocatable object definition takes
precedence. This makes sense because `int end;` is valid.

We currently redefine such symbols if they are COMMON, but not if they are
regular definitions, so `int end;` with -fcommon is essentially a UB in ld.lld.
Fix this (also improve consistency and GNU ld consistency) by using the
`isDefined` code path for `isCommon`. In GNU ld, reserved identifiers like
`__ehdr_start` do not use `PROVIDE`, while we treat them all as `PROVIDE`, this
seems fine.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D120389

Files:
  lld/ELF/LinkerScript.cpp
  lld/ELF/Writer.cpp
  lld/test/ELF/edata-etext.s


Index: lld/test/ELF/edata-etext.s
===================================================================
--- lld/test/ELF/edata-etext.s
+++ lld/test/ELF/edata-etext.s
@@ -1,7 +1,10 @@
 # REQUIRES: x86
-# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
-# RUN: ld.lld %t.o -o %t
-# RUN: llvm-objdump -t --section-headers %t | FileCheck %s
+# RUN: rm -rf %t && split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %t/a.s -o %t/a.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %t/b.s -o %t/b.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %t/c.s -o %t/c.o
+# RUN: ld.lld %t/a.o -o %t/a
+# RUN: llvm-objdump -t --section-headers %t/a | FileCheck %s
 
 ## This checks that:
 ## 1) Address of _etext is the first location after the last read-only loadable segment.
@@ -32,6 +35,23 @@
 # RELOCATABLE-NEXT:  0000000000000000 *UND* 0000000000000000 _end
 # RELOCATABLE-NEXT:  0000000000000000 *UND* 0000000000000000 _etext
 
+## If a relocatable object file defines non-reserved identifiers (by C and C++)
+## edata/end/etext, don't redefine them. Note: GNU ld redefines the reserved
+## _edata while we don't for simplicty.
+# RUN: ld.lld %t/b.o -o %t/b
+# RUN: llvm-objdump -t %t/b | FileCheck %s --check-prefix=CHECK2
+# RUN: ld.lld %t/c.o -o %t/c
+# RUN: llvm-objdump -t %t/c | FileCheck %s --check-prefix=CHECK2
+## PROVIDE does not redefine defined symbols, even if COMMON.
+# RUN: ld.lld %t/c.o %t/lds -o %t/c
+# RUN: llvm-objdump -t %t/c | FileCheck %s --check-prefix=CHECK2
+
+# CHECK2:       [[#%x,]] g     O .bss   0000000000000001 _edata
+# CHECK2-NEXT:  [[#%x,]] g     O .bss   0000000000000001 edata
+# CHECK2-NEXT:  [[#%x,]] g     O .bss   0000000000000001 end
+# CHECK2-NEXT:  [[#%x,]] g     O .bss   0000000000000001 etext
+
+#--- a.s
 .global _edata,_end,_etext,_start,edata,end,etext
 .text
 _start:
@@ -41,3 +61,31 @@
 .bss
   .align 4
   .space 6
+
+#--- b.s
+.bss
+.macro def x
+  .globl \x
+  .type \x, @object
+  \x: .byte 0
+  .size \x, 1
+.endm
+def _edata
+def edata
+def end
+def etext
+
+#--- c.s
+.comm _edata,1,1
+.comm edata,1,1
+.comm end,1,1
+.comm etext,1,1
+
+#--- lds
+SECTIONS {
+  .text : { *(.text) }
+
+  PROVIDE(etext = .);
+  PROVIDE(edata = .);
+  PROVIDE(end = .);
+}
Index: lld/ELF/Writer.cpp
===================================================================
--- lld/ELF/Writer.cpp
+++ lld/ELF/Writer.cpp
@@ -165,7 +165,7 @@
 static Defined *addOptionalRegular(StringRef name, SectionBase *sec,
                                    uint64_t val, uint8_t stOther = STV_HIDDEN) {
   Symbol *s = symtab->find(name);
-  if (!s || s->isDefined())
+  if (!s || s->isDefined() || s->isCommon())
     return nullptr;
 
   s->resolve(Defined{nullptr, StringRef(), STB_GLOBAL, stOther, STT_NOTYPE, val,
Index: lld/ELF/LinkerScript.cpp
===================================================================
--- lld/ELF/LinkerScript.cpp
+++ lld/ELF/LinkerScript.cpp
@@ -203,7 +203,7 @@
   // If a symbol was in PROVIDE(), we need to define it only
   // when it is a referenced undefined symbol.
   Symbol *b = symtab->find(cmd->name);
-  if (b && !b->isDefined())
+  if (b && !b->isDefined() && !b->isCommon())
     return true;
   return false;
 }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D120389.410722.patch
Type: text/x-patch
Size: 3220 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220223/18310a4b/attachment.bin>


More information about the llvm-commits mailing list