[llvm-branch-commits] [lld] c4e2167 - [ELF] Keep st_type for symbol assignment

Fangrui Song via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Aug 24 12:18:22 PDT 2020


Author: Fangrui Song
Date: 2020-08-24T12:17:48-07:00
New Revision: c4e216711d001316df2313a85c8af73a85d804c0

URL: https://github.com/llvm/llvm-project/commit/c4e216711d001316df2313a85c8af73a85d804c0
DIFF: https://github.com/llvm/llvm-project/commit/c4e216711d001316df2313a85c8af73a85d804c0.diff

LOG: [ELF] Keep st_type for symbol assignment

PR46970: for `alias = aliasee`, the alias can be used in relocation processing
and on ARM st_type does affect Thumb interworking. It is thus desirable for the
alias to get the same st_type.

Note that the st_size field should not be inherited because some tools use
st_size=0 as a heuristic to detect aliases. Retaining st_size can thwart such
heuristics and cause aliases to be preferred over the original symbols.

Differential Revision: https://reviews.llvm.org/D86263

(cherry picked from commit 9670029b6b302c75bb373fb1814f4e02790c4da8)
The test symbol-assign-type.s was rewritten to not depend on 'split-file'.

Added: 
    lld/test/ELF/linkerscript/symbol-assign-type.s

Modified: 
    lld/ELF/LinkerScript.cpp
    lld/ELF/LinkerScript.h
    lld/docs/ELF/linker_script.rst
    lld/test/ELF/arm-thumb-interwork-ifunc.s
    lld/test/ELF/linkerscript/common-assign.s

Removed: 
    


################################################################################
diff  --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index 72e2ebff9b8c..6de2cd65b973 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -180,7 +180,7 @@ void LinkerScript::addSymbol(SymbolAssignment *cmd) {
   // write expressions like this: `alignment = 16; . = ALIGN(., alignment)`.
   uint64_t symValue = value.sec ? 0 : value.getValue();
 
-  Defined newSym(nullptr, cmd->name, STB_GLOBAL, visibility, STT_NOTYPE,
+  Defined newSym(nullptr, cmd->name, STB_GLOBAL, visibility, value.type,
                  symValue, 0, sec);
 
   Symbol *sym = symtab->insert(cmd->name);
@@ -317,6 +317,7 @@ void LinkerScript::assignSymbol(SymbolAssignment *cmd, bool inSec) {
     cmd->sym->section = v.sec;
     cmd->sym->value = v.getSectionOffset();
   }
+  cmd->sym->type = v.type;
 }
 
 static std::string getFilename(InputFile *file) {
@@ -1215,8 +1216,14 @@ ExprValue LinkerScript::getSymbolValue(StringRef name, const Twine &loc) {
   }
 
   if (Symbol *sym = symtab->find(name)) {
-    if (auto *ds = dyn_cast<Defined>(sym))
-      return {ds->section, false, ds->value, loc};
+    if (auto *ds = dyn_cast<Defined>(sym)) {
+      ExprValue v{ds->section, false, ds->value, loc};
+      // Retain the original st_type, so that the alias will get the same
+      // behavior in relocation processing. Any operation will reset st_type to
+      // STT_NOTYPE.
+      v.type = ds->type;
+      return v;
+    }
     if (isa<SharedSymbol>(sym))
       if (!errorOnMissingSection)
         return {nullptr, false, 0, loc};

diff  --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h
index ec4fc22db486..4a1a5fd71b67 100644
--- a/lld/ELF/LinkerScript.h
+++ b/lld/ELF/LinkerScript.h
@@ -59,6 +59,10 @@ struct ExprValue {
   uint64_t val;
   uint64_t alignment = 1;
 
+  // The original st_type if the expression represents a symbol. Any operation
+  // resets type to STT_NOTYPE.
+  uint8_t type = llvm::ELF::STT_NOTYPE;
+
   // Original source location. Used for error messages.
   std::string loc;
 };

diff  --git a/lld/docs/ELF/linker_script.rst b/lld/docs/ELF/linker_script.rst
index 0f409b2020ac..debddbf511b6 100644
--- a/lld/docs/ELF/linker_script.rst
+++ b/lld/docs/ELF/linker_script.rst
@@ -17,6 +17,25 @@ possible. We reserve the right to make different implementation choices where
 it is appropriate for LLD. Intentional deviations will be documented in this
 file.
 
+Symbol assignment
+~~~~~~~~~~~~~~~~~
+
+A symbol assignment looks like:
+
+::
+
+  symbol = expression;
+  symbol += expression;
+
+The first form defines ``symbol``. If ``symbol`` is already defined, it will be
+overridden. The other form requires ``symbol`` to be already defined.
+
+For a simple assignment like ``alias = aliasee;``, the ``st_type`` field is
+copied from the original symbol. Any arithmetic operation (e.g. ``+ 0``) will
+reset ``st_type`` to ``STT_NOTYPE``.
+
+The ``st_size`` field is set to 0.
+
 Output section description
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 

diff  --git a/lld/test/ELF/arm-thumb-interwork-ifunc.s b/lld/test/ELF/arm-thumb-interwork-ifunc.s
index 947bc2dd7786..f77439c6c50b 100644
--- a/lld/test/ELF/arm-thumb-interwork-ifunc.s
+++ b/lld/test/ELF/arm-thumb-interwork-ifunc.s
@@ -3,6 +3,11 @@
 // RUN: ld.lld %t.o -o %t
 // RUN: llvm-objdump --triple=armv7a-none-linux-gnueabi -d --no-show-raw-insn %t | FileCheck %s
 
+/// An alias defined by a symbol assignment inherits st_type and gets the same treatment.
+// RUN: llvm-mc --triple=armv7a-linux-gnueabihf -arm-add-build-attributes -filetype=obj --defsym ALIAS=1 -o %t1.o %s
+// RUN: ld.lld --defsym foo=foo1 %t1.o -o %t1
+// RUN: llvm-objdump --triple=armv7a-none-linux-gnueabi -d --no-show-raw-insn %t | FileCheck %s
+
 /// Non-preemptible ifuncs are called via a PLT entry which is always Arm
 /// state, expect the ARM callers to go direct to the PLT entry, Thumb
 /// branches are indirected via state change thunks, the bl is changed to blx.
@@ -10,9 +15,15 @@
  .syntax unified
  .text
  .balign 0x1000
+.ifdef ALIAS
+ .type foo1 STT_GNU_IFUNC
+ .globl foo1
+foo1:
+.else
  .type foo STT_GNU_IFUNC
  .globl foo
 foo:
+.endif
  bx lr
 
  .section .text.1, "ax", %progbits

diff  --git a/lld/test/ELF/linkerscript/common-assign.s b/lld/test/ELF/linkerscript/common-assign.s
index ef0ad14ce92d..f0d783886e4d 100644
--- a/lld/test/ELF/linkerscript/common-assign.s
+++ b/lld/test/ELF/linkerscript/common-assign.s
@@ -27,7 +27,7 @@
 # CHECK-NEXT:     Value: [[FOO]]
 # CHECK-NEXT:     Size: 0
 # CHECK-NEXT:     Binding: Global
-# CHECK-NEXT:     Type: None
+# CHECK-NEXT:     Type: Object
 # CHECK-NEXT:     Other: 0
 # CHECK-NEXT:     Section: .bss
 # CHECK-NEXT:   }
@@ -36,7 +36,7 @@
 # CHECK-NEXT:     Value: [[BAR]]
 # CHECK-NEXT:     Size: 0
 # CHECK-NEXT:     Binding: Global
-# CHECK-NEXT:     Type: None
+# CHECK-NEXT:     Type: Object
 # CHECK-NEXT:     Other: 0
 # CHECK-NEXT:     Section: .bss
 # CHECK-NEXT:   }

diff  --git a/lld/test/ELF/linkerscript/symbol-assign-type.s b/lld/test/ELF/linkerscript/symbol-assign-type.s
new file mode 100644
index 000000000000..c3db8ce8c8ae
--- /dev/null
+++ b/lld/test/ELF/linkerscript/symbol-assign-type.s
@@ -0,0 +1,39 @@
+# REQUIRES: x86
+## Keep st_type for simple assignment (`alias = aliasee`). This property is
+## desired on some targets, where symbol types can affect relocation processing
+## (e.g. Thumb interworking). However, the st_size field should not be retained
+## because some tools use st_size=0 as a heuristic to detect aliases. With any
+## operation, it can be argued that the new symbol may not be of the same type,
+## so reset st_type to STT_NOTYPE.
+
+## NOTE: GNU ld retains st_type for many operations.
+
+# RUN: echo 'retain1 = _start; \
+# RUN:   retain2 = 1 ? _start : 0; \
+# RUN:   drop1 = _start + 0; \
+# RUN:   drop2 = 0 ? _start : 1; \
+# RUN:   drop3 = -_start; \
+# RUN: ' > %t.lds
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
+# RUN: ld.lld -T %t.lds %t.o -o %t1
+# RUN: llvm-readelf -s %t1 | FileCheck %s
+
+# CHECK:      Size Type   Bind   Vis     Ndx Name
+# CHECK:         1 FUNC   GLOBAL DEFAULT   1 _start
+# CHECK:         0 FUNC   GLOBAL DEFAULT   1 retain1
+# CHECK-NEXT:    0 FUNC   GLOBAL DEFAULT   1 retain2
+# CHECK-NEXT:    0 NOTYPE GLOBAL DEFAULT   1 drop1
+# CHECK-NEXT:    0 NOTYPE GLOBAL DEFAULT ABS drop2
+# CHECK-NEXT:    0 NOTYPE GLOBAL DEFAULT ABS drop3
+
+# RUN: ld.lld --defsym 'retain=_start' --defsym 'drop=_start+0' %t.o -o %t2
+# RUN: llvm-readelf -s %t2 | FileCheck %s --check-prefix=DEFSYM
+
+# DEFSYM:        0 FUNC   GLOBAL DEFAULT   1 retain
+# DEFSYM-NEXT:   0 NOTYPE GLOBAL DEFAULT   1 drop
+
+.globl _start
+.type _start, @function
+_start:
+  ret
+.size _start, 1


        


More information about the llvm-branch-commits mailing list