[lld] 9670029 - [ELF] Keep st_type for symbol assignment

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 20 16:05:53 PDT 2020


Author: Fangrui Song
Date: 2020-08-20T16:05:27-07:00
New Revision: 9670029b6b302c75bb373fb1814f4e02790c4da8

URL: https://github.com/llvm/llvm-project/commit/9670029b6b302c75bb373fb1814f4e02790c4da8
DIFF: https://github.com/llvm/llvm-project/commit/9670029b6b302c75bb373fb1814f4e02790c4da8.diff

LOG: [ELF] Keep st_type for symbol assignment

PR46970: for `alias = aliasee`, the alias can be used in relocation processing
and on ARM st_type does affect Thumb interworking. It is thus desirable for the
alias to get the same st_type.

Note that the st_size field should not be inherited because some tools use
st_size=0 as a heuristic to detect aliases. Retaining st_size can thwart such
heuristics and cause aliases to be preferred over the original symbols.

Differential Revision: https://reviews.llvm.org/D86263

Added: 
    lld/test/ELF/linkerscript/symbol-assign-type.s

Modified: 
    lld/ELF/LinkerScript.cpp
    lld/ELF/LinkerScript.h
    lld/docs/ELF/linker_script.rst
    lld/test/ELF/arm-thumb-interwork-ifunc.s
    lld/test/ELF/linkerscript/common-assign.s

Removed: 
    


################################################################################
diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index 7e97576923c97..11f0fc9d5fbe2 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -180,7 +180,7 @@ void LinkerScript::addSymbol(SymbolAssignment *cmd) {
   // write expressions like this: `alignment = 16; . = ALIGN(., alignment)`.
   uint64_t symValue = value.sec ? 0 : value.getValue();
 
-  Defined newSym(nullptr, cmd->name, STB_GLOBAL, visibility, STT_NOTYPE,
+  Defined newSym(nullptr, cmd->name, STB_GLOBAL, visibility, value.type,
                  symValue, 0, sec);
 
   Symbol *sym = symtab->insert(cmd->name);
@@ -317,6 +317,7 @@ void LinkerScript::assignSymbol(SymbolAssignment *cmd, bool inSec) {
     cmd->sym->section = v.sec;
     cmd->sym->value = v.getSectionOffset();
   }
+  cmd->sym->type = v.type;
 }
 
 static std::string getFilename(InputFile *file) {
@@ -1223,8 +1224,14 @@ ExprValue LinkerScript::getSymbolValue(StringRef name, const Twine &loc) {
   }
 
   if (Symbol *sym = symtab->find(name)) {
-    if (auto *ds = dyn_cast<Defined>(sym))
-      return {ds->section, false, ds->value, loc};
+    if (auto *ds = dyn_cast<Defined>(sym)) {
+      ExprValue v{ds->section, false, ds->value, loc};
+      // Retain the original st_type, so that the alias will get the same
+      // behavior in relocation processing. Any operation will reset st_type to
+      // STT_NOTYPE.
+      v.type = ds->type;
+      return v;
+    }
     if (isa<SharedSymbol>(sym))
       if (!errorOnMissingSection)
         return {nullptr, false, 0, loc};

diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h
index ec4fc22db486b..4a1a5fd71b67f 100644
--- a/lld/ELF/LinkerScript.h
+++ b/lld/ELF/LinkerScript.h
@@ -59,6 +59,10 @@ struct ExprValue {
   uint64_t val;
   uint64_t alignment = 1;
 
+  // The original st_type if the expression represents a symbol. Any operation
+  // resets type to STT_NOTYPE.
+  uint8_t type = llvm::ELF::STT_NOTYPE;
+
   // Original source location. Used for error messages.
   std::string loc;
 };

diff --git a/lld/docs/ELF/linker_script.rst b/lld/docs/ELF/linker_script.rst
index 0f409b2020ace..debddbf511b60 100644
--- a/lld/docs/ELF/linker_script.rst
+++ b/lld/docs/ELF/linker_script.rst
@@ -17,6 +17,25 @@ possible. We reserve the right to make different implementation choices where
 it is appropriate for LLD. Intentional deviations will be documented in this
 file.
 
+Symbol assignment
+~~~~~~~~~~~~~~~~~
+
+A symbol assignment looks like:
+
+::
+
+  symbol = expression;
+  symbol += expression;
+
+The first form defines ``symbol``. If ``symbol`` is already defined, it will be
+overridden. The other form requires ``symbol`` to be already defined.
+
+For a simple assignment like ``alias = aliasee;``, the ``st_type`` field is
+copied from the original symbol. Any arithmetic operation (e.g. ``+ 0``) will
+reset ``st_type`` to ``STT_NOTYPE``.
+
+The ``st_size`` field is set to 0.
+
 Output section description
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 

diff --git a/lld/test/ELF/arm-thumb-interwork-ifunc.s b/lld/test/ELF/arm-thumb-interwork-ifunc.s
index 947bc2dd77863..f77439c6c50b4 100644
--- a/lld/test/ELF/arm-thumb-interwork-ifunc.s
+++ b/lld/test/ELF/arm-thumb-interwork-ifunc.s
@@ -3,6 +3,11 @@
 // RUN: ld.lld %t.o -o %t
 // RUN: llvm-objdump --triple=armv7a-none-linux-gnueabi -d --no-show-raw-insn %t | FileCheck %s
 
+/// A symbol assignment defined alias inherits st_type and gets the same treatment.
+// RUN: llvm-mc --triple=armv7a-linux-gnueabihf -arm-add-build-attributes -filetype=obj --defsym ALIAS=1 -o %t1.o %s
+// RUN: ld.lld --defsym foo=foo1 %t1.o -o %t1
+// RUN: llvm-objdump --triple=armv7a-none-linux-gnueabi -d --no-show-raw-insn %t | FileCheck %s
+
 /// Non-preemptible ifuncs are called via a PLT entry which is always Arm
 /// state, expect the ARM callers to go direct to the PLT entry, Thumb
 /// branches are indirected via state change thunks, the bl is changed to blx.
@@ -10,9 +15,15 @@
  .syntax unified
  .text
  .balign 0x1000
+.ifdef ALIAS
+ .type foo1 STT_GNU_IFUNC
+ .globl foo1
+foo1:
+.else
  .type foo STT_GNU_IFUNC
  .globl foo
 foo:
+.endif
  bx lr
 
  .section .text.1, "ax", %progbits

diff --git a/lld/test/ELF/linkerscript/common-assign.s b/lld/test/ELF/linkerscript/common-assign.s
index ef0ad14ce92dd..f0d783886e4d6 100644
--- a/lld/test/ELF/linkerscript/common-assign.s
+++ b/lld/test/ELF/linkerscript/common-assign.s
@@ -27,7 +27,7 @@
 # CHECK-NEXT:     Value: [[FOO]]
 # CHECK-NEXT:     Size: 0
 # CHECK-NEXT:     Binding: Global
-# CHECK-NEXT:     Type: None
+# CHECK-NEXT:     Type: Object
 # CHECK-NEXT:     Other: 0
 # CHECK-NEXT:     Section: .bss
 # CHECK-NEXT:   }
@@ -36,7 +36,7 @@
 # CHECK-NEXT:     Value: [[BAR]]
 # CHECK-NEXT:     Size: 0
 # CHECK-NEXT:     Binding: Global
-# CHECK-NEXT:     Type: None
+# CHECK-NEXT:     Type: Object
 # CHECK-NEXT:     Other: 0
 # CHECK-NEXT:     Section: .bss
 # CHECK-NEXT:   }

diff --git a/lld/test/ELF/linkerscript/symbol-assign-type.s b/lld/test/ELF/linkerscript/symbol-assign-type.s
new file mode 100644
index 0000000000000..3f7dfd6faad47
--- /dev/null
+++ b/lld/test/ELF/linkerscript/symbol-assign-type.s
@@ -0,0 +1,45 @@
+# REQUIRES: x86
+## Keep st_type for simple assignment (`alias = aliasee`). This property is
+## desired on some targets, where symbol types can affect relocation processing
+## (e.g. Thumb interworking). However, the st_size field should not be retained
+## because some tools use st_size=0 as a heuristic to detect aliases. With any
+## operation, it can be argued that the new symbol may not be of the same type,
+## so reset st_type to STT_NOTYPE.
+
+## NOTE: GNU ld retains st_type for many operations.
+
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %t/main.s -o %t.o
+# RUN: ld.lld -T %t/a.lds %t.o -o %t1
+# RUN: llvm-readelf -s %t1 | FileCheck %s
+
+# CHECK:      Size Type   Bind   Vis     Ndx Name
+# CHECK:         1 FUNC   GLOBAL DEFAULT   1 _start
+# CHECK:         0 FUNC   GLOBAL DEFAULT   1 retain1
+# CHECK-NEXT:    0 FUNC   GLOBAL DEFAULT   1 retain2
+# CHECK-NEXT:    0 NOTYPE GLOBAL DEFAULT   1 drop1
+# CHECK-NEXT:    0 NOTYPE GLOBAL DEFAULT ABS drop2
+# CHECK-NEXT:    0 NOTYPE GLOBAL DEFAULT ABS drop3
+
+# RUN: ld.lld --defsym 'retain=_start' --defsym 'drop=_start+0' %t.o -o %t2
+# RUN: llvm-readelf -s %t2 | FileCheck %s --check-prefix=DEFSYM
+
+# DEFSYM:        0 FUNC   GLOBAL DEFAULT   1 retain
+# DEFSYM-NEXT:   0 NOTYPE GLOBAL DEFAULT   1 drop
+
+#--- a.lds
+retain1 = _start;
+retain2 = 1 ? _start : 0;
+
+## Reset to STT_NOTYPE if any operation is performed,
+## even if the operation is an identity function.
+drop1 = _start + 0;
+drop2 = 0 ? _start : 1;
+drop3 = -_start;
+
+#--- main.s
+.globl _start
+.type _start, @function
+_start:
+  ret
+.size _start, 1


        


More information about the llvm-commits mailing list