[lld] r369889 - [ELF] Make LinkerScript::assignAddresses iterative

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 26 03:23:31 PDT 2019


Author: maskray
Date: Mon Aug 26 03:23:31 2019
New Revision: 369889

URL: http://llvm.org/viewvc/llvm-project?rev=369889&view=rev
Log:
[ELF] Make LinkerScript::assignAddresses iterative

PR42990. For `SECTIONS { b = a; . = 0xff00 + (a >> 8); a = .; }`,
we currently set st_value(a)=0xff00 while st_value(b)=0xffff.

The following call tree demonstrates the problem:

```
link<ELF64LE>(Args);
  Script->declareSymbols(); // insert a and b as absolute Defined
  Writer<ELFT>().run();
    Script->processSectionCommands();
      addSymbol(cmd);       // a and b are re-inserted. LinkerScript::getSymbolValue
                            // is lazily called by subsequent evaluation
    finalizeSections();
      forEachRelSec(scanRelocations<ELFT>);
        processRelocAux     // another problem PR42506, not affected by this patch
      finalizeAddressDependentContent(); // loop executed once
        script->assignAddresses(); // a = 0, b = 0xff00
    script->assignAddresses(); // a = 0xff00, _end = 0xffff
```

We need another assignAddresses() to finalize the value of `a`.

This patch

1) modifies assignAddress() to track the original section/value of each
  symbol and return a symbol whose section/value has changed.
2) moves the post-finalizeSections assignAddress() inside the loop
  of finalizeAddressDependentContent() and makes it iterative.
  Symbol assignment may not converge so we make a few attempts before
  bailing out.

Note, assignAddresses() must be called at least twice. The penultimate
call finalized section addresses while the last finalized symbol values.
It is somewhat obscure and there was no comment.
linkerscript/addr-zero.test tests this.

Reviewed By: ruiu

Differential Revision: https://reviews.llvm.org/D66279

Added:
    lld/trunk/test/ELF/linkerscript/symbol-assign-many-passes.test
    lld/trunk/test/ELF/linkerscript/symbol-assign-many-passes2.test
    lld/trunk/test/ELF/linkerscript/symbol-assign-not-converge.test
Modified:
    lld/trunk/ELF/LinkerScript.cpp
    lld/trunk/ELF/LinkerScript.h
    lld/trunk/ELF/Relocations.cpp
    lld/trunk/ELF/Writer.cpp

Modified: lld/trunk/ELF/LinkerScript.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/LinkerScript.cpp?rev=369889&r1=369888&r2=369889&view=diff
==============================================================================
--- lld/trunk/ELF/LinkerScript.cpp (original)
+++ lld/trunk/ELF/LinkerScript.cpp Mon Aug 26 03:23:31 2019
@@ -210,6 +210,44 @@ static void declareSymbol(SymbolAssignme
   sym->scriptDefined = true;
 }
 
+using SymbolAssignmentMap =
+    DenseMap<const Defined *, std::pair<SectionBase *, uint64_t>>;
+
+// Collect section/value pairs of linker-script-defined symbols. This is used to
+// check whether symbol values converge.
+static SymbolAssignmentMap
+getSymbolAssignmentValues(const std::vector<BaseCommand *> &sectionCommands) {
+  SymbolAssignmentMap ret;
+  for (BaseCommand *base : sectionCommands) {
+    if (auto *cmd = dyn_cast<SymbolAssignment>(base)) {
+      if (cmd->sym) // sym is nullptr for dot.
+        ret.try_emplace(cmd->sym,
+                        std::make_pair(cmd->sym->section, cmd->sym->value));
+      continue;
+    }
+    for (BaseCommand *sub_base : cast<OutputSection>(base)->sectionCommands)
+      if (auto *cmd = dyn_cast<SymbolAssignment>(sub_base))
+        if (cmd->sym)
+          ret.try_emplace(cmd->sym,
+                          std::make_pair(cmd->sym->section, cmd->sym->value));
+  }
+  return ret;
+}
+
+// Returns the lexicographical smallest (for determinism) Defined whose
+// section/value has changed.
+static const Defined *
+getChangedSymbolAssignment(const SymbolAssignmentMap &oldValues) {
+  const Defined *changed = nullptr;
+  for (auto &it : oldValues) {
+    const Defined *sym = it.first;
+    if (std::make_pair(sym->section, sym->value) != it.second &&
+        (!changed || sym->getName() < changed->getName()))
+      changed = sym;
+  }
+  return changed;
+}
+
 // This method is used to handle INSERT AFTER statement. Here we rebuild
 // the list of script commands to mix sections inserted into.
 void LinkerScript::processInsertCommands() {
@@ -1051,7 +1089,9 @@ static uint64_t getInitialDot() {
 // Here we assign addresses as instructed by linker script SECTIONS
 // sub-commands. Doing that allows us to use final VA values, so here
 // we also handle rest commands like symbol assignments and ASSERTs.
-void LinkerScript::assignAddresses() {
+// Returns a symbol that has changed its section or value, or nullptr if no
+// symbol has changed.
+const Defined *LinkerScript::assignAddresses() {
   dot = getInitialDot();
 
   auto deleter = std::make_unique<AddressState>();
@@ -1059,6 +1099,7 @@ void LinkerScript::assignAddresses() {
   errorOnMissingSection = true;
   switchTo(aether);
 
+  SymbolAssignmentMap oldValues = getSymbolAssignmentValues(sectionCommands);
   for (BaseCommand *base : sectionCommands) {
     if (auto *cmd = dyn_cast<SymbolAssignment>(base)) {
       cmd->addr = dot;
@@ -1068,7 +1109,9 @@ void LinkerScript::assignAddresses() {
     }
     assignOffsets(cast<OutputSection>(base));
   }
+
   ctx = nullptr;
+  return getChangedSymbolAssignment(oldValues);
 }
 
 // Creates program headers as instructed by PHDRS linker script command.

Modified: lld/trunk/ELF/LinkerScript.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/LinkerScript.h?rev=369889&r1=369888&r2=369889&view=diff
==============================================================================
--- lld/trunk/ELF/LinkerScript.h (original)
+++ lld/trunk/ELF/LinkerScript.h Mon Aug 26 03:23:31 2019
@@ -271,7 +271,7 @@ public:
   bool needsInterpSection();
 
   bool shouldKeep(InputSectionBase *s);
-  void assignAddresses();
+  const Defined *assignAddresses();
   void allocateHeaders(std::vector<PhdrEntry *> &phdrs);
   void processSectionCommands();
   void declareSymbols();

Modified: lld/trunk/ELF/Relocations.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Relocations.cpp?rev=369889&r1=369888&r2=369889&view=diff
==============================================================================
--- lld/trunk/ELF/Relocations.cpp (original)
+++ lld/trunk/ELF/Relocations.cpp Mon Aug 26 03:23:31 2019
@@ -1692,11 +1692,6 @@ bool ThunkCreator::createThunks(ArrayRef
   if (pass == 0 && target->getThunkSectionSpacing())
     createInitialThunkSections(outputSections);
 
-  // With Thunk Size much smaller than branch range we expect to
-  // converge quickly; if we get to 10 something has gone wrong.
-  if (pass == 10)
-    fatal("thunk creation not converged");
-
   // Create all the Thunks and insert them into synthetic ThunkSections. The
   // ThunkSections are later inserted back into InputSectionDescriptions.
   // We separate the creation of ThunkSections from the insertion of the

Modified: lld/trunk/ELF/Writer.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Writer.cpp?rev=369889&r1=369888&r2=369889&view=diff
==============================================================================
--- lld/trunk/ELF/Writer.cpp (original)
+++ lld/trunk/ELF/Writer.cpp Mon Aug 26 03:23:31 2019
@@ -578,8 +578,6 @@ template <class ELFT> void Writer<ELFT>:
   if (errorCount())
     return;
 
-  script->assignAddresses();
-
   // If -compressed-debug-sections is specified, we need to compress
   // .debug_* sections. Do it right now because it changes the size of
   // output sections.
@@ -1562,15 +1560,18 @@ template <class ELFT> void Writer<ELFT>:
 template <class ELFT> void Writer<ELFT>::finalizeAddressDependentContent() {
   ThunkCreator tc;
   AArch64Err843419Patcher a64p;
+  script->assignAddresses();
 
-  // For some targets, like x86, this loop iterates only once.
+  int assignPasses = 0;
   for (;;) {
-    bool changed = false;
+    bool changed = target->needsThunks && tc.createThunks(outputSections);
 
-    script->assignAddresses();
-
-    if (target->needsThunks)
-      changed |= tc.createThunks(outputSections);
+    // With Thunk Size much smaller than branch range we expect to
+    // converge quickly; if we get to 10 something has gone wrong.
+    if (changed && tc.pass >= 10) {
+      error("thunk creation not converged");
+      break;
+    }
 
     if (config->fixCortexA53Errata843419) {
       if (changed)
@@ -1587,8 +1588,19 @@ template <class ELFT> void Writer<ELFT>:
         changed |= part.relrDyn->updateAllocSize();
     }
 
-    if (!changed)
-      return;
+    const Defined *changedSym = script->assignAddresses();
+    if (!changed) {
+      // Some symbols may be dependent on section addresses. When we break the
+      // loop, the symbol values are finalized because a previous
+      // assignAddresses() finalized section addresses.
+      if (!changedSym)
+        break;
+      if (++assignPasses == 5) {
+        errorOrWarn("assignment to symbol " + toString(*changedSym) +
+                    " does not converge");
+        break;
+      }
+    }
   }
 }
 

Added: lld/trunk/test/ELF/linkerscript/symbol-assign-many-passes.test
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/linkerscript/symbol-assign-many-passes.test?rev=369889&view=auto
==============================================================================
--- lld/trunk/test/ELF/linkerscript/symbol-assign-many-passes.test (added)
+++ lld/trunk/test/ELF/linkerscript/symbol-assign-many-passes.test Mon Aug 26 03:23:31 2019
@@ -0,0 +1,25 @@
+# REQUIRES: aarch64, x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64 /dev/null -o %t.o
+# RUN: ld.lld %t.o -T %s -o %t
+# RUN: llvm-nm %t | FileCheck %s
+
+## AArch64 needs thunks and has different address finalization process, so test
+## it as well.
+# RUN: llvm-mc -filetype=obj -triple=aarch64 /dev/null -o %t.o
+# RUN: ld.lld %t.o -T %s -o %t
+# RUN: llvm-nm %t | FileCheck %s
+
+# CHECK: 0000000000001004 T a
+# CHECK: 0000000000001003 T b
+# CHECK: 0000000000001002 T c
+# CHECK: 0000000000001001 T d
+# CHECK: 0000000000001000 T e
+
+SECTIONS {
+  . = 0x1000;
+  a = b + 1;
+  b = c + 1;
+  c = d + 1;
+  d = e + 1;
+  e = .;
+}

Added: lld/trunk/test/ELF/linkerscript/symbol-assign-many-passes2.test
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/linkerscript/symbol-assign-many-passes2.test?rev=369889&view=auto
==============================================================================
--- lld/trunk/test/ELF/linkerscript/symbol-assign-many-passes2.test (added)
+++ lld/trunk/test/ELF/linkerscript/symbol-assign-many-passes2.test Mon Aug 26 03:23:31 2019
@@ -0,0 +1,28 @@
+# REQUIRES: arm
+# RUN: llvm-mc -arm-add-build-attributes -filetype=obj -triple=armv7a-linux-gnueabihf %S/../arm-thunk-many-passes.s -o %t.o
+# RUN: ld.lld %t.o -T %s -o %t
+# RUN: llvm-nm %t | FileCheck %s
+
+## arm-thunk-many-passes.s is worst case case of thunk generation that takes 9
+## passes to converge. It takes a few more passes to make symbol assignment
+## converge. Test that
+## 1. we don't error that "thunk creation not converged".
+## 2. we check convergence of symbols defined in an output section descriptor.
+
+# CHECK: 01011050 T a
+# CHECK: 0101104f T b
+# CHECK: 0101104e T c
+# CHECK: 0101104d T d
+# CHECK: 0101104c T e
+
+SECTIONS {
+  . = SIZEOF_HEADERS;
+  .text 0x00011000 : {
+    a = b + 1;
+    b = c + 1;
+    c = d + 1;
+    d = e + 1;
+    *(.text);
+  }
+  e = .;
+}

Added: lld/trunk/test/ELF/linkerscript/symbol-assign-not-converge.test
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/linkerscript/symbol-assign-not-converge.test?rev=369889&view=auto
==============================================================================
--- lld/trunk/test/ELF/linkerscript/symbol-assign-not-converge.test (added)
+++ lld/trunk/test/ELF/linkerscript/symbol-assign-not-converge.test Mon Aug 26 03:23:31 2019
@@ -0,0 +1,20 @@
+# REQUIRES: aarch64, x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64 /dev/null -o %t.o
+# RUN: not ld.lld %t.o -T %s -o /dev/null 2>&1 | FileCheck %s
+
+## AArch64 needs thunks and has different address finalization process, so test
+## it as well.
+# RUN: llvm-mc -filetype=obj -triple=aarch64 /dev/null -o %t.o
+# RUN: not ld.lld %t.o -T %s -o /dev/null 2>&1 | FileCheck %s
+
+# CHECK: error: assignment to symbol a does not converge
+
+SECTIONS {
+  . = 0x1000;
+  a = b;
+  b = c;
+  c = d;
+  d = e;
+  e = f;
+  f = .;
+}




More information about the llvm-commits mailing list