[lld] r281736 - Change how we compute offsets with linker scripts.

Rafael Espindola via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 16 08:10:23 PDT 2016


Author: rafael
Date: Fri Sep 16 10:10:23 2016
New Revision: 281736

URL: http://llvm.org/viewvc/llvm-project?rev=281736&view=rev
Log:
Change how we compute offsets with linker scripts.

This fixes pr30367, but more importantly, it changes how we compute offsets.

Now offset computation is a walk over linker script commands, like the
rest of assignAddresses. IMHO this is simpler to understand, and if we
ever have to create multiple output sections or chunks to change how we
handle test/ELF/linkerscript/alternate-sections.s, it should be easier
to do.

Modified:
    lld/trunk/ELF/LinkerScript.cpp
    lld/trunk/ELF/LinkerScript.h
    lld/trunk/test/ELF/linkerscript/merge-sections.s
    lld/trunk/test/ELF/linkerscript/symbols-synthetic.s

Modified: lld/trunk/ELF/LinkerScript.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/LinkerScript.cpp?rev=281736&r1=281735&r2=281736&view=diff
==============================================================================
--- lld/trunk/ELF/LinkerScript.cpp (original)
+++ lld/trunk/ELF/LinkerScript.cpp Fri Sep 16 10:10:23 2016
@@ -142,31 +142,49 @@ static bool checkConstraint(uint64_t Fla
 }
 
 template <class ELFT>
-static bool matchConstraints(ArrayRef<InputSectionBase<ELFT> *> Sections,
+static bool matchConstraints(ArrayRef<InputSectionData *> Sections,
                              ConstraintKind Kind) {
   if (Kind == ConstraintKind::NoConstraint)
     return true;
-  return llvm::all_of(Sections, [=](InputSectionBase<ELFT> *Sec) {
+  return llvm::all_of(Sections, [=](InputSectionData *Sec2) {
+    auto *Sec = static_cast<InputSectionBase<ELFT> *>(Sec2);
     return checkConstraint(Sec->getSectionHdr()->sh_flags, Kind);
   });
 }
 
-// Returns input sections filtered by given glob patterns.
+// Compute and remember which sections the InputSectionDescription matches.
 template <class ELFT>
-std::vector<InputSectionBase<ELFT> *>
-LinkerScript<ELFT>::getInputSections(const InputSectionDescription *I) {
+void LinkerScript<ELFT>::computeInputSections(InputSectionDescription *I,
+                                              ConstraintKind Constraint) {
   const Regex &Re = I->SectionRe;
-  std::vector<InputSectionBase<ELFT> *> Ret;
   for (ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles())
     if (fileMatches(I, sys::path::filename(F->getName())))
       for (InputSectionBase<ELFT> *S : F->getSections())
         if (!isDiscarded(S) && !S->OutSec &&
             const_cast<Regex &>(Re).match(S->Name))
-          Ret.push_back(S);
+          I->Sections.push_back(S);
 
   if (const_cast<Regex &>(Re).match("COMMON"))
-    Ret.push_back(CommonInputSection<ELFT>::X);
-  return Ret;
+    I->Sections.push_back(CommonInputSection<ELFT>::X);
+
+  if (!matchConstraints<ELFT>(I->Sections, Constraint)) {
+    I->Sections.clear();
+    return;
+  }
+
+  if (I->SortInner)
+    std::stable_sort(I->Sections.begin(), I->Sections.end(),
+                     getComparator(I->SortInner));
+  if (I->SortOuter)
+    std::stable_sort(I->Sections.begin(), I->Sections.end(),
+                     getComparator(I->SortOuter));
+
+  // We do not add duplicate input sections, so mark them with a dummy output
+  // section for now.
+  for (InputSectionData *S : I->Sections) {
+    auto *S2 = static_cast<InputSectionBase<ELFT> *>(S);
+    S2->OutSec = (OutputSectionBase<ELFT> *)-1;
+  }
 }
 
 template <class ELFT>
@@ -181,30 +199,18 @@ template <class ELFT>
 std::vector<InputSectionBase<ELFT> *>
 LinkerScript<ELFT>::createInputSectionList(OutputSectionCommand &OutCmd) {
   std::vector<InputSectionBase<ELFT> *> Ret;
-  DenseSet<InputSectionBase<ELFT> *> SectionIndex;
 
   for (const std::unique_ptr<BaseCommand> &Base : OutCmd.Commands) {
     if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base.get())) {
       if (shouldDefine<ELFT>(OutCmd))
         addSymbol<ELFT>(OutCmd);
-      OutCmd->GoesAfter = Ret.empty() ? nullptr : Ret.back();
       continue;
     }
 
     auto *Cmd = cast<InputSectionDescription>(Base.get());
-    std::vector<InputSectionBase<ELFT> *> V = getInputSections(Cmd);
-    if (!matchConstraints<ELFT>(V, OutCmd.Constraint))
-      continue;
-    if (Cmd->SortInner)
-      std::stable_sort(V.begin(), V.end(), getComparator(Cmd->SortInner));
-    if (Cmd->SortOuter)
-      std::stable_sort(V.begin(), V.end(), getComparator(Cmd->SortOuter));
-
-    // Add all input sections corresponding to rule 'Cmd' to
-    // resulting vector. We do not add duplicate input sections.
-    for (InputSectionBase<ELFT> *S : V)
-      if (SectionIndex.insert(S).second)
-        Ret.push_back(S);
+    computeInputSections(Cmd, OutCmd.Constraint);
+    for (InputSectionData *S : Cmd->Sections)
+      Ret.push_back(static_cast<InputSectionBase<ELFT> *>(S));
   }
   return Ret;
 }
@@ -320,84 +326,74 @@ static void assignSectionSymbol(SymbolAs
   Body->Value = Cmd->Expression(Sec->getVA() + Off);
 }
 
-// Linker script may define start and end symbols for special section types,
-// like .got, .eh_frame_hdr, .eh_frame and others. Those sections are not a list
-// of regular input input sections, therefore our way of defining symbols for
-// regular sections will not work. The approach we use for special section types
-// is not perfect - it handles only start and end symbols.
-template <class ELFT>
-void addStartEndSymbols(OutputSectionCommand *Cmd,
-                        OutputSectionBase<ELFT> *Sec) {
-  bool Start = true;
-  BaseCommand *PrevCmd = nullptr;
-
-  for (std::unique_ptr<BaseCommand> &Base : Cmd->Commands) {
-    if (auto *AssignCmd = dyn_cast<SymbolAssignment>(Base.get())) {
-      assignSectionSymbol<ELFT>(AssignCmd, Sec, Start ? 0 : Sec->getSize());
-    } else {
-      if (!Start && isa<SymbolAssignment>(PrevCmd))
-        error("section '" + Sec->getName() +
-              "' supports only start and end symbols");
-      Start = false;
-    }
-    PrevCmd = Base.get();
+template <class ELFT> void LinkerScript<ELFT>::output(InputSection<ELFT> *S) {
+  if (!AlreadyOutputIS.insert(S).second)
+    return;
+  bool IsTbss =
+      (CurOutSec->getFlags() & SHF_TLS) && CurOutSec->getType() == SHT_NOBITS;
+
+  uintX_t Pos = IsTbss ? Dot + ThreadBssOffset : Dot;
+  Pos = alignTo(Pos, S->Alignment);
+  S->OutSecOff = Pos - CurOutSec->getVA();
+  Pos += S->getSize();
+
+  // Update output section size after adding each section. This is so that
+  // SIZEOF works correctly in the case below:
+  // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) }
+  CurOutSec->setSize(Pos - CurOutSec->getVA());
+
+  if (!IsTbss)
+    Dot = Pos;
+}
+
+template <class ELFT> void LinkerScript<ELFT>::flush() {
+  if (auto *OutSec = dyn_cast_or_null<OutputSection<ELFT>>(CurOutSec)) {
+    for (InputSection<ELFT> *I : OutSec->Sections)
+      output(I);
+    AlreadyOutputOS.insert(CurOutSec);
   }
 }
 
 template <class ELFT>
-void assignOffsets(OutputSectionCommand *Cmd, OutputSectionBase<ELFT> *Sec) {
-  auto *OutSec = dyn_cast<OutputSection<ELFT>>(Sec);
-  if (!OutSec) {
-    Sec->assignOffsets();
-    // This section is not regular output section. However linker script may
-    // have defined start/end symbols for it. This case is handled below.
-    addStartEndSymbols(Cmd, Sec);
+void LinkerScript<ELFT>::switchTo(OutputSectionBase<ELFT> *Sec) {
+  if (CurOutSec == Sec)
+    return;
+  if (AlreadyOutputOS.count(Sec))
     return;
-  }
-  typedef typename ELFT::uint uintX_t;
-  uintX_t Off = 0;
-  auto ItCmd = Cmd->Commands.begin();
 
-  // Assigns values to all symbols following the given
-  // input section 'D' in output section 'Sec'. When symbols
-  // are in the beginning of output section the value of 'D'
-  // is nullptr.
-  auto AssignSuccessors = [&](InputSectionData *D) {
-    for (; ItCmd != Cmd->Commands.end(); ++ItCmd) {
-      auto *AssignCmd = dyn_cast<SymbolAssignment>(ItCmd->get());
-      if (!AssignCmd)
-        continue;
-      if (D != AssignCmd->GoesAfter)
-        break;
+  flush();
+  CurOutSec = Sec;
 
-      if (AssignCmd->Name == ".") {
-        // Update to location counter means update to section size.
-        Off = AssignCmd->Expression(Sec->getVA() + Off) - Sec->getVA();
-        Sec->setSize(Off);
-        continue;
-      }
-      assignSectionSymbol<ELFT>(AssignCmd, Sec, Off);
-    }
-  };
+  Dot = alignTo(Dot, CurOutSec->getAlignment());
+  CurOutSec->setVA(Dot);
+}
 
-  AssignSuccessors(nullptr);
-  for (InputSection<ELFT> *I : OutSec->Sections) {
-    Off = alignTo(Off, I->Alignment);
-    I->OutSecOff = Off;
-    Off += I->getSize();
-    // Update section size inside for-loop, so that SIZEOF
-    // works correctly in the case below:
-    // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) }
-    Sec->setSize(Off);
-    // Add symbols following current input section.
-    AssignSuccessors(I);
+template <class ELFT> void LinkerScript<ELFT>::process(BaseCommand &Base) {
+  if (auto *AssignCmd = dyn_cast<SymbolAssignment>(&Base)) {
+    if (AssignCmd->Name == ".") {
+      // Update to location counter means update to section size.
+      Dot = AssignCmd->Expression(Dot);
+      CurOutSec->setSize(Dot - CurOutSec->getVA());
+      return;
+    }
+    assignSectionSymbol<ELFT>(AssignCmd, CurOutSec, Dot - CurOutSec->getVA());
+    return;
+  }
+  auto &ICmd = cast<InputSectionDescription>(Base);
+  for (InputSectionData *ID : ICmd.Sections) {
+    auto *IB = static_cast<InputSectionBase<ELFT> *>(ID);
+    switchTo(IB->OutSec);
+    if (auto *I = dyn_cast<InputSection<ELFT>>(IB))
+      output(I);
+    else if (AlreadyOutputOS.insert(CurOutSec).second)
+      Dot += CurOutSec->getSize();
   }
 }
 
 template <class ELFT>
 static std::vector<OutputSectionBase<ELFT> *>
 findSections(OutputSectionCommand &Cmd,
-             ArrayRef<OutputSectionBase<ELFT> *> Sections) {
+             const std::vector<OutputSectionBase<ELFT> *> &Sections) {
   std::vector<OutputSectionBase<ELFT> *> Ret;
   for (OutputSectionBase<ELFT> *Sec : Sections)
     if (Sec->getName() == Cmd.Name &&
@@ -406,6 +402,34 @@ findSections(OutputSectionCommand &Cmd,
   return Ret;
 }
 
+template <class ELFT>
+void LinkerScript<ELFT>::assignOffsets(OutputSectionCommand *Cmd) {
+  std::vector<OutputSectionBase<ELFT> *> Sections =
+      findSections(*Cmd, *OutputSections);
+  if (Sections.empty())
+    return;
+  switchTo(Sections[0]);
+
+  // Find the last section output location. We will output orphan sections
+  // there so that end symbols point to the correct location.
+  auto E = std::find_if(Cmd->Commands.rbegin(), Cmd->Commands.rend(),
+                        [](const std::unique_ptr<BaseCommand> &Cmd) {
+                          return !isa<SymbolAssignment>(*Cmd);
+                        })
+               .base();
+  for (auto I = Cmd->Commands.begin(); I != E; ++I)
+    process(**I);
+  flush();
+  for (OutputSectionBase<ELFT> *Base : Sections) {
+    if (!AlreadyOutputOS.insert(Base).second)
+      continue;
+    switchTo(Base);
+    Dot += CurOutSec->getSize();
+  }
+  for (auto I = E, E = Cmd->Commands.end(); I != E; ++I)
+    process(**I);
+}
+
 template <class ELFT> void LinkerScript<ELFT>::assignAddresses() {
   // Orphan sections are sections present in the input files which
   // are not explicitly placed into the output file by the linker script.
@@ -421,7 +445,6 @@ template <class ELFT> void LinkerScript<
   // Assign addresses as instructed by linker script SECTIONS sub-commands.
   Dot = getHeaderSize();
   uintX_t MinVA = std::numeric_limits<uintX_t>::max();
-  uintX_t ThreadBssOffset = 0;
 
   for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) {
     if (auto *Cmd = dyn_cast<SymbolAssignment>(Base.get())) {
@@ -439,34 +462,17 @@ template <class ELFT> void LinkerScript<
     }
 
     auto *Cmd = cast<OutputSectionCommand>(Base.get());
-    for (OutputSectionBase<ELFT> *Sec :
-         findSections<ELFT>(*Cmd, *OutputSections)) {
-
-      if (Cmd->AddrExpr)
-        Dot = Cmd->AddrExpr(Dot);
 
-      if ((Sec->getFlags() & SHF_TLS) && Sec->getType() == SHT_NOBITS) {
-        uintX_t TVA = Dot + ThreadBssOffset;
-        TVA = alignTo(TVA, Sec->getAlignment());
-        Sec->setVA(TVA);
-        assignOffsets(Cmd, Sec);
-        ThreadBssOffset = TVA - Dot + Sec->getSize();
-        continue;
-      }
+    if (Cmd->AddrExpr)
+      Dot = Cmd->AddrExpr(Dot);
 
-      if (!(Sec->getFlags() & SHF_ALLOC)) {
-        assignOffsets(Cmd, Sec);
-        continue;
-      }
-
-      Dot = alignTo(Dot, Sec->getAlignment());
-      Sec->setVA(Dot);
-      assignOffsets(Cmd, Sec);
-      MinVA = std::min(MinVA, Dot);
-      Dot += Sec->getSize();
-    }
+    MinVA = std::min(MinVA, Dot);
+    assignOffsets(Cmd);
   }
 
+  for (OutputSectionBase<ELFT> *Sec : *OutputSections)
+    if (!(Sec->getFlags() & SHF_ALLOC))
+      Sec->setVA(0);
   uintX_t HeaderSize =
       Out<ELFT>::ElfHeader->getSize() + Out<ELFT>::ProgramHeaders->getSize();
   if (HeaderSize > MinVA)

Modified: lld/trunk/ELF/LinkerScript.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/LinkerScript.h?rev=281736&r1=281735&r2=281736&view=diff
==============================================================================
--- lld/trunk/ELF/LinkerScript.h (original)
+++ lld/trunk/ELF/LinkerScript.h Fri Sep 16 10:10:23 2016
@@ -14,6 +14,7 @@
 #include "Writer.h"
 #include "lld/Core/LLVM.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -26,6 +27,7 @@ class DefinedCommon;
 class ScriptParser;
 class SymbolBody;
 template <class ELFT> class InputSectionBase;
+template <class ELFT> class InputSection;
 template <class ELFT> class OutputSectionBase;
 template <class ELFT> class OutputSectionFactory;
 class InputSectionData;
@@ -70,7 +72,6 @@ struct SymbolAssignment : BaseCommand {
   bool Provide = false;
   bool Hidden = false;
   bool IsAbsolute;
-  InputSectionData *GoesAfter = nullptr;
 };
 
 // Linker scripts allow additional constraints to be put on ouput sections.
@@ -107,6 +108,7 @@ struct InputSectionDescription : BaseCom
   SortKind SortInner = SortNone;
   llvm::Regex ExcludedFileRe;
   llvm::Regex SectionRe;
+  std::vector<InputSectionData *> Sections;
 };
 
 struct AssertCommand : BaseCommand {
@@ -173,6 +175,7 @@ public:
   ArrayRef<uint8_t> getFiller(StringRef Name);
   Expr getLma(StringRef Name);
   bool shouldKeep(InputSectionBase<ELFT> *S);
+  void assignOffsets(OutputSectionCommand *Cmd);
   void assignAddresses();
   int compareSections(StringRef A, StringRef B);
   bool hasPhdrsCommands();
@@ -185,8 +188,8 @@ public:
   std::vector<OutputSectionBase<ELFT> *> *OutputSections;
 
 private:
-  std::vector<InputSectionBase<ELFT> *>
-  getInputSections(const InputSectionDescription *);
+  void computeInputSections(InputSectionDescription *,
+                            ConstraintKind Constraint);
 
   void discard(ArrayRef<InputSectionBase<ELFT> *> V);
 
@@ -201,6 +204,14 @@ private:
   size_t getPhdrIndex(StringRef PhdrName);
 
   uintX_t Dot;
+  OutputSectionBase<ELFT> *CurOutSec = nullptr;
+  uintX_t ThreadBssOffset = 0;
+  void switchTo(OutputSectionBase<ELFT> *Sec);
+  void flush();
+  void output(InputSection<ELFT> *Sec);
+  void process(BaseCommand &Base);
+  llvm::DenseSet<OutputSectionBase<ELFT> *> AlreadyOutputOS;
+  llvm::DenseSet<InputSectionData *> AlreadyOutputIS;
 };
 
 // Variable template is a C++14 feature, so we can't template

Modified: lld/trunk/test/ELF/linkerscript/merge-sections.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/linkerscript/merge-sections.s?rev=281736&r1=281735&r2=281736&view=diff
==============================================================================
--- lld/trunk/test/ELF/linkerscript/merge-sections.s (original)
+++ lld/trunk/test/ELF/linkerscript/merge-sections.s Fri Sep 16 10:10:23 2016
@@ -1,7 +1,7 @@
 # REQUIRES: x86
 # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t
 
-# RUN: echo "SECTIONS {.foo : {*(.foo.*)} }" > %t.script
+# RUN: echo "SECTIONS {.foo : { begin = .; *(.foo.*) end = .;} }" > %t.script
 # RUN: ld.lld -o %t1 --script %t.script %t -shared
 # RUN: llvm-readobj -s -t %t1 | FileCheck %s
 
@@ -52,6 +52,13 @@
 # CHECK-NEXT:   AddressAlignment: 2
 # CHECK-NEXT:   EntrySize: 2
 
+
+# CHECK:      Name: begin
+# CHECK-NEXT: Value: 0x1C8
+
+# CHECK:      Name: end
+# CHECK-NEXT: Value: 0x1D0
+
.section        .foo.1a,"aMS",@progbits,1
 .asciz "foo"
 

Modified: lld/trunk/test/ELF/linkerscript/symbols-synthetic.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/linkerscript/symbols-synthetic.s?rev=281736&r1=281735&r2=281736&view=diff
==============================================================================
--- lld/trunk/test/ELF/linkerscript/symbols-synthetic.s (original)
+++ lld/trunk/test/ELF/linkerscript/symbols-synthetic.s Fri Sep 16 10:10:23 2016
@@ -29,19 +29,6 @@
 # RUN: ld.lld -o %t1 --eh-frame-hdr --script %t.script %t
 # RUN: llvm-objdump -t %t1 | FileCheck --check-prefix=SIMPLE %s
 
-# The script below contains symbols in the middle of .eh_frame_hdr section.
-# We don't support this.
-# RUN: echo "SECTIONS { \
-# RUN:          .eh_frame_hdr : { \
-# RUN:             PROVIDE_HIDDEN(_begin_sec = .); \
-# RUN:             __eh_frame_hdr_start = .; \
-# RUN:             *(.eh_frame_hdr) \
-# RUN:             PROVIDE_HIDDEN(_end_sec_abs = ABSOLUTE(.)); \
-# RUN:             *(.eh_frame_hdr) } \
-# RUN:             PROVIDE_HIDDEN(_end_sec = .); \
-# RUN:         }" > %t.script
-# RUN: not ld.lld -o %t1 --eh-frame-hdr --script %t.script %t 2>&1 | FileCheck --check-prefix=ERROR %s
-
 # Check that the following script is processed without errors
 # RUN: echo "SECTIONS { \
 # RUN:          .eh_frame_hdr : { \
@@ -69,7 +56,6 @@
 # SIMPLE-NEXT: 0000000000001010         *ABS*             00000000 __eh_frame_hdr_start2
 # SIMPLE-NEXT: 0000000000001018         .eh_frame_hdr     00000000 __eh_frame_hdr_end
 # SIMPLE-NEXT: 0000000000001020         *ABS*             00000000 __eh_frame_hdr_end2
-# ERROR: section '.eh_frame_hdr' supports only start and end symbols
 
 .global _start
 _start:




More information about the llvm-commits mailing list