[lld] r267233 - Bring r267164 back with a fix.

Rafael Espindola via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 22 15:09:35 PDT 2016


Author: rafael
Date: Fri Apr 22 17:09:35 2016
New Revision: 267233

URL: http://llvm.org/viewvc/llvm-project?rev=267233&view=rev
Log:
Bring r267164 back with a fix.

The fix is to handle local symbols referring to SHF_MERGE sections.

Original message:

GC entries of SHF_MERGE sections.

It is a fairly direct extension of the gc algorithm. For merge sections,
instead of remembering just a live bit, we remember which offsets
were used.

This reduces the .rodata sections in chromium from 9648861 to 9477472
bytes.

Added:
    lld/trunk/test/ELF/gc-sections-merge-addend.s
    lld/trunk/test/ELF/gc-sections-merge-implicit-addend.s
    lld/trunk/test/ELF/gc-sections-merge.s
Modified:
    lld/trunk/ELF/InputSection.cpp
    lld/trunk/ELF/InputSection.h
    lld/trunk/ELF/MarkLive.cpp
    lld/trunk/ELF/OutputSections.cpp
    lld/trunk/ELF/Writer.cpp
    lld/trunk/test/ELF/gc-merge-local-sym.s

Modified: lld/trunk/ELF/InputSection.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/InputSection.cpp?rev=267233&r1=267232&r2=267233&view=diff
==============================================================================
--- lld/trunk/ELF/InputSection.cpp (original)
+++ lld/trunk/ELF/InputSection.cpp Fri Apr 22 17:09:35 2016
@@ -77,23 +77,41 @@ InputSectionBase<ELFT>::getOffset(const
   return getOffset(Sym.Value);
 }
 
-// Returns a section that Rel relocation is pointing to.
 template <class ELFT>
-InputSectionBase<ELFT> *
-InputSectionBase<ELFT>::getRelocTarget(const Elf_Rel &Rel) const {
-  // Global symbol
+static DefinedRegular<ELFT> *getRelocTargetSym(elf::ObjectFile<ELFT> *File,
+                                               const typename ELFT::Rel &Rel) {
   uint32_t SymIndex = Rel.getSymbol(Config->Mips64EL);
   SymbolBody &B = File->getSymbolBody(SymIndex).repl();
   if (auto *D = dyn_cast<DefinedRegular<ELFT>>(&B))
     if (D->Section)
-      return D->Section->Repl;
+      return D;
   return nullptr;
 }
 
+// Returns a section that Rel relocation is pointing to.
+template <class ELFT>
+std::pair<InputSectionBase<ELFT> *, typename ELFT::uint>
+InputSectionBase<ELFT>::getRelocTarget(const Elf_Rel &Rel) const {
+  auto *D = getRelocTargetSym(File, Rel);
+  if (!D)
+    return std::make_pair(nullptr, 0);
+  if (!D->isSection())
+    return std::make_pair(D->Section->Repl, D->Value);
+  const uint8_t *BufLoc = getSectionData().begin() + Rel.r_offset;
+  uintX_t Addend =
+      Target->getImplicitAddend(BufLoc, Rel.getType(Config->Mips64EL));
+  return std::make_pair(D->Section->Repl, D->Value + Addend);
+}
+
 template <class ELFT>
-InputSectionBase<ELFT> *
+std::pair<InputSectionBase<ELFT> *, typename ELFT::uint>
 InputSectionBase<ELFT>::getRelocTarget(const Elf_Rela &Rel) const {
-  return getRelocTarget(reinterpret_cast<const Elf_Rel &>(Rel));
+  auto *D = getRelocTargetSym(File, Rel);
+  if (!D)
+    return std::make_pair(nullptr, 0);
+  if (!D->isSection())
+    return std::make_pair(D->Section->Repl, D->Value);
+  return std::make_pair(D->Section->Repl, D->Value + Rel.r_addend);
 }
 
 template <class ELFT>
@@ -368,10 +386,49 @@ typename ELFT::uint EHInputSection<ELFT>
   return Base + Addend;
 }
 
+static size_t findNull(StringRef S, size_t EntSize) {
+  // Optimize the common case.
+  if (EntSize == 1)
+    return S.find(0);
+
+  for (unsigned I = 0, N = S.size(); I != N; I += EntSize) {
+    const char *B = S.begin() + I;
+    if (std::all_of(B, B + EntSize, [](char C) { return C == 0; }))
+      return I;
+  }
+  return StringRef::npos;
+}
+
 template <class ELFT>
 MergeInputSection<ELFT>::MergeInputSection(elf::ObjectFile<ELFT> *F,
                                            const Elf_Shdr *Header)
-    : SplitInputSection<ELFT>(F, Header, InputSectionBase<ELFT>::Merge) {}
+    : SplitInputSection<ELFT>(F, Header, InputSectionBase<ELFT>::Merge) {
+  uintX_t EntSize = Header->sh_entsize;
+  ArrayRef<uint8_t> D = this->getSectionData();
+  StringRef Data((const char *)D.data(), D.size());
+  std::vector<std::pair<uintX_t, uintX_t>> &Offsets = this->Offsets;
+
+  uintX_t V = Config->GcSections ? -1 : 0;
+  if (Header->sh_flags & SHF_STRINGS) {
+    uintX_t Offset = 0;
+    while (!Data.empty()) {
+      size_t End = findNull(Data, EntSize);
+      if (End == StringRef::npos)
+        fatal("string is not null terminated");
+      Offsets.push_back(std::make_pair(Offset, V));
+      uintX_t Size = End + EntSize;
+      Data = Data.substr(Size);
+      Offset += Size;
+    }
+    return;
+  }
+
+  // If this is not of type string, every entry has the same size.
+  size_t Size = Data.size();
+  assert((Size % EntSize) == 0);
+  for (unsigned I = 0, N = Size; I != N; I += EntSize)
+    Offsets.push_back(std::make_pair(I, V));
+}
 
 template <class ELFT>
 bool MergeInputSection<ELFT>::classof(const InputSectionBase<ELFT> *S) {

Modified: lld/trunk/ELF/InputSection.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/InputSection.h?rev=267233&r1=267232&r2=267233&view=diff
==============================================================================
--- lld/trunk/ELF/InputSection.h (original)
+++ lld/trunk/ELF/InputSection.h Fri Apr 22 17:09:35 2016
@@ -124,8 +124,10 @@ public:
   ArrayRef<uint8_t> getSectionData() const;
 
   // Returns a section that Rel is pointing to. Used by the garbage collector.
-  InputSectionBase<ELFT> *getRelocTarget(const Elf_Rel &Rel) const;
-  InputSectionBase<ELFT> *getRelocTarget(const Elf_Rela &Rel) const;
+  std::pair<InputSectionBase<ELFT> *, uintX_t>
+  getRelocTarget(const Elf_Rel &Rel) const;
+  std::pair<InputSectionBase<ELFT> *, uintX_t>
+  getRelocTarget(const Elf_Rela &Rel) const;
 
   void relocate(uint8_t *Buf, uint8_t *BufEnd);
   std::vector<Relocation> Relocations;

Modified: lld/trunk/ELF/MarkLive.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/MarkLive.cpp?rev=267233&r1=267232&r2=267233&view=diff
==============================================================================
--- lld/trunk/ELF/MarkLive.cpp (original)
+++ lld/trunk/ELF/MarkLive.cpp Fri Apr 22 17:09:35 2016
@@ -40,20 +40,29 @@ using namespace lld::elf;
 
 // Calls Fn for each section that Sec refers to via relocations.
 template <class ELFT>
-static void forEachSuccessor(InputSection<ELFT> *Sec,
-                             std::function<void(InputSectionBase<ELFT> *)> Fn) {
+static void forEachSuccessor(
+    InputSection<ELFT> *Sec,
+    std::function<void(InputSectionBase<ELFT> *, typename ELFT::uint Offset)>
+        Fn) {
   typedef typename ELFT::Rel Elf_Rel;
   typedef typename ELFT::Rela Elf_Rela;
   typedef typename ELFT::Shdr Elf_Shdr;
+  typedef typename ELFT::uint uintX_t;
 
   ELFFile<ELFT> &Obj = Sec->getFile()->getObj();
   for (const Elf_Shdr *RelSec : Sec->RelocSections) {
     if (RelSec->sh_type == SHT_RELA) {
-      for (const Elf_Rela &RI : Obj.relas(RelSec))
-        Fn(Sec->getRelocTarget(RI));
+      for (const Elf_Rela &RI : Obj.relas(RelSec)) {
+        std::pair<InputSectionBase<ELFT> *, uintX_t> P =
+            Sec->getRelocTarget(RI);
+        Fn(P.first, P.second);
+      }
     } else {
-      for (const Elf_Rel &RI : Obj.rels(RelSec))
-        Fn(Sec->getRelocTarget(RI));
+      for (const Elf_Rel &RI : Obj.rels(RelSec)) {
+        std::pair<InputSectionBase<ELFT> *, uintX_t> P =
+            Sec->getRelocTarget(RI);
+        Fn(P.first, P.second);
+      }
     }
   }
 }
@@ -85,10 +94,18 @@ template <class ELFT> static bool isRese
 // Starting from GC-root sections, this function visits all reachable
 // sections to set their "Live" bits.
 template <class ELFT> void elf::markLive(SymbolTable<ELFT> *Symtab) {
+  typedef typename ELFT::uint uintX_t;
   SmallVector<InputSection<ELFT> *, 256> Q;
 
-  auto Enqueue = [&](InputSectionBase<ELFT> *Sec) {
-    if (!Sec || Sec->Live)
+  auto Enqueue = [&](InputSectionBase<ELFT> *Sec, uintX_t Offset) {
+    if (!Sec)
+      return;
+    if (auto *MS = dyn_cast<MergeInputSection<ELFT>>(Sec)) {
+      std::pair<std::pair<uintX_t, uintX_t> *, uintX_t> T =
+          MS->getRangeAndSize(Offset);
+      T.first->second = 0;
+    }
+    if (Sec->Live)
       return;
     Sec->Live = true;
     if (InputSection<ELFT> *S = dyn_cast<InputSection<ELFT>>(Sec))
@@ -98,7 +115,7 @@ template <class ELFT> void elf::markLive
   auto MarkSymbol = [&](SymbolBody *Sym) {
     if (Sym)
       if (auto *D = dyn_cast<DefinedRegular<ELFT>>(Sym))
-        Enqueue(D->Section);
+        Enqueue(D->Section, D->Value);
   };
 
   // Add GC root symbols.
@@ -122,7 +139,7 @@ template <class ELFT> void elf::markLive
     for (InputSectionBase<ELFT> *Sec : F->getSections())
       if (Sec && Sec != &InputSection<ELFT>::Discarded)
         if (isReserved(Sec) || Script<ELFT>::X->shouldKeep(Sec))
-          Enqueue(Sec);
+          Enqueue(Sec, 0);
 
   // Mark all reachable sections.
   while (!Q.empty())

Modified: lld/trunk/ELF/OutputSections.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/OutputSections.cpp?rev=267233&r1=267232&r2=267233&view=diff
==============================================================================
--- lld/trunk/ELF/OutputSections.cpp (original)
+++ lld/trunk/ELF/OutputSections.cpp Fri Apr 22 17:09:35 2016
@@ -1132,7 +1132,7 @@ void EHOutputSection<ELFT>::addSectionAu
     } else {
       if (!HasReloc)
         fatal("FDE doesn't reference another section");
-      InputSectionBase<ELFT> *Target = S->getRelocTarget(*RelI);
+      InputSectionBase<ELFT> *Target = S->getRelocTarget(*RelI).first;
       if (Target && Target->Live) {
         uint32_t CieOffset = Offset + 4 - ID;
         auto I = OffsetToIndex.find(CieOffset);
@@ -1227,19 +1227,6 @@ template <class ELFT> void MergeOutputSe
   }
 }
 
-static size_t findNull(StringRef S, size_t EntSize) {
-  // Optimize the common case.
-  if (EntSize == 1)
-    return S.find(0);
-
-  for (unsigned I = 0, N = S.size(); I != N; I += EntSize) {
-    const char *B = S.begin() + I;
-    if (std::all_of(B, B + EntSize, [](char C) { return C == 0; }))
-      return I;
-  }
-  return StringRef::npos;
-}
-
 template <class ELFT>
 void MergeOutputSection<ELFT>::addSection(InputSectionBase<ELFT> *C) {
   auto *S = cast<MergeInputSection<ELFT>>(C);
@@ -1250,31 +1237,32 @@ void MergeOutputSection<ELFT>::addSectio
   StringRef Data((const char *)D.data(), D.size());
   uintX_t EntSize = S->getSectionHdr()->sh_entsize;
   this->Header.sh_entsize = EntSize;
+  MutableArrayRef<std::pair<uintX_t, uintX_t>> Offsets = S->Offsets;
 
   // If this is of type string, the contents are null-terminated strings.
   if (this->Header.sh_flags & SHF_STRINGS) {
-    uintX_t Offset = 0;
-    while (!Data.empty()) {
-      size_t End = findNull(Data, EntSize);
-      if (End == StringRef::npos)
-        fatal("string is not null terminated");
-      StringRef Entry = Data.substr(0, End + EntSize);
+    for (unsigned I = 0, N = Offsets.size(); I != N; ++I) {
+      auto &P = Offsets[I];
+      if (P.second == (uintX_t)-1)
+        continue;
+
+      uintX_t Start = P.first;
+      uintX_t End = (I == N - 1) ? Data.size() : Offsets[I + 1].first;
+      StringRef Entry = Data.substr(Start, End - Start);
       uintX_t OutputOffset = Builder.add(Entry);
       if (shouldTailMerge())
         OutputOffset = -1;
-      S->Offsets.push_back(std::make_pair(Offset, OutputOffset));
-      uintX_t Size = End + EntSize;
-      Data = Data.substr(Size);
-      Offset += Size;
+      P.second = OutputOffset;
     }
     return;
   }
 
   // If this is not of type string, every entry has the same size.
-  for (unsigned I = 0, N = Data.size(); I != N; I += EntSize) {
-    StringRef Entry = Data.substr(I, EntSize);
-    size_t OutputOffset = Builder.add(Entry);
-    S->Offsets.push_back(std::make_pair(I, OutputOffset));
+  for (auto &P : Offsets) {
+    if (P.second == (uintX_t)-1)
+      continue;
+    StringRef Entry = Data.substr(P.first, EntSize);
+    P.second = Builder.add(Entry);
   }
 }
 

Modified: lld/trunk/ELF/Writer.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Writer.cpp?rev=267233&r1=267232&r2=267233&view=diff
==============================================================================
--- lld/trunk/ELF/Writer.cpp (original)
+++ lld/trunk/ELF/Writer.cpp Fri Apr 22 17:09:35 2016
@@ -790,8 +790,19 @@ template <class ELFT> void Writer<ELFT>:
       InputSectionBase<ELFT> *Sec = DR->Section;
       if (!shouldKeepInSymtab<ELFT>(Sec, SymName, *B))
         continue;
-      if (Sec && !Sec->Live)
-        continue;
+      if (Sec) {
+        if (!Sec->Live)
+          continue;
+
+        // Garbage collection is normally able to remove local symbols if they
+        // point to gced sections. In the case of SHF_MERGE sections, we want it
+        // to also be able to drop them if part of the section is gced.
+        // We could look at the section offset map to keep some of these
+        // symbols, but almost all local symbols are .L* symbols, so it
+        // is probably not worth the complexity.
+        if (Config->GcSections && isa<MergeInputSection<ELFT>>(Sec))
+          continue;
+      }
       ++Out<ELFT>::SymTab->NumLocals;
       if (Config->Relocatable)
         B->DynsymIndex = Out<ELFT>::SymTab->NumLocals;

Modified: lld/trunk/test/ELF/gc-merge-local-sym.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/gc-merge-local-sym.s?rev=267233&r1=267232&r2=267233&view=diff
==============================================================================
--- lld/trunk/test/ELF/gc-merge-local-sym.s (original)
+++ lld/trunk/test/ELF/gc-merge-local-sym.s Fri Apr 22 17:09:35 2016
@@ -11,24 +11,18 @@
 // CHECK-NEXT: ]
 // CHECK-NEXT: Address: 0x1C8
 // CHECK-NEXT: Offset:
-// CHECK-NEXT: Size: 8
+// CHECK-NEXT: Size: 4
 // CHECK-NEXT: Link: 0
 // CHECK-NEXT: Info: 0
 // CHECK-NEXT: AddressAlignment: 1
 // CHECK-NEXT: EntrySize: 1
 // CHECK-NEXT: SectionData (
-// CHECK-NEXT:   0000: 64656600 61626300 |def.abc.|
+// CHECK-NEXT:   0000: 61626300 |abc.|
 // CHECK-NEXT: )
 
 // CHECK:      Symbols [
 // CHECK:        Symbol {
-// CHECK:          Name: bar
-// CHECK-NEXT:     Value: 0x1C8
-// CHECK-NEXT:     Size: 0
-// CHECK-NEXT:     Binding: Local
-// CHECK-NEXT:     Type: None
-// CHECK-NEXT:     Other: 0
-// CHECK-NEXT:     Section: .rodata
+// CHECK-NOT:          Name: bar
 
         .global foo
 foo:

Added: lld/trunk/test/ELF/gc-sections-merge-addend.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/gc-sections-merge-addend.s?rev=267233&view=auto
==============================================================================
--- lld/trunk/test/ELF/gc-sections-merge-addend.s (added)
+++ lld/trunk/test/ELF/gc-sections-merge-addend.s Fri Apr 22 17:09:35 2016
@@ -0,0 +1,39 @@
+// RUN: llvm-mc %s -o %t.o -filetype=obj -triple=x86_64-pc-linux
+// RUN: ld.lld %t.o -o %t.so -shared --gc-sections
+// RUN: llvm-readobj -s -section-data %t.so | FileCheck %s
+
+
+// CHECK:      Name: .rodata
+// CHECK-NEXT: Type: SHT_PROGBITS
+// CHECK-NEXT: Flags [
+// CHECK-NEXT:   SHF_ALLOC
+// CHECK-NEXT:   SHF_MERGE
+// CHECK-NEXT:   SHF_STRINGS
+// CHECK-NEXT: ]
+// CHECK-NEXT: Address:
+// CHECK-NEXT: Offset:
+// CHECK-NEXT: Size: 4
+// CHECK-NEXT: Link: 0
+// CHECK-NEXT: Info: 0
+// CHECK-NEXT: AddressAlignment: 1
+// CHECK-NEXT: EntrySize: 1
+// CHECK-NEXT: SectionData (
+// CHECK-NEXT:   0000: 62617200                    |bar.|
+// CHECK-NEXT: )
+
+        .section        .data.f,"aw",@progbits
+        .globl  f
+f:
+        .quad .rodata.str1.1 + 4
+
+        .section        .data.g,"aw",@progbits
+        .hidden g
+        .globl  g
+g:
+        .quad .rodata.str1.1
+
+        .section        .rodata.str1.1,"aMS",@progbits,1
+.L.str:
+        .asciz  "foo"
+.L.str.1:
+        .asciz  "bar"

Added: lld/trunk/test/ELF/gc-sections-merge-implicit-addend.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/gc-sections-merge-implicit-addend.s?rev=267233&view=auto
==============================================================================
--- lld/trunk/test/ELF/gc-sections-merge-implicit-addend.s (added)
+++ lld/trunk/test/ELF/gc-sections-merge-implicit-addend.s Fri Apr 22 17:09:35 2016
@@ -0,0 +1,39 @@
+// RUN: llvm-mc %s -o %t.o -filetype=obj -triple=i386-pc-linux
+// RUN: ld.lld %t.o -o %t.so -shared --gc-sections
+// RUN: llvm-readobj -s -section-data %t.so | FileCheck %s
+
+
+// CHECK:      Name: .rodata
+// CHECK-NEXT: Type: SHT_PROGBITS
+// CHECK-NEXT: Flags [
+// CHECK-NEXT:   SHF_ALLOC
+// CHECK-NEXT:   SHF_MERGE
+// CHECK-NEXT:   SHF_STRINGS
+// CHECK-NEXT: ]
+// CHECK-NEXT: Address:
+// CHECK-NEXT: Offset:
+// CHECK-NEXT: Size: 4
+// CHECK-NEXT: Link: 0
+// CHECK-NEXT: Info: 0
+// CHECK-NEXT: AddressAlignment: 1
+// CHECK-NEXT: EntrySize: 1
+// CHECK-NEXT: SectionData (
+// CHECK-NEXT:   0000: 62617200                    |bar.|
+// CHECK-NEXT: )
+
+        .section        .data.f,"aw",@progbits
+        .globl  f
+f:
+        .long .rodata.str1.1 + 4
+
+        .section        .data.g,"aw",@progbits
+        .hidden g
+        .globl  g
+g:
+        .long .rodata.str1.1
+
+        .section        .rodata.str1.1,"aMS",@progbits,1
+.L.str:
+        .asciz  "foo"
+.L.str.1:
+        .asciz  "bar"

Added: lld/trunk/test/ELF/gc-sections-merge.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/gc-sections-merge.s?rev=267233&view=auto
==============================================================================
--- lld/trunk/test/ELF/gc-sections-merge.s (added)
+++ lld/trunk/test/ELF/gc-sections-merge.s Fri Apr 22 17:09:35 2016
@@ -0,0 +1,61 @@
+// RUN: llvm-mc %s -o %t.o -filetype=obj -triple=x86_64-pc-linux
+// RUN: ld.lld %t.o -o %t.so -shared
+// RUN: ld.lld %t.o -o %t.gc.so -shared --gc-sections
+// RUN: llvm-readobj -s -section-data %t.so | FileCheck %s
+// RUN: llvm-readobj -s -section-data %t.gc.so | FileCheck --check-prefix=GC %s
+
+
+// CHECK:      Name: .rodata
+// CHECK-NEXT: Type: SHT_PROGBITS
+// CHECK-NEXT: Flags [
+// CHECK-NEXT:   SHF_ALLOC
+// CHECK-NEXT:   SHF_MERGE
+// CHECK-NEXT:   SHF_STRINGS
+// CHECK-NEXT: ]
+// CHECK-NEXT: Address:
+// CHECK-NEXT: Offset:
+// CHECK-NEXT: Size: 8
+// CHECK-NEXT: Link: 0
+// CHECK-NEXT: Info: 0
+// CHECK-NEXT: AddressAlignment: 1
+// CHECK-NEXT: EntrySize: 1
+// CHECK-NEXT: SectionData (
+// CHECK-NEXT:   0000: 666F6F00 62617200                    |foo.bar.|
+// CHECK-NEXT: )
+
+// GC:      Name: .rodata
+// GC-NEXT: Type: SHT_PROGBITS
+// GC-NEXT: Flags [
+// GC-NEXT:   SHF_ALLOC
+// GC-NEXT:   SHF_MERGE
+// GC-NEXT:   SHF_STRINGS
+// GC-NEXT: ]
+// GC-NEXT: Address:
+// GC-NEXT: Offset:
+// GC-NEXT: Size: 4
+// GC-NEXT: Link: 0
+// GC-NEXT: Info: 0
+// GC-NEXT: AddressAlignment: 1
+// GC-NEXT: EntrySize: 1
+// GC-NEXT: SectionData (
+// GC-NEXT:   0000: 666F6F00                                |foo.|
+// GC-NEXT: )
+
+        .section        .text.f,"ax",@progbits
+        .globl  f
+f:
+        leaq    .L.str(%rip), %rax
+        retq
+
+        .section        .text.g,"ax",@progbits
+        .hidden g
+        .globl  g
+g:
+        leaq    .L.str.1(%rip), %rax
+        retq
+
+        .section        .rodata.str1.1,"aMS",@progbits,1
+.L.str:
+        .asciz  "foo"
+.L.str.1:
+        .asciz  "bar"




More information about the llvm-commits mailing list