[lld] r251212 - Add support for merging strings from SHF_STRINGS sections.

Rafael Espindola via llvm-commits llvm-commits at lists.llvm.org
Sat Oct 24 15:51:01 PDT 2015


Author: rafael
Date: Sat Oct 24 17:51:01 2015
New Revision: 251212

URL: http://llvm.org/viewvc/llvm-project?rev=251212&view=rev
Log:
Add support for merging strings from SHF_STRINGS sections.

Added:
    lld/trunk/test/elf2/merge-string-align.s
    lld/trunk/test/elf2/merge-string-error.s
    lld/trunk/test/elf2/merge-string-no-null.s
    lld/trunk/test/elf2/merge-string.s
Modified:
    lld/trunk/ELF/InputFiles.cpp
    lld/trunk/ELF/InputSection.cpp
    lld/trunk/ELF/InputSection.h
    lld/trunk/ELF/OutputSections.cpp
    lld/trunk/ELF/OutputSections.h

Modified: lld/trunk/ELF/InputFiles.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/InputFiles.cpp?rev=251212&r1=251211&r2=251212&view=diff
==============================================================================
--- lld/trunk/ELF/InputFiles.cpp (original)
+++ lld/trunk/ELF/InputFiles.cpp Sat Oct 24 17:51:01 2015
@@ -121,6 +121,33 @@ ObjectFile<ELFT>::getShtGroupEntries(con
 }
 
 template <class ELFT>
+static bool shouldMerge(const typename ELFFile<ELFT>::Elf_Shdr &Sec) {
+  typedef typename ELFFile<ELFT>::uintX_t uintX_t;
+  uintX_t Flags = Sec.sh_flags;
+  if (!(Flags & SHF_MERGE))
+    return false;
+  if (Flags & SHF_WRITE)
+    error("Writable SHF_MERGE sections are not supported");
+  uintX_t EntSize = Sec.sh_entsize;
+  if (Sec.sh_size % EntSize)
+    error("SHF_MERGE section size must be a multiple of sh_entsize");
+
+  // Don't try to merge if the alignment is larger than the sh_entsize.
+  //
+  // If this is not a SHF_STRINGS, we would need to pad after every entity. It
+  // would be equivalent for the producer of the .o to just set a larger
+  // sh_entsize.
+  //
+  // If this is a SHF_STRINGS, the larger alignment makes sense. Unfortunately
+  // it would complicate tail merging. This doesn't seem common enough to
+  // justify the effort.
+  if (Sec.sh_addralign > EntSize)
+    return false;
+
+  return true;
+}
+
+template <class ELFT>
 void elf2::ObjectFile<ELFT>::initializeSections(DenseSet<StringRef> &Comdats) {
   uint64_t Size = this->ELFObj.getNumSections();
   Sections.resize(Size);
@@ -170,18 +197,13 @@ void elf2::ObjectFile<ELFT>::initializeS
         error("Relocations pointing to SHF_MERGE are not supported");
       break;
     }
-    default: {
-      uintX_t Flags = Sec.sh_flags;
-      if (Flags & SHF_MERGE && !(Flags & SHF_STRINGS)) {
-        if (Flags & SHF_WRITE)
-          error("Writable SHF_MERGE sections are not supported");
+    default:
+      if (shouldMerge<ELFT>(Sec))
         Sections[I] = new (this->Alloc) MergeInputSection<ELFT>(this, &Sec);
-      } else {
+      else
         Sections[I] = new (this->Alloc) InputSection<ELFT>(this, &Sec);
-      }
       break;
     }
-    }
   }
 }
 

Modified: lld/trunk/ELF/InputSection.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/InputSection.cpp?rev=251212&r1=251211&r2=251212&view=diff
==============================================================================
--- lld/trunk/ELF/InputSection.cpp (original)
+++ lld/trunk/ELF/InputSection.cpp Sat Oct 24 17:51:01 2015
@@ -129,20 +129,36 @@ bool MergeInputSection<ELFT>::classof(co
   return S->SectionKind == Base::Merge;
 }
 
-// FIXME: Optimize this by keeping an offset for each element.
 template <class ELFT>
 typename MergeInputSection<ELFT>::uintX_t
 MergeInputSection<ELFT>::getOffset(uintX_t Offset) {
-  ArrayRef<uint8_t> Data = this->getSectionData();
-  uintX_t EntSize = this->Header->sh_entsize;
-  uintX_t Addend = Offset % EntSize;
-  Offset -= Addend;
-  if (Offset + EntSize > Data.size())
+  ArrayRef<uint8_t> D = this->getSectionData();
+  StringRef Data((char *)D.data(), D.size());
+  uintX_t Size = Data.size();
+  if (Offset >= Size)
     error("Entry is past the end of the section");
-  Data = Data.slice(Offset, EntSize);
-  return static_cast<MergeOutputSection<ELFT> *>(this->OutSec)
-             ->getOffset(Data) +
-         Addend;
+
+  // Find the element this offset points to.
+  auto I = std::upper_bound(
+      this->Offsets.begin(), this->Offsets.end(), Offset,
+      [](const uintX_t &A, const std::pair<uintX_t, uintX_t> &B) {
+        return A < B.first;
+      });
+  size_t End = I == this->Offsets.end() ? Data.size() : I->first;
+  --I;
+  uintX_t Start = I->first;
+
+  // Compute the Addend and if the Base is cached, return.
+  uintX_t Addend = Offset - Start;
+  uintX_t &Base = I->second;
+  if (Base != uintX_t(-1))
+    return Base + Addend;
+
+  // Map the base to the offset in the output section and cache it.
+  StringRef Entry = Data.substr(Start, End - Start);
+  Base =
+      static_cast<MergeOutputSection<ELFT> *>(this->OutSec)->getOffset(Entry);
+  return Base + Addend;
 }
 
 namespace lld {

Modified: lld/trunk/ELF/InputSection.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/InputSection.h?rev=251212&r1=251211&r2=251212&view=diff
==============================================================================
--- lld/trunk/ELF/InputSection.h (original)
+++ lld/trunk/ELF/InputSection.h Sat Oct 24 17:51:01 2015
@@ -78,8 +78,11 @@ template <class ELFT> class MergeInputSe
   typedef typename llvm::object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr;
 
 public:
+  std::vector<std::pair<uintX_t, uintX_t>> Offsets;
   MergeInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header);
   static bool classof(const InputSectionBase<ELFT> *S);
+  // Translate an offset in the input section to an offset in the output
+  // section.
   uintX_t getOffset(uintX_t Offset);
 };
 

Modified: lld/trunk/ELF/OutputSections.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/OutputSections.cpp?rev=251212&r1=251211&r2=251212&view=diff
==============================================================================
--- lld/trunk/ELF/OutputSections.cpp (original)
+++ lld/trunk/ELF/OutputSections.cpp Sat Oct 24 17:51:01 2015
@@ -718,11 +718,28 @@ MergeOutputSection<ELFT>::MergeOutputSec
     : OutputSectionBase<ELFT>(Name, sh_type, sh_flags) {}
 
 template <class ELFT> void MergeOutputSection<ELFT>::writeTo(uint8_t *Buf) {
-  for (const std::pair<ArrayRef<uint8_t>, uintX_t> &P : Offsets) {
-    ArrayRef<uint8_t> Data = P.first;
+  if (shouldTailMerge()) {
+    StringRef Data = Builder.data();
     memcpy(Buf, Data.data(), Data.size());
-    Buf += Data.size();
+    return;
   }
+  for (const std::pair<StringRef, size_t> &P : Builder.getMap()) {
+    StringRef Data = P.first;
+    memcpy(Buf + P.second, Data.data(), Data.size());
+  }
+}
+
+static size_t findNull(StringRef S, size_t EntSize) {
+  // Optimize the common case.
+  if (EntSize == 1)
+    return S.find(0);
+
+  for (unsigned I = 0, N = S.size(); I != N; I += EntSize) {
+    const char *B = S.begin() + I;
+    if (std::all_of(B, B + EntSize, [](char C) { return C == 0; }))
+      return I;
+  }
+  return StringRef::npos;
 }
 
 template <class ELFT>
@@ -732,22 +749,48 @@ void MergeOutputSection<ELFT>::addSectio
   if (Align > this->Header.sh_addralign)
     this->Header.sh_addralign = Align;
 
-  uintX_t Off = this->Header.sh_size;
-  ArrayRef<uint8_t> Data = S->getSectionData();
+  ArrayRef<uint8_t> D = S->getSectionData();
+  StringRef Data((char *)D.data(), D.size());
   uintX_t EntSize = S->getSectionHdr()->sh_entsize;
-  if (Data.size() % EntSize)
-    error("SHF_MERGE section size must be a multiple of sh_entsize");
-  for (unsigned I = 0, N = Data.size(); I != N; I += EntSize) {
-    auto P = Offsets.insert(std::make_pair(Data.slice(I, EntSize), Off));
-    if (P.second)
-      Off += EntSize;
+  uintX_t Offset = 0;
+
+  if (this->Header.sh_flags & SHF_STRINGS) {
+    while (!Data.empty()) {
+      size_t End = findNull(Data, EntSize);
+      if (End == StringRef::npos)
+        error("String is not null terminated");
+      StringRef Entry = Data.substr(0, End + EntSize);
+      size_t OutputOffset = Builder.add(Entry);
+      if (shouldTailMerge())
+        OutputOffset = -1;
+      S->Offsets.push_back(std::make_pair(Offset, OutputOffset));
+      uintX_t Size = End + EntSize;
+      Data = Data.substr(Size);
+      Offset += Size;
+    }
+  } else {
+    for (unsigned I = 0, N = Data.size(); I != N; I += EntSize) {
+      StringRef Entry = Data.substr(I, EntSize);
+      size_t OutputOffset = Builder.add(Entry);
+      S->Offsets.push_back(std::make_pair(Offset, OutputOffset));
+      Offset += EntSize;
+    }
   }
-  this->Header.sh_size = Off;
 }
 
 template <class ELFT>
-unsigned MergeOutputSection<ELFT>::getOffset(ArrayRef<uint8_t> Val) {
-  return Offsets.find(Val)->second;
+unsigned MergeOutputSection<ELFT>::getOffset(StringRef Val) {
+  return Builder.getOffset(Val);
+}
+
+template <class ELFT> bool MergeOutputSection<ELFT>::shouldTailMerge() const {
+  return Config->Optimize >= 2 && this->Header.sh_flags & SHF_STRINGS;
+}
+
+template <class ELFT> void MergeOutputSection<ELFT>::finalize() {
+  if (shouldTailMerge())
+    Builder.finalize();
+  this->Header.sh_size = Builder.getSize();
 }
 
 template <class ELFT>

Modified: lld/trunk/ELF/OutputSections.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/OutputSections.h?rev=251212&r1=251211&r2=251212&view=diff
==============================================================================
--- lld/trunk/ELF/OutputSections.h (original)
+++ lld/trunk/ELF/OutputSections.h Sat Oct 24 17:51:01 2015
@@ -240,17 +240,17 @@ template <class ELFT>
 class MergeOutputSection final : public OutputSectionBase<ELFT> {
   typedef typename OutputSectionBase<ELFT>::uintX_t uintX_t;
 
+  bool shouldTailMerge() const;
+
 public:
   MergeOutputSection(StringRef Name, uint32_t sh_type, uintX_t sh_flags);
   void addSection(MergeInputSection<ELFT> *S);
   void writeTo(uint8_t *Buf) override;
-
-  unsigned getOffset(ArrayRef<uint8_t> Val);
+  unsigned getOffset(StringRef Val);
+  void finalize() override;
 
 private:
-  // This map is used to find if we already have an entry for a given value and,
-  // if so, at what offset it is.
-  llvm::MapVector<ArrayRef<uint8_t>, uintX_t> Offsets;
+  llvm::StringTableBuilder Builder{llvm::StringTableBuilder::RAW};
 };
 
 template <class ELFT>

Added: lld/trunk/test/elf2/merge-string-align.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/elf2/merge-string-align.s?rev=251212&view=auto
==============================================================================
--- lld/trunk/test/elf2/merge-string-align.s (added)
+++ lld/trunk/test/elf2/merge-string-align.s Sat Oct 24 17:51:01 2015
@@ -0,0 +1,39 @@
+// REQUIRES: x86
+// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
+// RUN: ld.lld2 %t.o -o %t.so -shared
+// RUN: llvm-readobj -s %t.so | FileCheck %s
+
+        .section        .rodata.str1.16,"aMS",@progbits,1
+        .align  16
+        .asciz "foo"
+
+        .section        .rodata.str1.1,"aMS",@progbits,1
+        .asciz "foo"
+
+// CHECK:      Name: .rodata
+// CHECK-NEXT: Type: SHT_PROGBITS
+// CHECK-NEXT: Flags [
+// CHECK-NEXT:   SHF_ALLOC
+// CHECK-NEXT:   SHF_MERGE
+// CHECK-NEXT:   SHF_STRINGS
+// CHECK-NEXT: ]
+// CHECK-NEXT: Address: 0x120
+// CHECK-NEXT: Offset: 0x120
+// CHECK-NEXT: Size: 4
+// CHECK-NEXT: Link: 0
+// CHECK-NEXT: Info: 0
+// CHECK-NEXT: AddressAlignment: 16
+
+// CHECK:      Name: .rodata
+// CHECK-NEXT: Type: SHT_PROGBITS
+// CHECK-NEXT: Flags [
+// CHECK-NEXT:   SHF_ALLOC
+// CHECK-NEXT:   SHF_MERGE
+// CHECK-NEXT:   SHF_STRINGS
+// CHECK-NEXT: ]
+// CHECK-NEXT: Address: 0x124
+// CHECK-NEXT: Offset: 0x124
+// CHECK-NEXT: Size: 4
+// CHECK-NEXT: Link: 0
+// CHECK-NEXT: Info: 0
+// CHECK-NEXT: AddressAlignment: 1

Added: lld/trunk/test/elf2/merge-string-error.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/elf2/merge-string-error.s?rev=251212&view=auto
==============================================================================
--- lld/trunk/test/elf2/merge-string-error.s (added)
+++ lld/trunk/test/elf2/merge-string-error.s Sat Oct 24 17:51:01 2015
@@ -0,0 +1,11 @@
+// REQUIRES: x86
+// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
+// RUN: not ld.lld2 %t.o -o %t.so -shared 2>&1 | FileCheck %s
+
+        .section	.rodata.str1.1,"aMS",@progbits,1
+	.asciz	"abc"
+
+        .text
+        .long .rodata.str1.1 + 4
+
+// CHECK: Entry is past the end of the section

Added: lld/trunk/test/elf2/merge-string-no-null.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/elf2/merge-string-no-null.s?rev=251212&view=auto
==============================================================================
--- lld/trunk/test/elf2/merge-string-no-null.s (added)
+++ lld/trunk/test/elf2/merge-string-no-null.s Sat Oct 24 17:51:01 2015
@@ -0,0 +1,8 @@
+// REQUIRES: x86
+// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
+// RUN: not ld.lld2 %t.o -o %t.so -shared 2>&1 | FileCheck %s
+
+	.section	.rodata.str1.1,"aMS",@progbits,1
+	.ascii	"abc"
+
+// CHECK: String is not null terminated

Added: lld/trunk/test/elf2/merge-string.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/elf2/merge-string.s?rev=251212&view=auto
==============================================================================
--- lld/trunk/test/elf2/merge-string.s (added)
+++ lld/trunk/test/elf2/merge-string.s Sat Oct 24 17:51:01 2015
@@ -0,0 +1,85 @@
+// REQUIRES: x86
+// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
+// RUN: ld.lld2 -O2 %t.o -o %t.so -shared
+// RUN: llvm-readobj -s -section-data -t %t.so | FileCheck %s
+// RUN: ld.lld2 -O1 %t.o -o %t.so -shared
+// RUN: llvm-readobj -s -section-data -t %t.so | FileCheck --check-prefix=NOTAIL %s
+
+        .section	.rodata.str1.1,"aMS",@progbits,1
+	.asciz	"abc"
+foo:
+	.ascii	"a"
+bar:
+        .asciz  "bc"
+        .asciz  "bc"
+
+        .section        .rodata.str2.2,"aMS",@progbits,2
+        .align  2
+zed:
+        .short  20
+        .short  0
+
+// CHECK:      Name:    .rodata
+// CHECK-NEXT: Type:    SHT_PROGBITS
+// CHECK-NEXT: Flags [
+// CHECK-NEXT:   SHF_ALLOC
+// CHECK-NEXT:   SHF_MERGE
+// CHECK-NEXT:   SHF_STRINGS
+// CHECK-NEXT: ]
+// CHECK-NEXT: Address:         0x120
+// CHECK-NEXT: Offset:  0x120
+// CHECK-NEXT: Size:    4
+// CHECK-NEXT: Link: 0
+// CHECK-NEXT: Info: 0
+// CHECK-NEXT: AddressAlignment: 1
+// CHECK-NEXT: EntrySize: 0
+// CHECK-NEXT: SectionData (
+// CHECK-NEXT:   0000: 61626300                             |abc.|
+// CHECK-NEXT: )
+
+// NOTAIL:      Name:    .rodata
+// NOTAIL-NEXT: Type:    SHT_PROGBITS
+// NOTAIL-NEXT: Flags [
+// NOTAIL-NEXT:   SHF_ALLOC
+// NOTAIL-NEXT:   SHF_MERGE
+// NOTAIL-NEXT:   SHF_STRINGS
+// NOTAIL-NEXT: ]
+// NOTAIL-NEXT: Address:         0x120
+// NOTAIL-NEXT: Offset:  0x120
+// NOTAIL-NEXT: Size:    7
+// NOTAIL-NEXT: Link: 0
+// NOTAIL-NEXT: Info: 0
+// NOTAIL-NEXT: AddressAlignment: 1
+// NOTAIL-NEXT: EntrySize: 0
+// NOTAIL-NEXT: SectionData (
+// NOTAIL-NEXT:   0000: 61626300 626300                     |abc.bc.|
+// NOTAIL-NEXT: )
+
+// CHECK:      Name: .rodata
+// CHECK-NEXT: Type: SHT_PROGBITS
+// CHECK-NEXT: Flags [
+// CHECK-NEXT:   SHF_ALLOC
+// CHECK-NEXT:   SHF_MERGE
+// CHECK-NEXT:   SHF_STRINGS
+// CHECK-NEXT: ]
+// CHECK-NEXT: Address: 0x124
+// CHECK-NEXT: Offset: 0x124
+// CHECK-NEXT: Size: 4
+// CHECK-NEXT: Link: 0
+// CHECK-NEXT: Info: 0
+// CHECK-NEXT: AddressAlignment: 2
+// CHECK-NEXT: EntrySize: 0
+// CHECK-NEXT: SectionData (
+// CHECK-NEXT:   0000: 14000000                             |....|
+// CHECK-NEXT: )
+
+
+// CHECK:      Name:    bar
+// CHECK-NEXT: Value:   0x121
+
+// CHECK:      Name:    foo
+// CHECK-NEXT: Value:   0x120
+
+// CHECK:      Name: zed
+// CHECK-NEXT: Value: 0x124
+// CHECK-NEXT: Size: 0




More information about the llvm-commits mailing list