[lld] r261327 - Add support for merging strings with alignment larger than one char.

Rafael Espindola via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 19 06:17:40 PST 2016


Author: rafael
Date: Fri Feb 19 08:17:40 2016
New Revision: 261327

URL: http://llvm.org/viewvc/llvm-project?rev=261327&view=rev
Log:
Add support for merging strings with alignment larger than one char.

This reduces the .rodata of ScyllaDB from 4501932 to 4334639 bytes (1.038
times smaller).

I don't think it is critical to support tail merging (merging just exact
duplicates would be enough), but given the code organization it was actually
a bit easier to support both.

Added:
    lld/trunk/test/ELF/tail-merge-string-align.s
Modified:
    lld/trunk/ELF/InputFiles.cpp
    lld/trunk/ELF/OutputSections.cpp
    lld/trunk/ELF/OutputSections.h
    lld/trunk/ELF/Writer.cpp
    lld/trunk/test/ELF/merge-string-align.s

Modified: lld/trunk/ELF/InputFiles.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/InputFiles.cpp?rev=261327&r1=261326&r2=261327&view=diff
==============================================================================
--- lld/trunk/ELF/InputFiles.cpp (original)
+++ lld/trunk/ELF/InputFiles.cpp Fri Feb 19 08:17:40 2016
@@ -162,15 +162,15 @@ static bool shouldMerge(const typename E
   if (!EntSize || Sec.sh_size % EntSize)
     fatal("SHF_MERGE section size must be a multiple of sh_entsize");
 
-  // Don't try to merge if the aligment is larger than the sh_entsize.
+  // Don't try to merge if the aligment is larger than the sh_entsize and this
+  // is not SHF_STRINGS.
   //
-  // If this is not a SHF_STRINGS, we would need to pad after every entity. It
-  // would be equivalent for the producer of the .o to just set a larger
+  // Since this is not a SHF_STRINGS, we would need to pad after every entity.
+  // It would be equivalent for the producer of the .o to just set a larger
   // sh_entsize.
-  //
-  // If this is a SHF_STRINGS, the larger alignment makes sense. Unfortunately
-  // it would complicate tail merging. This doesn't seem that common to
-  // justify the effort.
+  if (Flags & SHF_STRINGS)
+    return true;
+
   if (Sec.sh_addralign > EntSize)
     return false;
 

Modified: lld/trunk/ELF/OutputSections.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/OutputSections.cpp?rev=261327&r1=261326&r2=261327&view=diff
==============================================================================
--- lld/trunk/ELF/OutputSections.cpp (original)
+++ lld/trunk/ELF/OutputSections.cpp Fri Feb 19 08:17:40 2016
@@ -1223,8 +1223,9 @@ template <class ELFT> void EHOutputSecti
 
 template <class ELFT>
 MergeOutputSection<ELFT>::MergeOutputSection(StringRef Name, uint32_t Type,
-                                             uintX_t Flags)
-    : OutputSectionBase<ELFT>(Name, Type, Flags) {}
+                                             uintX_t Flags, uintX_t Alignment)
+    : OutputSectionBase<ELFT>(Name, Type, Flags),
+      Builder(llvm::StringTableBuilder::RAW, Alignment) {}
 
 template <class ELFT> void MergeOutputSection<ELFT>::writeTo(uint8_t *Buf) {
   if (shouldTailMerge()) {

Modified: lld/trunk/ELF/OutputSections.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/OutputSections.h?rev=261327&r1=261326&r2=261327&view=diff
==============================================================================
--- lld/trunk/ELF/OutputSections.h (original)
+++ lld/trunk/ELF/OutputSections.h Fri Feb 19 08:17:40 2016
@@ -300,14 +300,15 @@ class MergeOutputSection final : public
   bool shouldTailMerge() const;
 
 public:
-  MergeOutputSection(StringRef Name, uint32_t Type, uintX_t Flags);
+  MergeOutputSection(StringRef Name, uint32_t Type, uintX_t Flags,
+                     uintX_t Alignment);
   void addSection(InputSectionBase<ELFT> *S) override;
   void writeTo(uint8_t *Buf) override;
   unsigned getOffset(StringRef Val);
   void finalize() override;
 
 private:
-  llvm::StringTableBuilder Builder{llvm::StringTableBuilder::RAW};
+  llvm::StringTableBuilder Builder;
 };
 
 // FDE or CIE

Modified: lld/trunk/ELF/Writer.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Writer.cpp?rev=261327&r1=261326&r2=261327&view=diff
==============================================================================
--- lld/trunk/ELF/Writer.cpp (original)
+++ lld/trunk/ELF/Writer.cpp Fri Feb 19 08:17:40 2016
@@ -211,7 +211,7 @@ template <bool Is64Bits> struct SectionK
   StringRef Name;
   uint32_t Type;
   uintX_t Flags;
-  uintX_t EntSize;
+  uintX_t Alignment;
 };
 }
 namespace llvm {
@@ -225,13 +225,13 @@ template <bool Is64Bits> struct DenseMap
                                 0, 0};
   }
   static unsigned getHashValue(const SectionKey<Is64Bits> &Val) {
-    return hash_combine(Val.Name, Val.Type, Val.Flags, Val.EntSize);
+    return hash_combine(Val.Name, Val.Type, Val.Flags, Val.Alignment);
   }
   static bool isEqual(const SectionKey<Is64Bits> &LHS,
                       const SectionKey<Is64Bits> &RHS) {
     return DenseMapInfo<StringRef>::isEqual(LHS.Name, RHS.Name) &&
            LHS.Type == RHS.Type && LHS.Flags == RHS.Flags &&
-           LHS.EntSize == RHS.EntSize;
+           LHS.Alignment == RHS.Alignment;
   }
 };
 }
@@ -840,7 +840,8 @@ OutputSectionFactory<ELFT>::create(Input
     Sec = new EHOutputSection<ELFT>(Key.Name, Key.Type, Key.Flags);
     break;
   case InputSectionBase<ELFT>::Merge:
-    Sec = new MergeOutputSection<ELFT>(Key.Name, Key.Type, Key.Flags);
+    Sec = new MergeOutputSection<ELFT>(Key.Name, Key.Type, Key.Flags,
+                                       Key.Alignment);
     break;
   case InputSectionBase<ELFT>::MipsReginfo:
     Sec = new MipsReginfoOutputSection<ELFT>();
@@ -863,10 +864,15 @@ OutputSectionFactory<ELFT>::createKey(In
   const Elf_Shdr *H = C->getSectionHdr();
   uintX_t Flags = H->sh_flags & ~SHF_GROUP;
 
-  // For SHF_MERGE we create different output sections for each sh_entsize.
-  // This makes each output section simple and keeps a single level
-  // mapping from input to output.
-  uintX_t EntSize = isa<MergeInputSection<ELFT>>(C) ? H->sh_entsize : 0;
+  // For SHF_MERGE we create different output sections for each alignment.
+  // This makes each output section simple and keeps a single level mapping from
+  // input to output.
+  uintX_t Alignment = 0;
+  if (isa<MergeInputSection<ELFT>>(C)) {
+    Alignment = H->sh_addralign;
+    if (H->sh_entsize > Alignment)
+      Alignment = H->sh_entsize;
+  }
 
   // GNU as can give .eh_frame secion type SHT_PROGBITS or SHT_X86_64_UNWIND
   // depending on the construct. We want to canonicalize it so that
@@ -876,7 +882,7 @@ OutputSectionFactory<ELFT>::createKey(In
       isa<EHInputSection<ELFT>>(C))
     Type = SHT_X86_64_UNWIND;
 
-  return SectionKey<ELFT::Is64Bits>{OutsecName, Type, Flags, EntSize};
+  return SectionKey<ELFT::Is64Bits>{OutsecName, Type, Flags, Alignment};
 }
 
 // The linker is expected to define some symbols depending on

Modified: lld/trunk/test/ELF/merge-string-align.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/merge-string-align.s?rev=261327&r1=261326&r2=261327&view=diff
==============================================================================
--- lld/trunk/test/ELF/merge-string-align.s (original)
+++ lld/trunk/test/ELF/merge-string-align.s Fri Feb 19 08:17:40 2016
@@ -1,15 +1,20 @@
 // REQUIRES: x86
 // RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
 // RUN: ld.lld %t.o -o %t.so -shared
-// RUN: llvm-readobj -s %t.so | FileCheck %s
+// RUN: llvm-readobj -s -section-data %t.so | FileCheck %s
 
-        .section        .rodata.str1.16,"aMS",@progbits,1
+        .section        .rodata.foo,"aMS",@progbits,1
         .align  16
         .asciz "foo"
 
-        .section        .rodata.str1.1,"aMS",@progbits,1
+        .section        .rodata.foo2,"aMS",@progbits,1
+        .align  16
         .asciz "foo"
 
+        .section        .rodata.bar,"aMS",@progbits,1
+        .align  16
+        .asciz "bar"
+
 // CHECK:      Name: .rodata
 // CHECK-NEXT: Type: SHT_PROGBITS
 // CHECK-NEXT: Flags [
@@ -19,10 +24,18 @@
 // CHECK-NEXT: ]
 // CHECK-NEXT: Address:
 // CHECK-NEXT: Offset:
-// CHECK-NEXT: Size: 4
+// CHECK-NEXT: Size: 20
 // CHECK-NEXT: Link: 0
 // CHECK-NEXT: Info: 0
 // CHECK-NEXT: AddressAlignment: 16
+// CHECK-NEXT: EntrySize:
+// CHECK-NEXT: SectionData (
+// CHECK-NEXT:   0000:  666F6F00 00000000 00000000 00000000  |foo.............|
+// CHECK-NEXT:   0010:  62617200                             |bar.|
+// CHECK-NEXT: )
+
+        .section        .rodata.str1.1,"aMS",@progbits,1
+        .asciz "foo"
 
 // CHECK:      Name: .rodata
 // CHECK-NEXT: Type: SHT_PROGBITS
@@ -37,3 +50,7 @@
 // CHECK-NEXT: Link: 0
 // CHECK-NEXT: Info: 0
 // CHECK-NEXT: AddressAlignment: 1
+// CHECK-NEXT: EntrySize:
+// CHECK-NEXT: SectionData (
+// CHECK-NEXT:   0000:  666F6F00 |foo.|
+// CHECK-NEXT: )

Added: lld/trunk/test/ELF/tail-merge-string-align.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/tail-merge-string-align.s?rev=261327&view=auto
==============================================================================
--- lld/trunk/test/ELF/tail-merge-string-align.s (added)
+++ lld/trunk/test/ELF/tail-merge-string-align.s Fri Feb 19 08:17:40 2016
@@ -0,0 +1,35 @@
+// REQUIRES: x86
+// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
+// RUN: ld.lld %t.o -o %t.so -shared -O3
+// RUN: llvm-readobj -s -section-data %t.so | FileCheck %s
+
+        .section        .rodata.4a,"aMS",@progbits,1
+        .align 4
+        .asciz "abcdef"
+
+        .section        .rodata.4b,"aMS",@progbits,1
+        .align 4
+        .asciz "ef"
+
+        .section        .rodata.4c,"aMS",@progbits,1
+        .align 4
+        .asciz "f"
+
+
+// CHECK:      Name: .rodata
+// CHECK-NEXT: Type: SHT_PROGBITS
+// CHECK-NEXT: Flags [
+// CHECK-NEXT:   SHF_ALLOC
+// CHECK-NEXT:   SHF_MERGE
+// CHECK-NEXT:   SHF_STRINGS
+// CHECK-NEXT: ]
+// CHECK-NEXT: Address:
+// CHECK-NEXT: Offset:
+// CHECK-NEXT: Size: 1
+// CHECK-NEXT: Link: 0
+// CHECK-NEXT: Info: 0
+// CHECK-NEXT: AddressAlignment: 4
+// CHECK-NEXT: EntrySize:
+// CHECK-NEXT: SectionData (
+// CHECK-NEXT:   0000:    61626364 65660000 6600               |abcdef..f.|
+// CHECK-NEXT: )




More information about the llvm-commits mailing list