[lld] r284068 - Support GNU-style ZLIB-compressed input sections.

Rui Ueyama via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 12 15:36:32 PDT 2016


Author: ruiu
Date: Wed Oct 12 17:36:31 2016
New Revision: 284068

URL: http://llvm.org/viewvc/llvm-project?rev=284068&view=rev
Log:
Support GNU-style ZLIB-compressed input sections.

Previously, we supported only SHF_COMPRESSED sections because that
format is new and is the ELF standard. But there are object files
compressed in the GNU style out there, so we have to support them too.

Sections compressed in the GNU style have names that start with
".zdebug_" and contain a different header than the ELF standard's
one. In this patch, getRawCompressedData is responsible for handling it.

A tricky thing about GNU-style compressed sections is that we have
to rename them when creating output sections. The ".zdebug_" prefix
implies the section is compressed. We need to rename ".zdebug_" to
".debug_" because our output sections are not compressed.
We do that in this patch.

Modified:
    lld/trunk/ELF/InputSection.cpp
    lld/trunk/ELF/InputSection.h
    lld/trunk/ELF/LinkerScript.cpp
    lld/trunk/ELF/Writer.cpp
    lld/trunk/ELF/Writer.h
    lld/trunk/test/ELF/compressed-debug-input.s

Modified: lld/trunk/ELF/InputSection.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/InputSection.cpp?rev=284068&r1=284067&r2=284068&view=diff
==============================================================================
--- lld/trunk/ELF/InputSection.cpp (original)
+++ lld/trunk/ELF/InputSection.cpp Wed Oct 12 17:36:31 2016
@@ -23,6 +23,7 @@
 using namespace llvm;
 using namespace llvm::ELF;
 using namespace llvm::object;
+using namespace llvm::support;
 using namespace llvm::support::endian;
 
 using namespace lld;
@@ -40,12 +41,19 @@ static ArrayRef<uint8_t> getSectionConte
   return check(File->getObj().getSectionContents(Hdr));
 }
 
+// ELF supports ZLIB-compressed section. Returns true if the section
+// is compressed.
+template <class ELFT>
+static bool isCompressed(const typename ELFT::Shdr *Hdr, StringRef Name) {
+  return (Hdr->sh_flags & SHF_COMPRESSED) || Name.startswith(".zdebug");
+}
+
 template <class ELFT>
 InputSectionBase<ELFT>::InputSectionBase(elf::ObjectFile<ELFT> *File,
                                          const Elf_Shdr *Hdr, StringRef Name,
                                          Kind SectionKind)
     : InputSectionData(SectionKind, Name, getSectionContents(File, Hdr),
-                       Hdr->sh_flags & SHF_COMPRESSED, !Config->GcSections),
+                       isCompressed<ELFT>(Hdr, Name), !Config->GcSections),
       Header(Hdr), File(File), Repl(this) {
   // The ELF spec states that a value of 0 means the section has
   // no alignment constraits.
@@ -100,30 +108,62 @@ typename ELFT::uint InputSectionBase<ELF
   llvm_unreachable("invalid section kind");
 }
 
-template <class ELFT> void InputSectionBase<ELFT>::uncompress() {
-  if (!zlib::isAvailable())
-    fatal(getName(this) +
-          ": build lld with zlib to enable compressed sections support");
-
-  // A compressed section consists of a header of Elf_Chdr type
-  // followed by compressed data.
+// Returns compressed data and its size when uncompressed.
+template <class ELFT>
+std::pair<ArrayRef<uint8_t>, uint64_t>
+InputSectionBase<ELFT>::getElfCompressedData(ArrayRef<uint8_t> Data) {
+  // Compressed section with Elf_Chdr is the ELF standard.
   if (Data.size() < sizeof(Elf_Chdr))
-    fatal("corrupt compressed section");
-
+    fatal(getName(this) + ": corrupted compressed section");
   auto *Hdr = reinterpret_cast<const Elf_Chdr *>(Data.data());
-  Data = Data.slice(sizeof(Elf_Chdr));
-
   if (Hdr->ch_type != ELFCOMPRESS_ZLIB)
     fatal(getName(this) + ": unsupported compression type");
+  return {Data.slice(sizeof(*Hdr)), Hdr->ch_size};
+}
+
+// Returns compressed data and its size when uncompressed.
+template <class ELFT>
+std::pair<ArrayRef<uint8_t>, uint64_t>
+InputSectionBase<ELFT>::getRawCompressedData(ArrayRef<uint8_t> Data) {
+  // Compressed sections without Elf_Chdr header contain this header
+  // instead. This is a GNU extension.
+  struct ZlibHeader {
+    char magic[4]; // should be "ZLIB"
+    char Size[8];  // Uncompressed size in big-endian
+  };
+
+  if (Data.size() < sizeof(ZlibHeader))
+    fatal(getName(this) + ": corrupted compressed section");
+  auto *Hdr = reinterpret_cast<const ZlibHeader *>(Data.data());
+  if (memcmp(Hdr->magic, "ZLIB", 4))
+    fatal(getName(this) + ": broken ZLIB-compressed section");
+  return {Data.slice(sizeof(*Hdr)), read64be(Hdr->Size)};
+}
+
+template <class ELFT> void InputSectionBase<ELFT>::uncompress() {
+  if (!zlib::isAvailable())
+    fatal(getName(this) +
+          ": build lld with zlib to enable compressed sections support");
 
-  StringRef Buf((const char *)Data.data(), Data.size());
-  size_t UncompressedDataSize = Hdr->ch_size;
-  UncompressedData.reset(new char[UncompressedDataSize]);
-  if (zlib::uncompress(Buf, UncompressedData.get(), UncompressedDataSize) !=
-      zlib::StatusOK)
-    fatal(getName(this) + ": error uncompressing section");
-  Data = ArrayRef<uint8_t>((uint8_t *)UncompressedData.get(),
-                           UncompressedDataSize);
+  // This section is compressed. Here we decompress it. Ideally, all
+  // compressed sections have SHF_COMPRESSED bit and their contents
+  // start with headers of Elf_Chdr type. However, sections whose
+  // names start with ".zdebug_" don't have the bit and contains a raw
+  // ZLIB-compressed data (which is a bad thing because section names
+  // shouldn't be significant in ELF.) We need to be able to read both.
+  ArrayRef<uint8_t> Buf; // Compressed data
+  size_t Size;           // Uncompressed size
+  if (Header->sh_flags & SHF_COMPRESSED)
+    std::tie(Buf, Size) = getElfCompressedData(Data);
+  else
+    std::tie(Buf, Size) = getRawCompressedData(Data);
+
+  // Uncompress Buf.
+  UncompressedData.reset(new uint8_t[Size]);
+  if (zlib::uncompress(StringRef((const char *)Buf.data(), Buf.size()),
+                       (char *)UncompressedData.get(), Size) != zlib::StatusOK)
+    fatal(getName(this) + ": error while uncompressing section");
+  Data = ArrayRef<uint8_t>(UncompressedData.get(), Size);
 }
 
 template <class ELFT>

Modified: lld/trunk/ELF/InputSection.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/InputSection.h?rev=284068&r1=284067&r2=284068&view=diff
==============================================================================
--- lld/trunk/ELF/InputSection.h (original)
+++ lld/trunk/ELF/InputSection.h Wed Oct 12 17:36:31 2016
@@ -67,7 +67,7 @@ public:
   ArrayRef<uint8_t> getData(const SectionPiece &P) const;
 
   // If a section is compressed, this has the uncompressed section data.
-  std::unique_ptr<char[]> UncompressedData;
+  std::unique_ptr<uint8_t[]> UncompressedData;
 
   std::vector<Relocation> Relocations;
 };
@@ -118,6 +118,13 @@ public:
   void uncompress();
 
   void relocate(uint8_t *Buf, uint8_t *BufEnd);
+
+private:
+  std::pair<ArrayRef<uint8_t>, uint64_t>
+  getElfCompressedData(ArrayRef<uint8_t> Data);
+
+  std::pair<ArrayRef<uint8_t>, uint64_t>
+  getRawCompressedData(ArrayRef<uint8_t> Data);
 };
 
 template <class ELFT> InputSectionBase<ELFT> InputSectionBase<ELFT>::Discarded;

Modified: lld/trunk/ELF/LinkerScript.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/LinkerScript.cpp?rev=284068&r1=284067&r2=284068&view=diff
==============================================================================
--- lld/trunk/ELF/LinkerScript.cpp (original)
+++ lld/trunk/ELF/LinkerScript.cpp Wed Oct 12 17:36:31 2016
@@ -351,7 +351,7 @@ void LinkerScript<ELFT>::createSections(
   for (ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles())
     for (InputSectionBase<ELFT> *S : F->getSections())
       if (!isDiscarded(S) && !S->OutSec)
-        addSection(Factory, S, getOutputSectionName(S->Name));
+        addSection(Factory, S, getOutputSectionName(S->Name, Opt.Alloc));
 }
 
 // Sets value of a section-defined symbol. Two kinds of

Modified: lld/trunk/ELF/Writer.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Writer.cpp?rev=284068&r1=284067&r2=284068&view=diff
==============================================================================
--- lld/trunk/ELF/Writer.cpp (original)
+++ lld/trunk/ELF/Writer.cpp Wed Oct 12 17:36:31 2016
@@ -91,7 +91,7 @@ private:
 };
 } // anonymous namespace
 
-StringRef elf::getOutputSectionName(StringRef Name) {
+StringRef elf::getOutputSectionName(StringRef Name, BumpPtrAllocator &Alloc) {
   if (Config->Relocatable)
     return Name;
 
@@ -103,6 +103,11 @@ StringRef elf::getOutputSectionName(Stri
     if (Name.startswith(V) || Name == Prefix)
       return Prefix;
   }
+
+  // ".zdebug_" is a prefix for ZLIB-compressed sections.
+  // Because we decompressed input sections, we want to remove 'z'.
+  if (Name.startswith(".zdebug_"))
+    return StringSaver(Alloc).save(Twine(".") + Name.substr(2));
   return Name;
 }
 
@@ -699,7 +704,8 @@ template <class ELFT> void Writer<ELFT>:
       }
       OutputSectionBase<ELFT> *Sec;
       bool IsNew;
-      std::tie(Sec, IsNew) = Factory.create(IS, getOutputSectionName(IS->Name));
+      StringRef OutsecName = getOutputSectionName(IS->Name, Alloc);
+      std::tie(Sec, IsNew) = Factory.create(IS, OutsecName);
       if (IsNew)
         OutputSections.push_back(Sec);
       Sec->addSection(IS);

Modified: lld/trunk/ELF/Writer.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Writer.h?rev=284068&r1=284067&r2=284068&view=diff
==============================================================================
--- lld/trunk/ELF/Writer.h (original)
+++ lld/trunk/ELF/Writer.h Wed Oct 12 17:36:31 2016
@@ -10,13 +10,11 @@
 #ifndef LLD_ELF_WRITER_H
 #define LLD_ELF_WRITER_H
 
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
 #include <cstdint>
 #include <memory>
 
-namespace llvm {
-  class StringRef;
-}
-
 namespace lld {
 namespace elf {
 template <class ELFT> class OutputSectionBase;
@@ -41,7 +39,8 @@ struct PhdrEntry {
   bool HasLMA = false;
 };
 
-llvm::StringRef getOutputSectionName(llvm::StringRef Name);
+llvm::StringRef getOutputSectionName(llvm::StringRef Name,
+                                     llvm::BumpPtrAllocator &Alloc);
 
 template <class ELFT> void reportDiscarded(InputSectionBase<ELFT> *IS);
 

Modified: lld/trunk/test/ELF/compressed-debug-input.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/compressed-debug-input.s?rev=284068&r1=284067&r2=284068&view=diff
==============================================================================
--- lld/trunk/test/ELF/compressed-debug-input.s (original)
+++ lld/trunk/test/ELF/compressed-debug-input.s Wed Oct 12 17:36:31 2016
@@ -1,52 +1,73 @@
 # REQUIRES: zlib
 
 # RUN: llvm-mc -compress-debug-sections=zlib -filetype=obj -triple=x86_64-unknown-linux %s -o %t
-# RUN: llvm-readobj -sections %t | FileCheck -check-prefix=COMPRESSED %s
+# RUN: llvm-readobj -sections %t | FileCheck -check-prefix=ZLIB %s
+# ZLIB:      Section {
+# ZLIB:        Index: 2
+# ZLIB:        Name: .debug_str
+# ZLIB-NEXT:   Type: SHT_PROGBITS
+# ZLIB-NEXT:   Flags [
+# ZLIB-NEXT:     SHF_COMPRESSED (0x800)
+# ZLIB-NEXT:     SHF_MERGE (0x10)
+# ZLIB-NEXT:     SHF_STRINGS (0x20)
+# ZLIB-NEXT:   ]
+# ZLIB-NEXT:   Address:
+# ZLIB-NEXT:   Offset:
+# ZLIB-NEXT:   Size:
+# ZLIB-NEXT:   Link:
+# ZLIB-NEXT:   Info:
+# ZLIB-NEXT:   AddressAlignment: 1
+# ZLIB-NEXT:   EntrySize: 1
+# ZLIB-NEXT: }
 
-# COMPRESSED:      Section {
-# COMPRESSED:        Index: 2
-# COMPRESSED:        Name: .debug_str
-# COMPRESSED-NEXT:   Type: SHT_PROGBITS
-# COMPRESSED-NEXT:   Flags [
-# COMPRESSED-NEXT:     SHF_COMPRESSED (0x800)
-# COMPRESSED-NEXT:     SHF_MERGE (0x10)
-# COMPRESSED-NEXT:     SHF_STRINGS (0x20)
-# COMPRESSED-NEXT:   ]
-# COMPRESSED-NEXT:   Address:
-# COMPRESSED-NEXT:   Offset:
-# COMPRESSED-NEXT:   Size: 66
-# COMPRESSED-NEXT:   Link:
-# COMPRESSED-NEXT:   Info:
-# COMPRESSED-NEXT:   AddressAlignment: 1
-# COMPRESSED-NEXT:   EntrySize: 1
-# COMPRESSED-NEXT: }
+# RUN: llvm-mc -compress-debug-sections=zlib-gnu -filetype=obj -triple=x86_64-unknown-linux %s -o %t2
+# RUN: llvm-readobj -sections %t2 | FileCheck -check-prefix=GNU %s
+# GNU:      Section {
+# GNU:        Index: 2
+# GNU:        Name: .zdebug_str
+# GNU-NEXT:   Type: SHT_PROGBITS
+# GNU-NEXT:   Flags [
+# GNU-NEXT:     SHF_MERGE (0x10)
+# GNU-NEXT:     SHF_STRINGS (0x20)
+# GNU-NEXT:   ]
+# GNU-NEXT:   Address:
+# GNU-NEXT:   Offset:
+# GNU-NEXT:   Size:
+# GNU-NEXT:   Link:
+# GNU-NEXT:   Info:
+# GNU-NEXT:   AddressAlignment: 1
+# GNU-NEXT:   EntrySize: 1
+# GNU-NEXT: }
 
 # RUN: ld.lld %t -o %t.so -shared
-# RUN: llvm-readobj -sections -section-data %t.so | FileCheck -check-prefix=UNCOMPRESSED %s
+# RUN: llvm-readobj -sections -section-data %t.so | FileCheck -check-prefix=DATA %s
 
-# UNCOMPRESSED:      Section {
-# UNCOMPRESSED:        Index: 6
-# UNCOMPRESSED:        Name: .debug_str
-# UNCOMPRESSED-NEXT:   Type: SHT_PROGBITS
-# UNCOMPRESSED-NEXT:   Flags [
-# UNCOMPRESSED-NEXT:     SHF_MERGE (0x10)
-# UNCOMPRESSED-NEXT:     SHF_STRINGS (0x20)
-# UNCOMPRESSED-NEXT:   ]
-# UNCOMPRESSED-NEXT:   Address: 0x0
-# UNCOMPRESSED-NEXT:   Offset: 0x1060
-# UNCOMPRESSED-NEXT:   Size: 69
-# UNCOMPRESSED-NEXT:   Link: 0
-# UNCOMPRESSED-NEXT:   Info: 0
-# UNCOMPRESSED-NEXT:   AddressAlignment: 1
-# UNCOMPRESSED-NEXT:   EntrySize: 1
-# UNCOMPRESSED-NEXT:   SectionData (
-# UNCOMPRESSED-NEXT:     0000: 73686F72 7420756E 7369676E 65642069  |short unsigned i|
-# UNCOMPRESSED-NEXT:     0010: 6E740075 6E736967 6E656420 696E7400  |nt.unsigned int.|
-# UNCOMPRESSED-NEXT:     0020: 6C6F6E67 20756E73 69676E65 6420696E  |long unsigned in|
-# UNCOMPRESSED-NEXT:     0030: 74006368 61720075 6E736967 6E656420  |t.char.unsigned |
-# UNCOMPRESSED-NEXT:     0040: 63686172 00                          |char.|
-# UNCOMPRESSED-NEXT:   )
-# UNCOMPRESSED-NEXT: }
+# RUN: ld.lld %t2 -o %t2.so -shared
+# RUN: llvm-readobj -sections -section-data %t2.so | FileCheck -check-prefix=DATA %s
+
+# DATA:      Section {
+# DATA:        Index: 6
+# DATA:        Name: .debug_str
+# DATA-NEXT:   Type: SHT_PROGBITS
+# DATA-NEXT:   Flags [
+# DATA-NEXT:     SHF_MERGE (0x10)
+# DATA-NEXT:     SHF_STRINGS (0x20)
+# DATA-NEXT:   ]
+# DATA-NEXT:   Address: 0x0
+# DATA-NEXT:   Offset: 0x1060
+# DATA-NEXT:   Size: 69
+# DATA-NEXT:   Link: 0
+# DATA-NEXT:   Info: 0
+# DATA-NEXT:   AddressAlignment: 1
+# DATA-NEXT:   EntrySize: 1
+# DATA-NEXT:   SectionData (
+# DATA-NEXT:     0000: 73686F72 7420756E 7369676E 65642069  |short unsigned i|
+# DATA-NEXT:     0010: 6E740075 6E736967 6E656420 696E7400  |nt.unsigned int.|
+# DATA-NEXT:     0020: 6C6F6E67 20756E73 69676E65 6420696E  |long unsigned in|
+# DATA-NEXT:     0030: 74006368 61720075 6E736967 6E656420  |t.char.unsigned |
+# DATA-NEXT:     0040: 63686172 00                          |char.|
+# DATA-NEXT:   )
+# DATA-NEXT: }
 
 .section .debug_str,"MS",@progbits,1
 .LASF2:




More information about the llvm-commits mailing list