[llvm] r205990 - Reimplement debug info compression by compressing the whole section, rather than a fragment.

David Blaikie dblaikie at gmail.com
Thu Apr 10 14:53:53 PDT 2014


Author: dblaikie
Date: Thu Apr 10 16:53:53 2014
New Revision: 205990

URL: http://llvm.org/viewvc/llvm-project?rev=205990&view=rev
Log:
Reimplement debug info compression by compressing the whole section, rather than a fragment.

To support compressing the debug_line section that contains multiple
fragments (due, I believe, to variation in choices of line table
encoding depending on the size of instruction ranges in the actual
program code) we needed to support compressing multiple MCFragments in a
single pass.

This patch implements that behavior by mutating the post-relaxed and
relocated section to be the compressed form of its former self,
including renaming the section.

This is a more flexible (and less invasive, to a degree) implementation
that will allow for other features such as "use compression only if it's
smaller than the uncompressed data".

Compressing debug_frame would be a possible further extension to this
work, but I've left it for now. The hurdle there is alignment sections -
which might require going as far as to refactor
MCAssembler.cpp:writeFragment to handle writing to a byte buffer or an
MCObjectWriter (there's already a virtual call there, so it shouldn't
add substantial compile-time cost) which could in turn involve
refactoring MCAsmBackend::writeNopData to use that same abstraction...
which involves touching all the backends. This would remove the limited
handling of fragment writing seen in
ELFObjectWriter.cpp:getUncompressedData which would be nice - but it's
more invasive.

I did discover that I (perhaps obviously) don't need to handle
relocations when I rewrite the fragments - since the relocations have
already been applied and computed (and stored into
ELFObjectWriter::Relocations) by this stage (necessarily, because we
need to have written any immediate values or assembly-time relocations
into the data already before we compress it, which we have). The test
case doesn't necessarily cover that in detail - I can add more test
coverage if that's preferred.

Modified:
    llvm/trunk/include/llvm/MC/MCContext.h
    llvm/trunk/include/llvm/MC/MCSectionELF.h
    llvm/trunk/lib/MC/ELFObjectWriter.cpp
    llvm/trunk/lib/MC/MCContext.cpp
    llvm/trunk/test/MC/ELF/compression.s

Modified: llvm/trunk/include/llvm/MC/MCContext.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCContext.h?rev=205990&r1=205989&r2=205990&view=diff
==============================================================================
--- llvm/trunk/include/llvm/MC/MCContext.h (original)
+++ llvm/trunk/include/llvm/MC/MCContext.h Thu Apr 10 16:53:53 2014
@@ -259,6 +259,8 @@ namespace llvm {
                                       unsigned Flags, SectionKind Kind,
                                       unsigned EntrySize, StringRef Group);
 
+    void renameELFSection(const MCSectionELF *Section, StringRef Name);
+
     const MCSectionELF *CreateELFGroupSection();
 
     const MCSectionCOFF *getCOFFSection(StringRef Section,

Modified: llvm/trunk/include/llvm/MC/MCSectionELF.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCSectionELF.h?rev=205990&r1=205989&r2=205990&view=diff
==============================================================================
--- llvm/trunk/include/llvm/MC/MCSectionELF.h (original)
+++ llvm/trunk/include/llvm/MC/MCSectionELF.h Thu Apr 10 16:53:53 2014
@@ -53,6 +53,8 @@ private:
     : MCSection(SV_ELF, K), SectionName(Section), Type(type), Flags(flags),
       EntrySize(entrySize), Group(group) {}
   ~MCSectionELF();
+
+  void setSectionName(StringRef Name) { SectionName = Name; }
 public:
 
   /// ShouldOmitSectionDirective - Decides whether a '.section' directive

Modified: llvm/trunk/lib/MC/ELFObjectWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/ELFObjectWriter.cpp?rev=205990&r1=205989&r2=205990&view=diff
==============================================================================
--- llvm/trunk/lib/MC/ELFObjectWriter.cpp (original)
+++ llvm/trunk/lib/MC/ELFObjectWriter.cpp Thu Apr 10 16:53:53 2014
@@ -17,6 +17,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCAsmLayout.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
@@ -27,6 +28,7 @@
 #include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCValue.h"
+#include "llvm/Support/Compression.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/ELF.h"
@@ -253,6 +255,8 @@ class ELFObjectWriter : public MCObjectW
     void CreateRelocationSections(MCAssembler &Asm, MCAsmLayout &Layout,
                                   RelMapTy &RelMap);
 
+    void CompressDebugSections(MCAssembler &Asm, MCAsmLayout &Layout);
+
     void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout,
                           const RelMapTy &RelMap);
 
@@ -1168,6 +1172,114 @@ void ELFObjectWriter::CreateRelocationSe
   }
 }
 
+static SmallVector<char, 128> getUncompressedData(MCAsmLayout &Layout, MCSectionData::FragmentListType &Fragments) {
+  SmallVector<char, 128> UncompressedData;
+  for (const MCFragment &F : Fragments) {
+    const SmallVectorImpl<char> *Contents;
+    switch (F.getKind()) {
+    case MCFragment::FT_Data:
+      Contents = &cast<MCDataFragment>(F).getContents();
+      break;
+    case MCFragment::FT_Dwarf:
+      Contents = &cast<MCDwarfLineAddrFragment>(F).getContents();
+      break;
+    case MCFragment::FT_DwarfFrame:
+      Contents = &cast<MCDwarfCallFrameFragment>(F).getContents();
+      break;
+    default:
+      llvm_unreachable(
+          "Not expecting any other fragment types in a debug_* section");
+    }
+    UncompressedData.append(Contents->begin(), Contents->end());
+  }
+  return UncompressedData;
+}
+
+// Include the debug info compression header:
+// "ZLIB" followed by 8 bytes representing the uncompressed size of the section,
+// useful for consumers to preallocate a buffer to decompress into.
+static void prependCompressionHeader(uint64_t Size, SmallVectorImpl<char> &CompressedContents) {
+  static const StringRef Magic = "ZLIB";
+  if (sys::IsLittleEndianHost)
+    Size = sys::SwapByteOrder(Size);
+  CompressedContents.insert(CompressedContents.begin(),
+                            Magic.size() + sizeof(Size), 0);
+  std::copy(Magic.begin(), Magic.end(), CompressedContents.begin());
+  std::copy(reinterpret_cast<char *>(&Size),
+            reinterpret_cast<char *>(&Size + 1),
+            CompressedContents.begin() + Magic.size());
+}
+
+// Return a single fragment containing the compressed contents of the whole
+// section. Null if the section was not compressed for any reason.
+static std::unique_ptr<MCDataFragment> getCompressedFragment(MCAsmLayout &Layout, MCSectionData::FragmentListType &Fragments) {
+  std::unique_ptr<MCDataFragment> CompressedFragment(new MCDataFragment());
+
+  // Gather the uncompressed data from all the fragments, recording the
+  // alignment fragment, if seen, and any fixups.
+  SmallVector<char, 128> UncompressedData =
+      getUncompressedData(Layout, Fragments);
+
+  SmallVectorImpl<char> &CompressedContents = CompressedFragment->getContents();
+
+  zlib::Status Success = zlib::compress(
+      StringRef(UncompressedData.data(), UncompressedData.size()),
+      CompressedContents);
+  if (Success != zlib::StatusOK)
+    return nullptr;
+
+  prependCompressionHeader(UncompressedData.size(), CompressedContents);
+
+  return CompressedFragment;
+}
+
+static void CompressDebugSection(MCAssembler &Asm, MCAsmLayout &Layout,
+                                 const MCSectionELF &Section,
+                                 MCSectionData &SD) {
+  StringRef SectionName = Section.getSectionName();
+  MCSectionData::FragmentListType &Fragments = SD.getFragmentList();
+
+  std::unique_ptr<MCDataFragment> CompressedFragment =
+      getCompressedFragment(Layout, Fragments);
+
+  // Leave the section as-is if the fragments could not be compressed.
+  if (!CompressedFragment)
+    return;
+
+  // Invalidate the layout for the whole section since it will have new and
+  // different fragments now.
+  Layout.invalidateFragmentsFrom(&Fragments.front());
+  Fragments.clear();
+
+  // Complete the initialization of the new fragment
+  CompressedFragment->setParent(&SD);
+  CompressedFragment->setLayoutOrder(0);
+  Fragments.push_back(CompressedFragment.release());
+
+  // Rename from .debug_* to .zdebug_*
+  Asm.getContext().renameELFSection(&Section,
+                                    (".z" + SectionName.drop_front(1)).str());
+}
+
+void ELFObjectWriter::CompressDebugSections(MCAssembler &Asm,
+                                            MCAsmLayout &Layout) {
+  if (!Asm.getContext().getAsmInfo()->compressDebugSections())
+    return;
+
+  for (MCSectionData &SD : Asm) {
+    const MCSectionELF &Section = static_cast<const MCSectionELF&>(SD.getSection());
+    StringRef SectionName = Section.getSectionName();
+
+    // Compressing debug_frame requires handling alignment fragments which is
+    // more work (possibly generalizing MCAssembler.cpp:writeFragment to allow
+    // for writing to arbitrary buffers) for little benefit.
+    if (!SectionName.startswith(".debug_") || SectionName == ".debug_frame")
+      continue;
+
+    CompressDebugSection(Asm, Layout, Section, SD);
+  }
+}
+
 void ELFObjectWriter::WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout,
                                        const RelMapTy &RelMap) {
   for (MCAssembler::const_iterator it = Asm.begin(),
@@ -1652,6 +1764,8 @@ void ELFObjectWriter::WriteObject(MCAsse
 
   unsigned NumUserSections = Asm.size();
 
+  CompressDebugSections(Asm, const_cast<MCAsmLayout&>(Layout));
+
   DenseMap<const MCSectionELF*, const MCSectionELF*> RelMap;
   CreateRelocationSections(Asm, const_cast<MCAsmLayout&>(Layout), RelMap);
 

Modified: llvm/trunk/lib/MC/MCContext.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCContext.cpp?rev=205990&r1=205989&r2=205990&view=diff
==============================================================================
--- llvm/trunk/lib/MC/MCContext.cpp (original)
+++ llvm/trunk/lib/MC/MCContext.cpp Thu Apr 10 16:53:53 2014
@@ -251,6 +251,21 @@ getELFSection(StringRef Section, unsigne
   return getELFSection(Section, Type, Flags, Kind, 0, "");
 }
 
+void MCContext::renameELFSection(const MCSectionELF *Section, StringRef Name) {
+  if (ELFUniquingMap == 0)
+    ELFUniquingMap = new ELFUniqueMapTy();
+  ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)ELFUniquingMap;
+
+  StringRef GroupName;
+  if (const MCSymbol *Group = Section->getGroup())
+    GroupName = Group->getName();
+
+  Map.erase(SectionGroupPair(Section->getSectionName(), GroupName));
+  auto I = Map.insert(std::make_pair(SectionGroupPair(Name, GroupName),
+                                     Section)).first;
+  const_cast<MCSectionELF*>(Section)->setSectionName(I->first.first);
+}
+
 const MCSectionELF *MCContext::
 getELFSection(StringRef Section, unsigned Type, unsigned Flags,
               SectionKind Kind, unsigned EntrySize, StringRef Group) {

Modified: llvm/trunk/test/MC/ELF/compression.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ELF/compression.s?rev=205990&r1=205989&r2=205990&view=diff
==============================================================================
--- llvm/trunk/test/MC/ELF/compression.s (original)
+++ llvm/trunk/test/MC/ELF/compression.s Thu Apr 10 16:53:53 2014
@@ -1,25 +1,35 @@
-// RUN: llvm-mc -filetype=obj -compress-debug-sections -triple x86_64-pc-linux-gnu %s -o - | llvm-objdump -s - | FileCheck %s
-
-// XFAIL: *
+// RUN: llvm-mc -filetype=obj -compress-debug-sections -triple x86_64-pc-linux-gnu %s -o %t
+// RUN: llvm-objdump -s %t | FileCheck %s
+// RUN: llvm-dwarfdump -debug-dump=abbrev %t | FileCheck --check-prefix=ABBREV %s
 
 // REQUIRES: zlib
 
-// CHECK: Contents of section .debug_line:
-// FIXME: Figure out how to handle debug_line that currently uses multiple section fragments
+// CHECK: Contents of section .zdebug_line:
+// Check for the 'ZLIB' file magic at the start of the section only
+// CHECK-NEXT: ZLIB
 // CHECK-NOT: ZLIB
+// CHECK: Contents of
 
 // CHECK: Contents of section .zdebug_abbrev:
-// Check for the 'ZLIB' file magic at the start of the section
 // CHECK-NEXT: ZLIB
 
-// We shouldn't compress the debug_frame section, since it can be relaxed
-// CHECK: Contents of section .debug_frame
+// FIXME: Handle compressing alignment fragments to support compressing debug_frame
+// CHECK: Contents of section .debug_frame:
 // CHECK-NOT: ZLIB
+// CHECK: Contents of
+
+// Decompress one valid dwarf section just to check that this roundtrips
+// ABBREV: Abbrev table for offset: 0x00000000
+// ABBREV: [1] DW_TAG_compile_unit DW_CHILDREN_no
 
 	.section	.debug_line,"", at progbits
 
 	.section	.debug_abbrev,"", at progbits
 	.byte	1                       # Abbreviation Code
+	.byte	17                      # DW_TAG_compile_unit
+	.byte	0                       # DW_CHILDREN_no
+	.byte	0                       # EOM(1)
+	.byte	0                       # EOM(2)
 	.text
 foo:
 	.cfi_startproc





More information about the llvm-commits mailing list