[patch] compress debug sections

David Blaikie dblaikie at gmail.com
Wed Mar 19 15:34:26 PDT 2014


So I've poked around with trying to implement
-Wa,-compress-debug-sections and come up with the following patch.

(There are some Clang changes too, to initialize the MCContext flag -
but for now, if you want to experiment with this, you can just change
the ctor initialization of that flag to force compression on always.)

I have a bunch of uncertainties about this approach

* compressing on a per-fragment basis. This 'works', since we never
create more than one fragment for each debug info section (so far as I
know) - but I don't understand the fragments well enough to know if
this is guaranteed. If this isn't guaranteed, then we have to move the
compression up into somewhere like ELFObjectWriter::WriteObject (since
we'll have to do a cross-fragment computation of size and emission)

* Is there a nicer way to detect the sections that should be
compressed (than just checking for a .debug_ prefix)?

* Is there a nicer way to detect that we're in a compressed section, so
that we create a compressed fragment?

* Caching the compressed data - should that cache be invalidated or
regenerated if the underlying data ever changes? Or can we rely on the
size/contents never changing after it's queried for during object
emission?
-------------- next part --------------
commit 5ff67bb51e20cacfd56edab92b622e87562cb529
Author: David Blaikie <dblaikie at gmail.com>
Date:   Tue Mar 18 17:01:46 2014 -0700

    First pass

diff --git include/llvm/MC/MCAssembler.h include/llvm/MC/MCAssembler.h
index c4b475e..78f029d 100644
--- include/llvm/MC/MCAssembler.h
+++ include/llvm/MC/MCAssembler.h
@@ -50,6 +50,7 @@ public:
   enum FragmentType {
     FT_Align,
     FT_Data,
+    FT_Compressed,
     FT_CompactEncodedInst,
     FT_Fill,
     FT_Relaxable,
@@ -159,6 +160,7 @@ public:
         return false;
       case MCFragment::FT_Relaxable:
       case MCFragment::FT_CompactEncodedInst:
+      case MCFragment::FT_Compressed:
       case MCFragment::FT_Data:
         return true;
     }
@@ -193,7 +195,8 @@ public:
 
   static bool classof(const MCFragment *F) {
     MCFragment::FragmentType Kind = F->getKind();
-    return Kind == MCFragment::FT_Relaxable || Kind == MCFragment::FT_Data;
+    return Kind == MCFragment::FT_Relaxable || Kind == MCFragment::FT_Data ||
+           Kind == MCFragment::FT_Compressed;
   }
 };
 
@@ -212,6 +215,11 @@ class MCDataFragment : public MCEncodedFragmentWithFixups {
 
   /// Fixups - The list of fixups in this fragment.
   SmallVector<MCFixup, 4> Fixups;
+protected:
+  MCDataFragment(MCFragment::FragmentType FType, MCSectionData *SD = 0)
+      : MCEncodedFragmentWithFixups(FType, SD), HasInstructions(false),
+        AlignToBundleEnd(false) {}
+
 public:
   MCDataFragment(MCSectionData *SD = 0)
     : MCEncodedFragmentWithFixups(FT_Data, SD),
@@ -245,10 +253,21 @@ public:
   const_fixup_iterator fixup_end() const override {return Fixups.end();}
 
   static bool classof(const MCFragment *F) {
-    return F->getKind() == MCFragment::FT_Data;
+    return F->getKind() == MCFragment::FT_Data ||
+           F->getKind() == MCFragment::FT_Compressed;
   }
 };
 
+class MCCompressedFragment: public MCDataFragment {
+  void anchor() override;
+
+  mutable SmallVector<char, 32> CompressedContents;
+public:
+  MCCompressedFragment(MCSectionData *SD = nullptr)
+      : MCDataFragment(FT_Compressed, SD) {}
+  const SmallVectorImpl<char> &getCompressedContents() const;
+};
+
 /// This is a compact (memory-size-wise) fragment for holding an encoded
 /// instruction (non-relaxable) that has no fixups registered. When applicable,
 /// it can be used instead of MCDataFragment and lead to lower memory
diff --git include/llvm/MC/MCContext.h include/llvm/MC/MCContext.h
index afb94f0..ee25eb0 100644
--- include/llvm/MC/MCContext.h
+++ include/llvm/MC/MCContext.h
@@ -152,6 +152,9 @@ namespace llvm {
     /// Darwin).
     bool AllowTemporaryLabels;
 
+    /// Zlib compress .debug_* sections.
+    bool CompressDebugSections;
+
     /// The Compile Unit ID that we are currently processing.
     unsigned DwarfCompileUnitID;
 
@@ -181,6 +184,8 @@ namespace llvm {
 
     void setAllowTemporaryLabels(bool Value) { AllowTemporaryLabels = Value; }
 
+    void setCompressDebugSections(bool Value) { CompressDebugSections = Value; }
+
     /// @name Module Lifetime Management
     /// @{
 
diff --git lib/MC/MCAssembler.cpp lib/MC/MCAssembler.cpp
index 89d7d19..c12931c 100644
--- lib/MC/MCAssembler.cpp
+++ lib/MC/MCAssembler.cpp
@@ -28,6 +28,9 @@
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Compression.h"
+#include "llvm/Support/Host.h"
 
 using namespace llvm;
 
@@ -230,6 +233,31 @@ MCEncodedFragmentWithFixups::~MCEncodedFragmentWithFixups() {
 
 /* *** */
 
+const SmallVectorImpl<char> &MCCompressedFragment::getCompressedContents() const {
+  if (CompressedContents.empty()) {
+    std::unique_ptr<MemoryBuffer> CompressedSection;
+    zlib::Status Success =
+        zlib::compress(StringRef(getContents().data(), getContents().size()),
+                       CompressedSection);
+    (void)Success;
+    assert(Success == zlib::StatusOK);
+    CompressedContents.push_back('Z');
+    CompressedContents.push_back('L');
+    CompressedContents.push_back('I');
+    CompressedContents.push_back('B');
+    uint64_t Size = getContents().size();
+    if (sys::IsLittleEndianHost)
+      Size = sys::SwapByteOrder(Size);
+    CompressedContents.append(reinterpret_cast<char *>(&Size),
+                              reinterpret_cast<char *>(&Size + 1));
+    CompressedContents.append(CompressedSection->getBuffer().begin(),
+                              CompressedSection->getBuffer().end());
+  }
+  return CompressedContents;
+}
+
+/* *** */
+
 MCSectionData::MCSectionData() : Section(0) {}
 
 MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A)
@@ -429,6 +457,8 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
   case MCFragment::FT_Relaxable:
   case MCFragment::FT_CompactEncodedInst:
     return cast<MCEncodedFragment>(F).getContents().size();
+  case MCFragment::FT_Compressed:
+    return cast<MCCompressedFragment>(F).getCompressedContents().size();
   case MCFragment::FT_Fill:
     return cast<MCFillFragment>(F).getSize();
 
@@ -617,6 +647,11 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout,
     break;
   }
 
+  case MCFragment::FT_Compressed:
+    ++stats::EmittedDataFragments;
+    OW->WriteBytes(cast<MCCompressedFragment>(F).getCompressedContents());
+    break;
+
   case MCFragment::FT_Data: 
     ++stats::EmittedDataFragments;
     writeFragmentContents(F, OW);
@@ -693,6 +728,7 @@ void MCAssembler::writeSectionData(const MCSectionData *SD,
            ie = SD->end(); it != ie; ++it) {
       switch (it->getKind()) {
       default: llvm_unreachable("Invalid fragment in virtual section!");
+      case MCFragment::FT_Compressed:
       case MCFragment::FT_Data: {
         // Check that we aren't trying to write a non-zero contents (or fixups)
         // into a virtual section. This is to support clients which use standard
@@ -1020,6 +1056,8 @@ void MCFragment::dump() {
   switch (getKind()) {
   case MCFragment::FT_Align: OS << "MCAlignFragment"; break;
   case MCFragment::FT_Data:  OS << "MCDataFragment"; break;
+  case MCFragment::FT_Compressed:
+    OS << "MCCompressedFragment"; break;
   case MCFragment::FT_CompactEncodedInst:
     OS << "MCCompactEncodedInstFragment"; break;
   case MCFragment::FT_Fill:  OS << "MCFillFragment"; break;
@@ -1046,6 +1084,7 @@ void MCFragment::dump() {
        << " MaxBytesToEmit:" << AF->getMaxBytesToEmit() << ">";
     break;
   }
+  case MCFragment::FT_Compressed:
   case MCFragment::FT_Data:  {
     const MCDataFragment *DF = cast<MCDataFragment>(this);
     OS << "\n       ";
@@ -1177,6 +1216,7 @@ void MCAssembler::dump() {
 void MCEncodedFragment::anchor() { }
 void MCEncodedFragmentWithFixups::anchor() { }
 void MCDataFragment::anchor() { }
+void MCCompressedFragment::anchor() { }
 void MCCompactEncodedInstFragment::anchor() { }
 void MCRelaxableFragment::anchor() { }
 void MCAlignFragment::anchor() { }
diff --git lib/MC/MCContext.cpp lib/MC/MCContext.cpp
index ede3b3c..6a002f8 100644
--- lib/MC/MCContext.cpp
+++ lib/MC/MCContext.cpp
@@ -37,13 +37,13 @@ typedef std::map<SectionGroupPair, const MCSectionCOFF *> COFFUniqueMapTy;
 
 MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri,
                      const MCObjectFileInfo *mofi, const SourceMgr *mgr,
-                     bool DoAutoReset) :
-  SrcMgr(mgr), MAI(mai), MRI(mri), MOFI(mofi),
-  Allocator(), Symbols(Allocator), UsedNames(Allocator),
-  NextUniqueID(0),
-  CurrentDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0),
-  DwarfLocSeen(false), GenDwarfForAssembly(false), GenDwarfFileNumber(0),
-  AllowTemporaryLabels(true), DwarfCompileUnitID(0), AutoReset(DoAutoReset) {
+                     bool DoAutoReset)
+    : SrcMgr(mgr), MAI(mai), MRI(mri), MOFI(mofi), Allocator(),
+      Symbols(Allocator), UsedNames(Allocator), NextUniqueID(0),
+      CurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0), DwarfLocSeen(false),
+      GenDwarfForAssembly(false), GenDwarfFileNumber(0),
+      AllowTemporaryLabels(true), CompressDebugSections(false),
+      DwarfCompileUnitID(0), AutoReset(DoAutoReset) {
 
   error_code EC = llvm::sys::fs::current_path(CompilationDir);
   if (EC)
@@ -251,6 +251,10 @@ getELFSection(StringRef Section, unsigned Type, unsigned Flags,
     ELFUniquingMap = new ELFUniqueMapTy();
   ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)ELFUniquingMap;
 
+  SmallString<32> ZDebugName;
+  if (CompressDebugSections && Section.startswith(".debug_"))
+    Section = (".z" + Section.drop_front(1)).toStringRef(ZDebugName);
+
   // Do the lookup, if we have a hit, return it.
   std::pair<ELFUniqueMapTy::iterator, bool> Entry = Map.insert(
       std::make_pair(SectionGroupPair(Section, Group), (MCSectionELF *)0));
diff --git lib/MC/MCObjectStreamer.cpp lib/MC/MCObjectStreamer.cpp
index 6b2234e..55e3234 100644
--- lib/MC/MCObjectStreamer.cpp
+++ lib/MC/MCObjectStreamer.cpp
@@ -20,6 +20,8 @@
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/ErrorHandling.h"
+
+#include "llvm/MC/MCSectionELF.h"
 using namespace llvm;
 
 MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB,
@@ -63,7 +65,11 @@ MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() const {
   // When bundling is enabled, we don't want to add data to a fragment that
   // already has instructions (see MCELFStreamer::EmitInstToData for details)
   if (!F || (Assembler->isBundlingEnabled() && F->hasInstructions())) {
-    F = new MCDataFragment();
+    const auto *Sec = dyn_cast<MCSectionELF>(&getCurrentSectionData()->getSection());
+    if (Sec && Sec->getSectionName().startswith(".zdebug_"))
+      F = new MCCompressedFragment();
+    else
+      F = new MCDataFragment();
     insert(F);
   }
   return F;


More information about the llvm-commits mailing list