r259976 - [modules] Compress files embedded into a .pcm file, to reduce the disk usage of -fembed-all-files mode.

Richard Smith via cfe-commits cfe-commits at lists.llvm.org
Fri Feb 5 18:06:44 PST 2016


Author: rsmith
Date: Fri Feb  5 20:06:43 2016
New Revision: 259976

URL: http://llvm.org/viewvc/llvm-project?rev=259976&view=rev
Log:
[modules] Compress files embedded into a .pcm file, to reduce the disk usage of -fembed-all-files mode.

Added:
    cfe/trunk/test/Modules/embed-files-compressed.cpp
Modified:
    cfe/trunk/include/clang/Serialization/ASTBitCodes.h
    cfe/trunk/lib/Serialization/ASTReader.cpp
    cfe/trunk/lib/Serialization/ASTWriter.cpp
    cfe/trunk/test/lit.cfg
    cfe/trunk/test/lit.site.cfg.in

Modified: cfe/trunk/include/clang/Serialization/ASTBitCodes.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Serialization/ASTBitCodes.h?rev=259976&r1=259975&r2=259976&view=diff
==============================================================================
--- cfe/trunk/include/clang/Serialization/ASTBitCodes.h (original)
+++ cfe/trunk/include/clang/Serialization/ASTBitCodes.h Fri Feb  5 20:06:43 2016
@@ -591,9 +591,12 @@ namespace clang {
       /// SM_SLOC_BUFFER_ENTRY record or a SM_SLOC_FILE_ENTRY with an
       /// overridden buffer.
       SM_SLOC_BUFFER_BLOB = 3,
+      /// \brief Describes a zlib-compressed blob that contains the data for
+      /// a buffer entry.
+      SM_SLOC_BUFFER_BLOB_COMPRESSED = 4,
       /// \brief Describes a source location entry (SLocEntry) for a
       /// macro expansion.
-      SM_SLOC_EXPANSION_ENTRY = 4
+      SM_SLOC_EXPANSION_ENTRY = 5
     };
 
     /// \brief Record types used within a preprocessor block.

Modified: cfe/trunk/lib/Serialization/ASTReader.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTReader.cpp?rev=259976&r1=259975&r2=259976&view=diff
==============================================================================
--- cfe/trunk/lib/Serialization/ASTReader.cpp (original)
+++ cfe/trunk/lib/Serialization/ASTReader.cpp Fri Feb  5 20:06:43 2016
@@ -48,6 +48,7 @@
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Bitcode/BitstreamReader.h"
+#include "llvm/Support/Compression.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -1203,6 +1204,32 @@ bool ASTReader::ReadSLocEntry(int ID) {
     return true;
   }
 
+  // Local helper to read the (possibly-compressed) buffer data following the
+  // entry record.
+  auto ReadBuffer = [this](
+      BitstreamCursor &SLocEntryCursor,
+      StringRef Name) -> std::unique_ptr<llvm::MemoryBuffer> {
+    RecordData Record;
+    StringRef Blob;
+    unsigned Code = SLocEntryCursor.ReadCode();
+    unsigned RecCode = SLocEntryCursor.readRecord(Code, Record, &Blob);
+
+    if (RecCode == SM_SLOC_BUFFER_BLOB_COMPRESSED) {
+      SmallString<0> Uncompressed;
+      if (llvm::zlib::uncompress(Blob, Uncompressed, Record[0]) !=
+          llvm::zlib::StatusOK) {
+        Error("could not decompress embedded file contents");
+        return nullptr;
+      }
+      return llvm::MemoryBuffer::getMemBufferCopy(Uncompressed, Name);
+    } else if (RecCode == SM_SLOC_BUFFER_BLOB) {
+      return llvm::MemoryBuffer::getMemBuffer(Blob.drop_back(1), Name, true);
+    } else {
+      Error("AST record has invalid code");
+      return nullptr;
+    }
+  };
+
   ModuleFile *F = GlobalSLocEntryMap.find(-ID)->second;
   F->SLocEntryCursor.JumpToBit(F->SLocEntryOffsets[ID - F->SLocEntryBaseID]);
   BitstreamCursor &SLocEntryCursor = F->SLocEntryCursor;
@@ -1258,24 +1285,16 @@ bool ASTReader::ReadSLocEntry(int ID) {
       FileDeclIDs[FID] = FileDeclsInfo(F, llvm::makeArrayRef(FirstDecl,
                                                              NumFileDecls));
     }
-    
+
     const SrcMgr::ContentCache *ContentCache
       = SourceMgr.getOrCreateContentCache(File,
                               /*isSystemFile=*/FileCharacter != SrcMgr::C_User);
     if (OverriddenBuffer && !ContentCache->BufferOverridden &&
         ContentCache->ContentsEntry == ContentCache->OrigEntry &&
         !ContentCache->getRawBuffer()) {
-      unsigned Code = SLocEntryCursor.ReadCode();
-      Record.clear();
-      unsigned RecCode = SLocEntryCursor.readRecord(Code, Record, &Blob);
-      
-      if (RecCode != SM_SLOC_BUFFER_BLOB) {
-        Error("AST record has invalid code");
+      auto Buffer = ReadBuffer(SLocEntryCursor, File->getName());
+      if (!Buffer)
         return true;
-      }
-      
-      std::unique_ptr<llvm::MemoryBuffer> Buffer
-        = llvm::MemoryBuffer::getMemBuffer(Blob.drop_back(1), File->getName());
       SourceMgr.overrideFileContents(File, std::move(Buffer));
     }
 
@@ -1292,18 +1311,10 @@ bool ASTReader::ReadSLocEntry(int ID) {
         (F->Kind == MK_ImplicitModule || F->Kind == MK_ExplicitModule)) {
       IncludeLoc = getImportLocation(F);
     }
-    unsigned Code = SLocEntryCursor.ReadCode();
-    Record.clear();
-    unsigned RecCode
-      = SLocEntryCursor.readRecord(Code, Record, &Blob);
 
-    if (RecCode != SM_SLOC_BUFFER_BLOB) {
-      Error("AST record has invalid code");
+    auto Buffer = ReadBuffer(SLocEntryCursor, Name);
+    if (!Buffer)
       return true;
-    }
-
-    std::unique_ptr<llvm::MemoryBuffer> Buffer =
-        llvm::MemoryBuffer::getMemBuffer(Blob.drop_back(1), Name);
     SourceMgr.createFileID(std::move(Buffer), FileCharacter, ID,
                            BaseOffset + Offset, IncludeLoc);
     break;

Modified: cfe/trunk/lib/Serialization/ASTWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTWriter.cpp?rev=259976&r1=259975&r2=259976&view=diff
==============================================================================
--- cfe/trunk/lib/Serialization/ASTWriter.cpp (original)
+++ cfe/trunk/lib/Serialization/ASTWriter.cpp Fri Feb  5 20:06:43 2016
@@ -50,6 +50,7 @@
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Bitcode/BitstreamWriter.h"
+#include "llvm/Support/Compression.h"
 #include "llvm/Support/EndianStream.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -962,6 +963,7 @@ void ASTWriter::WriteBlockInfoBlock() {
   RECORD(SM_SLOC_FILE_ENTRY);
   RECORD(SM_SLOC_BUFFER_ENTRY);
   RECORD(SM_SLOC_BUFFER_BLOB);
+  RECORD(SM_SLOC_BUFFER_BLOB_COMPRESSED);
   RECORD(SM_SLOC_EXPANSION_ENTRY);
 
   // Preprocessor Block.
@@ -1631,11 +1633,15 @@ static unsigned CreateSLocBufferAbbrev(l
 
 /// \brief Create an abbreviation for the SLocEntry that refers to a
 /// buffer's blob.
-static unsigned CreateSLocBufferBlobAbbrev(llvm::BitstreamWriter &Stream) {
+static unsigned CreateSLocBufferBlobAbbrev(llvm::BitstreamWriter &Stream,
+                                           bool Compressed) {
   using namespace llvm;
 
   auto *Abbrev = new BitCodeAbbrev();
-  Abbrev->Add(BitCodeAbbrevOp(SM_SLOC_BUFFER_BLOB));
+  Abbrev->Add(BitCodeAbbrevOp(Compressed ? SM_SLOC_BUFFER_BLOB_COMPRESSED
+                                         : SM_SLOC_BUFFER_BLOB));
+  if (Compressed)
+    Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Uncompressed size
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Blob
   return Stream.EmitAbbrev(Abbrev);
 }
@@ -1857,12 +1863,14 @@ void ASTWriter::WriteSourceManagerBlock(
   RecordData Record;
 
   // Enter the source manager block.
-  Stream.EnterSubblock(SOURCE_MANAGER_BLOCK_ID, 3);
+  Stream.EnterSubblock(SOURCE_MANAGER_BLOCK_ID, 4);
 
   // Abbreviations for the various kinds of source-location entries.
   unsigned SLocFileAbbrv = CreateSLocFileAbbrev(Stream);
   unsigned SLocBufferAbbrv = CreateSLocBufferAbbrev(Stream);
-  unsigned SLocBufferBlobAbbrv = CreateSLocBufferBlobAbbrev(Stream);
+  unsigned SLocBufferBlobAbbrv = CreateSLocBufferBlobAbbrev(Stream, false);
+  unsigned SLocBufferBlobCompressedAbbrv =
+      CreateSLocBufferBlobAbbrev(Stream, true);
   unsigned SLocExpansionAbbrv = CreateSLocExpansionAbbrev(Stream);
 
   // Write out the source location entry table. We skip the first
@@ -1902,6 +1910,7 @@ void ASTWriter::WriteSourceManagerBlock(
       Record.push_back(File.hasLineDirectives());
 
       const SrcMgr::ContentCache *Content = File.getContentCache();
+      bool EmitBlob = false;
       if (Content->OrigEntry) {
         assert(Content->OrigEntry == Content->ContentsEntry &&
                "Writing to AST an overridden file is not supported");
@@ -1923,14 +1932,8 @@ void ASTWriter::WriteSourceManagerBlock(
         
         Stream.EmitRecordWithAbbrev(SLocFileAbbrv, Record);
         
-        if (Content->BufferOverridden || Content->IsTransient) {
-          RecordData::value_type Record[] = {SM_SLOC_BUFFER_BLOB};
-          const llvm::MemoryBuffer *Buffer
-            = Content->getBuffer(PP.getDiagnostics(), PP.getSourceManager());
-          Stream.EmitRecordWithBlob(SLocBufferBlobAbbrv, Record,
-                                    StringRef(Buffer->getBufferStart(),
-                                              Buffer->getBufferSize() + 1));          
-        }
+        if (Content->BufferOverridden || Content->IsTransient)
+          EmitBlob = true;
       } else {
         // The source location entry is a buffer. The blob associated
         // with this entry contains the contents of the buffer.
@@ -1943,15 +1946,34 @@ void ASTWriter::WriteSourceManagerBlock(
         const char *Name = Buffer->getBufferIdentifier();
         Stream.EmitRecordWithBlob(SLocBufferAbbrv, Record,
                                   StringRef(Name, strlen(Name) + 1));
-        RecordData::value_type Record[] = {SM_SLOC_BUFFER_BLOB};
-        Stream.EmitRecordWithBlob(SLocBufferBlobAbbrv, Record,
-                                  StringRef(Buffer->getBufferStart(),
-                                                  Buffer->getBufferSize() + 1));
+        EmitBlob = true;
 
         if (strcmp(Name, "<built-in>") == 0) {
           PreloadSLocs.push_back(SLocEntryOffsets.size());
         }
       }
+
+      if (EmitBlob) {
+        // Include the implicit terminating null character in the on-disk buffer
+        // if we're writing it uncompressed.
+        const llvm::MemoryBuffer *Buffer =
+            Content->getBuffer(PP.getDiagnostics(), PP.getSourceManager());
+        StringRef Blob(Buffer->getBufferStart(), Buffer->getBufferSize() + 1);
+
+        // Compress the buffer if possible. We expect that almost all PCM
+        // consumers will not want its contents.
+        SmallString<0> CompressedBuffer;
+        if (llvm::zlib::compress(Blob.drop_back(1), CompressedBuffer) ==
+            llvm::zlib::StatusOK) {
+          RecordData::value_type Record[] = {SM_SLOC_BUFFER_BLOB_COMPRESSED,
+                                             Blob.size() - 1};
+          Stream.EmitRecordWithBlob(SLocBufferBlobCompressedAbbrv, Record,
+                                    CompressedBuffer);
+        } else {
+          RecordData::value_type Record[] = {SM_SLOC_BUFFER_BLOB};
+          Stream.EmitRecordWithBlob(SLocBufferBlobAbbrv, Record, Blob);
+        }
+      }
     } else {
       // The source location entry is a macro expansion.
       const SrcMgr::ExpansionInfo &Expansion = SLoc->getExpansion();

Added: cfe/trunk/test/Modules/embed-files-compressed.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Modules/embed-files-compressed.cpp?rev=259976&view=auto
==============================================================================
--- cfe/trunk/test/Modules/embed-files-compressed.cpp (added)
+++ cfe/trunk/test/Modules/embed-files-compressed.cpp Fri Feb  5 20:06:43 2016
@@ -0,0 +1,23 @@
+// REQUIRES: zlib
+// REQUIRES: shell
+//
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo '//////////////////////////////////////////////////////////////////////' > %t/a.h
+// RUN: cat %t/a.h %t/a.h %t/a.h %t/a.h > %t/b.h
+// RUN: cat %t/b.h %t/b.h %t/b.h %t/b.h > %t/a.h
+// RUN: cat %t/a.h %t/a.h %t/a.h %t/a.h > %t/b.h
+// RUN: cat %t/b.h %t/b.h %t/b.h %t/b.h > %t/a.h
+// RUN: cat %t/a.h %t/a.h %t/a.h %t/a.h > %t/b.h
+// RUN: cat %t/b.h %t/b.h %t/b.h %t/b.h > %t/a.h
+// RUN: cat %t/a.h %t/a.h %t/a.h %t/a.h > %t/b.h
+// RUN: cat %t/b.h %t/b.h %t/b.h %t/b.h > %t/a.h
+// RUN: echo 'module a { header "a.h" }' > %t/modulemap
+//
+// RUN: %clang_cc1 -fmodules -I%t -fmodules-cache-path=%t -fmodule-name=a -emit-module %t/modulemap -fmodules-embed-all-files -o %t/a.pcm
+//
+// The above embeds ~4.5MB of highly-predictable /s and \ns into the pcm file.
+// Check that the resulting file is under 40KB:
+//
+// RUN: wc -c %t/a.pcm | FileCheck --check-prefix=CHECK-SIZE %s
+// CHECK-SIZE: {{(^|[^0-9])[123][0-9][0-9][0-9][0-9]($|[^0-9])}}

Modified: cfe/trunk/test/lit.cfg
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/lit.cfg?rev=259976&r1=259975&r2=259976&view=diff
==============================================================================
--- cfe/trunk/test/lit.cfg (original)
+++ cfe/trunk/test/lit.cfg Fri Feb  5 20:06:43 2016
@@ -467,6 +467,11 @@ else:
 if config.enable_backtrace == "1":
     config.available_features.add("backtrace")
 
+if config.have_zlib == "1":
+    config.available_features.add("zlib")
+else:
+    config.available_features.add("nozlib")
+
 # Check if we should run long running tests.
 if lit_config.params.get("run_long_tests", None) == "true":
     config.available_features.add("long_tests")

Modified: cfe/trunk/test/lit.site.cfg.in
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/lit.site.cfg.in?rev=259976&r1=259975&r2=259976&view=diff
==============================================================================
--- cfe/trunk/test/lit.site.cfg.in (original)
+++ cfe/trunk/test/lit.site.cfg.in Fri Feb  5 20:06:43 2016
@@ -14,6 +14,7 @@ config.clang_tools_dir = "@CLANG_TOOLS_D
 config.host_triple = "@LLVM_HOST_TRIPLE@"
 config.target_triple = "@TARGET_TRIPLE@"
 config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
+config.have_zlib = "@HAVE_LIBZ@"
 config.clang_arcmt = @ENABLE_CLANG_ARCMT@
 config.clang_staticanalyzer = @ENABLE_CLANG_STATIC_ANALYZER@
 config.clang_examples = @ENABLE_CLANG_EXAMPLES@




More information about the cfe-commits mailing list