r259976 - [modules] Compress files embedded into a .pcm file, to reduce the disk usage of -fembed-all-files mode.
Richard Smith via cfe-commits
cfe-commits at lists.llvm.org
Fri Feb 5 18:06:44 PST 2016
Author: rsmith
Date: Fri Feb 5 20:06:43 2016
New Revision: 259976
URL: http://llvm.org/viewvc/llvm-project?rev=259976&view=rev
Log:
[modules] Compress files embedded into a .pcm file, to reduce the disk usage of -fembed-all-files mode.
Added:
cfe/trunk/test/Modules/embed-files-compressed.cpp
Modified:
cfe/trunk/include/clang/Serialization/ASTBitCodes.h
cfe/trunk/lib/Serialization/ASTReader.cpp
cfe/trunk/lib/Serialization/ASTWriter.cpp
cfe/trunk/test/lit.cfg
cfe/trunk/test/lit.site.cfg.in
Modified: cfe/trunk/include/clang/Serialization/ASTBitCodes.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Serialization/ASTBitCodes.h?rev=259976&r1=259975&r2=259976&view=diff
==============================================================================
--- cfe/trunk/include/clang/Serialization/ASTBitCodes.h (original)
+++ cfe/trunk/include/clang/Serialization/ASTBitCodes.h Fri Feb 5 20:06:43 2016
@@ -591,9 +591,12 @@ namespace clang {
/// SM_SLOC_BUFFER_ENTRY record or a SM_SLOC_FILE_ENTRY with an
/// overridden buffer.
SM_SLOC_BUFFER_BLOB = 3,
+ /// \brief Describes a zlib-compressed blob that contains the data for
+ /// a buffer entry.
+ SM_SLOC_BUFFER_BLOB_COMPRESSED = 4,
/// \brief Describes a source location entry (SLocEntry) for a
/// macro expansion.
- SM_SLOC_EXPANSION_ENTRY = 4
+ SM_SLOC_EXPANSION_ENTRY = 5
};
/// \brief Record types used within a preprocessor block.
Modified: cfe/trunk/lib/Serialization/ASTReader.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTReader.cpp?rev=259976&r1=259975&r2=259976&view=diff
==============================================================================
--- cfe/trunk/lib/Serialization/ASTReader.cpp (original)
+++ cfe/trunk/lib/Serialization/ASTReader.cpp Fri Feb 5 20:06:43 2016
@@ -48,6 +48,7 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitstreamReader.h"
+#include "llvm/Support/Compression.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -1203,6 +1204,32 @@ bool ASTReader::ReadSLocEntry(int ID) {
return true;
}
+ // Local helper to read the (possibly-compressed) buffer data following the
+ // entry record.
+ auto ReadBuffer = [this](
+ BitstreamCursor &SLocEntryCursor,
+ StringRef Name) -> std::unique_ptr<llvm::MemoryBuffer> {
+ RecordData Record;
+ StringRef Blob;
+ unsigned Code = SLocEntryCursor.ReadCode();
+ unsigned RecCode = SLocEntryCursor.readRecord(Code, Record, &Blob);
+
+ if (RecCode == SM_SLOC_BUFFER_BLOB_COMPRESSED) {
+ SmallString<0> Uncompressed;
+ if (llvm::zlib::uncompress(Blob, Uncompressed, Record[0]) !=
+ llvm::zlib::StatusOK) {
+ Error("could not decompress embedded file contents");
+ return nullptr;
+ }
+ return llvm::MemoryBuffer::getMemBufferCopy(Uncompressed, Name);
+ } else if (RecCode == SM_SLOC_BUFFER_BLOB) {
+ return llvm::MemoryBuffer::getMemBuffer(Blob.drop_back(1), Name, true);
+ } else {
+ Error("AST record has invalid code");
+ return nullptr;
+ }
+ };
+
ModuleFile *F = GlobalSLocEntryMap.find(-ID)->second;
F->SLocEntryCursor.JumpToBit(F->SLocEntryOffsets[ID - F->SLocEntryBaseID]);
BitstreamCursor &SLocEntryCursor = F->SLocEntryCursor;
@@ -1258,24 +1285,16 @@ bool ASTReader::ReadSLocEntry(int ID) {
FileDeclIDs[FID] = FileDeclsInfo(F, llvm::makeArrayRef(FirstDecl,
NumFileDecls));
}
-
+
const SrcMgr::ContentCache *ContentCache
= SourceMgr.getOrCreateContentCache(File,
/*isSystemFile=*/FileCharacter != SrcMgr::C_User);
if (OverriddenBuffer && !ContentCache->BufferOverridden &&
ContentCache->ContentsEntry == ContentCache->OrigEntry &&
!ContentCache->getRawBuffer()) {
- unsigned Code = SLocEntryCursor.ReadCode();
- Record.clear();
- unsigned RecCode = SLocEntryCursor.readRecord(Code, Record, &Blob);
-
- if (RecCode != SM_SLOC_BUFFER_BLOB) {
- Error("AST record has invalid code");
+ auto Buffer = ReadBuffer(SLocEntryCursor, File->getName());
+ if (!Buffer)
return true;
- }
-
- std::unique_ptr<llvm::MemoryBuffer> Buffer
- = llvm::MemoryBuffer::getMemBuffer(Blob.drop_back(1), File->getName());
SourceMgr.overrideFileContents(File, std::move(Buffer));
}
@@ -1292,18 +1311,10 @@ bool ASTReader::ReadSLocEntry(int ID) {
(F->Kind == MK_ImplicitModule || F->Kind == MK_ExplicitModule)) {
IncludeLoc = getImportLocation(F);
}
- unsigned Code = SLocEntryCursor.ReadCode();
- Record.clear();
- unsigned RecCode
- = SLocEntryCursor.readRecord(Code, Record, &Blob);
- if (RecCode != SM_SLOC_BUFFER_BLOB) {
- Error("AST record has invalid code");
+ auto Buffer = ReadBuffer(SLocEntryCursor, Name);
+ if (!Buffer)
return true;
- }
-
- std::unique_ptr<llvm::MemoryBuffer> Buffer =
- llvm::MemoryBuffer::getMemBuffer(Blob.drop_back(1), Name);
SourceMgr.createFileID(std::move(Buffer), FileCharacter, ID,
BaseOffset + Offset, IncludeLoc);
break;
Modified: cfe/trunk/lib/Serialization/ASTWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTWriter.cpp?rev=259976&r1=259975&r2=259976&view=diff
==============================================================================
--- cfe/trunk/lib/Serialization/ASTWriter.cpp (original)
+++ cfe/trunk/lib/Serialization/ASTWriter.cpp Fri Feb 5 20:06:43 2016
@@ -50,6 +50,7 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitstreamWriter.h"
+#include "llvm/Support/Compression.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -962,6 +963,7 @@ void ASTWriter::WriteBlockInfoBlock() {
RECORD(SM_SLOC_FILE_ENTRY);
RECORD(SM_SLOC_BUFFER_ENTRY);
RECORD(SM_SLOC_BUFFER_BLOB);
+ RECORD(SM_SLOC_BUFFER_BLOB_COMPRESSED);
RECORD(SM_SLOC_EXPANSION_ENTRY);
// Preprocessor Block.
@@ -1631,11 +1633,15 @@ static unsigned CreateSLocBufferAbbrev(l
/// \brief Create an abbreviation for the SLocEntry that refers to a
/// buffer's blob.
-static unsigned CreateSLocBufferBlobAbbrev(llvm::BitstreamWriter &Stream) {
+static unsigned CreateSLocBufferBlobAbbrev(llvm::BitstreamWriter &Stream,
+ bool Compressed) {
using namespace llvm;
auto *Abbrev = new BitCodeAbbrev();
- Abbrev->Add(BitCodeAbbrevOp(SM_SLOC_BUFFER_BLOB));
+ Abbrev->Add(BitCodeAbbrevOp(Compressed ? SM_SLOC_BUFFER_BLOB_COMPRESSED
+ : SM_SLOC_BUFFER_BLOB));
+ if (Compressed)
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Uncompressed size
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Blob
return Stream.EmitAbbrev(Abbrev);
}
@@ -1857,12 +1863,14 @@ void ASTWriter::WriteSourceManagerBlock(
RecordData Record;
// Enter the source manager block.
- Stream.EnterSubblock(SOURCE_MANAGER_BLOCK_ID, 3);
+ Stream.EnterSubblock(SOURCE_MANAGER_BLOCK_ID, 4);
// Abbreviations for the various kinds of source-location entries.
unsigned SLocFileAbbrv = CreateSLocFileAbbrev(Stream);
unsigned SLocBufferAbbrv = CreateSLocBufferAbbrev(Stream);
- unsigned SLocBufferBlobAbbrv = CreateSLocBufferBlobAbbrev(Stream);
+ unsigned SLocBufferBlobAbbrv = CreateSLocBufferBlobAbbrev(Stream, false);
+ unsigned SLocBufferBlobCompressedAbbrv =
+ CreateSLocBufferBlobAbbrev(Stream, true);
unsigned SLocExpansionAbbrv = CreateSLocExpansionAbbrev(Stream);
// Write out the source location entry table. We skip the first
@@ -1902,6 +1910,7 @@ void ASTWriter::WriteSourceManagerBlock(
Record.push_back(File.hasLineDirectives());
const SrcMgr::ContentCache *Content = File.getContentCache();
+ bool EmitBlob = false;
if (Content->OrigEntry) {
assert(Content->OrigEntry == Content->ContentsEntry &&
"Writing to AST an overridden file is not supported");
@@ -1923,14 +1932,8 @@ void ASTWriter::WriteSourceManagerBlock(
Stream.EmitRecordWithAbbrev(SLocFileAbbrv, Record);
- if (Content->BufferOverridden || Content->IsTransient) {
- RecordData::value_type Record[] = {SM_SLOC_BUFFER_BLOB};
- const llvm::MemoryBuffer *Buffer
- = Content->getBuffer(PP.getDiagnostics(), PP.getSourceManager());
- Stream.EmitRecordWithBlob(SLocBufferBlobAbbrv, Record,
- StringRef(Buffer->getBufferStart(),
- Buffer->getBufferSize() + 1));
- }
+ if (Content->BufferOverridden || Content->IsTransient)
+ EmitBlob = true;
} else {
// The source location entry is a buffer. The blob associated
// with this entry contains the contents of the buffer.
@@ -1943,15 +1946,34 @@ void ASTWriter::WriteSourceManagerBlock(
const char *Name = Buffer->getBufferIdentifier();
Stream.EmitRecordWithBlob(SLocBufferAbbrv, Record,
StringRef(Name, strlen(Name) + 1));
- RecordData::value_type Record[] = {SM_SLOC_BUFFER_BLOB};
- Stream.EmitRecordWithBlob(SLocBufferBlobAbbrv, Record,
- StringRef(Buffer->getBufferStart(),
- Buffer->getBufferSize() + 1));
+ EmitBlob = true;
if (strcmp(Name, "<built-in>") == 0) {
PreloadSLocs.push_back(SLocEntryOffsets.size());
}
}
+
+ if (EmitBlob) {
+ // Include the implicit terminating null character in the on-disk buffer
+ // if we're writing it uncompressed.
+ const llvm::MemoryBuffer *Buffer =
+ Content->getBuffer(PP.getDiagnostics(), PP.getSourceManager());
+ StringRef Blob(Buffer->getBufferStart(), Buffer->getBufferSize() + 1);
+
+ // Compress the buffer if possible. We expect that almost all PCM
+ // consumers will not want its contents.
+ SmallString<0> CompressedBuffer;
+ if (llvm::zlib::compress(Blob.drop_back(1), CompressedBuffer) ==
+ llvm::zlib::StatusOK) {
+ RecordData::value_type Record[] = {SM_SLOC_BUFFER_BLOB_COMPRESSED,
+ Blob.size() - 1};
+ Stream.EmitRecordWithBlob(SLocBufferBlobCompressedAbbrv, Record,
+ CompressedBuffer);
+ } else {
+ RecordData::value_type Record[] = {SM_SLOC_BUFFER_BLOB};
+ Stream.EmitRecordWithBlob(SLocBufferBlobAbbrv, Record, Blob);
+ }
+ }
} else {
// The source location entry is a macro expansion.
const SrcMgr::ExpansionInfo &Expansion = SLoc->getExpansion();
Added: cfe/trunk/test/Modules/embed-files-compressed.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Modules/embed-files-compressed.cpp?rev=259976&view=auto
==============================================================================
--- cfe/trunk/test/Modules/embed-files-compressed.cpp (added)
+++ cfe/trunk/test/Modules/embed-files-compressed.cpp Fri Feb 5 20:06:43 2016
@@ -0,0 +1,23 @@
+// REQUIRES: zlib
+// REQUIRES: shell
+//
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo '//////////////////////////////////////////////////////////////////////' > %t/a.h
+// RUN: cat %t/a.h %t/a.h %t/a.h %t/a.h > %t/b.h
+// RUN: cat %t/b.h %t/b.h %t/b.h %t/b.h > %t/a.h
+// RUN: cat %t/a.h %t/a.h %t/a.h %t/a.h > %t/b.h
+// RUN: cat %t/b.h %t/b.h %t/b.h %t/b.h > %t/a.h
+// RUN: cat %t/a.h %t/a.h %t/a.h %t/a.h > %t/b.h
+// RUN: cat %t/b.h %t/b.h %t/b.h %t/b.h > %t/a.h
+// RUN: cat %t/a.h %t/a.h %t/a.h %t/a.h > %t/b.h
+// RUN: cat %t/b.h %t/b.h %t/b.h %t/b.h > %t/a.h
+// RUN: echo 'module a { header "a.h" }' > %t/modulemap
+//
+// RUN: %clang_cc1 -fmodules -I%t -fmodules-cache-path=%t -fmodule-name=a -emit-module %t/modulemap -fmodules-embed-all-files -o %t/a.pcm
+//
+// The above embeds ~4.5MB of highly-predictable /s and \ns into the pcm file.
+// Check that the resulting file is a five-digit byte count below 40000 (under 40KB):
+//
+// RUN: wc -c %t/a.pcm | FileCheck --check-prefix=CHECK-SIZE %s
+// CHECK-SIZE: {{(^|[^0-9])[123][0-9][0-9][0-9][0-9]($|[^0-9])}}
Modified: cfe/trunk/test/lit.cfg
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/lit.cfg?rev=259976&r1=259975&r2=259976&view=diff
==============================================================================
--- cfe/trunk/test/lit.cfg (original)
+++ cfe/trunk/test/lit.cfg Fri Feb 5 20:06:43 2016
@@ -467,6 +467,11 @@ else:
if config.enable_backtrace == "1":
config.available_features.add("backtrace")
+if config.have_zlib == "1":
+ config.available_features.add("zlib")
+else:
+ config.available_features.add("nozlib")
+
# Check if we should run long running tests.
if lit_config.params.get("run_long_tests", None) == "true":
config.available_features.add("long_tests")
Modified: cfe/trunk/test/lit.site.cfg.in
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/lit.site.cfg.in?rev=259976&r1=259975&r2=259976&view=diff
==============================================================================
--- cfe/trunk/test/lit.site.cfg.in (original)
+++ cfe/trunk/test/lit.site.cfg.in Fri Feb 5 20:06:43 2016
@@ -14,6 +14,7 @@ config.clang_tools_dir = "@CLANG_TOOLS_D
config.host_triple = "@LLVM_HOST_TRIPLE@"
config.target_triple = "@TARGET_TRIPLE@"
config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
+config.have_zlib = "@HAVE_LIBZ@"
config.clang_arcmt = @ENABLE_CLANG_ARCMT@
config.clang_staticanalyzer = @ENABLE_CLANG_STATIC_ANALYZER@
config.clang_examples = @ENABLE_CLANG_EXAMPLES@
More information about the cfe-commits mailing list