[llvm-commits] CVS: llvm/lib/Support/Compressor.cpp

Sun Nov 14 14:05:23 PST 2004

Changes in directory llvm/lib/Support:

Compressor.cpp updated: 1.5 -> 1.6
---
Log message:

Implement the high level interface to make (de)compression easier.

---
Diffs of the changes:  (+181 -9)

Index: llvm/lib/Support/Compressor.cpp
diff -u llvm/lib/Support/Compressor.cpp:1.5 llvm/lib/Support/Compressor.cpp:1.6

--- llvm/lib/Support/Compressor.cpp:1.5	Tue Nov  9 11:58:09 2004
+++ llvm/lib/Support/Compressor.cpp	Sun Nov 14 16:04:46 2004
@@ -111,13 +111,137 @@
 void NULLCOMP_end(NULLCOMP_stream* strm) {
 }
 
+/// This structure backs the compressToNewBuffer and decompressToNewBuffer
+/// helpers. As output is produced, the memory buffer might need to be
+/// reallocated. The buffer allocation is handled in a callback and this
+/// structure retains the buffer pointer and its size across calls to that
+/// callback. It has no destructor, so the realloc'd block outlives it.
+/// @brief An internal growable buffer used for (de)compression output
+struct BufferContext {
+  char* buff;     // base of the realloc'd block; 0 until the first callback
+  unsigned size;  // block size in bytes; seeded by the ctor before any alloc
+  BufferContext(unsigned compressedSize ) { 
+    // Null to indicate malloc of a new block
+    buff = 0; 
+
+    // Compute the initial length of the decompression buffer. Note that this
+    // is twice the length of the compressed buffer and will be doubled again
+    // in the callback for an initial allocation of 4x compressedSize.  This
+    // calculation is based on the compression ratio of bzip2 on LLVM bytecode
+    // files, which typically falls in the 50%-75% range.  Since we typically
+    // get at least 50%, doubling is insufficient. By using a 4x multiplier on
+    // the first allocation, we minimize the impact of having to copy the
+    // buffer on reallocation.
+    size = compressedSize*2; 
+  }
+
+  /// This function allocates or grows the output buffer. It is the callback
+  /// handed to Compressor::compress and Compressor::decompress, and receives
+  /// the BufferContext through ctxt. Returns 0 on success, 1 on failure.
+  static unsigned callback(char*&buff, unsigned& sz, void* ctxt){
+    // Cast the context variable to our BufferContext
+    BufferContext* bc = reinterpret_cast<BufferContext*>(ctxt);
+
+    // Compute the new, doubled, size of the block
+    unsigned new_size = bc->size * 2;
+
+    // Extend or allocate the block (realloc(0,n) == malloc(n))
+    char* new_buff = (char*) ::realloc(bc->buff, new_size);
+
+    // Figure out what to return to the Compressor. If this is the first call,
+    // then bc->buff will be null. In this case we want to return the entire
+    // buffer because there was no previous allocation.  Otherwise, when the
+    // buffer is reallocated, we save the new base pointer in bc->buff but
+    // return the address of only the extension, mid-way through the buffer
+    // (since its size was doubled). Furthermore, the sz result must be 1/2
+    // the total size of the buffer.
+    if (bc->buff == 0 ) {
+      buff = bc->buff = new_buff;
+      sz = new_size;
+    } else {
+      bc->buff = new_buff; // NOTE(review): old block is lost if realloc gave 0
+      buff = new_buff + bc->size;
+      sz = bc->size;
+    }
+
+    // Retain the size of the allocated block
+    bc->size = new_size;
+
+    // Make sure we fail (return 1) if we didn't get any memory.
+    return (bc->buff == 0 ? 1 : 0);
+  }
+};
+
+// This structure retains the context when (de)compressing to a stream. The
+// callback member below uses it to keep track of the previously filled chunk
+// of memory (which it writes to the stream) and how many bytes have been
+// written so far.
+struct WriterContext {
+  // Initialize the context
+  WriterContext(std::ostream*OS, unsigned CS) 
+    : chunk(0), sz(0), written(0), compSize(CS), Out(OS) {}
+
+  // Make sure we clean up memory
+  ~WriterContext() {
+    if (chunk)
+      delete [] chunk;
+  }
+
+  // Write the chunk (size == 0 means all sz bytes of it), then free it
+  void write(unsigned size = 0) {
+    unsigned write_size = (size == 0 ? sz : size);
+    Out->write(chunk,write_size);
+    written += write_size;
+    delete [] chunk;
+    chunk = 0;
+    sz = 0;
+  }
+
+  // This function is a callback used by the Compressor::compress function to 
+  // allocate memory for the compression buffer. This function fulfills that
+  // responsibility but also writes the previous (now filled) buffer out to the
+  // stream. 
+  static unsigned callback(char*& buffer, unsigned& size, void* context) {
+    // Cast the context to the structure it must point to.
+    WriterContext* ctxt = 
+      reinterpret_cast<WriterContext*>(context);
+
+    // If there's a previously allocated chunk, it must now be filled with
+    // compressed data, so we write it out and deallocate it.
+    if (ctxt->chunk != 0 && ctxt->sz > 0 ) {
+      ctxt->write();
+    }
+
+    // Compute the size of the next chunk to allocate. We attempt to allocate
+    // enough memory to handle the compression in a single memory allocation. In
+    // general, the worst we do on compression of bytecode is about 50% so we
+    // estimate compSize / 2 as the size needed for the output chunk. compSize
+    // is the CS value given to the constructor; the callers in this file pass
+    // half of the input size. NOTE(review): this is 0 for very small inputs.
+    size = ctxt->sz = ctxt->compSize / 2;
+
+    // Allocate the chunks
+    buffer = ctxt->chunk = new char [size];
+
+    // We must return 1 if the allocation failed so that the Compressor knows
+    // not to use it. NOTE(review): plain new[] throws rather than returns 0.
+    return (ctxt->chunk == 0 ? 1 : 0);
+  }
+
+  char* chunk;       // pointer to the chunk of memory filled by compression
+  unsigned sz;       // size of chunk
+  unsigned written;  // aggregate total of bytes written in all chunks
+  unsigned compSize; // size estimate given to the constructor (CS)
+  std::ostream* Out; // The stream we write the data to.
+};
+
 }
 
 namespace llvm {
 
 // Compress in one of three ways
-uint64_t Compressor::compress(char* in, unsigned size, OutputDataCallback* cb, 
-                              Algorithm hint, void* context ) {
+uint64_t Compressor::compress(const char* in, unsigned size, 
+    OutputDataCallback* cb, Algorithm hint, void* context ) {
   assert(in && "Can't compress null buffer");
   assert(size && "Can't compress empty buffer");
   assert(cb && "Can't compress without a callback function");
@@ -132,7 +256,7 @@
       bzdata.bzalloc = 0;
       bzdata.bzfree = 0;
       bzdata.opaque = 0;
-      bzdata.next_in = in;
+      bzdata.next_in = (char*)in;
       bzdata.avail_in = size;
       bzdata.next_out = 0;
       bzdata.avail_out = 0;
@@ -188,7 +312,7 @@
       zdata.zalloc = Z_NULL;
       zdata.zfree = Z_NULL;
       zdata.opaque = Z_NULL;
-      zdata.next_in = reinterpret_cast<Bytef*>(in);
+      zdata.next_in = (Bytef*)in;
       zdata.avail_in = size;
       if (Z_OK != deflateInit(&zdata,6))
         throw std::string(zdata.msg ? zdata.msg : "zlib error");
@@ -227,7 +351,7 @@
 
     case COMP_TYPE_SIMPLE: {
       NULLCOMP_stream sdata;
-      sdata.next_in = in;
+      sdata.next_in = (char*)in;
       sdata.avail_in = size;
       NULLCOMP_init(&sdata);
 
@@ -254,8 +378,33 @@
   return result;
 }
 
+uint64_t 
+Compressor::compressToNewBuffer(const char* in, unsigned size, char*&out,
+                                Algorithm hint) {
+  BufferContext bc(size); // growable output buffer, seeded from the input size
+  unsigned result = compress(in,size,BufferContext::callback,hint,(void*)&bc);
+  out = bc.buff; // hand the realloc'd block back through the out parameter
+  return result; // NOTE(review): result is unsigned, so the uint64_t narrowed
+}
+
+uint64_t 
+Compressor::compressToStream(const char*in, unsigned size, std::ostream& out,
+                             Algorithm hint) {
+  // Set up the context and writer
+  WriterContext ctxt(&out,size / 2);
+
+  // Compress the input; the callback writes each filled chunk to the stream
+  uint64_t zipSize = Compressor::compress(in,size,
+    WriterContext::callback, hint, (void*)&ctxt);
+
+  if (ctxt.chunk) { // flush the final, possibly partially filled, chunk
+    ctxt.write(zipSize - ctxt.written); // NOTE(review): 0 writes whole chunk
+  }
+  return zipSize;
+}
+
 // Decompress in one of three ways
-uint64_t Compressor::decompress(char *in, unsigned size, 
+uint64_t Compressor::decompress(const char *in, unsigned size, 
                                 OutputDataCallback* cb, void* context) {
   assert(in && "Can't decompress null buffer");
   assert(size > 1 && "Can't decompress empty buffer");
@@ -273,7 +422,7 @@
       bzdata.bzalloc = 0;
       bzdata.bzfree = 0;
       bzdata.opaque = 0;
-      bzdata.next_in = in;
+      bzdata.next_in = (char*)in;
       bzdata.avail_in = size - 1;
       bzdata.next_out = 0;
       bzdata.avail_out = 0;
@@ -327,7 +476,7 @@
       zdata.zalloc = Z_NULL;
       zdata.zfree = Z_NULL;
       zdata.opaque = Z_NULL;
-      zdata.next_in = reinterpret_cast<Bytef*>(in);
+      zdata.next_in = (Bytef*)(in);
       zdata.avail_in = size - 1;
       if ( Z_OK != inflateInit(&zdata))
         throw std::string(zdata.msg ? zdata.msg : "zlib error");
@@ -356,7 +505,7 @@
 
     case COMP_TYPE_SIMPLE: {
       NULLCOMP_stream sdata;
-      sdata.next_in = in;
+      sdata.next_in = (char*)in;
       sdata.avail_in = size - 1;
       NULLCOMP_init(&sdata);
 
@@ -382,6 +531,29 @@
   return result;
 }
 
+uint64_t 
+Compressor::decompressToNewBuffer(const char* in, unsigned size, char*&out) {
+  BufferContext bc(size); // growable output buffer, seeded from the input size
+  unsigned result = decompress(in,size,BufferContext::callback,(void*)&bc);
+  out = bc.buff; // hand the realloc'd block back through the out parameter
+  return result; // NOTE(review): result is unsigned, so the uint64_t narrowed
+}
+                                                                                                                                            
+uint64_t 
+Compressor::decompressToStream(const char*in, unsigned size, std::ostream& out){
+  // Set up the context and writer
+  WriterContext ctxt(&out,size / 2);
+
+  // Decompress the input; the callback writes each filled chunk to the stream
+  uint64_t zipSize = Compressor::decompress(in,size,
+    WriterContext::callback, (void*)&ctxt);
+
+  if (ctxt.chunk) { // flush the final, possibly partially filled, chunk
+    ctxt.write(zipSize - ctxt.written); // NOTE(review): 0 writes whole chunk
+  }
+  return zipSize;
+}
+
 }
 
 // vim: sw=2 ai