[llvm-commits] CVS: llvm/lib/Support/Compressor.cpp

Reid Spencer reid at x10sys.com
Mon Oct 4 03:49:52 PDT 2004



Changes in directory llvm/lib/Support:

Compressor.cpp added (r1.1)
---
Log message:

First version of a support utility to provide generalized compression in
LLVM that handles availability and unavailability of bzip2 and zlib.


---
Diffs of the changes:  (+526 -0)

Index: llvm/lib/Support/Compressor.cpp
diff -c /dev/null llvm/lib/Support/Compressor.cpp:1.1
*** /dev/null	Mon Oct  4 05:49:51 2004
--- llvm/lib/Support/Compressor.cpp	Mon Oct  4 05:49:41 2004
***************
*** 0 ****
--- 1,526 ----
+ //===- lib/Support/Compressor.cpp -------------------------------*- C++ -*-===//
+ // 
+ //                     The LLVM Compiler Infrastructure
+ //
+ // This file was developed by Reid Spencer and is distributed under the 
+ // University of Illinois Open Source License. See LICENSE.TXT for details.
+ // 
+ //===----------------------------------------------------------------------===//
+ //
+ // This file implements the llvm::Compressor class, an abstraction for memory
+ // block compression.
+ //
+ //===----------------------------------------------------------------------===//
+ 
+ #include "llvm/Config/config.h"
+ #include "llvm/Support/Compressor.h"
+ #include "llvm/ADT/StringExtras.h"
+ #include <cassert>
+ #include <string>
+ 
+ #ifdef HAVE_BZIP2
+ #include <bzlib.h>
+ #endif
+ 
+ #ifdef HAVE_ZLIB
+ #include <zlib.h>
+ #endif
+ 
+ #ifndef HAVE_BZIP2
+ # ifndef HAVE_ZLIB
+ #  warning No compression library is available!!
+ # endif
+ #endif
+ 
+ namespace {
+ 
+ inline int getdata(char*& buffer, unsigned& size, 
+                    llvm::Compressor::OutputDataCallback* cb) {
+   buffer = 0;
+   size = 0;
+   int result = (*cb)(buffer, size);
+   assert(buffer != 0 && "Invalid result from Compressor callback");
+   assert(size != 0 && "Invalid result from Compressor callback");
+   return result;
+ }
+ 
+ //===----------------------------------------------------------------------===//
+ //=== RLCOMP - very simple run length compression scheme
+ //===   The code below transforms the input into blocks that are either 
+ //===   compressed or not. Each block starts with a header byte that provides
+ //===   the length of the block. Values < 128 are uncompressed, values >128
+ //===   are compressed. The value 128 is illegal. Currently, the algorithm is
+ //===   not completed and is #if'd out.
+ //===----------------------------------------------------------------------===//
+ 
+ enum {
+   MAX_RLCOMP_OUT = 32768
+ };
+ 
+ struct RLCOMP_stream {
+   // User provided fields
+   char* next_in;
+   unsigned avail_in;
+   char* next_out;
+   unsigned avail_out;
+ 
+   // Information fields
+   uint64_t output_count; // Total count of output bytes
+ 
+ #if 0
+   // Algorithm fields
+   unsigned block_len;    // Length of current block
+   unsigned compression;  // State of compression 0=no, 1=yes, 2=indeterminate
+   char buffer[128];      // compression buffer (not used for decompress)
+   unsigned buflen;       // bytes in compression buffer
+   bool pending;          // is data pending to be written
+   char pending_data;     // the pending data
+   unsigned clen;         // length of the compressed block
+ #endif
+ };
+ 
+ void RLCOMP_init(RLCOMP_stream* s) {
+   s->output_count = 0;
+ #if 0
+   s->block_len = 0;
+   s->compression = 2;
+   s->buflen = 0;
+   s->pending = false;
+   s->pending_data = 0;
+   s->clen = 0;
+ #endif
+ }
+ 
+ inline bool RLCOMP_getchar(RLCOMP_stream* s, unsigned& data) {
+ #if 0
+   if (s->avail_in) {
+     data = *s->next_in++;
+     s->avail_in--;
+     return true;
+   }
+ #endif
+   return false;
+ }
+ 
+ inline bool RLCOMP_putchar(RLCOMP_stream* s, unsigned data) {
+ #if 0
+   if (s->avail_out) {
+     *s->next_out++ = data;
+     s->avail_out--;
+     s->output_count++;
+     return true;
+   } else {
+     s->pending = true;
+     s->pending_data = data;
+     return false;
+   }
+ #else
+   return false;
+ #endif
+ }
+ 
+ bool RLCOMP_compress(RLCOMP_stream* s) {
+   assert(s && "Invalid RLCOMP_stream");
+   assert(s->next_in != 0);
+   assert(s->next_out != 0);
+   assert(s->avail_in >= 1);
+   assert(s->avail_out >= 1);
+ 
+ #if 0
+ 
+   // Handle pending data from the last time in
+   if (s->pending) {
+     RLCOMP_putchar(s,s->pending_data);
+     s->pending = false;
+   }
+ 
+   unsigned c = 0;
+   unsigned lastc = 0;
+   // Handle the degenerate len=1 case
+   if (!RLCOMP_getchar(s,lastc)) {
+     RLCOMP_putchar(s,1);
+     return RLCOMP_putchar(s,lastc);
+   }
+ 
+   while (RLCOMP_getchar(s,c)) {
+     switch(s->compression) {
+       case 0:
+         if (lastc == c) {
+           s->compression = 1;
+           s->clen = 2 ;
+         } else {
+           if (!RLCOMP_putchar(s, c))
+             return false;
+         }
+         break;
+ 
+       case 1:
+         if (lastc != c) {
+           s->compression = 2;
+           if (!RLCOMP_putchar(s, s->clen))
+             return false;
+         } else {
+           s->clen++;
+         }
+         break;
+ 
+       case 2:
+         break;
+     }
+     lastc = c;
+   }
+ #endif
+   if (s->avail_out >= s->avail_in) {
+     ::memcpy(s->next_out, s->next_in, s->avail_in);
+     s->output_count += s->avail_in;
+     s->avail_out -= s->avail_in;
+     s->next_in += s->avail_in;
+     s->avail_in = 0;
+     return true;
+   } else {
+     ::memcpy(s->next_out, s->next_in, s->avail_out);
+     s->output_count += s->avail_out;
+     s->avail_in -= s->avail_out;
+     s->next_in += s->avail_out;
+     s->avail_out = 0;
+     return false;
+   }
+ }
+ 
+ bool RLCOMP_decompress(RLCOMP_stream* s) {
+   assert(s && "Invalid RLCOMP_stream");
+   assert(s->next_in != 0);
+   assert(s->next_out != 0);
+   assert(s->avail_in >= 1);
+   assert(s->avail_out >= 1);
+ 
+ #if 0
+   unsigned c = 0;
+   while (RLCOMP_getchar(s,c)) {
+     switch(s->compression) {
+       case 0: // This is not a compressed block
+         s->block_len--;
+         if (!RLCOMP_putchar(s,c))
+           return false;
+         break;
+ 
+       case 1: // This is a comperssed block
+         while (s->block_len-- > 0)
+           if (!RLCOMP_putchar(s,c))
+             return false;
+         break;
+ 
+       case 2: // This is the length field
+         if (c < 128) {
+           s->compression = 0;
+           s->block_len = c;
+         } else {
+           s->compression = 1;
+           s->block_len = c - 128;
+         }
+         continue;
+ 
+       default: // oops!
+         throw std::string("Invalid compression state");
+     }
+     if (s->block_len <= 0)
+       s->compression = 2;
+   }
+ 
+   if (s->repeat > 0)
+     throw std::string("Invalid compression state");
+ #endif
+   if (s->avail_out >= s->avail_in) {
+     ::memcpy(s->next_out, s->next_in, s->avail_in);
+     s->output_count += s->avail_in;
+     s->avail_out -= s->avail_in;
+     s->next_in += s->avail_in;
+     s->avail_in = 0;
+     return true;
+   } else {
+     ::memcpy(s->next_out, s->next_in, s->avail_out);
+     s->output_count += s->avail_out;
+     s->avail_in -= s->avail_out;
+     s->next_in += s->avail_out;
+     s->avail_out = 0;
+     return false;
+   }
+ }
+ 
+ void RLCOMP_end(RLCOMP_stream* strm) {
+ }
+ 
+ }
+ 
+ namespace llvm {
+ 
+ // Compress in one of three ways
+ uint64_t Compressor::compress(char* in, unsigned size, 
+                               OutputDataCallback* cb, Algorithm hint) {
+   assert(in && "Can't compress null buffer");
+   assert(size && "Can't compress empty buffer");
+   assert(cb && "Can't compress without a callback function");
+ 
+   uint64_t result = 0;
+ 
+   switch (hint) {
+     case COMP_TYPE_BZIP2: {
+ #if defined(HAVE_BZIP2)
+       // Set up the bz_stream
+       bz_stream bzdata;
+       bzdata.bzalloc = 0;
+       bzdata.bzfree = 0;
+       bzdata.opaque = 0;
+       bzdata.next_in = in;
+       bzdata.avail_in = size;
+       bzdata.next_out = 0;
+       bzdata.avail_out = 0;
+       switch ( BZ2_bzCompressInit(&bzdata, 9, 0, 0) ) {
+         case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled");
+         case BZ_PARAM_ERROR:  throw std::string("Compressor internal error");
+         case BZ_MEM_ERROR:    throw std::string("Out of memory");
+         case BZ_OK:
+         default:
+           break;
+       }
+ 
+       // Get a block of memory
+       if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb)) {
+         BZ2_bzCompressEnd(&bzdata);
+         throw std::string("Can't allocate output buffer");
+       }
+ 
+       // Put compression code in first byte
+       (*bzdata.next_out++) = COMP_TYPE_BZIP2;
+       bzdata.avail_out--;
+ 
+       // Compress it
+       int bzerr = BZ_FINISH_OK;
+       while (BZ_FINISH_OK == (bzerr = BZ2_bzCompress(&bzdata, BZ_FINISH))) {
+         if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb)) {
+           BZ2_bzCompressEnd(&bzdata);
+           throw std::string("Can't allocate output buffer");
+         }
+       }
+       switch (bzerr) {
+         case BZ_SEQUENCE_ERROR:
+         case BZ_PARAM_ERROR: throw std::string("Param/Sequence error");
+         case BZ_FINISH_OK:
+         case BZ_STREAM_END: break;
+         default: throw std::string("Oops: ") + utostr(unsigned(bzerr));
+       }
+ 
+       // Finish
+       result = (static_cast<uint64_t>(bzdata.total_out_hi32) << 32) |
+           bzdata.total_out_lo32 + 1;
+ 
+       BZ2_bzCompressEnd(&bzdata);
+       break;
+ #else
+       // FALL THROUGH
+ #endif
+     }
+ 
+     case COMP_TYPE_ZLIB: {
+ #if defined(HAVE_ZLIB)
+       z_stream zdata;
+       zdata.zalloc = Z_NULL;
+       zdata.zfree = Z_NULL;
+       zdata.opaque = Z_NULL;
+       zdata.next_in = reinterpret_cast<Bytef*>(in);
+       zdata.avail_in = size;
+       if (Z_OK != deflateInit(&zdata,Z_BEST_COMPRESSION))
+         throw std::string(zdata.msg ? zdata.msg : "zlib error");
+ 
+       if (0 != getdata((char*&)(zdata.next_out), zdata.avail_out,cb)) {
+         deflateEnd(&zdata);
+         throw std::string("Can't allocate output buffer");
+       }
+ 
+       (*zdata.next_out++) = COMP_TYPE_ZLIB;
+       zdata.avail_out--;
+ 
+       int flush = 0;
+       while ( Z_OK == deflate(&zdata,0) && zdata.avail_out == 0) {
+         if (0 != getdata((char*&)zdata.next_out, zdata.avail_out, cb)) {
+           deflateEnd(&zdata);
+           throw std::string("Can't allocate output buffer");
+         }
+       }
+ 
+       while ( Z_STREAM_END != deflate(&zdata, Z_FINISH)) {
+         if (0 != getdata((char*&)zdata.next_out, zdata.avail_out, cb)) {
+           deflateEnd(&zdata);
+           throw std::string("Can't allocate output buffer");
+         }
+       }
+ 
+       result = static_cast<uint64_t>(zdata.total_out) + 1;
+       deflateEnd(&zdata);
+       break;
+ 
+ #else
+     // FALL THROUGH
+ #endif
+     }
+ 
+     case COMP_TYPE_SIMPLE: {
+       RLCOMP_stream sdata;
+       sdata.next_in = in;
+       sdata.avail_in = size;
+       RLCOMP_init(&sdata);
+ 
+       if (0 != getdata(sdata.next_out, sdata.avail_out,cb)) {
+         throw std::string("Can't allocate output buffer");
+       }
+ 
+       *(sdata.next_out++) = COMP_TYPE_SIMPLE;
+       sdata.avail_out--;
+ 
+       while (!RLCOMP_compress(&sdata)) {
+         if (0 != getdata(sdata.next_out, sdata.avail_out,cb)) {
+           throw std::string("Can't allocate output buffer");
+         }
+       }
+ 
+       result = sdata.output_count + 1;
+       RLCOMP_end(&sdata);
+       break;
+     }
+     default:
+       throw std::string("Invalid compression type hint");
+   }
+   return result;
+ }
+ 
+ // Decompress in one of three ways
+ uint64_t Compressor::decompress(char *in, unsigned size, 
+                                 OutputDataCallback* cb) {
+   assert(in && "Can't decompress null buffer");
+   assert(size > 1 && "Can't decompress empty buffer");
+   assert(cb && "Can't decompress without a callback function");
+ 
+   uint64_t result = 0;
+ 
+   switch (*in++) {
+     case COMP_TYPE_BZIP2: {
+ #if !defined(HAVE_BZIP2)
+       throw std::string("Can't decompress BZIP2 data");
+ #else
+       // Set up the bz_stream
+       bz_stream bzdata;
+       bzdata.bzalloc = 0;
+       bzdata.bzfree = 0;
+       bzdata.opaque = 0;
+       bzdata.next_in = in;
+       bzdata.avail_in = size - 1;
+       bzdata.next_out = 0;
+       bzdata.avail_out = 0;
+       switch ( BZ2_bzDecompressInit(&bzdata, 0, 0) ) {
+         case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled");
+         case BZ_PARAM_ERROR:  throw std::string("Compressor internal error");
+         case BZ_MEM_ERROR:    throw std::string("Out of memory");
+         case BZ_OK:
+         default:
+           break;
+       }
+ 
+       // Get a block of memory
+       if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb)) {
+         BZ2_bzDecompressEnd(&bzdata);
+         throw std::string("Can't allocate output buffer");
+       }
+ 
+       // Decompress it
+       int bzerr = BZ_OK;
+       while (BZ_OK == (bzerr = BZ2_bzDecompress(&bzdata))) {
+         if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb)) {
+           BZ2_bzDecompressEnd(&bzdata);
+           throw std::string("Can't allocate output buffer");
+         }
+       }
+ 
+       switch (bzerr) {
+         case BZ_PARAM_ERROR:  throw std::string("Compressor internal error");
+         case BZ_MEM_ERROR:    throw std::string("Out of memory");
+         case BZ_DATA_ERROR:   throw std::string("Data integrity error");
+         case BZ_DATA_ERROR_MAGIC:throw std::string("Data is not BZIP2");
+         default: throw("Ooops");
+         case BZ_STREAM_END:
+           break;
+       }
+ 
+       // Finish
+       result = (static_cast<uint64_t>(bzdata.total_out_hi32) << 32) |
+         bzdata.total_out_lo32;
+       BZ2_bzDecompressEnd(&bzdata);
+       break;
+     }
+ #endif
+ 
+     case COMP_TYPE_ZLIB: {
+ #if !defined(HAVE_ZLIB)
+       throw std::string("Can't decompress ZLIB data");
+ #else
+       z_stream zdata;
+       zdata.zalloc = Z_NULL;
+       zdata.zfree = Z_NULL;
+       zdata.opaque = Z_NULL;
+       zdata.next_in = reinterpret_cast<Bytef*>(in);
+       zdata.avail_in = size - 1;
+       if ( Z_OK != inflateInit(&zdata))
+         throw std::string(zdata.msg ? zdata.msg : "zlib error");
+ 
+       if (0 != getdata((char*&)zdata.next_out, zdata.avail_out,cb)) {
+         inflateEnd(&zdata);
+         throw std::string("Can't allocate output buffer");
+       }
+ 
+       int zerr = Z_OK;
+       while (Z_OK == (zerr = inflate(&zdata,0))) {
+         if (0 != getdata((char*&)zdata.next_out, zdata.avail_out,cb)) {
+           inflateEnd(&zdata);
+           throw std::string("Can't allocate output buffer");
+         }
+       }
+ 
+       if (zerr != Z_STREAM_END)
+         throw std::string(zdata.msg?zdata.msg:"zlib error");
+ 
+       result = static_cast<uint64_t>(zdata.total_out);
+       inflateEnd(&zdata);
+       break;
+ #endif
+     }
+ 
+     case COMP_TYPE_SIMPLE: {
+       RLCOMP_stream sdata;
+       sdata.next_in = in;
+       sdata.avail_in = size - 1;
+       RLCOMP_init(&sdata);
+ 
+       if (0 != getdata(sdata.next_out, sdata.avail_out,cb)) {
+         throw std::string("Can't allocate output buffer");
+       }
+ 
+       while (!RLCOMP_decompress(&sdata)) {
+         if (0 != getdata(sdata.next_out, sdata.avail_out,cb)) {
+           throw std::string("Can't allocate output buffer");
+         }
+       }
+ 
+       result = sdata.output_count;
+       RLCOMP_end(&sdata);
+       break;
+     }
+ 
+     default:
+       throw std::string("Unknown type of compressed data");
+   }
+ 
+   return result;
+ }
+ 
+ }
+ 
+ // vim: sw=2 ai






More information about the llvm-commits mailing list