[llvm-commits] CVS: llvm/include/llvm/Bitcode/BitCodes.h BitstreamReader.h BitstreamWriter.h ReaderWriter.h

Sat Apr 21 23:22:23 PDT 2007

Changes in directory llvm/include/llvm/Bitcode:

BitCodes.h added (r1.1)
BitstreamReader.h added (r1.1)
BitstreamWriter.h added (r1.1)
ReaderWriter.h added (r1.1)
---
Log message:

Define the content-independent interfaces to read/write bitcode files and
the high-level interface to read/write LLVM IR bitcode files.

This is a work in progress.

---
Diffs of the changes:  (+523 -0)

 BitCodes.h        |   46 +++++++++++
 BitstreamReader.h |  220 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 BitstreamWriter.h |  219 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 ReaderWriter.h    |   38 +++++++++
 4 files changed, 523 insertions(+)

Index: llvm/include/llvm/Bitcode/BitCodes.h
diff -c /dev/null llvm/include/llvm/Bitcode/BitCodes.h:1.1
*** /dev/null	Sun Apr 22 01:22:15 2007
--- llvm/include/llvm/Bitcode/BitCodes.h	Sun Apr 22 01:22:05 2007
***************
*** 0 ****
--- 1,46 ----
+ //===- BitCodes.h - Enum values for the bitcode format ----------*- C++ -*-===//
+ //
+ //                     The LLVM Compiler Infrastructure
+ //
+ // This file was developed by Chris Lattner and is distributed under
+ // the University of Illinois Open Source License. See LICENSE.TXT for details.
+ //
+ //===----------------------------------------------------------------------===//
+ //
+ // This header defines the Bitcode enum values.
+ //
+ // The enum values defined in this file should be considered permanent.  If
+ // new features are added, they should have values added at the end of the
+ // respective lists.
+ //
+ //===----------------------------------------------------------------------===//
+ 
+ #ifndef LLVM_BITCODE_BITCODES_H
+ #define LLVM_BITCODE_BITCODES_H
+ 
+ namespace llvm {
+ namespace bitc {
+   enum StandardWidths {
+     BlockIDWidth = 8,  // We use VBR-8 for block IDs.
+     CodeLenWidth = 4,  // Code lengths are VBR-4.
+     BlockSizeWidth = 32  // BlockSize up to 2^32 32-bit words = 16GB per block.
+   };
+   
+   // The standard code namespace always has a way to exit a block, enter a
+   // nested block, define abbrevs, and define an unabbreviated record.
+   enum FixedCodes {
+     END_BLOCK = 0,  // Must be zero to guarantee termination for broken bitcode.
+     ENTER_SUBBLOCK = 1,
+     
+     // Two codes are reserved for defining abbrevs and for emitting an
+     // unabbreviated record.
+     DEFINE_ABBREVS = 2,
+     UNABBREV_RECORD = 3,
+     
+     // This is not a code, this is a marker for the first abbrev assignment.
+     FIRST_ABBREV = 4
+   };
+ } // End bitc namespace
+ } // End llvm namespace
+ 
+ #endif

Index: llvm/include/llvm/Bitcode/BitstreamReader.h
diff -c /dev/null llvm/include/llvm/Bitcode/BitstreamReader.h:1.1
*** /dev/null	Sun Apr 22 01:22:23 2007
--- llvm/include/llvm/Bitcode/BitstreamReader.h	Sun Apr 22 01:22:05 2007
***************
*** 0 ****
--- 1,220 ----
+ //===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===//
+ //
+ //                     The LLVM Compiler Infrastructure
+ //
+ // This file was developed by Chris Lattner and is distributed under
+ // the University of Illinois Open Source License.  See LICENSE.TXT for details.
+ //
+ //===----------------------------------------------------------------------===//
+ //
+ // This header defines the BitstreamReader class.  This class can be used to
+ // read an arbitrary bitstream, regardless of its contents.
+ //
+ //===----------------------------------------------------------------------===//
+ 
+ #ifndef BITSTREAM_READER_H
+ #define BITSTREAM_READER_H
+ 
+ #include "llvm/Bitcode/BitCodes.h"
+ #include "llvm/ADT/SmallVector.h"
+ #include <cassert>
+ 
+ namespace llvm {
+   
+ /// BitstreamReader - Reads bit fields from a buffer of little-endian words.
+ class BitstreamReader {
+   const unsigned char *NextChar;  // Next unread byte of the buffer.
+   const unsigned char *LastChar;  // One past the last byte of the buffer.
+ 
+   /// CurWord - This is the current data we have pulled from the stream but have
+   /// not returned to the client.
+   uint32_t CurWord;
+ 
+   /// BitsInCurWord - This is the number of bits in CurWord that are valid. This
+   /// is always from [0...31] inclusive.
+   unsigned BitsInCurWord;
+ 
+   /// CurCodeSize - This is the declared size of code values used for the
+   /// current block, in bits.
+   unsigned CurCodeSize;
+ 
+   /// BlockScope - This tracks the codesize of parent blocks.
+   SmallVector<unsigned, 8> BlockScope;
+ 
+ public:
+   /// Construct a reader over [Start, End); codes start out 2 bits wide.
+   BitstreamReader(const unsigned char *Start, const unsigned char *End)
+     : NextChar(Start), LastChar(End), CurWord(0), BitsInCurWord(0),
+       CurCodeSize(2) {
+     assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes");
+   }
+ 
+   /// AtEndOfStream - Return true once every word of the buffer has been loaded.
+   bool AtEndOfStream() const { return NextChar == LastChar; }
+ 
+   /// Read - Return the next NumBits (at most 32) bits of the stream.  Running
+   /// off the end of the buffer drops any partially read field and returns 0.
+   uint32_t Read(unsigned NumBits) {
+     assert(NumBits <= 32 && "Cannot return more than 32 bits!");
+     // If the field is fully contained by CurWord, return it quickly.
+     if (BitsInCurWord >= NumBits) {
+       uint32_t R = CurWord & ((1U << NumBits)-1);
+       CurWord >>= NumBits;
+       BitsInCurWord -= NumBits;
+       return R;
+     }
+ 
+     // If we run out of data, stop at the end of the stream.
+     if (LastChar == NextChar) {
+       CurWord = 0;
+       BitsInCurWord = 0;
+       return 0;
+     }
+ 
+     uint32_t R = CurWord;
+     // Read the next word from the stream.  Widen each byte before shifting so
+     // that a top byte >= 0x80 cannot overflow a signed int.
+     CurWord = ((uint32_t)NextChar[0] <<  0) | ((uint32_t)NextChar[1] <<  8) |
+               ((uint32_t)NextChar[2] << 16) | ((uint32_t)NextChar[3] << 24);
+     NextChar += 4;
+ 
+     // Extract NumBits-BitsInCurWord from what we just read.
+     unsigned BitsLeft = NumBits-BitsInCurWord;
+ 
+     // Be careful here, BitsLeft is in the range [1..32] inclusive.
+     R |= (CurWord & (~0U >> (32-BitsLeft))) << BitsInCurWord;
+ 
+     // BitsLeft bits have just been used up from CurWord.
+     CurWord = BitsLeft != 32 ? CurWord >> BitsLeft : 0;
+     BitsInCurWord = 32-BitsLeft;
+     return R;
+   }
+ 
+   /// ReadVBR - Read a variable bit rate value stored as NumBits-wide chunks;
+   /// the top bit of each chunk says whether another chunk follows.
+   uint32_t ReadVBR(unsigned NumBits) {
+     uint32_t Piece = Read(NumBits);
+     if ((Piece & (1U << (NumBits-1))) == 0)
+       return Piece;
+ 
+     uint32_t Result = 0;
+     unsigned NextBit = 0;
+     while (1) {
+       Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit;
+       if ((Piece & (1U << (NumBits-1))) == 0)
+         return Result;
+       NextBit += NumBits-1;
+       Piece = Read(NumBits);
+     }
+   }
+ 
+   /// ReadVBR64 - Like ReadVBR, but accumulate the chunks into a 64-bit value.
+   uint64_t ReadVBR64(unsigned NumBits) {
+     uint64_t Piece = Read(NumBits);
+     if ((Piece & (1U << (NumBits-1))) == 0)
+       return Piece;
+ 
+     uint64_t Result = 0;
+     unsigned NextBit = 0;
+     while (1) {
+       Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit;
+       if ((Piece & (1U << (NumBits-1))) == 0)
+         return Result;
+       NextBit += NumBits-1;
+       Piece = Read(NumBits);
+     }
+   }
+ 
+   /// SkipToWord - Discard the bits of CurWord that have not been read yet.
+   void SkipToWord() {
+     BitsInCurWord = 0;
+     CurWord = 0;
+   }
+ 
+   /// ReadCode - Read a code using the current block's code width.
+   unsigned ReadCode() { return Read(CurCodeSize); }
+ 
+   //===--------------------------------------------------------------------===//
+   // Block Manipulation
+   //===--------------------------------------------------------------------===//
+ 
+   // Block header:
+   //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
+ 
+   /// ReadSubBlockID - Having read the ENTER_SUBBLOCK code, read the BlockID.
+   unsigned ReadSubBlockID() { return ReadVBR(bitc::BlockIDWidth); }
+ 
+   /// SkipBlock - Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip
+   /// over the body of this block.  If the block record is malformed, return
+   /// true.
+   bool SkipBlock() {
+     // Read and ignore the codelen value.  Since we are skipping this block, we
+     // don't care what code widths are used inside of it.
+     ReadVBR(bitc::CodeLenWidth);
+     SkipToWord();
+     unsigned NumWords = Read(bitc::BlockSizeWidth);
+ 
+     // Check that the block wasn't partially defined, and that the offset isn't
+     // bogus.  Compare word counts so that a huge NumWords cannot wrap around.
+     if (AtEndOfStream() || NumWords > (uint64_t)(LastChar-NextChar)/4)
+       return true;
+ 
+     NextChar += (uint64_t)NumWords*4;
+     return false;
+   }
+ 
+   /// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid and a BlockID,
+   /// read the rest of the block header and enter the block.  If the header
+   /// is malformed, return true.
+   bool EnterSubBlock() {
+     BlockScope.push_back(CurCodeSize);
+ 
+     // Get the codesize of this block.
+     CurCodeSize = ReadVBR(bitc::CodeLenWidth);
+     SkipToWord();
+     unsigned NumWords = Read(bitc::BlockSizeWidth);
+ 
+     // Validate that this block is sane.  Read() cannot return more than 32
+     // bits, so a wider code size is as bogus as an empty one.
+     return CurCodeSize == 0 || CurCodeSize > 32 || AtEndOfStream() ||
+            NumWords > (uint64_t)(LastChar-NextChar)/4;
+   }
+ 
+   /// ReadBlockEnd - Having read the END_BLOCK code, leave the current block and
+   /// restore its parent's code width.  Returns true if no block is open.
+   bool ReadBlockEnd() {
+     if (BlockScope.empty()) return true;
+ 
+     // Block tail:
+     //    [END_BLOCK, <align4bytes>]
+     SkipToWord();
+     CurCodeSize = BlockScope.back();
+     BlockScope.pop_back();
+     return false;
+   }
+ 
+   //===--------------------------------------------------------------------===//
+   // Record Processing
+   //===--------------------------------------------------------------------===//
+ 
+   /// ReadRecord - Read the record introduced by AbbrevID, append its operands
+   /// to Vals, and return its code.  Only UNABBREV_RECORD is handled so far.
+   unsigned ReadRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals) {
+     if (AbbrevID == bitc::UNABBREV_RECORD) {
+       unsigned Code = ReadVBR(6);
+       unsigned NumElts = ReadVBR(6);
+       for (unsigned i = 0; i != NumElts; ++i)
+         Vals.push_back(ReadVBR64(6));
+       return Code;
+     }
+ 
+     assert(0 && "Reading with abbrevs not implemented!");
+     return 0;  // Keep the result defined when asserts are compiled out.
+   }
+ };
+ 
+ } // End llvm namespace
+ 
+ #endif
+ 
+     
\ No newline at end of file

Index: llvm/include/llvm/Bitcode/BitstreamWriter.h
diff -c /dev/null llvm/include/llvm/Bitcode/BitstreamWriter.h:1.1
*** /dev/null	Sun Apr 22 01:22:23 2007
--- llvm/include/llvm/Bitcode/BitstreamWriter.h	Sun Apr 22 01:22:05 2007
***************
*** 0 ****
--- 1,219 ----
+ //===- BitstreamWriter.h - Low-level bitstream writer interface -*- C++ -*-===//
+ //
+ //                     The LLVM Compiler Infrastructure
+ //
+ // This file was developed by Chris Lattner and is distributed under
+ // the University of Illinois Open Source License.  See LICENSE.TXT for details.
+ //
+ //===----------------------------------------------------------------------===//
+ //
+ // This header defines the BitstreamWriter class.  This class can be used to
+ // write an arbitrary bitstream, regardless of its contents.
+ //
+ //===----------------------------------------------------------------------===//
+ 
+ #ifndef BITSTREAM_WRITER_H
+ #define BITSTREAM_WRITER_H
+ 
+ #include "llvm/Bitcode/BitCodes.h"
+ #include "llvm/ADT/SmallVector.h"
+ #include <cassert>
+ #include <vector>
+ 
+ namespace llvm {
+ 
+ /// BitstreamWriter - Packs bit fields into 32-bit words appended to Out.
+ class BitstreamWriter {
+   std::vector<unsigned char> &Out;
+ 
+   /// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use.
+   unsigned CurBit;
+ 
+   /// CurValue - The current value.  Only bits < CurBit are valid.
+   uint32_t CurValue;
+ 
+   /// CurCodeSize - This is the declared size of code values used for the
+   /// current block, in bits.
+   unsigned CurCodeSize;
+ 
+   struct Block {
+     unsigned PrevCodeSize;   // Code size of the block we were in before.
+     unsigned StartSizeWord;  // Index in Out, in words, of the size field.
+     Block(unsigned PCS, unsigned SSW) : PrevCodeSize(PCS), StartSizeWord(SSW) {}
+   };
+ 
+   /// BlockScope - This tracks the current blocks that we have entered.
+   std::vector<Block> BlockScope;
+ 
+   /// WriteWord - Append Value to Out, least significant byte first.
+   void WriteWord(uint32_t Value) {
+     Out.push_back((unsigned char)(Value >>  0));
+     Out.push_back((unsigned char)(Value >>  8));
+     Out.push_back((unsigned char)(Value >> 16));
+     Out.push_back((unsigned char)(Value >> 24));
+   }
+ 
+   /// EmitUnabbrevRecord - Emit Code and Vals in fully unabbreviated form.
+   template<typename T>
+   void EmitUnabbrevRecord(unsigned Code, const SmallVectorImpl<T> &Vals) {
+     EmitCode(bitc::UNABBREV_RECORD);
+     EmitVBR(Code, 6);
+     EmitVBR(Vals.size(), 6);
+     for (unsigned i = 0, e = Vals.size(); i != e; ++i)
+       EmitVBR64(Vals[i], 6);
+   }
+ 
+ public:
+   BitstreamWriter(std::vector<unsigned char> &O)
+     : Out(O), CurBit(0), CurValue(0), CurCodeSize(2) {}
+ 
+   /// All data must be flushed and every block exited before destruction.
+   ~BitstreamWriter() {
+     assert(CurBit == 0 && "Unflushed data remaining");
+     assert(BlockScope.empty() && "Block imbalance");
+   }
+   //===--------------------------------------------------------------------===//
+   // Basic Primitives for emitting bits to the stream.
+   //===--------------------------------------------------------------------===//
+ 
+   /// Emit - Append the low NumBits (at most 32) bits of Val to the stream.
+   void Emit(uint32_t Val, unsigned NumBits) {
+     assert(NumBits <= 32 && "Invalid value size!");
+     // Shifting a 32-bit value by 32 is undefined, so test that width apart.
+     assert((NumBits == 32 || (Val >> NumBits) == 0) && "High bits set!");
+     CurValue |= Val << CurBit;
+     if (CurBit + NumBits < 32) {
+       CurBit += NumBits;
+       return;
+     }
+ 
+     // Add the current word, then keep the bits of Val that did not fit in it.
+     WriteWord(CurValue);
+     CurValue = CurBit ? Val >> (32-CurBit) : 0;
+     CurBit = (CurBit+NumBits) & 31;
+   }
+ 
+   /// Emit64 - Append the low NumBits (at most 64) bits of Val, low word first.
+   void Emit64(uint64_t Val, unsigned NumBits) {
+     if (NumBits <= 32)
+       Emit((uint32_t)Val, NumBits);
+     else {
+       Emit((uint32_t)Val, 32);
+       Emit((uint32_t)(Val >> 32), NumBits-32);
+     }
+   }
+ 
+   /// FlushToWord - Write out the partially filled current word, if there is one.
+   void FlushToWord() {
+     if (CurBit) {
+       WriteWord(CurValue);
+       CurBit = 0;
+       CurValue = 0;
+     }
+   }
+ 
+   /// EmitVBR - Emit Val as NumBits-wide chunks; a set top bit means "more".
+   void EmitVBR(uint32_t Val, unsigned NumBits) {
+     uint32_t Threshold = 1U << (NumBits-1);
+ 
+     // Emit the bits with VBR encoding, NumBits-1 bits at a time.
+     while (Val >= Threshold) {
+       Emit((Val & (Threshold-1)) | Threshold, NumBits);
+       Val >>= NumBits-1;
+     }
+ 
+     Emit(Val, NumBits);
+   }
+ 
+   /// EmitVBR64 - Like EmitVBR, for values that may need more than 32 bits.
+   void EmitVBR64(uint64_t Val, unsigned NumBits) {
+     if ((uint32_t)Val == Val)
+       return EmitVBR((uint32_t)Val, NumBits);
+ 
+     uint32_t Threshold = 1U << (NumBits-1);
+ 
+     // Emit the bits with VBR encoding, NumBits-1 bits at a time.
+     while (Val >= Threshold) {
+       Emit(((uint32_t)Val & (Threshold-1)) | Threshold, NumBits);
+       Val >>= NumBits-1;
+     }
+ 
+     Emit((uint32_t)Val, NumBits);
+   }
+ 
+   /// EmitCode - Emit the specified code.
+   void EmitCode(unsigned Val) { Emit(Val, CurCodeSize); }
+ 
+   //===--------------------------------------------------------------------===//
+   // Block Manipulation
+   //===--------------------------------------------------------------------===//
+ 
+   /// EnterSubblock - Start block BlockID, whose codes are CodeLen bits wide.
+   void EnterSubblock(unsigned BlockID, unsigned CodeLen) {
+     // Block header:
+     //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
+     EmitCode(bitc::ENTER_SUBBLOCK);
+     EmitVBR(BlockID, bitc::BlockIDWidth);
+     EmitVBR(CodeLen, bitc::CodeLenWidth);
+     FlushToWord();
+     BlockScope.push_back(Block(CurCodeSize, Out.size()/4));
+     // Emit a placeholder, which will be replaced when the block is popped.
+     Emit(0, bitc::BlockSizeWidth);
+ 
+     CurCodeSize = CodeLen;
+   }
+ 
+   /// ExitBlock - End the innermost block and fill in the size in its header.
+   void ExitBlock() {
+     assert(!BlockScope.empty() && "Block scope imbalance!");
+     Block B = BlockScope.back();
+     BlockScope.pop_back();
+ 
+     // Block tail:
+     //    [END_BLOCK, <align4bytes>]
+     EmitCode(bitc::END_BLOCK);
+     FlushToWord();
+ 
+     // Compute the size of the block, in words, not counting the size field.
+     unsigned SizeInWords = Out.size()/4-B.StartSizeWord - 1;
+     unsigned ByteNo = B.StartSizeWord*4;
+ 
+     // Update the block size field in the header of this sub-block.
+     Out[ByteNo++] = (unsigned char)(SizeInWords >>  0);
+     Out[ByteNo++] = (unsigned char)(SizeInWords >>  8);
+     Out[ByteNo++] = (unsigned char)(SizeInWords >> 16);
+     Out[ByteNo++] = (unsigned char)(SizeInWords >> 24);
+ 
+     // Restore the outer block's code size.
+     CurCodeSize = B.PrevCodeSize;
+   }
+ 
+   //===--------------------------------------------------------------------===//
+   // Record Emission
+   //===--------------------------------------------------------------------===//
+ 
+   /// EmitRecord - Emit the specified record to the stream, using an abbrev if
+   /// we have one to compress the output.  Abbrevs are not implemented yet.
+   void EmitRecord(unsigned Code, const SmallVectorImpl<uint64_t> &Vals,
+                   unsigned Abbrev = 0) {
+     if (Abbrev)
+       assert(0 && "abbrevs not implemented yet!");
+     else
+       EmitUnabbrevRecord(Code, Vals);
+   }
+ 
+   /// EmitRecord - Emit the specified record to the stream, using an abbrev if
+   /// we have one to compress the output.  Abbrevs are not implemented yet.
+   void EmitRecord(unsigned Code, const SmallVectorImpl<unsigned> &Vals,
+                   unsigned Abbrev = 0) {
+     if (Abbrev)
+       assert(0 && "abbrevs not implemented yet!");
+     else
+       EmitUnabbrevRecord(Code, Vals);
+   }
+ };
+ 
+ 
+ } // End llvm namespace
+ 
+ #endif

Index: llvm/include/llvm/Bitcode/ReaderWriter.h
diff -c /dev/null llvm/include/llvm/Bitcode/ReaderWriter.h:1.1
*** /dev/null	Sun Apr 22 01:22:23 2007
--- llvm/include/llvm/Bitcode/ReaderWriter.h	Sun Apr 22 01:22:05 2007
***************
*** 0 ****
--- 1,38 ----
+ //===-- llvm/Bitcode/ReaderWriter.h - Bitcode reader/writers ----*- C++ -*-===//
+ //
+ //                     The LLVM Compiler Infrastructure
+ //
+ // This file was developed by Chris Lattner and is distributed under
+ // the University of Illinois Open Source License. See LICENSE.TXT for details.
+ //
+ //===----------------------------------------------------------------------===//
+ //
+ // This header defines interfaces to read and write LLVM bitcode files/streams.
+ //
+ //===----------------------------------------------------------------------===//
+ 
+ #ifndef LLVM_BITCODE_H
+ #define LLVM_BITCODE_H
+ 
+ #include <iosfwd>
+ #include <string>
+ 
+ namespace llvm {
+   class Module;
+   class ModuleProvider;
+   /// NOTE(review): presumably returns null on error, as ParseBitcodeFile does.
+   ModuleProvider *getBitcodeModuleProvider(const std::string &Filename,
+                                            std::string *ErrMsg = 0);
+ 
+   
+   /// ParseBitcodeFile - Read the specified bitcode file, returning the module.
+   /// If an error occurs, return null and fill in *ErrMsg if non-null.
+   Module *ParseBitcodeFile(const std::string &Filename,
+                            std::string *ErrMsg = 0);
+   
+   /// WriteBitcodeToFile - Write the specified module to the specified output
+   /// stream.
+   void WriteBitcodeToFile(const Module *M, std::ostream &Out);
+ } // End llvm namespace
+ 
+ #endif