[llvm] r264545 - Bitcode: Split out SimpleBitstreamCursor

Mon Mar 28 21:51:43 PDT 2016

> On Mar 27, 2016, at 3:40 PM, Duncan P. N. Exon Smith via llvm-commits <llvm-commits at lists.llvm.org> wrote:
> 
> Author: dexonsmith
> Date: Sun Mar 27 17:40:55 2016
> New Revision: 264545
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=264545&view=rev
> Log:
> Bitcode: Split out SimpleBitstreamCursor
> 
> Split out SimpleBitstreamCursor from BitstreamCursor, which is a
> lower-level cursor with no knowledge of bitcode blocks, abbreviations,
> or records.  It just knows how to read bits and navigate the stream.
> 
> This is mainly organizational, to separate the API for manipulating raw
> bits from that for bitcode concepts like Record and Block.
> 
> Modified:
>    llvm/trunk/include/llvm/Bitcode/BitstreamReader.h
>    llvm/trunk/lib/Bitcode/Reader/BitstreamReader.cpp
> 
> Modified: llvm/trunk/include/llvm/Bitcode/BitstreamReader.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Bitcode/BitstreamReader.h?rev=264545&r1=264544&r2=264545&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Bitcode/BitstreamReader.h (original)
> +++ llvm/trunk/include/llvm/Bitcode/BitstreamReader.h Sun Mar 27 17:40:55 2016
> @@ -128,98 +128,42 @@ public:
>   }
> };
> 
> -/// When advancing through a bitstream cursor, each advance can discover a few
> -/// different kinds of entries:
> -struct BitstreamEntry {
> -  enum {
> -    Error,    // Malformed bitcode was found.
> -    EndBlock, // We've reached the end of the current block, (or the end of the
> -              // file, which is treated like a series of EndBlock records.
> -    SubBlock, // This is the start of a new subblock of a specific ID.
> -    Record    // This is a record with a specific AbbrevID.
> -  } Kind;
> -
> -  unsigned ID;
> -
> -  static BitstreamEntry getError() {
> -    BitstreamEntry E; E.Kind = Error; return E;
> -  }
> -  static BitstreamEntry getEndBlock() {
> -    BitstreamEntry E; E.Kind = EndBlock; return E;
> -  }
> -  static BitstreamEntry getSubBlock(unsigned ID) {
> -    BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E;
> -  }
> -  static BitstreamEntry getRecord(unsigned AbbrevID) {
> -    BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E;
> -  }
> -};
> -
> -/// This represents a position within a bitcode file. There may be multiple
> -/// independent cursors reading within one bitstream, each maintaining their own
> -/// local state.
> -///
> -/// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
> -/// be passed by value.
> -class BitstreamCursor {
> -  BitstreamReader *BitStream;
> -  size_t NextChar;
> +/// This represents a position within a bitstream. There may be multiple
> +/// independent cursors reading within one bitstream, each maintaining their
> +/// own local state.
> +class SimpleBitstreamCursor {
> +  BitstreamReader *R = nullptr;
> +  size_t NextChar = 0;
> 
>   // The size of the bicode. 0 if we don't know it yet.
> -  size_t Size;
> +  size_t Size = 0;
> 
>   /// This is the current data we have pulled from the stream but have not
>   /// returned to the client. This is specifically and intentionally defined to
>   /// follow the word size of the host machine for efficiency. We use word_t in
>   /// places that are aware of this to make it perfectly explicit what is going
>   /// on.
> +public:
>   typedef size_t word_t;
> -  word_t CurWord;
> +
> +private:
> +  word_t CurWord = 0;
> 
>   /// This is the number of bits in CurWord that are valid. This is always from
>   /// [0...bits_of(size_t)-1] inclusive.
> -  unsigned BitsInCurWord;
> -
> -  // This is the declared size of code values used for the current block, in
> -  // bits.
> -  unsigned CurCodeSize;
> -
> -  /// Abbrevs installed at in this block.
> -  std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> CurAbbrevs;
> -
> -  struct Block {
> -    unsigned PrevCodeSize;
> -    std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs;
> -    explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
> -  };
> -
> -  /// This tracks the codesize of parent blocks.
> -  SmallVector<Block, 8> BlockScope;
> -
> +  unsigned BitsInCurWord = 0;
> 
> public:
>   static const size_t MaxChunkSize = sizeof(word_t) * 8;
> 
> -  BitstreamCursor() { init(nullptr); }
> -
> -  explicit BitstreamCursor(BitstreamReader &R) { init(&R); }
> -
> -  void init(BitstreamReader *R) {
> -    freeState();
> +  SimpleBitstreamCursor() = default;
> 
> -    BitStream = R;
> -    NextChar = 0;
> -    Size = 0;
> -    BitsInCurWord = 0;
> -    CurCodeSize = 2;
> -  }
> -
> -  void freeState();
> +  explicit SimpleBitstreamCursor(BitstreamReader *R) : R(R) {}
> 
>   bool canSkipToPos(size_t pos) const {
>     // pos can be skipped to if it is a valid address or one byte past the end.
> -    return pos == 0 || BitStream->getBitcodeBytes().isValidAddress(
> -        static_cast<uint64_t>(pos - 1));
> +    return pos == 0 ||
> +           R->getBitcodeBytes().isValidAddress(static_cast<uint64_t>(pos - 1));
>   }
> 
>   bool AtEndOfStream() {
> @@ -231,72 +175,13 @@ public:
>     return BitsInCurWord == 0;
>   }
> 
> -  /// Return the number of bits used to encode an abbrev #.
> -  unsigned getAbbrevIDWidth() const { return CurCodeSize; }
> -
>   /// Return the bit # of the bit we are reading.
>   uint64_t GetCurrentBitNo() const {
>     return NextChar*CHAR_BIT - BitsInCurWord;
>   }
> 
> -  BitstreamReader *getBitStreamReader() {
> -    return BitStream;
> -  }
> -  const BitstreamReader *getBitStreamReader() const {
> -    return BitStream;
> -  }
> -
> -  /// Flags that modify the behavior of advance().
> -  enum {
> -    /// If this flag is used, the advance() method does not automatically pop
> -    /// the block scope when the end of a block is reached.
> -    AF_DontPopBlockAtEnd = 1,
> -
> -    /// If this flag is used, abbrev entries are returned just like normal
> -    /// records.
> -    AF_DontAutoprocessAbbrevs = 2
> -  };
> -
> -  /// Advance the current bitstream, returning the next entry in the stream.
> -  BitstreamEntry advance(unsigned Flags = 0) {
> -    while (1) {
> -      unsigned Code = ReadCode();
> -      if (Code == bitc::END_BLOCK) {
> -        // Pop the end of the block unless Flags tells us not to.
> -        if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd())
> -          return BitstreamEntry::getError();
> -        return BitstreamEntry::getEndBlock();
> -      }
> -
> -      if (Code == bitc::ENTER_SUBBLOCK)
> -        return BitstreamEntry::getSubBlock(ReadSubBlockID());
> -
> -      if (Code == bitc::DEFINE_ABBREV &&
> -          !(Flags & AF_DontAutoprocessAbbrevs)) {
> -        // We read and accumulate abbrev's, the client can't do anything with
> -        // them anyway.
> -        ReadAbbrevRecord();
> -        continue;
> -      }
> -
> -      return BitstreamEntry::getRecord(Code);
> -    }
> -  }
> -
> -  /// This is a convenience function for clients that don't expect any
> -  /// subblocks. This just skips over them automatically.
> -  BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) {
> -    while (1) {
> -      // If we found a normal entry, return it.
> -      BitstreamEntry Entry = advance(Flags);
> -      if (Entry.Kind != BitstreamEntry::SubBlock)
> -        return Entry;
> -
> -      // If we found a sub-block, just skip over it and check the next entry.
> -      if (SkipBlock())
> -        return BitstreamEntry::getError();
> -    }
> -  }
> +  BitstreamReader *getBitStreamReader() { return R; }
> +  const BitstreamReader *getBitStreamReader() const { return R; }
> 
>   /// Reset the stream to the specified bit number.
>   void JumpToBit(uint64_t BitNo) {
> @@ -321,7 +206,7 @@ public:
>     uint8_t Array[sizeof(word_t)] = {0};
> 
>     uint64_t BytesRead =
> -        BitStream->getBitcodeBytes().readBytes(Array, sizeof(Array), NextChar);
> +        R->getBitcodeBytes().readBytes(Array, sizeof(Array), NextChar);
> 
>     // If we run out of data, stop at the end of the stream.
>     if (BytesRead == 0) {
> @@ -416,7 +301,6 @@ public:
>     }
>   }
> 
> -private:
>   void SkipToFourByteBoundary() {
>     // If word_t is 64-bits and if we've read less than 32 bits, just dump
>     // the bits we have up to the next 32-bit boundary.
> @@ -429,7 +313,140 @@ private:
> 
>     BitsInCurWord = 0;
>   }
> +
> +  /// Skip to the end of the file.
> +  void skipToEnd() { NextChar = R->getBitcodeBytes().getExtent(); }
> +};
> +
> +/// When advancing through a bitstream cursor, each advance can discover a few
> +/// different kinds of entries:
> +struct BitstreamEntry {
> +  enum {
> +    Error,    // Malformed bitcode was found.
> +    EndBlock, // We've reached the end of the current block, (or the end of the
> +              // file, which is treated like a series of EndBlock records.
> +    SubBlock, // This is the start of a new subblock of a specific ID.
> +    Record    // This is a record with a specific AbbrevID.
> +  } Kind;
> +
> +  unsigned ID;
> +
> +  static BitstreamEntry getError() {
> +    BitstreamEntry E; E.Kind = Error; return E;
> +  }
> +  static BitstreamEntry getEndBlock() {
> +    BitstreamEntry E; E.Kind = EndBlock; return E;
> +  }
> +  static BitstreamEntry getSubBlock(unsigned ID) {
> +    BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E;
> +  }
> +  static BitstreamEntry getRecord(unsigned AbbrevID) {
> +    BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E;
> +  }
> +};
> +
> +/// This represents a position within a bitcode file, implemented on top of a
> +/// SimpleBitstreamCursor.
> +///
> +/// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
> +/// be passed by value.
> +class BitstreamCursor : SimpleBitstreamCursor {

Is the private inheritance intended here?

-- 
Mehdi

> +  // This is the declared size of code values used for the current block, in
> +  // bits.
> +  unsigned CurCodeSize = 2;
> +
> +  /// Abbrevs installed at in this block.
> +  std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> CurAbbrevs;
> +
> +  struct Block {
> +    unsigned PrevCodeSize;
> +    std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs;
> +    explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
> +  };
> +
> +  /// This tracks the codesize of parent blocks.
> +  SmallVector<Block, 8> BlockScope;
> +
> +
> public:
> +  static const size_t MaxChunkSize = sizeof(word_t) * 8;
> +
> +  BitstreamCursor() = default;
> +
> +  explicit BitstreamCursor(BitstreamReader &R) { init(&R); }
> +
> +  void init(BitstreamReader *R) {
> +    freeState();
> +    SimpleBitstreamCursor::operator=(SimpleBitstreamCursor(R));
> +    CurCodeSize = 2;
> +  }
> +
> +  void freeState();
> +
> +  using SimpleBitstreamCursor::canSkipToPos;
> +  using SimpleBitstreamCursor::AtEndOfStream;
> +  using SimpleBitstreamCursor::GetCurrentBitNo;
> +  using SimpleBitstreamCursor::getBitStreamReader;
> +  using SimpleBitstreamCursor::JumpToBit;
> +  using SimpleBitstreamCursor::fillCurWord;
> +  using SimpleBitstreamCursor::Read;
> +  using SimpleBitstreamCursor::ReadVBR;
> +  using SimpleBitstreamCursor::ReadVBR64;
> +
> +  /// Return the number of bits used to encode an abbrev #.
> +  unsigned getAbbrevIDWidth() const { return CurCodeSize; }
> +
> +  /// Flags that modify the behavior of advance().
> +  enum {
> +    /// If this flag is used, the advance() method does not automatically pop
> +    /// the block scope when the end of a block is reached.
> +    AF_DontPopBlockAtEnd = 1,
> +
> +    /// If this flag is used, abbrev entries are returned just like normal
> +    /// records.
> +    AF_DontAutoprocessAbbrevs = 2
> +  };
> +
> +  /// Advance the current bitstream, returning the next entry in the stream.
> +  BitstreamEntry advance(unsigned Flags = 0) {
> +    while (1) {
> +      unsigned Code = ReadCode();
> +      if (Code == bitc::END_BLOCK) {
> +        // Pop the end of the block unless Flags tells us not to.
> +        if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd())
> +          return BitstreamEntry::getError();
> +        return BitstreamEntry::getEndBlock();
> +      }
> +
> +      if (Code == bitc::ENTER_SUBBLOCK)
> +        return BitstreamEntry::getSubBlock(ReadSubBlockID());
> +
> +      if (Code == bitc::DEFINE_ABBREV &&
> +          !(Flags & AF_DontAutoprocessAbbrevs)) {
> +        // We read and accumulate abbrev's, the client can't do anything with
> +        // them anyway.
> +        ReadAbbrevRecord();
> +        continue;
> +      }
> +
> +      return BitstreamEntry::getRecord(Code);
> +    }
> +  }
> +
> +  /// This is a convenience function for clients that don't expect any
> +  /// subblocks. This just skips over them automatically.
> +  BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) {
> +    while (1) {
> +      // If we found a normal entry, return it.
> +      BitstreamEntry Entry = advance(Flags);
> +      if (Entry.Kind != BitstreamEntry::SubBlock)
> +        return Entry;
> +
> +      // If we found a sub-block, just skip over it and check the next entry.
> +      if (SkipBlock())
> +        return BitstreamEntry::getError();
> +    }
> +  }
> 
>   unsigned ReadCode() {
>     return Read(CurCodeSize);
> 
> Modified: llvm/trunk/lib/Bitcode/Reader/BitstreamReader.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Bitcode/Reader/BitstreamReader.cpp?rev=264545&r1=264544&r2=264545&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Bitcode/Reader/BitstreamReader.cpp (original)
> +++ llvm/trunk/lib/Bitcode/Reader/BitstreamReader.cpp Sun Mar 27 17:40:55 2016
> @@ -32,7 +32,7 @@ bool BitstreamCursor::EnterSubBlock(unsi
> 
>   // Add the abbrevs specific to this block to the CurAbbrevs list.
>   if (const BitstreamReader::BlockInfo *Info =
> -      BitStream->getBlockInfo(BlockID)) {
> +          getBitStreamReader()->getBlockInfo(BlockID)) {
>     CurAbbrevs.insert(CurAbbrevs.end(), Info->Abbrevs.begin(),
>                       Info->Abbrevs.end());
>   }
> @@ -164,7 +164,7 @@ void BitstreamCursor::skipRecord(unsigne
>     // If this would read off the end of the bitcode file, just set the
>     // record to empty and return.
>     if (!canSkipToPos(NewEnd/8)) {
> -      NextChar = BitStream->getBitcodeBytes().getExtent();
> +      skipToEnd();
>       break;
>     }
> 
> @@ -256,13 +256,14 @@ unsigned BitstreamCursor::readRecord(uns
>     // record to empty and return.
>     if (!canSkipToPos(NewEnd/8)) {
>       Vals.append(NumElts, 0);
> -      NextChar = BitStream->getBitcodeBytes().getExtent();
> +      skipToEnd();
>       break;
>     }
> 
>     // Otherwise, inform the streamer that we need these bytes in memory.
> -    const char *Ptr = (const char*)
> -      BitStream->getBitcodeBytes().getPointer(CurBitPos/8, NumElts);
> +    const char *Ptr =
> +        (const char *)getBitStreamReader()->getBitcodeBytes().getPointer(
> +            CurBitPos / 8, NumElts);
> 
>     // If we can return a reference to the data, do so to avoid copying it.
>     if (Blob) {
> @@ -320,7 +321,7 @@ void BitstreamCursor::ReadAbbrevRecord()
> 
> bool BitstreamCursor::ReadBlockInfoBlock() {
>   // If this is the second stream to get to the block info block, skip it.
> -  if (BitStream->hasBlockInfoRecords())
> +  if (getBitStreamReader()->hasBlockInfoRecords())
>     return SkipBlock();
> 
>   if (EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) return true;
> @@ -361,11 +362,13 @@ bool BitstreamCursor::ReadBlockInfoBlock
>       default: break;  // Default behavior, ignore unknown content.
>       case bitc::BLOCKINFO_CODE_SETBID:
>         if (Record.size() < 1) return true;
> -        CurBlockInfo = &BitStream->getOrCreateBlockInfo((unsigned)Record[0]);
> +        CurBlockInfo =
> +            &getBitStreamReader()->getOrCreateBlockInfo((unsigned)Record[0]);
>         break;
>       case bitc::BLOCKINFO_CODE_BLOCKNAME: {
>         if (!CurBlockInfo) return true;
> -        if (BitStream->isIgnoringBlockInfoNames()) break;  // Ignore name.
> +        if (getBitStreamReader()->isIgnoringBlockInfoNames())
> +          break; // Ignore name.
>         std::string Name;
>         for (unsigned i = 0, e = Record.size(); i != e; ++i)
>           Name += (char)Record[i];
> @@ -374,7 +377,8 @@ bool BitstreamCursor::ReadBlockInfoBlock
>       }
>       case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
>         if (!CurBlockInfo) return true;
> -        if (BitStream->isIgnoringBlockInfoNames()) break;  // Ignore name.
> +        if (getBitStreamReader()->isIgnoringBlockInfoNames())
> +          break; // Ignore name.
>         std::string Name;
>         for (unsigned i = 1, e = Record.size(); i != e; ++i)
>           Name += (char)Record[i];
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits