[llvm] r264545 - Bitcode: Split out SimpleBitstreamCursor

Mon Mar 28 21:52:12 PDT 2016

> On 2016-Mar-28, at 21:51, Mehdi Amini <mehdi.amini at apple.com> wrote:
> 
> 
>> On Mar 27, 2016, at 3:40 PM, Duncan P. N. Exon Smith via llvm-commits <llvm-commits at lists.llvm.org> wrote:
>> 
>> Author: dexonsmith
>> Date: Sun Mar 27 17:40:55 2016
>> New Revision: 264545
>> 
>> URL: http://llvm.org/viewvc/llvm-project?rev=264545&view=rev
>> Log:
>> Bitcode: Split out SimpleBitstreamCursor
>> 
>> Split SimpleBitstreamCursor out of BitstreamCursor.  The new class is a
>> lower-level cursor with no knowledge of bitcode blocks, abbreviations, or
>> records; it just knows how to read bits and navigate the stream.
>> 
>> This is mainly organizational, to separate the API for manipulating raw
>> bits from that for bitcode concepts like Record and Block.
>> 
>> Modified:
>>   llvm/trunk/include/llvm/Bitcode/BitstreamReader.h
>>   llvm/trunk/lib/Bitcode/Reader/BitstreamReader.cpp
>> 
>> Modified: llvm/trunk/include/llvm/Bitcode/BitstreamReader.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Bitcode/BitstreamReader.h?rev=264545&r1=264544&r2=264545&view=diff
>> ==============================================================================
>> --- llvm/trunk/include/llvm/Bitcode/BitstreamReader.h (original)
>> +++ llvm/trunk/include/llvm/Bitcode/BitstreamReader.h Sun Mar 27 17:40:55 2016
>> @@ -128,98 +128,42 @@ public:
>>  }
>> };
>> 
>> -/// When advancing through a bitstream cursor, each advance can discover a few
>> -/// different kinds of entries:
>> -struct BitstreamEntry {
>> -  enum {
>> -    Error,    // Malformed bitcode was found.
>> -    EndBlock, // We've reached the end of the current block, (or the end of the
>> -              // file, which is treated like a series of EndBlock records.
>> -    SubBlock, // This is the start of a new subblock of a specific ID.
>> -    Record    // This is a record with a specific AbbrevID.
>> -  } Kind;
>> -
>> -  unsigned ID;
>> -
>> -  static BitstreamEntry getError() {
>> -    BitstreamEntry E; E.Kind = Error; return E;
>> -  }
>> -  static BitstreamEntry getEndBlock() {
>> -    BitstreamEntry E; E.Kind = EndBlock; return E;
>> -  }
>> -  static BitstreamEntry getSubBlock(unsigned ID) {
>> -    BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E;
>> -  }
>> -  static BitstreamEntry getRecord(unsigned AbbrevID) {
>> -    BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E;
>> -  }
>> -};
>> -
>> -/// This represents a position within a bitcode file. There may be multiple
>> -/// independent cursors reading within one bitstream, each maintaining their own
>> -/// local state.
>> -///
>> -/// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
>> -/// be passed by value.
>> -class BitstreamCursor {
>> -  BitstreamReader *BitStream;
>> -  size_t NextChar;
>> +/// This represents a position within a bitstream. There may be multiple
>> +/// independent cursors reading within one bitstream, each maintaining their
>> +/// own local state.
>> +class SimpleBitstreamCursor {
>> +  BitstreamReader *R = nullptr;
>> +  size_t NextChar = 0;
>> 
>>  // The size of the bicode. 0 if we don't know it yet.
>> -  size_t Size;
>> +  size_t Size = 0;
>> 
>>  /// This is the current data we have pulled from the stream but have not
>>  /// returned to the client. This is specifically and intentionally defined to
>>  /// follow the word size of the host machine for efficiency. We use word_t in
>>  /// places that are aware of this to make it perfectly explicit what is going
>>  /// on.
>> +public:
>>  typedef size_t word_t;
>> -  word_t CurWord;
>> +
>> +private:
>> +  word_t CurWord = 0;
>> 
>>  /// This is the number of bits in CurWord that are valid. This is always from
>>  /// [0...bits_of(size_t)-1] inclusive.
>> -  unsigned BitsInCurWord;
>> -
>> -  // This is the declared size of code values used for the current block, in
>> -  // bits.
>> -  unsigned CurCodeSize;
>> -
>> -  /// Abbrevs installed at in this block.
>> -  std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> CurAbbrevs;
>> -
>> -  struct Block {
>> -    unsigned PrevCodeSize;
>> -    std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs;
>> -    explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
>> -  };
>> -
>> -  /// This tracks the codesize of parent blocks.
>> -  SmallVector<Block, 8> BlockScope;
>> -
>> +  unsigned BitsInCurWord = 0;
>> 
>> public:
>>  static const size_t MaxChunkSize = sizeof(word_t) * 8;
>> 
>> -  BitstreamCursor() { init(nullptr); }
>> -
>> -  explicit BitstreamCursor(BitstreamReader &R) { init(&R); }
>> -
>> -  void init(BitstreamReader *R) {
>> -    freeState();
>> +  SimpleBitstreamCursor() = default;
>> 
>> -    BitStream = R;
>> -    NextChar = 0;
>> -    Size = 0;
>> -    BitsInCurWord = 0;
>> -    CurCodeSize = 2;
>> -  }
>> -
>> -  void freeState();
>> +  explicit SimpleBitstreamCursor(BitstreamReader *R) : R(R) {}
>> 
>>  bool canSkipToPos(size_t pos) const {
>>    // pos can be skipped to if it is a valid address or one byte past the end.
>> -    return pos == 0 || BitStream->getBitcodeBytes().isValidAddress(
>> -        static_cast<uint64_t>(pos - 1));
>> +    return pos == 0 ||
>> +           R->getBitcodeBytes().isValidAddress(static_cast<uint64_t>(pos - 1));
>>  }
>> 
>>  bool AtEndOfStream() {
>> @@ -231,72 +175,13 @@ public:
>>    return BitsInCurWord == 0;
>>  }
>> 
>> -  /// Return the number of bits used to encode an abbrev #.
>> -  unsigned getAbbrevIDWidth() const { return CurCodeSize; }
>> -
>>  /// Return the bit # of the bit we are reading.
>>  uint64_t GetCurrentBitNo() const {
>>    return NextChar*CHAR_BIT - BitsInCurWord;
>>  }
>> 
>> -  BitstreamReader *getBitStreamReader() {
>> -    return BitStream;
>> -  }
>> -  const BitstreamReader *getBitStreamReader() const {
>> -    return BitStream;
>> -  }
>> -
>> -  /// Flags that modify the behavior of advance().
>> -  enum {
>> -    /// If this flag is used, the advance() method does not automatically pop
>> -    /// the block scope when the end of a block is reached.
>> -    AF_DontPopBlockAtEnd = 1,
>> -
>> -    /// If this flag is used, abbrev entries are returned just like normal
>> -    /// records.
>> -    AF_DontAutoprocessAbbrevs = 2
>> -  };
>> -
>> -  /// Advance the current bitstream, returning the next entry in the stream.
>> -  BitstreamEntry advance(unsigned Flags = 0) {
>> -    while (1) {
>> -      unsigned Code = ReadCode();
>> -      if (Code == bitc::END_BLOCK) {
>> -        // Pop the end of the block unless Flags tells us not to.
>> -        if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd())
>> -          return BitstreamEntry::getError();
>> -        return BitstreamEntry::getEndBlock();
>> -      }
>> -
>> -      if (Code == bitc::ENTER_SUBBLOCK)
>> -        return BitstreamEntry::getSubBlock(ReadSubBlockID());
>> -
>> -      if (Code == bitc::DEFINE_ABBREV &&
>> -          !(Flags & AF_DontAutoprocessAbbrevs)) {
>> -        // We read and accumulate abbrev's, the client can't do anything with
>> -        // them anyway.
>> -        ReadAbbrevRecord();
>> -        continue;
>> -      }
>> -
>> -      return BitstreamEntry::getRecord(Code);
>> -    }
>> -  }
>> -
>> -  /// This is a convenience function for clients that don't expect any
>> -  /// subblocks. This just skips over them automatically.
>> -  BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) {
>> -    while (1) {
>> -      // If we found a normal entry, return it.
>> -      BitstreamEntry Entry = advance(Flags);
>> -      if (Entry.Kind != BitstreamEntry::SubBlock)
>> -        return Entry;
>> -
>> -      // If we found a sub-block, just skip over it and check the next entry.
>> -      if (SkipBlock())
>> -        return BitstreamEntry::getError();
>> -    }
>> -  }
>> +  BitstreamReader *getBitStreamReader() { return R; }
>> +  const BitstreamReader *getBitStreamReader() const { return R; }
>> 
>>  /// Reset the stream to the specified bit number.
>>  void JumpToBit(uint64_t BitNo) {
>> @@ -321,7 +206,7 @@ public:
>>    uint8_t Array[sizeof(word_t)] = {0};
>> 
>>    uint64_t BytesRead =
>> -        BitStream->getBitcodeBytes().readBytes(Array, sizeof(Array), NextChar);
>> +        R->getBitcodeBytes().readBytes(Array, sizeof(Array), NextChar);
>> 
>>    // If we run out of data, stop at the end of the stream.
>>    if (BytesRead == 0) {
>> @@ -416,7 +301,6 @@ public:
>>    }
>>  }
>> 
>> -private:
>>  void SkipToFourByteBoundary() {
>>    // If word_t is 64-bits and if we've read less than 32 bits, just dump
>>    // the bits we have up to the next 32-bit boundary.
>> @@ -429,7 +313,140 @@ private:
>> 
>>    BitsInCurWord = 0;
>>  }
>> +
>> +  /// Skip to the end of the file.
>> +  void skipToEnd() { NextChar = R->getBitcodeBytes().getExtent(); }
>> +};
>> +
>> +/// When advancing through a bitstream cursor, each advance can discover a few
>> +/// different kinds of entries:
>> +struct BitstreamEntry {
>> +  enum {
>> +    Error,    // Malformed bitcode was found.
>> +    EndBlock, // We've reached the end of the current block, (or the end of the
>> +              // file, which is treated like a series of EndBlock records.
>> +    SubBlock, // This is the start of a new subblock of a specific ID.
>> +    Record    // This is a record with a specific AbbrevID.
>> +  } Kind;
>> +
>> +  unsigned ID;
>> +
>> +  static BitstreamEntry getError() {
>> +    BitstreamEntry E; E.Kind = Error; return E;
>> +  }
>> +  static BitstreamEntry getEndBlock() {
>> +    BitstreamEntry E; E.Kind = EndBlock; return E;
>> +  }
>> +  static BitstreamEntry getSubBlock(unsigned ID) {
>> +    BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E;
>> +  }
>> +  static BitstreamEntry getRecord(unsigned AbbrevID) {
>> +    BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E;
>> +  }
>> +};
>> +
>> +/// This represents a position within a bitcode file, implemented on top of a
>> +/// SimpleBitstreamCursor.
>> +///
>> +/// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
>> +/// be passed by value.
>> +class BitstreamCursor : SimpleBitstreamCursor {
> 
> Is the private inheritance intended here?
> 

Yes.

> -- 
> Mehdi
> 
> 
> 
> 
>> +  // This is the declared size of code values used for the current block, in
>> +  // bits.
>> +  unsigned CurCodeSize = 2;
>> +
>> +  /// Abbrevs installed at in this block.
>> +  std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> CurAbbrevs;
>> +
>> +  struct Block {
>> +    unsigned PrevCodeSize;
>> +    std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs;
>> +    explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
>> +  };
>> +
>> +  /// This tracks the codesize of parent blocks.
>> +  SmallVector<Block, 8> BlockScope;
>> +
>> +
>> public:
>> +  static const size_t MaxChunkSize = sizeof(word_t) * 8;
>> +
>> +  BitstreamCursor() = default;
>> +
>> +  explicit BitstreamCursor(BitstreamReader &R) { init(&R); }
>> +
>> +  void init(BitstreamReader *R) {
>> +    freeState();
>> +    SimpleBitstreamCursor::operator=(SimpleBitstreamCursor(R));
>> +    CurCodeSize = 2;
>> +  }
>> +
>> +  void freeState();
>> +
>> +  using SimpleBitstreamCursor::canSkipToPos;
>> +  using SimpleBitstreamCursor::AtEndOfStream;
>> +  using SimpleBitstreamCursor::GetCurrentBitNo;
>> +  using SimpleBitstreamCursor::getBitStreamReader;
>> +  using SimpleBitstreamCursor::JumpToBit;
>> +  using SimpleBitstreamCursor::fillCurWord;
>> +  using SimpleBitstreamCursor::Read;
>> +  using SimpleBitstreamCursor::ReadVBR;
>> +  using SimpleBitstreamCursor::ReadVBR64;
>> +
>> +  /// Return the number of bits used to encode an abbrev #.
>> +  unsigned getAbbrevIDWidth() const { return CurCodeSize; }
>> +
>> +  /// Flags that modify the behavior of advance().
>> +  enum {
>> +    /// If this flag is used, the advance() method does not automatically pop
>> +    /// the block scope when the end of a block is reached.
>> +    AF_DontPopBlockAtEnd = 1,
>> +
>> +    /// If this flag is used, abbrev entries are returned just like normal
>> +    /// records.
>> +    AF_DontAutoprocessAbbrevs = 2
>> +  };
>> +
>> +  /// Advance the current bitstream, returning the next entry in the stream.
>> +  BitstreamEntry advance(unsigned Flags = 0) {
>> +    while (1) {
>> +      unsigned Code = ReadCode();
>> +      if (Code == bitc::END_BLOCK) {
>> +        // Pop the end of the block unless Flags tells us not to.
>> +        if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd())
>> +          return BitstreamEntry::getError();
>> +        return BitstreamEntry::getEndBlock();
>> +      }
>> +
>> +      if (Code == bitc::ENTER_SUBBLOCK)
>> +        return BitstreamEntry::getSubBlock(ReadSubBlockID());
>> +
>> +      if (Code == bitc::DEFINE_ABBREV &&
>> +          !(Flags & AF_DontAutoprocessAbbrevs)) {
>> +        // We read and accumulate abbrev's, the client can't do anything with
>> +        // them anyway.
>> +        ReadAbbrevRecord();
>> +        continue;
>> +      }
>> +
>> +      return BitstreamEntry::getRecord(Code);
>> +    }
>> +  }
>> +
>> +  /// This is a convenience function for clients that don't expect any
>> +  /// subblocks. This just skips over them automatically.
>> +  BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) {
>> +    while (1) {
>> +      // If we found a normal entry, return it.
>> +      BitstreamEntry Entry = advance(Flags);
>> +      if (Entry.Kind != BitstreamEntry::SubBlock)
>> +        return Entry;
>> +
>> +      // If we found a sub-block, just skip over it and check the next entry.
>> +      if (SkipBlock())
>> +        return BitstreamEntry::getError();
>> +    }
>> +  }
>> 
>>  unsigned ReadCode() {
>>    return Read(CurCodeSize);
>> 
>> Modified: llvm/trunk/lib/Bitcode/Reader/BitstreamReader.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Bitcode/Reader/BitstreamReader.cpp?rev=264545&r1=264544&r2=264545&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Bitcode/Reader/BitstreamReader.cpp (original)
>> +++ llvm/trunk/lib/Bitcode/Reader/BitstreamReader.cpp Sun Mar 27 17:40:55 2016
>> @@ -32,7 +32,7 @@ bool BitstreamCursor::EnterSubBlock(unsi
>> 
>>  // Add the abbrevs specific to this block to the CurAbbrevs list.
>>  if (const BitstreamReader::BlockInfo *Info =
>> -      BitStream->getBlockInfo(BlockID)) {
>> +          getBitStreamReader()->getBlockInfo(BlockID)) {
>>    CurAbbrevs.insert(CurAbbrevs.end(), Info->Abbrevs.begin(),
>>                      Info->Abbrevs.end());
>>  }
>> @@ -164,7 +164,7 @@ void BitstreamCursor::skipRecord(unsigne
>>    // If this would read off the end of the bitcode file, just set the
>>    // record to empty and return.
>>    if (!canSkipToPos(NewEnd/8)) {
>> -      NextChar = BitStream->getBitcodeBytes().getExtent();
>> +      skipToEnd();
>>      break;
>>    }
>> 
>> @@ -256,13 +256,14 @@ unsigned BitstreamCursor::readRecord(uns
>>    // record to empty and return.
>>    if (!canSkipToPos(NewEnd/8)) {
>>      Vals.append(NumElts, 0);
>> -      NextChar = BitStream->getBitcodeBytes().getExtent();
>> +      skipToEnd();
>>      break;
>>    }
>> 
>>    // Otherwise, inform the streamer that we need these bytes in memory.
>> -    const char *Ptr = (const char*)
>> -      BitStream->getBitcodeBytes().getPointer(CurBitPos/8, NumElts);
>> +    const char *Ptr =
>> +        (const char *)getBitStreamReader()->getBitcodeBytes().getPointer(
>> +            CurBitPos / 8, NumElts);
>> 
>>    // If we can return a reference to the data, do so to avoid copying it.
>>    if (Blob) {
>> @@ -320,7 +321,7 @@ void BitstreamCursor::ReadAbbrevRecord()
>> 
>> bool BitstreamCursor::ReadBlockInfoBlock() {
>>  // If this is the second stream to get to the block info block, skip it.
>> -  if (BitStream->hasBlockInfoRecords())
>> +  if (getBitStreamReader()->hasBlockInfoRecords())
>>    return SkipBlock();
>> 
>>  if (EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) return true;
>> @@ -361,11 +362,13 @@ bool BitstreamCursor::ReadBlockInfoBlock
>>      default: break;  // Default behavior, ignore unknown content.
>>      case bitc::BLOCKINFO_CODE_SETBID:
>>        if (Record.size() < 1) return true;
>> -        CurBlockInfo = &BitStream->getOrCreateBlockInfo((unsigned)Record[0]);
>> +        CurBlockInfo =
>> +            &getBitStreamReader()->getOrCreateBlockInfo((unsigned)Record[0]);
>>        break;
>>      case bitc::BLOCKINFO_CODE_BLOCKNAME: {
>>        if (!CurBlockInfo) return true;
>> -        if (BitStream->isIgnoringBlockInfoNames()) break;  // Ignore name.
>> +        if (getBitStreamReader()->isIgnoringBlockInfoNames())
>> +          break; // Ignore name.
>>        std::string Name;
>>        for (unsigned i = 0, e = Record.size(); i != e; ++i)
>>          Name += (char)Record[i];
>> @@ -374,7 +377,8 @@ bool BitstreamCursor::ReadBlockInfoBlock
>>      }
>>      case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
>>        if (!CurBlockInfo) return true;
>> -        if (BitStream->isIgnoringBlockInfoNames()) break;  // Ignore name.
>> +        if (getBitStreamReader()->isIgnoringBlockInfoNames())
>> +          break; // Ignore name.
>>        std::string Name;
>>        for (unsigned i = 1, e = Record.size(); i != e; ++i)
>>          Name += (char)Record[i];
>> 
>> 
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>