[llvm] r264545 - Bitcode: Split out SimpleBitstreamCursor

Mon Mar 28 22:05:41 PDT 2016

> On Mar 28, 2016, at 9:52 PM, Duncan P. N. Exon Smith <dexonsmith at apple.com> wrote:
> 
> 
>> On 2016-Mar-28, at 21:51, Mehdi Amini <mehdi.amini at apple.com> wrote:
>> 
>> 
>>> On Mar 27, 2016, at 3:40 PM, Duncan P. N. Exon Smith via llvm-commits <llvm-commits at lists.llvm.org> wrote:
>>> 
>>> Author: dexonsmith
>>> Date: Sun Mar 27 17:40:55 2016
>>> New Revision: 264545
>>> 
>>> URL: http://llvm.org/viewvc/llvm-project?rev=264545&view=rev
>>> Log:
>>> Bitcode: Split out SimpleBitstreamCursor
>>> 
>>> Split out SimpleBitstreamCursor from BitstreamCursor, which is a
>>> lower-level cursor with no knowledge of bitcode blocks, abbreviations,
>>> or records.  It just knows how to read bits and navigate the stream.
>>> 
>>> This is mainly organizational, to separate the API for manipulating raw
>>> bits from that for bitcode concepts like Record and Block.
>>> 
>>> Modified:
>>>  llvm/trunk/include/llvm/Bitcode/BitstreamReader.h
>>>  llvm/trunk/lib/Bitcode/Reader/BitstreamReader.cpp
>>> 
>>> Modified: llvm/trunk/include/llvm/Bitcode/BitstreamReader.h
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Bitcode/BitstreamReader.h?rev=264545&r1=264544&r2=264545&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/include/llvm/Bitcode/BitstreamReader.h (original)
>>> +++ llvm/trunk/include/llvm/Bitcode/BitstreamReader.h Sun Mar 27 17:40:55 2016
>>> @@ -128,98 +128,42 @@ public:
>>> }
>>> };
>>> 
>>> -/// When advancing through a bitstream cursor, each advance can discover a few
>>> -/// different kinds of entries:
>>> -struct BitstreamEntry {
>>> -  enum {
>>> -    Error,    // Malformed bitcode was found.
>>> -    EndBlock, // We've reached the end of the current block, (or the end of the
>>> -              // file, which is treated like a series of EndBlock records.
>>> -    SubBlock, // This is the start of a new subblock of a specific ID.
>>> -    Record    // This is a record with a specific AbbrevID.
>>> -  } Kind;
>>> -
>>> -  unsigned ID;
>>> -
>>> -  static BitstreamEntry getError() {
>>> -    BitstreamEntry E; E.Kind = Error; return E;
>>> -  }
>>> -  static BitstreamEntry getEndBlock() {
>>> -    BitstreamEntry E; E.Kind = EndBlock; return E;
>>> -  }
>>> -  static BitstreamEntry getSubBlock(unsigned ID) {
>>> -    BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E;
>>> -  }
>>> -  static BitstreamEntry getRecord(unsigned AbbrevID) {
>>> -    BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E;
>>> -  }
>>> -};
>>> -
>>> -/// This represents a position within a bitcode file. There may be multiple
>>> -/// independent cursors reading within one bitstream, each maintaining their own
>>> -/// local state.
>>> -///
>>> -/// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
>>> -/// be passed by value.
>>> -class BitstreamCursor {
>>> -  BitstreamReader *BitStream;
>>> -  size_t NextChar;
>>> +/// This represents a position within a bitstream. There may be multiple
>>> +/// independent cursors reading within one bitstream, each maintaining their
>>> +/// own local state.
>>> +class SimpleBitstreamCursor {
>>> +  BitstreamReader *R = nullptr;
>>> +  size_t NextChar = 0;
>>> 
>>> // The size of the bicode. 0 if we don't know it yet.
>>> -  size_t Size;
>>> +  size_t Size = 0;
>>> 
>>> /// This is the current data we have pulled from the stream but have not
>>> /// returned to the client. This is specifically and intentionally defined to
>>> /// follow the word size of the host machine for efficiency. We use word_t in
>>> /// places that are aware of this to make it perfectly explicit what is going
>>> /// on.
>>> +public:
>>> typedef size_t word_t;
>>> -  word_t CurWord;
>>> +
>>> +private:
>>> +  word_t CurWord = 0;
>>> 
>>> /// This is the number of bits in CurWord that are valid. This is always from
>>> /// [0...bits_of(size_t)-1] inclusive.
>>> -  unsigned BitsInCurWord;
>>> -
>>> -  // This is the declared size of code values used for the current block, in
>>> -  // bits.
>>> -  unsigned CurCodeSize;
>>> -
>>> -  /// Abbrevs installed at in this block.
>>> -  std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> CurAbbrevs;
>>> -
>>> -  struct Block {
>>> -    unsigned PrevCodeSize;
>>> -    std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs;
>>> -    explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
>>> -  };
>>> -
>>> -  /// This tracks the codesize of parent blocks.
>>> -  SmallVector<Block, 8> BlockScope;
>>> -
>>> +  unsigned BitsInCurWord = 0;
>>> 
>>> public:
>>> static const size_t MaxChunkSize = sizeof(word_t) * 8;
>>> 
>>> -  BitstreamCursor() { init(nullptr); }
>>> -
>>> -  explicit BitstreamCursor(BitstreamReader &R) { init(&R); }
>>> -
>>> -  void init(BitstreamReader *R) {
>>> -    freeState();
>>> +  SimpleBitstreamCursor() = default;
>>> 
>>> -    BitStream = R;
>>> -    NextChar = 0;
>>> -    Size = 0;
>>> -    BitsInCurWord = 0;
>>> -    CurCodeSize = 2;
>>> -  }
>>> -
>>> -  void freeState();
>>> +  explicit SimpleBitstreamCursor(BitstreamReader *R) : R(R) {}
>>> 
>>> bool canSkipToPos(size_t pos) const {
>>>   // pos can be skipped to if it is a valid address or one byte past the end.
>>> -    return pos == 0 || BitStream->getBitcodeBytes().isValidAddress(
>>> -        static_cast<uint64_t>(pos - 1));
>>> +    return pos == 0 ||
>>> +           R->getBitcodeBytes().isValidAddress(static_cast<uint64_t>(pos - 1));
>>> }
>>> 
>>> bool AtEndOfStream() {
>>> @@ -231,72 +175,13 @@ public:
>>>   return BitsInCurWord == 0;
>>> }
>>> 
>>> -  /// Return the number of bits used to encode an abbrev #.
>>> -  unsigned getAbbrevIDWidth() const { return CurCodeSize; }
>>> -
>>> /// Return the bit # of the bit we are reading.
>>> uint64_t GetCurrentBitNo() const {
>>>   return NextChar*CHAR_BIT - BitsInCurWord;
>>> }
>>> 
>>> -  BitstreamReader *getBitStreamReader() {
>>> -    return BitStream;
>>> -  }
>>> -  const BitstreamReader *getBitStreamReader() const {
>>> -    return BitStream;
>>> -  }
>>> -
>>> -  /// Flags that modify the behavior of advance().
>>> -  enum {
>>> -    /// If this flag is used, the advance() method does not automatically pop
>>> -    /// the block scope when the end of a block is reached.
>>> -    AF_DontPopBlockAtEnd = 1,
>>> -
>>> -    /// If this flag is used, abbrev entries are returned just like normal
>>> -    /// records.
>>> -    AF_DontAutoprocessAbbrevs = 2
>>> -  };
>>> -
>>> -  /// Advance the current bitstream, returning the next entry in the stream.
>>> -  BitstreamEntry advance(unsigned Flags = 0) {
>>> -    while (1) {
>>> -      unsigned Code = ReadCode();
>>> -      if (Code == bitc::END_BLOCK) {
>>> -        // Pop the end of the block unless Flags tells us not to.
>>> -        if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd())
>>> -          return BitstreamEntry::getError();
>>> -        return BitstreamEntry::getEndBlock();
>>> -      }
>>> -
>>> -      if (Code == bitc::ENTER_SUBBLOCK)
>>> -        return BitstreamEntry::getSubBlock(ReadSubBlockID());
>>> -
>>> -      if (Code == bitc::DEFINE_ABBREV &&
>>> -          !(Flags & AF_DontAutoprocessAbbrevs)) {
>>> -        // We read and accumulate abbrev's, the client can't do anything with
>>> -        // them anyway.
>>> -        ReadAbbrevRecord();
>>> -        continue;
>>> -      }
>>> -
>>> -      return BitstreamEntry::getRecord(Code);
>>> -    }
>>> -  }
>>> -
>>> -  /// This is a convenience function for clients that don't expect any
>>> -  /// subblocks. This just skips over them automatically.
>>> -  BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) {
>>> -    while (1) {
>>> -      // If we found a normal entry, return it.
>>> -      BitstreamEntry Entry = advance(Flags);
>>> -      if (Entry.Kind != BitstreamEntry::SubBlock)
>>> -        return Entry;
>>> -
>>> -      // If we found a sub-block, just skip over it and check the next entry.
>>> -      if (SkipBlock())
>>> -        return BitstreamEntry::getError();
>>> -    }
>>> -  }
>>> +  BitstreamReader *getBitStreamReader() { return R; }
>>> +  const BitstreamReader *getBitStreamReader() const { return R; }
>>> 
>>> /// Reset the stream to the specified bit number.
>>> void JumpToBit(uint64_t BitNo) {
>>> @@ -321,7 +206,7 @@ public:
>>>   uint8_t Array[sizeof(word_t)] = {0};
>>> 
>>>   uint64_t BytesRead =
>>> -        BitStream->getBitcodeBytes().readBytes(Array, sizeof(Array), NextChar);
>>> +        R->getBitcodeBytes().readBytes(Array, sizeof(Array), NextChar);
>>> 
>>>   // If we run out of data, stop at the end of the stream.
>>>   if (BytesRead == 0) {
>>> @@ -416,7 +301,6 @@ public:
>>>   }
>>> }
>>> 
>>> -private:
>>> void SkipToFourByteBoundary() {
>>>   // If word_t is 64-bits and if we've read less than 32 bits, just dump
>>>   // the bits we have up to the next 32-bit boundary.
>>> @@ -429,7 +313,140 @@ private:
>>> 
>>>   BitsInCurWord = 0;
>>> }
>>> +
>>> +  /// Skip to the end of the file.
>>> +  void skipToEnd() { NextChar = R->getBitcodeBytes().getExtent(); }
>>> +};
>>> +
>>> +/// When advancing through a bitstream cursor, each advance can discover a few
>>> +/// different kinds of entries:
>>> +struct BitstreamEntry {
>>> +  enum {
>>> +    Error,    // Malformed bitcode was found.
>>> +    EndBlock, // We've reached the end of the current block, (or the end of the
>>> +              // file, which is treated like a series of EndBlock records.
>>> +    SubBlock, // This is the start of a new subblock of a specific ID.
>>> +    Record    // This is a record with a specific AbbrevID.
>>> +  } Kind;
>>> +
>>> +  unsigned ID;
>>> +
>>> +  static BitstreamEntry getError() {
>>> +    BitstreamEntry E; E.Kind = Error; return E;
>>> +  }
>>> +  static BitstreamEntry getEndBlock() {
>>> +    BitstreamEntry E; E.Kind = EndBlock; return E;
>>> +  }
>>> +  static BitstreamEntry getSubBlock(unsigned ID) {
>>> +    BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E;
>>> +  }
>>> +  static BitstreamEntry getRecord(unsigned AbbrevID) {
>>> +    BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E;
>>> +  }
>>> +};
>>> +
>>> +/// This represents a position within a bitcode file, implemented on top of a
>>> +/// SimpleBitstreamCursor.
>>> +///
>>> +/// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
>>> +/// be passed by value.
>>> +class BitstreamCursor : SimpleBitstreamCursor {
>> 
>> Is the private inheritance intended here?
>> 
> 
> Yes.

In my case I was trying to compute the hash of the module block when I see the module_hash record in llvm-bcanalyzer.
I planned to use getPointerToByte() for this, but I only have a BitstreamCursor.

-- 
Mehdi

>> 
>> 
>> 
>>> +  // This is the declared size of code values used for the current block, in
>>> +  // bits.
>>> +  unsigned CurCodeSize = 2;
>>> +
>>> +  /// Abbrevs installed at in this block.
>>> +  std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> CurAbbrevs;
>>> +
>>> +  struct Block {
>>> +    unsigned PrevCodeSize;
>>> +    std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs;
>>> +    explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
>>> +  };
>>> +
>>> +  /// This tracks the codesize of parent blocks.
>>> +  SmallVector<Block, 8> BlockScope;
>>> +
>>> +
>>> public:
>>> +  static const size_t MaxChunkSize = sizeof(word_t) * 8;
>>> +
>>> +  BitstreamCursor() = default;
>>> +
>>> +  explicit BitstreamCursor(BitstreamReader &R) { init(&R); }
>>> +
>>> +  void init(BitstreamReader *R) {
>>> +    freeState();
>>> +    SimpleBitstreamCursor::operator=(SimpleBitstreamCursor(R));
>>> +    CurCodeSize = 2;
>>> +  }
>>> +
>>> +  void freeState();
>>> +
>>> +  using SimpleBitstreamCursor::canSkipToPos;
>>> +  using SimpleBitstreamCursor::AtEndOfStream;
>>> +  using SimpleBitstreamCursor::GetCurrentBitNo;
>>> +  using SimpleBitstreamCursor::getBitStreamReader;
>>> +  using SimpleBitstreamCursor::JumpToBit;
>>> +  using SimpleBitstreamCursor::fillCurWord;
>>> +  using SimpleBitstreamCursor::Read;
>>> +  using SimpleBitstreamCursor::ReadVBR;
>>> +  using SimpleBitstreamCursor::ReadVBR64;
>>> +
>>> +  /// Return the number of bits used to encode an abbrev #.
>>> +  unsigned getAbbrevIDWidth() const { return CurCodeSize; }
>>> +
>>> +  /// Flags that modify the behavior of advance().
>>> +  enum {
>>> +    /// If this flag is used, the advance() method does not automatically pop
>>> +    /// the block scope when the end of a block is reached.
>>> +    AF_DontPopBlockAtEnd = 1,
>>> +
>>> +    /// If this flag is used, abbrev entries are returned just like normal
>>> +    /// records.
>>> +    AF_DontAutoprocessAbbrevs = 2
>>> +  };
>>> +
>>> +  /// Advance the current bitstream, returning the next entry in the stream.
>>> +  BitstreamEntry advance(unsigned Flags = 0) {
>>> +    while (1) {
>>> +      unsigned Code = ReadCode();
>>> +      if (Code == bitc::END_BLOCK) {
>>> +        // Pop the end of the block unless Flags tells us not to.
>>> +        if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd())
>>> +          return BitstreamEntry::getError();
>>> +        return BitstreamEntry::getEndBlock();
>>> +      }
>>> +
>>> +      if (Code == bitc::ENTER_SUBBLOCK)
>>> +        return BitstreamEntry::getSubBlock(ReadSubBlockID());
>>> +
>>> +      if (Code == bitc::DEFINE_ABBREV &&
>>> +          !(Flags & AF_DontAutoprocessAbbrevs)) {
>>> +        // We read and accumulate abbrev's, the client can't do anything with
>>> +        // them anyway.
>>> +        ReadAbbrevRecord();
>>> +        continue;
>>> +      }
>>> +
>>> +      return BitstreamEntry::getRecord(Code);
>>> +    }
>>> +  }
>>> +
>>> +  /// This is a convenience function for clients that don't expect any
>>> +  /// subblocks. This just skips over them automatically.
>>> +  BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) {
>>> +    while (1) {
>>> +      // If we found a normal entry, return it.
>>> +      BitstreamEntry Entry = advance(Flags);
>>> +      if (Entry.Kind != BitstreamEntry::SubBlock)
>>> +        return Entry;
>>> +
>>> +      // If we found a sub-block, just skip over it and check the next entry.
>>> +      if (SkipBlock())
>>> +        return BitstreamEntry::getError();
>>> +    }
>>> +  }
>>> 
>>> unsigned ReadCode() {
>>>   return Read(CurCodeSize);
>>> 
>>> Modified: llvm/trunk/lib/Bitcode/Reader/BitstreamReader.cpp
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Bitcode/Reader/BitstreamReader.cpp?rev=264545&r1=264544&r2=264545&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Bitcode/Reader/BitstreamReader.cpp (original)
>>> +++ llvm/trunk/lib/Bitcode/Reader/BitstreamReader.cpp Sun Mar 27 17:40:55 2016
>>> @@ -32,7 +32,7 @@ bool BitstreamCursor::EnterSubBlock(unsi
>>> 
>>> // Add the abbrevs specific to this block to the CurAbbrevs list.
>>> if (const BitstreamReader::BlockInfo *Info =
>>> -      BitStream->getBlockInfo(BlockID)) {
>>> +          getBitStreamReader()->getBlockInfo(BlockID)) {
>>>   CurAbbrevs.insert(CurAbbrevs.end(), Info->Abbrevs.begin(),
>>>                     Info->Abbrevs.end());
>>> }
>>> @@ -164,7 +164,7 @@ void BitstreamCursor::skipRecord(unsigne
>>>   // If this would read off the end of the bitcode file, just set the
>>>   // record to empty and return.
>>>   if (!canSkipToPos(NewEnd/8)) {
>>> -      NextChar = BitStream->getBitcodeBytes().getExtent();
>>> +      skipToEnd();
>>>     break;
>>>   }
>>> 
>>> @@ -256,13 +256,14 @@ unsigned BitstreamCursor::readRecord(uns
>>>   // record to empty and return.
>>>   if (!canSkipToPos(NewEnd/8)) {
>>>     Vals.append(NumElts, 0);
>>> -      NextChar = BitStream->getBitcodeBytes().getExtent();
>>> +      skipToEnd();
>>>     break;
>>>   }
>>> 
>>>   // Otherwise, inform the streamer that we need these bytes in memory.
>>> -    const char *Ptr = (const char*)
>>> -      BitStream->getBitcodeBytes().getPointer(CurBitPos/8, NumElts);
>>> +    const char *Ptr =
>>> +        (const char *)getBitStreamReader()->getBitcodeBytes().getPointer(
>>> +            CurBitPos / 8, NumElts);
>>> 
>>>   // If we can return a reference to the data, do so to avoid copying it.
>>>   if (Blob) {
>>> @@ -320,7 +321,7 @@ void BitstreamCursor::ReadAbbrevRecord()
>>> 
>>> bool BitstreamCursor::ReadBlockInfoBlock() {
>>> // If this is the second stream to get to the block info block, skip it.
>>> -  if (BitStream->hasBlockInfoRecords())
>>> +  if (getBitStreamReader()->hasBlockInfoRecords())
>>>   return SkipBlock();
>>> 
>>> if (EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) return true;
>>> @@ -361,11 +362,13 @@ bool BitstreamCursor::ReadBlockInfoBlock
>>>     default: break;  // Default behavior, ignore unknown content.
>>>     case bitc::BLOCKINFO_CODE_SETBID:
>>>       if (Record.size() < 1) return true;
>>> -        CurBlockInfo = &BitStream->getOrCreateBlockInfo((unsigned)Record[0]);
>>> +        CurBlockInfo =
>>> +            &getBitStreamReader()->getOrCreateBlockInfo((unsigned)Record[0]);
>>>       break;
>>>     case bitc::BLOCKINFO_CODE_BLOCKNAME: {
>>>       if (!CurBlockInfo) return true;
>>> -        if (BitStream->isIgnoringBlockInfoNames()) break;  // Ignore name.
>>> +        if (getBitStreamReader()->isIgnoringBlockInfoNames())
>>> +          break; // Ignore name.
>>>       std::string Name;
>>>       for (unsigned i = 0, e = Record.size(); i != e; ++i)
>>>         Name += (char)Record[i];
>>> @@ -374,7 +377,8 @@ bool BitstreamCursor::ReadBlockInfoBlock
>>>     }
>>>     case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
>>>       if (!CurBlockInfo) return true;
>>> -        if (BitStream->isIgnoringBlockInfoNames()) break;  // Ignore name.
>>> +        if (getBitStreamReader()->isIgnoringBlockInfoNames())
>>> +          break; // Ignore name.
>>>       std::string Name;
>>>       for (unsigned i = 1, e = Record.size(); i != e; ++i)
>>>         Name += (char)Record[i];
>>> 
>>> 
>>> _______________________________________________
>>> llvm-commits mailing list
>>> llvm-commits at lists.llvm.org
>>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>> 
>