[llvm] r264545 - Bitcode: Split out SimpleBitstreamCursor

Mon Mar 28 22:37:49 PDT 2016

> On 2016-Mar-28, at 22:05, Mehdi Amini <mehdi.amini at apple.com> wrote:
> 
>> 
>> On Mar 28, 2016, at 9:52 PM, Duncan P. N. Exon Smith <dexonsmith at apple.com> wrote:
>> 
>> 
>>> On 2016-Mar-28, at 21:51, Mehdi Amini <mehdi.amini at apple.com> wrote:
>>> 
>>> 
>>>> On Mar 27, 2016, at 3:40 PM, Duncan P. N. Exon Smith via llvm-commits <llvm-commits at lists.llvm.org> wrote:
>>>> 
>>>> Author: dexonsmith
>>>> Date: Sun Mar 27 17:40:55 2016
>>>> New Revision: 264545
>>>> 
>>>> URL: http://llvm.org/viewvc/llvm-project?rev=264545&view=rev
>>>> Log:
>>>> Bitcode: Split out SimpleBitstreamCursor
>>>> 
>>>> Split out SimpleBitstreamCursor from BitstreamCursor, which is a
>>>> lower-level cursor with no knowledge of bitcode blocks, abbreviations,
>>>> or records.  It just knows how to read bits and navigate the stream.
>>>> 
>>>> This is mainly organizational, to separate the API for manipulating raw
>>>> bits from that for bitcode concepts like Record and Block.
>>>> 
>>>> Modified:
>>>> llvm/trunk/include/llvm/Bitcode/BitstreamReader.h
>>>> llvm/trunk/lib/Bitcode/Reader/BitstreamReader.cpp
>>>> 
>>>> Modified: llvm/trunk/include/llvm/Bitcode/BitstreamReader.h
>>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Bitcode/BitstreamReader.h?rev=264545&r1=264544&r2=264545&view=diff
>>>> ==============================================================================
>>>> --- llvm/trunk/include/llvm/Bitcode/BitstreamReader.h (original)
>>>> +++ llvm/trunk/include/llvm/Bitcode/BitstreamReader.h Sun Mar 27 17:40:55 2016
>>>> @@ -128,98 +128,42 @@ public:
>>>> }
>>>> };
>>>> 
>>>> -/// When advancing through a bitstream cursor, each advance can discover a few
>>>> -/// different kinds of entries:
>>>> -struct BitstreamEntry {
>>>> -  enum {
>>>> -    Error,    // Malformed bitcode was found.
>>>> -    EndBlock, // We've reached the end of the current block, (or the end of the
>>>> -              // file, which is treated like a series of EndBlock records.
>>>> -    SubBlock, // This is the start of a new subblock of a specific ID.
>>>> -    Record    // This is a record with a specific AbbrevID.
>>>> -  } Kind;
>>>> -
>>>> -  unsigned ID;
>>>> -
>>>> -  static BitstreamEntry getError() {
>>>> -    BitstreamEntry E; E.Kind = Error; return E;
>>>> -  }
>>>> -  static BitstreamEntry getEndBlock() {
>>>> -    BitstreamEntry E; E.Kind = EndBlock; return E;
>>>> -  }
>>>> -  static BitstreamEntry getSubBlock(unsigned ID) {
>>>> -    BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E;
>>>> -  }
>>>> -  static BitstreamEntry getRecord(unsigned AbbrevID) {
>>>> -    BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E;
>>>> -  }
>>>> -};
>>>> -
>>>> -/// This represents a position within a bitcode file. There may be multiple
>>>> -/// independent cursors reading within one bitstream, each maintaining their own
>>>> -/// local state.
>>>> -///
>>>> -/// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
>>>> -/// be passed by value.
>>>> -class BitstreamCursor {
>>>> -  BitstreamReader *BitStream;
>>>> -  size_t NextChar;
>>>> +/// This represents a position within a bitstream. There may be multiple
>>>> +/// independent cursors reading within one bitstream, each maintaining their
>>>> +/// own local state.
>>>> +class SimpleBitstreamCursor {
>>>> +  BitstreamReader *R = nullptr;
>>>> +  size_t NextChar = 0;
>>>> 
>>>> // The size of the bicode. 0 if we don't know it yet.
>>>> -  size_t Size;
>>>> +  size_t Size = 0;
>>>> 
>>>> /// This is the current data we have pulled from the stream but have not
>>>> /// returned to the client. This is specifically and intentionally defined to
>>>> /// follow the word size of the host machine for efficiency. We use word_t in
>>>> /// places that are aware of this to make it perfectly explicit what is going
>>>> /// on.
>>>> +public:
>>>> typedef size_t word_t;
>>>> -  word_t CurWord;
>>>> +
>>>> +private:
>>>> +  word_t CurWord = 0;
>>>> 
>>>> /// This is the number of bits in CurWord that are valid. This is always from
>>>> /// [0...bits_of(size_t)-1] inclusive.
>>>> -  unsigned BitsInCurWord;
>>>> -
>>>> -  // This is the declared size of code values used for the current block, in
>>>> -  // bits.
>>>> -  unsigned CurCodeSize;
>>>> -
>>>> -  /// Abbrevs installed at in this block.
>>>> -  std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> CurAbbrevs;
>>>> -
>>>> -  struct Block {
>>>> -    unsigned PrevCodeSize;
>>>> -    std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs;
>>>> -    explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
>>>> -  };
>>>> -
>>>> -  /// This tracks the codesize of parent blocks.
>>>> -  SmallVector<Block, 8> BlockScope;
>>>> -
>>>> +  unsigned BitsInCurWord = 0;
>>>> 
>>>> public:
>>>> static const size_t MaxChunkSize = sizeof(word_t) * 8;
>>>> 
>>>> -  BitstreamCursor() { init(nullptr); }
>>>> -
>>>> -  explicit BitstreamCursor(BitstreamReader &R) { init(&R); }
>>>> -
>>>> -  void init(BitstreamReader *R) {
>>>> -    freeState();
>>>> +  SimpleBitstreamCursor() = default;
>>>> 
>>>> -    BitStream = R;
>>>> -    NextChar = 0;
>>>> -    Size = 0;
>>>> -    BitsInCurWord = 0;
>>>> -    CurCodeSize = 2;
>>>> -  }
>>>> -
>>>> -  void freeState();
>>>> +  explicit SimpleBitstreamCursor(BitstreamReader *R) : R(R) {}
>>>> 
>>>> bool canSkipToPos(size_t pos) const {
>>>>  // pos can be skipped to if it is a valid address or one byte past the end.
>>>> -    return pos == 0 || BitStream->getBitcodeBytes().isValidAddress(
>>>> -        static_cast<uint64_t>(pos - 1));
>>>> +    return pos == 0 ||
>>>> +           R->getBitcodeBytes().isValidAddress(static_cast<uint64_t>(pos - 1));
>>>> }
>>>> 
>>>> bool AtEndOfStream() {
>>>> @@ -231,72 +175,13 @@ public:
>>>>  return BitsInCurWord == 0;
>>>> }
>>>> 
>>>> -  /// Return the number of bits used to encode an abbrev #.
>>>> -  unsigned getAbbrevIDWidth() const { return CurCodeSize; }
>>>> -
>>>> /// Return the bit # of the bit we are reading.
>>>> uint64_t GetCurrentBitNo() const {
>>>>  return NextChar*CHAR_BIT - BitsInCurWord;
>>>> }
>>>> 
>>>> -  BitstreamReader *getBitStreamReader() {
>>>> -    return BitStream;
>>>> -  }
>>>> -  const BitstreamReader *getBitStreamReader() const {
>>>> -    return BitStream;
>>>> -  }
>>>> -
>>>> -  /// Flags that modify the behavior of advance().
>>>> -  enum {
>>>> -    /// If this flag is used, the advance() method does not automatically pop
>>>> -    /// the block scope when the end of a block is reached.
>>>> -    AF_DontPopBlockAtEnd = 1,
>>>> -
>>>> -    /// If this flag is used, abbrev entries are returned just like normal
>>>> -    /// records.
>>>> -    AF_DontAutoprocessAbbrevs = 2
>>>> -  };
>>>> -
>>>> -  /// Advance the current bitstream, returning the next entry in the stream.
>>>> -  BitstreamEntry advance(unsigned Flags = 0) {
>>>> -    while (1) {
>>>> -      unsigned Code = ReadCode();
>>>> -      if (Code == bitc::END_BLOCK) {
>>>> -        // Pop the end of the block unless Flags tells us not to.
>>>> -        if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd())
>>>> -          return BitstreamEntry::getError();
>>>> -        return BitstreamEntry::getEndBlock();
>>>> -      }
>>>> -
>>>> -      if (Code == bitc::ENTER_SUBBLOCK)
>>>> -        return BitstreamEntry::getSubBlock(ReadSubBlockID());
>>>> -
>>>> -      if (Code == bitc::DEFINE_ABBREV &&
>>>> -          !(Flags & AF_DontAutoprocessAbbrevs)) {
>>>> -        // We read and accumulate abbrev's, the client can't do anything with
>>>> -        // them anyway.
>>>> -        ReadAbbrevRecord();
>>>> -        continue;
>>>> -      }
>>>> -
>>>> -      return BitstreamEntry::getRecord(Code);
>>>> -    }
>>>> -  }
>>>> -
>>>> -  /// This is a convenience function for clients that don't expect any
>>>> -  /// subblocks. This just skips over them automatically.
>>>> -  BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) {
>>>> -    while (1) {
>>>> -      // If we found a normal entry, return it.
>>>> -      BitstreamEntry Entry = advance(Flags);
>>>> -      if (Entry.Kind != BitstreamEntry::SubBlock)
>>>> -        return Entry;
>>>> -
>>>> -      // If we found a sub-block, just skip over it and check the next entry.
>>>> -      if (SkipBlock())
>>>> -        return BitstreamEntry::getError();
>>>> -    }
>>>> -  }
>>>> +  BitstreamReader *getBitStreamReader() { return R; }
>>>> +  const BitstreamReader *getBitStreamReader() const { return R; }
>>>> 
>>>> /// Reset the stream to the specified bit number.
>>>> void JumpToBit(uint64_t BitNo) {
>>>> @@ -321,7 +206,7 @@ public:
>>>>  uint8_t Array[sizeof(word_t)] = {0};
>>>> 
>>>>  uint64_t BytesRead =
>>>> -        BitStream->getBitcodeBytes().readBytes(Array, sizeof(Array), NextChar);
>>>> +        R->getBitcodeBytes().readBytes(Array, sizeof(Array), NextChar);
>>>> 
>>>>  // If we run out of data, stop at the end of the stream.
>>>>  if (BytesRead == 0) {
>>>> @@ -416,7 +301,6 @@ public:
>>>>  }
>>>> }
>>>> 
>>>> -private:
>>>> void SkipToFourByteBoundary() {
>>>>  // If word_t is 64-bits and if we've read less than 32 bits, just dump
>>>>  // the bits we have up to the next 32-bit boundary.
>>>> @@ -429,7 +313,140 @@ private:
>>>> 
>>>>  BitsInCurWord = 0;
>>>> }
>>>> +
>>>> +  /// Skip to the end of the file.
>>>> +  void skipToEnd() { NextChar = R->getBitcodeBytes().getExtent(); }
>>>> +};
>>>> +
>>>> +/// When advancing through a bitstream cursor, each advance can discover a few
>>>> +/// different kinds of entries:
>>>> +struct BitstreamEntry {
>>>> +  enum {
>>>> +    Error,    // Malformed bitcode was found.
>>>> +    EndBlock, // We've reached the end of the current block, (or the end of the
>>>> +              // file, which is treated like a series of EndBlock records.
>>>> +    SubBlock, // This is the start of a new subblock of a specific ID.
>>>> +    Record    // This is a record with a specific AbbrevID.
>>>> +  } Kind;
>>>> +
>>>> +  unsigned ID;
>>>> +
>>>> +  static BitstreamEntry getError() {
>>>> +    BitstreamEntry E; E.Kind = Error; return E;
>>>> +  }
>>>> +  static BitstreamEntry getEndBlock() {
>>>> +    BitstreamEntry E; E.Kind = EndBlock; return E;
>>>> +  }
>>>> +  static BitstreamEntry getSubBlock(unsigned ID) {
>>>> +    BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E;
>>>> +  }
>>>> +  static BitstreamEntry getRecord(unsigned AbbrevID) {
>>>> +    BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E;
>>>> +  }
>>>> +};
>>>> +
>>>> +/// This represents a position within a bitcode file, implemented on top of a
>>>> +/// SimpleBitstreamCursor.
>>>> +///
>>>> +/// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
>>>> +/// be passed by value.
>>>> +class BitstreamCursor : SimpleBitstreamCursor {
>>> 
>>> Is the private inheritance intended here?
>>> 
>> 
>> Yes.
> 
> In my case I was trying to compute the hash of the module block when I see the module_hash record in llvm-bcanalyzer.
> I planned to use getPointerToByte() for this, but I only have a BitstreamCursor.

I left the new low-level functions in SimpleBitstreamCursor out of
BitstreamCursor since I didn't see a use for them there (and some of
them might be error prone).  Just expose what you need by adding
another `using` clause to BitstreamCursor.

You could also construct a SimpleBitstreamCursor (they're cheap), but
I don't see any reason not to expose this to the higher-level API.

> -- 
> Mehdi
> 
> 
> 
>>> 
>>> 
>>> 
>>>> +  // This is the declared size of code values used for the current block, in
>>>> +  // bits.
>>>> +  unsigned CurCodeSize = 2;
>>>> +
>>>> +  /// Abbrevs installed at in this block.
>>>> +  std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> CurAbbrevs;
>>>> +
>>>> +  struct Block {
>>>> +    unsigned PrevCodeSize;
>>>> +    std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs;
>>>> +    explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
>>>> +  };
>>>> +
>>>> +  /// This tracks the codesize of parent blocks.
>>>> +  SmallVector<Block, 8> BlockScope;
>>>> +
>>>> +
>>>> public:
>>>> +  static const size_t MaxChunkSize = sizeof(word_t) * 8;
>>>> +
>>>> +  BitstreamCursor() = default;
>>>> +
>>>> +  explicit BitstreamCursor(BitstreamReader &R) { init(&R); }
>>>> +
>>>> +  void init(BitstreamReader *R) {
>>>> +    freeState();
>>>> +    SimpleBitstreamCursor::operator=(SimpleBitstreamCursor(R));
>>>> +    CurCodeSize = 2;
>>>> +  }
>>>> +
>>>> +  void freeState();
>>>> +
>>>> +  using SimpleBitstreamCursor::canSkipToPos;
>>>> +  using SimpleBitstreamCursor::AtEndOfStream;
>>>> +  using SimpleBitstreamCursor::GetCurrentBitNo;
>>>> +  using SimpleBitstreamCursor::getBitStreamReader;
>>>> +  using SimpleBitstreamCursor::JumpToBit;
>>>> +  using SimpleBitstreamCursor::fillCurWord;
>>>> +  using SimpleBitstreamCursor::Read;
>>>> +  using SimpleBitstreamCursor::ReadVBR;
>>>> +  using SimpleBitstreamCursor::ReadVBR64;
>>>> +
>>>> +  /// Return the number of bits used to encode an abbrev #.
>>>> +  unsigned getAbbrevIDWidth() const { return CurCodeSize; }
>>>> +
>>>> +  /// Flags that modify the behavior of advance().
>>>> +  enum {
>>>> +    /// If this flag is used, the advance() method does not automatically pop
>>>> +    /// the block scope when the end of a block is reached.
>>>> +    AF_DontPopBlockAtEnd = 1,
>>>> +
>>>> +    /// If this flag is used, abbrev entries are returned just like normal
>>>> +    /// records.
>>>> +    AF_DontAutoprocessAbbrevs = 2
>>>> +  };
>>>> +
>>>> +  /// Advance the current bitstream, returning the next entry in the stream.
>>>> +  BitstreamEntry advance(unsigned Flags = 0) {
>>>> +    while (1) {
>>>> +      unsigned Code = ReadCode();
>>>> +      if (Code == bitc::END_BLOCK) {
>>>> +        // Pop the end of the block unless Flags tells us not to.
>>>> +        if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd())
>>>> +          return BitstreamEntry::getError();
>>>> +        return BitstreamEntry::getEndBlock();
>>>> +      }
>>>> +
>>>> +      if (Code == bitc::ENTER_SUBBLOCK)
>>>> +        return BitstreamEntry::getSubBlock(ReadSubBlockID());
>>>> +
>>>> +      if (Code == bitc::DEFINE_ABBREV &&
>>>> +          !(Flags & AF_DontAutoprocessAbbrevs)) {
>>>> +        // We read and accumulate abbrev's, the client can't do anything with
>>>> +        // them anyway.
>>>> +        ReadAbbrevRecord();
>>>> +        continue;
>>>> +      }
>>>> +
>>>> +      return BitstreamEntry::getRecord(Code);
>>>> +    }
>>>> +  }
>>>> +
>>>> +  /// This is a convenience function for clients that don't expect any
>>>> +  /// subblocks. This just skips over them automatically.
>>>> +  BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) {
>>>> +    while (1) {
>>>> +      // If we found a normal entry, return it.
>>>> +      BitstreamEntry Entry = advance(Flags);
>>>> +      if (Entry.Kind != BitstreamEntry::SubBlock)
>>>> +        return Entry;
>>>> +
>>>> +      // If we found a sub-block, just skip over it and check the next entry.
>>>> +      if (SkipBlock())
>>>> +        return BitstreamEntry::getError();
>>>> +    }
>>>> +  }
>>>> 
>>>> unsigned ReadCode() {
>>>>  return Read(CurCodeSize);
>>>> 
>>>> Modified: llvm/trunk/lib/Bitcode/Reader/BitstreamReader.cpp
>>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Bitcode/Reader/BitstreamReader.cpp?rev=264545&r1=264544&r2=264545&view=diff
>>>> ==============================================================================
>>>> --- llvm/trunk/lib/Bitcode/Reader/BitstreamReader.cpp (original)
>>>> +++ llvm/trunk/lib/Bitcode/Reader/BitstreamReader.cpp Sun Mar 27 17:40:55 2016
>>>> @@ -32,7 +32,7 @@ bool BitstreamCursor::EnterSubBlock(unsi
>>>> 
>>>> // Add the abbrevs specific to this block to the CurAbbrevs list.
>>>> if (const BitstreamReader::BlockInfo *Info =
>>>> -      BitStream->getBlockInfo(BlockID)) {
>>>> +          getBitStreamReader()->getBlockInfo(BlockID)) {
>>>>  CurAbbrevs.insert(CurAbbrevs.end(), Info->Abbrevs.begin(),
>>>>                    Info->Abbrevs.end());
>>>> }
>>>> @@ -164,7 +164,7 @@ void BitstreamCursor::skipRecord(unsigne
>>>>  // If this would read off the end of the bitcode file, just set the
>>>>  // record to empty and return.
>>>>  if (!canSkipToPos(NewEnd/8)) {
>>>> -      NextChar = BitStream->getBitcodeBytes().getExtent();
>>>> +      skipToEnd();
>>>>    break;
>>>>  }
>>>> 
>>>> @@ -256,13 +256,14 @@ unsigned BitstreamCursor::readRecord(uns
>>>>  // record to empty and return.
>>>>  if (!canSkipToPos(NewEnd/8)) {
>>>>    Vals.append(NumElts, 0);
>>>> -      NextChar = BitStream->getBitcodeBytes().getExtent();
>>>> +      skipToEnd();
>>>>    break;
>>>>  }
>>>> 
>>>>  // Otherwise, inform the streamer that we need these bytes in memory.
>>>> -    const char *Ptr = (const char*)
>>>> -      BitStream->getBitcodeBytes().getPointer(CurBitPos/8, NumElts);
>>>> +    const char *Ptr =
>>>> +        (const char *)getBitStreamReader()->getBitcodeBytes().getPointer(
>>>> +            CurBitPos / 8, NumElts);
>>>> 
>>>>  // If we can return a reference to the data, do so to avoid copying it.
>>>>  if (Blob) {
>>>> @@ -320,7 +321,7 @@ void BitstreamCursor::ReadAbbrevRecord()
>>>> 
>>>> bool BitstreamCursor::ReadBlockInfoBlock() {
>>>> // If this is the second stream to get to the block info block, skip it.
>>>> -  if (BitStream->hasBlockInfoRecords())
>>>> +  if (getBitStreamReader()->hasBlockInfoRecords())
>>>>  return SkipBlock();
>>>> 
>>>> if (EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) return true;
>>>> @@ -361,11 +362,13 @@ bool BitstreamCursor::ReadBlockInfoBlock
>>>>    default: break;  // Default behavior, ignore unknown content.
>>>>    case bitc::BLOCKINFO_CODE_SETBID:
>>>>      if (Record.size() < 1) return true;
>>>> -        CurBlockInfo = &BitStream->getOrCreateBlockInfo((unsigned)Record[0]);
>>>> +        CurBlockInfo =
>>>> +            &getBitStreamReader()->getOrCreateBlockInfo((unsigned)Record[0]);
>>>>      break;
>>>>    case bitc::BLOCKINFO_CODE_BLOCKNAME: {
>>>>      if (!CurBlockInfo) return true;
>>>> -        if (BitStream->isIgnoringBlockInfoNames()) break;  // Ignore name.
>>>> +        if (getBitStreamReader()->isIgnoringBlockInfoNames())
>>>> +          break; // Ignore name.
>>>>      std::string Name;
>>>>      for (unsigned i = 0, e = Record.size(); i != e; ++i)
>>>>        Name += (char)Record[i];
>>>> @@ -374,7 +377,8 @@ bool BitstreamCursor::ReadBlockInfoBlock
>>>>    }
>>>>    case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
>>>>      if (!CurBlockInfo) return true;
>>>> -        if (BitStream->isIgnoringBlockInfoNames()) break;  // Ignore name.
>>>> +        if (getBitStreamReader()->isIgnoringBlockInfoNames())
>>>> +          break; // Ignore name.
>>>>      std::string Name;
>>>>      for (unsigned i = 1, e = Record.size(); i != e; ++i)
>>>>        Name += (char)Record[i];
>>>> 
>>>> 
>>>> _______________________________________________
>>>> llvm-commits mailing list
>>>> llvm-commits at lists.llvm.org
>>>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits