[llvm] r287156 - Bitcode: Introduce initial multi-module reader API.

Peter Collingbourne via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 16 13:44:46 PST 2016


Author: pcc
Date: Wed Nov 16 15:44:45 2016
New Revision: 287156

URL: http://llvm.org/viewvc/llvm-project?rev=287156&view=rev
Log:
Bitcode: Introduce initial multi-module reader API.

Implement getLazyBitcodeModule() and parseBitcodeFile() in terms of the new
multi-module reader API.

Differential Revision: https://reviews.llvm.org/D26719

Modified:
    llvm/trunk/include/llvm/Bitcode/BitcodeReader.h
    llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp
    llvm/trunk/test/Bitcode/invalid.test
    llvm/trunk/test/Bitcode/null-type.ll

Modified: llvm/trunk/include/llvm/Bitcode/BitcodeReader.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Bitcode/BitcodeReader.h?rev=287156&r1=287155&r2=287156&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Bitcode/BitcodeReader.h (original)
+++ llvm/trunk/include/llvm/Bitcode/BitcodeReader.h Wed Nov 16 15:44:45 2016
@@ -40,6 +40,44 @@ namespace llvm {
     return std::move(*Val);
   }
 
+  /// Represents a module in a bitcode file.
+  class BitcodeModule {
+    ArrayRef<uint8_t> Buffer;
+    StringRef ModuleIdentifier;
+
+    // The bitstream location of the IDENTIFICATION_BLOCK.
+    uint64_t IdentificationBit;
+
+    // The bitstream location of this module's MODULE_BLOCK.
+    uint64_t ModuleBit;
+
+    BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier,
+                  uint64_t IdentificationBit, uint64_t ModuleBit)
+        : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier),
+          IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {}
+
+    // Calls the ctor.
+    friend Expected<std::vector<BitcodeModule>>
+    getBitcodeModuleList(MemoryBufferRef Buffer);
+
+    Expected<std::unique_ptr<Module>>
+    getModuleImpl(LLVMContext &Context, bool MaterializeAll,
+                  bool ShouldLazyLoadMetadata);
+
+  public:
+    /// Read the bitcode module and prepare for lazy deserialization of function
+    /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
+    Expected<std::unique_ptr<Module>>
+    getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata);
+
+    /// Read the entire bitcode module and return it.
+    Expected<std::unique_ptr<Module>> parseModule(LLVMContext &Context);
+  };
+
+  /// Returns a list of modules in the specified bitcode buffer.
+  Expected<std::vector<BitcodeModule>>
+  getBitcodeModuleList(MemoryBufferRef Buffer);
+
   /// Read the header of the specified bitcode buffer and prepare for lazy
   /// deserialization of function bodies. If ShouldLazyLoadMetadata is true,
   /// lazily load metadata as well.

Modified: llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp?rev=287156&r1=287155&r2=287156&view=diff
==============================================================================
--- llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp (original)
+++ llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp Wed Nov 16 15:44:45 2016
@@ -607,7 +607,8 @@ class BitcodeReader : public BitcodeRead
   std::vector<std::string> BundleTags;
 
 public:
-  BitcodeReader(BitstreamCursor Stream, LLVMContext &Context);
+  BitcodeReader(BitstreamCursor Stream, StringRef ProducerIdentification,
+                LLVMContext &Context);
 
   Error materializeForwardReferencedFunctions();
 
@@ -841,9 +842,13 @@ std::error_code llvm::errorToErrorCodeAn
   return std::error_code();
 }
 
-BitcodeReader::BitcodeReader(BitstreamCursor Stream, LLVMContext &Context)
-    : BitcodeReaderBase(std::move(Stream)), Context(Context), ValueList(Context),
-      MetadataList(Context) {}
+BitcodeReader::BitcodeReader(BitstreamCursor Stream,
+                             StringRef ProducerIdentification,
+                             LLVMContext &Context)
+    : BitcodeReaderBase(std::move(Stream)), Context(Context),
+      ValueList(Context), MetadataList(Context) {
+  this->ProducerIdentification = ProducerIdentification;
+}
 
 Error BitcodeReader::materializeForwardReferencedFunctions() {
   if (WillMaterializeAllForwardRefs)
@@ -4365,36 +4370,7 @@ Error BitcodeReader::parseModule(uint64_
 
 Error BitcodeReader::parseBitcodeInto(Module *M, bool ShouldLazyLoadMetadata) {
   TheModule = M;
-
-  // We expect a number of well-defined blocks, though we don't necessarily
-  // need to understand them all.
-  while (true) {
-    if (Stream.AtEndOfStream()) {
-      // We didn't really read a proper Module.
-      return error("Malformed IR file");
-    }
-
-    BitstreamEntry Entry =
-      Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs);
-
-    if (Entry.Kind != BitstreamEntry::SubBlock)
-      return error("Malformed block");
-
-    if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID) {
-      Expected<std::string> ProducerIdentificationOrErr =
-          readIdentificationBlock(Stream);
-      if (!ProducerIdentificationOrErr)
-        return ProducerIdentificationOrErr.takeError();
-      ProducerIdentification = *ProducerIdentificationOrErr;
-      continue;
-    }
-
-    if (Entry.ID == bitc::MODULE_BLOCK_ID)
-      return parseModule(0, ShouldLazyLoadMetadata);
-
-    if (Stream.SkipBlock())
-      return error("Invalid record");
-  }
+  return parseModule(0, ShouldLazyLoadMetadata);
 }
 
 Error BitcodeReader::parseGlobalObjectAttachment(GlobalObject &GO,
@@ -6566,26 +6542,76 @@ const std::error_category &llvm::Bitcode
 // External interface
 //===----------------------------------------------------------------------===//
 
+Expected<std::vector<BitcodeModule>>
+llvm::getBitcodeModuleList(MemoryBufferRef Buffer) {
+  Expected<BitstreamCursor> StreamOrErr = initStream(Buffer);
+  if (!StreamOrErr)
+    return StreamOrErr.takeError();
+  BitstreamCursor &Stream = *StreamOrErr;
+
+  uint64_t IdentificationBit = -1ull;
+  std::vector<BitcodeModule> Modules;
+  while (true) {
+    // We may be consuming bitcode from a client that leaves garbage at the end
+    // of the bitcode stream (e.g. Apple's ar tool). If we are close enough to
+    // the end that there cannot possibly be another module, stop looking.
+    if (Stream.getCurrentByteNo() + 8 >= Stream.getBitcodeBytes().size())
+      return Modules;
+
+    BitstreamEntry Entry = Stream.advance();
+    switch (Entry.Kind) {
+    case BitstreamEntry::EndBlock:
+    case BitstreamEntry::Error:
+      return error("Malformed block");
+
+    case BitstreamEntry::SubBlock:
+      if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID)
+        IdentificationBit = Stream.GetCurrentBitNo();
+      else if (Entry.ID == bitc::MODULE_BLOCK_ID)
+        Modules.push_back({Stream.getBitcodeBytes(),
+                           Buffer.getBufferIdentifier(), IdentificationBit,
+                           Stream.GetCurrentBitNo()});
+
+      if (Stream.SkipBlock())
+        return error("Malformed block");
+      continue;
+    case BitstreamEntry::Record:
+      Stream.skipRecord(Entry.ID);
+      continue;
+    }
+  }
+}
+
 /// \brief Get a lazy one-at-time loading module from bitcode.
 ///
 /// This isn't always used in a lazy context.  In particular, it's also used by
-/// \a parseBitcodeFile().  If this is truly lazy, then we need to eagerly pull
+/// \a parseModule().  If this is truly lazy, then we need to eagerly pull
 /// in forward-referenced functions from block address references.
 ///
 /// \param[in] MaterializeAll Set to \c true if we should materialize
 /// everything.
-static Expected<std::unique_ptr<Module>>
-getLazyBitcodeModuleImpl(MemoryBufferRef Buffer, LLVMContext &Context,
-                         bool MaterializeAll,
-                         bool ShouldLazyLoadMetadata = false) {
-  Expected<BitstreamCursor> StreamOrErr = initStream(Buffer);
-  if (!StreamOrErr)
-    return StreamOrErr.takeError();
-
-  BitcodeReader *R = new BitcodeReader(std::move(*StreamOrErr), Context);
+Expected<std::unique_ptr<Module>>
+BitcodeModule::getModuleImpl(LLVMContext &Context, bool MaterializeAll,
+                             bool ShouldLazyLoadMetadata) {
+  BitstreamCursor Stream(Buffer);
+
+  std::string ProducerIdentification;
+  if (IdentificationBit != -1ull) {
+    Stream.JumpToBit(IdentificationBit);
+    Expected<std::string> ProducerIdentificationOrErr =
+        readIdentificationBlock(Stream);
+    if (!ProducerIdentificationOrErr)
+      return ProducerIdentificationOrErr.takeError();
+
+    ProducerIdentification = *ProducerIdentificationOrErr;
+  }
+
+  Stream.JumpToBit(ModuleBit);
+  auto *R =
+      new BitcodeReader(std::move(Stream), ProducerIdentification, Context);
 
   std::unique_ptr<Module> M =
-      llvm::make_unique<Module>(Buffer.getBufferIdentifier(), Context);
+      llvm::make_unique<Module>(ModuleIdentifier, Context);
   M->setMaterializer(R);
 
   // Delay parsing Metadata if ShouldLazyLoadMetadata is true.
@@ -6605,10 +6631,22 @@ getLazyBitcodeModuleImpl(MemoryBufferRef
 }
 
 Expected<std::unique_ptr<Module>>
+BitcodeModule::getLazyModule(LLVMContext &Context,
+                             bool ShouldLazyLoadMetadata) {
+  return getModuleImpl(Context, false, ShouldLazyLoadMetadata);
+}
+
+Expected<std::unique_ptr<Module>>
 llvm::getLazyBitcodeModule(MemoryBufferRef Buffer,
                            LLVMContext &Context, bool ShouldLazyLoadMetadata) {
-  return getLazyBitcodeModuleImpl(Buffer, Context, false,
-                                  ShouldLazyLoadMetadata);
+  Expected<std::vector<BitcodeModule>> MsOrErr = getBitcodeModuleList(Buffer);
+  if (!MsOrErr)
+    return MsOrErr.takeError();
+
+  if (MsOrErr->size() != 1)
+    return error("Expected a single module");
+
+  return (*MsOrErr)[0].getLazyModule(Context, ShouldLazyLoadMetadata);
 }
 
 Expected<std::unique_ptr<Module>>
@@ -6621,13 +6659,25 @@ llvm::getOwningLazyBitcodeModule(std::un
   return MOrErr;
 }
 
-Expected<std::unique_ptr<Module>> llvm::parseBitcodeFile(MemoryBufferRef Buffer,
-                                                         LLVMContext &Context) {
-  return getLazyBitcodeModuleImpl(Buffer, Context, true);
+Expected<std::unique_ptr<Module>>
+BitcodeModule::parseModule(LLVMContext &Context) {
+  return getModuleImpl(Context, true, false);
   // TODO: Restore the use-lists to the in-memory state when the bitcode was
   // written.  We must defer until the Module has been fully materialized.
 }
 
+Expected<std::unique_ptr<Module>> llvm::parseBitcodeFile(MemoryBufferRef Buffer,
+                                                         LLVMContext &Context) {
+  Expected<std::vector<BitcodeModule>> MsOrErr = getBitcodeModuleList(Buffer);
+  if (!MsOrErr)
+    return MsOrErr.takeError();
+
+  if (MsOrErr->size() != 1)
+    return error("Expected a single module");
+
+  return (*MsOrErr)[0].parseModule(Context);
+}
+
 Expected<std::string> llvm::getBitcodeTargetTriple(MemoryBufferRef Buffer) {
   Expected<BitstreamCursor> StreamOrErr = initStream(Buffer);
   if (!StreamOrErr)

Modified: llvm/trunk/test/Bitcode/invalid.test
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Bitcode/invalid.test?rev=287156&r1=287155&r2=287156&view=diff
==============================================================================
--- llvm/trunk/test/Bitcode/invalid.test (original)
+++ llvm/trunk/test/Bitcode/invalid.test Wed Nov 16 15:44:45 2016
@@ -31,11 +31,11 @@ RUN:   FileCheck --check-prefix=NON-FUNC
 
 INVALID-EMPTY: Invalid bitcode signature
 INVALID-ENCODING: Invalid encoding
-BAD-ABBREV: Abbreviation starts with an Array or a Blob
-UNEXPECTED-EOF: Unexpected end of file
-BAD-ABBREV-NUMBER: Invalid abbrev number
+BAD-ABBREV: Malformed block
+UNEXPECTED-EOF: Malformed block
+BAD-ABBREV-NUMBER: Malformed block
 BAD-TYPE-TABLE-FORWARD-REF: Invalid TYPE table: Only named structs can be forward referenced
-BAD-BITWIDTH: Bitwidth for integer type out of range
+BAD-BITWIDTH: Malformed block
 BAD-ALIGN: Invalid alignment value
 MISMATCHED-EXPLICIT-GEP: Explicit gep type does not match pointee type of pointer operand
 MISMATCHED-EXPLICIT-LOAD: Explicit load/store type does not match pointee type of pointer operand
@@ -69,7 +69,7 @@ INSERT-IDXS: INSERTVAL: Invalid type
 RUN: not llvm-dis -disable-output %p/Inputs/invalid-no-proper-module.bc 2>&1 | \
 RUN:   FileCheck --check-prefix=NO-MODULE %s
 
-NO-MODULE: Malformed IR file
+NO-MODULE: Expected a single module
 
 RUN: not llvm-dis -disable-output %p/Inputs/invalid-fp-shift.bc 2>&1 | \
 RUN:   FileCheck --check-prefix=FP-SHIFT %s
@@ -105,7 +105,7 @@ FWDREF-TYPE: Invalid record
 RUN: not llvm-dis -disable-output %p/Inputs/invalid-fwdref-type-mismatch-2.bc 2>&1 | \
 RUN:   FileCheck --check-prefix=FWDREF-TYPE-MISMATCH %s
 
-FWDREF-TYPE-MISMATCH: Type mismatch in constant table!
+FWDREF-TYPE-MISMATCH: Malformed block
 
 RUN: not llvm-dis -disable-output %p/Inputs/invalid-array-element-type.bc 2>&1 | \
 RUN:   FileCheck --check-prefix=ELEMENT-TYPE %s
@@ -154,7 +154,7 @@ EXTRACT-0-IDXS: EXTRACTVAL: Invalid inst
 RUN: not llvm-dis -disable-output %p/Inputs/invalid-load-ptr-type.bc 2>&1 | \
 RUN:   FileCheck --check-prefix=BAD-LOAD-PTR-TYPE %s
 
-BAD-LOAD-PTR-TYPE: Cannot load/store from pointer
+BAD-LOAD-PTR-TYPE: Malformed block
 
 RUN: not llvm-dis -disable-output %p/Inputs/invalid-inserted-value-type-mismatch.bc 2>&1 | \
 RUN:   FileCheck --check-prefix=INSERT-TYPE-MISMATCH %s
@@ -174,7 +174,7 @@ INVALID-ARGUMENT-TYPE: Invalid function
 RUN: not llvm-dis -disable-output %p/Inputs/invalid-function-comdat-id.bc 2>&1 | \
 RUN:   FileCheck --check-prefix=INVALID-FCOMDAT-ID %s
 
-INVALID-FCOMDAT-ID: Invalid function comdat ID
+INVALID-FCOMDAT-ID: Malformed block
 
 RUN: not llvm-dis -disable-output %p/Inputs/invalid-global-var-comdat-id.bc 2>&1 | \
 RUN:   FileCheck --check-prefix=INVALID-GVCOMDAT-ID %s
@@ -189,12 +189,12 @@ ABBREV-NO-OPS: Abbrev record with no ope
 RUN: not llvm-dis -disable-output %p/Inputs/invalid-array-operand-encoding.bc 2>&1 | \
 RUN:   FileCheck --check-prefix=ARRAY-OP-ENC %s
 
-ARRAY-OP-ENC: Array element type has to be an encoding of a type
+ARRAY-OP-ENC: Malformed block
 
 RUN: not llvm-dis -disable-output %p/Inputs/invalid-metadata-not-followed-named-node.bc 2>&1 | \
 RUN:   FileCheck --check-prefix=META-NOT-FOLLOWED-BY-NAMED-META %s
 
-META-NOT-FOLLOWED-BY-NAMED-META: METADATA_NAME not followed by METADATA_NAMED_NODE
+META-NOT-FOLLOWED-BY-NAMED-META: Malformed block
 
 RUN: not llvm-dis -disable-output %p/Inputs/invalid-vector-length.bc 2>&1 | \
 RUN:   FileCheck --check-prefix=VECTOR-LENGTH %s
@@ -214,7 +214,7 @@ NO-FUNCTION-BLOCK: Trying to materialize
 RUN: not llvm-dis -disable-output %p/Inputs/invalid-name-with-0-byte.bc 2>&1 | \
 RUN:   FileCheck --check-prefix=NAME-WITH-0 %s
 
-NAME-WITH-0: Invalid value name
+NAME-WITH-0: Malformed block
 
 RUN: not llvm-dis -disable-output %p/Inputs/invalid-void-constant.bc 2>&1 | \
 RUN:   FileCheck --check-prefix=VOID-CONSTANT-TYPE %s

Modified: llvm/trunk/test/Bitcode/null-type.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Bitcode/null-type.ll?rev=287156&r1=287155&r2=287156&view=diff
==============================================================================
--- llvm/trunk/test/Bitcode/null-type.ll (original)
+++ llvm/trunk/test/Bitcode/null-type.ll Wed Nov 16 15:44:45 2016
@@ -1,4 +1,4 @@
 ; RUN: not llvm-dis < %s.bc 2>&1 | FileCheck %s
 ; PR8494
 
-; CHECK: Invalid record
+; CHECK: Malformed block




More information about the llvm-commits mailing list