[llvm-commits] [llvm] r149918 - in /llvm/trunk: include/llvm/Bitcode/ include/llvm/MC/ include/llvm/Support/ lib/Bitcode/Reader/ lib/Bitcode/Writer/ lib/MC/MCDisassembler/ lib/Support/ lib/Target/ARM/Disassembler/ lib/Target/MBlaze/Disassembler/ lib/Target/X86/Disassembler/ tools/llvm-bcanalyzer/ tools/llvm-dis/ tools/llvm-mc/ tools/llvm-objdump/

Derek Schuff dschuff at google.com
Mon Feb 6 14:30:30 PST 2012


Author: dschuff
Date: Mon Feb  6 16:30:29 2012
New Revision: 149918

URL: http://llvm.org/viewvc/llvm-project?rev=149918&view=rev
Log:
Enable streaming of bitcode

This CL delays reading of function bodies from initial parse until
materialization, allowing overlap of compilation with bitcode download.


Added:
    llvm/trunk/include/llvm/Support/DataStream.h
    llvm/trunk/include/llvm/Support/StreamableMemoryObject.h
    llvm/trunk/lib/Support/DataStream.cpp
    llvm/trunk/lib/Support/StreamableMemoryObject.cpp
Modified:
    llvm/trunk/include/llvm/Bitcode/BitstreamReader.h
    llvm/trunk/include/llvm/Bitcode/ReaderWriter.h
    llvm/trunk/include/llvm/MC/MCDisassembler.h
    llvm/trunk/include/llvm/Support/MemoryObject.h
    llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp
    llvm/trunk/lib/Bitcode/Reader/BitcodeReader.h
    llvm/trunk/lib/Bitcode/Writer/BitcodeWriter.cpp
    llvm/trunk/lib/MC/MCDisassembler/Disassembler.cpp
    llvm/trunk/lib/MC/MCDisassembler/EDDisassembler.cpp
    llvm/trunk/lib/Support/CMakeLists.txt
    llvm/trunk/lib/Support/MemoryObject.cpp
    llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
    llvm/trunk/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
    llvm/trunk/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
    llvm/trunk/lib/Target/X86/Disassembler/X86Disassembler.cpp
    llvm/trunk/lib/Target/X86/Disassembler/X86Disassembler.h
    llvm/trunk/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
    llvm/trunk/tools/llvm-dis/llvm-dis.cpp
    llvm/trunk/tools/llvm-mc/Disassembler.cpp
    llvm/trunk/tools/llvm-objdump/MCFunction.cpp
    llvm/trunk/tools/llvm-objdump/MCFunction.h
    llvm/trunk/tools/llvm-objdump/llvm-objdump.h

Modified: llvm/trunk/include/llvm/Bitcode/BitstreamReader.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Bitcode/BitstreamReader.h?rev=149918&r1=149917&r2=149918&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Bitcode/BitstreamReader.h (original)
+++ llvm/trunk/include/llvm/Bitcode/BitstreamReader.h Mon Feb  6 16:30:29 2012
@@ -15,10 +15,12 @@
 #ifndef BITSTREAM_READER_H
 #define BITSTREAM_READER_H
 
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/Bitcode/BitCodes.h"
 #include <climits>
 #include <string>
 #include <vector>
+#include "llvm/Support/StreamableMemoryObject.h"
 
 namespace llvm {
 
@@ -36,9 +38,7 @@
     std::vector<std::pair<unsigned, std::string> > RecordNames;
   };
 private:
-  /// FirstChar/LastChar - This remembers the first and last bytes of the
-  /// stream.
-  const unsigned char *FirstChar, *LastChar;
+  OwningPtr<StreamableMemoryObject> BitcodeBytes;
   
   std::vector<BlockInfo> BlockInfoRecords;
 
@@ -47,10 +47,10 @@
   /// uses this.
   bool IgnoreBlockInfoNames;
   
-  BitstreamReader(const BitstreamReader&);  // NOT IMPLEMENTED
-  void operator=(const BitstreamReader&);  // NOT IMPLEMENTED
+  BitstreamReader(const BitstreamReader&);  // DO NOT IMPLEMENT
+  void operator=(const BitstreamReader&);  // DO NOT IMPLEMENT
 public:
-  BitstreamReader() : FirstChar(0), LastChar(0), IgnoreBlockInfoNames(true) {
+  BitstreamReader() : IgnoreBlockInfoNames(true) {
   }
 
   BitstreamReader(const unsigned char *Start, const unsigned char *End) {
@@ -58,12 +58,17 @@
     init(Start, End);
   }
 
+  BitstreamReader(StreamableMemoryObject *bytes) : IgnoreBlockInfoNames(true) {
+    BitcodeBytes.reset(bytes);  // The OwningPtr takes ownership of 'bytes'.
+  }
+
   void init(const unsigned char *Start, const unsigned char *End) {
-    FirstChar = Start;
-    LastChar = End;
     assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes");
+    BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End));
   }
 
+  StreamableMemoryObject &getBitcodeBytes() { return *BitcodeBytes; }
+
   ~BitstreamReader() {
     // Free the BlockInfoRecords.
     while (!BlockInfoRecords.empty()) {
@@ -75,9 +80,6 @@
       BlockInfoRecords.pop_back();
     }
   }
-  
-  const unsigned char *getFirstChar() const { return FirstChar; }
-  const unsigned char *getLastChar() const { return LastChar; }
 
   /// CollectBlockInfoNames - This is called by clients that want block/record
   /// name information.
@@ -122,7 +124,7 @@
 class BitstreamCursor {
   friend class Deserializer;
   BitstreamReader *BitStream;
-  const unsigned char *NextChar;
+  size_t NextChar;
   
   /// CurWord - This is the current data we have pulled from the stream but have
   /// not returned to the client.
@@ -156,8 +158,7 @@
   }
   
   explicit BitstreamCursor(BitstreamReader &R) : BitStream(&R) {
-    NextChar = R.getFirstChar();
-    assert(NextChar && "Bitstream not initialized yet");
+    NextChar = 0;
     CurWord = 0;
     BitsInCurWord = 0;
     CurCodeSize = 2;
@@ -167,8 +168,7 @@
     freeState();
     
     BitStream = &R;
-    NextChar = R.getFirstChar();
-    assert(NextChar && "Bitstream not initialized yet");
+    NextChar = 0;
     CurWord = 0;
     BitsInCurWord = 0;
     CurCodeSize = 2;
@@ -225,13 +225,38 @@
   /// GetAbbrevIDWidth - Return the number of bits used to encode an abbrev #.
   unsigned GetAbbrevIDWidth() const { return CurCodeSize; }
   
-  bool AtEndOfStream() const {
-    return NextChar == BitStream->getLastChar() && BitsInCurWord == 0;
+  bool isEndPos(size_t pos) {
+    return BitStream->getBitcodeBytes().isObjectEnd(static_cast<uint64_t>(pos));
+  }
+
+  bool canSkipToPos(size_t pos) const {
+    // pos can be skipped to if it is a valid address or one byte past the end.
+    return pos == 0 || BitStream->getBitcodeBytes().isValidAddress(
+        static_cast<uint64_t>(pos - 1));
+  }
+
+  unsigned char getByte(size_t pos) {
+    uint8_t byte = -1;
+    BitStream->getBitcodeBytes().readByte(pos, &byte);
+    return byte;
+  }
+
+  uint32_t getWord(size_t pos) {
+    // Assemble the word as little-endian, independent of host byte order.
+    // If the read fails the buffer keeps its all-ones fill (word == ~0U).
+    uint8_t buf[4] = { 0xFF, 0xFF, 0xFF, 0xFF };
+    BitStream->getBitcodeBytes().readBytes(pos, sizeof(buf), buf, NULL);
+    return (uint32_t(buf[0]) <<  0) | (uint32_t(buf[1]) <<  8) |
+           (uint32_t(buf[2]) << 16) | (uint32_t(buf[3]) << 24);
+  }
+
+  bool AtEndOfStream() {
+    return isEndPos(NextChar) && BitsInCurWord == 0;
   }
   
   /// GetCurrentBitNo - Return the bit # of the bit we are reading.
   uint64_t GetCurrentBitNo() const {
-    return (NextChar-BitStream->getFirstChar())*CHAR_BIT - BitsInCurWord;
+    return NextChar*CHAR_BIT - BitsInCurWord;
   }
   
   BitstreamReader *getBitStreamReader() {
@@ -246,12 +271,10 @@
   void JumpToBit(uint64_t BitNo) {
     uintptr_t ByteNo = uintptr_t(BitNo/8) & ~3;
     uintptr_t WordBitNo = uintptr_t(BitNo) & 31;
-    assert(ByteNo <= (uintptr_t)(BitStream->getLastChar()-
-                                 BitStream->getFirstChar()) &&
-           "Invalid location");
+    assert(canSkipToPos(ByteNo) && "Invalid location");
     
     // Move the cursor to the right word.
-    NextChar = BitStream->getFirstChar()+ByteNo;
+    NextChar = ByteNo;
     BitsInCurWord = 0;
     CurWord = 0;
     
@@ -272,7 +295,7 @@
     }
 
     // If we run out of data, stop at the end of the stream.
-    if (NextChar == BitStream->getLastChar()) {
+    if (isEndPos(NextChar)) {
       CurWord = 0;
       BitsInCurWord = 0;
       return 0;
@@ -281,8 +304,7 @@
     unsigned R = CurWord;
 
     // Read the next word from the stream.
-    CurWord = (NextChar[0] <<  0) | (NextChar[1] << 8) |
-              (NextChar[2] << 16) | (NextChar[3] << 24);
+    CurWord = getWord(NextChar);
     NextChar += 4;
 
     // Extract NumBits-BitsInCurWord from what we just read.
@@ -376,9 +398,8 @@
 
     // Check that the block wasn't partially defined, and that the offset isn't
     // bogus.
-    const unsigned char *const SkipTo = NextChar + NumWords*4;
-    if (AtEndOfStream() || SkipTo > BitStream->getLastChar() ||
-                           SkipTo < BitStream->getFirstChar())
+    size_t SkipTo = NextChar + NumWords*4;
+    if (AtEndOfStream() || !canSkipToPos(SkipTo))
       return true;
 
     NextChar = SkipTo;
@@ -409,8 +430,7 @@
     if (NumWordsP) *NumWordsP = NumWords;
 
     // Validate that this block is sane.
-    if (CurCodeSize == 0 || AtEndOfStream() ||
-        NextChar+NumWords*4 > BitStream->getLastChar())
+    if (CurCodeSize == 0 || AtEndOfStream())
       return true;
 
     return false;
@@ -512,24 +532,25 @@
         SkipToWord();  // 32-bit alignment
 
         // Figure out where the end of this blob will be including tail padding.
-        const unsigned char *NewEnd = NextChar+((NumElts+3)&~3);
+        size_t NewEnd = NextChar+((NumElts+3)&~3);
         
         // If this would read off the end of the bitcode file, just set the
         // record to empty and return.
-        if (NewEnd > BitStream->getLastChar()) {
+        if (!canSkipToPos(NewEnd)) {
           Vals.append(NumElts, 0);
-          NextChar = BitStream->getLastChar();
+          NextChar = BitStream->getBitcodeBytes().getExtent();
           break;
         }
         
         // Otherwise, read the number of bytes.  If we can return a reference to
         // the data, do so to avoid copying it.
         if (BlobStart) {
-          *BlobStart = (const char*)NextChar;
+          *BlobStart = (const char*)BitStream->getBitcodeBytes().getPointer(
+              NextChar, NumElts);
           *BlobLen = NumElts;
         } else {
           for (; NumElts; ++NextChar, --NumElts)
-            Vals.push_back(*NextChar);
+            Vals.push_back(getByte(NextChar));
         }
         // Skip over tail padding.
         NextChar = NewEnd;

Modified: llvm/trunk/include/llvm/Bitcode/ReaderWriter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Bitcode/ReaderWriter.h?rev=149918&r1=149917&r2=149918&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Bitcode/ReaderWriter.h (original)
+++ llvm/trunk/include/llvm/Bitcode/ReaderWriter.h Mon Feb  6 16:30:29 2012
@@ -17,35 +17,45 @@
 #include <string>
 
 namespace llvm {
-  class Module;
-  class MemoryBuffer;
-  class ModulePass;
   class BitstreamWriter;
+  class MemoryBuffer;
+  class DataStreamer;
   class LLVMContext;
+  class Module;
+  class ModulePass;
   class raw_ostream;
-  
+
   /// getLazyBitcodeModule - Read the header of the specified bitcode buffer
   /// and prepare for lazy deserialization of function bodies.  If successful,
   /// this takes ownership of 'buffer' and returns a non-null pointer.  On
   /// error, this returns null, *does not* take ownership of Buffer, and fills
   /// in *ErrMsg with an error description if ErrMsg is non-null.
   Module *getLazyBitcodeModule(MemoryBuffer *Buffer,
-                               LLVMContext& Context,
+                               LLVMContext &Context,
                                std::string *ErrMsg = 0);
 
+  /// getStreamedBitcodeModule - Read the header of the specified stream
+  /// and prepare for lazy deserialization and streaming of function bodies.
+  /// On error, this returns null, and fills in *ErrMsg with an error
+  /// description if ErrMsg is non-null.
+  Module *getStreamedBitcodeModule(const std::string &name,
+                                   DataStreamer *streamer,
+                                   LLVMContext &Context,
+                                   std::string *ErrMsg = 0);
+
   /// getBitcodeTargetTriple - Read the header of the specified bitcode
   /// buffer and extract just the triple information. If successful,
   /// this returns a string and *does not* take ownership
   /// of 'buffer'. On error, this returns "", and fills in *ErrMsg
   /// if ErrMsg is non-null.
   std::string getBitcodeTargetTriple(MemoryBuffer *Buffer,
-                                     LLVMContext& Context,
+                                     LLVMContext &Context,
                                      std::string *ErrMsg = 0);
 
   /// ParseBitcodeFile - Read the specified bitcode file, returning the module.
   /// If an error occurs, this returns null and fills in *ErrMsg if it is
   /// non-null.  This method *never* takes ownership of Buffer.
-  Module *ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext& Context,
+  Module *ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext &Context,
                            std::string *ErrMsg = 0);
 
   /// WriteBitcodeToFile - Write the specified module to the specified
@@ -60,8 +70,8 @@
   /// createBitcodeWriterPass - Create and return a pass that writes the module
   /// to the specified ostream.
   ModulePass *createBitcodeWriterPass(raw_ostream &Str);
-  
-  
+
+
   /// isBitcodeWrapper - Return true if the given bytes are the magic bytes
   /// for an LLVM IR bitcode wrapper.
   ///
@@ -109,21 +119,24 @@
   ///   uint32_t BitcodeSize;   // Size of traditional bitcode file.
   ///   ... potentially other gunk ...
   /// };
-  /// 
+  ///
   /// This function is called when we find a file with a matching magic number.
   /// In this case, skip down to the subsection of the file that is actually a
   /// BC file.
-  static inline bool SkipBitcodeWrapperHeader(unsigned char *&BufPtr,
-                                              unsigned char *&BufEnd) {
+  /// If 'VerifyBufferSize' is true, check that the buffer is large enough to
+  /// contain the whole bitcode file.
+  static inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr,
+                                              const unsigned char *&BufEnd,
+                                              bool VerifyBufferSize) {
     enum {
       KnownHeaderSize = 4*4,  // Size of header we read.
       OffsetField = 2*4,      // Offset in bytes to Offset field.
       SizeField = 3*4         // Offset in bytes to Size field.
     };
-    
+
     // Must contain the header!
     if (BufEnd-BufPtr < KnownHeaderSize) return true;
-    
+
     unsigned Offset = ( BufPtr[OffsetField  ]        |
                        (BufPtr[OffsetField+1] << 8)  |
                        (BufPtr[OffsetField+2] << 16) |
@@ -132,9 +145,9 @@
                        (BufPtr[SizeField  +1] << 8)  |
                        (BufPtr[SizeField  +2] << 16) |
                        (BufPtr[SizeField  +3] << 24));
-    
+
     // Verify that Offset+Size fits in the file.
-    if (Offset+Size > unsigned(BufEnd-BufPtr))
+    if (VerifyBufferSize && Offset+Size > unsigned(BufEnd-BufPtr))
       return true;
     BufPtr += Offset;
     BufEnd = BufPtr+Size;

Modified: llvm/trunk/include/llvm/MC/MCDisassembler.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCDisassembler.h?rev=149918&r1=149917&r2=149918&view=diff
==============================================================================
--- llvm/trunk/include/llvm/MC/MCDisassembler.h (original)
+++ llvm/trunk/include/llvm/MC/MCDisassembler.h Mon Feb  6 16:30:29 2012
@@ -79,7 +79,7 @@
   ///                   MCDisassembler::Fail if the instruction was invalid.
   virtual DecodeStatus  getInstruction(MCInst& instr,
                                        uint64_t& size,
-                                       const MemoryObject &region,
+                                       MemoryObject &region,
                                        uint64_t address,
                                        raw_ostream &vStream,
                                        raw_ostream &cStream) const = 0;

Added: llvm/trunk/include/llvm/Support/DataStream.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/DataStream.h?rev=149918&view=auto
==============================================================================
--- llvm/trunk/include/llvm/Support/DataStream.h (added)
+++ llvm/trunk/include/llvm/Support/DataStream.h Mon Feb  6 16:30:29 2012
@@ -0,0 +1,38 @@
+//===---- llvm/Support/DataStream.h - Lazy bitcode streaming -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines DataStreamer, which fetches bytes of data from
+// a stream source. It provides support for streaming (lazy reading) of
+// data, e.g. bitcode
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_SUPPORT_DATASTREAM_H_
+#define LLVM_SUPPORT_DATASTREAM_H_
+
+#include <string>
+
+namespace llvm {
+
+class DataStreamer {
+public:
+  /// Fetch up to len bytes from the stream, and write them to the
+  /// buffer pointed to by buf. Returns the number of bytes actually written.
+  virtual size_t GetBytes(unsigned char *buf, size_t len) = 0;
+
+  virtual ~DataStreamer();
+};
+
+DataStreamer *getDataFileStreamer(const std::string &Filename,
+                                  std::string *Err);
+
+}
+
+#endif  // LLVM_SUPPORT_DATASTREAM_H_

Modified: llvm/trunk/include/llvm/Support/MemoryObject.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/MemoryObject.h?rev=149918&r1=149917&r2=149918&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Support/MemoryObject.h (original)
+++ llvm/trunk/include/llvm/Support/MemoryObject.h Mon Feb  6 16:30:29 2012
@@ -34,7 +34,7 @@
   ///                   is getBase() + getExtent() - 1).
   ///
   /// @result         - The size of the region.
-  virtual uint64_t getExtent() const = 0;
+  virtual uint64_t getExtent() = 0;
   
   /// readByte        - Tries to read a single byte from the region.
   ///
@@ -42,7 +42,7 @@
   /// @param ptr      - A pointer to a byte to be filled in.  Must be non-NULL.
   /// @result         - 0 if successful; -1 if not.  Failure may be due to a
   ///                   bounds violation or an implementation-specific error.
-  virtual int readByte(uint64_t address, uint8_t* ptr) const = 0;
+  virtual int readByte(uint64_t address, uint8_t* ptr) = 0;
   
   /// readBytes       - Tries to read a contiguous range of bytes from the
   ///                   region, up to the end of the region.
@@ -61,7 +61,7 @@
   virtual int readBytes(uint64_t address,
                         uint64_t size,
                         uint8_t* buf,
-                        uint64_t* copied) const;
+                        uint64_t* copied);
 };
 
 }

Added: llvm/trunk/include/llvm/Support/StreamableMemoryObject.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/StreamableMemoryObject.h?rev=149918&view=auto
==============================================================================
--- llvm/trunk/include/llvm/Support/StreamableMemoryObject.h (added)
+++ llvm/trunk/include/llvm/Support/StreamableMemoryObject.h Mon Feb  6 16:30:29 2012
@@ -0,0 +1,181 @@
+//===- StreamableMemoryObject.h - Streamable data interface - -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef STREAMABLEMEMORYOBJECT_H_
+#define STREAMABLEMEMORYOBJECT_H_
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/DataStream.h"
+#include <vector>
+
+namespace llvm {
+
+/// StreamableMemoryObject - Interface to data which might be streamed.
+/// Streamability has 2 important implications/restrictions. First, the data
+/// might not yet exist in memory when the request is made. This just means
+/// that readByte/readBytes might have to block or do some work to get it.
+/// More significantly, the exact size of the object might not be known until
+/// it has all been fetched. This means that to return the right result,
+/// getExtent must also wait for all the data to arrive; therefore it should
+/// not be called on objects which are actually streamed (this would defeat
+/// the purpose of streaming). Instead, isValidAddress and isObjectEnd can be
+/// used to test addresses without knowing the exact size of the stream.
+/// Finally, getPointer can be used instead of readBytes to avoid extra copying.
+class StreamableMemoryObject : public MemoryObject {
+ public:
+  /// Destructor      - Override as necessary.
+  virtual ~StreamableMemoryObject();
+
+  /// getBase         - Returns the lowest valid address in the region.
+  ///
+  /// @result         - The lowest valid address.
+  virtual uint64_t getBase() const = 0;
+
+  /// getExtent       - Returns the size of the region in bytes.  (The region is
+  ///                   contiguous, so the highest valid address of the region
+  ///                   is getBase() + getExtent() - 1).
+  ///                   May block until all bytes in the stream have been read
+  ///
+  /// @result         - The size of the region.
+  virtual uint64_t getExtent() = 0;
+
+  /// readByte        - Tries to read a single byte from the region.
+  ///                   May block until (address - base) bytes have been read
+  /// @param address  - The address of the byte, in the same space as getBase().
+  /// @param ptr      - A pointer to a byte to be filled in.  Must be non-NULL.
+  /// @result         - 0 if successful; -1 if not.  Failure may be due to a
+  ///                   bounds violation or an implementation-specific error.
+  virtual int readByte(uint64_t address, uint8_t* ptr) = 0;
+
+  /// readBytes       - Tries to read a contiguous range of bytes from the
+  ///                   region, up to the end of the region.
+  ///                   May block until (address - base + size) bytes have
+  ///                   been read. Additionally, StreamableMemoryObjects will
+  ///                   not do partial reads - if size bytes cannot be read,
+  ///                   readBytes will fail.
+  ///
+  /// @param address  - The address of the first byte, in the same space as
+  ///                   getBase().
+  /// @param size     - The maximum number of bytes to copy.
+  /// @param buf      - A pointer to a buffer to be filled in.  Must be non-NULL
+  ///                   and large enough to hold size bytes.
+  /// @param copied   - A pointer to a number that is filled in with the number
+  ///                   of bytes actually read.  May be NULL.
+  /// @result         - 0 if successful; -1 if not.  Failure may be due to a
+  ///                   bounds violation or an implementation-specific error.
+  virtual int readBytes(uint64_t address,
+                        uint64_t size,
+                        uint8_t* buf,
+                        uint64_t* copied) = 0;
+
+  /// getPointer  - Ensures that the requested data is in memory, and returns
+  ///               A pointer to it. More efficient than using readBytes if the
+  ///               data is already in memory.
+  ///               May block until (address - base + size) bytes have been read
+  /// @param address - address of the byte, in the same space as getBase()
+  /// @param size    - amount of data that must be available on return
+  /// @result        - valid pointer to the requested data
+  virtual const uint8_t *getPointer(uint64_t address, uint64_t size) = 0;
+
+  /// isValidAddress - Returns true if the address is within the object
+  ///                  (i.e. between base and base + extent - 1 inclusive)
+  ///                  May block until (address - base) bytes have been read
+  /// @param address - address of the byte, in the same space as getBase()
+  /// @result        - true if the address may be read with readByte()
+  virtual bool isValidAddress(uint64_t address) = 0;
+
+  /// isObjectEnd    - Returns true if the address is one past the end of the
+  ///                  object (i.e. if it is equal to base + extent)
+  ///                  May block until (address - base) bytes have been read
+  /// @param address - address of the byte, in the same space as getBase()
+  /// @result        - true if the address is equal to base + extent
+  virtual bool isObjectEnd(uint64_t address) = 0;
+};
+
+/// StreamingMemoryObject - interface to data which is actually streamed from
+/// a DataStreamer. In addition to inherited members, it has the
+/// dropLeadingBytes and setKnownObjectSize methods which are not applicable
+/// to non-streamed objects.
+class StreamingMemoryObject : public StreamableMemoryObject {
+public:
+  StreamingMemoryObject(DataStreamer *streamer);
+  virtual uint64_t getBase() const { return 0; }
+  virtual uint64_t getExtent();
+  virtual int readByte(uint64_t address, uint8_t* ptr);
+  virtual int readBytes(uint64_t address,
+                        uint64_t size,
+                        uint8_t* buf,
+                        uint64_t* copied);
+  virtual const uint8_t *getPointer(uint64_t address, uint64_t size) {
+    // This could be fixed by ensuring the bytes are fetched and making a copy,
+    // requiring that the bitcode size be known, or otherwise ensuring that
+    // the memory doesn't go away/get reallocated, but it's
+    // not currently necessary. Users that need the pointer don't stream.
+    assert(0 && "getPointer in streaming memory objects not allowed");
+    return NULL;
+  }
+  virtual bool isValidAddress(uint64_t address);
+  virtual bool isObjectEnd(uint64_t address);
+
+  /// Drop s bytes from the front of the stream, pushing the positions of the
+  /// remaining bytes down by s. This is used to skip past the bitcode header,
+  /// since we don't know a priori if it's present, and we can't put bytes
+  /// back into the stream once we've read them.
+  bool dropLeadingBytes(size_t s);
+
+  /// If the data object size is known in advance, many of the operations can
+  /// be made more efficient, so this method should be called before reading
+  /// starts (although it can be called anytime).
+  void setKnownObjectSize(size_t size);
+
+private:
+  const static uint32_t kChunkSize = 4096 * 4;
+  std::vector<unsigned char> Bytes;
+  OwningPtr<DataStreamer> Streamer;
+  size_t BytesRead;   // Bytes read from stream
+  size_t BytesSkipped;// Bytes skipped at start of stream (e.g. wrapper/header)
+  size_t ObjectSize; // 0 if unknown, set if wrapper was seen or EOF reached
+  bool EOFReached;
+
+  // Fetch enough bytes such that Pos can be read or EOF is reached
+  // (i.e. BytesRead > Pos). Return true if Pos can be read (unlike most
+  // BitcodeReader functions, true means success). Fetches kChunkSize bytes at
+  // a time to avoid many potentially expensive GetBytes calls. Bytes must also
+  // hold the BytesSkipped prefix: chunks land at BytesRead + BytesSkipped.
+  bool fetchToPos(size_t Pos) {
+    if (EOFReached) return Pos < ObjectSize;
+    while (Pos >= BytesRead) {
+      Bytes.resize(BytesRead + BytesSkipped + kChunkSize);
+      size_t bytes = Streamer->GetBytes(&Bytes[BytesRead + BytesSkipped],
+                                        kChunkSize);
+      BytesRead += bytes;
+      if (bytes < kChunkSize) {
+        if (ObjectSize && BytesRead < Pos)
+          assert(0 && "Unexpected short read fetching bitcode");
+        if (BytesRead <= Pos) { // reached EOF/ran out of bytes
+          ObjectSize = BytesRead;
+          EOFReached = true;
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  StreamingMemoryObject(const StreamingMemoryObject&);  // DO NOT IMPLEMENT
+  void operator=(const StreamingMemoryObject&);  // DO NOT IMPLEMENT
+};
+
+StreamableMemoryObject *getNonStreamedMemoryObject(
+    const unsigned char *Start, const unsigned char *End);
+
+}
+#endif  // STREAMABLEMEMORYOBJECT_H_

Modified: llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp?rev=149918&r1=149917&r2=149918&view=diff
==============================================================================
--- llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp (original)
+++ llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp Mon Feb  6 16:30:29 2012
@@ -22,6 +22,7 @@
 #include "llvm/AutoUpgrade.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DataStream.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/OperandTraits.h"
@@ -1409,8 +1410,36 @@
   return false;
 }
 
-bool BitcodeReader::ParseModule() {
-  if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
+bool BitcodeReader::GlobalCleanup() {
+  // Patch the initializers for globals and aliases up.
+  ResolveGlobalAndAliasInits();
+  if (!GlobalInits.empty() || !AliasInits.empty())
+    return Error("Malformed global initializer set");
+
+  // Look for intrinsic functions which need to be upgraded at some point
+  for (Module::iterator FI = TheModule->begin(), FE = TheModule->end();
+       FI != FE; ++FI) {
+    Function *NewFn;
+    if (UpgradeIntrinsicFunction(FI, NewFn))
+      UpgradedIntrinsics.push_back(std::make_pair(FI, NewFn));
+  }
+
+  // Look for global variables which need to be renamed.
+  for (Module::global_iterator
+         GI = TheModule->global_begin(), GE = TheModule->global_end();
+       GI != GE; ++GI)
+    UpgradeGlobalVariable(GI);
+  // Force deallocation of memory for these vectors to favor clients that
+  // want lazy deserialization.
+  std::vector<std::pair<GlobalVariable*, unsigned> >().swap(GlobalInits);
+  std::vector<std::pair<GlobalAlias*, unsigned> >().swap(AliasInits);
+  return false;
+}
+
+bool BitcodeReader::ParseModule(bool Resume) {
+  if (Resume)
+    Stream.JumpToBit(NextUnreadBit);
+  else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
     return Error("Malformed block record");
 
   SmallVector<uint64_t, 64> Record;
@@ -1424,33 +1453,7 @@
       if (Stream.ReadBlockEnd())
         return Error("Error at end of module block");
 
-      // Patch the initializers for globals and aliases up.
-      ResolveGlobalAndAliasInits();
-      if (!GlobalInits.empty() || !AliasInits.empty())
-        return Error("Malformed global initializer set");
-      if (!FunctionsWithBodies.empty())
-        return Error("Too few function bodies found");
-
-      // Look for intrinsic functions which need to be upgraded at some point
-      for (Module::iterator FI = TheModule->begin(), FE = TheModule->end();
-           FI != FE; ++FI) {
-        Function* NewFn;
-        if (UpgradeIntrinsicFunction(FI, NewFn))
-          UpgradedIntrinsics.push_back(std::make_pair(FI, NewFn));
-      }
-
-      // Look for global variables which need to be renamed.
-      for (Module::global_iterator
-             GI = TheModule->global_begin(), GE = TheModule->global_end();
-           GI != GE; ++GI)
-        UpgradeGlobalVariable(GI);
-
-      // Force deallocation of memory for these vectors to favor the client that
-      // want lazy deserialization.
-      std::vector<std::pair<GlobalVariable*, unsigned> >().swap(GlobalInits);
-      std::vector<std::pair<GlobalAlias*, unsigned> >().swap(AliasInits);
-      std::vector<Function*>().swap(FunctionsWithBodies);
-      return false;
+      return GlobalCleanup();
     }
 
     if (Code == bitc::ENTER_SUBBLOCK) {
@@ -1474,6 +1477,7 @@
       case bitc::VALUE_SYMTAB_BLOCK_ID:
         if (ParseValueSymbolTable())
           return true;
+        SeenValueSymbolTable = true;
         break;
       case bitc::CONSTANTS_BLOCK_ID:
         if (ParseConstants() || ResolveGlobalAndAliasInits())
@@ -1486,13 +1490,25 @@
       case bitc::FUNCTION_BLOCK_ID:
         // If this is the first function body we've seen, reverse the
         // FunctionsWithBodies list.
-        if (!HasReversedFunctionsWithBodies) {
+        if (!SeenFirstFunctionBody) {
           std::reverse(FunctionsWithBodies.begin(), FunctionsWithBodies.end());
-          HasReversedFunctionsWithBodies = true;
+          if (GlobalCleanup())
+            return true;
+          SeenFirstFunctionBody = true;
         }
 
         if (RememberAndSkipFunctionBody())
           return true;
+        // For streaming bitcode, suspend parsing when we reach the function
+        // bodies. Subsequent materialization calls will resume it when
+        // necessary. For streaming, the function bodies must be at the end of
+        // the bitcode. If the bitcode file is old, the symbol table will be
+        // at the end instead and will not have been seen yet. In this case,
+        // just finish the parse now.
+        if (LazyStreamer && SeenValueSymbolTable) {
+          NextUnreadBit = Stream.GetCurrentBitNo();
+          return false;
+        }
         break;
       case bitc::USELIST_BLOCK_ID:
         if (ParseUseLists())
@@ -1651,8 +1667,10 @@
 
       // If this is a function with a body, remember the prototype we are
       // creating now, so that we can match up the body with them later.
-      if (!isProto)
+      if (!isProto) {
         FunctionsWithBodies.push_back(Func);
+        if (LazyStreamer) DeferredFunctionInfo[Func] = 0;
+      }
       break;
     }
     // ALIAS: [alias type, aliasee val#, linkage]
@@ -1691,24 +1709,7 @@
 bool BitcodeReader::ParseBitcodeInto(Module *M) {
   TheModule = 0;
 
-  unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart();
-  unsigned char *BufEnd = BufPtr+Buffer->getBufferSize();
-
-  if (Buffer->getBufferSize() & 3) {
-    if (!isRawBitcode(BufPtr, BufEnd) && !isBitcodeWrapper(BufPtr, BufEnd))
-      return Error("Invalid bitcode signature");
-    else
-      return Error("Bitcode stream should be a multiple of 4 bytes in length");
-  }
-
-  // If we have a wrapper header, parse it and ignore the non-bc file contents.
-  // The magic number is 0x0B17C0DE stored in little endian.
-  if (isBitcodeWrapper(BufPtr, BufEnd))
-    if (SkipBitcodeWrapperHeader(BufPtr, BufEnd))
-      return Error("Invalid bitcode wrapper header");
-
-  StreamFile.init(BufPtr, BufEnd);
-  Stream.init(StreamFile);
+  if (InitStream()) return true;
 
   // Sniff for the signature.
   if (Stream.Read(8) != 'B' ||
@@ -1750,8 +1751,9 @@
       if (TheModule)
         return Error("Multiple MODULE_BLOCKs in same stream");
       TheModule = M;
-      if (ParseModule())
+      if (ParseModule(false))
         return true;
+      if (LazyStreamer) return false;
       break;
     default:
       if (Stream.SkipBlock())
@@ -1819,20 +1821,7 @@
 }
 
 bool BitcodeReader::ParseTriple(std::string &Triple) {
-  if (Buffer->getBufferSize() & 3)
-    return Error("Bitcode stream should be a multiple of 4 bytes in length");
-
-  unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart();
-  unsigned char *BufEnd = BufPtr+Buffer->getBufferSize();
-
-  // If we have a wrapper header, parse it and ignore the non-bc file contents.
-  // The magic number is 0x0B17C0DE stored in little endian.
-  if (isBitcodeWrapper(BufPtr, BufEnd))
-    if (SkipBitcodeWrapperHeader(BufPtr, BufEnd))
-      return Error("Invalid bitcode wrapper header");
-
-  StreamFile.init(BufPtr, BufEnd);
-  Stream.init(StreamFile);
+  if (InitStream()) return true;
 
   // Sniff for the signature.
   if (Stream.Read(8) != 'B' ||
@@ -2708,6 +2697,19 @@
   return false;
 }
 
+/// FindFunctionInStream - Keep resuming the module parse until the entry
+/// referenced by DeferredFunctionInfoIterator holds a nonzero bit position.
+/// Returns false once the position is known; otherwise returns the result of
+/// Error() when the stream is exhausted, or true when ParseModule fails.
+bool BitcodeReader::FindFunctionInStream(Function *F,
+       DenseMap<Function*, uint64_t>::iterator DeferredFunctionInfoIterator) {
+  for (;;) {
+    // A nonzero position means the body has already been located.
+    if (DeferredFunctionInfoIterator->second != 0)
+      return false;
+    if (Stream.AtEndOfStream())
+      return Error("Could not find Function in stream");
+    // ParseModule will parse the next body in the stream and set its
+    // position in the DeferredFunctionInfo map.
+    if (ParseModule(true))
+      return true;
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // GVMaterializer implementation
 //===----------------------------------------------------------------------===//
@@ -2728,6 +2730,10 @@
 
   DenseMap<Function*, uint64_t>::iterator DFII = DeferredFunctionInfo.find(F);
   assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!");
+  // If its position is recorded as 0, its body is somewhere in the stream
+  // but we haven't seen it yet.
+  if (DFII->second == 0)
+    if (LazyStreamer && FindFunctionInStream(F, DFII)) return true;
 
   // Move the bit stream to the saved position of the deferred function body.
   Stream.JumpToBit(DFII->second);
@@ -2805,6 +2811,57 @@
   return false;
 }
 
+/// InitStream - Prepare Stream for reading: from the lazy streamer when one
+/// was supplied, from the in-memory buffer otherwise.
+bool BitcodeReader::InitStream() {
+  return LazyStreamer ? InitLazyStream() : InitStreamFromBuffer();
+}
+
+/// InitStreamFromBuffer - Point Stream at the contents of Buffer, skipping a
+/// bitcode wrapper header when one is present.
+bool BitcodeReader::InitStreamFromBuffer() {
+  const unsigned char *Begin = (unsigned char *)Buffer->getBufferStart();
+  const unsigned char *End = Begin+Buffer->getBufferSize();
+
+  // The buffer length must be a multiple of 4 bytes. When it is not, a bad
+  // signature is reported in preference to the length problem.
+  if (Buffer->getBufferSize() & 3) {
+    if (isRawBitcode(Begin, End) || isBitcodeWrapper(Begin, End))
+      return Error("Bitcode stream should be a multiple of 4 bytes in length");
+    return Error("Invalid bitcode signature");
+  }
+
+  // If we have a wrapper header, parse it and ignore the non-bc file contents.
+  // The magic number is 0x0B17C0DE stored in little endian.
+  if (isBitcodeWrapper(Begin, End) &&
+      SkipBitcodeWrapperHeader(Begin, End, true))
+    return Error("Invalid bitcode wrapper header");
+
+  StreamFile.reset(new BitstreamReader(Begin, End));
+  Stream.init(*StreamFile);
+
+  return false;
+}
+
+/// InitLazyStream - Set up Stream to read from LazyStreamer. The first 16
+/// bytes are inspected to validate the signature and, when a bitcode wrapper
+/// header is present, to strip it, because BitstreamReader expects never to
+/// see the wrapper.
+bool BitcodeReader::InitLazyStream() {
+  StreamingMemoryObject *Bytes = new StreamingMemoryObject(LazyStreamer);
+  StreamFile.reset(new BitstreamReader(Bytes));
+  Stream.init(*StreamFile);
+
+  unsigned char buf[16];
+  if (Bytes->readBytes(0, 16, buf, NULL) == -1)
+    return Error("Bitcode stream must be at least 16 bytes in length");
+
+  if (!isBitcode(buf, buf + 16))
+    return Error("Invalid bitcode signature");
+
+  if (isBitcodeWrapper(buf, buf + 4)) {
+    const unsigned char *bitcodeStart = buf;
+    const unsigned char *bitcodeEnd = buf + 16;
+    // Do not ignore failures here: dropLeadingBytes reports failure when the
+    // requested skip exceeds the bytes read so far, in which case the wrapper
+    // would be left in place and the size recorded below would not describe
+    // the data actually being read.
+    if (SkipBitcodeWrapperHeader(bitcodeStart, bitcodeEnd, false) ||
+        Bytes->dropLeadingBytes(bitcodeStart - buf))
+      return Error("Invalid bitcode wrapper header");
+    Bytes->setKnownObjectSize(bitcodeEnd - bitcodeStart);
+  }
+  return false;
+}
 
 //===----------------------------------------------------------------------===//
 // External interface
@@ -2833,6 +2890,24 @@
   return M;
 }
 
+
+/// getStreamedBitcodeModule - Create a module called name and parse it from
+/// streamer using a BitcodeReader that is installed as the module's
+/// materializer. If parsing fails, the module is deleted, *ErrMsg is filled in
+/// when ErrMsg is non-null, and null is returned.
+Module *llvm::getStreamedBitcodeModule(const std::string &name,
+                                       DataStreamer *streamer,
+                                       LLVMContext &Context,
+                                       std::string *ErrMsg) {
+  Module *M = new Module(name, Context);
+  BitcodeReader *R = new BitcodeReader(streamer, Context);
+  M->setMaterializer(R);
+
+  if (!R->ParseBitcodeInto(M)) {
+    R->setBufferOwned(false); // no buffer to delete
+    return M;
+  }
+
+  // Parsing failed: hand back the reader's message before tearing down.
+  if (ErrMsg)
+    *ErrMsg = R->getErrorString();
+  delete M;  // Also deletes R.
+  return 0;
+}
+
 /// ParseBitcodeFile - Read the specified bitcode file, returning the module.
 /// If an error occurs, return null and fill in *ErrMsg if non-null.
 Module *llvm::ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext& Context,

Modified: llvm/trunk/lib/Bitcode/Reader/BitcodeReader.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Bitcode/Reader/BitcodeReader.h?rev=149918&r1=149917&r2=149918&view=diff
==============================================================================
--- llvm/trunk/lib/Bitcode/Reader/BitcodeReader.h (original)
+++ llvm/trunk/lib/Bitcode/Reader/BitcodeReader.h Mon Feb  6 16:30:29 2012
@@ -126,8 +126,11 @@
   Module *TheModule;
   MemoryBuffer *Buffer;
   bool BufferOwned;
-  BitstreamReader StreamFile;
+  OwningPtr<BitstreamReader> StreamFile;
   BitstreamCursor Stream;
+  DataStreamer *LazyStreamer;
+  uint64_t NextUnreadBit;
+  bool SeenValueSymbolTable;
   
   const char *ErrorString;
   
@@ -161,9 +164,10 @@
   // Map the bitcode's custom MDKind ID to the Module's MDKind ID.
   DenseMap<unsigned, unsigned> MDKindMap;
   
-  // After the module header has been read, the FunctionsWithBodies list is 
-  // reversed.  This keeps track of whether we've done this yet.
-  bool HasReversedFunctionsWithBodies;
+  // Several operations happen after the module header has been read, but
+  // before function bodies are processed. This keeps track of whether
+  // we've done this yet.
+  bool SeenFirstFunctionBody;
   
   /// DeferredFunctionInfo - When function bodies are initially scanned, this
   /// map contains info about where to find deferred function body in the
@@ -178,8 +182,13 @@
 public:
   explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C)
     : Context(C), TheModule(0), Buffer(buffer), BufferOwned(false),
-      ErrorString(0), ValueList(C), MDValueList(C) {
-    HasReversedFunctionsWithBodies = false;
+      LazyStreamer(0), SeenValueSymbolTable(false), ErrorString(0),
+      ValueList(C), MDValueList(C), SeenFirstFunctionBody(false) {
+  }
+  explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C)
+    : Context(C), TheModule(0), Buffer(0), BufferOwned(false),
+      LazyStreamer(streamer), SeenValueSymbolTable(false), ErrorString(0),
+      ValueList(C), MDValueList(C), SeenFirstFunctionBody(false) {
   }
   ~BitcodeReader() {
     FreeState();
@@ -258,7 +267,7 @@
   }
 
   
-  bool ParseModule();
+  bool ParseModule(bool Resume);
   bool ParseAttributeBlock();
   bool ParseTypeTable();
   bool ParseTypeTableBody();
@@ -267,11 +276,17 @@
   bool ParseConstants();
   bool RememberAndSkipFunctionBody();
   bool ParseFunctionBody(Function *F);
+  bool GlobalCleanup();
   bool ResolveGlobalAndAliasInits();
   bool ParseMetadata();
   bool ParseMetadataAttachment();
   bool ParseModuleTriple(std::string &Triple);
   bool ParseUseLists();
+  bool InitStream();
+  bool InitStreamFromBuffer();
+  bool InitLazyStream();
+  bool FindFunctionInStream(Function *F,
+         DenseMap<Function*, uint64_t>::iterator DeferredFunctionInfoIterator);
 };
   
 } // End llvm namespace

Modified: llvm/trunk/lib/Bitcode/Writer/BitcodeWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Bitcode/Writer/BitcodeWriter.cpp?rev=149918&r1=149917&r2=149918&view=diff
==============================================================================
--- llvm/trunk/lib/Bitcode/Writer/BitcodeWriter.cpp (original)
+++ llvm/trunk/lib/Bitcode/Writer/BitcodeWriter.cpp Mon Feb  6 16:30:29 2012
@@ -1738,11 +1738,6 @@
   // Emit metadata.
   WriteModuleMetadata(M, VE, Stream);
 
-  // Emit function bodies.
-  for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F)
-    if (!F->isDeclaration())
-      WriteFunction(*F, VE, Stream);
-
   // Emit metadata.
   WriteModuleMetadataStore(M, Stream);
 
@@ -1753,6 +1748,11 @@
   if (EnablePreserveUseListOrdering)
     WriteModuleUseLists(M, VE, Stream);
 
+  // Emit function bodies.
+  for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F)
+    if (!F->isDeclaration())
+      WriteFunction(*F, VE, Stream);
+
   Stream.ExitBlock();
 }
 

Modified: llvm/trunk/lib/MC/MCDisassembler/Disassembler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCDisassembler/Disassembler.cpp?rev=149918&r1=149917&r2=149918&view=diff
==============================================================================
--- llvm/trunk/lib/MC/MCDisassembler/Disassembler.cpp (original)
+++ llvm/trunk/lib/MC/MCDisassembler/Disassembler.cpp Mon Feb  6 16:30:29 2012
@@ -100,9 +100,9 @@
                      Bytes(bytes), Size(size), BasePC(basePC) {}
  
   uint64_t getBase() const { return BasePC; }
-  uint64_t getExtent() const { return Size; }
+  uint64_t getExtent() { return Size; }
 
-  int readByte(uint64_t Addr, uint8_t *Byte) const {
+  int readByte(uint64_t Addr, uint8_t *Byte) {
     if (Addr - BasePC >= Size)
       return -1;
     *Byte = Bytes[Addr - BasePC];

Modified: llvm/trunk/lib/MC/MCDisassembler/EDDisassembler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCDisassembler/EDDisassembler.cpp?rev=149918&r1=149917&r2=149918&view=diff
==============================================================================
--- llvm/trunk/lib/MC/MCDisassembler/EDDisassembler.cpp (original)
+++ llvm/trunk/lib/MC/MCDisassembler/EDDisassembler.cpp Mon Feb  6 16:30:29 2012
@@ -207,8 +207,8 @@
                    void *arg) : Callback(callback), Arg(arg) { }
     ~EDMemoryObject() { }
     uint64_t getBase() const { return 0x0; }
-    uint64_t getExtent() const { return (uint64_t)-1; }
-    int readByte(uint64_t address, uint8_t *ptr) const {
+    uint64_t getExtent() { return (uint64_t)-1; }
+    int readByte(uint64_t address, uint8_t *ptr) {
       if (!Callback)
         return -1;
       

Modified: llvm/trunk/lib/Support/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/CMakeLists.txt?rev=149918&r1=149917&r2=149918&view=diff
==============================================================================
--- llvm/trunk/lib/Support/CMakeLists.txt (original)
+++ llvm/trunk/lib/Support/CMakeLists.txt Mon Feb  6 16:30:29 2012
@@ -16,6 +16,7 @@
   ConstantRange.cpp
   CrashRecoveryContext.cpp
   DataExtractor.cpp
+  DataStream.cpp
   Debug.cpp
   DeltaAlgorithm.cpp
   DAGDeltaAlgorithm.cpp
@@ -42,6 +43,7 @@
   SmallVector.cpp
   SourceMgr.cpp
   Statistic.cpp
+  StreamableMemoryObject.cpp
   StringExtras.cpp
   StringMap.cpp
   StringPool.cpp

Added: llvm/trunk/lib/Support/DataStream.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/DataStream.cpp?rev=149918&view=auto
==============================================================================
--- llvm/trunk/lib/Support/DataStream.cpp (added)
+++ llvm/trunk/lib/Support/DataStream.cpp Mon Feb  6 16:30:29 2012
@@ -0,0 +1,96 @@
+//===--- llvm/Support/DataStream.cpp - Lazy streamed Data               ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements DataStreamer, which fetches bytes of Data from
+// a stream source. It provides support for streaming (lazy reading) of
+// bitcode. An example implementation of streaming from a file or stdin
+// is included.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "Data-stream"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/DataStream.h"
+#include "llvm/Support/system_error.h"
+#include <string>
+#include <cerrno>
+#include <cstdio>
+#if !defined(_MSC_VER) && !defined(__MINGW32__)
+#include <unistd.h>
+#else
+#include <io.h>
+#endif
+#include <fcntl.h>
+using namespace llvm;
+
+// Interface goals:
+// * StreamableMemoryObject doesn't care about complexities like using
+//   threads/async callbacks to actually overlap download+compile
+// * Don't want to duplicate Data in memory
+// * Don't need to know total Data len in advance
+// Non-goals:
+// StreamableMemoryObject already has random access so this interface only does
+// in-order streaming (no arbitrary seeking, else we'd have to buffer all the
+// Data here in addition to MemoryObject).  This also means that if we want
+// to be able to free Data, BitstreamBytes/BitcodeReader will implement it.
+
+STATISTIC(NumStreamFetches, "Number of calls to Data stream fetch");
+
+namespace llvm {
+DataStreamer::~DataStreamer() {}
+}
+
+namespace {
+
+const static error_code success;
+
+// Very simple stream backed by a file descriptor. Mostly useful for stdin and
+// debugging; actual file access is probably still best done with mmap.
+class DataFileStreamer : public DataStreamer {
+  int Fd;
+  // True only when OpenFile opened Fd itself. Stdin is borrowed, not owned,
+  // so it must not be closed by this object.
+  bool OwnsFd;
+public:
+  DataFileStreamer() : Fd(0), OwnsFd(false) {}
+  virtual ~DataFileStreamer() {
+    // Never close stdin or a descriptor that failed to open.
+    if (OwnsFd)
+      close(Fd);
+  }
+
+  // Read up to len bytes into buf and return the number of bytes stored.
+  // A failed read is reported as 0 bytes: returning read()'s -1 through the
+  // size_t return type would turn it into a huge byte count.
+  virtual size_t GetBytes(unsigned char *buf, size_t len) {
+    NumStreamFetches++;
+    long BytesStored = read(Fd, buf, len);
+    if (BytesStored < 0)
+      return 0;
+    return static_cast<size_t>(BytesStored);
+  }
+
+  // Select the input: stdin when Filename is "-", otherwise the named file
+  // opened read-only. Returns an errno-based error_code if open fails and
+  // the default-constructed error_code `success` otherwise.
+  error_code OpenFile(const std::string &Filename) {
+    if (Filename == "-") {
+      Fd = 0;
+      OwnsFd = false;
+      return success;
+    }
+
+    int OpenFlags = O_RDONLY;
+#ifdef O_BINARY
+    OpenFlags |= O_BINARY;  // Open input file in binary mode on win32.
+#endif
+    Fd = ::open(Filename.c_str(), OpenFlags);
+    OwnsFd = Fd != -1;
+    if (!OwnsFd)
+      return error_code(errno, posix_category());
+    return success;
+  }
+};
+
+}
+
+namespace llvm {
+// Create a DataFileStreamer reading from Filename ("-" selects stdin).
+// Returns NULL if the input cannot be opened; in that case a description of
+// the failure is stored in *StrError when StrError is non-null.
+DataStreamer *getDataFileStreamer(const std::string &Filename,
+                                  std::string *StrError) {
+  DataFileStreamer *s = new DataFileStreamer();
+  error_code e = s->OpenFile(Filename);
+  if (e == success)
+    return s;
+
+  // Opening failed: report it if the caller asked for a message, and release
+  // the streamer so that it is not leaked.
+  if (StrError)
+    *StrError = std::string("Could not open ") + Filename + ": " +
+        e.message() + "\n";
+  delete s;
+  return NULL;
+}
+
+}

Modified: llvm/trunk/lib/Support/MemoryObject.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/MemoryObject.cpp?rev=149918&r1=149917&r2=149918&view=diff
==============================================================================
--- llvm/trunk/lib/Support/MemoryObject.cpp (original)
+++ llvm/trunk/lib/Support/MemoryObject.cpp Mon Feb  6 16:30:29 2012
@@ -16,7 +16,7 @@
 int MemoryObject::readBytes(uint64_t address,
                             uint64_t size,
                             uint8_t* buf,
-                            uint64_t* copied) const {
+                            uint64_t* copied) {
   uint64_t current = address;
   uint64_t limit = getBase() + getExtent();
 

Added: llvm/trunk/lib/Support/StreamableMemoryObject.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/StreamableMemoryObject.cpp?rev=149918&view=auto
==============================================================================
--- llvm/trunk/lib/Support/StreamableMemoryObject.cpp (added)
+++ llvm/trunk/lib/Support/StreamableMemoryObject.cpp Mon Feb  6 16:30:29 2012
@@ -0,0 +1,137 @@
+//===- StreamableMemoryObject.cpp - Streamable data interface - -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/StreamableMemoryObject.h"
+#include <cassert>
+#include <cstring>
+
+
+using namespace llvm;
+
+namespace {
+
+// RawMemoryObject - A StreamableMemoryObject over a fixed byte range
+// [Start, End) that is already in memory. Addresses are zero-based offsets
+// into that range.
+class RawMemoryObject : public StreamableMemoryObject {
+public:
+  RawMemoryObject(const unsigned char *Start, const unsigned char *End) :
+    FirstChar(Start), LastChar(End) {
+    assert(LastChar > FirstChar && "Invalid start/end range");
+  }
+
+  virtual uint64_t getBase() const { return 0; }
+  virtual uint64_t getExtent() { return LastChar - FirstChar; }
+  virtual int readByte(uint64_t address, uint8_t* ptr);
+  virtual int readBytes(uint64_t address,
+                        uint64_t size,
+                        uint8_t* buf,
+                        uint64_t* copied);
+  virtual const uint8_t *getPointer(uint64_t address, uint64_t size);
+  virtual bool isValidAddress(uint64_t address) {return validAddress(address);}
+  virtual bool isObjectEnd(uint64_t address) {return objectEnd(address);}
+
+private:
+  const uint8_t* const FirstChar;
+  const uint8_t* const LastChar;
+
+  // Number of bytes in the range. Kept unsigned so that address comparisons
+  // are never performed on a value that was narrowed or made negative.
+  uint64_t rangeSize() const {
+    return static_cast<uint64_t>(LastChar - FirstChar);
+  }
+
+  // These are implemented as inline functions here to avoid multiple virtual
+  // calls per public function
+  bool validAddress(uint64_t address) const {
+    return address < rangeSize();
+  }
+  bool objectEnd(uint64_t address) const {
+    return address == rangeSize();
+  }
+
+  RawMemoryObject(const RawMemoryObject&);  // DO NOT IMPLEMENT
+  void operator=(const RawMemoryObject&);  // DO NOT IMPLEMENT
+};
+
+// Store the byte at address in *ptr. Returns 0 on success and -1 if address
+// lies outside the range.
+int RawMemoryObject::readByte(uint64_t address, uint8_t* ptr) {
+  if (!validAddress(address)) return -1;
+  *ptr = FirstChar[address];
+  return 0;
+}
+
+// Copy size bytes starting at address into buf and, when copied is non-null,
+// store the byte count in *copied. Returns 0 on success and -1 if any part of
+// the request lies outside the range, matching the 0 / -1 results of readByte.
+int RawMemoryObject::readBytes(uint64_t address,
+                               uint64_t size,
+                               uint8_t* buf,
+                               uint64_t* copied) {
+  const uint64_t Extent = rangeSize();
+  // Compare against the remaining length instead of computing
+  // address + size - 1, which can wrap around.
+  if (address >= Extent || size > Extent - address) return -1;
+  memcpy(buf, FirstChar + address, size);
+  if (copied) *copied = size;
+  return 0;
+}
+
+// Return a pointer to the byte at address. No bounds check is performed and
+// size is not consulted.
+const uint8_t *RawMemoryObject::getPointer(uint64_t address, uint64_t size) {
+  return FirstChar + address;
+}
+} // anonymous namespace
+
+namespace llvm {
+// When the bitcode has a header its size is known up front, so an address
+// below that size can be accepted without blocking on a fetch. Any other
+// address is decided by fetchToPos.
+bool StreamingMemoryObject::isValidAddress(uint64_t address) {
+  if (ObjectSize != 0 && address < ObjectSize)
+    return true;
+  return fetchToPos(address);
+}
+
+// Report whether address equals the object size. When the size is not yet
+// known, fetchToPos(address) is called first and address 0 is never treated
+// as the end.
+bool StreamingMemoryObject::isObjectEnd(uint64_t address) {
+  if (ObjectSize == 0) {
+    fetchToPos(address);
+    return address != 0 && address == ObjectSize;
+  }
+  return address == ObjectSize;
+}
+
+// Return the object size. If it is not known yet, keep asking fetchToPos for
+// a position one chunk further along until it returns false, then report
+// whatever ObjectSize holds.
+uint64_t StreamingMemoryObject::getExtent() {
+  if (ObjectSize == 0) {
+    // keep fetching until we run out of bytes
+    for (size_t Pos = BytesRead + kChunkSize; fetchToPos(Pos);
+         Pos += kChunkSize) {
+    }
+  }
+  return ObjectSize;
+}
+
+// Store the byte at address in *ptr. Returns 0 on success and -1 when
+// fetchToPos cannot make that address available.
+int StreamingMemoryObject::readByte(uint64_t address, uint8_t* ptr) {
+  if (fetchToPos(address)) {
+    // Stored bytes are offset by the number of leading bytes dropped.
+    *ptr = Bytes[BytesSkipped + address];
+    return 0;
+  }
+  return -1;
+}
+
+// Copy size bytes starting at address into buf and, when copied is non-null,
+// store the byte count in *copied. Returns 0 on success and -1 when
+// fetchToPos cannot make the last requested byte available.
+int StreamingMemoryObject::readBytes(uint64_t address,
+                                     uint64_t size,
+                                     uint8_t* buf,
+                                     uint64_t* copied) {
+  // A zero-length read needs no data. Handle it up front: address + size - 1
+  // would otherwise wrap (to the maximum uint64_t value when address is 0)
+  // before being handed to fetchToPos.
+  if (size == 0) {
+    if (copied) *copied = 0;
+    return 0;
+  }
+  if (!fetchToPos(address + size - 1)) return -1;
+  memcpy(buf, &Bytes[address + BytesSkipped], size);
+  if (copied) *copied = size;
+  return 0;
+}
+
+// Skip the first s bytes of what has been read. Returns true, changing
+// nothing, if fewer than s bytes have been read; returns false on success.
+bool StreamingMemoryObject::dropLeadingBytes(size_t s) {
+  if (s <= BytesRead) {
+    BytesSkipped = s;
+    BytesRead -= s;
+    return false;
+  }
+  return true;
+}
+
+// Record size as the object's total size and reserve that much storage in
+// the byte buffer.
+void StreamingMemoryObject::setKnownObjectSize(size_t size) {
+  ObjectSize = size;
+  Bytes.reserve(size);
+}
+
+// Create a RawMemoryObject covering the bytes between Start and End. The
+// object is allocated with new and returned to the caller.
+StreamableMemoryObject *getNonStreamedMemoryObject(
+    const unsigned char *Start, const unsigned char *End) {
+  StreamableMemoryObject *Obj = new RawMemoryObject(Start, End);
+  return Obj;
+}
+
+StreamableMemoryObject::~StreamableMemoryObject() { }
+
+// Construct a streaming object over streamer. The byte buffer starts out
+// kChunkSize long and the first chunk is requested immediately; the count
+// returned by GetBytes becomes BytesRead.
+StreamingMemoryObject::StreamingMemoryObject(DataStreamer *streamer) :
+  Bytes(kChunkSize), Streamer(streamer), BytesRead(0), BytesSkipped(0),
+  ObjectSize(0), EOFReached(false) {
+  BytesRead = streamer->GetBytes(&Bytes.front(), kChunkSize);
+}
+}

Modified: llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp?rev=149918&r1=149917&r2=149918&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp (original)
+++ llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp Mon Feb  6 16:30:29 2012
@@ -46,7 +46,7 @@
   /// getInstruction - See MCDisassembler.
   DecodeStatus getInstruction(MCInst &instr,
                               uint64_t &size,
-                              const MemoryObject &region,
+                              MemoryObject &region,
                               uint64_t address,
                               raw_ostream &vStream,
                               raw_ostream &cStream) const;
@@ -71,7 +71,7 @@
   /// getInstruction - See MCDisassembler.
   DecodeStatus getInstruction(MCInst &instr,
                               uint64_t &size,
-                              const MemoryObject &region,
+                              MemoryObject &region,
                               uint64_t address,
                               raw_ostream &vStream,
                               raw_ostream &cStream) const;
@@ -341,7 +341,7 @@
 }
 
 DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
-                                             const MemoryObject &Region,
+                                             MemoryObject &Region,
                                              uint64_t Address,
                                              raw_ostream &os,
                                              raw_ostream &cs) const {
@@ -691,7 +691,7 @@
 }
 
 DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
-                                               const MemoryObject &Region,
+                                               MemoryObject &Region,
                                                uint64_t Address,
                                                raw_ostream &os,
                                                raw_ostream &cs) const {

Modified: llvm/trunk/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp?rev=149918&r1=149917&r2=149918&view=diff
==============================================================================
--- llvm/trunk/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp (original)
+++ llvm/trunk/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp Mon Feb  6 16:30:29 2012
@@ -502,7 +502,7 @@
 
 MCDisassembler::DecodeStatus MBlazeDisassembler::getInstruction(MCInst &instr,
                                         uint64_t &size,
-                                        const MemoryObject &region,
+                                        MemoryObject &region,
                                         uint64_t address,
                                         raw_ostream &vStream,
                                         raw_ostream &cStream) const {

Modified: llvm/trunk/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h?rev=149918&r1=149917&r2=149918&view=diff
==============================================================================
--- llvm/trunk/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h (original)
+++ llvm/trunk/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h Mon Feb  6 16:30:29 2012
@@ -40,7 +40,7 @@
   /// getInstruction - See MCDisassembler.
   MCDisassembler::DecodeStatus getInstruction(MCInst &instr,
                       uint64_t &size,
-                      const MemoryObject &region,
+                      MemoryObject &region,
                       uint64_t address,
                       raw_ostream &vStream,
                       raw_ostream &cStream) const;

Modified: llvm/trunk/lib/Target/X86/Disassembler/X86Disassembler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Disassembler/X86Disassembler.cpp?rev=149918&r1=149917&r2=149918&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/Disassembler/X86Disassembler.cpp (original)
+++ llvm/trunk/lib/Target/X86/Disassembler/X86Disassembler.cpp Mon Feb  6 16:30:29 2012
@@ -112,7 +112,7 @@
 MCDisassembler::DecodeStatus
 X86GenericDisassembler::getInstruction(MCInst &instr,
                                        uint64_t &size,
-                                       const MemoryObject &region,
+                                       MemoryObject &region,
                                        uint64_t address,
                                        raw_ostream &vStream,
                                        raw_ostream &cStream) const {

Modified: llvm/trunk/lib/Target/X86/Disassembler/X86Disassembler.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Disassembler/X86Disassembler.h?rev=149918&r1=149917&r2=149918&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/Disassembler/X86Disassembler.h (original)
+++ llvm/trunk/lib/Target/X86/Disassembler/X86Disassembler.h Mon Feb  6 16:30:29 2012
@@ -114,7 +114,7 @@
   /// getInstruction - See MCDisassembler.
   DecodeStatus getInstruction(MCInst &instr,
                               uint64_t &size,
-                              const MemoryObject &region,
+                              MemoryObject &region,
                               uint64_t address,
                               raw_ostream &vStream,
                               raw_ostream &cStream) const;

Modified: llvm/trunk/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp?rev=149918&r1=149917&r2=149918&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp (original)
+++ llvm/trunk/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp Mon Feb  6 16:30:29 2012
@@ -483,13 +483,13 @@
   if (MemBuf->getBufferSize() & 3)
     return Error("Bitcode stream should be a multiple of 4 bytes in length");
 
-  unsigned char *BufPtr = (unsigned char *)MemBuf->getBufferStart();
-  unsigned char *EndBufPtr = BufPtr+MemBuf->getBufferSize();
+  const unsigned char *BufPtr = (unsigned char *)MemBuf->getBufferStart();
+  const unsigned char *EndBufPtr = BufPtr+MemBuf->getBufferSize();
 
   // If we have a wrapper header, parse it and ignore the non-bc file contents.
   // The magic number is 0x0B17C0DE stored in little endian.
   if (isBitcodeWrapper(BufPtr, EndBufPtr))
-    if (SkipBitcodeWrapperHeader(BufPtr, EndBufPtr))
+    if (SkipBitcodeWrapperHeader(BufPtr, EndBufPtr, true))
       return Error("Invalid bitcode wrapper header");
 
   BitstreamReader StreamFile(BufPtr, EndBufPtr);

Modified: llvm/trunk/tools/llvm-dis/llvm-dis.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-dis/llvm-dis.cpp?rev=149918&r1=149917&r2=149918&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-dis/llvm-dis.cpp (original)
+++ llvm/trunk/tools/llvm-dis/llvm-dis.cpp Mon Feb  6 16:30:29 2012
@@ -24,6 +24,7 @@
 #include "llvm/Analysis/DebugInfo.h"
 #include "llvm/Assembly/AssemblyAnnotationWriter.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/DataStream.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -126,12 +127,19 @@
   std::string ErrorMessage;
   std::auto_ptr<Module> M;
 
-  {
-    OwningPtr<MemoryBuffer> BufferPtr;
-    if (error_code ec = MemoryBuffer::getFileOrSTDIN(InputFilename, BufferPtr))
-      ErrorMessage = ec.message();
+  // Use the bitcode streaming interface
+  DataStreamer *streamer = getDataFileStreamer(InputFilename, &ErrorMessage);
+  if (streamer) {
+    std::string DisplayFilename;
+    if (InputFilename == "-")
+      DisplayFilename = "<stdin>";
     else
-      M.reset(ParseBitcodeFile(BufferPtr.get(), Context, &ErrorMessage));
+      DisplayFilename = InputFilename;
+    M.reset(getStreamedBitcodeModule(DisplayFilename, streamer, Context,
+                                     &ErrorMessage));
+    if(M.get() != 0 && M->MaterializeAllPermanently(&ErrorMessage)) {
+      M.reset();
+    }
   }
 
   if (M.get() == 0) {
@@ -183,4 +191,3 @@
 
   return 0;
 }
-

Modified: llvm/trunk/tools/llvm-mc/Disassembler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mc/Disassembler.cpp?rev=149918&r1=149917&r2=149918&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mc/Disassembler.cpp (original)
+++ llvm/trunk/tools/llvm-mc/Disassembler.cpp Mon Feb  6 16:30:29 2012
@@ -42,9 +42,9 @@
   VectorMemoryObject(const ByteArrayTy &bytes) : Bytes(bytes) {}
 
   uint64_t getBase() const { return 0; }
-  uint64_t getExtent() const { return Bytes.size(); }
+  uint64_t getExtent() { return Bytes.size(); }
 
-  int readByte(uint64_t Addr, uint8_t *Byte) const {
+  int readByte(uint64_t Addr, uint8_t *Byte) {
     if (Addr >= getExtent())
       return -1;
     *Byte = Bytes[Addr].first;

Modified: llvm/trunk/tools/llvm-objdump/MCFunction.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objdump/MCFunction.cpp?rev=149918&r1=149917&r2=149918&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objdump/MCFunction.cpp (original)
+++ llvm/trunk/tools/llvm-objdump/MCFunction.cpp Mon Feb  6 16:30:29 2012
@@ -28,7 +28,7 @@
 
 MCFunction
 MCFunction::createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm,
-                                 const MemoryObject &Region, uint64_t Start,
+                                 MemoryObject &Region, uint64_t Start,
                                  uint64_t End, const MCInstrAnalysis *Ana,
                                  raw_ostream &DebugOut,
                                  SmallVectorImpl<uint64_t> &Calls) {

Modified: llvm/trunk/tools/llvm-objdump/MCFunction.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objdump/MCFunction.h?rev=149918&r1=149917&r2=149918&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objdump/MCFunction.h (original)
+++ llvm/trunk/tools/llvm-objdump/MCFunction.h Mon Feb  6 16:30:29 2012
@@ -79,7 +79,7 @@
   // Create an MCFunction from a region of binary machine code.
   static MCFunction
   createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm,
-                       const MemoryObject &Region, uint64_t Start, uint64_t End,
+                       MemoryObject &Region, uint64_t Start, uint64_t End,
                        const MCInstrAnalysis *Ana, raw_ostream &DebugOut,
                        SmallVectorImpl<uint64_t> &Calls);
 

Modified: llvm/trunk/tools/llvm-objdump/llvm-objdump.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objdump/llvm-objdump.h?rev=149918&r1=149917&r2=149918&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objdump/llvm-objdump.h (original)
+++ llvm/trunk/tools/llvm-objdump/llvm-objdump.h Mon Feb  6 16:30:29 2012
@@ -31,9 +31,9 @@
   StringRefMemoryObject(StringRef bytes) : Bytes(bytes) {}
 
   uint64_t getBase() const { return 0; }
-  uint64_t getExtent() const { return Bytes.size(); }
+  uint64_t getExtent() { return Bytes.size(); }
 
-  int readByte(uint64_t Addr, uint8_t *Byte) const {
+  int readByte(uint64_t Addr, uint8_t *Byte) {
     if (Addr >= getExtent())
       return -1;
     *Byte = Bytes[Addr];





More information about the llvm-commits mailing list