[llvm-commits] [llvm] r53275 - in /llvm/trunk: docs/BitCodeFormat.html include/llvm/Bitcode/BitstreamWriter.h lib/Bitcode/Reader/BitcodeReader.cpp lib/Bitcode/Writer/BitcodeWriter.cpp lib/System/Path.cpp

Chris Lattner sabre at nondot.org
Tue Jul 8 22:14:23 PDT 2008


Author: lattner
Date: Wed Jul  9 00:14:23 2008
New Revision: 53275

URL: http://llvm.org/viewvc/llvm-project?rev=53275&view=rev
Log:
Add a little wrapper header that is put around bc files when emitting
bc files for modules with a target triple that indicates they are for
darwin.  The reader unconditionally handles this, and the writer could
turn this on for more targets if we care.

This change has two benefits for darwin:

1) it allows us to encode the cpu type of the file in an easy to read
   place that doesn't require decoding the bc file.
2) it works around a bug (IMO) in darwin's AR where it is incapable of
   handling files that are not a multiple of 8 bytes long.  BC files
   are only guaranteed to be multiples of 4 bytes long.

Modified:
    llvm/trunk/docs/BitCodeFormat.html
    llvm/trunk/include/llvm/Bitcode/BitstreamWriter.h
    llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp
    llvm/trunk/lib/Bitcode/Writer/BitcodeWriter.cpp
    llvm/trunk/lib/System/Path.cpp

Modified: llvm/trunk/docs/BitCodeFormat.html
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/BitCodeFormat.html?rev=53275&r1=53274&r2=53275&view=diff

==============================================================================
--- llvm/trunk/docs/BitCodeFormat.html (original)
+++ llvm/trunk/docs/BitCodeFormat.html Wed Jul  9 00:14:23 2008
@@ -22,6 +22,8 @@
     <li><a href="#stdblocks">Standard Blocks</a></li>
     </ol>
   </li>
+  <li><a href="#wrapper">Bitcode Wrapper Format</a>
+  </li>
   <li><a href="#llvmir">LLVM IR Encoding</a>
     <ol>
     <li><a href="#basics">Basics</a></li>
@@ -65,8 +67,12 @@
 provides a mechanism for the file to self-describe "abbreviations", which are
 effectively size optimizations for the content.</p>
 
-<p>This document first describes the LLVM bitstream format, then describes the
-record structure used by LLVM IR files.
+<p>LLVM IR files may be optionally embedded into a <a 
+href="#wrapper">wrapper</a> structure that makes it easy to embed extra data
+along with LLVM IR files.</p>
+
+<p>This document first describes the LLVM bitstream format, then the
+wrapper format, and finally the record structure used by LLVM IR files.
 </p>
 
 </div>
@@ -545,6 +551,36 @@
 </div>
 
 <!-- *********************************************************************** -->
+<div class="doc_section"> <a name="wrapper">Bitcode Wrapper Format</a></div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>Bitcode files for LLVM IR may optionally be wrapped in a simple wrapper
+structure.  This structure contains a simple header that indicates the offset
+and size of the embedded BC file.  This allows additional information to be
+stored alongside the BC file.  The structure of this file header is:
+</p>
+
+<p>
+<pre>
+[Magic<sub>32</sub>,
+ Version<sub>32</sub>,
+ Offset<sub>32</sub>,
+ Size<sub>32</sub>,
+ CPUType<sub>32</sub>]
+</pre></p>
+
+<p>Each of the fields is a 32-bit field stored in little endian form (as with
+the rest of the bitcode file fields).  The Magic number is always
+<tt>0x0B17C0DE</tt> and the version is currently always <tt>0</tt>.  The Offset
+field is the offset in bytes to the start of the bitcode stream in the file, and
+the Size field is the size in bytes of the stream. CPUType is a target-specific
+value that can be used to encode the CPU of the target.</p>
+</div>
+
+
+<!-- *********************************************************************** -->
 <div class="doc_section"> <a name="llvmir">LLVM IR Encoding</a></div>
 <!-- *********************************************************************** -->
 

Modified: llvm/trunk/include/llvm/Bitcode/BitstreamWriter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Bitcode/BitstreamWriter.h?rev=53275&r1=53274&r2=53275&view=diff

==============================================================================
--- llvm/trunk/include/llvm/Bitcode/BitstreamWriter.h (original)
+++ llvm/trunk/include/llvm/Bitcode/BitstreamWriter.h Wed Jul  9 00:14:23 2008
@@ -157,6 +157,15 @@
     Emit(Val, CurCodeSize);
   }
   
+  // BackpatchWord - Backpatch a 32-bit word in the output with the specified
+  // value.
+  void BackpatchWord(unsigned ByteNo, unsigned NewWord) {
+    Out[ByteNo++] = (unsigned char)(NewWord >>  0);
+    Out[ByteNo++] = (unsigned char)(NewWord >>  8);
+    Out[ByteNo++] = (unsigned char)(NewWord >> 16);
+    Out[ByteNo  ] = (unsigned char)(NewWord >> 24);
+  }
+  
   //===--------------------------------------------------------------------===//
   // Block Manipulation
   //===--------------------------------------------------------------------===//
@@ -227,10 +236,7 @@
     unsigned ByteNo = B.StartSizeWord*4;
     
     // Update the block size field in the header of this sub-block.
-    Out[ByteNo++] = (unsigned char)(SizeInWords >>  0);
-    Out[ByteNo++] = (unsigned char)(SizeInWords >>  8);
-    Out[ByteNo++] = (unsigned char)(SizeInWords >> 16);
-    Out[ByteNo++] = (unsigned char)(SizeInWords >> 24);
+    BackpatchWord(ByteNo, SizeInWords);
     
     // Restore the inner block's code size and abbrev table.
     CurCodeSize = B.PrevCodeSize;

Modified: llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp?rev=53275&r1=53274&r2=53275&view=diff

==============================================================================
--- llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp (original)
+++ llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp Wed Jul  9 00:14:23 2008
@@ -1184,6 +1184,47 @@
   return Error("Premature end of bitstream");
 }
 
+/// SkipWrapperHeader - Some systems wrap bc files with a special header for
+/// padding or other reasons.  The format of this header is:
+///
+/// struct bc_header {
+///   uint32_t Magic;         // 0x0B17C0DE
+///   uint32_t Version;       // Version, currently always 0.
+///   uint32_t BitcodeOffset; // Offset to traditional bitcode file.
+///   uint32_t BitcodeSize;   // Size of traditional bitcode file.
+///   ... potentially other gunk ...
+/// };
+/// 
+/// This function is called when we find a file with a matching magic number.
+/// In this case, skip down to the subsection of the file that is actually a BC
+/// file.
+static bool SkipWrapperHeader(unsigned char *&BufPtr, unsigned char *&BufEnd) {
+  enum {
+    KnownHeaderSize = 4*4,  // Size of header we read.
+    OffsetField = 2*4,      // Offset in bytes to Offset field.
+    SizeField = 3*4         // Offset in bytes to Size field.
+  };
+  
+  
+  // Must contain the header!
+  if (BufEnd-BufPtr < KnownHeaderSize) return true;
+  
+  unsigned Offset = ( BufPtr[OffsetField  ]        |
+                     (BufPtr[OffsetField+1] << 8)  |
+                     (BufPtr[OffsetField+2] << 16) |
+                     (BufPtr[OffsetField+3] << 24));
+  unsigned Size   = ( BufPtr[SizeField    ]        |
+                     (BufPtr[SizeField  +1] << 8)  |
+                     (BufPtr[SizeField  +2] << 16) |
+                     (BufPtr[SizeField  +3] << 24));
+  
+  // Verify that Offset+Size fits in the file.
+  if (Offset+Size > unsigned(BufEnd-BufPtr))
+    return true;
+  BufPtr += Offset;
+  BufEnd = BufPtr+Size;
+  return false;
+}
 
 bool BitcodeReader::ParseBitcode() {
   TheModule = 0;
@@ -1192,7 +1233,16 @@
     return Error("Bitcode stream should be a multiple of 4 bytes in length");
   
   unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart();
-  Stream.init(BufPtr, BufPtr+Buffer->getBufferSize());
+  unsigned char *BufEnd = BufPtr+Buffer->getBufferSize();
+  
+  // If we have a wrapper header, parse it and ignore the non-bc file contents.
+  // The magic number is 0x0B17C0DE stored in little endian.
+  if (BufPtr != BufEnd && BufPtr[0] == 0xDE && BufPtr[1] == 0xC0 && 
+      BufPtr[2] == 0x17 && BufPtr[3] == 0x0B)
+    if (SkipWrapperHeader(BufPtr, BufEnd))
+      return Error("Invalid bitcode wrapper header");
+  
+  Stream.init(BufPtr, BufEnd);
   
   // Sniff for the signature.
   if (Stream.Read(8) != 'B' ||

Modified: llvm/trunk/lib/Bitcode/Writer/BitcodeWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Bitcode/Writer/BitcodeWriter.cpp?rev=53275&r1=53274&r2=53275&view=diff

==============================================================================
--- llvm/trunk/lib/Bitcode/Writer/BitcodeWriter.cpp (original)
+++ llvm/trunk/lib/Bitcode/Writer/BitcodeWriter.cpp Wed Jul  9 00:14:23 2008
@@ -1273,6 +1273,70 @@
   Stream.ExitBlock();
 }
 
+/// EmitDarwinBCHeader - If generating a bc file on darwin, we have to emit a
+/// header and trailer to make it compatible with the system archiver.  To do
+/// this we emit the following header, and then emit a trailer that pads the
+/// file out to be a multiple of 16 bytes.
+/// 
+/// struct bc_header {
+///   uint32_t Magic;         // 0x0B17C0DE
+///   uint32_t Version;       // Version, currently always 0.
+///   uint32_t BitcodeOffset; // Offset to traditional bitcode file.
+///   uint32_t BitcodeSize;   // Size of traditional bitcode file.
+///   uint32_t CPUType;       // CPU specifier.
+///   ... potentially more later ...
+/// };
+enum {
+  DarwinBCSizeFieldOffset = 3*4, // Offset to bitcode_size.
+  DarwinBCHeaderSize = 5*4
+};
+
+static void EmitDarwinBCHeader(BitstreamWriter &Stream,
+                               const std::string &TT) {
+  unsigned CPUType = ~0U;
+  
+  // Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*.  The CPUType is a
+  // magic number from /usr/include/mach/machine.h.  It is ok to reproduce the
+  // specific constants here because they are implicitly part of the Darwin ABI.
+  enum {
+    DARWIN_CPU_ARCH_ABI64      = 0x01000000,
+    DARWIN_CPU_TYPE_X86        = 7,
+    DARWIN_CPU_TYPE_POWERPC    = 18
+  };
+  
+  if (TT.find("x86_64-") == 0)
+    CPUType = DARWIN_CPU_TYPE_X86 | DARWIN_CPU_ARCH_ABI64;
+  else if (TT.size() >= 5 && TT[0] == 'i' && TT[2] == '8' && TT[3] == '6' &&
+           TT[4] == '-' && TT[1] - '3' < 6)
+    CPUType = DARWIN_CPU_TYPE_X86;
+  else if (TT.find("powerpc-") == 0)
+    CPUType = DARWIN_CPU_TYPE_POWERPC;
+  else if (TT.find("powerpc64-") == 0)
+    CPUType = DARWIN_CPU_TYPE_POWERPC | DARWIN_CPU_ARCH_ABI64;
+  
+  // Traditional Bitcode starts after header.
+  unsigned BCOffset = DarwinBCHeaderSize;
+  
+  Stream.Emit(0x0B17C0DE, 32);
+  Stream.Emit(0         , 32);  // Version.
+  Stream.Emit(BCOffset  , 32);
+  Stream.Emit(0         , 32);  // Filled in later.
+  Stream.Emit(CPUType   , 32);
+}
+
+/// EmitDarwinBCTrailer - Emit the darwin epilog after the bitcode file and
+/// finalize the header.
+static void EmitDarwinBCTrailer(BitstreamWriter &Stream, unsigned BufferSize) {
+  // Update the size field in the header.
+  Stream.BackpatchWord(DarwinBCSizeFieldOffset, BufferSize-DarwinBCHeaderSize);
+  
+  // If the file is not a multiple of 16 bytes, insert dummy padding.
+  while (BufferSize & 15) {
+    Stream.Emit(0, 8);
+    ++BufferSize;
+  }
+}
+
 
 /// WriteBitcodeToFile - Write the specified module to the specified output
 /// stream.
@@ -1282,6 +1346,11 @@
   
   Buffer.reserve(256*1024);
   
+  // If this is darwin, emit a file header and trailer if needed.
+  bool isDarwin = M->getTargetTriple().find("-darwin") != std::string::npos;
+  if (isDarwin)
+    EmitDarwinBCHeader(Stream, M->getTargetTriple());
+  
   // Emit the file header.
   Stream.Emit((unsigned)'B', 8);
   Stream.Emit((unsigned)'C', 8);
@@ -1292,10 +1361,14 @@
 
   // Emit the module.
   WriteModule(M, Stream);
+
+  if (isDarwin)
+    EmitDarwinBCTrailer(Stream, Buffer.size());
+
   
   // If writing to stdout, set binary mode.
   if (llvm::cout == Out)
-      sys::Program::ChangeStdoutToBinary();
+    sys::Program::ChangeStdoutToBinary();
 
   // Write the generated bitstream to "Out".
   Out.write((char*)&Buffer.front(), Buffer.size());

Modified: llvm/trunk/lib/System/Path.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/System/Path.cpp?rev=53275&r1=53274&r2=53275&view=diff

==============================================================================
--- llvm/trunk/lib/System/Path.cpp (original)
+++ llvm/trunk/lib/System/Path.cpp Wed Jul  9 00:14:23 2008
@@ -52,10 +52,15 @@
 }
 
 LLVMFileType
-sys::IdentifyFileType(const char*magic, unsigned length) {
+sys::IdentifyFileType(const char *magic, unsigned length) {
   assert(magic && "Invalid magic number string");
   assert(length >=4 && "Invalid magic number length");
   switch (magic[0]) {
+    case 0xDE:  // 0x0B17C0DE = BC wrapper
+      if (magic[1] == (char)0xC0 && magic[2] == (char)0x17 &&
+          magic[3] == (char)0x0B)
+        return Bitcode_FileType;
+      break;
     case 'B':
       if (magic[1] == 'C' && magic[2] == (char)0xC0 && magic[3] == (char)0xDE)
         return Bitcode_FileType;





More information about the llvm-commits mailing list