[llvm-commits] CVS: llvm/lib/Bytecode/Reader/Analyzer.cpp Reader.cpp Reader.h

Reid Spencer reid at x10sys.com
Mon Nov 13 20:47:39 PST 2006



Changes in directory llvm/lib/Bytecode/Reader:

Analyzer.cpp updated: 1.24 -> 1.25
Reader.cpp updated: 1.204 -> 1.205
Reader.h updated: 1.37 -> 1.38
---
Log message:

Discard code that supported old bytecode formats. This makes the Bytecode
Reader code much easier to read and maintain. Backwards compatibility from
version 5 format has been retained. Older formats will produce an error.


---
Diffs of the changes:  (+146 -550)

 Analyzer.cpp |    2 
 Reader.cpp   |  617 +++++++++++++----------------------------------------------
 Reader.h     |   77 -------
 3 files changed, 146 insertions(+), 550 deletions(-)


Index: llvm/lib/Bytecode/Reader/Analyzer.cpp
diff -u llvm/lib/Bytecode/Reader/Analyzer.cpp:1.24 llvm/lib/Bytecode/Reader/Analyzer.cpp:1.25
--- llvm/lib/Bytecode/Reader/Analyzer.cpp:1.24	Thu Nov  2 19:44:51 2006
+++ llvm/lib/Bytecode/Reader/Analyzer.cpp	Mon Nov 13 22:47:22 2006
@@ -532,7 +532,7 @@
     assert(BType >= BytecodeFormat::ModuleBlockID);
     assert(BType < BytecodeFormat::NumberOfBlockIDs);
     bca.BlockSizes[
-      llvm::BytecodeFormat::CompressedBytecodeBlockIdentifiers(BType)] += Size;
+      llvm::BytecodeFormat::BytecodeBlockIdentifiers(BType)] += Size;
 
     if (bca.version < 3) // Check for long block headers versions
       bca.BlockSizes[llvm::BytecodeFormat::Reserved_DoNotUse] += 8;


Index: llvm/lib/Bytecode/Reader/Reader.cpp
diff -u llvm/lib/Bytecode/Reader/Reader.cpp:1.204 llvm/lib/Bytecode/Reader/Reader.cpp:1.205
--- llvm/lib/Bytecode/Reader/Reader.cpp:1.204	Wed Nov  8 15:27:54 2006
+++ llvm/lib/Bytecode/Reader/Reader.cpp	Mon Nov 13 22:47:22 2006
@@ -73,18 +73,6 @@
           " block.");
 }
 
-/// Align the buffer position to a 32 bit boundary
-inline void BytecodeReader::align32() {
-  if (hasAlignment) {
-    BufPtr Save = At;
-    At = (const unsigned char *)((intptr_t)(At+3) & (~3UL));
-    if (At > Save)
-      if (Handler) Handler->handleAlignment(At - Save);
-    if (At > BlockEnd)
-      error("Ran out of data while aligning!");
-  }
-}
-
 /// Read a whole unsigned integer
 inline unsigned BytecodeReader::read_uint() {
   if (At+4 > BlockEnd)
@@ -179,43 +167,9 @@
 
 /// Read a block header and obtain its type and size
 inline void BytecodeReader::read_block(unsigned &Type, unsigned &Size) {
-  if ( hasLongBlockHeaders ) {
-    Type = read_uint();
-    Size = read_uint();
-    switch (Type) {
-    case BytecodeFormat::Reserved_DoNotUse :
-      error("Reserved_DoNotUse used as Module Type?");
-      Type = BytecodeFormat::ModuleBlockID; break;
-    case BytecodeFormat::Module:
-      Type = BytecodeFormat::ModuleBlockID; break;
-    case BytecodeFormat::Function:
-      Type = BytecodeFormat::FunctionBlockID; break;
-    case BytecodeFormat::ConstantPool:
-      Type = BytecodeFormat::ConstantPoolBlockID; break;
-    case BytecodeFormat::SymbolTable:
-      Type = BytecodeFormat::SymbolTableBlockID; break;
-    case BytecodeFormat::ModuleGlobalInfo:
-      Type = BytecodeFormat::ModuleGlobalInfoBlockID; break;
-    case BytecodeFormat::GlobalTypePlane:
-      Type = BytecodeFormat::GlobalTypePlaneBlockID; break;
-    case BytecodeFormat::InstructionList:
-      Type = BytecodeFormat::InstructionListBlockID; break;
-    case BytecodeFormat::CompactionTable:
-      Type = BytecodeFormat::CompactionTableBlockID; break;
-    case BytecodeFormat::BasicBlock:
-      /// This block type isn't used after version 1.1. However, we have to
-      /// still allow the value in case this is an old bc format file.
-      /// We just let its value creep thru.
-      break;
-    default:
-      error("Invalid block id found: " + utostr(Type));
-      break;
-    }
-  } else {
-    Size = read_uint();
-    Type = Size & 0x1F; // mask low order five bits
-    Size >>= 5; // get rid of five low order bits, leaving high 27
-  }
+  Size = read_uint(); // Read the header
+  Type = Size & 0x1F; // mask low order five bits to get type
+  Size >>= 5;         // high order 27 bits is the size
   BlockStart = At;
   if (At + Size > BlockEnd)
     error("Attempt to size a block past end of memory");
@@ -223,56 +177,13 @@
   if (Handler) Handler->handleBlock(Type, BlockStart, Size);
 }
 
-
-/// In LLVM 1.2 and before, Types were derived from Value and so they were
-/// written as part of the type planes along with any other Value. In LLVM
-/// 1.3 this changed so that Type does not derive from Value. Consequently,
-/// the BytecodeReader's containers for Values can't contain Types because
-/// there's no inheritance relationship. This means that the "Type Type"
-/// plane is defunct along with the Type::TypeTyID TypeID. In LLVM 1.3
-/// whenever a bytecode construct must have both types and values together,
-/// the types are always read/written first and then the Values. Furthermore
-/// since Type::TypeTyID no longer exists, its value (12) now corresponds to
-/// Type::LabelTyID. In order to overcome this we must "sanitize" all the
-/// type TypeIDs we encounter. For LLVM 1.3 bytecode files, there's no change.
-/// For LLVM 1.2 and before, this function will decrement the type id by
-/// one to account for the missing Type::TypeTyID enumerator if the value is
-/// larger than 12 (Type::LabelTyID). If the value is exactly 12, then this
-/// function returns true, otherwise false. This helps detect situations
-/// where the pre 1.3 bytecode is indicating that what follows is a type.
-/// @returns true iff type id corresponds to pre 1.3 "type type"
-inline bool BytecodeReader::sanitizeTypeId(unsigned &TypeId) {
-  if (hasTypeDerivedFromValue) { /// do nothing if 1.3 or later
-    if (TypeId == Type::LabelTyID) {
-      TypeId = Type::VoidTyID; // sanitize it
-      return true; // indicate we got TypeTyID in pre 1.3 bytecode
-    } else if (TypeId > Type::LabelTyID)
-      --TypeId; // shift all planes down because type type plane is missing
-  }
-  return false;
-}
-
-/// Reads a vbr uint to read in a type id and does the necessary
-/// conversion on it by calling sanitizeTypeId.
-/// @returns true iff \p TypeId read corresponds to a pre 1.3 "type type"
-/// @see sanitizeTypeId
-inline bool BytecodeReader::read_typeid(unsigned &TypeId) {
-  TypeId = read_vbr_uint();
-  if ( !has32BitTypes )
-    if ( TypeId == 0x00FFFFFF )
-      TypeId = read_vbr_uint();
-  return sanitizeTypeId(TypeId);
-}
-
 //===----------------------------------------------------------------------===//
 // IR Lookup Methods
 //===----------------------------------------------------------------------===//
 
 /// Determine if a type id has an implicit null value
 inline bool BytecodeReader::hasImplicitNull(unsigned TyID) {
-  if (!hasExplicitPrimitiveZeros)
-    return TyID != Type::LabelTyID && TyID != Type::VoidTyID;
-  return TyID >= Type::FirstDerivedTyID;
+  return TyID != Type::LabelTyID && TyID != Type::VoidTyID;
 }
 
 /// Obtain a type given a typeid and account for things like compaction tables,
@@ -304,23 +215,11 @@
   return Type::VoidTy;
 }
 
-/// Get a sanitized type id. This just makes sure that the \p ID
-/// is both sanitized and not the "type type" of pre-1.3 bytecode.
-/// @see sanitizeTypeId
-inline const Type* BytecodeReader::getSanitizedType(unsigned& ID) {
-  if (sanitizeTypeId(ID))
-    error("Invalid type id encountered");
-  return getType(ID);
-}
-
-/// This method just saves some coding. It uses read_typeid to read
+/// This method just saves some coding. It uses read_vbr_uint to read
 /// in a sanitized type id, errors that its not the type type, and
 /// then calls getType to return the type value.
-inline const Type* BytecodeReader::readSanitizedType() {
-  unsigned ID;
-  if (read_typeid(ID))
-    error("Invalid type id encountered");
-  return getType(ID);
+inline const Type* BytecodeReader::readType() {
+  return getType(read_vbr_uint());
 }
 
 /// Get the slot number associated with a type accounting for primitive
@@ -590,12 +489,6 @@
   if (!hasSignlessDivRem && !hasSignlessShrCastSetcc)
     return 0; // The opcode is fine the way it is.
 
-  // If this is a bytecode format that did not include the unreachable
-  // instruction, bump up the opcode number to adjust it.
-  if (hasNoUnreachableInst)
-    if (Opcode >= 6 && Opcode < 62)
-      ++Opcode;
-
   // If this is bytecode version 6, that only had signed Rem and Div 
   // instructions, then we must compensate for those two instructions only.
   // So that the switch statement below works, we're trying to turn this into
@@ -779,7 +672,7 @@
       CallInst* bar = new CallInst(NF, getValue(iType, Oprnds[0]));
       BB->getInstList().push_back(bar);
       BB->getInstList().push_back(new StoreInst(bar, foo));
-      Instruction* tmp = new VAArgInst(foo, getSanitizedType(Oprnds[1]));
+      Instruction* tmp = new VAArgInst(foo, getType(Oprnds[1]));
       BB->getInstList().push_back(tmp);
       Result = new LoadInst(foo);
       break;
@@ -803,7 +696,7 @@
       CallInst* bar = new CallInst(NF, getValue(iType, Oprnds[0]));
       BB->getInstList().push_back(bar);
       BB->getInstList().push_back(new StoreInst(bar, foo));
-      Result = new VAArgInst(foo, getSanitizedType(Oprnds[1]));
+      Result = new VAArgInst(foo, getType(Oprnds[1]));
       break;
     }
     case 34: // Select
@@ -919,11 +812,10 @@
 
     for (unsigned i = 0; i != NumOprnds; ++i)
       Oprnds[i] = read_vbr_uint();
-    align32();
     break;
   }
 
-  const Type *InstTy = getSanitizedType(iType);
+  const Type *InstTy = getType(iType);
 
   // Make the necessary adjustments for dealing with backwards compatibility
   // of opcodes.
@@ -955,7 +847,7 @@
       if (Oprnds.size() != 2)
         error("Invalid VAArg instruction!");
       Result = new VAArgInst(getValue(iType, Oprnds[0]),
-                             getSanitizedType(Oprnds[1]));
+                             getType(Oprnds[1]));
       break;
     case Instruction::ExtractElement: {
       if (Oprnds.size() != 2)
@@ -1001,7 +893,7 @@
       if (Oprnds.size() != 2)
         error("Invalid Cast instruction!");
       Result = new CastInst(getValue(iType, Oprnds[0]),
-                            getSanitizedType(Oprnds[1]));
+                            getType(Oprnds[1]));
       break;
     case Instruction::Select:
       if (Oprnds.size() != 3)
@@ -1235,34 +1127,22 @@
 
         unsigned ValIdx = Oprnds[i];
         unsigned IdxTy = 0;
-        if (!hasRestrictedGEPTypes) {
-          // Struct indices are always uints, sequential type indices can be 
-          // any of the 32 or 64-bit integer types.  The actual choice of 
-          // type is encoded in the low two bits of the slot number.
-          if (isa<StructType>(TopTy))
-            IdxTy = Type::UIntTyID;
-          else {
-            switch (ValIdx & 3) {
-            default:
-            case 0: IdxTy = Type::UIntTyID; break;
-            case 1: IdxTy = Type::IntTyID; break;
-            case 2: IdxTy = Type::ULongTyID; break;
-            case 3: IdxTy = Type::LongTyID; break;
-            }
-            ValIdx >>= 2;
+        // Struct indices are always uints, sequential type indices can be 
+        // any of the 32 or 64-bit integer types.  The actual choice of 
+        // type is encoded in the low two bits of the slot number.
+        if (isa<StructType>(TopTy))
+          IdxTy = Type::UIntTyID;
+        else {
+          switch (ValIdx & 3) {
+          default:
+          case 0: IdxTy = Type::UIntTyID; break;
+          case 1: IdxTy = Type::IntTyID; break;
+          case 2: IdxTy = Type::ULongTyID; break;
+          case 3: IdxTy = Type::LongTyID; break;
           }
-        } else {
-          IdxTy = isa<StructType>(TopTy) ? Type::UByteTyID : Type::LongTyID;
+          ValIdx >>= 2;
         }
-
         Idx.push_back(getValue(IdxTy, ValIdx));
-
-        // Convert ubyte struct indices into uint struct indices.
-        if (isa<StructType>(TopTy) && hasRestrictedGEPTypes)
-          if (ConstantInt *C = dyn_cast<ConstantInt>(Idx.back()))
-            if (C->getType() == Type::UByteTy)
-              Idx[Idx.size()-1] = ConstantExpr::getCast(C, Type::UIntTy);
-
         NextTy = GetElementPtrInst::getIndexedType(InstTy, Idx, true);
       }
 
@@ -1309,16 +1189,16 @@
 }
 
 /// Get a particular numbered basic block, which might be a forward reference.
-/// This works together with ParseBasicBlock to handle these forward references
-/// in a clean manner.  This function is used when constructing phi, br, switch,
-/// and other instructions that reference basic blocks. Blocks are numbered
-/// sequentially as they appear in the function.
+/// This works together with ParseInstructionList to handle these forward 
+/// references in a clean manner.  This function is used when constructing 
+/// phi, br, switch, and other instructions that reference basic blocks. 
+/// Blocks are numbered sequentially as they appear in the function.
 BasicBlock *BytecodeReader::getBasicBlock(unsigned ID) {
   // Make sure there is room in the table...
   if (ParsedBasicBlocks.size() <= ID) ParsedBasicBlocks.resize(ID+1);
 
-  // First check to see if this is a backwards reference, i.e., ParseBasicBlock
-  // has already created this block, or if the forward reference has already
+  // First check to see if this is a backwards reference, i.e. this block
+  // has already been created, or if the forward reference has already
   // been created.
   if (ParsedBasicBlocks[ID])
     return ParsedBasicBlocks[ID];
@@ -1328,34 +1208,10 @@
   return ParsedBasicBlocks[ID] = new BasicBlock();
 }
 
-/// In LLVM 1.0 bytecode files, we used to output one basicblock at a time.
-/// This method reads in one of the basicblock packets. This method is not used
-/// for bytecode files after LLVM 1.0
-/// @returns The basic block constructed.
-BasicBlock *BytecodeReader::ParseBasicBlock(unsigned BlockNo) {
-  if (Handler) Handler->handleBasicBlockBegin(BlockNo);
-
-  BasicBlock *BB = 0;
-
-  if (ParsedBasicBlocks.size() == BlockNo)
-    ParsedBasicBlocks.push_back(BB = new BasicBlock());
-  else if (ParsedBasicBlocks[BlockNo] == 0)
-    BB = ParsedBasicBlocks[BlockNo] = new BasicBlock();
-  else
-    BB = ParsedBasicBlocks[BlockNo];
-
-  std::vector<unsigned> Operands;
-  while (moreInBlock())
-    ParseInstruction(Operands, BB);
-
-  if (Handler) Handler->handleBasicBlockEnd(BlockNo);
-  return BB;
-}
-
 /// Parse all of the BasicBlock's & Instruction's in the body of a function.
 /// In post 1.0 bytecode files, we no longer emit basic block individually,
 /// in order to avoid per-basic-block overhead.
-/// @returns Rhe number of basic blocks encountered.
+/// @returns the number of basic blocks encountered.
 unsigned BytecodeReader::ParseInstructionList(Function* F) {
   unsigned BlockNo = 0;
   std::vector<unsigned> Args;
@@ -1401,52 +1257,35 @@
            E = CurrentFunction->end(); I != E; ++I)
       BBMap.push_back(I);
 
-  /// In LLVM 1.3 we write types separately from values so
-  /// The types are always first in the symbol table. This is
-  /// because Type no longer derives from Value.
-  if (!hasTypeDerivedFromValue) {
-    // Symtab block header: [num entries]
-    unsigned NumEntries = read_vbr_uint();
-    for (unsigned i = 0; i < NumEntries; ++i) {
-      // Symtab entry: [def slot #][name]
-      unsigned slot = read_vbr_uint();
-      std::string Name = read_str();
-      const Type* T = getType(slot);
-      ST->insert(Name, T);
-    }
+  // Symtab block header: [num entries]
+  unsigned NumEntries = read_vbr_uint();
+  for (unsigned i = 0; i < NumEntries; ++i) {
+    // Symtab entry: [def slot #][name]
+    unsigned slot = read_vbr_uint();
+    std::string Name = read_str();
+    const Type* T = getType(slot);
+    ST->insert(Name, T);
   }
 
   while (moreInBlock()) {
     // Symtab block header: [num entries][type id number]
     unsigned NumEntries = read_vbr_uint();
-    unsigned Typ = 0;
-    bool isTypeType = read_typeid(Typ);
+    unsigned Typ = read_vbr_uint();
 
     for (unsigned i = 0; i != NumEntries; ++i) {
       // Symtab entry: [def slot #][name]
       unsigned slot = read_vbr_uint();
       std::string Name = read_str();
-
-      // if we're reading a pre 1.3 bytecode file and the type plane
-      // is the "type type", handle it here
-      if (isTypeType) {
-        const Type* T = getType(slot);
-        if (T == 0)
-          error("Failed type look-up for name '" + Name + "'");
-        ST->insert(Name, T);
-        continue; // code below must be short circuited
+      Value *V = 0;
+      if (Typ == Type::LabelTyID) {
+        if (slot < BBMap.size())
+          V = BBMap[slot];
       } else {
-        Value *V = 0;
-        if (Typ == Type::LabelTyID) {
-          if (slot < BBMap.size())
-            V = BBMap[slot];
-        } else {
-          V = getValue(Typ, slot, false); // Find mapping...
-        }
-        if (V == 0)
-          error("Failed value look-up for name '" + Name + "'");
-        V->setName(Name);
+        V = getValue(Typ, slot, false); // Find mapping...
       }
+      if (V == 0)
+        error("Failed value look-up for name '" + Name + "'");
+      V->setName(Name);
     }
   }
   checkPastBlockEnd("Symbol Table");
@@ -1456,9 +1295,7 @@
 /// Read in the types portion of a compaction table.
 void BytecodeReader::ParseCompactionTypes(unsigned NumEntries) {
   for (unsigned i = 0; i != NumEntries; ++i) {
-    unsigned TypeSlot = 0;
-    if (read_typeid(TypeSlot))
-      error("Invalid type in compaction table: type type");
+    unsigned TypeSlot = read_vbr_uint();
     const Type *Typ = getGlobalTableType(TypeSlot);
     CompactionTypes.push_back(std::make_pair(Typ, TypeSlot));
     if (Handler) Handler->handleCompactionTableType(i, TypeSlot, Typ);
@@ -1471,14 +1308,9 @@
   // Notify handler that we're beginning a compaction table.
   if (Handler) Handler->handleCompactionTableBegin();
 
-  // In LLVM 1.3 Type no longer derives from Value. So,
-  // we always write them first in the compaction table
-  // because they can't occupy a "type plane" where the
-  // Values reside.
-  if (! hasTypeDerivedFromValue) {
-    unsigned NumEntries = read_vbr_uint();
-    ParseCompactionTypes(NumEntries);
-  }
+  // Get the types for the compaction table.
+  unsigned NumEntries = read_vbr_uint();
+  ParseCompactionTypes(NumEntries);
 
   // Compaction tables live in separate blocks so we have to loop
   // until we've read the whole thing.
@@ -1486,7 +1318,6 @@
     // Read the number of Value* entries in the compaction table
     unsigned NumEntries = read_vbr_uint();
     unsigned Ty = 0;
-    unsigned isTypeType = false;
 
     // Decode the type from value read in. Most compaction table
     // planes will have one or two entries in them. If that's the
@@ -1496,42 +1327,35 @@
       // In this case, both low-order bits are set (value 3). This
       // is a signal that the typeid follows.
       NumEntries >>= 2;
-      isTypeType = read_typeid(Ty);
+      Ty = read_vbr_uint();
     } else {
       // In this case, the low-order bits specify the number of entries
       // and the high order bits specify the type.
       Ty = NumEntries >> 2;
-      isTypeType = sanitizeTypeId(Ty);
       NumEntries &= 3;
     }
 
-    // if we're reading a pre 1.3 bytecode file and the type plane
-    // is the "type type", handle it here
-    if (isTypeType) {
-      ParseCompactionTypes(NumEntries);
-    } else {
-      // Make sure we have enough room for the plane.
-      if (Ty >= CompactionValues.size())
-        CompactionValues.resize(Ty+1);
-
-      // Make sure the plane is empty or we have some kind of error.
-      if (!CompactionValues[Ty].empty())
-        error("Compaction table plane contains multiple entries!");
-
-      // Notify handler about the plane.
-      if (Handler) Handler->handleCompactionTablePlane(Ty, NumEntries);
-
-      // Push the implicit zero.
-      CompactionValues[Ty].push_back(Constant::getNullValue(getType(Ty)));
-
-      // Read in each of the entries, put them in the compaction table
-      // and notify the handler that we have a new compaction table value.
-      for (unsigned i = 0; i != NumEntries; ++i) {
-        unsigned ValSlot = read_vbr_uint();
-        Value *V = getGlobalTableValue(Ty, ValSlot);
-        CompactionValues[Ty].push_back(V);
-        if (Handler) Handler->handleCompactionTableValue(i, Ty, ValSlot);
-      }
+    // Make sure we have enough room for the plane.
+    if (Ty >= CompactionValues.size())
+      CompactionValues.resize(Ty+1);
+
+    // Make sure the plane is empty or we have some kind of error.
+    if (!CompactionValues[Ty].empty())
+      error("Compaction table plane contains multiple entries!");
+
+    // Notify handler about the plane.
+    if (Handler) Handler->handleCompactionTablePlane(Ty, NumEntries);
+
+    // Push the implicit zero.
+    CompactionValues[Ty].push_back(Constant::getNullValue(getType(Ty)));
+
+    // Read in each of the entries, put them in the compaction table
+    // and notify the handler that we have a new compaction table value.
+    for (unsigned i = 0; i != NumEntries; ++i) {
+      unsigned ValSlot = read_vbr_uint();
+      Value *V = getGlobalTableValue(Ty, ValSlot);
+      CompactionValues[Ty].push_back(V);
+      if (Handler) Handler->handleCompactionTableValue(i, Ty, ValSlot);
     }
   }
   // Notify handler that the compaction table is done.
@@ -1543,23 +1367,20 @@
 // a derived type, then additional data is read to fill out the type
 // definition.
 const Type *BytecodeReader::ParseType() {
-  unsigned PrimType = 0;
-  if (read_typeid(PrimType))
-    error("Invalid type (type type) in type constants!");
-
+  unsigned PrimType = read_vbr_uint();
   const Type *Result = 0;
   if ((Result = Type::getPrimitiveType((Type::TypeID)PrimType)))
     return Result;
 
   switch (PrimType) {
   case Type::FunctionTyID: {
-    const Type *RetType = readSanitizedType();
+    const Type *RetType = readType();
 
     unsigned NumParams = read_vbr_uint();
 
     std::vector<const Type*> Params;
     while (NumParams--)
-      Params.push_back(readSanitizedType());
+      Params.push_back(readType());
 
     bool isVarArg = Params.size() && Params.back() == Type::VoidTy;
     if (isVarArg) Params.pop_back();
@@ -1568,34 +1389,30 @@
     break;
   }
   case Type::ArrayTyID: {
-    const Type *ElementType = readSanitizedType();
+    const Type *ElementType = readType();
     unsigned NumElements = read_vbr_uint();
     Result =  ArrayType::get(ElementType, NumElements);
     break;
   }
   case Type::PackedTyID: {
-    const Type *ElementType = readSanitizedType();
+    const Type *ElementType = readType();
     unsigned NumElements = read_vbr_uint();
     Result =  PackedType::get(ElementType, NumElements);
     break;
   }
   case Type::StructTyID: {
     std::vector<const Type*> Elements;
-    unsigned Typ = 0;
-    if (read_typeid(Typ))
-      error("Invalid element type (type type) for structure!");
-
+    unsigned Typ = read_vbr_uint();
     while (Typ) {         // List is terminated by void/0 typeid
       Elements.push_back(getType(Typ));
-      if (read_typeid(Typ))
-        error("Invalid element type (type type) for structure!");
+      Typ = read_vbr_uint();
     }
 
     Result = StructType::get(Elements);
     break;
   }
   case Type::PointerTyID: {
-    Result = PointerType::get(readSanitizedType());
+    Result = PointerType::get(readType());
     break;
   }
 
@@ -1676,14 +1493,6 @@
   if (!hasSignlessDivRem && !hasSignlessShrCastSetcc)
     return Opcode;
 
-#if 0
-  // If this is a bytecode format that did not include the unreachable
-  // instruction, bump up the opcode number to adjust it.
-  if (hasNoUnreachableInst)
-    if (Opcode >= 6 && Opcode < 62)
-      ++Opcode;
-#endif
-
   // If this is bytecode version 6, that only had signed Rem and Div 
   // instructions, then we must compensate for those two instructions only.
   // So that the switch statement below works, we're trying to turn this into
@@ -1805,46 +1614,39 @@
   unsigned isExprNumArgs = read_vbr_uint();
 
   if (isExprNumArgs) {
-    if (!hasNoUndefValue) {
-      // 'undef' is encoded with 'exprnumargs' == 1.
-      if (isExprNumArgs == 1)
-        return UndefValue::get(getType(TypeID));
-
-      // Inline asm is encoded with exprnumargs == ~0U.
-      if (isExprNumArgs == ~0U) {
-        std::string AsmStr = read_str();
-        std::string ConstraintStr = read_str();
-        unsigned Flags = read_vbr_uint();
-        
-        const PointerType *PTy = dyn_cast<PointerType>(getType(TypeID));
-        const FunctionType *FTy = 
-          PTy ? dyn_cast<FunctionType>(PTy->getElementType()) : 0;
-
-        if (!FTy || !InlineAsm::Verify(FTy, ConstraintStr))
-          error("Invalid constraints for inline asm");
-        if (Flags & ~1U)
-          error("Invalid flags for inline asm");
-        bool HasSideEffects = Flags & 1;
-        return InlineAsm::get(FTy, AsmStr, ConstraintStr, HasSideEffects);
-      }
+    // 'undef' is encoded with 'exprnumargs' == 1.
+    if (isExprNumArgs == 1)
+      return UndefValue::get(getType(TypeID));
+
+    // Inline asm is encoded with exprnumargs == ~0U.
+    if (isExprNumArgs == ~0U) {
+      std::string AsmStr = read_str();
+      std::string ConstraintStr = read_str();
+      unsigned Flags = read_vbr_uint();
       
-      --isExprNumArgs;
+      const PointerType *PTy = dyn_cast<PointerType>(getType(TypeID));
+      const FunctionType *FTy = 
+        PTy ? dyn_cast<FunctionType>(PTy->getElementType()) : 0;
+
+      if (!FTy || !InlineAsm::Verify(FTy, ConstraintStr))
+        error("Invalid constraints for inline asm");
+      if (Flags & ~1U)
+        error("Invalid flags for inline asm");
+      bool HasSideEffects = Flags & 1;
+      return InlineAsm::get(FTy, AsmStr, ConstraintStr, HasSideEffects);
     }
+    
+    --isExprNumArgs;
 
     // FIXME: Encoding of constant exprs could be much more compact!
     std::vector<Constant*> ArgVec;
     ArgVec.reserve(isExprNumArgs);
     unsigned Opcode = read_vbr_uint();
 
-    // Bytecode files before LLVM 1.4 need have a missing terminator inst.
-    if (hasNoUnreachableInst) Opcode++;
-
     // Read the slot number and types of each of the arguments
     for (unsigned i = 0; i != isExprNumArgs; ++i) {
       unsigned ArgValSlot = read_vbr_uint();
-      unsigned ArgTypeSlot = 0;
-      if (read_typeid(ArgTypeSlot))
-        error("Invalid argument type (type type) for constant value");
+      unsigned ArgTypeSlot = read_vbr_uint();
 
       // Get the arg value from its slot if it exists, otherwise a placeholder
       ArgVec.push_back(getConstantValue(ArgTypeSlot, ArgValSlot));
@@ -1863,20 +1665,6 @@
       return Result;
     } else if (Opcode == Instruction::GetElementPtr) { // GetElementPtr
       std::vector<Constant*> IdxList(ArgVec.begin()+1, ArgVec.end());
-
-      if (hasRestrictedGEPTypes) {
-        const Type *BaseTy = ArgVec[0]->getType();
-        generic_gep_type_iterator<std::vector<Constant*>::iterator>
-          GTI = gep_type_begin(BaseTy, IdxList.begin(), IdxList.end()),
-          E = gep_type_end(BaseTy, IdxList.begin(), IdxList.end());
-        for (unsigned i = 0; GTI != E; ++GTI, ++i)
-          if (isa<StructType>(*GTI)) {
-            if (IdxList[i]->getType() != Type::UByteTy)
-              error("Invalid index for getelementptr!");
-            IdxList[i] = ConstantExpr::getCast(IdxList[i], Type::UIntTy);
-          }
-      }
-
       Constant* Result = ConstantExpr::getGetElementPtr(ArgVec[0], IdxList);
       if (Handler) Handler->handleConstantExpression(Opcode, ArgVec, Result);
       return Result;
@@ -2068,9 +1856,7 @@
 /// Parse the constant strings section.
 void BytecodeReader::ParseStringConstants(unsigned NumEntries, ValueTable &Tab){
   for (; NumEntries; --NumEntries) {
-    unsigned Typ = 0;
-    if (read_typeid(Typ))
-      error("Invalid type (type type) for string constant");
+    unsigned Typ = read_vbr_uint();
     const Type *Ty = getType(Typ);
     if (!isa<ArrayType>(Ty))
       error("String constant data invalid!");
@@ -2106,22 +1892,16 @@
   /// In LLVM 1.3 Type does not derive from Value so the types
   /// do not occupy a plane. Consequently, we read the types
   /// first in the constant pool.
-  if (isFunction && !hasTypeDerivedFromValue) {
+  if (isFunction) {
     unsigned NumEntries = read_vbr_uint();
     ParseTypes(TypeTab, NumEntries);
   }
 
   while (moreInBlock()) {
     unsigned NumEntries = read_vbr_uint();
-    unsigned Typ = 0;
-    bool isTypeType = read_typeid(Typ);
+    unsigned Typ = read_vbr_uint();
 
-    /// In LLVM 1.2 and before, Types were written to the
-    /// bytecode file in the "Type Type" plane (#12).
-    /// In 1.3 plane 12 is now the label plane.  Handle this here.
-    if (isTypeType) {
-      ParseTypes(TypeTab, NumEntries);
-    } else if (Typ == Type::VoidTyID) {
+    if (Typ == Type::VoidTyID) {
       /// Use of Type::VoidTyID is a misnomer. It actually means
       /// that the following plane is constant strings
       assert(&Tab == &ModuleValues && "Cannot read strings in functions!");
@@ -2213,20 +1993,6 @@
       ParseCompactionTable();
       break;
 
-    case BytecodeFormat::BasicBlock: {
-      if (!InsertedArguments) {
-        // Insert arguments into the value table before we parse the first basic
-        // block in the function, but after we potentially read in the
-        // compaction table.
-        insertArguments(F);
-        InsertedArguments = true;
-      }
-
-      BasicBlock *BB = ParseBasicBlock(BlockNum++);
-      F->getBasicBlockList().push_back(BB);
-      break;
-    }
-
     case BytecodeFormat::InstructionListBlockID: {
       // Insert arguments into the value table before we parse the instruction
       // list for the function, but after we potentially read in the compaction
@@ -2253,9 +2019,6 @@
       break;
     }
     BlockEnd = MyEnd;
-
-    // Malformed bc file if read past end of block.
-    align32();
   }
 
   // Make sure there were no references to non-existant basic blocks.
@@ -2382,11 +2145,6 @@
 void BytecodeReader::ParseGlobalTypes() {
   // Read the number of types
   unsigned NumEntries = read_vbr_uint();
-
-  // Ignore the type plane identifier for types if the bc file is pre 1.3
-  if (hasTypeDerivedFromValue)
-    read_vbr_uint();
-
   ParseTypes(ModuleTypes, NumEntries);
 }
 
@@ -2405,8 +2163,6 @@
     // VarType Fields: bit0 = isConstant, bit1 = hasInitializer, bit2,3,4 =
     // Linkage, bit4+ = slot#
     unsigned SlotNo = VarType >> 5;
-    if (sanitizeTypeId(SlotNo))
-      error("Invalid type (type type) for global var!");
     unsigned LinkageID = (VarType >> 2) & 7;
     bool isConstant = VarType & 1;
     bool hasInitializer = (VarType & 2) != 0;
@@ -2477,9 +2233,6 @@
   // Read the function objects for all of the functions that are coming
   unsigned FnSignature = read_vbr_uint();
 
-  if (hasNoFlagsForFunctions)
-    FnSignature = (FnSignature << 5) + 1;
-
   // List is terminated by VoidTy.
   while (((FnSignature & (~0U >> 1)) >> 5) != Type::VoidTyID) {
     const Type *Ty = getType((FnSignature & (~0U >> 1)) >> 5);
@@ -2535,8 +2288,6 @@
 
     // Get the next function signature.
     FnSignature = read_vbr_uint();
-    if (hasNoFlagsForFunctions)
-      FnSignature = (FnSignature << 5) + 1;
   }
 
   // Now that the function signature list is set up, reverse it so that we can
@@ -2548,37 +2299,32 @@
   /// into this to get their section name.
   std::vector<std::string> SectionNames;
   
-  if (hasInconsistentModuleGlobalInfo) {
-    align32();
-  } else if (!hasNoDependentLibraries) {
-    // If this bytecode format has dependent library information in it, read in
-    // the number of dependent library items that follow.
-    unsigned num_dep_libs = read_vbr_uint();
-    std::string dep_lib;
-    while (num_dep_libs--) {
-      dep_lib = read_str();
-      TheModule->addLibrary(dep_lib);
-      if (Handler)
-        Handler->handleDependentLibrary(dep_lib);
-    }
-
-    // Read target triple and place into the module.
-    std::string triple = read_str();
-    TheModule->setTargetTriple(triple);
+  // Read in the dependent library information.
+  unsigned num_dep_libs = read_vbr_uint();
+  std::string dep_lib;
+  while (num_dep_libs--) {
+    dep_lib = read_str();
+    TheModule->addLibrary(dep_lib);
     if (Handler)
-      Handler->handleTargetTriple(triple);
-    
-    if (!hasAlignment && At != BlockEnd) {
-      // If the file has section info in it, read the section names now.
-      unsigned NumSections = read_vbr_uint();
-      while (NumSections--)
-        SectionNames.push_back(read_str());
-    }
-    
-    // If the file has module-level inline asm, read it now.
-    if (!hasAlignment && At != BlockEnd)
-      TheModule->setModuleInlineAsm(read_str());
+      Handler->handleDependentLibrary(dep_lib);
+  }
+
+  // Read target triple and place into the module.
+  std::string triple = read_str();
+  TheModule->setTargetTriple(triple);
+  if (Handler)
+    Handler->handleTargetTriple(triple);
+  
+  if (At != BlockEnd) {
+    // If the file has section info in it, read the section names now.
+    unsigned NumSections = read_vbr_uint();
+    while (NumSections--)
+      SectionNames.push_back(read_str());
   }
+  
+  // If the file has module-level inline asm, read it now.
+  if (At != BlockEnd)
+    TheModule->setModuleInlineAsm(read_str());
 
   // If any globals are in specified sections, assign them now.
   for (std::map<GlobalValue*, unsigned>::iterator I = SectionID.begin(), E =
@@ -2613,97 +2359,22 @@
 
   RevisionNum = Version >> 4;
 
-  // Default values for the current bytecode version
-  hasInconsistentModuleGlobalInfo = false;
-  hasExplicitPrimitiveZeros = false;
-  hasRestrictedGEPTypes = false;
-  hasTypeDerivedFromValue = false;
-  hasLongBlockHeaders = false;
-  has32BitTypes = false;
-  hasNoDependentLibraries = false;
-  hasAlignment = false;
-  hasNoUndefValue = false;
-  hasNoFlagsForFunctions = false;
-  hasNoUnreachableInst = false;
+  // Default the backwards compatibility flag values for the current BC version
   hasSignlessDivRem = false;
   hasSignlessShrCastSetcc = false;
 
   // Determine which backwards compatibility flags to set based on the
   // bytecode file's version number
   switch (RevisionNum) {
-  case 0:               //  LLVM 1.0, 1.1 (Released)
-    // Base LLVM 1.0 bytecode format.
-    hasInconsistentModuleGlobalInfo = true;
-    hasExplicitPrimitiveZeros = true;
-
-    // FALL THROUGH
-
-  case 1:               // LLVM 1.2 (Released)
-    // LLVM 1.2 added explicit support for emitting strings efficiently.
-
-    // Also, it fixed the problem where the size of the ModuleGlobalInfo block
-    // included the size for the alignment at the end, where the rest of the
-    // blocks did not.
-
-    // LLVM 1.2 and before required that GEP indices be ubyte constants for
-    // structures and longs for sequential types.
-    hasRestrictedGEPTypes = true;
-
-    // LLVM 1.2 and before had the Type class derive from Value class. This
-    // changed in release 1.3 and consequently LLVM 1.3 bytecode files are
-    // written differently because Types can no longer be part of the
-    // type planes for Values.
-    hasTypeDerivedFromValue = true;
-
-    // FALL THROUGH
-
-  case 2:                // 1.2.5 (Not Released)
-
-    // LLVM 1.2 and earlier had two-word block headers. This is a bit wasteful,
-    // especially for small files where the 8 bytes per block is a large
-    // fraction of the total block size. In LLVM 1.3, the block type and length
-    // are compressed into a single 32-bit unsigned integer. 27 bits for length,
-    // 5 bits for block type.
-    hasLongBlockHeaders = true;
-
-    // LLVM 1.2 and earlier wrote type slot numbers as vbr_uint32. In LLVM 1.3
-    // this has been reduced to vbr_uint24. It shouldn't make much difference
-    // since we haven't run into a module with > 24 million types, but for
-    // safety the 24-bit restriction has been enforced in 1.3 to free some bits
-    // in various places and to ensure consistency.
-    has32BitTypes = true;
-
-    // LLVM 1.2 and earlier did not provide a target triple nor a list of
-    // libraries on which the bytecode is dependent. LLVM 1.3 provides these
-    // features, for use in future versions of LLVM.
-    hasNoDependentLibraries = true;
-
-    // FALL THROUGH
-
-  case 3:               // LLVM 1.3 (Released)
-    // LLVM 1.3 and earlier caused alignment bytes to be written on some block
-    // boundaries and at the end of some strings. In extreme cases (e.g. lots
-    // of GEP references to a constant array), this can increase the file size
-    // by 30% or more. In version 1.4 alignment is done away with completely.
-    hasAlignment = true;
-
-    // FALL THROUGH
-
-  case 4:               // 1.3.1 (Not Released)
-    // In version 4, we did not support the 'undef' constant.
-    hasNoUndefValue = true;
-
-    // In version 4 and above, we did not include space for flags for functions
-    // in the module info block.
-    hasNoFlagsForFunctions = true;
-
-    // In version 4 and above, we did not include the 'unreachable' instruction
-    // in the opcode numbering in the bytecode file.
-    hasNoUnreachableInst = true;
-
-    // FALL THROUGH
+  case 0: //  LLVM 1.0, 1.1 (Released)
+  case 1: // LLVM 1.2 (Released)
+  case 2: // 1.2.5 (Not Released)
+  case 3: // LLVM 1.3 (Released)
+  case 4: // 1.3.1 (Not Released)
+    error("Old bytecode formats no longer supported");
+    break;
 
-  case 5:               // 1.4 (Released)
+  case 5: // 1.4 (Released)
     // In version 6, the Div and Rem instructions were converted to their
     // signed and floating point counterparts: UDiv, SDiv, FDiv, URem, SRem, 
     // and FRem. Versions prior to 6 need to indicate that they have the 
@@ -2712,7 +2383,7 @@
 
     // FALL THROUGH
     
-  case 6:               // Signless Rem & Div Implementation (1.9 release)
+  case 6: // 1.9 (Released) 
     // In version 5 and prior, instructions were signless while integer types
     // were signed. In version 6, instructions became signed and types became
     // signless. For example in version 5 we have the DIV instruction but in
@@ -2747,7 +2418,6 @@
 
   // Read into instance variables...
   ParseVersionInfo();
-  align32();
 
   bool SeenModuleGlobalInfo = false;
   bool SeenGlobalTypePlane = false;
@@ -2794,7 +2464,6 @@
       break;
     }
     BlockEnd = MyEnd;
-    align32();
   }
 
   // After the module constant pool has been read, we can safely initialize


Index: llvm/lib/Bytecode/Reader/Reader.h
diff -u llvm/lib/Bytecode/Reader/Reader.h:1.37 llvm/lib/Bytecode/Reader/Reader.h:1.38
--- llvm/lib/Bytecode/Reader/Reader.h:1.37	Sat Nov 11 05:54:25 2006
+++ llvm/lib/Bytecode/Reader/Reader.h	Mon Nov 13 22:47:22 2006
@@ -292,69 +292,6 @@
 
   /// Flags to distinguish LLVM 1.0 & 1.1 bytecode formats (revision #0)
 
-  /// Revision #0 had an explicit alignment of data only for the
-  /// ModuleGlobalInfo block.  This was fixed to be like all other blocks in 1.2
-  bool hasInconsistentModuleGlobalInfo;
-
-  /// Revision #0 also explicitly encoded zero values for primitive types like
-  /// int/sbyte/etc.
-  bool hasExplicitPrimitiveZeros;
-
-  // Flags to control features specific the LLVM 1.2 and before (revision #1)
-
-  /// LLVM 1.2 and earlier required that getelementptr structure indices were
-  /// ubyte constants and that sequential type indices were longs.
-  bool hasRestrictedGEPTypes;
-
-  /// LLVM 1.2 and earlier had class Type deriving from Value and the Type
-  /// objects were located in the "Type Type" plane of various lists in read
-  /// by the bytecode reader. In LLVM 1.3 this is no longer the case. Types are
-  /// completely distinct from Values. Consequently, Types are written in fixed
-  /// locations in LLVM 1.3. This flag indicates that the older Type derived
-  /// from Value style of bytecode file is being read.
-  bool hasTypeDerivedFromValue;
-
-  /// LLVM 1.2 and earlier encoded block headers as two uint (8 bytes), one for
-  /// the size and one for the type. This is a bit wasteful, especially for
-  /// small files where the 8 bytes per block is a large fraction of the total
-  /// block size. In LLVM 1.3, the block type and length are encoded into a
-  /// single uint32 by restricting the number of block types (limit 31) and the
-  /// maximum size of a block (limit 2^27-1=134,217,727). Note that the module
-  /// block still uses the 8-byte format so the maximum size of a file can be
-  /// 2^32-1 bytes long.
-  bool hasLongBlockHeaders;
-
-  /// LLVM 1.2 and earlier wrote type slot numbers as vbr_uint32. In LLVM 1.3
-  /// this has been reduced to vbr_uint24. It shouldn't make much difference
-  /// since we haven't run into a module with > 24 million types, but for safety
-  /// the 24-bit restriction has been enforced in 1.3 to free some bits in
-  /// various places and to ensure consistency. In particular, global vars are
-  /// restricted to 24-bits.
-  bool has32BitTypes;
-
-  /// LLVM 1.2 and earlier did not provide a target triple nor a list of
-  /// libraries on which the bytecode is dependent. LLVM 1.3 provides these
-  /// features, for use in future versions of LLVM.
-  bool hasNoDependentLibraries;
-
-  /// LLVM 1.3 and earlier caused blocks and other fields to start on 32-bit
-  /// aligned boundaries. This can lead to as much as 30% bytecode size overhead
-  /// in various corner cases (lots of long instructions). In LLVM 1.4,
-  /// alignment of bytecode fields was done away with completely.
-  bool hasAlignment;
-
-  // In version 4 and earlier, the bytecode format did not support the 'undef'
-  // constant.
-  bool hasNoUndefValue;
-
-  // In version 4 and earlier, the bytecode format did not save space for flags
-  // in the global info block for functions.
-  bool hasNoFlagsForFunctions;
-
-  // In version 4 and earlier, there was no opcode space reserved for the
-  // unreachable instruction.
-  bool hasNoUnreachableInst;
-
   // In version 6, the Div and Rem instructions were converted to be the 
   // signed instructions UDiv, SDiv, URem and SRem. This flag will be true if
   // the Div and Rem instructions are signless (ver 5 and prior).
@@ -453,12 +390,8 @@
   /// @brief Converts a type slot number to its Type*
   const Type *getType(unsigned ID);
 
-  /// @brief Converts a pre-sanitized type slot number to its Type* and
-  /// sanitizes the type id.
-  inline const Type* getSanitizedType(unsigned& ID );
-
-  /// @brief Read in and get a sanitized type id
-  inline const Type* readSanitizedType();
+  /// @brief Read in a type id and turn it into a Type* 
+  inline const Type* readType();
 
   /// @brief Converts a Type* to its type slot number
   unsigned getTypeSlot(const Type *Ty);
@@ -559,12 +492,6 @@
 
   /// @brief Read a bytecode block header
   inline void read_block(unsigned &Type, unsigned &Size);
-
-  /// @brief Read a type identifier and sanitize it.
-  inline bool read_typeid(unsigned &TypeId);
-
-  /// @brief Recalculate type ID for pre 1.3 bytecode files.
-  inline bool sanitizeTypeId(unsigned &TypeId );
 /// @}
 };
 






More information about the llvm-commits mailing list