[llvm-commits] CVS: llvm/lib/Bytecode/Reader/Analyzer.cpp Reader.cpp Reader.h
Reid Spencer
reid at x10sys.com
Mon Nov 13 20:47:39 PST 2006
Changes in directory llvm/lib/Bytecode/Reader:
Analyzer.cpp updated: 1.24 -> 1.25
Reader.cpp updated: 1.204 -> 1.205
Reader.h updated: 1.37 -> 1.38
---
Log message:
Discard code that supported old bytecode formats. This makes the Bytecode
Reader code much easier to read and maintain. Backwards compatibility with
bytecode format version 5 and later has been retained; older formats will
now produce an error.
---
Diffs of the changes: (+146 -550)
Analyzer.cpp | 2
Reader.cpp | 617 +++++++++++++----------------------------------------------
Reader.h | 77 -------
3 files changed, 146 insertions(+), 550 deletions(-)
Index: llvm/lib/Bytecode/Reader/Analyzer.cpp
diff -u llvm/lib/Bytecode/Reader/Analyzer.cpp:1.24 llvm/lib/Bytecode/Reader/Analyzer.cpp:1.25
--- llvm/lib/Bytecode/Reader/Analyzer.cpp:1.24 Thu Nov 2 19:44:51 2006
+++ llvm/lib/Bytecode/Reader/Analyzer.cpp Mon Nov 13 22:47:22 2006
@@ -532,7 +532,7 @@
assert(BType >= BytecodeFormat::ModuleBlockID);
assert(BType < BytecodeFormat::NumberOfBlockIDs);
bca.BlockSizes[
- llvm::BytecodeFormat::CompressedBytecodeBlockIdentifiers(BType)] += Size;
+ llvm::BytecodeFormat::BytecodeBlockIdentifiers(BType)] += Size;
if (bca.version < 3) // Check for long block headers versions
bca.BlockSizes[llvm::BytecodeFormat::Reserved_DoNotUse] += 8;
Index: llvm/lib/Bytecode/Reader/Reader.cpp
diff -u llvm/lib/Bytecode/Reader/Reader.cpp:1.204 llvm/lib/Bytecode/Reader/Reader.cpp:1.205
--- llvm/lib/Bytecode/Reader/Reader.cpp:1.204 Wed Nov 8 15:27:54 2006
+++ llvm/lib/Bytecode/Reader/Reader.cpp Mon Nov 13 22:47:22 2006
@@ -73,18 +73,6 @@
" block.");
}
-/// Align the buffer position to a 32 bit boundary
-inline void BytecodeReader::align32() {
- if (hasAlignment) {
- BufPtr Save = At;
- At = (const unsigned char *)((intptr_t)(At+3) & (~3UL));
- if (At > Save)
- if (Handler) Handler->handleAlignment(At - Save);
- if (At > BlockEnd)
- error("Ran out of data while aligning!");
- }
-}
-
/// Read a whole unsigned integer
inline unsigned BytecodeReader::read_uint() {
if (At+4 > BlockEnd)
@@ -179,43 +167,9 @@
/// Read a block header and obtain its type and size
inline void BytecodeReader::read_block(unsigned &Type, unsigned &Size) {
- if ( hasLongBlockHeaders ) {
- Type = read_uint();
- Size = read_uint();
- switch (Type) {
- case BytecodeFormat::Reserved_DoNotUse :
- error("Reserved_DoNotUse used as Module Type?");
- Type = BytecodeFormat::ModuleBlockID; break;
- case BytecodeFormat::Module:
- Type = BytecodeFormat::ModuleBlockID; break;
- case BytecodeFormat::Function:
- Type = BytecodeFormat::FunctionBlockID; break;
- case BytecodeFormat::ConstantPool:
- Type = BytecodeFormat::ConstantPoolBlockID; break;
- case BytecodeFormat::SymbolTable:
- Type = BytecodeFormat::SymbolTableBlockID; break;
- case BytecodeFormat::ModuleGlobalInfo:
- Type = BytecodeFormat::ModuleGlobalInfoBlockID; break;
- case BytecodeFormat::GlobalTypePlane:
- Type = BytecodeFormat::GlobalTypePlaneBlockID; break;
- case BytecodeFormat::InstructionList:
- Type = BytecodeFormat::InstructionListBlockID; break;
- case BytecodeFormat::CompactionTable:
- Type = BytecodeFormat::CompactionTableBlockID; break;
- case BytecodeFormat::BasicBlock:
- /// This block type isn't used after version 1.1. However, we have to
- /// still allow the value in case this is an old bc format file.
- /// We just let its value creep thru.
- break;
- default:
- error("Invalid block id found: " + utostr(Type));
- break;
- }
- } else {
- Size = read_uint();
- Type = Size & 0x1F; // mask low order five bits
- Size >>= 5; // get rid of five low order bits, leaving high 27
- }
+ Size = read_uint(); // Read the header
+ Type = Size & 0x1F; // mask low order five bits to get type
+ Size >>= 5; // high order 27 bits is the size
BlockStart = At;
if (At + Size > BlockEnd)
error("Attempt to size a block past end of memory");
@@ -223,56 +177,13 @@
if (Handler) Handler->handleBlock(Type, BlockStart, Size);
}
-
-/// In LLVM 1.2 and before, Types were derived from Value and so they were
-/// written as part of the type planes along with any other Value. In LLVM
-/// 1.3 this changed so that Type does not derive from Value. Consequently,
-/// the BytecodeReader's containers for Values can't contain Types because
-/// there's no inheritance relationship. This means that the "Type Type"
-/// plane is defunct along with the Type::TypeTyID TypeID. In LLVM 1.3
-/// whenever a bytecode construct must have both types and values together,
-/// the types are always read/written first and then the Values. Furthermore
-/// since Type::TypeTyID no longer exists, its value (12) now corresponds to
-/// Type::LabelTyID. In order to overcome this we must "sanitize" all the
-/// type TypeIDs we encounter. For LLVM 1.3 bytecode files, there's no change.
-/// For LLVM 1.2 and before, this function will decrement the type id by
-/// one to account for the missing Type::TypeTyID enumerator if the value is
-/// larger than 12 (Type::LabelTyID). If the value is exactly 12, then this
-/// function returns true, otherwise false. This helps detect situations
-/// where the pre 1.3 bytecode is indicating that what follows is a type.
-/// @returns true iff type id corresponds to pre 1.3 "type type"
-inline bool BytecodeReader::sanitizeTypeId(unsigned &TypeId) {
- if (hasTypeDerivedFromValue) { /// do nothing if 1.3 or later
- if (TypeId == Type::LabelTyID) {
- TypeId = Type::VoidTyID; // sanitize it
- return true; // indicate we got TypeTyID in pre 1.3 bytecode
- } else if (TypeId > Type::LabelTyID)
- --TypeId; // shift all planes down because type type plane is missing
- }
- return false;
-}
-
-/// Reads a vbr uint to read in a type id and does the necessary
-/// conversion on it by calling sanitizeTypeId.
-/// @returns true iff \p TypeId read corresponds to a pre 1.3 "type type"
-/// @see sanitizeTypeId
-inline bool BytecodeReader::read_typeid(unsigned &TypeId) {
- TypeId = read_vbr_uint();
- if ( !has32BitTypes )
- if ( TypeId == 0x00FFFFFF )
- TypeId = read_vbr_uint();
- return sanitizeTypeId(TypeId);
-}
-
//===----------------------------------------------------------------------===//
// IR Lookup Methods
//===----------------------------------------------------------------------===//
/// Determine if a type id has an implicit null value
inline bool BytecodeReader::hasImplicitNull(unsigned TyID) {
- if (!hasExplicitPrimitiveZeros)
- return TyID != Type::LabelTyID && TyID != Type::VoidTyID;
- return TyID >= Type::FirstDerivedTyID;
+ return TyID != Type::LabelTyID && TyID != Type::VoidTyID;
}
/// Obtain a type given a typeid and account for things like compaction tables,
@@ -304,23 +215,11 @@
return Type::VoidTy;
}
-/// Get a sanitized type id. This just makes sure that the \p ID
-/// is both sanitized and not the "type type" of pre-1.3 bytecode.
-/// @see sanitizeTypeId
-inline const Type* BytecodeReader::getSanitizedType(unsigned& ID) {
- if (sanitizeTypeId(ID))
- error("Invalid type id encountered");
- return getType(ID);
-}
-
-/// This method just saves some coding. It uses read_typeid to read
+/// This method just saves some coding. It uses read_vbr_uint to read
/// in a sanitized type id, errors that its not the type type, and
/// then calls getType to return the type value.
-inline const Type* BytecodeReader::readSanitizedType() {
- unsigned ID;
- if (read_typeid(ID))
- error("Invalid type id encountered");
- return getType(ID);
+inline const Type* BytecodeReader::readType() {
+ return getType(read_vbr_uint());
}
/// Get the slot number associated with a type accounting for primitive
@@ -590,12 +489,6 @@
if (!hasSignlessDivRem && !hasSignlessShrCastSetcc)
return 0; // The opcode is fine the way it is.
- // If this is a bytecode format that did not include the unreachable
- // instruction, bump up the opcode number to adjust it.
- if (hasNoUnreachableInst)
- if (Opcode >= 6 && Opcode < 62)
- ++Opcode;
-
// If this is bytecode version 6, that only had signed Rem and Div
// instructions, then we must compensate for those two instructions only.
// So that the switch statement below works, we're trying to turn this into
@@ -779,7 +672,7 @@
CallInst* bar = new CallInst(NF, getValue(iType, Oprnds[0]));
BB->getInstList().push_back(bar);
BB->getInstList().push_back(new StoreInst(bar, foo));
- Instruction* tmp = new VAArgInst(foo, getSanitizedType(Oprnds[1]));
+ Instruction* tmp = new VAArgInst(foo, getType(Oprnds[1]));
BB->getInstList().push_back(tmp);
Result = new LoadInst(foo);
break;
@@ -803,7 +696,7 @@
CallInst* bar = new CallInst(NF, getValue(iType, Oprnds[0]));
BB->getInstList().push_back(bar);
BB->getInstList().push_back(new StoreInst(bar, foo));
- Result = new VAArgInst(foo, getSanitizedType(Oprnds[1]));
+ Result = new VAArgInst(foo, getType(Oprnds[1]));
break;
}
case 34: // Select
@@ -919,11 +812,10 @@
for (unsigned i = 0; i != NumOprnds; ++i)
Oprnds[i] = read_vbr_uint();
- align32();
break;
}
- const Type *InstTy = getSanitizedType(iType);
+ const Type *InstTy = getType(iType);
// Make the necessary adjustments for dealing with backwards compatibility
// of opcodes.
@@ -955,7 +847,7 @@
if (Oprnds.size() != 2)
error("Invalid VAArg instruction!");
Result = new VAArgInst(getValue(iType, Oprnds[0]),
- getSanitizedType(Oprnds[1]));
+ getType(Oprnds[1]));
break;
case Instruction::ExtractElement: {
if (Oprnds.size() != 2)
@@ -1001,7 +893,7 @@
if (Oprnds.size() != 2)
error("Invalid Cast instruction!");
Result = new CastInst(getValue(iType, Oprnds[0]),
- getSanitizedType(Oprnds[1]));
+ getType(Oprnds[1]));
break;
case Instruction::Select:
if (Oprnds.size() != 3)
@@ -1235,34 +1127,22 @@
unsigned ValIdx = Oprnds[i];
unsigned IdxTy = 0;
- if (!hasRestrictedGEPTypes) {
- // Struct indices are always uints, sequential type indices can be
- // any of the 32 or 64-bit integer types. The actual choice of
- // type is encoded in the low two bits of the slot number.
- if (isa<StructType>(TopTy))
- IdxTy = Type::UIntTyID;
- else {
- switch (ValIdx & 3) {
- default:
- case 0: IdxTy = Type::UIntTyID; break;
- case 1: IdxTy = Type::IntTyID; break;
- case 2: IdxTy = Type::ULongTyID; break;
- case 3: IdxTy = Type::LongTyID; break;
- }
- ValIdx >>= 2;
+ // Struct indices are always uints, sequential type indices can be
+ // any of the 32 or 64-bit integer types. The actual choice of
+ // type is encoded in the low two bits of the slot number.
+ if (isa<StructType>(TopTy))
+ IdxTy = Type::UIntTyID;
+ else {
+ switch (ValIdx & 3) {
+ default:
+ case 0: IdxTy = Type::UIntTyID; break;
+ case 1: IdxTy = Type::IntTyID; break;
+ case 2: IdxTy = Type::ULongTyID; break;
+ case 3: IdxTy = Type::LongTyID; break;
}
- } else {
- IdxTy = isa<StructType>(TopTy) ? Type::UByteTyID : Type::LongTyID;
+ ValIdx >>= 2;
}
-
Idx.push_back(getValue(IdxTy, ValIdx));
-
- // Convert ubyte struct indices into uint struct indices.
- if (isa<StructType>(TopTy) && hasRestrictedGEPTypes)
- if (ConstantInt *C = dyn_cast<ConstantInt>(Idx.back()))
- if (C->getType() == Type::UByteTy)
- Idx[Idx.size()-1] = ConstantExpr::getCast(C, Type::UIntTy);
-
NextTy = GetElementPtrInst::getIndexedType(InstTy, Idx, true);
}
@@ -1309,16 +1189,16 @@
}
/// Get a particular numbered basic block, which might be a forward reference.
-/// This works together with ParseBasicBlock to handle these forward references
-/// in a clean manner. This function is used when constructing phi, br, switch,
-/// and other instructions that reference basic blocks. Blocks are numbered
-/// sequentially as they appear in the function.
+/// This works together with ParseInstructionList to handle these forward
+/// references in a clean manner. This function is used when constructing
+/// phi, br, switch, and other instructions that reference basic blocks.
+/// Blocks are numbered sequentially as they appear in the function.
BasicBlock *BytecodeReader::getBasicBlock(unsigned ID) {
// Make sure there is room in the table...
if (ParsedBasicBlocks.size() <= ID) ParsedBasicBlocks.resize(ID+1);
- // First check to see if this is a backwards reference, i.e., ParseBasicBlock
- // has already created this block, or if the forward reference has already
+ // First check to see if this is a backwards reference, i.e. this block
+ // has already been created, or if the forward reference has already
// been created.
if (ParsedBasicBlocks[ID])
return ParsedBasicBlocks[ID];
@@ -1328,34 +1208,10 @@
return ParsedBasicBlocks[ID] = new BasicBlock();
}
-/// In LLVM 1.0 bytecode files, we used to output one basicblock at a time.
-/// This method reads in one of the basicblock packets. This method is not used
-/// for bytecode files after LLVM 1.0
-/// @returns The basic block constructed.
-BasicBlock *BytecodeReader::ParseBasicBlock(unsigned BlockNo) {
- if (Handler) Handler->handleBasicBlockBegin(BlockNo);
-
- BasicBlock *BB = 0;
-
- if (ParsedBasicBlocks.size() == BlockNo)
- ParsedBasicBlocks.push_back(BB = new BasicBlock());
- else if (ParsedBasicBlocks[BlockNo] == 0)
- BB = ParsedBasicBlocks[BlockNo] = new BasicBlock();
- else
- BB = ParsedBasicBlocks[BlockNo];
-
- std::vector<unsigned> Operands;
- while (moreInBlock())
- ParseInstruction(Operands, BB);
-
- if (Handler) Handler->handleBasicBlockEnd(BlockNo);
- return BB;
-}
-
/// Parse all of the BasicBlock's & Instruction's in the body of a function.
/// In post 1.0 bytecode files, we no longer emit basic block individually,
/// in order to avoid per-basic-block overhead.
-/// @returns Rhe number of basic blocks encountered.
+/// @returns the number of basic blocks encountered.
unsigned BytecodeReader::ParseInstructionList(Function* F) {
unsigned BlockNo = 0;
std::vector<unsigned> Args;
@@ -1401,52 +1257,35 @@
E = CurrentFunction->end(); I != E; ++I)
BBMap.push_back(I);
- /// In LLVM 1.3 we write types separately from values so
- /// The types are always first in the symbol table. This is
- /// because Type no longer derives from Value.
- if (!hasTypeDerivedFromValue) {
- // Symtab block header: [num entries]
- unsigned NumEntries = read_vbr_uint();
- for (unsigned i = 0; i < NumEntries; ++i) {
- // Symtab entry: [def slot #][name]
- unsigned slot = read_vbr_uint();
- std::string Name = read_str();
- const Type* T = getType(slot);
- ST->insert(Name, T);
- }
+ // Symtab block header: [num entries]
+ unsigned NumEntries = read_vbr_uint();
+ for (unsigned i = 0; i < NumEntries; ++i) {
+ // Symtab entry: [def slot #][name]
+ unsigned slot = read_vbr_uint();
+ std::string Name = read_str();
+ const Type* T = getType(slot);
+ ST->insert(Name, T);
}
while (moreInBlock()) {
// Symtab block header: [num entries][type id number]
unsigned NumEntries = read_vbr_uint();
- unsigned Typ = 0;
- bool isTypeType = read_typeid(Typ);
+ unsigned Typ = read_vbr_uint();
for (unsigned i = 0; i != NumEntries; ++i) {
// Symtab entry: [def slot #][name]
unsigned slot = read_vbr_uint();
std::string Name = read_str();
-
- // if we're reading a pre 1.3 bytecode file and the type plane
- // is the "type type", handle it here
- if (isTypeType) {
- const Type* T = getType(slot);
- if (T == 0)
- error("Failed type look-up for name '" + Name + "'");
- ST->insert(Name, T);
- continue; // code below must be short circuited
+ Value *V = 0;
+ if (Typ == Type::LabelTyID) {
+ if (slot < BBMap.size())
+ V = BBMap[slot];
} else {
- Value *V = 0;
- if (Typ == Type::LabelTyID) {
- if (slot < BBMap.size())
- V = BBMap[slot];
- } else {
- V = getValue(Typ, slot, false); // Find mapping...
- }
- if (V == 0)
- error("Failed value look-up for name '" + Name + "'");
- V->setName(Name);
+ V = getValue(Typ, slot, false); // Find mapping...
}
+ if (V == 0)
+ error("Failed value look-up for name '" + Name + "'");
+ V->setName(Name);
}
}
checkPastBlockEnd("Symbol Table");
@@ -1456,9 +1295,7 @@
/// Read in the types portion of a compaction table.
void BytecodeReader::ParseCompactionTypes(unsigned NumEntries) {
for (unsigned i = 0; i != NumEntries; ++i) {
- unsigned TypeSlot = 0;
- if (read_typeid(TypeSlot))
- error("Invalid type in compaction table: type type");
+ unsigned TypeSlot = read_vbr_uint();
const Type *Typ = getGlobalTableType(TypeSlot);
CompactionTypes.push_back(std::make_pair(Typ, TypeSlot));
if (Handler) Handler->handleCompactionTableType(i, TypeSlot, Typ);
@@ -1471,14 +1308,9 @@
// Notify handler that we're beginning a compaction table.
if (Handler) Handler->handleCompactionTableBegin();
- // In LLVM 1.3 Type no longer derives from Value. So,
- // we always write them first in the compaction table
- // because they can't occupy a "type plane" where the
- // Values reside.
- if (! hasTypeDerivedFromValue) {
- unsigned NumEntries = read_vbr_uint();
- ParseCompactionTypes(NumEntries);
- }
+ // Get the types for the compaction table.
+ unsigned NumEntries = read_vbr_uint();
+ ParseCompactionTypes(NumEntries);
// Compaction tables live in separate blocks so we have to loop
// until we've read the whole thing.
@@ -1486,7 +1318,6 @@
// Read the number of Value* entries in the compaction table
unsigned NumEntries = read_vbr_uint();
unsigned Ty = 0;
- unsigned isTypeType = false;
// Decode the type from value read in. Most compaction table
// planes will have one or two entries in them. If that's the
@@ -1496,42 +1327,35 @@
// In this case, both low-order bits are set (value 3). This
// is a signal that the typeid follows.
NumEntries >>= 2;
- isTypeType = read_typeid(Ty);
+ Ty = read_vbr_uint();
} else {
// In this case, the low-order bits specify the number of entries
// and the high order bits specify the type.
Ty = NumEntries >> 2;
- isTypeType = sanitizeTypeId(Ty);
NumEntries &= 3;
}
- // if we're reading a pre 1.3 bytecode file and the type plane
- // is the "type type", handle it here
- if (isTypeType) {
- ParseCompactionTypes(NumEntries);
- } else {
- // Make sure we have enough room for the plane.
- if (Ty >= CompactionValues.size())
- CompactionValues.resize(Ty+1);
-
- // Make sure the plane is empty or we have some kind of error.
- if (!CompactionValues[Ty].empty())
- error("Compaction table plane contains multiple entries!");
-
- // Notify handler about the plane.
- if (Handler) Handler->handleCompactionTablePlane(Ty, NumEntries);
-
- // Push the implicit zero.
- CompactionValues[Ty].push_back(Constant::getNullValue(getType(Ty)));
-
- // Read in each of the entries, put them in the compaction table
- // and notify the handler that we have a new compaction table value.
- for (unsigned i = 0; i != NumEntries; ++i) {
- unsigned ValSlot = read_vbr_uint();
- Value *V = getGlobalTableValue(Ty, ValSlot);
- CompactionValues[Ty].push_back(V);
- if (Handler) Handler->handleCompactionTableValue(i, Ty, ValSlot);
- }
+ // Make sure we have enough room for the plane.
+ if (Ty >= CompactionValues.size())
+ CompactionValues.resize(Ty+1);
+
+ // Make sure the plane is empty or we have some kind of error.
+ if (!CompactionValues[Ty].empty())
+ error("Compaction table plane contains multiple entries!");
+
+ // Notify handler about the plane.
+ if (Handler) Handler->handleCompactionTablePlane(Ty, NumEntries);
+
+ // Push the implicit zero.
+ CompactionValues[Ty].push_back(Constant::getNullValue(getType(Ty)));
+
+ // Read in each of the entries, put them in the compaction table
+ // and notify the handler that we have a new compaction table value.
+ for (unsigned i = 0; i != NumEntries; ++i) {
+ unsigned ValSlot = read_vbr_uint();
+ Value *V = getGlobalTableValue(Ty, ValSlot);
+ CompactionValues[Ty].push_back(V);
+ if (Handler) Handler->handleCompactionTableValue(i, Ty, ValSlot);
}
}
// Notify handler that the compaction table is done.
@@ -1543,23 +1367,20 @@
// a derived type, then additional data is read to fill out the type
// definition.
const Type *BytecodeReader::ParseType() {
- unsigned PrimType = 0;
- if (read_typeid(PrimType))
- error("Invalid type (type type) in type constants!");
-
+ unsigned PrimType = read_vbr_uint();
const Type *Result = 0;
if ((Result = Type::getPrimitiveType((Type::TypeID)PrimType)))
return Result;
switch (PrimType) {
case Type::FunctionTyID: {
- const Type *RetType = readSanitizedType();
+ const Type *RetType = readType();
unsigned NumParams = read_vbr_uint();
std::vector<const Type*> Params;
while (NumParams--)
- Params.push_back(readSanitizedType());
+ Params.push_back(readType());
bool isVarArg = Params.size() && Params.back() == Type::VoidTy;
if (isVarArg) Params.pop_back();
@@ -1568,34 +1389,30 @@
break;
}
case Type::ArrayTyID: {
- const Type *ElementType = readSanitizedType();
+ const Type *ElementType = readType();
unsigned NumElements = read_vbr_uint();
Result = ArrayType::get(ElementType, NumElements);
break;
}
case Type::PackedTyID: {
- const Type *ElementType = readSanitizedType();
+ const Type *ElementType = readType();
unsigned NumElements = read_vbr_uint();
Result = PackedType::get(ElementType, NumElements);
break;
}
case Type::StructTyID: {
std::vector<const Type*> Elements;
- unsigned Typ = 0;
- if (read_typeid(Typ))
- error("Invalid element type (type type) for structure!");
-
+ unsigned Typ = read_vbr_uint();
while (Typ) { // List is terminated by void/0 typeid
Elements.push_back(getType(Typ));
- if (read_typeid(Typ))
- error("Invalid element type (type type) for structure!");
+ Typ = read_vbr_uint();
}
Result = StructType::get(Elements);
break;
}
case Type::PointerTyID: {
- Result = PointerType::get(readSanitizedType());
+ Result = PointerType::get(readType());
break;
}
@@ -1676,14 +1493,6 @@
if (!hasSignlessDivRem && !hasSignlessShrCastSetcc)
return Opcode;
-#if 0
- // If this is a bytecode format that did not include the unreachable
- // instruction, bump up the opcode number to adjust it.
- if (hasNoUnreachableInst)
- if (Opcode >= 6 && Opcode < 62)
- ++Opcode;
-#endif
-
// If this is bytecode version 6, that only had signed Rem and Div
// instructions, then we must compensate for those two instructions only.
// So that the switch statement below works, we're trying to turn this into
@@ -1805,46 +1614,39 @@
unsigned isExprNumArgs = read_vbr_uint();
if (isExprNumArgs) {
- if (!hasNoUndefValue) {
- // 'undef' is encoded with 'exprnumargs' == 1.
- if (isExprNumArgs == 1)
- return UndefValue::get(getType(TypeID));
-
- // Inline asm is encoded with exprnumargs == ~0U.
- if (isExprNumArgs == ~0U) {
- std::string AsmStr = read_str();
- std::string ConstraintStr = read_str();
- unsigned Flags = read_vbr_uint();
-
- const PointerType *PTy = dyn_cast<PointerType>(getType(TypeID));
- const FunctionType *FTy =
- PTy ? dyn_cast<FunctionType>(PTy->getElementType()) : 0;
-
- if (!FTy || !InlineAsm::Verify(FTy, ConstraintStr))
- error("Invalid constraints for inline asm");
- if (Flags & ~1U)
- error("Invalid flags for inline asm");
- bool HasSideEffects = Flags & 1;
- return InlineAsm::get(FTy, AsmStr, ConstraintStr, HasSideEffects);
- }
+ // 'undef' is encoded with 'exprnumargs' == 1.
+ if (isExprNumArgs == 1)
+ return UndefValue::get(getType(TypeID));
+
+ // Inline asm is encoded with exprnumargs == ~0U.
+ if (isExprNumArgs == ~0U) {
+ std::string AsmStr = read_str();
+ std::string ConstraintStr = read_str();
+ unsigned Flags = read_vbr_uint();
- --isExprNumArgs;
+ const PointerType *PTy = dyn_cast<PointerType>(getType(TypeID));
+ const FunctionType *FTy =
+ PTy ? dyn_cast<FunctionType>(PTy->getElementType()) : 0;
+
+ if (!FTy || !InlineAsm::Verify(FTy, ConstraintStr))
+ error("Invalid constraints for inline asm");
+ if (Flags & ~1U)
+ error("Invalid flags for inline asm");
+ bool HasSideEffects = Flags & 1;
+ return InlineAsm::get(FTy, AsmStr, ConstraintStr, HasSideEffects);
}
+
+ --isExprNumArgs;
// FIXME: Encoding of constant exprs could be much more compact!
std::vector<Constant*> ArgVec;
ArgVec.reserve(isExprNumArgs);
unsigned Opcode = read_vbr_uint();
- // Bytecode files before LLVM 1.4 need have a missing terminator inst.
- if (hasNoUnreachableInst) Opcode++;
-
// Read the slot number and types of each of the arguments
for (unsigned i = 0; i != isExprNumArgs; ++i) {
unsigned ArgValSlot = read_vbr_uint();
- unsigned ArgTypeSlot = 0;
- if (read_typeid(ArgTypeSlot))
- error("Invalid argument type (type type) for constant value");
+ unsigned ArgTypeSlot = read_vbr_uint();
// Get the arg value from its slot if it exists, otherwise a placeholder
ArgVec.push_back(getConstantValue(ArgTypeSlot, ArgValSlot));
@@ -1863,20 +1665,6 @@
return Result;
} else if (Opcode == Instruction::GetElementPtr) { // GetElementPtr
std::vector<Constant*> IdxList(ArgVec.begin()+1, ArgVec.end());
-
- if (hasRestrictedGEPTypes) {
- const Type *BaseTy = ArgVec[0]->getType();
- generic_gep_type_iterator<std::vector<Constant*>::iterator>
- GTI = gep_type_begin(BaseTy, IdxList.begin(), IdxList.end()),
- E = gep_type_end(BaseTy, IdxList.begin(), IdxList.end());
- for (unsigned i = 0; GTI != E; ++GTI, ++i)
- if (isa<StructType>(*GTI)) {
- if (IdxList[i]->getType() != Type::UByteTy)
- error("Invalid index for getelementptr!");
- IdxList[i] = ConstantExpr::getCast(IdxList[i], Type::UIntTy);
- }
- }
-
Constant* Result = ConstantExpr::getGetElementPtr(ArgVec[0], IdxList);
if (Handler) Handler->handleConstantExpression(Opcode, ArgVec, Result);
return Result;
@@ -2068,9 +1856,7 @@
/// Parse the constant strings section.
void BytecodeReader::ParseStringConstants(unsigned NumEntries, ValueTable &Tab){
for (; NumEntries; --NumEntries) {
- unsigned Typ = 0;
- if (read_typeid(Typ))
- error("Invalid type (type type) for string constant");
+ unsigned Typ = read_vbr_uint();
const Type *Ty = getType(Typ);
if (!isa<ArrayType>(Ty))
error("String constant data invalid!");
@@ -2106,22 +1892,16 @@
/// In LLVM 1.3 Type does not derive from Value so the types
/// do not occupy a plane. Consequently, we read the types
/// first in the constant pool.
- if (isFunction && !hasTypeDerivedFromValue) {
+ if (isFunction) {
unsigned NumEntries = read_vbr_uint();
ParseTypes(TypeTab, NumEntries);
}
while (moreInBlock()) {
unsigned NumEntries = read_vbr_uint();
- unsigned Typ = 0;
- bool isTypeType = read_typeid(Typ);
+ unsigned Typ = read_vbr_uint();
- /// In LLVM 1.2 and before, Types were written to the
- /// bytecode file in the "Type Type" plane (#12).
- /// In 1.3 plane 12 is now the label plane. Handle this here.
- if (isTypeType) {
- ParseTypes(TypeTab, NumEntries);
- } else if (Typ == Type::VoidTyID) {
+ if (Typ == Type::VoidTyID) {
/// Use of Type::VoidTyID is a misnomer. It actually means
/// that the following plane is constant strings
assert(&Tab == &ModuleValues && "Cannot read strings in functions!");
@@ -2213,20 +1993,6 @@
ParseCompactionTable();
break;
- case BytecodeFormat::BasicBlock: {
- if (!InsertedArguments) {
- // Insert arguments into the value table before we parse the first basic
- // block in the function, but after we potentially read in the
- // compaction table.
- insertArguments(F);
- InsertedArguments = true;
- }
-
- BasicBlock *BB = ParseBasicBlock(BlockNum++);
- F->getBasicBlockList().push_back(BB);
- break;
- }
-
case BytecodeFormat::InstructionListBlockID: {
// Insert arguments into the value table before we parse the instruction
// list for the function, but after we potentially read in the compaction
@@ -2253,9 +2019,6 @@
break;
}
BlockEnd = MyEnd;
-
- // Malformed bc file if read past end of block.
- align32();
}
// Make sure there were no references to non-existant basic blocks.
@@ -2382,11 +2145,6 @@
void BytecodeReader::ParseGlobalTypes() {
// Read the number of types
unsigned NumEntries = read_vbr_uint();
-
- // Ignore the type plane identifier for types if the bc file is pre 1.3
- if (hasTypeDerivedFromValue)
- read_vbr_uint();
-
ParseTypes(ModuleTypes, NumEntries);
}
@@ -2405,8 +2163,6 @@
// VarType Fields: bit0 = isConstant, bit1 = hasInitializer, bit2,3,4 =
// Linkage, bit4+ = slot#
unsigned SlotNo = VarType >> 5;
- if (sanitizeTypeId(SlotNo))
- error("Invalid type (type type) for global var!");
unsigned LinkageID = (VarType >> 2) & 7;
bool isConstant = VarType & 1;
bool hasInitializer = (VarType & 2) != 0;
@@ -2477,9 +2233,6 @@
// Read the function objects for all of the functions that are coming
unsigned FnSignature = read_vbr_uint();
- if (hasNoFlagsForFunctions)
- FnSignature = (FnSignature << 5) + 1;
-
// List is terminated by VoidTy.
while (((FnSignature & (~0U >> 1)) >> 5) != Type::VoidTyID) {
const Type *Ty = getType((FnSignature & (~0U >> 1)) >> 5);
@@ -2535,8 +2288,6 @@
// Get the next function signature.
FnSignature = read_vbr_uint();
- if (hasNoFlagsForFunctions)
- FnSignature = (FnSignature << 5) + 1;
}
// Now that the function signature list is set up, reverse it so that we can
@@ -2548,37 +2299,32 @@
/// into this to get their section name.
std::vector<std::string> SectionNames;
- if (hasInconsistentModuleGlobalInfo) {
- align32();
- } else if (!hasNoDependentLibraries) {
- // If this bytecode format has dependent library information in it, read in
- // the number of dependent library items that follow.
- unsigned num_dep_libs = read_vbr_uint();
- std::string dep_lib;
- while (num_dep_libs--) {
- dep_lib = read_str();
- TheModule->addLibrary(dep_lib);
- if (Handler)
- Handler->handleDependentLibrary(dep_lib);
- }
-
- // Read target triple and place into the module.
- std::string triple = read_str();
- TheModule->setTargetTriple(triple);
+ // Read in the dependent library information.
+ unsigned num_dep_libs = read_vbr_uint();
+ std::string dep_lib;
+ while (num_dep_libs--) {
+ dep_lib = read_str();
+ TheModule->addLibrary(dep_lib);
if (Handler)
- Handler->handleTargetTriple(triple);
-
- if (!hasAlignment && At != BlockEnd) {
- // If the file has section info in it, read the section names now.
- unsigned NumSections = read_vbr_uint();
- while (NumSections--)
- SectionNames.push_back(read_str());
- }
-
- // If the file has module-level inline asm, read it now.
- if (!hasAlignment && At != BlockEnd)
- TheModule->setModuleInlineAsm(read_str());
+ Handler->handleDependentLibrary(dep_lib);
+ }
+
+ // Read target triple and place into the module.
+ std::string triple = read_str();
+ TheModule->setTargetTriple(triple);
+ if (Handler)
+ Handler->handleTargetTriple(triple);
+
+ if (At != BlockEnd) {
+ // If the file has section info in it, read the section names now.
+ unsigned NumSections = read_vbr_uint();
+ while (NumSections--)
+ SectionNames.push_back(read_str());
}
+
+ // If the file has module-level inline asm, read it now.
+ if (At != BlockEnd)
+ TheModule->setModuleInlineAsm(read_str());
// If any globals are in specified sections, assign them now.
for (std::map<GlobalValue*, unsigned>::iterator I = SectionID.begin(), E =
@@ -2613,97 +2359,22 @@
RevisionNum = Version >> 4;
- // Default values for the current bytecode version
- hasInconsistentModuleGlobalInfo = false;
- hasExplicitPrimitiveZeros = false;
- hasRestrictedGEPTypes = false;
- hasTypeDerivedFromValue = false;
- hasLongBlockHeaders = false;
- has32BitTypes = false;
- hasNoDependentLibraries = false;
- hasAlignment = false;
- hasNoUndefValue = false;
- hasNoFlagsForFunctions = false;
- hasNoUnreachableInst = false;
+ // Default the backwards compatibility flag values for the current BC version
hasSignlessDivRem = false;
hasSignlessShrCastSetcc = false;
// Determine which backwards compatibility flags to set based on the
// bytecode file's version number
switch (RevisionNum) {
- case 0: // LLVM 1.0, 1.1 (Released)
- // Base LLVM 1.0 bytecode format.
- hasInconsistentModuleGlobalInfo = true;
- hasExplicitPrimitiveZeros = true;
-
- // FALL THROUGH
-
- case 1: // LLVM 1.2 (Released)
- // LLVM 1.2 added explicit support for emitting strings efficiently.
-
- // Also, it fixed the problem where the size of the ModuleGlobalInfo block
- // included the size for the alignment at the end, where the rest of the
- // blocks did not.
-
- // LLVM 1.2 and before required that GEP indices be ubyte constants for
- // structures and longs for sequential types.
- hasRestrictedGEPTypes = true;
-
- // LLVM 1.2 and before had the Type class derive from Value class. This
- // changed in release 1.3 and consequently LLVM 1.3 bytecode files are
- // written differently because Types can no longer be part of the
- // type planes for Values.
- hasTypeDerivedFromValue = true;
-
- // FALL THROUGH
-
- case 2: // 1.2.5 (Not Released)
-
- // LLVM 1.2 and earlier had two-word block headers. This is a bit wasteful,
- // especially for small files where the 8 bytes per block is a large
- // fraction of the total block size. In LLVM 1.3, the block type and length
- // are compressed into a single 32-bit unsigned integer. 27 bits for length,
- // 5 bits for block type.
- hasLongBlockHeaders = true;
-
- // LLVM 1.2 and earlier wrote type slot numbers as vbr_uint32. In LLVM 1.3
- // this has been reduced to vbr_uint24. It shouldn't make much difference
- // since we haven't run into a module with > 24 million types, but for
- // safety the 24-bit restriction has been enforced in 1.3 to free some bits
- // in various places and to ensure consistency.
- has32BitTypes = true;
-
- // LLVM 1.2 and earlier did not provide a target triple nor a list of
- // libraries on which the bytecode is dependent. LLVM 1.3 provides these
- // features, for use in future versions of LLVM.
- hasNoDependentLibraries = true;
-
- // FALL THROUGH
-
- case 3: // LLVM 1.3 (Released)
- // LLVM 1.3 and earlier caused alignment bytes to be written on some block
- // boundaries and at the end of some strings. In extreme cases (e.g. lots
- // of GEP references to a constant array), this can increase the file size
- // by 30% or more. In version 1.4 alignment is done away with completely.
- hasAlignment = true;
-
- // FALL THROUGH
-
- case 4: // 1.3.1 (Not Released)
- // In version 4, we did not support the 'undef' constant.
- hasNoUndefValue = true;
-
- // In version 4 and above, we did not include space for flags for functions
- // in the module info block.
- hasNoFlagsForFunctions = true;
-
- // In version 4 and above, we did not include the 'unreachable' instruction
- // in the opcode numbering in the bytecode file.
- hasNoUnreachableInst = true;
-
- // FALL THROUGH
+ case 0: // LLVM 1.0, 1.1 (Released)
+ case 1: // LLVM 1.2 (Released)
+ case 2: // 1.2.5 (Not Released)
+ case 3: // LLVM 1.3 (Released)
+ case 4: // 1.3.1 (Not Released)
+ error("Old bytecode formats no longer supported");
+ break;
- case 5: // 1.4 (Released)
+ case 5: // 1.4 (Released)
// In version 6, the Div and Rem instructions were converted to their
// signed and floating point counterparts: UDiv, SDiv, FDiv, URem, SRem,
// and FRem. Versions prior to 6 need to indicate that they have the
@@ -2712,7 +2383,7 @@
// FALL THROUGH
- case 6: // Signless Rem & Div Implementation (1.9 release)
+ case 6: // 1.9 (Released)
// In version 5 and prior, instructions were signless while integer types
// were signed. In version 6, instructions became signed and types became
// signless. For example in version 5 we have the DIV instruction but in
@@ -2747,7 +2418,6 @@
// Read into instance variables...
ParseVersionInfo();
- align32();
bool SeenModuleGlobalInfo = false;
bool SeenGlobalTypePlane = false;
@@ -2794,7 +2464,6 @@
break;
}
BlockEnd = MyEnd;
- align32();
}
// After the module constant pool has been read, we can safely initialize
Index: llvm/lib/Bytecode/Reader/Reader.h
diff -u llvm/lib/Bytecode/Reader/Reader.h:1.37 llvm/lib/Bytecode/Reader/Reader.h:1.38
--- llvm/lib/Bytecode/Reader/Reader.h:1.37 Sat Nov 11 05:54:25 2006
+++ llvm/lib/Bytecode/Reader/Reader.h Mon Nov 13 22:47:22 2006
@@ -292,69 +292,6 @@
/// Flags to distinguish LLVM 1.0 & 1.1 bytecode formats (revision #0)
- /// Revision #0 had an explicit alignment of data only for the
- /// ModuleGlobalInfo block. This was fixed to be like all other blocks in 1.2
- bool hasInconsistentModuleGlobalInfo;
-
- /// Revision #0 also explicitly encoded zero values for primitive types like
- /// int/sbyte/etc.
- bool hasExplicitPrimitiveZeros;
-
- // Flags to control features specific the LLVM 1.2 and before (revision #1)
-
- /// LLVM 1.2 and earlier required that getelementptr structure indices were
- /// ubyte constants and that sequential type indices were longs.
- bool hasRestrictedGEPTypes;
-
- /// LLVM 1.2 and earlier had class Type deriving from Value and the Type
- /// objects were located in the "Type Type" plane of various lists in read
- /// by the bytecode reader. In LLVM 1.3 this is no longer the case. Types are
- /// completely distinct from Values. Consequently, Types are written in fixed
- /// locations in LLVM 1.3. This flag indicates that the older Type derived
- /// from Value style of bytecode file is being read.
- bool hasTypeDerivedFromValue;
-
- /// LLVM 1.2 and earlier encoded block headers as two uint (8 bytes), one for
- /// the size and one for the type. This is a bit wasteful, especially for
- /// small files where the 8 bytes per block is a large fraction of the total
- /// block size. In LLVM 1.3, the block type and length are encoded into a
- /// single uint32 by restricting the number of block types (limit 31) and the
- /// maximum size of a block (limit 2^27-1=134,217,727). Note that the module
- /// block still uses the 8-byte format so the maximum size of a file can be
- /// 2^32-1 bytes long.
- bool hasLongBlockHeaders;
-
- /// LLVM 1.2 and earlier wrote type slot numbers as vbr_uint32. In LLVM 1.3
- /// this has been reduced to vbr_uint24. It shouldn't make much difference
- /// since we haven't run into a module with > 24 million types, but for safety
- /// the 24-bit restriction has been enforced in 1.3 to free some bits in
- /// various places and to ensure consistency. In particular, global vars are
- /// restricted to 24-bits.
- bool has32BitTypes;
-
- /// LLVM 1.2 and earlier did not provide a target triple nor a list of
- /// libraries on which the bytecode is dependent. LLVM 1.3 provides these
- /// features, for use in future versions of LLVM.
- bool hasNoDependentLibraries;
-
- /// LLVM 1.3 and earlier caused blocks and other fields to start on 32-bit
- /// aligned boundaries. This can lead to as much as 30% bytecode size overhead
- /// in various corner cases (lots of long instructions). In LLVM 1.4,
- /// alignment of bytecode fields was done away with completely.
- bool hasAlignment;
-
- // In version 4 and earlier, the bytecode format did not support the 'undef'
- // constant.
- bool hasNoUndefValue;
-
- // In version 4 and earlier, the bytecode format did not save space for flags
- // in the global info block for functions.
- bool hasNoFlagsForFunctions;
-
- // In version 4 and earlier, there was no opcode space reserved for the
- // unreachable instruction.
- bool hasNoUnreachableInst;
-
// In version 6, the Div and Rem instructions were converted to be the
// signed instructions UDiv, SDiv, URem and SRem. This flag will be true if
// the Div and Rem instructions are signless (ver 5 and prior).
@@ -453,12 +390,8 @@
/// @brief Converts a type slot number to its Type*
const Type *getType(unsigned ID);
- /// @brief Converts a pre-sanitized type slot number to its Type* and
- /// sanitizes the type id.
- inline const Type* getSanitizedType(unsigned& ID );
-
- /// @brief Read in and get a sanitized type id
- inline const Type* readSanitizedType();
+ /// @brief Read in a type id and turn it into a Type*
+ inline const Type* readType();
/// @brief Converts a Type* to its type slot number
unsigned getTypeSlot(const Type *Ty);
@@ -559,12 +492,6 @@
/// @brief Read a bytecode block header
inline void read_block(unsigned &Type, unsigned &Size);
-
- /// @brief Read a type identifier and sanitize it.
- inline bool read_typeid(unsigned &TypeId);
-
- /// @brief Recalculate type ID for pre 1.3 bytecode files.
- inline bool sanitizeTypeId(unsigned &TypeId );
/// @}
};
More information about the llvm-commits mailing list