[llvm-commits] CVS: llvm/lib/Bytecode/Reader/InstructionReader.cpp Reader.cpp ReaderInternals.h

Chris Lattner lattner at cs.uiuc.edu
Wed Oct 8 17:54:01 PDT 2003


Changes in directory llvm/lib/Bytecode/Reader:

InstructionReader.cpp updated: 1.53 -> 1.54
Reader.cpp updated: 1.68 -> 1.69
ReaderInternals.h updated: 1.49 -> 1.50

---
Log message:

This patch substantially simplifies and cleans up handling of basic blocks
in the bytecode parser.  Before we tried to shoehorn basic blocks into the
"getValue" code path with other types of values.  For a variety of reasons
this was a bad idea, so this patch separates it out into its own data structure.

This simplifies the code, makes it fit in 80 columns, and is also much faster.
In a testcase provided by Bill, which has lots of PHI nodes, this patch speeds
up bytecode parsing from taking 6.9s to taking 2.32s.  More speedups to 
follow later.



---
Diffs of the changes:  (+84 -60)

Index: llvm/lib/Bytecode/Reader/InstructionReader.cpp
diff -u llvm/lib/Bytecode/Reader/InstructionReader.cpp:1.53 llvm/lib/Bytecode/Reader/InstructionReader.cpp:1.54
--- llvm/lib/Bytecode/Reader/InstructionReader.cpp:1.53	Wed Oct  8 16:18:57 2003
+++ llvm/lib/Bytecode/Reader/InstructionReader.cpp	Wed Oct  8 17:52:53 2003
@@ -161,12 +161,11 @@
             delete PN; 
 	    return true;
     case 2: PN->addIncoming(getValue(Raw->Ty, Raw->Arg1),
-			    cast<BasicBlock>(getValue(Type::LabelTyID,
-                                                      Raw->Arg2)));
+                            getBasicBlock(Raw->Arg2));
       break;
     default:
-      PN->addIncoming(getValue(Raw->Ty, Raw->Arg1), 
-		      cast<BasicBlock>(getValue(Type::LabelTyID, Raw->Arg2)));
+      PN->addIncoming(getValue(Raw->Ty, Raw->Arg1),
+		      getBasicBlock(Raw->Arg2));
       if (Raw->VarArgs->size() & 1) {
 	std::cerr << "PHI Node with ODD number of arguments!\n";
 	delete PN;
@@ -174,8 +173,7 @@
       } else {
         std::vector<unsigned> &args = *Raw->VarArgs;
         for (unsigned i = 0; i < args.size(); i+=2)
-          PN->addIncoming(getValue(Raw->Ty, args[i]),
-                       cast<BasicBlock>(getValue(Type::LabelTyID, args[i+1])));
+          PN->addIncoming(getValue(Raw->Ty, args[i]), getBasicBlock(args[i+1]));
       }
       delete Raw->VarArgs; 
       break;
@@ -200,20 +198,18 @@
 
   case Instruction::Br:
     if (Raw->NumOperands == 1) {
-      Res = new BranchInst(cast<BasicBlock>(getValue(Type::LabelTyID,Raw->Arg1)));
+      Res = new BranchInst(getBasicBlock(Raw->Arg1));
       return false;
     } else if (Raw->NumOperands == 3) {
-      Res = new BranchInst(cast<BasicBlock>(getValue(Type::LabelTyID, Raw->Arg1)),
-			   cast<BasicBlock>(getValue(Type::LabelTyID, Raw->Arg2)),
-                                            getValue(Type::BoolTyID , Raw->Arg3));
+      Res = new BranchInst(getBasicBlock(Raw->Arg1), getBasicBlock(Raw->Arg2),
+                           getValue(Type::BoolTyID , Raw->Arg3));
       return false;
     }
     break;
     
   case Instruction::Switch: {
     SwitchInst *I = 
-      new SwitchInst(getValue(Raw->Ty, Raw->Arg1), 
-                     cast<BasicBlock>(getValue(Type::LabelTyID, Raw->Arg2)));
+      new SwitchInst(getValue(Raw->Ty, Raw->Arg1), getBasicBlock(Raw->Arg2));
     Res = I;
     if (Raw->NumOperands < 3) return false;  // No destinations?  Weird.
 
@@ -226,7 +222,7 @@
     std::vector<unsigned> &args = *Raw->VarArgs;
     for (unsigned i = 0; i < args.size(); i += 2)
       I->addCase(cast<Constant>(getValue(Raw->Ty, args[i])),
-                 cast<BasicBlock>(getValue(Type::LabelTyID, args[i+1])));
+                 getBasicBlock(args[i+1]));
 
     delete Raw->VarArgs;
     return false;
@@ -311,11 +307,11 @@
     if (!FTy->isVarArg()) {
       if (Raw->NumOperands < 3) return true;
 
-      Normal = cast<BasicBlock>(getValue(Type::LabelTyID, Raw->Arg2));
+      Normal = getBasicBlock(Raw->Arg2);
       if (Raw->NumOperands == 3)
-        Except = cast<BasicBlock>(getValue(Type::LabelTyID, Raw->Arg3));
+        Except = getBasicBlock(Raw->Arg3);
       else {
-        Except = cast<BasicBlock>(getValue(Type::LabelTyID, args[0]));
+        Except = getBasicBlock(args[0]);
 
         FunctionType::ParamTypes::const_iterator It = PL.begin();
         for (unsigned i = 1; i < args.size(); i++) {
@@ -327,10 +323,11 @@
       }
     } else {
       if (args.size() < 4) return true;
-      if (getType(args[0]) != Type::LabelTy || 
-          getType(args[2]) != Type::LabelTy) return true;
-      Normal = cast<BasicBlock>(getValue(Type::LabelTyID, args[1]));
-      Except = cast<BasicBlock>(getValue(Type::LabelTyID, args[3]));
+      if (args[0] != Type::LabelTyID || args[2] != Type::LabelTyID)
+        return true;
+          
+      Normal = getBasicBlock(args[1]);
+      Except = getBasicBlock(args[3]);
 
       if ((args.size() & 1) != 0)
 	return true;  // Must be pairs of type/value


Index: llvm/lib/Bytecode/Reader/Reader.cpp
diff -u llvm/lib/Bytecode/Reader/Reader.cpp:1.68 llvm/lib/Bytecode/Reader/Reader.cpp:1.69
--- llvm/lib/Bytecode/Reader/Reader.cpp:1.68	Wed Oct  8 16:51:46 2003
+++ llvm/lib/Bytecode/Reader/Reader.cpp	Wed Oct  8 17:52:54 2003
@@ -119,6 +119,7 @@
 
 Value *BytecodeParser::getValue(unsigned type, unsigned oNum, bool Create) {
   assert(type != Type::TypeTyID && "getValue() cannot get types!");
+  assert(type != Type::LabelTyID && "getValue() cannot get blocks!");
   unsigned Num = oNum;
 
   if (HasImplicitZeroInitializer && type >= FirstDerivedTyID) {
@@ -138,15 +139,30 @@
 
   if (!Create) return 0;  // Do not create a placeholder?
 
-  const Type *Ty = getType(type);
-  Value *Val = type == Type::LabelTyID ? (Value*)new BBPHolder(Ty, oNum) : 
-                                         (Value*)new ValPHolder(Ty, oNum);
-
-  assert(Val != 0 && "How did we not make something?");
+  Value *Val = new ValPHolder(getType(type), oNum);
   if (insertValue(Val, LateResolveValues) == -1) return 0;
   return Val;
 }
 
+/// getBasicBlock - Get a particular numbered basic block, which might be a
+/// forward reference.  This works together with ParseBasicBlock to handle these
+/// forward references in a clean manner.
+///
+BasicBlock *BytecodeParser::getBasicBlock(unsigned ID) {
+  // Make sure there is room in the table...
+  if (ParsedBasicBlocks.size() <= ID) ParsedBasicBlocks.resize(ID+1);
+
+  // First check to see if this is a backwards reference, i.e., ParseBasicBlock
+  // has already created this block, or if the forward reference has already
+  // been created.
+  if (ParsedBasicBlocks[ID])
+    return ParsedBasicBlocks[ID];
+
+  // Otherwise, the basic block has not yet been created.  Do so and add it to
+  // the ParsedBasicBlocks list.
+  return ParsedBasicBlocks[ID] = new BasicBlock();
+}
+
 /// getConstantValue - Just like getValue, except that it returns a null pointer
 /// only on error.  It always returns a constant (meaning that if the value is
 /// defined, but is not a constant, that is an error).  If the specified
@@ -176,10 +192,16 @@
 }
 
 
-std::auto_ptr<BasicBlock>
-BytecodeParser::ParseBasicBlock(const unsigned char *&Buf,
-                                const unsigned char *EndBuf) {
-  std::auto_ptr<BasicBlock> BB(new BasicBlock());
+BasicBlock *BytecodeParser::ParseBasicBlock(const unsigned char *&Buf,
+                                            const unsigned char *EndBuf,
+                                            unsigned BlockNo) {
+  BasicBlock *BB;
+  if (ParsedBasicBlocks.size() == BlockNo)
+    ParsedBasicBlocks.push_back(BB = new BasicBlock());
+  else if (ParsedBasicBlocks[BlockNo] == 0)
+    BB = ParsedBasicBlocks[BlockNo] = new BasicBlock();
+  else
+    BB = ParsedBasicBlocks[BlockNo];
 
   while (Buf < EndBuf) {
     Instruction *Inst;
@@ -199,7 +221,8 @@
 
 void BytecodeParser::ParseSymbolTable(const unsigned char *&Buf,
                                       const unsigned char *EndBuf,
-                                      SymbolTable *ST) {
+                                      SymbolTable *ST,
+                                      Function *CurrentFunction) {
   while (Buf < EndBuf) {
     // Symtab block header: [num entries][type id number]
     unsigned NumEntries, Typ;
@@ -219,10 +242,17 @@
       if (read(Buf, EndBuf, Name, false))  // Not aligned...
         throw std::string("Buffer not aligned.");
 
-      Value *V;
+      Value *V = 0;
       if (Typ == Type::TypeTyID)
         V = (Value*)getType(slot);
-      else
+      else if (Typ == Type::LabelTyID) {
+        if (CurrentFunction) {
+          // FIXME: THIS IS N^2!!!
+          Function::iterator BlockIterator = CurrentFunction->begin();
+          std::advance(BlockIterator, slot);
+          V = BlockIterator;
+        }
+      } else
         V = getValue(Typ, slot, false); // Find mapping...
       if (V == 0) throw std::string("Failed value look-up.");
       BCR_TRACE(4, "Map: '" << Name << "' to #" << slot << ":" << *V;
@@ -254,9 +284,8 @@
   GlobalRefs.erase(I);                // Remove the map entry for it
 }
 
-void
-BytecodeParser::ParseFunction(const unsigned char *&Buf,
-                              const unsigned char *EndBuf) {
+void BytecodeParser::ParseFunction(const unsigned char *&Buf,
+                                   const unsigned char *EndBuf) {
   if (FunctionSignatureList.empty())
     throw std::string("FunctionSignatureList empty!");
 
@@ -312,6 +341,9 @@
       throw std::string("Error reading function arguments!");
   }
 
+  // Keep track of how many basic blocks we have read in...
+  unsigned BlockNum = 0;
+
   while (Buf < EndBuf) {
     unsigned Type, Size;
     const unsigned char *OldBuf = Buf;
@@ -326,17 +358,14 @@
 
     case BytecodeFormat::BasicBlock: {
       BCR_TRACE(2, "BLOCK BytecodeFormat::BasicBlock: {\n");
-      std::auto_ptr<BasicBlock> BB = ParseBasicBlock(Buf, Buf+Size);
-      if (!BB.get() || insertValue(BB.get(), Values) == -1)
-        throw std::string("Parse error: BasicBlock");
-
-      F->getBasicBlockList().push_back(BB.release());
+      BasicBlock *BB = ParseBasicBlock(Buf, Buf+Size, BlockNum++);
+      F->getBasicBlockList().push_back(BB);
       break;
     }
 
     case BytecodeFormat::SymbolTable: {
       BCR_TRACE(2, "BLOCK BytecodeFormat::SymbolTable: {\n");
-      ParseSymbolTable(Buf, Buf+Size, &F->getSymbolTable());
+      ParseSymbolTable(Buf, Buf+Size, &F->getSymbolTable(), F);
       break;
     }
 
@@ -353,6 +382,12 @@
     ALIGN32(Buf, EndBuf);
   }
 
+  // Make sure there were no references to non-existant basic blocks.
+  if (BlockNum != ParsedBasicBlocks.size())
+    throw std::string("Illegal basic block operand reference");
+  ParsedBasicBlocks.clear();
+
+
   // Check for unresolvable references
   while (!LateResolveValues.empty()) {
     ValueList &VL = *LateResolveValues.back();
@@ -582,7 +617,7 @@
 
     case BytecodeFormat::SymbolTable:
       BCR_TRACE(1, "BLOCK BytecodeFormat::SymbolTable: {\n");
-      ParseSymbolTable(Buf, Buf+Size, &TheModule->getSymbolTable());
+      ParseSymbolTable(Buf, Buf+Size, &TheModule->getSymbolTable(), 0);
       break;
 
     default:


Index: llvm/lib/Bytecode/Reader/ReaderInternals.h
diff -u llvm/lib/Bytecode/Reader/ReaderInternals.h:1.49 llvm/lib/Bytecode/Reader/ReaderInternals.h:1.50
--- llvm/lib/Bytecode/Reader/ReaderInternals.h:1.49	Wed Oct  8 16:51:46 2003
+++ llvm/lib/Bytecode/Reader/ReaderInternals.h	Wed Oct  8 17:52:54 2003
@@ -103,6 +103,8 @@
   ValueTable Values, LateResolveValues;
   ValueTable ModuleValues;
 
+  std::vector<BasicBlock*> ParsedBasicBlocks;
+
   // GlobalRefs - This maintains a mapping between <Type, Slot #>'s and forward
   // references to global values or constants.  Such values may be referenced
   // before they are defined, and if so, the temporary object that they
@@ -153,15 +155,16 @@
   void ParseVersionInfo   (const unsigned char *&Buf, const unsigned char *End);
   void ParseModuleGlobalInfo(const unsigned char *&Buf, const unsigned char *E);
   void ParseSymbolTable(const unsigned char *&Buf, const unsigned char *End,
-                        SymbolTable *);
+                        SymbolTable *, Function *CurrentFunction);
   void ParseFunction(const unsigned char *&Buf, const unsigned char *End);
   void ParseGlobalTypes(const unsigned char *&Buf, const unsigned char *EndBuf);
 
-  std::auto_ptr<BasicBlock>
-  ParseBasicBlock(const unsigned char *&Buf, const unsigned char *End);
+  BasicBlock *ParseBasicBlock(const unsigned char *&Buf,
+                              const unsigned char *End,
+                              unsigned BlockNo);
 
-  bool ParseInstruction   (const unsigned char *&Buf, const unsigned char *End,
-                           Instruction *&);
+  bool ParseInstruction(const unsigned char *&Buf, const unsigned char *End,
+                        Instruction *&);
   std::auto_ptr<RawInst> ParseRawInst(const unsigned char *&Buf,
                                       const unsigned char *End);
 
@@ -179,6 +182,7 @@
   Value      *getValue(const Type *Ty, unsigned num, bool Create = true);
   Value      *getValue(unsigned TypeID, unsigned num, bool Create = true);
   const Type *getType(unsigned ID);
+  BasicBlock *getBasicBlock(unsigned ID);
   Constant   *getConstantValue(const Type *Ty, unsigned num);
 
   int insertValue(Value *V, ValueTable &Table);  // -1 = Failure
@@ -207,12 +211,6 @@
   virtual Instruction *clone() const { abort(); return 0; }
 };
 
-struct BBPlaceHolderHelper : public BasicBlock {
-  BBPlaceHolderHelper(const Type *Ty) : BasicBlock() {
-    assert(Ty == Type::LabelTy);
-  }
-};
-
 struct ConstantPlaceHolderHelper : public Constant {
   ConstantPlaceHolderHelper(const Type *Ty)
     : Constant(Ty) {}
@@ -220,7 +218,6 @@
 };
 
 typedef PlaceholderDef<InstPlaceHolderHelper>  ValPHolder;
-typedef PlaceholderDef<BBPlaceHolderHelper>    BBPHolder;
 typedef PlaceholderDef<ConstantPlaceHolderHelper>  ConstPHolder;
 
 // Some common errors we find
@@ -232,12 +229,7 @@
 static inline unsigned getValueIDNumberFromPlaceHolder(Value *Val) {
   if (isa<Constant>(Val))
     return ((ConstPHolder*)Val)->getID();
-  
-  // else discriminate by type
-  switch (Val->getType()->getPrimitiveID()) {
-  case Type::LabelTyID:    return ((BBPHolder*)Val)->getID();
-  default:                 return ((ValPHolder*)Val)->getID();
-  }
+  return ((ValPHolder*)Val)->getID();
 }
 
 static inline void readBlock(const unsigned char *&Buf,





More information about the llvm-commits mailing list