[llvm-commits] [parallel] CVS: llvm/lib/Transforms/Utils/CodeExtractor.cpp LoopExtractor.cpp BreakCriticalEdges.cpp CloneFunction.cpp CloneTrace.cpp InlineFunction.cpp Linker.cpp PromoteMemoryToRegister.cpp SimplifyCFG.cpp ValueMapper.cpp

Mon Mar 1 18:05:10 PST 2004

Changes in directory llvm/lib/Transforms/Utils:

CodeExtractor.cpp added (r1.2.2.1)
LoopExtractor.cpp added (r1.2.2.1)
BreakCriticalEdges.cpp updated: 1.18 -> 1.18.2.1
CloneFunction.cpp updated: 1.19 -> 1.19.2.1
CloneTrace.cpp updated: 1.5 -> 1.5.2.1
InlineFunction.cpp updated: 1.18 -> 1.18.2.1
Linker.cpp updated: 1.66 -> 1.66.2.1
PromoteMemoryToRegister.cpp updated: 1.59 -> 1.59.2.1
SimplifyCFG.cpp updated: 1.19 -> 1.19.2.1
ValueMapper.cpp updated: 1.10 -> 1.10.2.1

---
Log message:

Merge from trunk

---
Diffs of the changes:  (+1723 -308)

Index: llvm/lib/Transforms/Utils/CodeExtractor.cpp
diff -c /dev/null llvm/lib/Transforms/Utils/CodeExtractor.cpp:1.2.2.1
*** /dev/null	Mon Mar  1 17:58:28 2004

--- llvm/lib/Transforms/Utils/CodeExtractor.cpp	Mon Mar  1 17:58:16 2004
***************
*** 0 ****
--- 1,583 ----
+ //===- CodeExtractor.cpp - Pull code region into a new function -----------===//
+ // 
+ //                     The LLVM Compiler Infrastructure
+ //
+ // This file was developed by the LLVM research group and is distributed under
+ // the University of Illinois Open Source License. See LICENSE.TXT for details.
+ // 
+ //===----------------------------------------------------------------------===//
+ //
+ // This file implements the interface to tear out a code region, such as an
+ // individual loop or a parallel section, into a new function, replacing it with
+ // a call to the new function.
+ //
+ //===----------------------------------------------------------------------===//
+ 
+ #include "llvm/BasicBlock.h"
+ #include "llvm/Constants.h"
+ #include "llvm/DerivedTypes.h"
+ #include "llvm/Instructions.h"
+ #include "llvm/Module.h"
+ #include "llvm/Pass.h"
+ #include "llvm/Analysis/LoopInfo.h"
+ #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+ #include "llvm/Transforms/Utils/FunctionUtils.h"
+ #include "Support/Debug.h"
+ #include "Support/StringExtras.h"
+ #include <algorithm>
+ #include <map>
+ #include <vector>
+ using namespace llvm;
+ 
+ namespace {
+ 
+   inline bool contains(const std::vector<BasicBlock*> &V, const BasicBlock *BB){
+     return std::find(V.begin(), V.end(), BB) != V.end();
+   }
+ 
+   /// getFunctionArg - Return a pointer to F's argno'th (0-based) argument.
+   ///
+   Argument *getFunctionArg(Function *F, unsigned argno) {
+     Function::aiterator ai = F->abegin();
+     while (argno) { ++ai; --argno; }
+     return &*ai;
+   }
+ 
+   struct CodeExtractor {
+     typedef std::vector<Value*> Values;
+     typedef std::vector<std::pair<unsigned, unsigned> > PhiValChangesTy;
+     typedef std::map<PHINode*, PhiValChangesTy> PhiVal2ArgTy;
+     PhiVal2ArgTy PhiVal2Arg;
+ 
+   public:
+     Function *ExtractCodeRegion(const std::vector<BasicBlock*> &code);
+ 
+   private:
+     void findInputsOutputs(const std::vector<BasicBlock*> &code,
+                            Values &inputs,
+                            Values &outputs,
+                            BasicBlock *newHeader,
+                            BasicBlock *newRootNode);
+ 
+     void processPhiNodeInputs(PHINode *Phi,
+                               const std::vector<BasicBlock*> &code,
+                               Values &inputs,
+                               BasicBlock *newHeader,
+                               BasicBlock *newRootNode);
+ 
+     void rewritePhiNodes(Function *F, BasicBlock *newFuncRoot);
+ 
+     Function *constructFunction(const Values &inputs,
+                                 const Values &outputs,
+                                 BasicBlock *newRootNode, BasicBlock *newHeader,
+                                 const std::vector<BasicBlock*> &code,
+                                 Function *oldFunction, Module *M);
+ 
+     void moveCodeToFunction(const std::vector<BasicBlock*> &code,
+                             Function *newFunction);
+ 
+     void emitCallAndSwitchStatement(Function *newFunction,
+                                     BasicBlock *newHeader,
+                                     const std::vector<BasicBlock*> &code,
+                                     Values &inputs,
+                                     Values &outputs);
+ 
+   };
+ }
+ 
+ void CodeExtractor::processPhiNodeInputs(PHINode *Phi,
+                                          const std::vector<BasicBlock*> &code,
+                                          Values &inputs,
+                                          BasicBlock *codeReplacer,
+                                          BasicBlock *newFuncRoot)
+ {
+   // Separate incoming values and BasicBlocks as internal/external. We ignore
+   // the case where both the value and BasicBlock are internal, because we don't
+   // need to do a thing.
+   std::vector<unsigned> EValEBB;
+   std::vector<unsigned> EValIBB;
+   std::vector<unsigned> IValEBB;
+ 
+   for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
+     Value *phiVal = Phi->getIncomingValue(i);
+     if (Instruction *Inst = dyn_cast<Instruction>(phiVal)) {
+       if (contains(code, Inst->getParent())) {
+         if (!contains(code, Phi->getIncomingBlock(i)))
+           IValEBB.push_back(i);
+       } else {
+         if (contains(code, Phi->getIncomingBlock(i)))
+           EValIBB.push_back(i);
+         else
+           EValEBB.push_back(i);
+       }
+     } else if (Constant *Const = dyn_cast<Constant>(phiVal)) {
+       // Constants are internal, but considered `external' if they are coming
+       // from an external block.
+       if (!contains(code, Phi->getIncomingBlock(i)))
+         EValEBB.push_back(i);
+     } else if (Argument *Arg = dyn_cast<Argument>(phiVal)) {
+       // arguments are external
+       if (contains(code, Phi->getIncomingBlock(i)))
+         EValIBB.push_back(i);
+       else
+         EValEBB.push_back(i);
+     } else {
+       phiVal->dump();
+       assert(0 && "Unhandled input in a Phi node");
+     }
+   }
+ 
+   // Both value and block are external. Need to group all of
+   // these, have an external phi, pass the result as an
+   // argument, and have THIS phi use that result.
+   if (EValEBB.size() > 0) {
+     if (EValEBB.size() == 1) {
+       // Now if it's coming from the newFuncRoot, it's that funky input
+       unsigned phiIdx = EValEBB[0];
+       if (!dyn_cast<Constant>(Phi->getIncomingValue(phiIdx)))
+       {
+         PhiVal2Arg[Phi].push_back(std::make_pair(phiIdx, inputs.size()));
+         // We can just pass this value in as argument
+         inputs.push_back(Phi->getIncomingValue(phiIdx));
+       }
+       Phi->setIncomingBlock(phiIdx, newFuncRoot);
+     } else {
+       PHINode *externalPhi = new PHINode(Phi->getType(), "extPhi");
+       codeReplacer->getInstList().insert(codeReplacer->begin(), externalPhi);
+       for (std::vector<unsigned>::iterator i = EValEBB.begin(),
+              e = EValEBB.end(); i != e; ++i)
+       {
+         externalPhi->addIncoming(Phi->getIncomingValue(*i),
+                                  Phi->getIncomingBlock(*i));
+ 
+         // We make these values invalid instead of deleting them because that
+         // would shift the indices of other values... rewritePhiNodes() will
+         // remove these null entries later.
+         Phi->setIncomingValue(*i, 0);
+         Phi->setIncomingBlock(*i, 0);
+       }
+       PhiVal2Arg[Phi].push_back(std::make_pair(Phi->getNumIncomingValues(),
+                                                inputs.size()));
+       // We can just pass this value in as argument
+       inputs.push_back(externalPhi);
+     }
+   }
+ 
+   // When the value is external, but block internal...
+   // just pass it in as argument, no change to phi node
+   for (std::vector<unsigned>::iterator i = EValIBB.begin(),
+          e = EValIBB.end(); i != e; ++i)
+   {
+     // rewrite the phi input node to be an argument
+     PhiVal2Arg[Phi].push_back(std::make_pair(*i, inputs.size()));
+     inputs.push_back(Phi->getIncomingValue(*i));
+   }
+ 
+   // Value internal, block external
+   // this can happen if we are extracting a part of a loop
+   for (std::vector<unsigned>::iterator i = IValEBB.begin(),
+          e = IValEBB.end(); i != e; ++i)
+   {
+     assert(0 && "Cannot (YET) handle internal values via external blocks");
+   }
+ }
+ 
+ 
+ void CodeExtractor::findInputsOutputs(const std::vector<BasicBlock*> &code,
+                                       Values &inputs,
+                                       Values &outputs,
+                                       BasicBlock *newHeader,
+                                       BasicBlock *newRootNode)
+ {
+   for (std::vector<BasicBlock*>::const_iterator ci = code.begin(), 
+        ce = code.end(); ci != ce; ++ci) {
+     BasicBlock *BB = *ci;
+     for (BasicBlock::iterator BBi = BB->begin(), BBe = BB->end();
+          BBi != BBe; ++BBi) {
+       // If a use is defined outside the region, it's an input.
+       // If a def is used outside the region, it's an output.
+       if (Instruction *I = dyn_cast<Instruction>(&*BBi)) {
+         // If it's a phi node
+         if (PHINode *Phi = dyn_cast<PHINode>(I)) {
+           processPhiNodeInputs(Phi, code, inputs, newHeader, newRootNode);
+         } else {
+           // All other instructions go through the generic input finder
+           // Loop over the operands of each instruction (inputs)
+           for (User::op_iterator op = I->op_begin(), opE = I->op_end();
+                op != opE; ++op) {
+             if (Instruction *opI = dyn_cast<Instruction>(op->get())) {
+               // Check if definition of this operand is within the loop
+               if (!contains(code, opI->getParent())) {
+                 // add this operand to the inputs
+                 inputs.push_back(opI);
+               }
+             }
+           }
+         }
+ 
+         // Consider uses of this instruction (outputs)
+         for (Value::use_iterator use = I->use_begin(), useE = I->use_end();
+              use != useE; ++use) {
+           if (Instruction* inst = dyn_cast<Instruction>(*use)) {
+             if (!contains(code, inst->getParent())) {
+               // add this op to the outputs
+               outputs.push_back(I);
+             }
+           }
+         }
+       } /* if */
+     } /* for: insts */
+   } /* for: basic blocks */
+ }
+ 
+ void CodeExtractor::rewritePhiNodes(Function *F,
+                                     BasicBlock *newFuncRoot) {
+   // Write any changes that were saved before: use function arguments as inputs
+   for (PhiVal2ArgTy::iterator i = PhiVal2Arg.begin(), e = PhiVal2Arg.end();
+        i != e; ++i)
+   {
+     PHINode *phi = (*i).first;
+     PhiValChangesTy &values = (*i).second;
+     for (unsigned cIdx = 0, ce = values.size(); cIdx != ce; ++cIdx)
+     {
+       unsigned phiValueIdx = values[cIdx].first, argNum = values[cIdx].second;
+       if (phiValueIdx < phi->getNumIncomingValues())
+         phi->setIncomingValue(phiValueIdx, getFunctionArg(F, argNum));
+       else
+         phi->addIncoming(getFunctionArg(F, argNum), newFuncRoot);
+     }
+   }
+ 
+   // Delete any invalid Phi node inputs that were marked as NULL previously
+   for (PhiVal2ArgTy::iterator i = PhiVal2Arg.begin(), e = PhiVal2Arg.end();
+        i != e; ++i)
+   {
+     PHINode *phi = (*i).first;
+     for (unsigned idx = 0, end = phi->getNumIncomingValues(); idx != end; ++idx)
+     {
+       if (phi->getIncomingValue(idx) == 0 && phi->getIncomingBlock(idx) == 0) {
+         phi->removeIncomingValue(idx);
+         --idx;
+         --end;
+       }
+     }
+   }
+ 
+   // We are done with the saved values
+   PhiVal2Arg.clear();
+ }
+ 
+ 
+ /// constructFunction - make a function based on inputs and outputs, as follows:
+ /// f(in0, ..., inN, out0, ..., outM)
+ ///
+ Function *CodeExtractor::constructFunction(const Values &inputs,
+                                            const Values &outputs,
+                                            BasicBlock *newRootNode,
+                                            BasicBlock *newHeader,
+                                            const std::vector<BasicBlock*> &code,
+                                            Function *oldFunction, Module *M) {
+   DEBUG(std::cerr << "inputs: " << inputs.size() << "\n");
+   DEBUG(std::cerr << "outputs: " << outputs.size() << "\n");
+   BasicBlock *header = code[0];
+ 
+   // This function returns an unsigned short; outputs go back by reference.
+   Type *retTy = Type::UShortTy;
+   std::vector<const Type*> paramTy;
+ 
+   // Add the types of the input values to the function's argument list
+   for (Values::const_iterator i = inputs.begin(),
+          e = inputs.end(); i != e; ++i) {
+     const Value *value = *i;
+     DEBUG(std::cerr << "value used in func: " << value << "\n");
+     paramTy.push_back(value->getType());
+   }
+ 
+   // Add the types of the output values to the function's argument list, but
+   // make them pointer types for scalars
+   for (Values::const_iterator i = outputs.begin(),
+          e = outputs.end(); i != e; ++i) {
+     const Value *value = *i;
+     DEBUG(std::cerr << "instr used in func: " << value << "\n");
+     const Type *valueType = value->getType();
+     // Convert scalar types into a pointer of that type
+     if (valueType->isPrimitiveType()) {
+       valueType = PointerType::get(valueType);
+     }
+     paramTy.push_back(valueType);
+   }
+ 
+   DEBUG(std::cerr << "Function type: " << retTy << " f(");
+   for (std::vector<const Type*>::iterator i = paramTy.begin(),
+          e = paramTy.end(); i != e; ++i)
+     DEBUG(std::cerr << (*i) << ", ");
+   DEBUG(std::cerr << ")\n");
+ 
+   const FunctionType *funcType = FunctionType::get(retTy, paramTy, false);
+ 
+   // Create the new function
+   Function *newFunction = new Function(funcType,
+                                        GlobalValue::InternalLinkage,
+                                        oldFunction->getName() + "_code", M);
+   newFunction->getBasicBlockList().push_back(newRootNode);
+ 
+   for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
+     std::vector<User*> Users(inputs[i]->use_begin(), inputs[i]->use_end());
+     for (std::vector<User*>::iterator use = Users.begin(), useE = Users.end();
+          use != useE; ++use) {
+       if (Instruction* inst = dyn_cast<Instruction>(*use)) {
+         if (contains(code, inst->getParent())) {
+           inst->replaceUsesOfWith(inputs[i], getFunctionArg(newFunction, i));
+         }
+       }
+     }
+   }
+ 
+   // Rewrite branches from blocks outside the code region that target the
+   // region's header so that they branch to newHeader instead. This must be
+   // done before we lose track of which blocks were originally in the region.
+   std::vector<User*> Users(header->use_begin(), header->use_end());
+   for (std::vector<User*>::iterator i = Users.begin(), e = Users.end();
+        i != e; ++i) {
+     if (BranchInst *inst = dyn_cast<BranchInst>(*i)) {
+       BasicBlock *BB = inst->getParent();
+       if (!contains(code, BB) && BB->getParent() == oldFunction) {
+         // The BasicBlock which contains the branch is not in the region
+         // modify the branch target to a new block
+         inst->replaceUsesOfWith(header, newHeader);
+       }
+     }
+   }
+ 
+   return newFunction;
+ }
+ 
+ void CodeExtractor::moveCodeToFunction(const std::vector<BasicBlock*> &code,
+                                        Function *newFunction)
+ {
+   for (std::vector<BasicBlock*>::const_iterator i = code.begin(), e =code.end();
+        i != e; ++i) {
+     BasicBlock *BB = *i;
+     Function *oldFunc = BB->getParent();
+     Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList();
+ 
+     // Delete the basic block from the old function, and the list of blocks
+     oldBlocks.remove(BB);
+ 
+     // Insert this basic block into the new function
+     Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList();
+     newBlocks.push_back(BB);
+   }
+ }
+ 
+ void
+ CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
+                                           BasicBlock *codeReplacer,
+                                           const std::vector<BasicBlock*> &code,
+                                           Values &inputs,
+                                           Values &outputs)
+ {
+   // Emit a call to the new function: inputs are passed as-is, primitive-typed
+   // outputs get a fresh alloca, and all other outputs are passed directly.
+   std::vector<Value*> params;
+   BasicBlock *codeReplacerTail = new BasicBlock("codeReplTail",
+                                                 codeReplacer->getParent());
+   for (Values::const_iterator i = inputs.begin(),
+          e = inputs.end(); i != e; ++i)
+     params.push_back(*i);
+   for (Values::const_iterator i = outputs.begin(), 
+          e = outputs.end(); i != e; ++i) {
+     // Create allocas for scalar outputs
+     if ((*i)->getType()->isPrimitiveType()) {
+       Constant *one = ConstantUInt::get(Type::UIntTy, 1);
+       AllocaInst *alloca = new AllocaInst((*i)->getType(), one);
+       codeReplacer->getInstList().push_back(alloca);
+       params.push_back(alloca);
+ 
+       LoadInst *load = new LoadInst(alloca, "alloca");
+       codeReplacerTail->getInstList().push_back(load);
+       std::vector<User*> Users((*i)->use_begin(), (*i)->use_end());
+       for (std::vector<User*>::iterator use = Users.begin(), useE =Users.end();
+            use != useE; ++use) {
+         if (Instruction* inst = dyn_cast<Instruction>(*use)) {
+           if (!contains(code, inst->getParent())) {
+             inst->replaceUsesOfWith(*i, load);
+           }
+         }
+       }
+     } else {
+       params.push_back(*i);
+     }
+   }
+   CallInst *call = new CallInst(newFunction, params, "targetBlock");
+   codeReplacer->getInstList().push_back(call);
+   codeReplacer->getInstList().push_back(new BranchInst(codeReplacerTail));
+ 
+   // Now we can emit a switch statement using the call as a value.
+   // FIXME: perhaps instead of default being self BB, it should be a second
+   // dummy block which asserts that the value is not within the range...?
+   //BasicBlock *defaultBlock = new BasicBlock("defaultBlock", oldF);
+   //insert abort() ?
+   //defaultBlock->getInstList().push_back(new BranchInst(codeReplacer));
+ 
+   SwitchInst *switchInst = new SwitchInst(call, codeReplacerTail,
+                                           codeReplacerTail);
+ 
+   // Since there may be multiple exits from the original region, make the new
+   // function return an unsigned, switch on that number
+   unsigned switchVal = 0;
+   for (std::vector<BasicBlock*>::const_iterator i =code.begin(), e = code.end();
+        i != e; ++i) {
+     BasicBlock *BB = *i;
+ 
+     // rewrite the terminator of the original BasicBlock
+     Instruction *term = BB->getTerminator();
+     if (BranchInst *brInst = dyn_cast<BranchInst>(term)) {
+ 
+       // Restore values just before we exit
+       // FIXME: Use a GetElementPtr to bunch the outputs in a struct
+       for (unsigned outIdx = 0, outE = outputs.size(); outIdx != outE; ++outIdx)
+       {
+         new StoreInst(outputs[outIdx],
+                       getFunctionArg(newFunction, outIdx),
+                       brInst);
+       }
+ 
+       // Rewrite branches into exits which return a value based on which
+       // exit we take from this function
+       if (brInst->isUnconditional()) {
+         if (!contains(code, brInst->getSuccessor(0))) {
+           ConstantUInt *brVal = ConstantUInt::get(Type::UShortTy, switchVal);
+           ReturnInst *newRet = new ReturnInst(brVal);
+           // add a new target to the switch
+           switchInst->addCase(brVal, brInst->getSuccessor(0));
+           ++switchVal;
+           // rewrite the branch with a return
+           BasicBlock::iterator ii(brInst);
+           ReplaceInstWithInst(BB->getInstList(), ii, newRet);
+           delete brInst;
+         }
+       } else {
+         // Replace the conditional branch to branch
+         // to two new blocks, each of which returns a different code.
+         for (unsigned idx = 0; idx < 2; ++idx) {
+           BasicBlock *oldTarget = brInst->getSuccessor(idx);
+           if (!contains(code, oldTarget)) {
+             // add a new basic block which returns the appropriate value
+             BasicBlock *newTarget = new BasicBlock("newTarget", newFunction);
+             ConstantUInt *brVal = ConstantUInt::get(Type::UShortTy, switchVal);
+             ReturnInst *newRet = new ReturnInst(brVal);
+             newTarget->getInstList().push_back(newRet);
+             // rewrite the original branch instruction with this new target
+             brInst->setSuccessor(idx, newTarget);
+             // the switch statement knows what to do with this value
+             switchInst->addCase(brVal, oldTarget);
+             ++switchVal;
+           }
+         }
+       }
+     } else if (ReturnInst *retTerm = dyn_cast<ReturnInst>(term)) {
+       assert(0 && "Cannot handle return instructions just yet.");
+       // FIXME: what if the terminator is a return!??!
+       // Need to rewrite: add new basic block, move the return there
+       // treat the original as an unconditional branch to that basicblock
+     } else if (SwitchInst *swTerm = dyn_cast<SwitchInst>(term)) {
+       assert(0 && "Cannot handle switch instructions just yet.");
+     } else if (InvokeInst *invInst = dyn_cast<InvokeInst>(term)) {
+       assert(0 && "Cannot handle invoke instructions just yet.");
+     } else {
+       assert(0 && "Unrecognized terminator, or badly-formed BasicBlock.");
+     }
+   }
+ }
+ 
+ 
+ /// ExtractCodeRegion - Removes a code region from a function and replaces it
+ /// with a call to a new function. Returns a pointer to the new function.
+ ///
+ /// algorithm:
+ ///
+ /// find inputs and outputs for the region
+ ///
+ /// for inputs: add to function as args, map input instr* to arg# 
+ /// for outputs: add allocas for scalars, 
+ ///             add to func as args, map output instr* to arg#
+ ///
+ /// rewrite func to use argument #s instead of instr*
+ ///
+ /// for each scalar output in the function: at every exit, store intermediate 
+ /// computed result back into memory.
+ ///
+ Function *CodeExtractor::ExtractCodeRegion(const std::vector<BasicBlock*> &code)
+ {
+   // 1) Find inputs, outputs
+   // 2) Construct new function
+   //  * Add allocas for defs, pass as args by reference
+   //  * Pass in uses as args
+   // 3) Move code region, add call instr to func
+   // 
+ 
+   Values inputs, outputs;
+ 
+   // Assumption: this is a single-entry code region, and the header is the first
+   // block in the region. FIXME: is this true for a list of blocks from a
+   // natural function?
+   BasicBlock *header = code[0];
+   Function *oldFunction = header->getParent();
+   Module *module = oldFunction->getParent();
+ 
+   // This block takes the place of the original code region
+   BasicBlock *codeReplacer = new BasicBlock("codeRepl", oldFunction);
+ 
+   // The new function needs a root node because other nodes can branch to the
+   // head of the loop, and the root cannot have predecessors
+   BasicBlock *newFuncRoot = new BasicBlock("newFuncRoot");
+   newFuncRoot->getInstList().push_back(new BranchInst(header));
+ 
+   // Find inputs to, outputs from the code region
+   //
+   // If one of the inputs is coming from a different basic block and it's in a
+   // phi node, we need to rewrite the phi node:
+   //
+   // * All the inputs which involve basic blocks OUTSIDE of this region go into
+   //   a NEW phi node that takes care of finding which value really came in.
+   //   The result of this phi is passed to the function as an argument. 
+   //
+   // * All the other phi values stay.
+   //
+   // FIXME: PHI nodes' incoming blocks aren't being rewritten to accommodate
+   // blocks moving to a new function.
+   // SOLUTION: move Phi nodes out of the loop header into the codeReplacer, pass
+   // the values as parameters to the function
+   findInputsOutputs(code, inputs, outputs, codeReplacer, newFuncRoot);
+ 
+   // Step 2: Construct new function based on inputs/outputs,
+   // Add allocas for all defs
+   Function *newFunction = constructFunction(inputs, outputs, newFuncRoot, 
+                                             codeReplacer, code, 
+                                             oldFunction, module);
+ 
+   rewritePhiNodes(newFunction, newFuncRoot);
+ 
+   emitCallAndSwitchStatement(newFunction, codeReplacer, code, inputs, outputs);
+ 
+   moveCodeToFunction(code, newFunction);
+ 
+   return newFunction;
+ }
+ 
+ /// ExtractLoop - slurp a natural loop into a brand new function
+ ///
+ Function* llvm::ExtractLoop(Loop *L) {
+   CodeExtractor CE;
+   return CE.ExtractCodeRegion(L->getBlocks());
+ }
+ 
+ /// ExtractBasicBlock - slurp a basic block into a brand new function
+ ///
+ Function* llvm::ExtractBasicBlock(BasicBlock *BB) {
+   CodeExtractor CE;
+   std::vector<BasicBlock*> Blocks;
+   Blocks.push_back(BB);
+   return CE.ExtractCodeRegion(Blocks);  
+ }


Index: llvm/lib/Transforms/Utils/LoopExtractor.cpp
diff -c /dev/null llvm/lib/Transforms/Utils/LoopExtractor.cpp:1.2.2.1
*** /dev/null	Mon Mar  1 17:58:28 2004
--- llvm/lib/Transforms/Utils/LoopExtractor.cpp	Mon Mar  1 17:58:16 2004
***************
*** 0 ****
--- 1,69 ----
+ //===- LoopExtractor.cpp - Extract each loop into a new function ----------===//
+ //
+ // A pass wrapper around the ExtractLoop() scalar transformation to extract each
+ // top-level loop into its own new function. If the loop is the ONLY loop in a
+ // given function, it is not touched.
+ //
+ //===----------------------------------------------------------------------===//
+ 
+ #include "llvm/Module.h"
+ #include "llvm/Pass.h"
+ #include "llvm/Analysis/LoopInfo.h"
+ #include "llvm/Transforms/Scalar.h"
+ #include "llvm/Transforms/Utils/FunctionUtils.h"
+ #include <vector>
+ using namespace llvm;
+ 
+ namespace {
+ 
+ // FIXME: PassManager should allow Module passes to require FunctionPasses
+ struct LoopExtractor : public FunctionPass {
+ 
+ public:
+   LoopExtractor() {}
+   virtual bool run(Module &M);
+   virtual bool runOnFunction(Function &F);
+ 
+   virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+     AU.addRequired<LoopInfo>();
+   }
+ 
+ };
+ 
+ RegisterOpt<LoopExtractor> 
+ X("loop-extract", "Extract loops into new functions");
+ 
+ bool LoopExtractor::run(Module &M) {
+   bool Changed = false;
+   for (Module::iterator i = M.begin(), e = M.end(); i != e; ++i)
+     Changed |= runOnFunction(*i);
+   return Changed;
+ }
+ 
+ bool LoopExtractor::runOnFunction(Function &F) {
+   std::cerr << F.getName() << "\n";
+ 
+   LoopInfo &LI = getAnalysis<LoopInfo>();
+ 
+   // We don't want to keep extracting the only loop of a function into a new one
+   if (LI.begin() == LI.end() || LI.begin() + 1 == LI.end())
+     return false;
+ 
+   bool Changed = false;
+ 
+   // Try to move each loop out of the code into separate function
+   for (LoopInfo::iterator i = LI.begin(), e = LI.end(); i != e; ++i)
+     Changed |= (ExtractLoop(*i) != 0);
+ 
+   return Changed;
+ }
+ 
+ 
+ 
+ } // End anonymous namespace 
+ 
+ /// createLoopExtractorPass 
+ ///
+ FunctionPass* llvm::createLoopExtractorPass() {
+   return new LoopExtractor();
+ }


Index: llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
diff -u llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp:1.18 llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp:1.18.2.1
--- llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp:1.18	Fri Jan  9 00:12:10 2004
+++ llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp	Mon Mar  1 17:58:16 2004
@@ -119,8 +119,11 @@
   //
   for (BasicBlock::iterator I = DestBB->begin();
        PHINode *PN = dyn_cast<PHINode>(I); ++I) {
-    // We no longer enter through TIBB, now we come in through NewBB.
-    PN->replaceUsesOfWith(TIBB, NewBB);
+    // We no longer enter through TIBB, now we come in through NewBB.  Revector
+    // exactly one entry in the PHI node that used to come from TIBB to come
+    // from NewBB.
+    Value *InVal = PN->removeIncomingValue(TIBB, false);
+    PN->addIncoming(InVal, NewBB);
   }
 
   // If we don't have a pass object, we can't update anything...


Index: llvm/lib/Transforms/Utils/CloneFunction.cpp
diff -u llvm/lib/Transforms/Utils/CloneFunction.cpp:1.19 llvm/lib/Transforms/Utils/CloneFunction.cpp:1.19.2.1
--- llvm/lib/Transforms/Utils/CloneFunction.cpp:1.19	Fri Jan  9 00:12:12 2004
+++ llvm/lib/Transforms/Utils/CloneFunction.cpp	Mon Mar  1 17:58:16 2004
@@ -42,8 +42,8 @@
 // CloneBasicBlock - See comments in Cloning.h
 BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
                                   std::map<const Value*, Value*> &ValueMap,
-                                  const char *NameSuffix) {
-  BasicBlock *NewBB = new BasicBlock("");
+                                  const char *NameSuffix, Function *F) {
+  BasicBlock *NewBB = new BasicBlock("", F);
   if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
 
   // Loop over all instructions copying them over...
@@ -82,8 +82,7 @@
     const BasicBlock &BB = *BI;
     
     // Create a new basic block and copy instructions into it!
-    BasicBlock *CBB = CloneBasicBlock(&BB, ValueMap, NameSuffix);
-    NewFunc->getBasicBlockList().push_back(CBB);
+    BasicBlock *CBB = CloneBasicBlock(&BB, ValueMap, NameSuffix, NewFunc);
     ValueMap[&BB] = CBB;                       // Add basic block mapping.
 
     if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator()))
@@ -93,14 +92,11 @@
   // Loop over all of the instructions in the function, fixing up operand 
   // references as we go.  This uses ValueMap to do all the hard work.
   //
-  for (Function::const_iterator BB = OldFunc->begin(), BE = OldFunc->end();
-       BB != BE; ++BB) {
-    BasicBlock *NBB = cast<BasicBlock>(ValueMap[BB]);
-    
+  for (Function::iterator BB = cast<BasicBlock>(ValueMap[OldFunc->begin()]),
+         BE = NewFunc->end(); BB != BE; ++BB)
     // Loop over all instructions, fixing each one as we find it...
-    for (BasicBlock::iterator II = NBB->begin(); II != NBB->end(); ++II)
+    for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
       RemapInstruction(II, ValueMap);
-  }
 }
 
 /// CloneFunction - Return a copy of the specified function, but without


Index: llvm/lib/Transforms/Utils/CloneTrace.cpp
diff -u llvm/lib/Transforms/Utils/CloneTrace.cpp:1.5 llvm/lib/Transforms/Utils/CloneTrace.cpp:1.5.2.1
--- llvm/lib/Transforms/Utils/CloneTrace.cpp:1.5	Fri Jan  9 00:12:15 2004
+++ llvm/lib/Transforms/Utils/CloneTrace.cpp	Mon Mar  1 17:58:16 2004
@@ -7,11 +7,11 @@
 // 
 //===----------------------------------------------------------------------===//
 //
-// This file implements the CloneTrace interface, which is used 
-// when writing runtime optimizations. It takes a vector of basic blocks
-// clones the basic blocks, removes internal phi nodes, adds it to the
-// same function as the original (although there is no jump to it) and 
-// returns the new vector of basic blocks.
+// This file implements the CloneTrace interface, which is used when writing
+// runtime optimizations. It takes a vector of basic blocks clones the basic
+// blocks, removes internal phi nodes, adds it to the same function as the
+// original (although there is no jump to it) and returns the new vector of
+// basic blocks.
 //
 //===----------------------------------------------------------------------===//
 
@@ -34,16 +34,14 @@
 	End = origTrace.end(); T != End; ++T) {
 
     //Clone Basic Block
-    BasicBlock *clonedBlock = CloneBasicBlock(*T, ValueMap);
+    BasicBlock *clonedBlock =
+      CloneBasicBlock(*T, ValueMap, ".tr", (*T)->getParent());
     
     //Add it to our new trace
     clonedTrace.push_back(clonedBlock);
 
     //Add this new mapping to our Value Map
     ValueMap[*T] = clonedBlock;
-
-    //Add this cloned BB to the old BB's function
-    (*T)->getParent()->getBasicBlockList().push_back(clonedBlock);
 
     //Loop over the phi instructions and delete operands
     //that are from blocks not in the trace


Index: llvm/lib/Transforms/Utils/InlineFunction.cpp
diff -u llvm/lib/Transforms/Utils/InlineFunction.cpp:1.18 llvm/lib/Transforms/Utils/InlineFunction.cpp:1.18.2.1
--- llvm/lib/Transforms/Utils/InlineFunction.cpp:1.18	Fri Jan  9 00:12:20 2004
+++ llvm/lib/Transforms/Utils/InlineFunction.cpp	Mon Mar  1 17:58:16 2004
@@ -11,7 +11,7 @@
 // parameters and the return value as appropriate.
 //
 // FIXME: This pass should transform alloca instructions in the called function
-//        into malloc/free pairs!  Or perhaps it should refuse to inline them!
+// into alloca/dealloca pairs!  Or perhaps it should refuse to inline them!
 //
 //===----------------------------------------------------------------------===//
 
@@ -50,183 +50,112 @@
   BasicBlock *OrigBB = TheCall->getParent();
   Function *Caller = OrigBB->getParent();
 
-  // We want to clone the entire callee function into the whole between the
-  // "starter" and "ender" blocks.  How we accomplish this depends on whether
-  // this is an invoke instruction or a call instruction.
-
-  BasicBlock *InvokeDest = 0;     // Exception handling destination
-  std::vector<Value*> InvokeDestPHIValues; // Values for PHI nodes in InvokeDest
-  BasicBlock *AfterCallBB;
-
-  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
-    InvokeDest = II->getExceptionalDest();
-
-    // If there are PHI nodes in the exceptional destination block, we need to
-    // keep track of which values came into them from this invoke, then remove
-    // the entry for this block.
-    for (BasicBlock::iterator I = InvokeDest->begin();
-         PHINode *PN = dyn_cast<PHINode>(I); ++I) {
-      // Save the value to use for this edge...
-      InvokeDestPHIValues.push_back(PN->getIncomingValueForBlock(OrigBB));
-    }
-
-    // Add an unconditional branch to make this look like the CallInst case...
-    BranchInst *NewBr = new BranchInst(II->getNormalDest(), TheCall);
-
-    // Split the basic block.  This guarantees that no PHI nodes will have to be
-    // updated due to new incoming edges, and make the invoke case more
-    // symmetric to the call case.
-    AfterCallBB = OrigBB->splitBasicBlock(NewBr,
-                                          CalledFunc->getName()+".entry");
-
-    // Remove (unlink) the InvokeInst from the function...
-    OrigBB->getInstList().remove(TheCall);
-
-  } else {  // It's a call
-    // If this is a call instruction, we need to split the basic block that the
-    // call lives in.
-    //
-    AfterCallBB = OrigBB->splitBasicBlock(TheCall,
-                                          CalledFunc->getName()+".entry");
-    // Remove (unlink) the CallInst from the function...
-    AfterCallBB->getInstList().remove(TheCall);
-  }
-
-  // If we have a return value generated by this call, convert it into a PHI 
-  // node that gets values from each of the old RET instructions in the original
-  // function.
-  //
-  PHINode *PHI = 0;
-  if (!TheCall->use_empty()) {
-    // The PHI node should go at the front of the new basic block to merge all 
-    // possible incoming values.
-    //
-    PHI = new PHINode(CalledFunc->getReturnType(), TheCall->getName(),
-                      AfterCallBB->begin());
-
-    // Anything that used the result of the function call should now use the PHI
-    // node as their operand.
-    //
-    TheCall->replaceAllUsesWith(PHI);
-  }
-
   // Get an iterator to the last basic block in the function, which will have
   // the new function inlined after it.
   //
   Function::iterator LastBlock = &Caller->back();
 
-  // Calculate the vector of arguments to pass into the function cloner...
-  std::map<const Value*, Value*> ValueMap;
-  assert(std::distance(CalledFunc->abegin(), CalledFunc->aend()) == 
-         std::distance(CS.arg_begin(), CS.arg_end()) &&
-         "No varargs calls can be inlined!");
-
-  CallSite::arg_iterator AI = CS.arg_begin();
-  for (Function::const_aiterator I = CalledFunc->abegin(), E=CalledFunc->aend();
-       I != E; ++I, ++AI)
-    ValueMap[I] = *AI;
-
-  // Since we are now done with the Call/Invoke, we can delete it.
-  delete TheCall;
-
-  // Make a vector to capture the return instructions in the cloned function...
+  // Make sure to capture all of the return instructions from the cloned
+  // function.
   std::vector<ReturnInst*> Returns;
+  { // Scope to destroy ValueMap after cloning.
+    // Calculate the vector of arguments to pass into the function cloner...
+    std::map<const Value*, Value*> ValueMap;
+    assert(std::distance(CalledFunc->abegin(), CalledFunc->aend()) == 
+           std::distance(CS.arg_begin(), CS.arg_end()) &&
+           "No varargs calls can be inlined!");
+    
+    CallSite::arg_iterator AI = CS.arg_begin();
+    for (Function::const_aiterator I = CalledFunc->abegin(),
+           E = CalledFunc->aend(); I != E; ++I, ++AI)
+      ValueMap[I] = *AI;
+    
+    // Clone the entire body of the callee into the caller.  
+    CloneFunctionInto(Caller, CalledFunc, ValueMap, Returns, ".i");
+  }    
 
-  // Do all of the hard part of cloning the callee into the caller...
-  CloneFunctionInto(Caller, CalledFunc, ValueMap, Returns, ".i");
-
-  // Loop over all of the return instructions, turning them into unconditional
-  // branches to the merge point now...
-  for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
-    ReturnInst *RI = Returns[i];
-    BasicBlock *BB = RI->getParent();
-
-    // Add a branch to the merge point where the PHI node lives if it exists.
-    new BranchInst(AfterCallBB, RI);
-
-    if (PHI) {   // The PHI node should include this value!
-      assert(RI->getReturnValue() && "Ret should have value!");
-      assert(RI->getReturnValue()->getType() == PHI->getType() && 
-             "Ret value not consistent in function!");
-      PHI->addIncoming(RI->getReturnValue(), BB);
-    }
-
-    // Delete the return instruction now
-    BB->getInstList().erase(RI);
-  }
-
-  // Check to see if the PHI node only has one argument.  This is a common
-  // case resulting from there only being a single return instruction in the
-  // function call.  Because this is so common, eliminate the PHI node.
-  //
-  if (PHI && PHI->getNumIncomingValues() == 1) {
-    PHI->replaceAllUsesWith(PHI->getIncomingValue(0));
-    PHI->getParent()->getInstList().erase(PHI);
-  }
-
-  // Change the branch that used to go to AfterCallBB to branch to the first
-  // basic block of the inlined function.
-  //
-  TerminatorInst *Br = OrigBB->getTerminator();
-  assert(Br && Br->getOpcode() == Instruction::Br && 
-	 "splitBasicBlock broken!");
-  Br->setOperand(0, ++LastBlock);
+  // Remember the first block that is newly cloned over.
+  Function::iterator FirstNewBlock = LastBlock; ++FirstNewBlock;
 
   // If there are any alloca instructions in the block that used to be the entry
   // block for the callee, move them to the entry block of the caller.  First
   // calculate which instruction they should be inserted before.  We insert the
   // instructions at the end of the current alloca list.
   //
-  if (isa<AllocaInst>(LastBlock->begin())) {
+  if (isa<AllocaInst>(FirstNewBlock->begin())) {
     BasicBlock::iterator InsertPoint = Caller->begin()->begin();
-    while (isa<AllocaInst>(InsertPoint)) ++InsertPoint;
-    
-    for (BasicBlock::iterator I = LastBlock->begin(), E = LastBlock->end();
-         I != E; )
+    for (BasicBlock::iterator I = FirstNewBlock->begin(),
+           E = FirstNewBlock->end(); I != E; )
       if (AllocaInst *AI = dyn_cast<AllocaInst>(I++))
         if (isa<Constant>(AI->getArraySize())) {
-          LastBlock->getInstList().remove(AI);
-          Caller->front().getInstList().insert(InsertPoint, AI);      
+          // Scan for the block of allocas that we can move over.
+          while (isa<AllocaInst>(I) &&
+                 isa<Constant>(cast<AllocaInst>(I)->getArraySize()))
+            ++I;
+
+          // Transfer all of the allocas over in a block.  Using splice means
+          // that the instructions aren't removed from the symbol table, then
+          // reinserted.
+          Caller->front().getInstList().splice(InsertPoint,
+                                               FirstNewBlock->getInstList(),
+                                               AI, I);
         }
   }
 
-  // If we just inlined a call due to an invoke instruction, scan the inlined
-  // function checking for function calls that should now be made into invoke
-  // instructions, and for unwind's which should be turned into branches.
-  if (InvokeDest) {
-    for (Function::iterator BB = LastBlock, E = Caller->end(); BB != E; ++BB) {
+  // If we are inlining for an invoke instruction, we must make sure to rewrite
+  // any inlined 'unwind' instructions into branches to the invoke exception
+  // destination, and call instructions into invoke instructions.
+  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
+    BasicBlock *InvokeDest = II->getUnwindDest();
+    std::vector<Value*> InvokeDestPHIValues;
+
+    // If there are PHI nodes in the exceptional destination block, we need to
+    // keep track of which values came into them from this invoke, then remove
+    // the entry for this block.
+    for (BasicBlock::iterator I = InvokeDest->begin();
+         PHINode *PN = dyn_cast<PHINode>(I); ++I)
+      // Save the value to use for this edge...
+      InvokeDestPHIValues.push_back(PN->getIncomingValueForBlock(OrigBB));
+
+    for (Function::iterator BB = FirstNewBlock, E = Caller->end();
+         BB != E; ++BB) {
       for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
         // We only need to check for function calls: inlined invoke instructions
         // require no special handling...
         if (CallInst *CI = dyn_cast<CallInst>(I)) {
-          // Convert this function call into an invoke instruction...
-
-          // First, split the basic block...
-          BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc");
-          
-          // Next, create the new invoke instruction, inserting it at the end
-          // of the old basic block.
-          InvokeInst *II =
-            new InvokeInst(CI->getCalledValue(), Split, InvokeDest, 
-                           std::vector<Value*>(CI->op_begin()+1, CI->op_end()),
-                           CI->getName(), BB->getTerminator());
-
-          // Make sure that anything using the call now uses the invoke!
-          CI->replaceAllUsesWith(II);
-
-          // Delete the unconditional branch inserted by splitBasicBlock
-          BB->getInstList().pop_back();
-          Split->getInstList().pop_front();  // Delete the original call
-          
-          // Update any PHI nodes in the exceptional block to indicate that
-          // there is now a new entry in them.
-          unsigned i = 0;
-          for (BasicBlock::iterator I = InvokeDest->begin();
-               PHINode *PN = dyn_cast<PHINode>(I); ++I, ++i)
-            PN->addIncoming(InvokeDestPHIValues[i], BB);
-
-          // This basic block is now complete, start scanning the next one.
-          break;
+          // Convert this function call into an invoke instruction... if it's
+          // not an intrinsic function call (which are known to not throw).
+          if (CI->getCalledFunction() &&
+              CI->getCalledFunction()->getIntrinsicID()) {
+            ++I;
+          } else {
+            // First, split the basic block...
+            BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc");
+            
+            // Next, create the new invoke instruction, inserting it at the end
+            // of the old basic block.
+            InvokeInst *II =
+              new InvokeInst(CI->getCalledValue(), Split, InvokeDest, 
+                            std::vector<Value*>(CI->op_begin()+1, CI->op_end()),
+                             CI->getName(), BB->getTerminator());
+
+            // Make sure that anything using the call now uses the invoke!
+            CI->replaceAllUsesWith(II);
+            
+            // Delete the unconditional branch inserted by splitBasicBlock
+            BB->getInstList().pop_back();
+            Split->getInstList().pop_front();  // Delete the original call
+            
+            // Update any PHI nodes in the exceptional block to indicate that
+            // there is now a new entry in them.
+            unsigned i = 0;
+            for (BasicBlock::iterator I = InvokeDest->begin();
+                 PHINode *PN = dyn_cast<PHINode>(I); ++I, ++i)
+              PN->addIncoming(InvokeDestPHIValues[i], BB);
+            
+            // This basic block is now complete, start scanning the next one.
+            break;
+          }
         } else {
           ++I;
         }
@@ -255,27 +184,145 @@
     // the exception destination block still have entries due to the original
     // invoke instruction.  Eliminate these entries (which might even delete the
     // PHI node) now.
-    for (BasicBlock::iterator I = InvokeDest->begin();
-         PHINode *PN = dyn_cast<PHINode>(I); ++I)
-      PN->removeIncomingValue(AfterCallBB);
+    InvokeDest->removePredecessor(II->getParent());
+  }
+
+  // If we cloned in _exactly one_ basic block, and if that block ends in a
+  // return instruction, we splice the body of the inlined callee directly into
+  // the calling basic block.
+  if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
+    // Move all of the instructions right before the call.
+    OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(),
+                                 FirstNewBlock->begin(), FirstNewBlock->end());
+    // Remove the cloned basic block.
+    Caller->getBasicBlockList().pop_back();
+    
+    // If the call site was an invoke instruction, add a branch to the normal
+    // destination.
+    if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
+      new BranchInst(II->getNormalDest(), TheCall);
+
+    // If the return instruction returned a value, replace uses of the call with
+    // uses of the returned value.
+    if (!TheCall->use_empty())
+      TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
+
+    // Since we are now done with the Call/Invoke, we can delete it.
+    TheCall->getParent()->getInstList().erase(TheCall);
+
+    // Since we are now done with the return instruction, delete it also.
+    Returns[0]->getParent()->getInstList().erase(Returns[0]);
+
+    // We are now done with the inlining.
+    return true;
+  }
+
+  // Otherwise, we have the normal case, of more than one block to inline or
+  // multiple return sites.
+
+  // We want to clone the entire callee function into the hole between the
+  // "starter" and "ender" blocks.  How we accomplish this depends on whether
+  // this is an invoke instruction or a call instruction.
+  BasicBlock *AfterCallBB;
+  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
+    
+    // Add an unconditional branch to make this look like the CallInst case...
+    BranchInst *NewBr = new BranchInst(II->getNormalDest(), TheCall);
+    
+    // Split the basic block.  This guarantees that no PHI nodes will have to be
+    // updated due to new incoming edges, and make the invoke case more
+    // symmetric to the call case.
+    AfterCallBB = OrigBB->splitBasicBlock(NewBr,
+                                          CalledFunc->getName()+".entry");
+    
+  } else {  // It's a call
+    // If this is a call instruction, we need to split the basic block that
+    // the call lives in.
+    //
+    AfterCallBB = OrigBB->splitBasicBlock(TheCall,
+                                          CalledFunc->getName()+".entry");
   }
+
+  // Change the branch that used to go to AfterCallBB to branch to the first
+  // basic block of the inlined function.
+  //
+  TerminatorInst *Br = OrigBB->getTerminator();
+  assert(Br && Br->getOpcode() == Instruction::Br && 
+         "splitBasicBlock broken!");
+  Br->setOperand(0, FirstNewBlock);
+
+
   // Now that the function is correct, make it a little bit nicer.  In
   // particular, move the basic blocks inserted from the end of the function
   // into the space made by splitting the source basic block.
   //
-  Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(), 
-                                     LastBlock, Caller->end());
+  Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(),
+                                     FirstNewBlock, Caller->end());
+
+  // Handle all of the return instructions that we just cloned in, and eliminate
+  // any users of the original call/invoke instruction.
+  if (Returns.size() > 1) {
+    // The PHI node should go at the front of the new basic block to merge all
+    // possible incoming values.
+    //
+    PHINode *PHI = 0;
+    if (!TheCall->use_empty()) {
+      PHI = new PHINode(CalledFunc->getReturnType(),
+                        TheCall->getName(), AfterCallBB->begin());
+        
+      // Anything that used the result of the function call should now use the
+      // PHI node as their operand.
+      //
+      TheCall->replaceAllUsesWith(PHI);
+    }
+      
+    // Loop over all of the return instructions, turning them into unconditional
+    // branches to the merge point now, and adding entries to the PHI node as
+    // appropriate.
+    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+      ReturnInst *RI = Returns[i];
+        
+      if (PHI) {
+        assert(RI->getReturnValue() && "Ret should have value!");
+        assert(RI->getReturnValue()->getType() == PHI->getType() && 
+               "Ret value not consistent in function!");
+        PHI->addIncoming(RI->getReturnValue(), RI->getParent());
+      }
+        
+      // Add a branch to the merge point where the PHI node lives if it exists.
+      new BranchInst(AfterCallBB, RI);
+        
+      // Delete the return instruction now
+      RI->getParent()->getInstList().erase(RI);
+    }
+      
+  } else if (!Returns.empty()) {
+    // Otherwise, if there is exactly one return value, just replace anything
+    // using the return value of the call with the computed value.
+    if (!TheCall->use_empty())
+      TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
+      
+    // Add a branch to the merge point where the PHI node lives if it exists.
+    new BranchInst(AfterCallBB, Returns[0]);
+      
+    // Delete the return instruction now
+    Returns[0]->getParent()->getInstList().erase(Returns[0]);
+  }
+    
+  // Since we are now done with the Call/Invoke, we can delete it.
+  TheCall->getParent()->getInstList().erase(TheCall);
 
   // We should always be able to fold the entry block of the function into the
   // single predecessor of the block...
-  assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
+  assert(cast<BranchInst>(Br)->isUnconditional() &&"splitBasicBlock broken!");
   BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);
   SimplifyCFG(CalleeEntry);
-  
+    
   // Okay, continue the CFG cleanup.  It's often the case that there is only a
   // single return instruction in the callee function.  If this is the case,
   // then we have an unconditional branch from the return block to the
   // 'AfterCallBB'.  Check for this case, and eliminate the branch is possible.
   SimplifyCFG(AfterCallBB);
+
   return true;
 }


Index: llvm/lib/Transforms/Utils/Linker.cpp
diff -u llvm/lib/Transforms/Utils/Linker.cpp:1.66 llvm/lib/Transforms/Utils/Linker.cpp:1.66.2.1
--- llvm/lib/Transforms/Utils/Linker.cpp:1.66	Mon Jan 12 13:10:58 2004
+++ llvm/lib/Transforms/Utils/Linker.cpp	Mon Mar  1 17:58:16 2004
@@ -284,7 +284,8 @@
 
   // Check to see if it's a constant that we are interesting in transforming...
   if (const Constant *CPV = dyn_cast<Constant>(In)) {
-    if (!isa<DerivedType>(CPV->getType()) && !isa<ConstantExpr>(CPV))
+    if ((!isa<DerivedType>(CPV->getType()) && !isa<ConstantExpr>(CPV)) ||
+        isa<ConstantAggregateZero>(CPV))
       return const_cast<Constant*>(CPV);   // Simple constants stay identical...
 
     Constant *Result = 0;
@@ -565,7 +566,6 @@
 
       GlobalVariable *DGV = cast<GlobalVariable>(ValueMap[SGV]);    
       if (DGV->hasInitializer()) {
-        assert(SGV->getLinkage() == DGV->getLinkage());
         if (SGV->hasExternalLinkage()) {
           if (DGV->getInitializer() != SInit)
             return Error(Err, "Global Variable Collision on '" + 
@@ -574,6 +574,9 @@
         } else if (DGV->hasLinkOnceLinkage() || DGV->hasWeakLinkage()) {
           // Nothing is required, mapped values will take the new global
           // automatically.
+        } else if (SGV->hasLinkOnceLinkage() || SGV->hasWeakLinkage()) {
+          // Nothing is required, mapped values will take the new global
+          // automatically.
         } else if (DGV->hasAppendingLinkage()) {
           assert(0 && "Appending linkage unimplemented!");
         } else {
@@ -796,12 +799,24 @@
 
       // Merge the initializer...
       Inits.reserve(NewSize);
-      ConstantArray *I = cast<ConstantArray>(G1->getInitializer());
-      for (unsigned i = 0, e = T1->getNumElements(); i != e; ++i)
-        Inits.push_back(cast<Constant>(I->getValues()[i]));
-      I = cast<ConstantArray>(G2->getInitializer());
-      for (unsigned i = 0, e = T2->getNumElements(); i != e; ++i)
-        Inits.push_back(cast<Constant>(I->getValues()[i]));
+      if (ConstantArray *I = dyn_cast<ConstantArray>(G1->getInitializer())) {
+        for (unsigned i = 0, e = T1->getNumElements(); i != e; ++i)
+          Inits.push_back(cast<Constant>(I->getValues()[i]));
+      } else {
+        assert(isa<ConstantAggregateZero>(G1->getInitializer()));
+        Constant *CV = Constant::getNullValue(T1->getElementType());
+        for (unsigned i = 0, e = T1->getNumElements(); i != e; ++i)
+          Inits.push_back(CV);
+      }
+      if (ConstantArray *I = dyn_cast<ConstantArray>(G2->getInitializer())) {
+        for (unsigned i = 0, e = T2->getNumElements(); i != e; ++i)
+          Inits.push_back(cast<Constant>(I->getValues()[i]));
+      } else {
+        assert(isa<ConstantAggregateZero>(G2->getInitializer()));
+        Constant *CV = Constant::getNullValue(T2->getElementType());
+        for (unsigned i = 0, e = T2->getNumElements(); i != e; ++i)
+          Inits.push_back(CV);
+      }
       NG->setInitializer(ConstantArray::get(NewType, Inits));
       Inits.clear();
 


Index: llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
diff -u llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp:1.59 llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp:1.59.2.1
--- llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp:1.59	Sun Jan 11 19:18:32 2004
+++ llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp	Mon Mar  1 17:58:16 2004
@@ -110,7 +110,9 @@
   private:
     void MarkDominatingPHILive(BasicBlock *BB, unsigned AllocaNum,
                                std::set<PHINode*> &DeadPHINodes);
-    void PromoteLocallyUsedAlloca(AllocaInst *AI);
+    void PromoteLocallyUsedAlloca(BasicBlock *BB, AllocaInst *AI);
+    void PromoteLocallyUsedAllocas(BasicBlock *BB, 
+                                   const std::vector<AllocaInst*> &AIs);
 
     void RenamePass(BasicBlock *BB, BasicBlock *Pred,
                     std::vector<Value*> &IncVals);
@@ -122,6 +124,13 @@
 void PromoteMem2Reg::run() {
   Function &F = *DF.getRoot()->getParent();
 
+  // LocallyUsedAllocas - Keep track of all of the alloca instructions which are
+  // only used in a single basic block.  These instructions can be efficiently
+  // promoted by performing a single linear scan over that one block.  Since
+  // individual basic blocks are sometimes large, we group together all allocas
+  // that are live in a single basic block by the basic block they are live in.
+  std::map<BasicBlock*, std::vector<AllocaInst*> > LocallyUsedAllocas;
+
   for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
     AllocaInst *AI = Allocas[AllocaNum];
 
@@ -207,9 +216,9 @@
     // If the alloca is only read and written in one basic block, just perform a
     // linear sweep over the block to eliminate it.
     if (OnlyUsedInOneBlock) {
-      PromoteLocallyUsedAlloca(AI);
+      LocallyUsedAllocas[OnlyBlock].push_back(AI);
 
-      // Remove the alloca from the Allocas list, since it has been processed
+      // Remove the alloca from the Allocas list, since it will be processed.
       Allocas[AllocaNum] = Allocas.back();
       Allocas.pop_back();
       --AllocaNum;
@@ -272,6 +281,20 @@
     AllocaLookup[Allocas[AllocaNum]] = AllocaNum;
   }
   
+  // Process all allocas which are only used in a single basic block.
+  for (std::map<BasicBlock*, std::vector<AllocaInst*> >::iterator I =
+         LocallyUsedAllocas.begin(), E = LocallyUsedAllocas.end(); I != E; ++I){
+    const std::vector<AllocaInst*> &Allocas = I->second;
+    assert(!Allocas.empty() && "empty alloca list??");
+
+    // It's common for there to only be one alloca in the list.  Handle it
+    // efficiently.
+    if (Allocas.size() == 1)
+      PromoteLocallyUsedAlloca(I->first, Allocas[0]);
+    else
+      PromoteLocallyUsedAllocas(I->first, Allocas);
+  }
+
   if (Allocas.empty())
     return; // All of the allocas must have been trivial!
 
@@ -393,41 +416,92 @@
   }
 }
 
-// PromoteLocallyUsedAlloca - Many allocas are only used within a single basic
-// block.  If this is the case, avoid traversing the CFG and inserting a lot of
-// potentially useless PHI nodes by just performing a single linear pass over
-// the basic block using the Alloca.
-//
-void PromoteMem2Reg::PromoteLocallyUsedAlloca(AllocaInst *AI) {
+/// PromoteLocallyUsedAlloca - Many allocas are only used within a single basic
+/// block.  If this is the case, avoid traversing the CFG and inserting a lot of
+/// potentially useless PHI nodes by just performing a single linear pass over
+/// the basic block using the Alloca.
+///
+void PromoteMem2Reg::PromoteLocallyUsedAlloca(BasicBlock *BB, AllocaInst *AI) {
   assert(!AI->use_empty() && "There are no uses of the alloca!");
 
-  // Uses of the uninitialized memory location shall get zero...
-  Value *CurVal = Constant::getNullValue(AI->getAllocatedType());
+  // Handle degenerate cases quickly.
+  if (AI->hasOneUse()) {
+    Instruction *U = cast<Instruction>(AI->use_back());
+    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+      // Must be a load of uninitialized value.
+      LI->replaceAllUsesWith(Constant::getNullValue(AI->getAllocatedType()));
+    } else {
+      // Otherwise it must be a store which is never read.
+      assert(isa<StoreInst>(U));
+    }
+    BB->getInstList().erase(U);
+  } else {
+    // Uses of the uninitialized memory location shall get zero...
+    Value *CurVal = Constant::getNullValue(AI->getAllocatedType());
   
-  BasicBlock *BB = cast<Instruction>(AI->use_back())->getParent();
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+      Instruction *Inst = I++;
+      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+        if (LI->getOperand(0) == AI) {
+          // Loads just return the "current value"...
+          LI->replaceAllUsesWith(CurVal);
+          BB->getInstList().erase(LI);
+        }
+      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+        if (SI->getOperand(1) == AI) {
+          // Store updates the "current value"...
+          CurVal = SI->getOperand(0);
+          BB->getInstList().erase(SI);
+        }
+      }
+    }
+  }
+
+  // After traversing the basic block, there should be no more uses of the
+  // alloca, remove it now.
+  assert(AI->use_empty() && "Uses of alloca from more than one BB??");
+  AI->getParent()->getInstList().erase(AI);
+}
+
+/// PromoteLocallyUsedAllocas - This method is just like
+/// PromoteLocallyUsedAlloca, except that it processes multiple alloca
+/// instructions in parallel.  This is important in cases where we have large
+/// basic blocks, as we don't want to rescan the entire basic block for each
+/// alloca which is locally used in it (which might be a lot).
+void PromoteMem2Reg::
+PromoteLocallyUsedAllocas(BasicBlock *BB, const std::vector<AllocaInst*> &AIs) {
+  std::map<AllocaInst*, Value*> CurValues;
+  for (unsigned i = 0, e = AIs.size(); i != e; ++i)
+    CurValues[AIs[i]] = 0; // Insert with null value
 
   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
     Instruction *Inst = I++;
     if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
-      if (LI->getOperand(0) == AI) {
-        // Loads just return the "current value"...
-        LI->replaceAllUsesWith(CurVal);
-        BB->getInstList().erase(LI);
+      // Is this a load of an alloca we are tracking?
+      if (AllocaInst *AI = dyn_cast<AllocaInst>(LI->getOperand(0))) {
+        std::map<AllocaInst*, Value*>::iterator AIt = CurValues.find(AI);
+        if (AIt != CurValues.end()) {
+          // Loads just return the "current value"...
+          if (AIt->second == 0)   // Uninitialized value??
+            AIt->second =Constant::getNullValue(AIt->first->getAllocatedType());
+          LI->replaceAllUsesWith(AIt->second);
+          BB->getInstList().erase(LI);
+        }
       }
     } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
-      if (SI->getOperand(1) == AI) {
-        // Loads just update the "current value"...
-        CurVal = SI->getOperand(0);
-        BB->getInstList().erase(SI);
+      if (AllocaInst *AI = dyn_cast<AllocaInst>(SI->getOperand(1))) {
+        std::map<AllocaInst*, Value*>::iterator AIt = CurValues.find(AI);
+        if (AIt != CurValues.end()) {
+          // Store updates the "current value"...
+          AIt->second = SI->getOperand(0);
+          BB->getInstList().erase(SI);
+        }
       }
     }
   }
-
-  // After traversing the basic block, there should be no more uses of the
-  // alloca, remove it now.
-  assert(AI->use_empty() && "Uses of alloca from more than one BB??");
-  AI->getParent()->getInstList().erase(AI);
 }
+
+
 
 // QueuePhiNode - queues a phi-node to be added to a basic-block for a specific
 // Alloca returns true if there wasn't already a phi-node for that variable


Index: llvm/lib/Transforms/Utils/SimplifyCFG.cpp
diff -u llvm/lib/Transforms/Utils/SimplifyCFG.cpp:1.19 llvm/lib/Transforms/Utils/SimplifyCFG.cpp:1.19.2.1
--- llvm/lib/Transforms/Utils/SimplifyCFG.cpp:1.19	Fri Jan  9 00:12:25 2004
+++ llvm/lib/Transforms/Utils/SimplifyCFG.cpp	Mon Mar  1 17:58:16 2004
@@ -12,22 +12,21 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Constant.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/iPHINode.h"
-#include "llvm/iTerminators.h"
-#include "llvm/iOther.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
 #include "llvm/Support/CFG.h"
 #include <algorithm>
 #include <functional>
+#include <set>
 using namespace llvm;
 
-// PropagatePredecessors - This gets "Succ" ready to have the predecessors from
-// "BB".  This is a little tricky because "Succ" has PHI nodes, which need to
-// have extra slots added to them to hold the merge edges from BB's
-// predecessors, and BB itself might have had PHI nodes in it.  This function
-// returns true (failure) if the Succ BB already has a predecessor that is a
-// predecessor of BB and incoming PHI arguments would not be discernible.
+// PropagatePredecessorsForPHIs - This gets "Succ" ready to have the
+// predecessors from "BB".  This is a little tricky because "Succ" has PHI
+// nodes, which need to have extra slots added to them to hold the merge edges
+// from BB's predecessors, and BB itself might have had PHI nodes in it.  This
+// function returns true (failure) if the Succ BB already has a predecessor that
+// is a predecessor of BB and incoming PHI arguments would not be discernible.
 //
 // Assumption: Succ is the single successor for BB.
 //
@@ -89,6 +88,408 @@
   return false;
 }
 
+/// GetIfCondition - Given a basic block (BB) with two predecessors (and
+/// presumably PHI nodes in it), check to see if the merge at this block is due
+/// to an "if condition".  If so, return the boolean condition that determines
+/// which entry into BB will be taken.  Also, return by reference the block
+/// that will be entered from if the condition is true, and the block that will
+/// be entered if the condition is false.
+/// 
+///
+static Value *GetIfCondition(BasicBlock *BB,
+                             BasicBlock *&IfTrue, BasicBlock *&IfFalse) {
+  assert(std::distance(pred_begin(BB), pred_end(BB)) == 2 &&
+         "Function can only handle blocks with 2 predecessors!");
+  BasicBlock *Pred1 = *pred_begin(BB);
+  BasicBlock *Pred2 = *++pred_begin(BB);
+
+  // We can only handle branches.  Other control flow will be lowered to
+  // branches if possible anyway.
+  if (!isa<BranchInst>(Pred1->getTerminator()) ||
+      !isa<BranchInst>(Pred2->getTerminator()))
+    return 0;
+  BranchInst *Pred1Br = cast<BranchInst>(Pred1->getTerminator());
+  BranchInst *Pred2Br = cast<BranchInst>(Pred2->getTerminator());
+
+  // Eliminate code duplication by ensuring that Pred1Br is conditional if
+  // either are.
+  if (Pred2Br->isConditional()) {
+    // If both branches are conditional, we don't have an "if statement".  In
+    // reality, we could transform this case, but since the condition will be
+    // required anyway, we stand no chance of eliminating it, so the xform is
+    // probably not profitable.
+    if (Pred1Br->isConditional())
+      return 0;
+
+    std::swap(Pred1, Pred2);
+    std::swap(Pred1Br, Pred2Br);
+  }
+
+  if (Pred1Br->isConditional()) {
+    // If we found a conditional branch predecessor, make sure that it branches
+    // to BB and Pred2.  If it doesn't, this isn't an "if statement".
+    if (Pred1Br->getSuccessor(0) == BB &&
+        Pred1Br->getSuccessor(1) == Pred2) {
+      IfTrue = Pred1;
+      IfFalse = Pred2;
+    } else if (Pred1Br->getSuccessor(0) == Pred2 &&
+               Pred1Br->getSuccessor(1) == BB) {
+      IfTrue = Pred2;
+      IfFalse = Pred1;
+    } else {
+      // We know that one arm of the conditional goes to BB, so the other must
+      // go somewhere unrelated, and this must not be an "if statement".
+      return 0;
+    }
+
+    // The only thing we have to watch out for here is to make sure that Pred2
+    // doesn't have incoming edges from other blocks.  If it does, the condition
+    // doesn't dominate BB.
+    if (++pred_begin(Pred2) != pred_end(Pred2))
+      return 0;
+
+    return Pred1Br->getCondition();
+  }
+
+  // Ok, if we got here, both predecessors end with an unconditional branch to
+  // BB.  Don't panic!  If both blocks only have a single (identical)
+  // predecessor, and THAT is a conditional branch, then we're all ok!
+  if (pred_begin(Pred1) == pred_end(Pred1) ||
+      ++pred_begin(Pred1) != pred_end(Pred1) ||
+      pred_begin(Pred2) == pred_end(Pred2) ||
+      ++pred_begin(Pred2) != pred_end(Pred2) ||
+      *pred_begin(Pred1) != *pred_begin(Pred2))
+    return 0;
+
+  // Otherwise, if this is a conditional branch, then we can use it!
+  BasicBlock *CommonPred = *pred_begin(Pred1);
+  if (BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator())) {
+    assert(BI->isConditional() && "Two successors but not conditional?");
+    if (BI->getSuccessor(0) == Pred1) {
+      IfTrue = Pred1;
+      IfFalse = Pred2;
+    } else {
+      IfTrue = Pred2;
+      IfFalse = Pred1;
+    }
+    return BI->getCondition();
+  }
+  return 0;
+}
+
+
+// If we have a merge point of an "if condition" as accepted above, return true
+// if the specified value dominates the block.  We don't handle the true
+// generality of domination here, just a special case which works well enough
+// for us.
+static bool DominatesMergePoint(Value *V, BasicBlock *BB) {
+  if (Instruction *I = dyn_cast<Instruction>(V)) {
+    BasicBlock *PBB = I->getParent();
+    // If this instruction is defined in a block that contains an unconditional
+    // branch to BB, then it must be in the 'conditional' part of the "if
+    // statement".
+    if (isa<BranchInst>(PBB->getTerminator()) && 
+        cast<BranchInst>(PBB->getTerminator())->isUnconditional() && 
+        cast<BranchInst>(PBB->getTerminator())->getSuccessor(0) == BB)
+      return false;
+
+    // We also don't want to allow weird loops that might have the "if
+    // condition" in the bottom of this block.
+    if (PBB == BB) return false;
+  }
+
+  // Non-instructions all dominate instructions.
+  return true;
+}
+
+// GatherConstantSetEQs - Given a potentially 'or'd together collection of seteq
+// instructions that compare a value against a constant, return the value being
+// compared, and stick the constant into the Values vector.
+static Value *GatherConstantSetEQs(Value *V, std::vector<Constant*> &Values) {
+  if (Instruction *Inst = dyn_cast<Instruction>(V))
+    if (Inst->getOpcode() == Instruction::SetEQ) {
+      if (Constant *C = dyn_cast<Constant>(Inst->getOperand(1))) {
+        Values.push_back(C);
+        return Inst->getOperand(0);
+      } else if (Constant *C = dyn_cast<Constant>(Inst->getOperand(0))) {
+        Values.push_back(C);
+        return Inst->getOperand(1);
+      }
+    } else if (Inst->getOpcode() == Instruction::Or) {
+      if (Value *LHS = GatherConstantSetEQs(Inst->getOperand(0), Values))
+        if (Value *RHS = GatherConstantSetEQs(Inst->getOperand(1), Values))
+          if (LHS == RHS)
+            return LHS;
+    }
+  return 0;
+}
+
+// GatherConstantSetNEs - Given a potentially 'and'd together collection of
+// setne instructions that compare a value against a constant, return the value
+// being compared, and stick the constant into the Values vector.
+static Value *GatherConstantSetNEs(Value *V, std::vector<Constant*> &Values) {
+  if (Instruction *Inst = dyn_cast<Instruction>(V))
+    if (Inst->getOpcode() == Instruction::SetNE) {
+      if (Constant *C = dyn_cast<Constant>(Inst->getOperand(1))) {
+        Values.push_back(C);
+        return Inst->getOperand(0);
+      } else if (Constant *C = dyn_cast<Constant>(Inst->getOperand(0))) {
+        Values.push_back(C);
+        return Inst->getOperand(1);
+      }
+    } else if (Inst->getOpcode() == Instruction::Cast) {
+      // Cast of X to bool is really a comparison against zero.
+      assert(Inst->getType() == Type::BoolTy && "Can only handle bool values!");
+      Values.push_back(Constant::getNullValue(Inst->getOperand(0)->getType()));
+      return Inst->getOperand(0);
+    } else if (Inst->getOpcode() == Instruction::And) {
+      if (Value *LHS = GatherConstantSetNEs(Inst->getOperand(0), Values))
+        if (Value *RHS = GatherConstantSetNEs(Inst->getOperand(1), Values))
+          if (LHS == RHS)
+            return LHS;
+    }
+  return 0;
+}
+
+
+
+/// GatherValueComparisons - If the specified Cond is an 'and' or 'or' of a
+/// bunch of comparisons of one value against constants, return the value and
+/// the constants being compared.
+static bool GatherValueComparisons(Instruction *Cond, Value *&CompVal,
+                                   std::vector<Constant*> &Values) {
+  if (Cond->getOpcode() == Instruction::Or) {
+    CompVal = GatherConstantSetEQs(Cond, Values);
+
+    // Return true to indicate that the condition is true if the CompVal is
+    // equal to one of the constants.
+    return true;
+  } else if (Cond->getOpcode() == Instruction::And) {
+    CompVal = GatherConstantSetNEs(Cond, Values);
+        
+    // Return false to indicate that the condition is false if the CompVal is
+    // equal to one of the constants.
+    return false;
+  }
+  return false;
+}
+
+/// ErasePossiblyDeadInstructionTree - If the specified instruction is dead and
+/// has no side effects, nuke it.  If it uses any instructions that become dead
+/// because the instruction is now gone, nuke them too.
+static void ErasePossiblyDeadInstructionTree(Instruction *I) {
+  if (isInstructionTriviallyDead(I)) {
+    std::vector<Value*> Operands(I->op_begin(), I->op_end());
+    I->getParent()->getInstList().erase(I);
+    for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+      if (Instruction *OpI = dyn_cast<Instruction>(Operands[i]))
+        ErasePossiblyDeadInstructionTree(OpI);
+  }
+}
+
+/// SafeToMergeTerminators - Return true if it is safe to merge these two
+/// terminator instructions together.
+///
+static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) {
+  if (SI1 == SI2) return false;  // Can't merge with self!
+
+  // It is not safe to merge these two switch instructions if they have a common
+  // successor, and if that successor has a PHI node, and if that PHI node has
+  // conflicting incoming values from the two switch blocks.
+  BasicBlock *SI1BB = SI1->getParent();
+  BasicBlock *SI2BB = SI2->getParent();
+  std::set<BasicBlock*> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
+
+  for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I)
+    if (SI1Succs.count(*I))
+      for (BasicBlock::iterator BBI = (*I)->begin();
+           PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI)
+        if (PN->getIncomingValueForBlock(SI1BB) !=
+            PN->getIncomingValueForBlock(SI2BB))
+          return false;
+        
+  return true;
+}
+
+/// AddPredecessorToBlock - Update PHI nodes in Succ to indicate that there will
+/// now be entries in it from the 'NewPred' block.  The values that will be
+/// flowing into the PHI nodes will be the same as those coming in from
+/// ExistPred, an existing predecessor of Succ.
+static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
+                                  BasicBlock *ExistPred) {
+  assert(std::find(succ_begin(ExistPred), succ_end(ExistPred), Succ) !=
+         succ_end(ExistPred) && "ExistPred is not a predecessor of Succ!");
+  if (!isa<PHINode>(Succ->begin())) return; // Quick exit if nothing to do
+
+  for (BasicBlock::iterator I = Succ->begin();
+       PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+    Value *V = PN->getIncomingValueForBlock(ExistPred);
+    PN->addIncoming(V, NewPred);
+  }
+}
+
+// isValueEqualityComparison - If the specified terminator checks to see if a
+// value is equal to a constant integer value, return that value (else null).
+static Value *isValueEqualityComparison(TerminatorInst *TI) {
+  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI))
+    return SI->getCondition();
+  if (BranchInst *BI = dyn_cast<BranchInst>(TI))
+    if (BI->isConditional() && BI->getCondition()->hasOneUse())
+      if (SetCondInst *SCI = dyn_cast<SetCondInst>(BI->getCondition()))
+        if ((SCI->getOpcode() == Instruction::SetEQ ||
+             SCI->getOpcode() == Instruction::SetNE) && 
+            isa<ConstantInt>(SCI->getOperand(1)))
+          return SCI->getOperand(0);
+  return 0;
+}
+
+// Given a value comparison instruction, decode all of the 'cases' that it
+// represents and return the 'default' block.
+static BasicBlock *
+GetValueEqualityComparisonCases(TerminatorInst *TI, 
+                                std::vector<std::pair<ConstantInt*,
+                                                      BasicBlock*> > &Cases) {
+  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+    Cases.reserve(SI->getNumCases());
+    for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
+      Cases.push_back(std::make_pair(cast<ConstantInt>(SI->getCaseValue(i)),
+                                     SI->getSuccessor(i)));
+    return SI->getDefaultDest();
+  }
+
+  BranchInst *BI = cast<BranchInst>(TI);
+  SetCondInst *SCI = cast<SetCondInst>(BI->getCondition());
+  Cases.push_back(std::make_pair(cast<ConstantInt>(SCI->getOperand(1)),
+                                 BI->getSuccessor(SCI->getOpcode() ==
+                                                        Instruction::SetNE)));
+  return BI->getSuccessor(SCI->getOpcode() == Instruction::SetEQ);
+}
+
+
+// FoldValueComparisonIntoPredecessors - The specified terminator is a value
+// equality comparison instruction (either a switch or a branch on "X == c").
+// See if any of the predecessors of the terminator block are value comparisons
+// on the same value.  If so, and if safe to do so, fold them together.
+static bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI) {
+  BasicBlock *BB = TI->getParent();
+  Value *CV = isValueEqualityComparison(TI);  // CondVal
+  assert(CV && "Not a comparison?");
+  bool Changed = false;
+
+  std::vector<BasicBlock*> Preds(pred_begin(BB), pred_end(BB));
+  while (!Preds.empty()) {
+    BasicBlock *Pred = Preds.back();
+    Preds.pop_back();
+    
+    // See if the predecessor is a comparison with the same value.
+    TerminatorInst *PTI = Pred->getTerminator();
+    Value *PCV = isValueEqualityComparison(PTI);  // PredCondVal
+
+    if (PCV == CV && SafeToMergeTerminators(TI, PTI)) {
+      // Figure out which 'cases' to copy from TI to PTI.
+      std::vector<std::pair<ConstantInt*, BasicBlock*> > BBCases;
+      BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);
+
+      std::vector<std::pair<ConstantInt*, BasicBlock*> > PredCases;
+      BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases);
+
+      // Based on whether the default edge from PTI goes to BB or not, fill in
+      // PredCases and PredDefault with the new switch cases we would like to
+      // build.
+      std::vector<BasicBlock*> NewSuccessors;
+
+      if (PredDefault == BB) {
+        // If this is the default destination from PTI, only the edges in TI
+        // that don't occur in PTI, or that branch to BB will be activated.
+        std::set<ConstantInt*> PTIHandled;
+        for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+          if (PredCases[i].second != BB)
+            PTIHandled.insert(PredCases[i].first);
+          else {
+            // The default destination is BB, we don't need explicit targets.
+            std::swap(PredCases[i], PredCases.back());
+            PredCases.pop_back();
+            --i; --e;
+          }
+
+        // Reconstruct the new switch statement we will be building.
+        if (PredDefault != BBDefault) {
+          PredDefault->removePredecessor(Pred);
+          PredDefault = BBDefault;
+          NewSuccessors.push_back(BBDefault);
+        }
+        for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
+          if (!PTIHandled.count(BBCases[i].first) &&
+              BBCases[i].second != BBDefault) {
+            PredCases.push_back(BBCases[i]);
+            NewSuccessors.push_back(BBCases[i].second);
+          }
+
+      } else {
+        // If this is not the default destination from PTI, only the edges
+        // in TI that occur in PTI with a destination of BB will be
+        // activated.
+        std::set<ConstantInt*> PTIHandled;
+        for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+          if (PredCases[i].second == BB) {
+            PTIHandled.insert(PredCases[i].first);
+            std::swap(PredCases[i], PredCases.back());
+            PredCases.pop_back();
+            --i; --e;
+          }
+
+        // Okay, now we know which constants were sent to BB from the
+        // predecessor.  Figure out where they will all go now.
+        for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
+          if (PTIHandled.count(BBCases[i].first)) {
+            // If this is one we are capable of getting...
+            PredCases.push_back(BBCases[i]);
+            NewSuccessors.push_back(BBCases[i].second);
+            PTIHandled.erase(BBCases[i].first);// This constant is taken care of
+          }
+
+        // If there are any constants vectored to BB that TI doesn't handle,
+        // they must go to the default destination of TI.
+        for (std::set<ConstantInt*>::iterator I = PTIHandled.begin(),
+               E = PTIHandled.end(); I != E; ++I) {
+          PredCases.push_back(std::make_pair(*I, BBDefault));
+          NewSuccessors.push_back(BBDefault);
+        }
+      }
+
+      // Okay, at this point, we know which new successor Pred will get.  Make
+      // sure we update the number of entries in the PHI nodes for these
+      // successors.
+      for (unsigned i = 0, e = NewSuccessors.size(); i != e; ++i)
+        AddPredecessorToBlock(NewSuccessors[i], Pred, BB);
+
+      // Now that the successors are updated, create the new Switch instruction.
+      SwitchInst *NewSI = new SwitchInst(CV, PredDefault, PTI);
+      for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+        NewSI->addCase(PredCases[i].first, PredCases[i].second);
+      Pred->getInstList().erase(PTI);
+
+      // Okay, last check.  If BB is still a successor of NewSI, then we must
+      // have an infinite loop case.  If so, add an infinitely looping block
+      // to handle the case to preserve the behavior of the code.
+      BasicBlock *InfLoopBlock = 0;
+      for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
+        if (NewSI->getSuccessor(i) == BB) {
+          if (InfLoopBlock == 0) {
+            // Insert it at the end of the loop, because it's either cold,
+            // or it won't matter if it's hot. :)
+            InfLoopBlock = new BasicBlock("infloop", BB->getParent());
+            new BranchInst(InfLoopBlock, InfLoopBlock);
+          }
+          NewSI->setSuccessor(i, InfLoopBlock);
+        }
+          
+      Changed = true;
+    }
+  }
+  return Changed;
+}
+
 
 // SimplifyCFG - This function is used to do simplification of a CFG.  For
 // example, it adjusts branches to branches to eliminate the extra hop, it
@@ -105,39 +506,9 @@
   assert(BB->getTerminator() && "Degenerate basic block encountered!");
   assert(&BB->getParent()->front() != BB && "Can't Simplify entry block!");
 
-  // Check to see if the first instruction in this block is just an
-  // 'llvm.unwind'.  If so, replace any invoke instructions which use this as an
-  // exception destination with call instructions.
-  //
-  if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator()))
-    if (BB->begin() == BasicBlock::iterator(UI)) {  // Empty block?
-      std::vector<BasicBlock*> Preds(pred_begin(BB), pred_end(BB));
-      while (!Preds.empty()) {
-        BasicBlock *Pred = Preds.back();
-        if (InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator()))
-          if (II->getExceptionalDest() == BB) {
-            // Insert a new branch instruction before the invoke, because this
-            // is now a fall through...
-            BranchInst *BI = new BranchInst(II->getNormalDest(), II);
-            Pred->getInstList().remove(II);   // Take out of symbol table
-            
-            // Insert the call now...
-            std::vector<Value*> Args(II->op_begin()+3, II->op_end());
-            CallInst *CI = new CallInst(II->getCalledValue(), Args,
-                                        II->getName(), BI);
-            // If the invoke produced a value, the Call now does instead
-            II->replaceAllUsesWith(CI);
-            delete II;
-            Changed = true;
-          }
-        
-        Preds.pop_back();
-      }
-    }
-
   // Remove basic blocks that have no predecessors... which are unreachable.
-  if (pred_begin(BB) == pred_end(BB) &&
-      !BB->hasConstantReferences()) {
+  if (pred_begin(BB) == pred_end(BB) ||
+      *pred_begin(BB) == BB && ++pred_begin(BB) == pred_end(BB)) {
     //cerr << "Removing BB: \n" << BB;
 
     // Loop through all of our successors and make sure they know that one
@@ -233,69 +604,328 @@
     }
   }
 
+  // If this is a returning block with only PHI nodes in it, fold the return
+  // instruction into any unconditional branch predecessors.
+  if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+    BasicBlock::iterator BBI = BB->getTerminator();
+    if (BBI == BB->begin() || isa<PHINode>(--BBI)) {
+      // Find predecessors that end with unconditional branches.
+      std::vector<BasicBlock*> UncondBranchPreds;
+      for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+        TerminatorInst *PTI = (*PI)->getTerminator();
+        if (BranchInst *BI = dyn_cast<BranchInst>(PTI))
+          if (BI->isUnconditional())
+            UncondBranchPreds.push_back(*PI);
+      }
+      
+      // If we found some, do the transformation!
+      if (!UncondBranchPreds.empty()) {
+        while (!UncondBranchPreds.empty()) {
+          BasicBlock *Pred = UncondBranchPreds.back();
+          UncondBranchPreds.pop_back();
+          Instruction *UncondBranch = Pred->getTerminator();
+          // Clone the return and add it to the end of the predecessor.
+          Instruction *NewRet = RI->clone();
+          Pred->getInstList().push_back(NewRet);
+
+          // If the return instruction returns a value, and if the value was a
+          // PHI node in "BB", propagate the right value into the return.
+          if (NewRet->getNumOperands() == 1)
+            if (PHINode *PN = dyn_cast<PHINode>(NewRet->getOperand(0)))
+              if (PN->getParent() == BB)
+                NewRet->setOperand(0, PN->getIncomingValueForBlock(Pred));
+          // Update any PHI nodes in the returning block to realize that we no
+          // longer branch to them.
+          BB->removePredecessor(Pred);
+          Pred->getInstList().erase(UncondBranch);
+        }
+
+        // If we eliminated all predecessors of the block, delete the block now.
+        if (pred_begin(BB) == pred_end(BB))
+          // We know there are no successors, so just nuke the block.
+          M->getBasicBlockList().erase(BB);
+
+        return true;
+      }
+    }
+  } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->begin())) {
+    // Check to see if the first instruction in this block is just an unwind.
+    // If so, replace any invoke instructions which use this as an exception
+    // destination with call instructions.
+    //
+    std::vector<BasicBlock*> Preds(pred_begin(BB), pred_end(BB));
+    while (!Preds.empty()) {
+      BasicBlock *Pred = Preds.back();
+      if (InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator()))
+        if (II->getUnwindDest() == BB) {
+          // Insert a new branch instruction before the invoke, because this
+          // is now a fall through...
+          BranchInst *BI = new BranchInst(II->getNormalDest(), II);
+          Pred->getInstList().remove(II);   // Take out of symbol table
+          
+          // Insert the call now...
+          std::vector<Value*> Args(II->op_begin()+3, II->op_end());
+          CallInst *CI = new CallInst(II->getCalledValue(), Args,
+                                      II->getName(), BI);
+          // If the invoke produced a value, the Call now does instead
+          II->replaceAllUsesWith(CI);
+          delete II;
+          Changed = true;
+        }
+      
+      Preds.pop_back();
+    }
+
+    // If this block is now dead, remove it.
+    if (pred_begin(BB) == pred_end(BB)) {
+      // We know there are no successors, so just nuke the block.
+      M->getBasicBlockList().erase(BB);
+      return true;
+    }
+
+  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->begin())) {
+    if (FoldValueComparisonIntoPredecessors(SI))
+      return SimplifyCFG(BB) || 1;
+  } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+    if (Value *CompVal = isValueEqualityComparison(BB->getTerminator())) {
+      // This block must be empty, except for the setcond inst, if it exists.
+      BasicBlock::iterator I = BB->begin();
+      if (&*I == BI ||
+          (&*I == cast<Instruction>(BI->getCondition()) &&
+           &*++I == BI))
+        if (FoldValueComparisonIntoPredecessors(BI))
+          return SimplifyCFG(BB) || 1;
+    }
+  }
+
   // Merge basic blocks into their predecessor if there is only one distinct
   // pred, and if there is only one distinct successor of the predecessor, and
   // if there are no PHI nodes.
   //
-  if (!BB->hasConstantReferences()) {
-    pred_iterator PI(pred_begin(BB)), PE(pred_end(BB));
-    BasicBlock *OnlyPred = *PI++;
-    for (; PI != PE; ++PI)  // Search all predecessors, see if they are all same
-      if (*PI != OnlyPred) {
-        OnlyPred = 0;       // There are multiple different predecessors...
+  pred_iterator PI(pred_begin(BB)), PE(pred_end(BB));
+  BasicBlock *OnlyPred = *PI++;
+  for (; PI != PE; ++PI)  // Search all predecessors, see if they are all same
+    if (*PI != OnlyPred) {
+      OnlyPred = 0;       // There are multiple different predecessors...
+      break;
+    }
+  
+  BasicBlock *OnlySucc = 0;
+  if (OnlyPred && OnlyPred != BB &&    // Don't break self loops
+      OnlyPred->getTerminator()->getOpcode() != Instruction::Invoke) {
+    // Check to see if there is only one distinct successor...
+    succ_iterator SI(succ_begin(OnlyPred)), SE(succ_end(OnlyPred));
+    OnlySucc = BB;
+    for (; SI != SE; ++SI)
+      if (*SI != OnlySucc) {
+        OnlySucc = 0;     // There are multiple distinct successors!
         break;
       }
-  
-    BasicBlock *OnlySucc = 0;
-    if (OnlyPred && OnlyPred != BB &&    // Don't break self loops
-        OnlyPred->getTerminator()->getOpcode() != Instruction::Invoke) {
-      // Check to see if there is only one distinct successor...
-      succ_iterator SI(succ_begin(OnlyPred)), SE(succ_end(OnlyPred));
-      OnlySucc = BB;
-      for (; SI != SE; ++SI)
-        if (*SI != OnlySucc) {
-          OnlySucc = 0;     // There are multiple distinct successors!
-          break;
-        }
-    }
+  }
 
-    if (OnlySucc) {
-      //cerr << "Merging: " << BB << "into: " << OnlyPred;
-      TerminatorInst *Term = OnlyPred->getTerminator();
-
-      // Resolve any PHI nodes at the start of the block.  They are all
-      // guaranteed to have exactly one entry if they exist, unless there are
-      // multiple duplicate (but guaranteed to be equal) entries for the
-      // incoming edges.  This occurs when there are multiple edges from
-      // OnlyPred to OnlySucc.
-      //
-      while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
-        PN->replaceAllUsesWith(PN->getIncomingValue(0));
-        BB->getInstList().pop_front();  // Delete the phi node...
-      }
+  if (OnlySucc) {
+    //cerr << "Merging: " << BB << "into: " << OnlyPred;
+    TerminatorInst *Term = OnlyPred->getTerminator();
+
+    // Resolve any PHI nodes at the start of the block.  They are all
+    // guaranteed to have exactly one entry if they exist, unless there are
+    // multiple duplicate (but guaranteed to be equal) entries for the
+    // incoming edges.  This occurs when there are multiple edges from
+    // OnlyPred to OnlySucc.
+    //
+    while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
+      PN->replaceAllUsesWith(PN->getIncomingValue(0));
+      BB->getInstList().pop_front();  // Delete the phi node...
+    }
 
-      // Delete the unconditional branch from the predecessor...
-      OnlyPred->getInstList().pop_back();
+    // Delete the unconditional branch from the predecessor...
+    OnlyPred->getInstList().pop_back();
       
-      // Move all definitions in the successor to the predecessor...
-      OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
+    // Move all definitions in the successor to the predecessor...
+    OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
                                      
-      // Make all PHI nodes that referred to BB now refer to Pred as their
-      // source...
-      BB->replaceAllUsesWith(OnlyPred);
+    // Make all PHI nodes that referred to BB now refer to Pred as their
+    // source...
+    BB->replaceAllUsesWith(OnlyPred);
 
-      std::string OldName = BB->getName();
+    std::string OldName = BB->getName();
 
-      // Erase basic block from the function... 
-      M->getBasicBlockList().erase(BB);
+    // Erase basic block from the function... 
+    M->getBasicBlockList().erase(BB);
 
-      // Inherit predecessors name if it exists...
-      if (!OldName.empty() && !OnlyPred->hasName())
-        OnlyPred->setName(OldName);
+    // Inherit predecessors name if it exists...
+    if (!OldName.empty() && !OnlyPred->hasName())
+      OnlyPred->setName(OldName);
       
-      return true;
-    }
+    return true;
   }
+
+  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+    if (BranchInst *BI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+      // Change br (X == 0 | X == 1), T, F into a switch instruction.
+      if (BI->isConditional() && isa<Instruction>(BI->getCondition())) {
+        Instruction *Cond = cast<Instruction>(BI->getCondition());
+        // If this is a bunch of seteq's or'd together, or if it's a bunch of
+        // 'setne's and'ed together, collect them.
+        Value *CompVal = 0;
+        std::vector<Constant*> Values;
+        bool TrueWhenEqual = GatherValueComparisons(Cond, CompVal, Values);
+        if (CompVal && CompVal->getType()->isInteger()) {
+          // There might be duplicate constants in the list, which the switch
+          // instruction can't handle, remove them now.
+          std::sort(Values.begin(), Values.end());
+          Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
+          
+          // Figure out which block is which destination.
+          BasicBlock *DefaultBB = BI->getSuccessor(1);
+          BasicBlock *EdgeBB    = BI->getSuccessor(0);
+          if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB);
+          
+          // Create the new switch instruction now.
+          SwitchInst *New = new SwitchInst(CompVal, DefaultBB, BI);
+          
+          // Add all of the 'cases' to the switch instruction.
+          for (unsigned i = 0, e = Values.size(); i != e; ++i)
+            New->addCase(Values[i], EdgeBB);
+          
+          // We added edges from PI to the EdgeBB.  As such, if there were any
+          // PHI nodes in EdgeBB, they need entries to be added corresponding to
+          // the number of edges added.
+          for (BasicBlock::iterator BBI = EdgeBB->begin();
+               PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {
+            Value *InVal = PN->getIncomingValueForBlock(*PI);
+            for (unsigned i = 0, e = Values.size()-1; i != e; ++i)
+              PN->addIncoming(InVal, *PI);
+          }
+
+          // Erase the old branch instruction.
+          (*PI)->getInstList().erase(BI);
+
+          // Erase the potentially dead condition tree that was used to compute
+          // the branch condition.
+          ErasePossiblyDeadInstructionTree(Cond);
+          return true;
+        }
+      }
+
+  // If there is a trivial two-entry PHI node in this basic block, and we can
+  // eliminate it, do so now.
+  if (PHINode *PN = dyn_cast<PHINode>(BB->begin()))
+    if (PN->getNumIncomingValues() == 2) {
+      // Ok, this is a two entry PHI node.  Check to see if this is a simple "if
+      // statement", which has a very simple dominance structure.  Basically, we
+      // are trying to find the condition that is being branched on, which
+      // subsequently causes this merge to happen.  We really want control
+      // dependence information for this check, but simplifycfg can't keep it up
+      // to date, and this catches most of the cases we care about anyway.
+      //
+      BasicBlock *IfTrue, *IfFalse;
+      if (Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse)) {
+        //std::cerr << "FOUND IF CONDITION!  " << *IfCond << "  T: "
+        //       << IfTrue->getName() << "  F: " << IfFalse->getName() << "\n";
+
+        // Figure out where to insert instructions as necessary.
+        BasicBlock::iterator AfterPHIIt = BB->begin();
+        while (isa<PHINode>(AfterPHIIt)) ++AfterPHIIt;
+
+        BasicBlock::iterator I = BB->begin();
+        while (PHINode *PN = dyn_cast<PHINode>(I)) {
+          ++I;
+
+          // If we can eliminate this PHI by directly computing it based on the
+          // condition, do so now.  We can't eliminate PHI nodes where the
+          // incoming values are defined in the conditional parts of the branch,
+          // so check for this.
+          //
+          if (DominatesMergePoint(PN->getIncomingValue(0), BB) &&
+              DominatesMergePoint(PN->getIncomingValue(1), BB)) {
+            Value *TrueVal =
+              PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
+            Value *FalseVal =
+              PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue);
+
+            // FIXME: when we have a 'select' statement, we can be completely
+            // generic and clean here and let the instcombine pass clean up
+            // after us, by folding the select instructions away when possible.
+            //
+            if (TrueVal == FalseVal) {
+              // Degenerate case...
+              PN->replaceAllUsesWith(TrueVal);
+              BB->getInstList().erase(PN);
+              Changed = true;
+            } else if (isa<ConstantBool>(TrueVal) &&
+                       isa<ConstantBool>(FalseVal)) {
+              if (TrueVal == ConstantBool::True) {
+                // The PHI node produces the same thing as the condition.
+                PN->replaceAllUsesWith(IfCond);
+              } else {
+                // The PHI node produces the inverse of the condition.  Insert a
+                // "NOT" instruction, which is really a XOR.
+                Value *InverseCond =
+                  BinaryOperator::createNot(IfCond, IfCond->getName()+".inv",
+                                            AfterPHIIt);
+                PN->replaceAllUsesWith(InverseCond);
+              }
+              BB->getInstList().erase(PN);
+              Changed = true;
+            } else if (isa<ConstantInt>(TrueVal) && isa<ConstantInt>(FalseVal)){
+              // If this is a PHI of two constant integers, we insert a cast of
+              // the boolean to the integer type in question, giving us 0 or 1.
+              // Then we multiply this by the difference of the two constants,
+              // giving us 0 if false, and the difference if true.  We add this
+              // result to the base constant, giving us our final value.  We
+              // rely on the instruction combiner to eliminate many special
+              // cases, like turning multiplies into shifts when possible.
+              std::string Name = PN->getName(); PN->setName("");
+              Value *TheCast = new CastInst(IfCond, TrueVal->getType(),
+                                            Name, AfterPHIIt);
+              Constant *TheDiff = ConstantExpr::get(Instruction::Sub,
+                                                    cast<Constant>(TrueVal),
+                                                    cast<Constant>(FalseVal));
+              Value *V = TheCast;
+              if (TheDiff != ConstantInt::get(TrueVal->getType(), 1))
+                V = BinaryOperator::create(Instruction::Mul, TheCast,
+                                           TheDiff, TheCast->getName()+".scale",
+                                           AfterPHIIt);
+              if (!cast<Constant>(FalseVal)->isNullValue())
+                V = BinaryOperator::create(Instruction::Add, V, FalseVal,
+                                           V->getName()+".offs", AfterPHIIt);
+              PN->replaceAllUsesWith(V);
+              BB->getInstList().erase(PN);
+              Changed = true;
+            } else if (isa<ConstantInt>(FalseVal) &&
+                       cast<Constant>(FalseVal)->isNullValue()) {
+              // If the false condition is an integral zero value, we can
+              // compute the PHI by multiplying the condition by the other
+              // value.
+              std::string Name = PN->getName(); PN->setName("");
+              Value *TheCast = new CastInst(IfCond, TrueVal->getType(),
+                                            Name+".c", AfterPHIIt);
+              Value *V = BinaryOperator::create(Instruction::Mul, TrueVal,
+                                                TheCast, Name, AfterPHIIt);
+              PN->replaceAllUsesWith(V);
+              BB->getInstList().erase(PN);
+              Changed = true;
+            } else if (isa<ConstantInt>(TrueVal) &&
+                       cast<Constant>(TrueVal)->isNullValue()) {
+              // If the true condition is an integral zero value, we can compute
+              // the PHI by multiplying the inverse condition by the other
+              // value.
+              std::string Name = PN->getName(); PN->setName("");
+              Value *NotCond = BinaryOperator::createNot(IfCond, Name+".inv",
+                                                         AfterPHIIt);
+              Value *TheCast = new CastInst(NotCond, TrueVal->getType(),
+                                            Name+".inv", AfterPHIIt);
+              Value *V = BinaryOperator::create(Instruction::Mul, FalseVal,
+                                                TheCast, Name, AfterPHIIt);
+              PN->replaceAllUsesWith(V);
+              BB->getInstList().erase(PN);
+              Changed = true;
+            }
+          }
+        }
+      }
+    }
   
   return Changed;
 }


Index: llvm/lib/Transforms/Utils/ValueMapper.cpp
diff -u llvm/lib/Transforms/Utils/ValueMapper.cpp:1.10 llvm/lib/Transforms/Utils/ValueMapper.cpp:1.10.2.1
--- llvm/lib/Transforms/Utils/ValueMapper.cpp:1.10	Mon Jan 12 13:10:58 2004
+++ llvm/lib/Transforms/Utils/ValueMapper.cpp	Mon Mar  1 17:58:16 2004
@@ -28,7 +28,7 @@
 
   if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) {
     if (isa<ConstantIntegral>(C) || isa<ConstantFP>(C) ||
-        isa<ConstantPointerNull>(C))
+        isa<ConstantPointerNull>(C) || isa<ConstantAggregateZero>(C))
       return VMSlot = C;           // Primitive constants map directly
     else if (ConstantPointerRef *CPR = dyn_cast<ConstantPointerRef>(C)) {
       GlobalValue *MV = cast<GlobalValue>(MapValue((Value*)CPR->getValue(),VM));