[llvm-commits] [parallel] CVS: llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp ADCE.cpp GCSE.cpp InstructionCombining.cpp LICM.cpp LoopSimplify.cpp LowerAllocations.cpp LowerInvoke.cpp LowerSwitch.cpp SCCP.cpp TailDuplication.cpp TailRecursionElimination.cpp
Misha Brukman
brukman at cs.uiuc.edu
Mon Mar 1 18:05:23 PST 2004
Changes in directory llvm/lib/Transforms/Scalar:
BasicBlockPlacement.cpp added (r1.2.2.1)
ADCE.cpp updated: 1.70 -> 1.70.2.1
GCSE.cpp updated: 1.33 -> 1.33.2.1
InstructionCombining.cpp updated: 1.149 -> 1.149.2.1
LICM.cpp updated: 1.55 -> 1.55.2.1
LoopSimplify.cpp updated: 1.30 -> 1.30.2.1
LowerAllocations.cpp updated: 1.43 -> 1.43.2.1
LowerInvoke.cpp updated: 1.4 -> 1.4.4.1
LowerSwitch.cpp updated: 1.10 -> 1.10.2.1
SCCP.cpp updated: 1.88 -> 1.88.2.1
TailDuplication.cpp updated: 1.11 -> 1.11.2.1
TailRecursionElimination.cpp updated: 1.12 -> 1.12.2.1
---
Log message:
Merge from trunk
---
Diffs of the changes: (+992 -116)
Index: llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp
diff -c /dev/null llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp:1.2.2.1
*** /dev/null Mon Mar 1 17:58:28 2004
--- llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp Mon Mar 1 17:58:16 2004
***************
*** 0 ****
--- 1,139 ----
+ //===-- BasicBlockPlacement.cpp - Basic Block Code Layout optimization ----===//
+ //
+ // The LLVM Compiler Infrastructure
+ //
+ // This file was developed by the LLVM research group and is distributed under
+ // the University of Illinois Open Source License. See LICENSE.TXT for details.
+ //
+ //===----------------------------------------------------------------------===//
+ //
+ // This file implements a very simple profile guided basic block placement
+ // algorithm. The idea is to put frequently executed blocks together at the
+ // start of the function, and hopefully increase the number of fall-through
+ // conditional branches. If there is no profile information for a particular
+ // function, this pass basically orders blocks in depth-first order.
+ //
+ // The algorithm implemented here is basically "Algo1" from "Profile Guided Code
+ // Positioning" by Pettis and Hansen, except that it uses basic block counts
+ // instead of edge counts. This should be improved in many ways, but is very
+ // simple for now.
+ //
+ // Basically we "place" the entry block, then loop over all successors in a DFO,
+ // placing the most frequently executed successor until we run out of blocks. I
+ // told you this was _extremely_ simplistic. :) This is also much slower than it
+ // could be. When it becomes important, this pass will be rewritten to use a
+ // better algorithm, and then we can worry about efficiency.
+ //
+ //===----------------------------------------------------------------------===//
+
+ #include "llvm/Analysis/ProfileInfo.h"
+ #include "llvm/Function.h"
+ #include "llvm/Pass.h"
+ #include "llvm/Support/CFG.h"
+ #include "Support/Statistic.h"
+ #include <set>
+ using namespace llvm;
+
+ namespace {
+ Statistic<> NumMoved("block-placement", "Number of basic blocks moved");
+
+ struct BlockPlacement : public FunctionPass {
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<ProfileInfo>();
+ //AU.addPreserved<ProfileInfo>(); // Does this work?
+ }
+ private:
+ /// PI - The profile information that is guiding us.
+ ///
+ ProfileInfo *PI;
+
+ /// NumMovedBlocks - Every time we move a block, increment this counter.
+ ///
+ unsigned NumMovedBlocks;
+
+ /// PlacedBlocks - Every time we place a block, remember it so we don't get
+ /// into infinite loops.
+ std::set<BasicBlock*> PlacedBlocks;
+
+ /// InsertPos - This is an iterator to the next place we want to insert a
+ /// block.
+ Function::iterator InsertPos;
+
+ /// PlaceBlocks - Recursively place the specified blocks and any unplaced
+ /// successors.
+ void PlaceBlocks(BasicBlock *BB);
+ };
+
+ RegisterOpt<BlockPlacement> X("block-placement",
+ "Profile Guided Basic Block Placement");
+ }
+
+ bool BlockPlacement::runOnFunction(Function &F) {
+ PI = &getAnalysis<ProfileInfo>();
+
+ NumMovedBlocks = 0;
+ InsertPos = F.begin();
+
+ // Recursively place all blocks.
+ PlaceBlocks(F.begin());
+
+ PlacedBlocks.clear();
+ NumMoved += NumMovedBlocks;
+ return NumMovedBlocks != 0;
+ }
+
+
+ /// PlaceBlocks - Recursively place the specified blocks and any unplaced
+ /// successors.
+ void BlockPlacement::PlaceBlocks(BasicBlock *BB) {
+ assert(!PlacedBlocks.count(BB) && "Already placed this block!");
+ PlacedBlocks.insert(BB);
+
+ // Place the specified block.
+ if (&*InsertPos != BB) {
+ // Use splice to move the block into the right place. This avoids having to
+ // remove the block from the function then readd it, which causes a bunch of
+ // symbol table traffic that is entirely pointless.
+ Function::BasicBlockListType &Blocks = BB->getParent()->getBasicBlockList();
+ Blocks.splice(InsertPos, Blocks, BB);
+
+ ++NumMovedBlocks;
+ } else {
+ // This block is already in the right place, we don't have to do anything.
+ ++InsertPos;
+ }
+
+ // Keep placing successors until we run out of ones to place. Note that this
+ // loop is very inefficient (N^2) for blocks with many successors, like switch
+ // statements. FIXME!
+ while (1) {
+ // Okay, now place any unplaced successors.
+ succ_iterator SI = succ_begin(BB), E = succ_end(BB);
+
+ // Scan for the first unplaced successor.
+ for (; SI != E && PlacedBlocks.count(*SI); ++SI)
+ /*empty*/;
+ if (SI == E) return; // No more successors to place.
+
+ unsigned MaxExecutionCount = PI->getExecutionCount(*SI);
+ BasicBlock *MaxSuccessor = *SI;
+
+ // Scan for more frequently executed successors
+ for (; SI != E; ++SI)
+ if (!PlacedBlocks.count(*SI)) {
+ unsigned Count = PI->getExecutionCount(*SI);
+ if (Count > MaxExecutionCount ||
+ // Prefer to not disturb the code.
+ (Count == MaxExecutionCount && *SI == &*InsertPos)) {
+ MaxExecutionCount = Count;
+ MaxSuccessor = *SI;
+ }
+ }
+
+ // Now that we picked the maximally executed successor, place it.
+ PlaceBlocks(MaxSuccessor);
+ }
+ }
Index: llvm/lib/Transforms/Scalar/ADCE.cpp
diff -u llvm/lib/Transforms/Scalar/ADCE.cpp:1.70 llvm/lib/Transforms/Scalar/ADCE.cpp:1.70.2.1
--- llvm/lib/Transforms/Scalar/ADCE.cpp:1.70 Fri Dec 19 03:08:34 2003
+++ llvm/lib/Transforms/Scalar/ADCE.cpp Mon Mar 1 17:58:16 2004
@@ -144,6 +144,7 @@
// Delete the instruction...
I = BB->getInstList().erase(I);
Changed = true;
+ ++NumInstRemoved;
} else {
++I;
}
Index: llvm/lib/Transforms/Scalar/GCSE.cpp
diff -u llvm/lib/Transforms/Scalar/GCSE.cpp:1.33 llvm/lib/Transforms/Scalar/GCSE.cpp:1.33.2.1
--- llvm/lib/Transforms/Scalar/GCSE.cpp:1.33 Fri Jan 9 00:02:20 2004
+++ llvm/lib/Transforms/Scalar/GCSE.cpp Mon Mar 1 17:58:16 2004
@@ -21,6 +21,7 @@
#include "llvm/Analysis/ValueNumbering.h"
#include "llvm/Support/InstIterator.h"
#include "Support/Statistic.h"
+#include "Support/Debug.h"
#include <algorithm>
using namespace llvm;
@@ -165,6 +166,9 @@
//
void GCSE::ReplaceInstWithInst(Instruction *First, BasicBlock::iterator SI) {
Instruction &Second = *SI;
+
+ DEBUG(std::cerr << "GCSE: Substituting %" << First->getName() << " for: "
+ << Second);
//cerr << "DEL " << (void*)Second << Second;
Index: llvm/lib/Transforms/Scalar/InstructionCombining.cpp
diff -u llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.149 llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.149.2.1
--- llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.149 Wed Jan 14 00:06:08 2004
+++ llvm/lib/Transforms/Scalar/InstructionCombining.cpp Mon Mar 1 17:58:16 2004
@@ -35,6 +35,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
#include "llvm/Pass.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
@@ -60,15 +61,25 @@
std::vector<Instruction*> WorkList;
TargetData *TD;
- void AddUsesToWorkList(Instruction &I) {
- // The instruction was simplified, add all users of the instruction to
- // the work lists because they might get more simplified now...
- //
+ /// AddUsersToWorkList - When an instruction is simplified, add all users of
+ /// the instruction to the work lists because they might get more simplified
+ /// now.
+ ///
+ void AddUsersToWorkList(Instruction &I) {
for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
UI != UE; ++UI)
WorkList.push_back(cast<Instruction>(*UI));
}
+ /// AddUsesToWorkList - When an instruction is simplified, add operands to
+ /// the work lists because they might get more simplified now.
+ ///
+ void AddUsesToWorkList(Instruction &I) {
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+ if (Instruction *Op = dyn_cast<Instruction>(I.getOperand(i)))
+ WorkList.push_back(Op);
+ }
+
// removeFromWorkList - remove all instances of I from the worklist.
void removeFromWorkList(Instruction *I);
public:
@@ -116,12 +127,13 @@
// InsertNewInstBefore - insert an instruction New before instruction Old
// in the program. Add the new instruction to the worklist.
//
- void InsertNewInstBefore(Instruction *New, Instruction &Old) {
+ Value *InsertNewInstBefore(Instruction *New, Instruction &Old) {
assert(New && New->getParent() == 0 &&
"New instruction already inserted into a basic block!");
BasicBlock *BB = Old.getParent();
BB->getInstList().insert(&Old, New); // Insert inst
WorkList.push_back(New); // Add to worklist
+ return New;
}
public:
@@ -132,10 +144,24 @@
// modified.
//
Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) {
- AddUsesToWorkList(I); // Add all modified instrs to worklist
+ AddUsersToWorkList(I); // Add all modified instrs to worklist
I.replaceAllUsesWith(V);
return &I;
}
+
+ // EraseInstFromFunction - When dealing with an instruction that has side
+ // effects or produces a void value, we can't rely on DCE to delete the
+ // instruction. Instead, visit methods should return the value returned by
+ // this function.
+ Instruction *EraseInstFromFunction(Instruction &I) {
+ assert(I.use_empty() && "Cannot erase instruction that is used!");
+ AddUsesToWorkList(I);
+ removeFromWorkList(&I);
+ I.getParent()->getInstList().erase(&I);
+ return 0; // Don't do anything with FI
+ }
+
+
private:
/// InsertOperandCastBefore - This inserts a cast of V to DestTy before the
/// InsertBefore instruction. This is specialized a bit to avoid inserting
@@ -173,6 +199,31 @@
return V->hasOneUse() || isa<Constant>(V);
}
+// getSignedIntegralType - Given an unsigned integral type, return the signed
+// version of it that has the same size.
+static const Type *getSignedIntegralType(const Type *Ty) {
+ switch (Ty->getPrimitiveID()) {
+ default: assert(0 && "Invalid unsigned integer type!"); abort();
+ case Type::UByteTyID: return Type::SByteTy;
+ case Type::UShortTyID: return Type::ShortTy;
+ case Type::UIntTyID: return Type::IntTy;
+ case Type::ULongTyID: return Type::LongTy;
+ }
+}
+
+// getPromotedType - Return the specified type promoted as it would be to pass
+// through a va_arg area...
+static const Type *getPromotedType(const Type *Ty) {
+ switch (Ty->getPrimitiveID()) {
+ case Type::SByteTyID:
+ case Type::ShortTyID: return Type::IntTy;
+ case Type::UByteTyID:
+ case Type::UShortTyID: return Type::UIntTy;
+ case Type::FloatTyID: return Type::DoubleTy;
+ default: return Ty;
+ }
+}
+
// SimplifyCommutative - This performs a few simplifications for commutative
// operators:
//
@@ -415,7 +466,8 @@
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
// X + 0 --> X
- if (RHS == Constant::getNullValue(I.getType()))
+ if (!I.getType()->isFloatingPoint() && // -0 + +0 = +0, so it's not a noop
+ RHS == Constant::getNullValue(I.getType()))
return ReplaceInstUsesWith(I, LHS);
// X + X --> X << 1
@@ -512,7 +564,8 @@
// Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression
// is not used by anyone else...
//
- if (Op1I->getOpcode() == Instruction::Sub) {
+ if (Op1I->getOpcode() == Instruction::Sub &&
+ !Op1I->getType()->isFloatingPoint()) {
// Swap the two operands of the subexpr...
Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1);
Op1I->setOperand(0, IIOp1);
@@ -556,6 +609,26 @@
return 0;
}
+/// isSignBitCheck - Given an exploded setcc instruction, return true if it
+/// really just checks whether the most significant (sign) bit is set.
+static bool isSignBitCheck(unsigned Opcode, Value *LHS, ConstantInt *RHS) {
+ if (RHS->getType()->isSigned()) {
+ // True if source is LHS < 0 or LHS <= -1
+ return Opcode == Instruction::SetLT && RHS->isNullValue() ||
+ Opcode == Instruction::SetLE && RHS->isAllOnesValue();
+ } else {
+ ConstantUInt *RHSC = cast<ConstantUInt>(RHS);
+ // True if source is LHS > 127 or LHS >= 128, where the constants depend on
+ // the size of the integer type.
+ if (Opcode == Instruction::SetGE)
+ return RHSC->getValue() == 1ULL<<(RHS->getType()->getPrimitiveSize()*8-1);
+ if (Opcode == Instruction::SetGT)
+ return RHSC->getValue() ==
+ (1ULL << (RHS->getType()->getPrimitiveSize()*8-1))-1;
+ }
+ return false;
+}
+
Instruction *InstCombiner::visitMul(BinaryOperator &I) {
bool Changed = SimplifyCommutative(I);
Value *Op0 = I.getOperand(0);
@@ -598,6 +671,52 @@
if (Value *Op1v = dyn_castNegVal(I.getOperand(1)))
return BinaryOperator::create(Instruction::Mul, Op0v, Op1v);
+ // If one of the operands of the multiply is a cast from a boolean value, then
+ // we know the bool is either zero or one, so this is a 'masking' multiply.
+ // See if we can simplify things based on how the boolean was originally
+ // formed.
+ CastInst *BoolCast = 0;
+ if (CastInst *CI = dyn_cast<CastInst>(I.getOperand(0)))
+ if (CI->getOperand(0)->getType() == Type::BoolTy)
+ BoolCast = CI;
+ if (!BoolCast)
+ if (CastInst *CI = dyn_cast<CastInst>(I.getOperand(1)))
+ if (CI->getOperand(0)->getType() == Type::BoolTy)
+ BoolCast = CI;
+ if (BoolCast) {
+ if (SetCondInst *SCI = dyn_cast<SetCondInst>(BoolCast->getOperand(0))) {
+ Value *SCIOp0 = SCI->getOperand(0), *SCIOp1 = SCI->getOperand(1);
+ const Type *SCOpTy = SCIOp0->getType();
+
+ // If the setcc is true iff the sign bit of X is set, then convert this
+ // multiply into a shift/and combination.
+ if (isa<ConstantInt>(SCIOp1) &&
+ isSignBitCheck(SCI->getOpcode(), SCIOp0, cast<ConstantInt>(SCIOp1))) {
+ // Shift the X value right to turn it into "all signbits".
+ Constant *Amt = ConstantUInt::get(Type::UByteTy,
+ SCOpTy->getPrimitiveSize()*8-1);
+ if (SCIOp0->getType()->isUnsigned()) {
+ const Type *NewTy = getSignedIntegralType(SCIOp0->getType());
+ SCIOp0 = InsertNewInstBefore(new CastInst(SCIOp0, NewTy,
+ SCIOp0->getName()), I);
+ }
+
+ Value *V =
+ InsertNewInstBefore(new ShiftInst(Instruction::Shr, SCIOp0, Amt,
+ BoolCast->getOperand(0)->getName()+
+ ".mask"), I);
+
+ // If the multiply type is not the same as the source type, sign extend
+ // or truncate to the multiply type.
+ if (I.getType() != V->getType())
+ V = InsertNewInstBefore(new CastInst(V, I.getType(), V->getName()),I);
+
+ Value *OtherOp = Op0 == BoolCast ? I.getOperand(1) : Op0;
+ return BinaryOperator::create(Instruction::And, V, OtherOp);
+ }
+ }
+ }
+
return Changed ? &I : 0;
}
@@ -1001,15 +1120,26 @@
return Changed ? &I : 0;
}
+// XorSelf - Implements: X ^ X --> 0
+struct XorSelf {
+ Value *RHS;
+ XorSelf(Value *rhs) : RHS(rhs) {}
+ bool shouldApply(Value *LHS) const { return LHS == RHS; }
+ Instruction *apply(BinaryOperator &Xor) const {
+ return &Xor;
+ }
+};
Instruction *InstCombiner::visitXor(BinaryOperator &I) {
bool Changed = SimplifyCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- // xor X, X = 0
- if (Op0 == Op1)
+ // xor X, X = 0, even if X is nested in a sequence of Xor's.
+ if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1))) {
+ assert(Result == &I && "AssociativeOpt didn't work?");
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ }
if (ConstantIntegral *RHS = dyn_cast<ConstantIntegral>(Op1)) {
// xor X, 0 == X
@@ -1074,7 +1204,7 @@
ConstantIntegral::getAllOnesValue(I.getType()));
if (Instruction *Op1I = dyn_cast<Instruction>(Op1))
- if (Op1I->getOpcode() == Instruction::Or)
+ if (Op1I->getOpcode() == Instruction::Or) {
if (Op1I->getOperand(0) == Op0) { // B^(B|A) == (A|B)^B
cast<BinaryOperator>(Op1I)->swapOperands();
I.swapOperands();
@@ -1082,7 +1212,13 @@
} else if (Op1I->getOperand(1) == Op0) { // B^(A|B) == (A|B)^B
I.swapOperands();
std::swap(Op0, Op1);
- }
+ }
+ } else if (Op1I->getOpcode() == Instruction::Xor) {
+ if (Op0 == Op1I->getOperand(0)) // A^(A^B) == B
+ return ReplaceInstUsesWith(I, Op1I->getOperand(1));
+ else if (Op0 == Op1I->getOperand(1)) // A^(B^A) == B
+ return ReplaceInstUsesWith(I, Op1I->getOperand(0));
+ }
if (Instruction *Op0I = dyn_cast<Instruction>(Op0))
if (Op0I->getOpcode() == Instruction::Or && Op0I->hasOneUse()) {
@@ -1094,6 +1230,11 @@
return BinaryOperator::create(Instruction::And, Op0I->getOperand(0),
NotB);
}
+ } else if (Op0I->getOpcode() == Instruction::Xor) {
+ if (Op1 == Op0I->getOperand(0)) // (A^B)^A == B
+ return ReplaceInstUsesWith(I, Op0I->getOperand(1));
+ else if (Op1 == Op0I->getOperand(1)) // (B^A)^A == B
+ return ReplaceInstUsesWith(I, Op0I->getOperand(0));
}
// (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1^C2 == 0
@@ -1250,14 +1391,7 @@
Value *X = BO->getOperand(0);
// If 'X' is not signed, insert a cast now...
if (!BOC->getType()->isSigned()) {
- const Type *DestTy;
- switch (BOC->getType()->getPrimitiveID()) {
- case Type::UByteTyID: DestTy = Type::SByteTy; break;
- case Type::UShortTyID: DestTy = Type::ShortTy; break;
- case Type::UIntTyID: DestTy = Type::IntTy; break;
- case Type::ULongTyID: DestTy = Type::LongTy; break;
- default: assert(0 && "Invalid unsigned integer type!"); abort();
- }
+ const Type *DestTy = getSignedIntegralType(BOC->getType());
CastInst *NewCI = new CastInst(X,DestTy,X->getName()+".signed");
InsertNewInstBefore(NewCI, I);
X = NewCI;
@@ -1270,6 +1404,43 @@
default: break;
}
}
+ } else { // Not a SetEQ/SetNE
+ // If the LHS is a cast from an integral value of the same size,
+ if (CastInst *Cast = dyn_cast<CastInst>(Op0)) {
+ Value *CastOp = Cast->getOperand(0);
+ const Type *SrcTy = CastOp->getType();
+ unsigned SrcTySize = SrcTy->getPrimitiveSize();
+ if (SrcTy != Cast->getType() && SrcTy->isInteger() &&
+ SrcTySize == Cast->getType()->getPrimitiveSize()) {
+ assert((SrcTy->isSigned() ^ Cast->getType()->isSigned()) &&
+ "Source and destination signednesses should differ!");
+ if (Cast->getType()->isSigned()) {
+ // If this is a signed comparison, check for comparisons in the
+ // vicinity of zero.
+ if (I.getOpcode() == Instruction::SetLT && CI->isNullValue())
+ // X < 0 => x > 127
+ return BinaryOperator::create(Instruction::SetGT, CastOp,
+ ConstantUInt::get(SrcTy, (1ULL << (SrcTySize*8-1))-1));
+ else if (I.getOpcode() == Instruction::SetGT &&
+ cast<ConstantSInt>(CI)->getValue() == -1)
+ // X > -1 => x < 128
+ return BinaryOperator::create(Instruction::SetLT, CastOp,
+ ConstantUInt::get(SrcTy, 1ULL << (SrcTySize*8-1)));
+ } else {
+ ConstantUInt *CUI = cast<ConstantUInt>(CI);
+ if (I.getOpcode() == Instruction::SetLT &&
+ CUI->getValue() == 1ULL << (SrcTySize*8-1))
+ // X < 128 => X > -1
+ return BinaryOperator::create(Instruction::SetGT, CastOp,
+ ConstantSInt::get(SrcTy, -1));
+ else if (I.getOpcode() == Instruction::SetGT &&
+ CUI->getValue() == (1ULL << (SrcTySize*8-1))-1)
+ // X > 127 => X < 0
+ return BinaryOperator::create(Instruction::SetLT, CastOp,
+ Constant::getNullValue(SrcTy));
+ }
+ }
+ }
}
// Check to see if we are comparing against the minimum or maximum value...
@@ -1306,6 +1477,15 @@
if (I.getOpcode() == Instruction::SetLE) // A <= MAX-1 -> A != MAX
return BinaryOperator::create(Instruction::SetNE, Op0, AddOne(CI));
}
+
+ // If we still have a setle or setge instruction, turn it into the
+ // appropriate setlt or setgt instruction. Since the border cases have
+ // already been handled above, this requires little checking.
+ //
+ if (I.getOpcode() == Instruction::SetLE)
+ return BinaryOperator::create(Instruction::SetLT, Op0, AddOne(CI));
+ if (I.getOpcode() == Instruction::SetGE)
+ return BinaryOperator::create(Instruction::SetGT, Op0, SubOne(CI));
}
// Test to see if the operands of the setcc are casted versions of other
@@ -1416,9 +1596,14 @@
// of a signed value.
//
unsigned TypeBits = Op0->getType()->getPrimitiveSize()*8;
- if (CUI->getValue() >= TypeBits &&
- (!Op0->getType()->isSigned() || isLeftShift))
- return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType()));
+ if (CUI->getValue() >= TypeBits) {
+ if (!Op0->getType()->isSigned() || isLeftShift)
+ return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType()));
+ else {
+ I.setOperand(1, ConstantUInt::get(Type::UByteTy, TypeBits-1));
+ return &I;
+ }
+ }
// ((X*C1) << C2) == (X * (C1 << C2))
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0))
@@ -1482,6 +1667,8 @@
// Check for (A << c1) << c2 and (A >> c1) >> c2
if (I.getOpcode() == Op0SI->getOpcode()) {
unsigned Amt = ShiftAmt1+ShiftAmt2; // Fold into one big shift...
+ if (Op0->getType()->getPrimitiveSize()*8 < Amt)
+ Amt = Op0->getType()->getPrimitiveSize()*8;
return new ShiftInst(I.getOpcode(), Op0SI->getOperand(0),
ConstantUInt::get(Type::UByteTy, Amt));
}
@@ -1747,6 +1934,23 @@
// CallInst simplification
//
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
+ // Intrinsics cannot occur in an invoke, so handle them here instead of in
+ // visitCallSite.
+ if (Function *F = CI.getCalledFunction())
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy:
+ case Intrinsic::memset:
+ // memmove/cpy/set of zero bytes is a noop.
+ if (Constant *NumBytes = dyn_cast<Constant>(CI.getOperand(3))) {
+ if (NumBytes->isNullValue())
+ return EraseInstFromFunction(CI);
+ }
+ break;
+ default:
+ break;
+ }
+
return visitCallSite(&CI);
}
@@ -1756,19 +1960,6 @@
return visitCallSite(&II);
}
-// getPromotedType - Return the specified type promoted as it would be to pass
-// though a va_arg area...
-static const Type *getPromotedType(const Type *Ty) {
- switch (Ty->getPrimitiveID()) {
- case Type::SByteTyID:
- case Type::ShortTyID: return Type::IntTy;
- case Type::UByteTyID:
- case Type::UShortTyID: return Type::UIntTy;
- case Type::FloatTyID: return Type::DoubleTy;
- default: return Ty;
- }
-}
-
// visitCallSite - Improvements for call and invoke instructions.
//
Instruction *InstCombiner::visitCallSite(CallSite CS) {
@@ -1838,7 +2029,7 @@
UI != E; ++UI)
if (PHINode *PN = dyn_cast<PHINode>(*UI))
if (PN->getParent() == II->getNormalDest() ||
- PN->getParent() == II->getExceptionalDest())
+ PN->getParent() == II->getUnwindDest())
return false;
}
@@ -1903,7 +2094,7 @@
Instruction *NC;
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
- NC = new InvokeInst(Callee, II->getNormalDest(), II->getExceptionalDest(),
+ NC = new InvokeInst(Callee, II->getNormalDest(), II->getUnwindDest(),
Args, Caller->getName(), Caller);
} else {
NC = new CallInst(Callee, Args, Caller->getName(), Caller);
@@ -1925,7 +2116,7 @@
// Otherwise, it's a call, just insert cast right after the call instr
InsertNewInstBefore(NC, *Caller);
}
- AddUsesToWorkList(*Caller);
+ AddUsersToWorkList(*Caller);
} else {
NV = Constant::getNullValue(Caller->getType());
}
@@ -1945,6 +2136,35 @@
Instruction *InstCombiner::visitPHINode(PHINode &PN) {
if (Value *V = hasConstantValue(&PN))
return ReplaceInstUsesWith(PN, V);
+
+ // If the only user of this instruction is a cast instruction, and all of the
+ // incoming values are constants, change this PHI to merge together the casted
+ // constants.
+ if (PN.hasOneUse())
+ if (CastInst *CI = dyn_cast<CastInst>(PN.use_back()))
+ if (CI->getType() != PN.getType()) { // noop casts will be folded
+ bool AllConstant = true;
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
+ if (!isa<Constant>(PN.getIncomingValue(i))) {
+ AllConstant = false;
+ break;
+ }
+ if (AllConstant) {
+ // Make a new PHI with all casted values.
+ PHINode *New = new PHINode(CI->getType(), PN.getName(), &PN);
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Constant *OldArg = cast<Constant>(PN.getIncomingValue(i));
+ New->addIncoming(ConstantExpr::getCast(OldArg, New->getType()),
+ PN.getIncomingBlock(i));
+ }
+
+ // Update the cast instruction.
+ CI->setOperand(0, New);
+ WorkList.push_back(CI); // revisit the cast instruction to fold.
+ WorkList.push_back(New); // Make sure to revisit the new Phi
+ return &PN; // PN is now dead!
+ }
+ }
return 0;
}
@@ -1952,9 +2172,14 @@
Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Is it 'getelementptr %P, long 0' or 'getelementptr %P'
// If so, eliminate the noop.
- if ((GEP.getNumOperands() == 2 &&
- GEP.getOperand(1) == Constant::getNullValue(Type::LongTy)) ||
- GEP.getNumOperands() == 1)
+ if (GEP.getNumOperands() == 1)
+ return ReplaceInstUsesWith(GEP, GEP.getOperand(0));
+
+ bool HasZeroPointerIndex = false;
+ if (Constant *C = dyn_cast<Constant>(GEP.getOperand(1)))
+ HasZeroPointerIndex = C->isNullValue();
+
+ if (GEP.getNumOperands() == 2 && HasZeroPointerIndex)
return ReplaceInstUsesWith(GEP, GEP.getOperand(0));
// Combine Indices - If the source pointer to this getelementptr instruction
@@ -1975,12 +2200,20 @@
assert(Sum && "Constant folding of longs failed!?");
GEP.setOperand(0, Src->getOperand(0));
GEP.setOperand(1, Sum);
- AddUsesToWorkList(*Src); // Reduce use count of Src
+ AddUsersToWorkList(*Src); // Reduce use count of Src
return &GEP;
} else if (Src->getNumOperands() == 2) {
// Replace: gep (gep %P, long B), long A, ...
// With: T = long A+B; gep %P, T, ...
//
+ // Note that if our source is a gep chain itself that we wait for that
+ // chain to be resolved before we perform this transformation. This
+ // avoids us creating a TON of code in some cases.
+ //
+ if (isa<GetElementPtrInst>(Src->getOperand(0)) &&
+ cast<Instruction>(Src->getOperand(0))->getNumOperands() == 2)
+ return 0; // Wait until our source is folded to completion.
+
Value *Sum = BinaryOperator::create(Instruction::Add, Src->getOperand(1),
GEP.getOperand(1),
Src->getName()+".sum", &GEP);
@@ -2021,6 +2254,31 @@
// Replace all uses of the GEP with the new constexpr...
return ReplaceInstUsesWith(GEP, CE);
}
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP.getOperand(0))) {
+ if (CE->getOpcode() == Instruction::Cast) {
+ if (HasZeroPointerIndex) {
+ // transform: GEP (cast [10 x ubyte]* X to [0 x ubyte]*), long 0, ...
+ // into : GEP [10 x ubyte]* X, long 0, ...
+ //
+ // This occurs when the program declares an array extern like "int X[];"
+ //
+ Constant *X = CE->getOperand(0);
+ const PointerType *CPTy = cast<PointerType>(CE->getType());
+ if (const PointerType *XTy = dyn_cast<PointerType>(X->getType()))
+ if (const ArrayType *XATy =
+ dyn_cast<ArrayType>(XTy->getElementType()))
+ if (const ArrayType *CATy =
+ dyn_cast<ArrayType>(CPTy->getElementType()))
+ if (CATy->getElementType() == XATy->getElementType()) {
+ // At this point, we know that the cast source type is a pointer
+ // to an array of the same type as the destination pointer
+ // array. Because the array type is never stepped over (there
+ // is a leading zero) we can fold the cast into this GEP.
+ GEP.setOperand(0, X);
+ return &GEP;
+ }
+ }
+ }
}
return 0;
@@ -2071,6 +2329,11 @@
return &FI;
}
+ // If we have 'free null' delete the instruction. This can happen in stl code
+ // when lots of inlining happens.
+ if (isa<ConstantPointerNull>(Op))
+ return EraseInstFromFunction(FI);
+
return 0;
}
@@ -2087,11 +2350,13 @@
// addressing...
for (unsigned i = 2, e = CE->getNumOperands(); i != e; ++i)
if (ConstantUInt *CU = dyn_cast<ConstantUInt>(CE->getOperand(i))) {
- ConstantStruct *CS = cast<ConstantStruct>(C);
+ ConstantStruct *CS = dyn_cast<ConstantStruct>(C);
+ if (CS == 0) return 0;
if (CU->getValue() >= CS->getValues().size()) return 0;
C = cast<Constant>(CS->getValues()[CU->getValue()]);
} else if (ConstantSInt *CS = dyn_cast<ConstantSInt>(CE->getOperand(i))) {
- ConstantArray *CA = cast<ConstantArray>(C);
+ ConstantArray *CA = dyn_cast<ConstantArray>(C);
+ if (CA == 0) return 0;
if ((uint64_t)CS->getValue() >= CA->getValues().size()) return 0;
C = cast<Constant>(CA->getValues()[CS->getValue()]);
} else
@@ -2125,7 +2390,7 @@
Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
// Change br (not X), label True, label False to: br X, label False, True
- if (BI.isConditional() && !isa<Constant>(BI.getCondition()))
+ if (BI.isConditional() && !isa<Constant>(BI.getCondition())) {
if (Value *V = dyn_castNotVal(BI.getCondition())) {
BasicBlock *TrueDest = BI.getSuccessor(0);
BasicBlock *FalseDest = BI.getSuccessor(1);
@@ -2134,7 +2399,29 @@
BI.setSuccessor(0, FalseDest);
BI.setSuccessor(1, TrueDest);
return &BI;
+ } else if (SetCondInst *I = dyn_cast<SetCondInst>(BI.getCondition())) {
+ // Canonicalize setne -> seteq
+ if ((I->getOpcode() == Instruction::SetNE ||
+ I->getOpcode() == Instruction::SetLE ||
+ I->getOpcode() == Instruction::SetGE) && I->hasOneUse()) {
+ std::string Name = I->getName(); I->setName("");
+ Instruction::BinaryOps NewOpcode =
+ SetCondInst::getInverseCondition(I->getOpcode());
+ Value *NewSCC = BinaryOperator::create(NewOpcode, I->getOperand(0),
+ I->getOperand(1), Name, I);
+ BasicBlock *TrueDest = BI.getSuccessor(0);
+ BasicBlock *FalseDest = BI.getSuccessor(1);
+ // Swap Destinations and condition...
+ BI.setCondition(NewSCC);
+ BI.setSuccessor(0, FalseDest);
+ BI.setSuccessor(1, TrueDest);
+ removeFromWorkList(I);
+ I->getParent()->getInstList().erase(I);
+ WorkList.push_back(cast<Instruction>(NewSCC));
+ return &BI;
+ }
}
+ }
return 0;
}
@@ -2159,9 +2446,7 @@
if (isInstructionTriviallyDead(I)) {
// Add operands to the worklist...
if (I->getNumOperands() < 4)
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(i)))
- WorkList.push_back(Op);
+ AddUsesToWorkList(*I);
++NumDeadInst;
I->getParent()->getInstList().erase(I);
@@ -2172,9 +2457,7 @@
// Instruction isn't dead, see if we can constant propagate it...
if (Constant *C = ConstantFoldInstruction(I)) {
// Add operands to the worklist...
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(i)))
- WorkList.push_back(Op);
+ AddUsesToWorkList(*I);
ReplaceInstUsesWith(*I, C);
++NumConstProp;
@@ -2220,7 +2503,7 @@
if (Result) {
WorkList.push_back(Result);
- AddUsesToWorkList(*Result);
+ AddUsersToWorkList(*Result);
}
Changed = true;
}
Index: llvm/lib/Transforms/Scalar/LICM.cpp
diff -u llvm/lib/Transforms/Scalar/LICM.cpp:1.55 llvm/lib/Transforms/Scalar/LICM.cpp:1.55.2.1
--- llvm/lib/Transforms/Scalar/LICM.cpp:1.55 Wed Jan 7 18:09:44 2004
+++ llvm/lib/Transforms/Scalar/LICM.cpp Mon Mar 1 17:58:16 2004
@@ -679,7 +679,7 @@
I != E; ++I) {
AliasSet &AS = *I;
// We can promote this alias set if it has a store, if it is a "Must" alias
- // set, and if the pointer is loop invariant.
+ // set, if the pointer is loop invariant, and if we are not eliminating any
+ // volatile loads or stores.
if (!AS.isForwardingAliasSet() && AS.isMod() && AS.isMustAlias() &&
!AS.isVolatile() && isLoopInvariant(AS.begin()->first)) {
assert(AS.begin() != AS.end() &&
Index: llvm/lib/Transforms/Scalar/LoopSimplify.cpp
diff -u llvm/lib/Transforms/Scalar/LoopSimplify.cpp:1.30 llvm/lib/Transforms/Scalar/LoopSimplify.cpp:1.30.2.1
--- llvm/lib/Transforms/Scalar/LoopSimplify.cpp:1.30 Wed Jan 7 18:09:44 2004
+++ llvm/lib/Transforms/Scalar/LoopSimplify.cpp Mon Mar 1 17:58:16 2004
@@ -151,7 +151,7 @@
const std::vector<BasicBlock*> &Preds) {
// Create new basic block, insert right before the original block...
- BasicBlock *NewBB = new BasicBlock(BB->getName()+Suffix, BB);
+ BasicBlock *NewBB = new BasicBlock(BB->getName()+Suffix, BB->getParent(), BB);
// The preheader first gets an unconditional branch to the loop header...
BranchInst *BI = new BranchInst(BB, NewBB);
@@ -484,20 +484,44 @@
/// dominators, dominator trees, and dominance frontiers) after a new block has
/// been added to the CFG.
///
-/// This only supports the case when an existing block (known as "Exit"), had
-/// some of its predecessors factored into a new basic block. This
+/// This only supports the case when an existing block (known as "NewBBSucc"),
+/// had some of its predecessors factored into a new basic block. This
/// transformation inserts a new basic block ("NewBB"), with a single
-/// unconditional branch to Exit, and moves some predecessors of "Exit" to now
-/// branch to NewBB. These predecessors are listed in PredBlocks, even though
-/// they are the same as pred_begin(NewBB)/pred_end(NewBB).
+/// unconditional branch to NewBBSucc, and moves some predecessors of
+/// "NewBBSucc" to now branch to NewBB. These predecessors are listed in
+/// PredBlocks, even though they are the same as
+/// pred_begin(NewBB)/pred_end(NewBB).
///
void LoopSimplify::UpdateDomInfoForRevectoredPreds(BasicBlock *NewBB,
std::vector<BasicBlock*> &PredBlocks) {
+ assert(!PredBlocks.empty() && "No predblocks??");
assert(succ_begin(NewBB) != succ_end(NewBB) &&
++succ_begin(NewBB) == succ_end(NewBB) &&
"NewBB should have a single successor!");
+ BasicBlock *NewBBSucc = *succ_begin(NewBB);
DominatorSet &DS = getAnalysis<DominatorSet>();
+ // The newly inserted basic block will dominate existing basic blocks iff the
+ // PredBlocks dominate all of the non-pred blocks. If all predblocks dominate
+ // the non-pred blocks, then they all must be the same block!
+ bool NewBBDominatesNewBBSucc = true;
+ {
+ BasicBlock *OnePred = PredBlocks[0];
+ for (unsigned i = 1, e = PredBlocks.size(); i != e; ++i)
+ if (PredBlocks[i] != OnePred) {
+ NewBBDominatesNewBBSucc = false;
+ break;
+ }
+
+ if (NewBBDominatesNewBBSucc)
+ for (pred_iterator PI = pred_begin(NewBBSucc), E = pred_end(NewBBSucc);
+ PI != E; ++PI)
+ if (*PI != NewBB && !DS.dominates(NewBBSucc, *PI)) {
+ NewBBDominatesNewBBSucc = false;
+ break;
+ }
+ }
+
// Update dominator information... The blocks that dominate NewBB are the
// intersection of the dominators of predecessors, plus the block itself.
// The newly created basic block does not dominate anything except itself.
@@ -508,13 +532,22 @@
NewBBDomSet.insert(NewBB); // All blocks dominate themselves...
DS.addBasicBlock(NewBB, NewBBDomSet);
+ // If NewBB dominates some blocks, then it will dominate all blocks that
+ // NewBBSucc does.
+ if (NewBBDominatesNewBBSucc) {
+ BasicBlock *PredBlock = PredBlocks[0];
+ Function *F = NewBB->getParent();
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I)
+ if (DS.dominates(NewBBSucc, I))
+ DS.addDominator(I, NewBB);
+ }
+
// Update immediate dominator information if we have it...
BasicBlock *NewBBIDom = 0;
if (ImmediateDominators *ID = getAnalysisToUpdate<ImmediateDominators>()) {
- // This block does not strictly dominate anything, so it is not an immediate
- // dominator. To find the immediate dominator of the new exit node, we
- // trace up the immediate dominators of a predecessor until we find a basic
- // block that dominates the exit block.
+ // To find the immediate dominator of the new exit node, we trace up the
+ // immediate dominators of a predecessor until we find a basic block that
+ // dominates the exit block.
//
BasicBlock *Dom = PredBlocks[0]; // Some random predecessor...
while (!NewBBDomSet.count(Dom)) { // Loop until we find a dominator...
@@ -525,13 +558,21 @@
// Set the immediate dominator now...
ID->addNewBlock(NewBB, Dom);
NewBBIDom = Dom; // Reuse this if calculating DominatorTree info...
+
+ // If NewBB strictly dominates other blocks, we need to update their idom's
+ // now. The only block that need adjustment is the NewBBSucc block, whose
+ // idom should currently be set to PredBlocks[0].
+ if (NewBBDominatesNewBBSucc) {
+ assert(ID->get(NewBBSucc) == PredBlocks[0] &&
+ "Immediate dominator update code broken!");
+ ID->setImmediateDominator(NewBBSucc, NewBB);
+ }
}
// Update DominatorTree information if it is active.
if (DominatorTree *DT = getAnalysisToUpdate<DominatorTree>()) {
- // NewBB doesn't dominate anything, so just create a node and link it into
- // its immediate dominator. If we don't have ImmediateDominator info
- // around, calculate the idom as above.
+ // If we don't have ImmediateDominator info around, calculate the idom as
+ // above.
DominatorTree::Node *NewBBIDomNode;
if (NewBBIDom) {
NewBBIDomNode = DT->getNode(NewBBIDom);
@@ -543,27 +584,58 @@
}
}
- // Create the new dominator tree node...
- DT->createNewNode(NewBB, NewBBIDomNode);
+ // Create the new dominator tree node... and set the idom of NewBB.
+ DominatorTree::Node *NewBBNode = DT->createNewNode(NewBB, NewBBIDomNode);
+
+ // If NewBB strictly dominates other blocks, then it is now the immediate
+ // dominator of NewBBSucc. Update the dominator tree as appropriate.
+ if (NewBBDominatesNewBBSucc) {
+ DominatorTree::Node *NewBBSuccNode = DT->getNode(NewBBSucc);
+ assert(NewBBSuccNode->getIDom()->getBlock() == PredBlocks[0] &&
+ "Immediate tree update code broken!");
+ DT->changeImmediateDominator(NewBBSuccNode, NewBBNode);
+ }
}
// Update dominance frontier information...
if (DominanceFrontier *DF = getAnalysisToUpdate<DominanceFrontier>()) {
- // DF(NewBB) is {Exit} because NewBB does not strictly dominate Exit, but it
- // does dominate itself (and there is an edge (NewBB -> Exit)). Exit is the
- // single successor of NewBB.
- DominanceFrontier::DomSetType NewDFSet;
- BasicBlock *Exit = *succ_begin(NewBB);
- NewDFSet.insert(Exit);
- DF->addBasicBlock(NewBB, NewDFSet);
+ // If NewBB dominates NewBBSucc, then the global dominance frontiers are not
+ // changed. DF(NewBB) is now going to be the DF(PredBlocks[0]) without the
+ // stuff that the new block does not dominate a predecessor of.
+ if (NewBBDominatesNewBBSucc) {
+ DominanceFrontier::iterator DFI = DF->find(PredBlocks[0]);
+ if (DFI != DF->end()) {
+ DominanceFrontier::DomSetType Set = DFI->second;
+ // Filter out stuff in Set that we do not dominate a predecessor of.
+ for (DominanceFrontier::DomSetType::iterator SetI = Set.begin(),
+ E = Set.end(); SetI != E;) {
+ bool DominatesPred = false;
+ for (pred_iterator PI = pred_begin(*SetI), E = pred_end(*SetI);
+ PI != E; ++PI)
+ if (DS.dominates(NewBB, *PI))
+ DominatesPred = true;
+ if (!DominatesPred)
+ Set.erase(SetI++);
+ else
+ ++SetI;
+ }
- // Now we must loop over all of the dominance frontiers in the function,
- // replacing occurrences of Exit with NewBB in some cases. All blocks that
- // dominate a block in PredBlocks and contained Exit in their dominance
- // frontier must be updated to contain NewBB instead. This only occurs if
- // there is more than one block in PredBlocks.
- //
- if (PredBlocks.size() > 1) {
+ DF->addBasicBlock(NewBB, Set);
+ }
+
+ } else {
+ // DF(NewBB) is {NewBBSucc} because NewBB does not strictly dominate
+ // NewBBSucc, but it does dominate itself (and there is an edge (NewBB ->
+ // NewBBSucc)). NewBBSucc is the single successor of NewBB.
+ DominanceFrontier::DomSetType NewDFSet;
+ NewDFSet.insert(NewBBSucc);
+ DF->addBasicBlock(NewBB, NewDFSet);
+
+ // Now we must loop over all of the dominance frontiers in the function,
+ // replacing occurrences of NewBBSucc with NewBB in some cases. All
+ // blocks that dominate a block in PredBlocks and contained NewBBSucc in
+ // their dominance frontier must be updated to contain NewBB instead.
+ //
for (unsigned i = 0, e = PredBlocks.size(); i != e; ++i) {
BasicBlock *Pred = PredBlocks[i];
// Get all of the dominators of the predecessor...
@@ -572,13 +644,13 @@
PDE = PredDoms.end(); PDI != PDE; ++PDI) {
BasicBlock *PredDom = *PDI;
- // If the Exit node is in DF(PredDom), then PredDom didn't dominate
- // Exit but did dominate a predecessor of it. Now we change this
- // entry to include NewBB in the DF instead of Exit.
+ // If the NewBBSucc node is in DF(PredDom), then PredDom didn't
+ // dominate NewBBSucc but did dominate a predecessor of it. Now we
+ // change this entry to include NewBB in the DF instead of NewBBSucc.
DominanceFrontier::iterator DFI = DF->find(PredDom);
assert(DFI != DF->end() && "No dominance frontier for node?");
- if (DFI->second.count(Exit)) {
- DF->removeFromFrontier(DFI, Exit);
+ if (DFI->second.count(NewBBSucc)) {
+ DF->removeFromFrontier(DFI, NewBBSucc);
DF->addToFrontier(DFI, NewBB);
}
}
Index: llvm/lib/Transforms/Scalar/LowerAllocations.cpp
diff -u llvm/lib/Transforms/Scalar/LowerAllocations.cpp:1.43 llvm/lib/Transforms/Scalar/LowerAllocations.cpp:1.43.2.1
--- llvm/lib/Transforms/Scalar/LowerAllocations.cpp:1.43 Fri Jan 9 00:02:20 2004
+++ llvm/lib/Transforms/Scalar/LowerAllocations.cpp Mon Mar 1 17:58:16 2004
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// The LowerAllocations transformation is a target dependent tranformation
+// The LowerAllocations transformation is a target-dependent transformation
// because it depends on the size of data types and alignment constraints.
//
//===----------------------------------------------------------------------===//
@@ -67,8 +67,13 @@
//
bool LowerAllocations::doInitialization(Module &M) {
const Type *SBPTy = PointerType::get(Type::SByteTy);
- MallocFunc = M.getOrInsertFunction("malloc", SBPTy, Type::UIntTy, 0);
- FreeFunc = M.getOrInsertFunction("free" , Type::VoidTy, SBPTy, 0);
+ MallocFunc = M.getNamedFunction("malloc");
+ FreeFunc = M.getNamedFunction("free");
+
+ if (MallocFunc == 0)
+ MallocFunc = M.getOrInsertFunction("malloc", SBPTy, Type::UIntTy, 0);
+ if (FreeFunc == 0)
+ FreeFunc = M.getOrInsertFunction("free" , Type::VoidTy, SBPTy, 0);
return true;
}
@@ -101,13 +106,30 @@
MallocArg = BinaryOperator::create(Instruction::Mul, MI->getOperand(0),
MallocArg, "", I);
}
+
+ const FunctionType *MallocFTy = MallocFunc->getFunctionType();
+ std::vector<Value*> MallocArgs;
+ if (MallocFTy->getNumParams() > 0 || MallocFTy->isVarArg()) {
+ if (MallocFTy->getNumParams() > 0 &&
+ MallocFTy->getParamType(0) != Type::UIntTy)
+ MallocArg = new CastInst(MallocArg, MallocFTy->getParamType(0), "",I);
+ MallocArgs.push_back(MallocArg);
+ }
+
+ // If malloc is prototyped to take extra arguments, pass nulls.
+ for (unsigned i = 1; i < MallocFTy->getNumParams(); ++i)
+ MallocArgs.push_back(Constant::getNullValue(MallocFTy->getParamType(i)));
+
// Create the call to Malloc...
- CallInst *MCall = new CallInst(MallocFunc,
- std::vector<Value*>(1, MallocArg), "", I);
+ CallInst *MCall = new CallInst(MallocFunc, MallocArgs, "", I);
// Create a cast instruction to convert to the right type...
- CastInst *MCast = new CastInst(MCall, MI->getType(), "", I);
+ Value *MCast;
+ if (MCall->getType() != Type::VoidTy)
+ MCast = new CastInst(MCall, MI->getType(), "", I);
+ else
+ MCast = Constant::getNullValue(MI->getType());
// Replace all uses of the old malloc inst with the cast inst
MI->replaceAllUsesWith(MCast);
@@ -115,13 +137,23 @@
Changed = true;
++NumLowered;
} else if (FreeInst *FI = dyn_cast<FreeInst>(I)) {
- // Cast the argument to free into a ubyte*...
- CastInst *MCast = new CastInst(FI->getOperand(0),
- PointerType::get(Type::SByteTy), "", I);
+ const FunctionType *FreeFTy = FreeFunc->getFunctionType();
+ std::vector<Value*> FreeArgs;
+
+ if (FreeFTy->getNumParams() > 0 || FreeFTy->isVarArg()) {
+ Value *MCast = FI->getOperand(0);
+ if (FreeFTy->getNumParams() > 0 &&
+ FreeFTy->getParamType(0) != MCast->getType())
+ MCast = new CastInst(MCast, FreeFTy->getParamType(0), "", I);
+ FreeArgs.push_back(MCast);
+ }
+
+ // If free is prototyped to take extra arguments, pass nulls.
+ for (unsigned i = 1; i < FreeFTy->getNumParams(); ++i)
+ FreeArgs.push_back(Constant::getNullValue(FreeFTy->getParamType(i)));
// Insert a call to the free function...
- CallInst *FCall = new CallInst(FreeFunc, std::vector<Value*>(1, MCast),
- "", I);
+ new CallInst(FreeFunc, FreeArgs, "", I);
// Delete the old free instruction
I = --BBIL.erase(I);
Index: llvm/lib/Transforms/Scalar/LowerInvoke.cpp
diff -u llvm/lib/Transforms/Scalar/LowerInvoke.cpp:1.4 llvm/lib/Transforms/Scalar/LowerInvoke.cpp:1.4.4.1
--- llvm/lib/Transforms/Scalar/LowerInvoke.cpp:1.4 Wed Dec 10 14:22:42 2003
+++ llvm/lib/Transforms/Scalar/LowerInvoke.cpp Mon Mar 1 17:58:16 2004
@@ -8,47 +8,205 @@
//===----------------------------------------------------------------------===//
//
// This transformation is designed for use by code generators which do not yet
-// support stack unwinding. This pass gives them the ability to execute any
-// program which does not throw an exception, by turning 'invoke' instructions
-// into calls and by turning 'unwind' instructions into calls to abort().
+// support stack unwinding. This pass supports two models of exception handling
+// lowering, the 'cheap' support and the 'expensive' support.
+//
+// 'Cheap' exception handling support gives the program the ability to execute
+// any program which does not "throw an exception", by turning 'invoke'
+// instructions into calls and by turning 'unwind' instructions into calls to
+// abort(). If the program does dynamically use the unwind instruction, the
+// program will print a message then abort.
+//
+// 'Expensive' exception handling support gives full exception handling support
+// to the program at the cost of making the 'invoke' instruction really expensive.
+// It basically inserts setjmp/longjmp calls to emulate the exception handling
+// as necessary.
+//
+// Because the 'expensive' support slows down programs a lot, and EH is only
+// used for a subset of the programs, it must be specifically enabled by an
+// option.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Pass.h"
-#include "llvm/iTerminators.h"
-#include "llvm/iOther.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
#include "llvm/Module.h"
-#include "llvm/Type.h"
-#include "llvm/Constant.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "Support/Statistic.h"
+#include "Support/CommandLine.h"
+#include <csetjmp>
using namespace llvm;
namespace {
Statistic<> NumLowered("lowerinvoke", "Number of invoke & unwinds replaced");
+ cl::opt<bool> ExpensiveEHSupport("enable-correct-eh-support",
+ cl::desc("Make the -lowerinvoke pass insert expensive, but correct, EH code"));
class LowerInvoke : public FunctionPass {
+ // Used for both models.
+ Function *WriteFn;
Function *AbortFn;
+ Constant *AbortMessageInit;
+ Value *AbortMessage;
+ unsigned AbortMessageLength;
+
+ // Used for expensive EH support.
+ const Type *JBLinkTy;
+ GlobalVariable *JBListHead;
+ Function *SetJmpFn, *LongJmpFn;
public:
bool doInitialization(Module &M);
bool runOnFunction(Function &F);
+ private:
+ void writeAbortMessage(Instruction *IB);
+ bool insertCheapEHSupport(Function &F);
+ bool insertExpensiveEHSupport(Function &F);
};
RegisterOpt<LowerInvoke>
X("lowerinvoke", "Lower invoke and unwind, for unwindless code generators");
}
+const PassInfo *llvm::LowerInvokePassID = X.getPassInfo();
+
// Public Interface To the LowerInvoke pass.
FunctionPass *llvm::createLowerInvokePass() { return new LowerInvoke(); }
// doInitialization - Make sure that there is a prototype for abort in the
// current module.
bool LowerInvoke::doInitialization(Module &M) {
+ const Type *VoidPtrTy = PointerType::get(Type::SByteTy);
+ AbortMessage = 0;
+ if (ExpensiveEHSupport) {
+ // Insert a type for the linked list of jump buffers. Unfortunately, we
+ // don't know the size of the target's setjmp buffer, so we make a guess.
+ // If this guess turns out to be too small, bad stuff could happen.
+ unsigned JmpBufSize = 200; // PPC has 192 words
+ assert(sizeof(jmp_buf) <= JmpBufSize*sizeof(void*) &&
+ "LowerInvoke doesn't know about targets with jmp_buf size > 200 words!");
+ const Type *JmpBufTy = ArrayType::get(VoidPtrTy, JmpBufSize);
+
+ { // The type is recursive, so use a type holder.
+ std::vector<const Type*> Elements;
+ OpaqueType *OT = OpaqueType::get();
+ Elements.push_back(PointerType::get(OT));
+ Elements.push_back(JmpBufTy);
+ PATypeHolder JBLType(StructType::get(Elements));
+ OT->refineAbstractTypeTo(JBLType.get()); // Complete the cycle.
+ JBLinkTy = JBLType.get();
+ }
+
+ const Type *PtrJBList = PointerType::get(JBLinkTy);
+
+ // Now that we've done that, insert the jmpbuf list head global, unless it
+ // already exists.
+ if (!(JBListHead = M.getGlobalVariable("llvm.sjljeh.jblist", PtrJBList)))
+ JBListHead = new GlobalVariable(PtrJBList, false,
+ GlobalValue::LinkOnceLinkage,
+ Constant::getNullValue(PtrJBList),
+ "llvm.sjljeh.jblist", &M);
+ SetJmpFn = M.getOrInsertFunction("llvm.setjmp", Type::IntTy,
+ PointerType::get(JmpBufTy), 0);
+ LongJmpFn = M.getOrInsertFunction("llvm.longjmp", Type::VoidTy,
+ PointerType::get(JmpBufTy),
+ Type::IntTy, 0);
+
+ // The abort message for expensive EH support tells the user that the
+ // program 'unwound' without an 'invoke' instruction.
+ Constant *Msg =
+ ConstantArray::get("ERROR: Exception thrown, but not caught!\n");
+ AbortMessageLength = Msg->getNumOperands()-1; // don't include \0
+ AbortMessageInit = Msg;
+
+ GlobalVariable *MsgGV = M.getGlobalVariable("abort.msg", Msg->getType());
+ if (MsgGV && (!MsgGV->hasInitializer() || MsgGV->getInitializer() != Msg))
+ MsgGV = 0;
+
+ if (MsgGV) {
+ std::vector<Constant*> GEPIdx(2, Constant::getNullValue(Type::LongTy));
+ AbortMessage =
+ ConstantExpr::getGetElementPtr(ConstantPointerRef::get(MsgGV), GEPIdx);
+ }
+
+ } else {
+ // The abort message for cheap EH support tells the user that EH is not
+ // enabled.
+ Constant *Msg =
+ ConstantArray::get("Exception handler needed, but not enabled. Recompile"
+ " program with -enable-correct-eh-support.\n");
+ AbortMessageLength = Msg->getNumOperands()-1; // don't include \0
+ AbortMessageInit = Msg;
+
+ GlobalVariable *MsgGV = M.getGlobalVariable("abort.msg", Msg->getType());
+ if (MsgGV && (!MsgGV->hasInitializer() || MsgGV->getInitializer() != Msg))
+ MsgGV = 0;
+
+ if (MsgGV) {
+ std::vector<Constant*> GEPIdx(2, Constant::getNullValue(Type::LongTy));
+ AbortMessage =
+ ConstantExpr::getGetElementPtr(ConstantPointerRef::get(MsgGV), GEPIdx);
+ }
+ }
+
+ // We need the 'write' and 'abort' functions for both models.
AbortFn = M.getOrInsertFunction("abort", Type::VoidTy, 0);
+
+ // Unfortunately, 'write' can end up being prototyped in several different
+ // ways. If the user defines a three (or more) operand function named 'write'
+ // we will use their prototype. We _do not_ want to insert another instance
+ // of a write prototype, because we don't know that the funcresolve pass will
+ // run after us. If there is a definition of a write function, but it's not
+ // suitable for our uses, we just don't emit write calls. If there is no
+ // write prototype at all, we just add one.
+ if (Function *WF = M.getNamedFunction("write")) {
+ if (WF->getFunctionType()->getNumParams() > 3 ||
+ WF->getFunctionType()->isVarArg())
+ WriteFn = WF;
+ else
+ WriteFn = 0;
+ } else {
+ WriteFn = M.getOrInsertFunction("write", Type::VoidTy, Type::IntTy,
+ VoidPtrTy, Type::IntTy, 0);
+ }
return true;
}
-bool LowerInvoke::runOnFunction(Function &F) {
+void LowerInvoke::writeAbortMessage(Instruction *IB) {
+ if (WriteFn) {
+ if (!AbortMessage) {
+ GlobalVariable *MsgGV = new GlobalVariable(AbortMessageInit->getType(),
+ true,
+ GlobalValue::InternalLinkage,
+ AbortMessageInit, "abort.msg",
+ WriteFn->getParent());
+ std::vector<Constant*> GEPIdx(2, Constant::getNullValue(Type::LongTy));
+ AbortMessage =
+ ConstantExpr::getGetElementPtr(ConstantPointerRef::get(MsgGV), GEPIdx);
+ }
+
+ // These are the arguments we WANT...
+ std::vector<Value*> Args;
+ Args.push_back(ConstantInt::get(Type::IntTy, 2));
+ Args.push_back(AbortMessage);
+ Args.push_back(ConstantInt::get(Type::IntTy, AbortMessageLength));
+
+ // If the actual declaration of write disagrees, insert casts as
+ // appropriate.
+ const FunctionType *FT = WriteFn->getFunctionType();
+ unsigned NumArgs = FT->getNumParams();
+ for (unsigned i = 0; i != 3; ++i)
+ if (i < NumArgs && FT->getParamType(i) != Args[i]->getType())
+ Args[i] = ConstantExpr::getCast(cast<Constant>(Args[i]),
+ FT->getParamType(i));
+
+ new CallInst(WriteFn, Args, "", IB);
+ }
+}
+
+bool LowerInvoke::insertCheapEHSupport(Function &F) {
bool Changed = false;
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
@@ -63,17 +221,21 @@
new BranchInst(II->getNormalDest(), II);
// Remove any PHI node entries from the exception destination.
- II->getExceptionalDest()->removePredecessor(BB);
+ II->getUnwindDest()->removePredecessor(BB);
// Remove the invoke instruction now.
BB->getInstList().erase(II);
++NumLowered; Changed = true;
} else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ // Insert a new call to write(2, AbortMessage, AbortMessageLength);
+ writeAbortMessage(UI);
+
// Insert a call to abort()
new CallInst(AbortFn, std::vector<Value*>(), "", UI);
- // Insert a return instruction.
+ // Insert a return instruction. This really should be a "barrier", as it
+ // is unreachable.
new ReturnInst(F.getReturnType() == Type::VoidTy ? 0 :
Constant::getNullValue(F.getReturnType()), UI);
@@ -83,4 +245,152 @@
++NumLowered; Changed = true;
}
return Changed;
+}
+
+bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
+ bool Changed = false;
+
+ // If a function uses invoke, we have an alloca for the jump buffer.
+ AllocaInst *JmpBuf = 0;
+
+ // If this function contains an unwind instruction, two blocks get added: one
+ // to actually perform the longjmp, and one to terminate the program if there
+ // is no handler.
+ BasicBlock *UnwindBlock = 0, *TermBlock = 0;
+ std::vector<LoadInst*> JBPtrs;
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ if (JmpBuf == 0)
+ JmpBuf = new AllocaInst(JBLinkTy, 0, "jblink", F.begin()->begin());
+
+ // On the entry to the invoke, we must install our JmpBuf as the top of
+ // the stack.
+ LoadInst *OldEntry = new LoadInst(JBListHead, "oldehlist", II);
+
+ // Store this old value as our 'next' field, and store our alloca as the
+ // current jblist.
+ std::vector<Value*> Idx;
+ Idx.push_back(Constant::getNullValue(Type::LongTy));
+ Idx.push_back(ConstantUInt::get(Type::UByteTy, 0));
+ Value *NextFieldPtr = new GetElementPtrInst(JmpBuf, Idx, "NextField", II);
+ new StoreInst(OldEntry, NextFieldPtr, II);
+ new StoreInst(JmpBuf, JBListHead, II);
+
+ // Call setjmp, passing in the address of the jmpbuffer.
+ Idx[1] = ConstantUInt::get(Type::UByteTy, 1);
+ Value *JmpBufPtr = new GetElementPtrInst(JmpBuf, Idx, "TheJmpBuf", II);
+ Value *SJRet = new CallInst(SetJmpFn, JmpBufPtr, "sjret", II);
+
+ // Compare the return value to zero.
+ Value *IsNormal = BinaryOperator::create(Instruction::SetEQ, SJRet,
+ Constant::getNullValue(SJRet->getType()),
+ "notunwind", II);
+ // Create the receiver block if there is a critical edge to the normal
+ // destination.
+ SplitCriticalEdge(II, 0, this);
+ Instruction *InsertLoc = II->getNormalDest()->begin();
+
+ // Insert a normal call instruction on the normal execution path.
+ std::string Name = II->getName(); II->setName("");
+ Value *NewCall = new CallInst(II->getCalledValue(),
+ std::vector<Value*>(II->op_begin()+3,
+ II->op_end()), Name,
+ InsertLoc);
+ II->replaceAllUsesWith(NewCall);
+
+ // If we got this far, then no exception was thrown and we can pop our
+ // jmpbuf entry off.
+ new StoreInst(OldEntry, JBListHead, InsertLoc);
+
+ // Now we change the invoke into a branch instruction.
+ new BranchInst(II->getNormalDest(), II->getUnwindDest(), IsNormal, II);
+
+ // Remove the InvokeInst now.
+ BB->getInstList().erase(II);
+ ++NumLowered; Changed = true;
+
+ } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ if (UnwindBlock == 0) {
+ // Create two new blocks, the unwind block and the terminate block. Add
+ // them at the end of the function because they are not hot.
+ UnwindBlock = new BasicBlock("unwind", &F);
+ TermBlock = new BasicBlock("unwinderror", &F);
+
+ // Insert return instructions. These really should be "barrier"s, as
+ // they are unreachable.
+ new ReturnInst(F.getReturnType() == Type::VoidTy ? 0 :
+ Constant::getNullValue(F.getReturnType()), UnwindBlock);
+ new ReturnInst(F.getReturnType() == Type::VoidTy ? 0 :
+ Constant::getNullValue(F.getReturnType()), TermBlock);
+ }
+
+ // Load the JBList, if it's null, then there was no catch!
+ LoadInst *Ptr = new LoadInst(JBListHead, "ehlist", UI);
+ Value *NotNull = BinaryOperator::create(Instruction::SetNE, Ptr,
+ Constant::getNullValue(Ptr->getType()),
+ "notnull", UI);
+ new BranchInst(UnwindBlock, TermBlock, NotNull, UI);
+
+ // Remember the loaded value so we can insert the PHI node as needed.
+ JBPtrs.push_back(Ptr);
+
+ // Remove the UnwindInst now.
+ BB->getInstList().erase(UI);
+ ++NumLowered; Changed = true;
+ }
+
+ // If an unwind instruction was inserted, we need to set up the Unwind and
+ // term blocks.
+ if (UnwindBlock) {
+ // In the unwind block, we know that the pointer coming in on the JBPtrs
+ // list are non-null.
+ Instruction *RI = UnwindBlock->getTerminator();
+
+ Value *RecPtr;
+ if (JBPtrs.size() == 1)
+ RecPtr = JBPtrs[0];
+ else {
+ // If there is more than one unwind in this function, make a PHI node to
+ // merge in all of the loaded values.
+ PHINode *PN = new PHINode(JBPtrs[0]->getType(), "jbptrs", RI);
+ for (unsigned i = 0, e = JBPtrs.size(); i != e; ++i)
+ PN->addIncoming(JBPtrs[i], JBPtrs[i]->getParent());
+ RecPtr = PN;
+ }
+
+ // Now that we have a pointer to the whole record, remove the entry from the
+ // JBList.
+ std::vector<Value*> Idx;
+ Idx.push_back(Constant::getNullValue(Type::LongTy));
+ Idx.push_back(ConstantUInt::get(Type::UByteTy, 0));
+ Value *NextFieldPtr = new GetElementPtrInst(RecPtr, Idx, "NextField", RI);
+ Value *NextRec = new LoadInst(NextFieldPtr, "NextRecord", RI);
+ new StoreInst(NextRec, JBListHead, RI);
+
+ // Now that we popped the top of the JBList, get a pointer to the jmpbuf and
+ // longjmp.
+ Idx[1] = ConstantUInt::get(Type::UByteTy, 1);
+ Idx[0] = new GetElementPtrInst(RecPtr, Idx, "JmpBuf", RI);
+ Idx[1] = ConstantInt::get(Type::IntTy, 1);
+ new CallInst(LongJmpFn, Idx, "", RI);
+
+ // Now we set up the terminate block.
+ RI = TermBlock->getTerminator();
+
+ // Insert a new call to write(2, AbortMessage, AbortMessageLength);
+ writeAbortMessage(RI);
+
+ // Insert a call to abort()
+ new CallInst(AbortFn, std::vector<Value*>(), "", RI);
+ }
+
+ return Changed;
+}
+
+bool LowerInvoke::runOnFunction(Function &F) {
+ if (ExpensiveEHSupport)
+ return insertExpensiveEHSupport(F);
+ else
+ return insertCheapEHSupport(F);
}
Index: llvm/lib/Transforms/Scalar/LowerSwitch.cpp
diff -u llvm/lib/Transforms/Scalar/LowerSwitch.cpp:1.10 llvm/lib/Transforms/Scalar/LowerSwitch.cpp:1.10.2.1
--- llvm/lib/Transforms/Scalar/LowerSwitch.cpp:1.10 Fri Jan 9 00:02:20 2004
+++ llvm/lib/Transforms/Scalar/LowerSwitch.cpp Mon Mar 1 17:58:16 2004
@@ -115,7 +115,8 @@
Case& Pivot = *(Begin + Mid);
DEBUG(std::cerr << "Pivot ==> "
- << cast<ConstantUInt>(Pivot.first)->getValue() << "\n");
+ << (int64_t)cast<ConstantInt>(Pivot.first)->getRawValue()
+ << "\n");
BasicBlock* LBranch = switchConvert(LHS.begin(), LHS.end(), Val,
OrigBlock, Default);
Index: llvm/lib/Transforms/Scalar/SCCP.cpp
diff -u llvm/lib/Transforms/Scalar/SCCP.cpp:1.88 llvm/lib/Transforms/Scalar/SCCP.cpp:1.88.2.1
--- llvm/lib/Transforms/Scalar/SCCP.cpp:1.88 Mon Jan 12 13:08:43 2004
+++ llvm/lib/Transforms/Scalar/SCCP.cpp Mon Mar 1 17:58:16 2004
@@ -689,14 +689,16 @@
// addressing...
for (unsigned i = 2, e = CE->getNumOperands(); i != e; ++i)
if (ConstantUInt *CU = dyn_cast<ConstantUInt>(CE->getOperand(i))) {
- ConstantStruct *CS = cast<ConstantStruct>(C);
+ ConstantStruct *CS = dyn_cast<ConstantStruct>(C);
+ if (CS == 0) return 0;
if (CU->getValue() >= CS->getValues().size()) return 0;
C = cast<Constant>(CS->getValues()[CU->getValue()]);
} else if (ConstantSInt *CS = dyn_cast<ConstantSInt>(CE->getOperand(i))) {
- ConstantArray *CA = cast<ConstantArray>(C);
+ ConstantArray *CA = dyn_cast<ConstantArray>(C);
+ if (CA == 0) return 0;
if ((uint64_t)CS->getValue() >= CA->getValues().size()) return 0;
C = cast<Constant>(CA->getValues()[CS->getValue()]);
- } else
+ } else
return 0;
return C;
}
Index: llvm/lib/Transforms/Scalar/TailDuplication.cpp
diff -u llvm/lib/Transforms/Scalar/TailDuplication.cpp:1.11 llvm/lib/Transforms/Scalar/TailDuplication.cpp:1.11.2.1
--- llvm/lib/Transforms/Scalar/TailDuplication.cpp:1.11 Fri Jan 9 00:02:20 2004
+++ llvm/lib/Transforms/Scalar/TailDuplication.cpp Mon Mar 1 17:58:16 2004
@@ -41,6 +41,7 @@
bool runOnFunction(Function &F);
private:
inline bool shouldEliminateUnconditionalBranch(TerminatorInst *TI);
+ inline bool canEliminateUnconditionalBranch(TerminatorInst *TI);
inline void eliminateUnconditionalBranch(BranchInst *BI);
inline void InsertPHINodesIfNecessary(Instruction *OrigInst, Value *NewInst,
BasicBlock *NewBlock);
@@ -63,7 +64,8 @@
bool TailDup::runOnFunction(Function &F) {
bool Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E; )
- if (shouldEliminateUnconditionalBranch(I->getTerminator())) {
+ if (shouldEliminateUnconditionalBranch(I->getTerminator()) &&
+ canEliminateUnconditionalBranch(I->getTerminator())) {
eliminateUnconditionalBranch(cast<BranchInst>(I->getTerminator()));
Changed = true;
} else {
@@ -109,6 +111,36 @@
for (unsigned Size = 0; I != Dest->end(); ++Size, ++I)
if (Size == 6) return false; // The block is too large...
return true;
+}
+
+/// canEliminateUnconditionalBranch - Unfortunately, the general form of tail
+/// duplication can do very bad things to SSA form, by destroying arbitrary
+/// relationships between dominators and dominator frontiers as it processes the
+/// program. The right solution for this is to have an incrementally updating
+/// dominator data structure, which can gracefully react to arbitrary
+/// "addEdge/removeEdge" changes to the CFG. Implementing this is nontrivial,
+/// however, so we just disable the transformation in cases where it is not
+/// currently safe.
+///
+bool TailDup::canEliminateUnconditionalBranch(TerminatorInst *TI) {
+ // Basically, we refuse to make the transformation if any of the values
+ // computed in the 'tail' are used in any other basic blocks.
+ BasicBlock *BB = TI->getParent();
+ BasicBlock *Tail = TI->getSuccessor(0);
+ assert(isa<BranchInst>(TI) && cast<BranchInst>(TI)->isUnconditional());
+
+ for (BasicBlock::iterator I = Tail->begin(), E = Tail->end(); I != E; ++I)
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+ ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (User->getParent() != Tail && User->getParent() != BB)
+ return false;
+
+ // The 'swap' problem foils the tail duplication rewriting code.
+ if (isa<PHINode>(User) && User->getParent() == Tail)
+ return false;
+ }
+ return true;
}
Index: llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
diff -u llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp:1.12 llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp:1.12.2.1
--- llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp:1.12 Sun Dec 14 17:57:39 2003
+++ llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp Mon Mar 1 17:58:16 2004
@@ -290,7 +290,7 @@
if (OldEntry == 0) {
OldEntry = &F->getEntryBlock();
std::string OldName = OldEntry->getName(); OldEntry->setName("tailrecurse");
- BasicBlock *NewEntry = new BasicBlock(OldName, OldEntry);
+ BasicBlock *NewEntry = new BasicBlock(OldName, F, OldEntry);
new BranchInst(OldEntry, NewEntry);
// Now that we have created a new block, which jumps to the entry
More information about the llvm-commits
mailing list