[llvm-commits] [parallel] CVS: llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp ADCE.cpp GCSE.cpp InstructionCombining.cpp LICM.cpp LoopSimplify.cpp LowerAllocations.cpp LowerInvoke.cpp LowerSwitch.cpp SCCP.cpp TailDuplication.cpp TailRecursionElimination.cpp
Misha Brukman
brukman at cs.uiuc.edu
Mon Mar 1 18:05:23 PST 2004
Changes in directory llvm/lib/Transforms/Scalar:
BasicBlockPlacement.cpp added (r1.2.2.1)
ADCE.cpp updated: 1.70 -> 1.70.2.1
GCSE.cpp updated: 1.33 -> 1.33.2.1
InstructionCombining.cpp updated: 1.149 -> 1.149.2.1
LICM.cpp updated: 1.55 -> 1.55.2.1
LoopSimplify.cpp updated: 1.30 -> 1.30.2.1
LowerAllocations.cpp updated: 1.43 -> 1.43.2.1
LowerInvoke.cpp updated: 1.4 -> 1.4.4.1
LowerSwitch.cpp updated: 1.10 -> 1.10.2.1
SCCP.cpp updated: 1.88 -> 1.88.2.1
TailDuplication.cpp updated: 1.11 -> 1.11.2.1
TailRecursionElimination.cpp updated: 1.12 -> 1.12.2.1
---
Log message:
Merge from trunk
---
Diffs of the changes: (+992 -116)
Index: llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp
diff -c /dev/null llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp:1.2.2.1
*** /dev/null Mon Mar 1 17:58:28 2004
--- llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp Mon Mar 1 17:58:16 2004
***************
*** 0 ****
--- 1,139 ----
+ //===-- BasicBlockPlacement.cpp - Basic Block Code Layout optimization ----===//
+ //
+ // The LLVM Compiler Infrastructure
+ //
+ // This file was developed by the LLVM research group and is distributed under
+ // the University of Illinois Open Source License. See LICENSE.TXT for details.
+ //
+ //===----------------------------------------------------------------------===//
+ //
+ // This file implements a very simple profile guided basic block placement
+ // algorithm. The idea is to put frequently executed blocks together at the
+ // start of the function, and hopefully increase the number of fall-through
+ // conditional branches. If there is no profile information for a particular
+ // function, this pass basically orders blocks in depth-first order.
+ //
+ // The algorithm implemented here is basically "Algo1" from "Profile Guided Code
+ // Positioning" by Pettis and Hansen, except that it uses basic block counts
+ // instead of edge counts. This should be improved in many ways, but is very
+ // simple for now.
+ //
+ // Basically we "place" the entry block, then loop over all successors in a DFO,
+ // placing the most frequently executed successor until we run out of blocks. I
+ // told you this was _extremely_ simplistic. :) This is also much slower than it
+ // could be. When it becomes important, this pass will be rewritten to use a
+ // better algorithm, and then we can worry about efficiency.
+ //
+ //===----------------------------------------------------------------------===//
+
+ #include "llvm/Analysis/ProfileInfo.h"
+ #include "llvm/Function.h"
+ #include "llvm/Pass.h"
+ #include "llvm/Support/CFG.h"
+ #include "Support/Statistic.h"
+ #include <set>
+ using namespace llvm;
+
+ namespace {
+ Statistic<> NumMoved("block-placement", "Number of basic blocks moved");
+
+ struct BlockPlacement : public FunctionPass {
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<ProfileInfo>();
+ //AU.addPreserved<ProfileInfo>(); // Does this work?
+ }
+ private:
+ /// PI - The profile information that is guiding us.
+ ///
+ ProfileInfo *PI;
+
+ /// NumMovedBlocks - Every time we move a block, increment this counter.
+ ///
+ unsigned NumMovedBlocks;
+
+ /// PlacedBlocks - Every time we place a block, remember it so we don't get
+ /// into infinite loops.
+ std::set<BasicBlock*> PlacedBlocks;
+
+ /// InsertPos - This is an iterator to the next place we want to insert a
+ /// block.
+ Function::iterator InsertPos;
+
+ /// PlaceBlocks - Recursively place the specified blocks and any unplaced
+ /// successors.
+ void PlaceBlocks(BasicBlock *BB);
+ };
+
+ RegisterOpt<BlockPlacement> X("block-placement",
+ "Profile Guided Basic Block Placement");
+ }
+
+ bool BlockPlacement::runOnFunction(Function &F) {
+ PI = &getAnalysis<ProfileInfo>();
+
+ NumMovedBlocks = 0;
+ InsertPos = F.begin();
+
+ // Recursively place all blocks.
+ PlaceBlocks(F.begin());
+
+ PlacedBlocks.clear();
+ NumMoved += NumMovedBlocks;
+ return NumMovedBlocks != 0;
+ }
+
+
+ /// PlaceBlocks - Recursively place the specified blocks and any unplaced
+ /// successors.
+ void BlockPlacement::PlaceBlocks(BasicBlock *BB) {
+ assert(!PlacedBlocks.count(BB) && "Already placed this block!");
+ PlacedBlocks.insert(BB);
+
+ // Place the specified block.
+ if (&*InsertPos != BB) {
+ // Use splice to move the block into the right place. This avoids having to
+ // remove the block from the function then readd it, which causes a bunch of
+ // symbol table traffic that is entirely pointless.
+ Function::BasicBlockListType &Blocks = BB->getParent()->getBasicBlockList();
+ Blocks.splice(InsertPos, Blocks, BB);
+
+ ++NumMovedBlocks;
+ } else {
+ // This block is already in the right place, we don't have to do anything.
+ ++InsertPos;
+ }
+
+ // Keep placing successors until we run out of ones to place. Note that this
+ // loop is very inefficient (N^2) for blocks with many successors, like switch
+ // statements. FIXME!
+ while (1) {
+ // Okay, now place any unplaced successors.
+ succ_iterator SI = succ_begin(BB), E = succ_end(BB);
+
+ // Scan for the first unplaced successor.
+ for (; SI != E && PlacedBlocks.count(*SI); ++SI)
+ /*empty*/;
+ if (SI == E) return; // No more successors to place.
+
+ unsigned MaxExecutionCount = PI->getExecutionCount(*SI);
+ BasicBlock *MaxSuccessor = *SI;
+
+ // Scan for more frequently executed successors
+ for (; SI != E; ++SI)
+ if (!PlacedBlocks.count(*SI)) {
+ unsigned Count = PI->getExecutionCount(*SI);
+ if (Count > MaxExecutionCount ||
+ // Prefer to not disturb the code.
+ (Count == MaxExecutionCount && *SI == &*InsertPos)) {
+ MaxExecutionCount = Count;
+ MaxSuccessor = *SI;
+ }
+ }
+
+ // Now that we picked the maximally executed successor, place it.
+ PlaceBlocks(MaxSuccessor);
+ }
+ }
Index: llvm/lib/Transforms/Scalar/ADCE.cpp
diff -u llvm/lib/Transforms/Scalar/ADCE.cpp:1.70 llvm/lib/Transforms/Scalar/ADCE.cpp:1.70.2.1
--- llvm/lib/Transforms/Scalar/ADCE.cpp:1.70 Fri Dec 19 03:08:34 2003
+++ llvm/lib/Transforms/Scalar/ADCE.cpp Mon Mar 1 17:58:16 2004
@@ -144,6 +144,7 @@
// Delete the instruction...
I = BB->getInstList().erase(I);
Changed = true;
+ ++NumInstRemoved;
} else {
++I;
}
Index: llvm/lib/Transforms/Scalar/GCSE.cpp
diff -u llvm/lib/Transforms/Scalar/GCSE.cpp:1.33 llvm/lib/Transforms/Scalar/GCSE.cpp:1.33.2.1
--- llvm/lib/Transforms/Scalar/GCSE.cpp:1.33 Fri Jan 9 00:02:20 2004
+++ llvm/lib/Transforms/Scalar/GCSE.cpp Mon Mar 1 17:58:16 2004
@@ -21,6 +21,7 @@
#include "llvm/Analysis/ValueNumbering.h"
#include "llvm/Support/InstIterator.h"
#include "Support/Statistic.h"
+#include "Support/Debug.h"
#include <algorithm>
using namespace llvm;
@@ -165,6 +166,9 @@
//
void GCSE::ReplaceInstWithInst(Instruction *First, BasicBlock::iterator SI) {
Instruction &Second = *SI;
+
+ DEBUG(std::cerr << "GCSE: Substituting %" << First->getName() << " for: "
+ << Second);
//cerr << "DEL " << (void*)Second << Second;
Index: llvm/lib/Transforms/Scalar/InstructionCombining.cpp
diff -u llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.149 llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.149.2.1
--- llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.149 Wed Jan 14 00:06:08 2004
+++ llvm/lib/Transforms/Scalar/InstructionCombining.cpp Mon Mar 1 17:58:16 2004
@@ -35,6 +35,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
#include "llvm/Pass.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
@@ -60,15 +61,25 @@
std::vector<Instruction*> WorkList;
TargetData *TD;
- void AddUsesToWorkList(Instruction &I) {
- // The instruction was simplified, add all users of the instruction to
- // the work lists because they might get more simplified now...
- //
+ /// AddUsersToWorkList - When an instruction is simplified, add all users of
+ /// the instruction to the work lists because they might get more simplified
+ /// now.
+ ///
+ void AddUsersToWorkList(Instruction &I) {
for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
UI != UE; ++UI)
WorkList.push_back(cast<Instruction>(*UI));
}
+ /// AddUsesToWorkList - When an instruction is simplified, add operands to
+ /// the work lists because they might get more simplified now.
+ ///
+ void AddUsesToWorkList(Instruction &I) {
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+ if (Instruction *Op = dyn_cast<Instruction>(I.getOperand(i)))
+ WorkList.push_back(Op);
+ }
+
// removeFromWorkList - remove all instances of I from the worklist.
void removeFromWorkList(Instruction *I);
public:
@@ -116,12 +127,13 @@
// InsertNewInstBefore - insert an instruction New before instruction Old
// in the program. Add the new instruction to the worklist.
//
- void InsertNewInstBefore(Instruction *New, Instruction &Old) {
+ Value *InsertNewInstBefore(Instruction *New, Instruction &Old) {
assert(New && New->getParent() == 0 &&
"New instruction already inserted into a basic block!");
BasicBlock *BB = Old.getParent();
BB->getInstList().insert(&Old, New); // Insert inst
WorkList.push_back(New); // Add to worklist
+ return New;
}
public:
@@ -132,10 +144,24 @@
// modified.
//
Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) {
- AddUsesToWorkList(I); // Add all modified instrs to worklist
+ AddUsersToWorkList(I); // Add all modified instrs to worklist
I.replaceAllUsesWith(V);
return &I;
}
+
+ // EraseInstFromFunction - When dealing with an instruction that has side
+ // effects or produces a void value, we can't rely on DCE to delete the
+ // instruction. Instead, visit methods should return the value returned by
+ // this function.
+ Instruction *EraseInstFromFunction(Instruction &I) {
+ assert(I.use_empty() && "Cannot erase instruction that is used!");
+ AddUsesToWorkList(I);
+ removeFromWorkList(&I);
+ I.getParent()->getInstList().erase(&I);
+ return 0; // Don't do anything with FI
+ }
+
+
private:
/// InsertOperandCastBefore - This inserts a cast of V to DestTy before the
/// InsertBefore instruction. This is specialized a bit to avoid inserting
@@ -173,6 +199,31 @@
return V->hasOneUse() || isa<Constant>(V);
}
+// getSignedIntegralType - Given an unsigned integral type, return the signed
+// version of it that has the same size.
+static const Type *getSignedIntegralType(const Type *Ty) {
+ switch (Ty->getPrimitiveID()) {
+ default: assert(0 && "Invalid unsigned integer type!"); abort();
+ case Type::UByteTyID: return Type::SByteTy;
+ case Type::UShortTyID: return Type::ShortTy;
+ case Type::UIntTyID: return Type::IntTy;
+ case Type::ULongTyID: return Type::LongTy;
+ }
+}
+
+// getPromotedType - Return the specified type promoted as it would be to pass
+// through a va_arg area...
+static const Type *getPromotedType(const Type *Ty) {
+ switch (Ty->getPrimitiveID()) {
+ case Type::SByteTyID:
+ case Type::ShortTyID: return Type::IntTy;
+ case Type::UByteTyID:
+ case Type::UShortTyID: return Type::UIntTy;
+ case Type::FloatTyID: return Type::DoubleTy;
+ default: return Ty;
+ }
+}
+
// SimplifyCommutative - This performs a few simplifications for commutative
// operators:
//
@@ -415,7 +466,8 @@
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
// X + 0 --> X
- if (RHS == Constant::getNullValue(I.getType()))
+ if (!I.getType()->isFloatingPoint() && // -0 + +0 = +0, so it's not a noop
+ RHS == Constant::getNullValue(I.getType()))
return ReplaceInstUsesWith(I, LHS);
// X + X --> X << 1
@@ -512,7 +564,8 @@
// Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression
// is not used by anyone else...
//
- if (Op1I->getOpcode() == Instruction::Sub) {
+ if (Op1I->getOpcode() == Instruction::Sub &&
+ !Op1I->getType()->isFloatingPoint()) {
// Swap the two operands of the subexpr...
Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1);
Op1I->setOperand(0, IIOp1);
@@ -556,6 +609,26 @@
return 0;
}
+/// isSignBitCheck - Given an exploded setcc instruction, return true if it
+/// really just checks whether the most significant (sign) bit is set.
+static bool isSignBitCheck(unsigned Opcode, Value *LHS, ConstantInt *RHS) {
+ if (RHS->getType()->isSigned()) {
+ // True if source is LHS < 0 or LHS <= -1
+ return Opcode == Instruction::SetLT && RHS->isNullValue() ||
+ Opcode == Instruction::SetLE && RHS->isAllOnesValue();
+ } else {
+ ConstantUInt *RHSC = cast<ConstantUInt>(RHS);
+ // True if source is LHS > 127 or LHS >= 128, where the constants depend on
+ // the size of the integer type.
+ if (Opcode == Instruction::SetGE)
+ return RHSC->getValue() == 1ULL<<(RHS->getType()->getPrimitiveSize()*8-1);
+ if (Opcode == Instruction::SetGT)
+ return RHSC->getValue() ==
+ (1ULL << (RHS->getType()->getPrimitiveSize()*8-1))-1;
+ }
+ return false;
+}
+
Instruction *InstCombiner::visitMul(BinaryOperator &I) {
bool Changed = SimplifyCommutative(I);
Value *Op0 = I.getOperand(0);
@@ -598,6 +671,52 @@
if (Value *Op1v = dyn_castNegVal(I.getOperand(1)))
return BinaryOperator::create(Instruction::Mul, Op0v, Op1v);
+ // If one of the operands of the multiply is a cast from a boolean value, then
+ // we know the bool is either zero or one, so this is a 'masking' multiply.
+ // See if we can simplify things based on how the boolean was originally
+ // formed.
+ CastInst *BoolCast = 0;
+ if (CastInst *CI = dyn_cast<CastInst>(I.getOperand(0)))
+ if (CI->getOperand(0)->getType() == Type::BoolTy)
+ BoolCast = CI;
+ if (!BoolCast)
+ if (CastInst *CI = dyn_cast<CastInst>(I.getOperand(1)))
+ if (CI->getOperand(0)->getType() == Type::BoolTy)
+ BoolCast = CI;
+ if (BoolCast) {
+ if (SetCondInst *SCI = dyn_cast<SetCondInst>(BoolCast->getOperand(0))) {
+ Value *SCIOp0 = SCI->getOperand(0), *SCIOp1 = SCI->getOperand(1);
+ const Type *SCOpTy = SCIOp0->getType();
+
+ // If the setcc is true iff the sign bit of X is set, then convert this
+ // multiply into a shift/and combination.
+ if (isa<ConstantInt>(SCIOp1) &&
+ isSignBitCheck(SCI->getOpcode(), SCIOp0, cast<ConstantInt>(SCIOp1))) {
+ // Shift the X value right to turn it into "all signbits".
+ Constant *Amt = ConstantUInt::get(Type::UByteTy,
+ SCOpTy->getPrimitiveSize()*8-1);
+ if (SCIOp0->getType()->isUnsigned()) {
+ const Type *NewTy = getSignedIntegralType(SCIOp0->getType());
+ SCIOp0 = InsertNewInstBefore(new CastInst(SCIOp0, NewTy,
+ SCIOp0->getName()), I);
+ }
+
+ Value *V =
+ InsertNewInstBefore(new ShiftInst(Instruction::Shr, SCIOp0, Amt,
+ BoolCast->getOperand(0)->getName()+
+ ".mask"), I);
+
+ // If the multiply type is not the same as the source type, sign extend
+ // or truncate to the multiply type.
+ if (I.getType() != V->getType())
+ V = InsertNewInstBefore(new CastInst(V, I.getType(), V->getName()),I);
+
+ Value *OtherOp = Op0 == BoolCast ? I.getOperand(1) : Op0;
+ return BinaryOperator::create(Instruction::And, V, OtherOp);
+ }
+ }
+ }
+
return Changed ? &I : 0;
}
@@ -1001,15 +1120,26 @@
return Changed ? &I : 0;
}
+// XorSelf - Implements: X ^ X --> 0
+struct XorSelf {
+ Value *RHS;
+ XorSelf(Value *rhs) : RHS(rhs) {}
+ bool shouldApply(Value *LHS) const { return LHS == RHS; }
+ Instruction *apply(BinaryOperator &Xor) const {
+ return &Xor;
+ }
+};
Instruction *InstCombiner::visitXor(BinaryOperator &I) {
bool Changed = SimplifyCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- // xor X, X = 0
- if (Op0 == Op1)
+ // xor X, X = 0, even if X is nested in a sequence of Xor's.
+ if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1))) {
+ assert(Result == &I && "AssociativeOpt didn't work?");
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ }
if (ConstantIntegral *RHS = dyn_cast<ConstantIntegral>(Op1)) {
// xor X, 0 == X
@@ -1074,7 +1204,7 @@
ConstantIntegral::getAllOnesValue(I.getType()));
if (Instruction *Op1I = dyn_cast<Instruction>(Op1))
- if (Op1I->getOpcode() == Instruction::Or)
+ if (Op1I->getOpcode() == Instruction::Or) {
if (Op1I->getOperand(0) == Op0) { // B^(B|A) == (A|B)^B
cast<BinaryOperator>(Op1I)->swapOperands();
I.swapOperands();
@@ -1082,7 +1212,13 @@
} else if (Op1I->getOperand(1) == Op0) { // B^(A|B) == (A|B)^B
I.swapOperands();
std::swap(Op0, Op1);
- }
+ }
+ } else if (Op1I->getOpcode() == Instruction::Xor) {
+ if (Op0 == Op1I->getOperand(0)) // A^(A^B) == B
+ return ReplaceInstUsesWith(I, Op1I->getOperand(1));
+ else if (Op0 == Op1I->getOperand(1)) // A^(B^A) == B
+ return ReplaceInstUsesWith(I, Op1I->getOperand(0));
+ }
if (Instruction *Op0I = dyn_cast<Instruction>(Op0))
if (Op0I->getOpcode() == Instruction::Or && Op0I->hasOneUse()) {
@@ -1094,6 +1230,11 @@
return BinaryOperator::create(Instruction::And, Op0I->getOperand(0),
NotB);
}
+ } else if (Op0I->getOpcode() == Instruction::Xor) {
+ if (Op1 == Op0I->getOperand(0)) // (A^B)^A == B
+ return ReplaceInstUsesWith(I, Op0I->getOperand(1));
+ else if (Op1 == Op0I->getOperand(1)) // (B^A)^A == B
+ return ReplaceInstUsesWith(I, Op0I->getOperand(0));
}
// (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1^C2 == 0
@@ -1250,14 +1391,7 @@
Value *X = BO->getOperand(0);
// If 'X' is not signed, insert a cast now...
if (!BOC->getType()->isSigned()) {
- const Type *DestTy;
- switch (BOC->getType()->getPrimitiveID()) {
- case Type::UByteTyID: DestTy = Type::SByteTy; break;
- case Type::UShortTyID: DestTy = Type::ShortTy; break;
- case Type::UIntTyID: DestTy = Type::IntTy; break;
- case Type::ULongTyID: DestTy = Type::LongTy; break;
- default: assert(0 && "Invalid unsigned integer type!"); abort();
- }
+ const Type *DestTy = getSignedIntegralType(BOC->getType());
CastInst *NewCI = new CastInst(X,DestTy,X->getName()+".signed");
InsertNewInstBefore(NewCI, I);
X = NewCI;
@@ -1270,6 +1404,43 @@
default: break;
}
}
+ } else { // Not a SetEQ/SetNE
+ // If the LHS is a cast from an integral value of the same size,
+ if (CastInst *Cast = dyn_cast<CastInst>(Op0)) {
+ Value *CastOp = Cast->getOperand(0);
+ const Type *SrcTy = CastOp->getType();
+ unsigned SrcTySize = SrcTy->getPrimitiveSize();
+ if (SrcTy != Cast->getType() && SrcTy->isInteger() &&
+ SrcTySize == Cast->getType()->getPrimitiveSize()) {
+ assert((SrcTy->isSigned() ^ Cast->getType()->isSigned()) &&
+ "Source and destination signednesses should differ!");
+ if (Cast->getType()->isSigned()) {
+ // If this is a signed comparison, check for comparisons in the
+ // vicinity of zero.
+ if (I.getOpcode() == Instruction::SetLT && CI->isNullValue())
+ // X < 0 => x > 127
+ return BinaryOperator::create(Instruction::SetGT, CastOp,
+ ConstantUInt::get(SrcTy, (1ULL << (SrcTySize*8-1))-1));
+ else if (I.getOpcode() == Instruction::SetGT &&
+ cast<ConstantSInt>(CI)->getValue() == -1)
+ // X > -1 => x < 128
+ return BinaryOperator::create(Instruction::SetLT, CastOp,
+ ConstantUInt::get(SrcTy, 1ULL << (SrcTySize*8-1)));
+ } else {
+ ConstantUInt *CUI = cast<ConstantUInt>(CI);
+ if (I.getOpcode() == Instruction::SetLT &&
+ CUI->getValue() == 1ULL << (SrcTySize*8-1))
+ // X < 128 => X > -1
+ return BinaryOperator::create(Instruction::SetGT, CastOp,
+ ConstantSInt::get(SrcTy, -1));
+ else if (I.getOpcode() == Instruction::SetGT &&
+ CUI->getValue() == (1ULL << (SrcTySize*8-1))-1)
+ // X > 127 => X < 0
+ return BinaryOperator::create(Instruction::SetLT, CastOp,
+ Constant::getNullValue(SrcTy));
+ }
+ }
+ }
}
// Check to see if we are comparing against the minimum or maximum value...
@@ -1306,6 +1477,15 @@
if (I.getOpcode() == Instruction::SetLE) // A <= MAX-1 -> A != MAX
return BinaryOperator::create(Instruction::SetNE, Op0, AddOne(CI));
}
+
+ // If we still have a setle or setge instruction, turn it into the
+ // appropriate setlt or setgt instruction. Since the border cases have
+ // already been handled above, this requires little checking.
+ //
+ if (I.getOpcode() == Instruction::SetLE)
+ return BinaryOperator::create(Instruction::SetLT, Op0, AddOne(CI));
+ if (I.getOpcode() == Instruction::SetGE)
+ return BinaryOperator::create(Instruction::SetGT, Op0, SubOne(CI));
}
// Test to see if the operands of the setcc are casted versions of other
@@ -1416,9 +1596,14 @@
// of a signed value.
//
unsigned TypeBits = Op0->getType()->getPrimitiveSize()*8;
- if (CUI->getValue() >= TypeBits &&
- (!Op0->getType()->isSigned() || isLeftShift))
- return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType()));
+ if (CUI->getValue() >= TypeBits) {
+ if (!Op0->getType()->isSigned() || isLeftShift)
+ return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType()));
+ else {
+ I.setOperand(1, ConstantUInt::get(Type::UByteTy, TypeBits-1));
+ return &I;
+ }
+ }
// ((X*C1) << C2) == (X * (C1 << C2))
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0))
@@ -1482,6 +1667,8 @@
// Check for (A << c1) << c2 and (A >> c1) >> c2
if (I.getOpcode() == Op0SI->getOpcode()) {
unsigned Amt = ShiftAmt1+ShiftAmt2; // Fold into one big shift...
+ if (Op0->getType()->getPrimitiveSize()*8 < Amt)
+ Amt = Op0->getType()->getPrimitiveSize()*8;
return new ShiftInst(I.getOpcode(), Op0SI->getOperand(0),
ConstantUInt::get(Type::UByteTy, Amt));
}
@@ -1747,6 +1934,23 @@
// CallInst simplification
//
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
+ // Intrinsics cannot occur in an invoke, so handle them here instead of in
+ // visitCallSite.
+ if (Function *F = CI.getCalledFunction())
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy:
+ case Intrinsic::memset:
+ // memmove/cpy/set of zero bytes is a noop.
+ if (Constant *NumBytes = dyn_cast<Constant>(CI.getOperand(3))) {
+ if (NumBytes->isNullValue())
+ return EraseInstFromFunction(CI);
+ }
+ break;
+ default:
+ break;
+ }
+
return visitCallSite(&CI);
}
@@ -1756,19 +1960,6 @@
return visitCallSite(&II);
}
-// getPromotedType - Return the specified type promoted as it would be to pass
-// though a va_arg area...
-static const Type *getPromotedType(const Type *Ty) {
- switch (Ty->getPrimitiveID()) {
- case Type::SByteTyID:
- case Type::ShortTyID: return Type::IntTy;
- case Type::UByteTyID:
- case Type::UShortTyID: return Type::UIntTy;
- case Type::FloatTyID: return Type::DoubleTy;
- default: return Ty;
- }
-}
-
// visitCallSite - Improvements for call and invoke instructions.
//
Instruction *InstCombiner::visitCallSite(CallSite CS) {
@@ -1838,7 +2029,7 @@
UI != E; ++UI)
if (PHINode *PN = dyn_cast<PHINode>(*UI))
if (PN->getParent() == II->getNormalDest() ||
- PN->getParent() == II->getExceptionalDest())
+ PN->getParent() == II->getUnwindDest())
return false;
}
@@ -1903,7 +2094,7 @@
Instruction *NC;
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
- NC = new InvokeInst(Callee, II->getNormalDest(), II->getExceptionalDest(),
+ NC = new InvokeInst(Callee, II->getNormalDest(), II->getUnwindDest(),
Args, Caller->getName(), Caller);
} else {
NC = new CallInst(Callee, Args, Caller->getName(), Caller);
@@ -1925,7 +2116,7 @@
// Otherwise, it's a call, just insert cast right after the call instr
InsertNewInstBefore(NC, *Caller);
}
- AddUsesToWorkList(*Caller);
+ AddUsersToWorkList(*Caller);
} else {
NV = Constant::getNullValue(Caller->getType());
}
@@ -1945,6 +2136,35 @@
Instruction *InstCombiner::visitPHINode(PHINode &PN) {
if (Value *V = hasConstantValue(&PN))
return ReplaceInstUsesWith(PN, V);
+
+ // If the only user of this instruction is a cast instruction, and all of the
+ // incoming values are constants, change this PHI to merge together the casted
+ // constants.
+ if (PN.hasOneUse())
+ if (CastInst *CI = dyn_cast<CastInst>(PN.use_back()))
+ if (CI->getType() != PN.getType()) { // noop casts will be folded
+ bool AllConstant = true;
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
+ if (!isa<Constant>(PN.getIncomingValue(i))) {
+ AllConstant = false;
+ break;
+ }
+ if (AllConstant) {
+ // Make a new PHI with all casted values.
+ PHINode *New = new PHINode(CI->getType(), PN.getName(), &PN);
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Constant *OldArg = cast<Constant>(PN.getIncomingValue(i));
+ New->addIncoming(ConstantExpr::getCast(OldArg, New->getType()),
+ PN.getIncomingBlock(i));
+ }
+
+ // Update the cast instruction.
+ CI->setOperand(0, New);
+ WorkList.push_back(CI); // revisit the cast instruction to fold.
+ WorkList.push_back(New); // Make sure to revisit the new Phi
+ return &PN; // PN is now dead!
+ }
+ }
return 0;
}
@@ -1952,9 +2172,14 @@
Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Is it 'getelementptr %P, long 0' or 'getelementptr %P'
// If so, eliminate the noop.
- if ((GEP.getNumOperands() == 2 &&
- GEP.getOperand(1) == Constant::getNullValue(Type::LongTy)) ||
- GEP.getNumOperands() == 1)
+ if (GEP.getNumOperands() == 1)
+ return ReplaceInstUsesWith(GEP, GEP.getOperand(0));
+
+ bool HasZeroPointerIndex = false;
+ if (Constant *C = dyn_cast<Constant>(GEP.getOperand(1)))
+ HasZeroPointerIndex = C->isNullValue();
+
+ if (GEP.getNumOperands() == 2 && HasZeroPointerIndex)
return ReplaceInstUsesWith(GEP, GEP.getOperand(0));
// Combine Indices - If the source pointer to this getelementptr instruction
@@ -1975,12 +2200,20 @@
assert(Sum && "Constant folding of longs failed!?");
GEP.setOperand(0, Src->getOperand(0));
GEP.setOperand(1, Sum);
- AddUsesToWorkList(*Src); // Reduce use count of Src
+ AddUsersToWorkList(*Src); // Reduce use count of Src
return &GEP;
} else if (Src->getNumOperands() == 2) {
// Replace: gep (gep %P, long B), long A, ...
// With: T = long A+B; gep %P, T, ...
//
+ // Note that if our source is a gep chain itself that we wait for that
+ // chain to be resolved before we perform this transformation. This
+ // avoids us creating a TON of code in some cases.
+ //
+ if (isa<GetElementPtrInst>(Src->getOperand(0)) &&
+ cast<Instruction>(Src->getOperand(0))->getNumOperands() == 2)
+ return 0; // Wait until our source is folded to completion.
+
Value *Sum = BinaryOperator::create(Instruction::Add, Src->getOperand(1),
GEP.getOperand(1),
Src->getName()+".sum", &GEP);
@@ -2021,6 +2254,31 @@
// Replace all uses of the GEP with the new constexpr...
return ReplaceInstUsesWith(GEP, CE);
}
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP.getOperand(0))) {
+ if (CE->getOpcode() == Instruction::Cast) {
+ if (HasZeroPointerIndex) {
+ // transform: GEP (cast [10 x ubyte]* X to [0 x ubyte]*), long 0, ...
+ // into : GEP [10 x ubyte]* X, long 0, ...
+ //
+ // This occurs when the program declares an array extern like "int X[];"
+ //
+ Constant *X = CE->getOperand(0);
+ const PointerType *CPTy = cast<PointerType>(CE->getType());
+ if (const PointerType *XTy = dyn_cast<PointerType>(X->getType()))
+ if (const ArrayType *XATy =
+ dyn_cast<ArrayType>(XTy->getElementType()))
+ if (const ArrayType *CATy =
+ dyn_cast<ArrayType>(CPTy->getElementType()))
+ if (CATy->getElementType() == XATy->getElementType()) {
+ // At this point, we know that the cast source type is a pointer
+ // to an array of the same type as the destination pointer
+ // array. Because the array type is never stepped over (there
+ // is a leading zero) we can fold the cast into this GEP.
+ GEP.setOperand(0, X);
+ return &GEP;
+ }
+ }
+ }
}
return 0;
@@ -2071,6 +2329,11 @@
return &FI;
}
+ // If we have 'free null' delete the instruction. This can happen in stl code
+ // when lots of inlining happens.
+ if (isa<ConstantPointerNull>(Op))
+ return EraseInstFromFunction(FI);
+
return 0;
}
@@ -2087,11 +2350,13 @@
// addressing...
for (unsigned i = 2, e = CE->getNumOperands(); i != e; ++i)
if (ConstantUInt *CU = dyn_cast<ConstantUInt>(CE->getOperand(i))) {
- ConstantStruct *CS = cast<ConstantStruct>(C);
+ ConstantStruct *CS = dyn_cast<ConstantStruct>(C);
+ if (CS == 0) return 0;
if (CU->getValue() >= CS->getValues().size()) return 0;
C = cast<Constant>(CS->getValues()[CU->getValue()]);
} else if (ConstantSInt *CS = dyn_cast<ConstantSInt>(CE->getOperand(i))) {
- ConstantArray *CA = cast<ConstantArray>(C);
+ ConstantArray *CA = dyn_cast<ConstantArray>(C);
+ if (CA == 0) return 0;
if ((uint64_t)CS->getValue() >= CA->getValues().size()) return 0;
C = cast<Constant>(CA->getValues()[CS->getValue()]);
} else
@@ -2125,7 +2390,7 @@
Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
// Change br (not X), label True, label False to: br X, label False, True
- if (BI.isConditional() && !isa<Constant>(BI.getCondition()))
+ if (BI.isConditional() && !isa<Constant>(BI.getCondition())) {
if (Value *V = dyn_castNotVal(BI.getCondition())) {
BasicBlock *TrueDest = BI.getSuccessor(0);
BasicBlock *FalseDest = BI.getSuccessor(1);
@@ -2134,7 +2399,29 @@
BI.setSuccessor(0, FalseDest);
BI.setSuccessor(1, TrueDest);
return &BI;
+ } else if (SetCondInst *I = dyn_cast<SetCondInst>(BI.getCondition())) {
+ // Canonicalize setne -> seteq
+ if ((I->getOpcode() == Instruction::SetNE ||
+ I->getOpcode() == Instruction::SetLE ||
+ I->getOpcode() == Instruction::SetGE) && I->hasOneUse()) {
+ std::string Name = I->getName(); I->setName("");
+ Instruction::BinaryOps NewOpcode =
+ SetCondInst::getInverseCondition(I->getOpcode());
+ Value *NewSCC = BinaryOperator::create(NewOpcode, I->getOperand(0),
+ I->getOperand(1), Name, I);
+ BasicBlock *TrueDest = BI.getSuccessor(0);
+ BasicBlock *FalseDest = BI.getSuccessor(1);
+ // Swap Destinations and condition...
+ BI.setCondition(NewSCC);
+ BI.setSuccessor(0, FalseDest);
+ BI.setSuccessor(1, TrueDest);
+ removeFromWorkList(I);
+ I->getParent()->getInstList().erase(I);
+ WorkList.push_back(cast<Instruction>(NewSCC));
+ return &BI;
+ }
}
+ }
return 0;
}
@@ -2159,9 +2446,7 @@
if (isInstructionTriviallyDead(I)) {
// Add operands to the worklist...
if (I->getNumOperands() < 4)
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(i)))
- WorkList.push_back(Op);
+ AddUsesToWorkList(*I);
++NumDeadInst;
I->getParent()->getInstList().erase(I);
@@ -2172,9 +2457,7 @@
// Instruction isn't dead, see if we can constant propagate it...
if (Constant *C = ConstantFoldInstruction(I)) {
// Add operands to the worklist...
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(i)))
- WorkList.push_back(Op);
+ AddUsesToWorkList(*I);
ReplaceInstUsesWith(*I, C);
++NumConstProp;
@@ -2220,7 +2503,7 @@
if (Result) {
WorkList.push_back(Result);
- AddUsesToWorkList(*Result);
+ AddUsersToWorkList(*Result);
}
Changed = true;
}
Index: llvm/lib/Transforms/Scalar/LICM.cpp
diff -u llvm/lib/Transforms/Scalar/LICM.cpp:1.55 llvm/lib/Transforms/Scalar/LICM.cpp:1.55.2.1
--- llvm/lib/Transforms/Scalar/LICM.cpp:1.55 Wed Jan 7 18:09:44 2004
+++ llvm/lib/Transforms/Scalar/LICM.cpp Mon Mar 1 17:58:16 2004
@@ -679,7 +679,7 @@
I != E; ++I) {
AliasSet &AS = *I;
// We can promote this alias set if it has a store, if it is a "Must" alias
- // set, and if the pointer is loop invariant.
+ // set, if the pointer is loop invariant, and if we are not eliminating any
+ // volatile loads or stores.
if (!AS.isForwardingAliasSet() && AS.isMod() && AS.isMustAlias() &&
!AS.isVolatile() && isLoopInvariant(AS.begin()->first)) {
assert(AS.begin() != AS.end() &&
Index: llvm/lib/Transforms/Scalar/LoopSimplify.cpp
diff -u llvm/lib/Transforms/Scalar/LoopSimplify.cpp:1.30 llvm/lib/Transforms/Scalar/LoopSimplify.cpp:1.30.2.1
--- llvm/lib/Transforms/Scalar/LoopSimplify.cpp:1.30 Wed Jan 7 18:09:44 2004
+++ llvm/lib/Transforms/Scalar/LoopSimplify.cpp Mon Mar 1 17:58:16 2004
@@ -151,7 +151,7 @@
const std::vector<BasicBlock*> &Preds) {
// Create new basic block, insert right before the original block...
- BasicBlock *NewBB = new BasicBlock(BB->getName()+Suffix, BB);
+ BasicBlock *NewBB = new BasicBlock(BB->getName()+Suffix, BB->getParent(), BB);
// The preheader first gets an unconditional branch to the loop header...
BranchInst *BI = new BranchInst(BB, NewBB);
@@ -484,20 +484,44 @@
/// dominators, dominator trees, and dominance frontiers) after a new block has
/// been added to the CFG.
///
-/// This only supports the case when an existing block (known as "Exit"), had
-/// some of its predecessors factored into a new basic block. This
+/// This only supports the case when an existing block (known as "NewBBSucc"),
+/// had some of its predecessors factored into a new basic block. This
/// transformation inserts a new basic block ("NewBB"), with a single
-/// unconditional branch to Exit, and moves some predecessors of "Exit" to now
-/// branch to NewBB. These predecessors are listed in PredBlocks, even though
-/// they are the same as pred_begin(NewBB)/pred_end(NewBB).
+/// unconditional branch to NewBBSucc, and moves some predecessors of
+/// "NewBBSucc" to now branch to NewBB. These predecessors are listed in
+/// PredBlocks, even though they are the same as
+/// pred_begin(NewBB)/pred_end(NewBB).
///
void LoopSimplify::UpdateDomInfoForRevectoredPreds(BasicBlock *NewBB,
std::vector<BasicBlock*> &PredBlocks) {
+ assert(!PredBlocks.empty() && "No predblocks??");
assert(succ_begin(NewBB) != succ_end(NewBB) &&
++succ_begin(NewBB) == succ_end(NewBB) &&
"NewBB should have a single successor!");
+ BasicBlock *NewBBSucc = *succ_begin(NewBB);
DominatorSet &DS = getAnalysis<DominatorSet>();
+ // The newly inserted basic block will dominate existing basic blocks iff the
+ // PredBlocks dominate all of the non-pred blocks. If all predblocks dominate
+ // the non-pred blocks, then they all must be the same block!
+ bool NewBBDominatesNewBBSucc = true;
+ {
+ BasicBlock *OnePred = PredBlocks[0];
+ for (unsigned i = 1, e = PredBlocks.size(); i != e; ++i)
+ if (PredBlocks[i] != OnePred) {
+ NewBBDominatesNewBBSucc = false;
+ break;
+ }
+
+ if (NewBBDominatesNewBBSucc)
+ for (pred_iterator PI = pred_begin(NewBBSucc), E = pred_end(NewBBSucc);
+ PI != E; ++PI)
+ if (*PI != NewBB && !DS.dominates(NewBBSucc, *PI)) {
+ NewBBDominatesNewBBSucc = false;
+ break;
+ }
+ }
+
// Update dominator information... The blocks that dominate NewBB are the
// intersection of the dominators of predecessors, plus the block itself.
// The newly created basic block does not dominate anything except itself.
@@ -508,13 +532,22 @@
NewBBDomSet.insert(NewBB); // All blocks dominate themselves...
DS.addBasicBlock(NewBB, NewBBDomSet);
+ // If NewBB dominates some blocks, then it will dominate all blocks that
+ // NewBBSucc does.
+ if (NewBBDominatesNewBBSucc) {
+ BasicBlock *PredBlock = PredBlocks[0];
+ Function *F = NewBB->getParent();
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I)
+ if (DS.dominates(NewBBSucc, I))
+ DS.addDominator(I, NewBB);
+ }
+
// Update immediate dominator information if we have it...
BasicBlock *NewBBIDom = 0;
if (ImmediateDominators *ID = getAnalysisToUpdate<ImmediateDominators>()) {
- // This block does not strictly dominate anything, so it is not an immediate
- // dominator. To find the immediate dominator of the new exit node, we
- // trace up the immediate dominators of a predecessor until we find a basic
- // block that dominates the exit block.
+ // To find the immediate dominator of the new exit node, we trace up the
+ // immediate dominators of a predecessor until we find a basic block that
+ // dominates the exit block.
//
BasicBlock *Dom = PredBlocks[0]; // Some random predecessor...
while (!NewBBDomSet.count(Dom)) { // Loop until we find a dominator...
@@ -525,13 +558,21 @@
// Set the immediate dominator now...
ID->addNewBlock(NewBB, Dom);
NewBBIDom = Dom; // Reuse this if calculating DominatorTree info...
+
+ // If NewBB strictly dominates other blocks, we need to update their idom's
+ // now. The only block that need adjustment is the NewBBSucc block, whose
+ // idom should currently be set to PredBlocks[0].
+ if (NewBBDominatesNewBBSucc) {
+ assert(ID->get(NewBBSucc) == PredBlocks[0] &&
+ "Immediate dominator update code broken!");
+ ID->setImmediateDominator(NewBBSucc, NewBB);
+ }
}
// Update DominatorTree information if it is active.
if (DominatorTree *DT = getAnalysisToUpdate<DominatorTree>()) {
- // NewBB doesn't dominate anything, so just create a node and link it into
- // its immediate dominator. If we don't have ImmediateDominator info
- // around, calculate the idom as above.
+ // If we don't have ImmediateDominator info around, calculate the idom as
+ // above.
DominatorTree::Node *NewBBIDomNode;
if (NewBBIDom) {
NewBBIDomNode = DT->getNode(NewBBIDom);
@@ -543,27 +584,58 @@
}
}
- // Create the new dominator tree node...
- DT->createNewNode(NewBB, NewBBIDomNode);
+ // Create the new dominator tree node... and set the idom of NewBB.
+ DominatorTree::Node *NewBBNode = DT->createNewNode(NewBB, NewBBIDomNode);
+
+ // If NewBB strictly dominates other blocks, then it is now the immediate
+ // dominator of NewBBSucc. Update the dominator tree as appropriate.
+ if (NewBBDominatesNewBBSucc) {
+ DominatorTree::Node *NewBBSuccNode = DT->getNode(NewBBSucc);
+ assert(NewBBSuccNode->getIDom()->getBlock() == PredBlocks[0] &&
+ "Immediate tree update code broken!");
+ DT->changeImmediateDominator(NewBBSuccNode, NewBBNode);
+ }
}
// Update dominance frontier information...
if (DominanceFrontier *DF = getAnalysisToUpdate<DominanceFrontier>()) {
- // DF(NewBB) is {Exit} because NewBB does not strictly dominate Exit, but it
- // does dominate itself (and there is an edge (NewBB -> Exit)). Exit is the
- // single successor of NewBB.
- DominanceFrontier::DomSetType NewDFSet;
- BasicBlock *Exit = *succ_begin(NewBB);
- NewDFSet.insert(Exit);
- DF->addBasicBlock(NewBB, NewDFSet);
+ // If NewBB dominates NewBBSucc, then the global dominance frontiers are not
+ // changed. DF(NewBB) is now going to be the DF(PredBlocks[0]) without the
+ // stuff that the new block does not dominate a predecessor of.
+ if (NewBBDominatesNewBBSucc) {
+ DominanceFrontier::iterator DFI = DF->find(PredBlocks[0]);
+ if (DFI != DF->end()) {
+ DominanceFrontier::DomSetType Set = DFI->second;
+ // Filter out stuff in Set that we do not dominate a predecessor of.
+ for (DominanceFrontier::DomSetType::iterator SetI = Set.begin(),
+ E = Set.end(); SetI != E;) {
+ bool DominatesPred = false;
+ for (pred_iterator PI = pred_begin(*SetI), E = pred_end(*SetI);
+ PI != E; ++PI)
+ if (DS.dominates(NewBB, *PI))
+ DominatesPred = true;
+ if (!DominatesPred)
+ Set.erase(SetI++);
+ else
+ ++SetI;
+ }
- // Now we must loop over all of the dominance frontiers in the function,
- // replacing occurrences of Exit with NewBB in some cases. All blocks that
- // dominate a block in PredBlocks and contained Exit in their dominance
- // frontier must be updated to contain NewBB instead. This only occurs if
- // there is more than one block in PredBlocks.
- //
- if (PredBlocks.size() > 1) {
+ DF->addBasicBlock(NewBB, Set);
+ }
+
+ } else {
+ // DF(NewBB) is {NewBBSucc} because NewBB does not strictly dominate
+ // NewBBSucc, but it does dominate itself (and there is an edge (NewBB ->
+ // NewBBSucc)). NewBBSucc is the single successor of NewBB.
+ DominanceFrontier::DomSetType NewDFSet;
+ NewDFSet.insert(NewBBSucc);
+ DF->addBasicBlock(NewBB, NewDFSet);
+
+ // Now we must loop over all of the dominance frontiers in the function,
+ // replacing occurrences of NewBBSucc with NewBB in some cases. All
+ // blocks that dominate a block in PredBlocks and contained NewBBSucc in
+ // their dominance frontier must be updated to contain NewBB instead.
+ //
for (unsigned i = 0, e = PredBlocks.size(); i != e; ++i) {
BasicBlock *Pred = PredBlocks[i];
// Get all of the dominators of the predecessor...
@@ -572,13 +644,13 @@
PDE = PredDoms.end(); PDI != PDE; ++PDI) {
BasicBlock *PredDom = *PDI;
- // If the Exit node is in DF(PredDom), then PredDom didn't dominate
- // Exit but did dominate a predecessor of it. Now we change this
- // entry to include NewBB in the DF instead of Exit.
+ // If the NewBBSucc node is in DF(PredDom), then PredDom didn't
+ // dominate NewBBSucc but did dominate a predecessor of it. Now we
+ // change this entry to include NewBB in the DF instead of NewBBSucc.
DominanceFrontier::iterator DFI = DF->find(PredDom);
assert(DFI != DF->end() && "No dominance frontier for node?");
- if (DFI->second.count(Exit)) {
- DF->removeFromFrontier(DFI, Exit);
+ if (DFI->second.count(NewBBSucc)) {
+ DF->removeFromFrontier(DFI, NewBBSucc);
DF->addToFrontier(DFI, NewBB);
}
}
Index: llvm/lib/Transforms/Scalar/LowerAllocations.cpp
diff -u llvm/lib/Transforms/Scalar/LowerAllocations.cpp:1.43 llvm/lib/Transforms/Scalar/LowerAllocations.cpp:1.43.2.1
--- llvm/lib/Transforms/Scalar/LowerAllocations.cpp:1.43 Fri Jan 9 00:02:20 2004
+++ llvm/lib/Transforms/Scalar/LowerAllocations.cpp Mon Mar 1 17:58:16 2004
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// The LowerAllocations transformation is a target dependent tranformation
+// The LowerAllocations transformation is a target-dependent transformation
// because it depends on the size of data types and alignment constraints.
//
//===----------------------------------------------------------------------===//
@@ -67,8 +67,13 @@
//
bool LowerAllocations::doInitialization(Module &M) {
const Type *SBPTy = PointerType::get(Type::SByteTy);
- MallocFunc = M.getOrInsertFunction("malloc", SBPTy, Type::UIntTy, 0);
- FreeFunc = M.getOrInsertFunction("free" , Type::VoidTy, SBPTy, 0);
+ MallocFunc = M.getNamedFunction("malloc");
+ FreeFunc = M.getNamedFunction("free");
+
+ if (MallocFunc == 0)
+ MallocFunc = M.getOrInsertFunction("malloc", SBPTy, Type::UIntTy, 0);
+ if (FreeFunc == 0)
+ FreeFunc = M.getOrInsertFunction("free" , Type::VoidTy, SBPTy, 0);
return true;
}
@@ -101,13 +106,30 @@
MallocArg = BinaryOperator::create(Instruction::Mul, MI->getOperand(0),
MallocArg, "", I);
}
+
+ const FunctionType *MallocFTy = MallocFunc->getFunctionType();
+ std::vector<Value*> MallocArgs;
+ if (MallocFTy->getNumParams() > 0 || MallocFTy->isVarArg()) {
+ if (MallocFTy->getNumParams() > 0 &&
+ MallocFTy->getParamType(0) != Type::UIntTy)
+ MallocArg = new CastInst(MallocArg, MallocFTy->getParamType(0), "",I);
+ MallocArgs.push_back(MallocArg);
+ }
+
+ // If malloc is prototyped to take extra arguments, pass nulls.
+ for (unsigned i = 1; i < MallocFTy->getNumParams(); ++i)
+ MallocArgs.push_back(Constant::getNullValue(MallocFTy->getParamType(i)));
+
// Create the call to Malloc...
- CallInst *MCall = new CallInst(MallocFunc,
- std::vector<Value*>(1, MallocArg), "", I);
+ CallInst *MCall = new CallInst(MallocFunc, MallocArgs, "", I);
// Create a cast instruction to convert to the right type...
- CastInst *MCast = new CastInst(MCall, MI->getType(), "", I);
+ Value *MCast;
+ if (MCall->getType() != Type::VoidTy)
+ MCast = new CastInst(MCall, MI->getType(), "", I);
+ else
+ MCast = Constant::getNullValue(MI->getType());
// Replace all uses of the old malloc inst with the cast inst
MI->replaceAllUsesWith(MCast);
@@ -115,13 +137,23 @@
Changed = true;
++NumLowered;
} else if (FreeInst *FI = dyn_cast<FreeInst>(I)) {
- // Cast the argument to free into a ubyte*...
- CastInst *MCast = new CastInst(FI->getOperand(0),
- PointerType::get(Type::SByteTy), "", I);
+ const FunctionType *FreeFTy = FreeFunc->getFunctionType();
+ std::vector<Value*> FreeArgs;
+
+ if (FreeFTy->getNumParams() > 0 || FreeFTy->isVarArg()) {
+ Value *MCast = FI->getOperand(0);
+ if (FreeFTy->getNumParams() > 0 &&
+ FreeFTy->getParamType(0) != MCast->getType())
+ MCast = new CastInst(MCast, FreeFTy->getParamType(0), "", I);
+ FreeArgs.push_back(MCast);
+ }
+
+ // If free is prototyped to take extra arguments, pass nulls.
+ for (unsigned i = 1; i < FreeFTy->getNumParams(); ++i)
+ FreeArgs.push_back(Constant::getNullValue(FreeFTy->getParamType(i)));
// Insert a call to the free function...
- CallInst *FCall = new CallInst(FreeFunc, std::vector<Value*>(1, MCast),
- "", I);
+ new CallInst(FreeFunc, FreeArgs, "", I);
// Delete the old free instruction
I = --BBIL.erase(I);
Index: llvm/lib/Transforms/Scalar/LowerInvoke.cpp
diff -u llvm/lib/Transforms/Scalar/LowerInvoke.cpp:1.4 llvm/lib/Transforms/Scalar/LowerInvoke.cpp:1.4.4.1
--- llvm/lib/Transforms/Scalar/LowerInvoke.cpp:1.4 Wed Dec 10 14:22:42 2003
+++ llvm/lib/Transforms/Scalar/LowerInvoke.cpp Mon Mar 1 17:58:16 2004
@@ -8,47 +8,205 @@
//===----------------------------------------------------------------------===//
//
// This transformation is designed for use by code generators which do not yet
-// support stack unwinding. This pass gives them the ability to execute any
-// program which does not throw an exception, by turning 'invoke' instructions
-// into calls and by turning 'unwind' instructions into calls to abort().
+// support stack unwinding. This pass supports two models of exception handling
+// lowering, the 'cheap' support and the 'expensive' support.
+//
+// 'Cheap' exception handling support gives the program the ability to execute
+// any program which does not "throw an exception", by turning 'invoke'
+// instructions into calls and by turning 'unwind' instructions into calls to
+// abort(). If the program does dynamically use the unwind instruction, the
+// program will print a message then abort.
+//
+// 'Expensive' exception handling support gives full exception handling support
+// to the program at the cost of making the 'invoke' instruction really expensive.
+// It basically inserts setjmp/longjmp calls to emulate the exception handling
+// as necessary.
+//
+// Because the 'expensive' support slows down programs a lot, and EH is only
+// used for a subset of the programs, it must be specifically enabled by an
+// option.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Pass.h"
-#include "llvm/iTerminators.h"
-#include "llvm/iOther.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
#include "llvm/Module.h"
-#include "llvm/Type.h"
-#include "llvm/Constant.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "Support/Statistic.h"
+#include "Support/CommandLine.h"
+#include <csetjmp>
using namespace llvm;
namespace {
Statistic<> NumLowered("lowerinvoke", "Number of invoke & unwinds replaced");
+ cl::opt<bool> ExpensiveEHSupport("enable-correct-eh-support",
+ cl::desc("Make the -lowerinvoke pass insert expensive, but correct, EH code"));
class LowerInvoke : public FunctionPass {
+ // Used for both models.
+ Function *WriteFn;
Function *AbortFn;
+ Constant *AbortMessageInit;
+ Value *AbortMessage;
+ unsigned AbortMessageLength;
+
+ // Used for expensive EH support.
+ const Type *JBLinkTy;
+ GlobalVariable *JBListHead;
+ Function *SetJmpFn, *LongJmpFn;
public:
bool doInitialization(Module &M);
bool runOnFunction(Function &F);
+ private:
+ void writeAbortMessage(Instruction *IB);
+ bool insertCheapEHSupport(Function &F);
+ bool insertExpensiveEHSupport(Function &F);
};
RegisterOpt<LowerInvoke>
X("lowerinvoke", "Lower invoke and unwind, for unwindless code generators");
}
+const PassInfo *llvm::LowerInvokePassID = X.getPassInfo();
+
// Public Interface To the LowerInvoke pass.
FunctionPass *llvm::createLowerInvokePass() { return new LowerInvoke(); }
// doInitialization - Make sure that there is a prototype for abort in the
// current module.
bool LowerInvoke::doInitialization(Module &M) {
+ const Type *VoidPtrTy = PointerType::get(Type::SByteTy);
+ AbortMessage = 0;
+ if (ExpensiveEHSupport) {
+ // Insert a type for the linked list of jump buffers. Unfortunately, we
+ // don't know the size of the target's setjmp buffer, so we make a guess.
+ // If this guess turns out to be too small, bad stuff could happen.
+ unsigned JmpBufSize = 200; // PPC has 192 words
+ assert(sizeof(jmp_buf) <= JmpBufSize*sizeof(void*) &&
+ "LowerInvoke doesn't know about targets with jmp_buf size > 200 words!");
+ const Type *JmpBufTy = ArrayType::get(VoidPtrTy, JmpBufSize);
+
+ { // The type is recursive, so use a type holder.
+ std::vector<const Type*> Elements;
+ OpaqueType *OT = OpaqueType::get();
+ Elements.push_back(PointerType::get(OT));
+ Elements.push_back(JmpBufTy);
+ PATypeHolder JBLType(StructType::get(Elements));
+ OT->refineAbstractTypeTo(JBLType.get()); // Complete the cycle.
+ JBLinkTy = JBLType.get();
+ }
+
+ const Type *PtrJBList = PointerType::get(JBLinkTy);
+
+ // Now that we've done that, insert the jmpbuf list head global, unless it
+ // already exists.
+ if (!(JBListHead = M.getGlobalVariable("llvm.sjljeh.jblist", PtrJBList)))
+ JBListHead = new GlobalVariable(PtrJBList, false,
+ GlobalValue::LinkOnceLinkage,
+ Constant::getNullValue(PtrJBList),
+ "llvm.sjljeh.jblist", &M);
+ SetJmpFn = M.getOrInsertFunction("llvm.setjmp", Type::IntTy,
+ PointerType::get(JmpBufTy), 0);
+ LongJmpFn = M.getOrInsertFunction("llvm.longjmp", Type::VoidTy,
+ PointerType::get(JmpBufTy),
+ Type::IntTy, 0);
+
+ // The abort message for expensive EH support tells the user that the
+ // program 'unwound' without an 'invoke' instruction.
+ Constant *Msg =
+ ConstantArray::get("ERROR: Exception thrown, but not caught!\n");
+ AbortMessageLength = Msg->getNumOperands()-1; // don't include \0
+ AbortMessageInit = Msg;
+
+ GlobalVariable *MsgGV = M.getGlobalVariable("abort.msg", Msg->getType());
+ if (MsgGV && (!MsgGV->hasInitializer() || MsgGV->getInitializer() != Msg))
+ MsgGV = 0;
+
+ if (MsgGV) {
+ std::vector<Constant*> GEPIdx(2, Constant::getNullValue(Type::LongTy));
+ AbortMessage =
+ ConstantExpr::getGetElementPtr(ConstantPointerRef::get(MsgGV), GEPIdx);
+ }
+
+ } else {
+ // The abort message for cheap EH support tells the user that EH is not
+ // enabled.
+ Constant *Msg =
+ ConstantArray::get("Exception handler needed, but not enabled. Recompile"
+ " program with -enable-correct-eh-support.\n");
+ AbortMessageLength = Msg->getNumOperands()-1; // don't include \0
+ AbortMessageInit = Msg;
+
+ GlobalVariable *MsgGV = M.getGlobalVariable("abort.msg", Msg->getType());
+ if (MsgGV && (!MsgGV->hasInitializer() || MsgGV->getInitializer() != Msg))
+ MsgGV = 0;
+
+ if (MsgGV) {
+ std::vector<Constant*> GEPIdx(2, Constant::getNullValue(Type::LongTy));
+ AbortMessage =
+ ConstantExpr::getGetElementPtr(ConstantPointerRef::get(MsgGV), GEPIdx);
+ }
+ }
+
+ // We need the 'write' and 'abort' functions for both models.
AbortFn = M.getOrInsertFunction("abort", Type::VoidTy, 0);
+
+ // Unfortunately, 'write' can end up being prototyped in several different
+ // ways. If the user defines a three (or more) operand function named 'write'
+ // we will use their prototype. We _do not_ want to insert another instance
+ // of a write prototype, because we don't know that the funcresolve pass will
+ // run after us. If there is a definition of a write function, but it's not
+ // suitable for our uses, we just don't emit write calls. If there is no
+ // write prototype at all, we just add one.
+ if (Function *WF = M.getNamedFunction("write")) {
+ if (WF->getFunctionType()->getNumParams() > 3 ||
+ WF->getFunctionType()->isVarArg())
+ WriteFn = WF;
+ else
+ WriteFn = 0;
+ } else {
+ WriteFn = M.getOrInsertFunction("write", Type::VoidTy, Type::IntTy,
+ VoidPtrTy, Type::IntTy, 0);
+ }
return true;
}
-bool LowerInvoke::runOnFunction(Function &F) {
+void LowerInvoke::writeAbortMessage(Instruction *IB) {
+ if (WriteFn) {
+ if (!AbortMessage) {
+ GlobalVariable *MsgGV = new GlobalVariable(AbortMessageInit->getType(),
+ true,
+ GlobalValue::InternalLinkage,
+ AbortMessageInit, "abort.msg",
+ WriteFn->getParent());
+ std::vector<Constant*> GEPIdx(2, Constant::getNullValue(Type::LongTy));
+ AbortMessage =
+ ConstantExpr::getGetElementPtr(ConstantPointerRef::get(MsgGV), GEPIdx);
+ }
+
+ // These are the arguments we WANT...
+ std::vector<Value*> Args;
+ Args.push_back(ConstantInt::get(Type::IntTy, 2));
+ Args.push_back(AbortMessage);
+ Args.push_back(ConstantInt::get(Type::IntTy, AbortMessageLength));
+
+ // If the actual declaration of write disagrees, insert casts as
+ // appropriate.
+ const FunctionType *FT = WriteFn->getFunctionType();
+ unsigned NumArgs = FT->getNumParams();
+ for (unsigned i = 0; i != 3; ++i)
+ if (i < NumArgs && FT->getParamType(i) != Args[i]->getType())
+ Args[i] = ConstantExpr::getCast(cast<Constant>(Args[i]),
+ FT->getParamType(i));
+
+ new CallInst(WriteFn, Args, "", IB);
+ }
+}
+
+bool LowerInvoke::insertCheapEHSupport(Function &F) {
bool Changed = false;
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
@@ -63,17 +221,21 @@
new BranchInst(II->getNormalDest(), II);
// Remove any PHI node entries from the exception destination.
- II->getExceptionalDest()->removePredecessor(BB);
+ II->getUnwindDest()->removePredecessor(BB);
// Remove the invoke instruction now.
BB->getInstList().erase(II);
++NumLowered; Changed = true;
} else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ // Insert a new call to write(2, AbortMessage, AbortMessageLength);
+ writeAbortMessage(UI);
+
// Insert a call to abort()
new CallInst(AbortFn, std::vector<Value*>(), "", UI);
- // Insert a return instruction.
+ // Insert a return instruction. This really should be a "barrier", as it
+ // is unreachable.
new ReturnInst(F.getReturnType() == Type::VoidTy ? 0 :
Constant::getNullValue(F.getReturnType()), UI);
@@ -83,4 +245,152 @@
++NumLowered; Changed = true;
}
return Changed;
+}
+
+bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
+ bool Changed = false;
+
+ // If a function uses invoke, we have an alloca for the jump buffer.
+ AllocaInst *JmpBuf = 0;
+
+ // If this function contains an unwind instruction, two blocks get added: one
+ // to actually perform the longjmp, and one to terminate the program if there
+ // is no handler.
+ BasicBlock *UnwindBlock = 0, *TermBlock = 0;
+ std::vector<LoadInst*> JBPtrs;
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ if (JmpBuf == 0)
+ JmpBuf = new AllocaInst(JBLinkTy, 0, "jblink", F.begin()->begin());
+
+ // On the entry to the invoke, we must install our JmpBuf as the top of
+ // the stack.
+ LoadInst *OldEntry = new LoadInst(JBListHead, "oldehlist", II);
+
+ // Store this old value as our 'next' field, and store our alloca as the
+ // current jblist.
+ std::vector<Value*> Idx;
+ Idx.push_back(Constant::getNullValue(Type::LongTy));
+ Idx.push_back(ConstantUInt::get(Type::UByteTy, 0));
+ Value *NextFieldPtr = new GetElementPtrInst(JmpBuf, Idx, "NextField", II);
+ new StoreInst(OldEntry, NextFieldPtr, II);
+ new StoreInst(JmpBuf, JBListHead, II);
+
+ // Call setjmp, passing in the address of the jmpbuffer.
+ Idx[1] = ConstantUInt::get(Type::UByteTy, 1);
+ Value *JmpBufPtr = new GetElementPtrInst(JmpBuf, Idx, "TheJmpBuf", II);
+ Value *SJRet = new CallInst(SetJmpFn, JmpBufPtr, "sjret", II);
+
+ // Compare the return value to zero.
+ Value *IsNormal = BinaryOperator::create(Instruction::SetEQ, SJRet,
+ Constant::getNullValue(SJRet->getType()),
+ "notunwind", II);
+ // Create the receiver block if there is a critical edge to the normal
+ // destination.
+ SplitCriticalEdge(II, 0, this);
+ Instruction *InsertLoc = II->getNormalDest()->begin();
+
+ // Insert a normal call instruction on the normal execution path.
+ std::string Name = II->getName(); II->setName("");
+ Value *NewCall = new CallInst(II->getCalledValue(),
+ std::vector<Value*>(II->op_begin()+3,
+ II->op_end()), Name,
+ InsertLoc);
+ II->replaceAllUsesWith(NewCall);
+
+ // If we got this far, then no exception was thrown and we can pop our
+ // jmpbuf entry off.
+ new StoreInst(OldEntry, JBListHead, InsertLoc);
+
+ // Now we change the invoke into a branch instruction.
+ new BranchInst(II->getNormalDest(), II->getUnwindDest(), IsNormal, II);
+
+ // Remove the InvokeInst now.
+ BB->getInstList().erase(II);
+ ++NumLowered; Changed = true;
+
+ } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ if (UnwindBlock == 0) {
+ // Create two new blocks, the unwind block and the terminate block. Add
+ // them at the end of the function because they are not hot.
+ UnwindBlock = new BasicBlock("unwind", &F);
+ TermBlock = new BasicBlock("unwinderror", &F);
+
+ // Insert return instructions. These really should be "barrier"s, as
+ // they are unreachable.
+ new ReturnInst(F.getReturnType() == Type::VoidTy ? 0 :
+ Constant::getNullValue(F.getReturnType()), UnwindBlock);
+ new ReturnInst(F.getReturnType() == Type::VoidTy ? 0 :
+ Constant::getNullValue(F.getReturnType()), TermBlock);
+ }
+
+ // Load the JBList, if it's null, then there was no catch!
+ LoadInst *Ptr = new LoadInst(JBListHead, "ehlist", UI);
+ Value *NotNull = BinaryOperator::create(Instruction::SetNE, Ptr,
+ Constant::getNullValue(Ptr->getType()),
+ "notnull", UI);
+ new BranchInst(UnwindBlock, TermBlock, NotNull, UI);
+
+ // Remember the loaded value so we can insert the PHI node as needed.
+ JBPtrs.push_back(Ptr);
+
+ // Remove the UnwindInst now.
+ BB->getInstList().erase(UI);
+ ++NumLowered; Changed = true;
+ }
+
+ // If an unwind instruction was inserted, we need to set up the Unwind and
+ // term blocks.
+ if (UnwindBlock) {
+ // In the unwind block, we know that the pointer coming in on the JBPtrs
+ // list are non-null.
+ Instruction *RI = UnwindBlock->getTerminator();
+
+ Value *RecPtr;
+ if (JBPtrs.size() == 1)
+ RecPtr = JBPtrs[0];
+ else {
+ // If there is more than one unwind in this function, make a PHI node to
+ // merge in all of the loaded values.
+ PHINode *PN = new PHINode(JBPtrs[0]->getType(), "jbptrs", RI);
+ for (unsigned i = 0, e = JBPtrs.size(); i != e; ++i)
+ PN->addIncoming(JBPtrs[i], JBPtrs[i]->getParent());
+ RecPtr = PN;
+ }
+
+ // Now that we have a pointer to the whole record, remove the entry from the
+ // JBList.
+ std::vector<Value*> Idx;
+ Idx.push_back(Constant::getNullValue(Type::LongTy));
+ Idx.push_back(ConstantUInt::get(Type::UByteTy, 0));
+ Value *NextFieldPtr = new GetElementPtrInst(RecPtr, Idx, "NextField", RI);
+ Value *NextRec = new LoadInst(NextFieldPtr, "NextRecord", RI);
+ new StoreInst(NextRec, JBListHead, RI);
+
+ // Now that we popped the top of the JBList, get a pointer to the jmpbuf and
+ // longjmp.
+ Idx[1] = ConstantUInt::get(Type::UByteTy, 1);
+ Idx[0] = new GetElementPtrInst(RecPtr, Idx, "JmpBuf", RI);
+ Idx[1] = ConstantInt::get(Type::IntTy, 1);
+ new CallInst(LongJmpFn, Idx, "", RI);
+
+ // Now we set up the terminate block.
+ RI = TermBlock->getTerminator();
+
+ // Insert a new call to write(2, AbortMessage, AbortMessageLength);
+ writeAbortMessage(RI);
+
+ // Insert a call to abort()
+ new CallInst(AbortFn, std::vector<Value*>(), "", RI);
+ }
+
+ return Changed;
+}
+
+bool LowerInvoke::runOnFunction(Function &F) {
+ if (ExpensiveEHSupport)
+ return insertExpensiveEHSupport(F);
+ else
+ return insertCheapEHSupport(F);
}
Index: llvm/lib/Transforms/Scalar/LowerSwitch.cpp
diff -u llvm/lib/Transforms/Scalar/LowerSwitch.cpp:1.10 llvm/lib/Transforms/Scalar/LowerSwitch.cpp:1.10.2.1
--- llvm/lib/Transforms/Scalar/LowerSwitch.cpp:1.10 Fri Jan 9 00:02:20 2004
+++ llvm/lib/Transforms/Scalar/LowerSwitch.cpp Mon Mar 1 17:58:16 2004
@@ -115,7 +115,8 @@
Case& Pivot = *(Begin + Mid);
DEBUG(std::cerr << "Pivot ==> "
- << cast<ConstantUInt>(Pivot.first)->getValue() << "\n");
+ << (int64_t)cast<ConstantInt>(Pivot.first)->getRawValue()
+ << "\n");
BasicBlock* LBranch = switchConvert(LHS.begin(), LHS.end(), Val,
OrigBlock, Default);
Index: llvm/lib/Transforms/Scalar/SCCP.cpp
diff -u llvm/lib/Transforms/Scalar/SCCP.cpp:1.88 llvm/lib/Transforms/Scalar/SCCP.cpp:1.88.2.1
--- llvm/lib/Transforms/Scalar/SCCP.cpp:1.88 Mon Jan 12 13:08:43 2004
+++ llvm/lib/Transforms/Scalar/SCCP.cpp Mon Mar 1 17:58:16 2004
@@ -689,14 +689,16 @@
// addressing...
for (unsigned i = 2, e = CE->getNumOperands(); i != e; ++i)
if (ConstantUInt *CU = dyn_cast<ConstantUInt>(CE->getOperand(i))) {
- ConstantStruct *CS = cast<ConstantStruct>(C);
+ ConstantStruct *CS = dyn_cast<ConstantStruct>(C);
+ if (CS == 0) return 0;
if (CU->getValue() >= CS->getValues().size()) return 0;
C = cast<Constant>(CS->getValues()[CU->getValue()]);
} else if (ConstantSInt *CS = dyn_cast<ConstantSInt>(CE->getOperand(i))) {
- ConstantArray *CA = cast<ConstantArray>(C);
+ ConstantArray *CA = dyn_cast<ConstantArray>(C);
+ if (CA == 0) return 0;
if ((uint64_t)CS->getValue() >= CA->getValues().size()) return 0;
C = cast<Constant>(CA->getValues()[CS->getValue()]);
- } else
+ } else
return 0;
return C;
}
Index: llvm/lib/Transforms/Scalar/TailDuplication.cpp
diff -u llvm/lib/Transforms/Scalar/TailDuplication.cpp:1.11 llvm/lib/Transforms/Scalar/TailDuplication.cpp:1.11.2.1
--- llvm/lib/Transforms/Scalar/TailDuplication.cpp:1.11 Fri Jan 9 00:02:20 2004
+++ llvm/lib/Transforms/Scalar/TailDuplication.cpp Mon Mar 1 17:58:16 2004
@@ -41,6 +41,7 @@
bool runOnFunction(Function &F);
private:
inline bool shouldEliminateUnconditionalBranch(TerminatorInst *TI);
+ inline bool canEliminateUnconditionalBranch(TerminatorInst *TI);
inline void eliminateUnconditionalBranch(BranchInst *BI);
inline void InsertPHINodesIfNecessary(Instruction *OrigInst, Value *NewInst,
BasicBlock *NewBlock);
@@ -63,7 +64,8 @@
bool TailDup::runOnFunction(Function &F) {
bool Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E; )
- if (shouldEliminateUnconditionalBranch(I->getTerminator())) {
+ if (shouldEliminateUnconditionalBranch(I->getTerminator()) &&
+ canEliminateUnconditionalBranch(I->getTerminator())) {
eliminateUnconditionalBranch(cast<BranchInst>(I->getTerminator()));
Changed = true;
} else {
@@ -109,6 +111,36 @@
for (unsigned Size = 0; I != Dest->end(); ++Size, ++I)
if (Size == 6) return false; // The block is too large...
return true;
+}
+
+/// canEliminateUnconditionalBranch - Unfortunately, the general form of tail
+/// duplication can do very bad things to SSA form, by destroying arbitrary
+/// relationships between dominators and dominator frontiers as it processes the
+/// program. The right solution for this is to have an incrementally updating
+/// dominator data structure, which can gracefully react to arbitrary
+/// "addEdge/removeEdge" changes to the CFG. Implementing this is nontrivial,
+/// however, so we just disable the transformation in cases where it is not
+/// currently safe.
+///
+bool TailDup::canEliminateUnconditionalBranch(TerminatorInst *TI) {
+ // Basically, we refuse to make the transformation if any of the values
+ // computed in the 'tail' are used in any other basic blocks.
+ BasicBlock *BB = TI->getParent();
+ BasicBlock *Tail = TI->getSuccessor(0);
+ assert(isa<BranchInst>(TI) && cast<BranchInst>(TI)->isUnconditional());
+
+ for (BasicBlock::iterator I = Tail->begin(), E = Tail->end(); I != E; ++I)
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+ ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (User->getParent() != Tail && User->getParent() != BB)
+ return false;
+
+ // The 'swap' problem foils the tail duplication rewriting code.
+ if (isa<PHINode>(User) && User->getParent() == Tail)
+ return false;
+ }
+ return true;
}
Index: llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
diff -u llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp:1.12 llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp:1.12.2.1
--- llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp:1.12 Sun Dec 14 17:57:39 2003
+++ llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp Mon Mar 1 17:58:16 2004
@@ -290,7 +290,7 @@
if (OldEntry == 0) {
OldEntry = &F->getEntryBlock();
std::string OldName = OldEntry->getName(); OldEntry->setName("tailrecurse");
- BasicBlock *NewEntry = new BasicBlock(OldName, OldEntry);
+ BasicBlock *NewEntry = new BasicBlock(OldName, F, OldEntry);
new BranchInst(OldEntry, NewEntry);
// Now that we have created a new block, which jumps to the entry
More information about the llvm-commits
mailing list