[llvm-commits] [llvm] r60408 - in /llvm/trunk: lib/Transforms/Scalar/GVN.cpp test/Transforms/GVN/pre-load.ll
Chris Lattner
sabre at nondot.org
Tue Dec 2 00:16:11 PST 2008
Author: lattner
Date: Tue Dec 2 02:16:11 2008
New Revision: 60408
URL: http://llvm.org/viewvc/llvm-project?rev=60408&view=rev
Log:
Implement PRE of loads in the GVN pass with a pretty cheap and
straightforward implementation. This does not require any extra
alias analysis queries beyond what we already do for non-local loads.
Some programs benefit greatly from load PRE: for example, it triggers
~1000 times on SPASS, ~300 times on 255.vortex, and ~1500 times on 403.gcc.
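To make the transformation concrete, here is a minimal C++ sketch of a
partially redundant load and of the shape the code takes once load PRE
inserts the missing load in the single predecessor where the value is
unavailable. This is illustrative only; the function names and the small
driver are hypothetical and not part of the patch.

  #include <cstdio>

  // Before PRE: the value of *P is known on the path through the store, so
  // the final load is redundant there, but not on the other path.
  int beforePRE(int *P, bool C) {
    if (C)
      *P = 42;    // *P is fully available after this store
    return *P;    // partially redundant load
  }

  // After PRE: exactly one new load has been inserted, in the predecessor
  // where the value was unavailable; the original load becomes a merge of
  // the incoming values (a PHI node in the IR).
  int afterPRE(int *P, bool C) {
    int V;
    if (C) {
      *P = 42;
      V = 42;     // reuse the stored value
    } else {
      V = *P;     // the reload inserted by PRE
    }
    return V;     // stands for the PHI of the two incoming values
  }

  int main() {
    int X = 7;
    std::printf("%d %d\n", beforePRE(&X, false), afterPRE(&X, false)); // 7 7
    std::printf("%d %d\n", beforePRE(&X, true), afterPRE(&X, true));   // 42 42
    return 0;
  }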
The biggest limitation to the implementation is that it does not split
critical edges. This is a huge killer on many programs and should be
addressed after the initial patch is enabled by default.
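A hypothetical C++ example of that limitation (again, not from the patch):
the value of *P is available on the path through the store, but the other
predecessor of the merge point also branches to an early return, so the edge
into the merge is critical. Inserting the reload at the end of that
predecessor would execute it on the early-return path as well, so until GVN
splits critical edges this load is left alone.

  // Sketch only: names are made up for illustration.
  int criticalEdgeCase(int *P, bool C1, bool C2) {
    if (!C1) {
      *P = 0;     // value of *P available on this path
    } else if (C2) {
      return -1;  // the C2 test gives the unavailable predecessor two
                  // successors, making its edge into the merge block critical
    }
    return *P;    // partially redundant, but blocked by the critical edge
  }

  int main() {
    int X = 5;
    return criticalEdgeCase(&X, true, false); // returns 5 via the load
  }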
As a side effect, this implementation should also speed up rejection of
non-local loads, because it avoids creating the repl densemap in cases
where it won't be used for fully redundant loads.
This is currently disabled by default.
Before I turn this on, I need to fix a couple of miscompilations in
the testsuite, look at compile-time numbers, and evaluate the runtime
performance impact. This is pretty close to ready, though.
Added:
llvm/trunk/test/Transforms/GVN/pre-load.ll
Modified:
llvm/trunk/lib/Transforms/Scalar/GVN.cpp
Modified: llvm/trunk/lib/Transforms/Scalar/GVN.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/GVN.cpp?rev=60408&r1=60407&r2=60408&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/GVN.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/GVN.cpp Tue Dec 2 02:16:11 2008
@@ -43,9 +43,11 @@
STATISTIC(NumGVNLoad, "Number of loads deleted");
STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
STATISTIC(NumGVNBlocks, "Number of blocks merged");
+STATISTIC(NumPRELoad, "Number of loads PRE'd");
static cl::opt<bool> EnablePRE("enable-pre",
cl::init(true), cl::Hidden);
+cl::opt<bool> EnableLoadPRE("enable-load-pre");
//===----------------------------------------------------------------------===//
// ValueTable Class
@@ -863,23 +865,46 @@
return v;
}
+/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
+/// we're analyzing is fully available in the specified block. As we go, keep
+/// track of which blocks we know it is fully alive or not in
+/// FullyAvailableBlocks.
+static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
+ DenseMap<BasicBlock*, bool> &FullyAvailableBlocks) {
+ // Optimistically assume that the block is fully available and check to see
+ // if we already know about this block in one lookup.
+ std::pair<DenseMap<BasicBlock*, bool>::iterator, bool> IV =
+ FullyAvailableBlocks.insert(std::make_pair(BB, true));
+
+ // If the entry already existed for this block, return the precomputed value.
+ if (!IV.second)
+ return IV.first->second;
+
+ // Otherwise, see if it is fully available in all predecessors.
+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+
+ // If this block has no predecessors, it isn't live-in here.
+ if (PI == PE)
+ return FullyAvailableBlocks[BB] = false;
+
+ for (; PI != PE; ++PI)
+ // If the value isn't fully available in one of our predecessors, then it
+ // isn't fully available in this block either. Undo our previous
+ // optimistic assumption and bail out.
+ if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
+ return FullyAvailableBlocks[BB] = false;
+
+ return true;
+}
+
/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
-bool GVN::processNonLocalLoad(LoadInst* L,
+bool GVN::processNonLocalLoad(LoadInst *LI,
SmallVectorImpl<Instruction*> &toErase) {
- // Find the non-local dependencies of the load
+ // Find the non-local dependencies of the load.
const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
- MD->getNonLocalDependency(L);
- DEBUG(cerr << "INVESTIGATING NONLOCAL LOAD: " << deps.size() << *L);
-#if 0
- DEBUG(for (unsigned i = 0, e = deps.size(); i != e; ++i) {
- cerr << " " << deps[i].first->getName();
- if (Instruction *I = deps[i].second.getInst())
- cerr << *I;
- else
- cerr << "\n";
- });
-#endif
+ MD->getNonLocalDependency(LI);
+ //DEBUG(cerr << "INVESTIGATING NONLOCAL LOAD: " << deps.size() << *LI);
// If we had to process more than one hundred blocks to find the
// dependencies, this load isn't worth worrying about. Optimizing
@@ -887,11 +912,15 @@
if (deps.size() > 100)
return false;
- BasicBlock *EntryBlock = &L->getParent()->getParent()->getEntryBlock();
+ BasicBlock *EntryBlock = &LI->getParent()->getParent()->getEntryBlock();
- DenseMap<BasicBlock*, Value*> repl;
+ // Filter out useless results (non-locals, etc). Keep track of the blocks
+ // where we have a value available in repl, also keep track of whether we see
+ // dependencies that produce an unknown value for the load (such as a call
+ // that could potentially clobber the load).
+ SmallVector<std::pair<BasicBlock*, Value*>, 16> ValuesPerBlock;
+ SmallVector<BasicBlock*, 16> UnavailableBlocks;
- // Filter out useless results (non-locals, etc)
for (unsigned i = 0, e = deps.size(); i != e; ++i) {
BasicBlock *DepBB = deps[i].first;
MemDepResult DepInfo = deps[i].second;
@@ -900,17 +929,14 @@
// If this is a non-local dependency in the entry block, then we depend on
// the value live-in at the start of the function. We could insert a load
// in the entry block to get this, but for now we'll just bail out.
- //
- // FIXME: Consider emitting a load in the entry block to catch this case!
- // Tricky part is to sink so that it doesn't execute in places where it
- // isn't needed.
if (DepBB == EntryBlock)
- return false;
+ UnavailableBlocks.push_back(DepBB);
continue;
}
if (DepInfo.isNone()) {
- repl[DepBB] = UndefValue::get(L->getType());
+ ValuesPerBlock.push_back(std::make_pair(DepBB,
+ UndefValue::get(LI->getType())));
continue;
}
@@ -920,52 +946,165 @@
// NOTE: 403.gcc does have this case (e.g. in readonly_fields_p) because
// of bitfield access, it would be interesting to optimize for it at some
// point.
- if (S->getOperand(0)->getType() != L->getType())
- return false;
+ if (S->getOperand(0)->getType() != LI->getType()) {
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
- if (S->getPointerOperand() != L->getPointerOperand() &&
+ if (S->getPointerOperand() != LI->getPointerOperand() &&
VN.getAliasAnalysis()->alias(S->getPointerOperand(), 1,
- L->getPointerOperand(), 1)
- != AliasAnalysis::MustAlias)
- return false;
- repl[DepBB] = S->getOperand(0);
+ LI->getPointerOperand(), 1)
+ != AliasAnalysis::MustAlias) {
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+ ValuesPerBlock.push_back(std::make_pair(DepBB, S->getOperand(0)));
+
} else if (LoadInst* LD = dyn_cast<LoadInst>(DepInfo.getInst())) {
- if (LD->getType() != L->getType())
- return false;
+ if (LD->getType() != LI->getType()) {
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
- if (LD->getPointerOperand() != L->getPointerOperand() &&
+ if (LD->getPointerOperand() != LI->getPointerOperand() &&
VN.getAliasAnalysis()->alias(LD->getPointerOperand(), 1,
- L->getPointerOperand(), 1)
- != AliasAnalysis::MustAlias)
- return false;
- repl[DepBB] = LD;
+ LI->getPointerOperand(), 1)
+ != AliasAnalysis::MustAlias) {
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+ ValuesPerBlock.push_back(std::make_pair(DepBB, LD));
} else {
- return false;
+ UnavailableBlocks.push_back(DepBB);
+ continue;
}
}
- // Use cached PHI construction information from previous runs
- SmallPtrSet<Instruction*, 4>& p = phiMap[L->getPointerOperand()];
- for (SmallPtrSet<Instruction*, 4>::iterator I = p.begin(), E = p.end();
- I != E; ++I) {
- if ((*I)->getParent() == L->getParent()) {
- L->replaceAllUsesWith(*I);
- toErase.push_back(L);
- NumGVNLoad++;
- return true;
+ // If we have no predecessors that produce a known value for this load, exit
+ // early.
+ if (ValuesPerBlock.empty()) return false;
+
+ // If all of the instructions we depend on produce a known value for this
+ // load, then it is fully redundant and we can use PHI insertion to compute
+ // its value. Insert PHIs and remove the fully redundant value now.
+ if (UnavailableBlocks.empty()) {
+ // Use cached PHI construction information from previous runs
+ SmallPtrSet<Instruction*, 4> &p = phiMap[LI->getPointerOperand()];
+ for (SmallPtrSet<Instruction*, 4>::iterator I = p.begin(), E = p.end();
+ I != E; ++I) {
+ if ((*I)->getParent() == LI->getParent()) {
+ DEBUG(cerr << "GVN REMOVING NONLOCAL LOAD #1: " << *LI);
+ LI->replaceAllUsesWith(*I);
+ toErase.push_back(LI);
+ NumGVNLoad++;
+ return true;
+ }
+
+ ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I));
}
- repl.insert(std::make_pair((*I)->getParent(), *I));
+ DEBUG(cerr << "GVN REMOVING NONLOCAL LOAD: " << *LI);
+
+ DenseMap<BasicBlock*, Value*> BlockReplValues;
+ BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end());
+ // Perform PHI construction.
+ Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true);
+ LI->replaceAllUsesWith(v);
+ toErase.push_back(LI);
+ NumGVNLoad++;
+ return true;
}
+
+ if (!EnablePRE || !EnableLoadPRE)
+ return false;
- DEBUG(cerr << "GVN REMOVING NONLOCAL LOAD: " << *L);
+ // Okay, we have *some* definitions of the value. This means that the value
+ // is available in some of our (transitive) predecessors. Lets think about
+ // doing PRE of this load. This will involve inserting a new load into the
+ // predecessor when it's not available. We could do this in general, but
+ // prefer to not increase code size. As such, we only do this when we know
+ // that we only have to insert *one* load (which means we're basically moving
+ // the load, not inserting a new one).
+
+ // Everything we do here is based on local predecessors of LI's block. If it
+ // only has one predecessor, bail now.
+ BasicBlock *LoadBB = LI->getParent();
+ if (LoadBB->getSinglePredecessor())
+ return false;
+
+ // If we have a repl set with LI itself in it, this means we have a loop where
+ // at least one of the values is LI. Since this means that we won't be able
+ // to eliminate LI even if we insert uses in the other predecessors, we will
+ // end up increasing code size. Reject this by scanning for LI.
+ for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
+ if (ValuesPerBlock[i].second == LI)
+ return false;
+
+ // Okay, we have some hope :). Check to see if the loaded value is fully
+ // available in all but one predecessor.
+ // FIXME: If we could restructure the CFG, we could make a common pred with
+ // all the preds that don't have an available LI and insert a new load into
+ // that one block.
+ BasicBlock *UnavailablePred = 0;
+
+ DenseMap<BasicBlock*, bool> FullyAvailableBlocks;
+ for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
+ FullyAvailableBlocks[ValuesPerBlock[i].first] = true;
+ for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
+ FullyAvailableBlocks[UnavailableBlocks[i]] = false;
+
+ for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
+ PI != E; ++PI) {
+ if (IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
+ continue;
+
+ // If this load is not available in multiple predecessors, reject it.
+ if (UnavailablePred && UnavailablePred != *PI)
+ return false;
+ UnavailablePred = *PI;
+ }
+
+ assert(UnavailablePred != 0 &&
+ "Fully available value should be eliminated above!");
+
+ // If the loaded pointer is PHI node defined in this block, do PHI translation
+ // to get its value in the predecessor.
+ Value *LoadPtr = LI->getOperand(0)->DoPHITranslation(LoadBB, UnavailablePred);
+
+ // Make sure the value is live in the predecessor. If it was defined by a
+ // non-PHI instruction in this block, we don't know how to recompute it above.
+ if (Instruction *LPInst = dyn_cast<Instruction>(LoadPtr))
+ if (!DT->dominates(LPInst->getParent(), UnavailablePred)) {
+ DEBUG(cerr << "COULDN'T PRE LOAD BECAUSE PTR IS UNAVAILABLE IN PRED: "
+ << *LPInst << *LI << "\n");
+ return false;
+ }
+
+ // We don't currently handle critical edges :(
+ if (UnavailablePred->getTerminator()->getNumSuccessors() != 1) {
+ DEBUG(cerr << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '"
+ << UnavailablePred->getName() << "': " << *LI);
+ return false;
+ }
- // Perform PHI construction
- SmallPtrSet<BasicBlock*, 4> visited;
- Value* v = GetValueForBlock(L->getParent(), L, repl, true);
- L->replaceAllUsesWith(v);
- toErase.push_back(L);
- NumGVNLoad++;
+ // Okay, we can eliminate this load by inserting a reload in the predecessor
+ // and using PHI construction to get the value in the other predecessors, do
+ // it.
+ DEBUG(cerr << "GVN REMOVING PRE LOAD: " << *LI);
+
+ Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
+ LI->getAlignment(),
+ UnavailablePred->getTerminator());
+
+ DenseMap<BasicBlock*, Value*> BlockReplValues;
+ BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end());
+ BlockReplValues[UnavailablePred] = NewLoad;
+
+ // Perform PHI construction.
+ Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true);
+ LI->replaceAllUsesWith(v);
+ toErase.push_back(LI);
+ NumPRELoad++;
return true;
}
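For readers following the new IsValueFullyAvailableInBlock helper above, the
following standalone C++ sketch walks through the same memoized recursion
over a toy CFG, with integer block IDs and an explicit predecessor map
standing in for BasicBlock and pred_iterator. Everything here is
illustrative and not part of the patch.

  #include <cstdio>
  #include <map>
  #include <vector>

  // Toy CFG: block ID -> list of predecessor block IDs.
  typedef std::map<int, std::vector<int> > CFG;

  // Optimistically record the block as available (this also terminates the
  // recursion on loop back edges), then require every predecessor to be
  // available; a block with no predecessors cannot have the value live-in.
  static bool isFullyAvailable(int BB, const CFG &Preds,
                               std::map<int, bool> &Available) {
    std::pair<std::map<int, bool>::iterator, bool> IV =
      Available.insert(std::make_pair(BB, true));
    if (!IV.second)
      return IV.first->second;        // already computed (or seeded)

    CFG::const_iterator It = Preds.find(BB);
    if (It == Preds.end() || It->second.empty())
      return Available[BB] = false;   // no predecessors: not live-in here

    for (size_t i = 0, e = It->second.size(); i != e; ++i)
      if (!isFullyAvailable(It->second[i], Preds, Available))
        return Available[BB] = false; // undo the optimistic assumption

    return true;
  }

  int main() {
    // Diamond: block 3 branches to blocks 1 and 2, which both reach block 0;
    // the value is produced in block 1 only.
    CFG Preds;
    Preds[1].push_back(3);
    Preds[2].push_back(3);
    Preds[0].push_back(1);
    Preds[0].push_back(2);

    std::map<int, bool> Available;
    Available[1] = true;   // seeded from ValuesPerBlock
    Available[2] = false;  // seeded from UnavailableBlocks

    std::printf("block 0 fully available: %d\n",
                (int)isFullyAvailable(0, Preds, Available)); // prints 0
    return 0;
  }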
Added: llvm/trunk/test/Transforms/GVN/pre-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/GVN/pre-load.ll?rev=60408&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/GVN/pre-load.ll (added)
+++ llvm/trunk/test/Transforms/GVN/pre-load.ll Tue Dec 2 02:16:11 2008
@@ -0,0 +1,18 @@
+; RUN: llvm-as < %s | opt -gvn -enable-load-pre | llvm-dis | grep {%PRE.rle = phi}
+
+define i32 @test(i32* %p, i1 %C) {
+block1:
+ br i1 %C, label %block2, label %block3
+
+block2:
+ br label %block4
+
+block3:
+ %b = bitcast i32 0 to i32
+ store i32 %b, i32* %p
+ br label %block4
+
+block4:
+ %PRE = load i32* %p
+ ret i32 %PRE
+}