[llvm-commits] [llvm] r61123 - in /llvm/trunk: lib/Transforms/IPO/GlobalOpt.cpp test/Transforms/GlobalOpt/heap-sra-phi.ll
Chris Lattner
sabre at nondot.org
Tue Dec 16 21:28:55 PST 2008
Author: lattner
Date: Tue Dec 16 23:28:49 2008
New Revision: 61123
URL: http://llvm.org/viewvc/llvm-project?rev=61123&view=rev
Log:
Enhance heap sra to be substantially more aggressive w.r.t PHI
nodes. This allows it to do fairly general phi insertion if a
load from a pointer global wants to be SRAd but the load is used
by (recursive) phi nodes. This fixes a pessimization on ppc
introduced by Load PRE.
Added:
llvm/trunk/test/Transforms/GlobalOpt/heap-sra-phi.ll
Modified:
llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp
Modified: llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp?rev=61123&r1=61122&r2=61123&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp (original)
+++ llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp Tue Dec 16 23:28:49 2008
@@ -34,6 +34,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
#include <algorithm>
using namespace llvm;
@@ -1015,8 +1016,8 @@
/// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi
/// of a load) are simple enough to perform heap SRA on. This permits GEP's
/// that index through the array and struct field, icmps of null, and PHIs.
-static bool LoadUsesSimpleEnoughForHeapSRA(Value *V, MallocInst *MI,
- SmallPtrSet<PHINode*, 32> &AnalyzedLoadUsingPHIs) {
+static bool LoadUsesSimpleEnoughForHeapSRA(Value *V,
+ SmallPtrSet<PHINode*, 32> &LoadUsingPHIs) {
// We permit two users of the load: setcc comparing against the null
// pointer, and a getelementptr of a specific form.
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
@@ -1041,25 +1042,11 @@
if (PHINode *PN = dyn_cast<PHINode>(User)) {
// If we have already recursively analyzed this PHI, then it is safe.
- if (AnalyzedLoadUsingPHIs.insert(PN))
+ if (LoadUsingPHIs.insert(PN))
continue;
- // We have a phi of a load from the global. We can only handle this
- // if the other PHI'd values are actually the same. In this case,
- // the rewriter will just drop the phi entirely.
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *IV = PN->getIncomingValue(i);
- if (IV == V) continue; // Trivial the same.
-
- // If the phi'd value is from the malloc that initializes the value,
- // we can xform it.
- if (IV == MI) continue;
-
- // Otherwise, we don't know what it is.
- return false;
- }
-
- if (!LoadUsesSimpleEnoughForHeapSRA(PN, MI, AnalyzedLoadUsingPHIs))
+ // Make sure all uses of the PHI are simple enough to transform.
+ if (!LoadUsesSimpleEnoughForHeapSRA(PN, LoadUsingPHIs))
return false;
continue;
@@ -1077,45 +1064,100 @@
/// GV are simple enough to perform HeapSRA, return true.
static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV,
MallocInst *MI) {
- SmallPtrSet<PHINode*, 32> AnalyzedLoadUsingPHIs;
+ SmallPtrSet<PHINode*, 32> LoadUsingPHIs;
for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;
++UI)
if (LoadInst *LI = dyn_cast<LoadInst>(*UI))
- if (!LoadUsesSimpleEnoughForHeapSRA(LI, MI, AnalyzedLoadUsingPHIs))
+ if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs))
+ return false;
+
+ // If we reach here, we know that all uses of the loads and transitive uses
+ // (through PHI nodes) are simple enough to transform. However, we don't know
+ // that all inputs to the PHI nodes are in the same equivalence sets.
+ // Check to verify that all operands of the PHIs are either PHIs that can be
+ // transformed, loads from GV, or MI itself.
+ for (SmallPtrSet<PHINode*, 32>::iterator I = LoadUsingPHIs.begin(),
+ E = LoadUsingPHIs.end(); I != E; ++I) {
+ PHINode *PN = *I;
+ for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
+ Value *InVal = PN->getIncomingValue(op);
+
+ // PHI of the stored value itself is ok.
+ if (InVal == MI) continue;
+
+ if (PHINode *InPN = dyn_cast<PHINode>(InVal)) {
+ // One of the PHIs in our set is (optimistically) ok.
+ if (LoadUsingPHIs.count(InPN))
+ continue;
return false;
+ }
+
+ // Load from GV is ok.
+ if (LoadInst *LI = dyn_cast<LoadInst>(InVal))
+ if (LI->getOperand(0) == GV)
+ continue;
+
+ // UNDEF? NULL?
+
+ // Anything else is rejected.
+ return false;
+ }
+ }
+
return true;
}
-/// GetHeapSROALoad - Return the load for the specified field of the HeapSROA'd
-/// value, lazily creating it on demand.
-static Value *GetHeapSROALoad(Instruction *Load, unsigned FieldNo,
- const std::vector<GlobalVariable*> &FieldGlobals,
- std::vector<Value *> &InsertedLoadsForPtr) {
- if (InsertedLoadsForPtr.size() <= FieldNo)
- InsertedLoadsForPtr.resize(FieldNo+1);
- if (InsertedLoadsForPtr[FieldNo] == 0)
- InsertedLoadsForPtr[FieldNo] = new LoadInst(FieldGlobals[FieldNo],
- Load->getName()+".f" +
- utostr(FieldNo), Load);
- return InsertedLoadsForPtr[FieldNo];
+static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
+ DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+ std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];
+
+ if (FieldNo >= FieldVals.size())
+ FieldVals.resize(FieldNo+1);
+
+ // If we already have this value, just reuse the previously scalarized
+ // version.
+ if (Value *FieldVal = FieldVals[FieldNo])
+ return FieldVal;
+
+ // Depending on what instruction this is, we have several cases.
+ Value *Result;
+ if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
+ // This is a scalarized version of the load from the global. Just create
+ // a new Load of the scalarized global.
+ Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo,
+ InsertedScalarizedValues,
+ PHIsToRewrite),
+ LI->getName()+".f" + utostr(FieldNo), LI);
+ } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ // PN's type is pointer to struct. Make a new PHI of pointer to struct
+ // field.
+ const StructType *ST =
+ cast<StructType>(cast<PointerType>(PN->getType())->getElementType());
+
+ Result =PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
+ PN->getName()+".f"+utostr(FieldNo), PN);
+ PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
+ } else {
+ assert(0 && "Unknown usable value");
+ Result = 0;
+ }
+
+ return FieldVals[FieldNo] = Result;
}
/// RewriteHeapSROALoadUser - Given a load instruction and a value derived from
/// the load, rewrite the derived value to use the HeapSRoA'd load.
-static void RewriteHeapSROALoadUser(LoadInst *Load, Instruction *LoadUser,
- const std::vector<GlobalVariable*> &FieldGlobals,
- std::vector<Value *> &InsertedLoadsForPtr) {
+static void RewriteHeapSROALoadUser(Instruction *LoadUser,
+ DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
// If this is a comparison against null, handle it.
if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
// If we have a setcc of the loaded pointer, we can use a setcc of any
// field.
- Value *NPtr;
- if (InsertedLoadsForPtr.empty()) {
- NPtr = GetHeapSROALoad(Load, 0, FieldGlobals, InsertedLoadsForPtr);
- } else {
- NPtr = InsertedLoadsForPtr.back();
- }
+ Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
+ InsertedScalarizedValues, PHIsToRewrite);
Value *New = new ICmpInst(SCI->getPredicate(), NPtr,
Constant::getNullValue(NPtr->getType()),
@@ -1125,15 +1167,15 @@
return;
}
- // Handle 'getelementptr Ptr, Idx, uint FieldNo ...'
+ // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
&& "Unexpected GEPI!");
// Load the pointer for this field.
unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
- Value *NewPtr = GetHeapSROALoad(Load, FieldNo,
- FieldGlobals, InsertedLoadsForPtr);
+ Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
+ InsertedScalarizedValues, PHIsToRewrite);
// Create the new GEP idx vector.
SmallVector<Value*, 8> GEPIdx;
@@ -1147,41 +1189,26 @@
GEPI->eraseFromParent();
return;
}
-
- // Handle PHI nodes. PHI nodes must be merging in the same values, plus
- // potentially the original malloc. Insert phi nodes for each field, then
- // process uses of the PHI.
- PHINode *PN = cast<PHINode>(LoadUser);
- std::vector<Value *> PHIsForField;
- PHIsForField.resize(FieldGlobals.size());
- for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
- Value *LoadV = GetHeapSROALoad(Load, i, FieldGlobals, InsertedLoadsForPtr);
- PHINode *FieldPN = PHINode::Create(LoadV->getType(),
- PN->getName()+"."+utostr(i), PN);
- // Fill in the predecessor values.
- for (unsigned pred = 0, e = PN->getNumIncomingValues(); pred != e; ++pred) {
- // Each predecessor either uses the load or the original malloc.
- Value *InVal = PN->getIncomingValue(pred);
- BasicBlock *BB = PN->getIncomingBlock(pred);
- Value *NewVal;
- if (isa<MallocInst>(InVal)) {
- // Insert a reload from the global in the predecessor.
- NewVal = GetHeapSROALoad(BB->getTerminator(), i, FieldGlobals,
- PHIsForField);
- } else {
- NewVal = InsertedLoadsForPtr[i];
- }
- FieldPN->addIncoming(NewVal, BB);
- }
- PHIsForField[i] = FieldPN;
- }
-
- // Since PHIsForField specifies a phi for every input value, the lazy inserter
- // will never insert a load.
- while (!PN->use_empty())
- RewriteHeapSROALoadUser(Load, PN->use_back(), FieldGlobals, PHIsForField);
- PN->eraseFromParent();
+ // Recursively transform the users of PHI nodes. This will lazily create the
+ // PHIs that are needed for individual elements. Keep track of what PHIs we
+ // see in InsertedScalarizedValues so that we don't get infinite loops (very
+ // antisocial). If the PHI is already in InsertedScalarizedValues, it has
+ // already been seen first by another load, so its uses have already been
+ // processed.
+ PHINode *PN = cast<PHINode>(LoadUser);
+ bool Inserted;
+ DenseMap<Value*, std::vector<Value*> >::iterator InsertPos;
+ tie(InsertPos, Inserted) =
+ InsertedScalarizedValues.insert(std::make_pair(PN, std::vector<Value*>()));
+ if (!Inserted) return;
+
+ // If this is the first time we've seen this PHI, recursively process all
+ // users.
+ for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E;
+ ++UI)
+ RewriteHeapSROALoadUser(cast<Instruction>(*UI), InsertedScalarizedValues,
+ PHIsToRewrite);
}
/// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global. Ptr
@@ -1189,12 +1216,17 @@
/// use FieldGlobals instead. All uses of loaded values satisfy
/// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
- const std::vector<GlobalVariable*> &FieldGlobals) {
- std::vector<Value *> InsertedLoadsForPtr;
- //InsertedLoadsForPtr.resize(FieldGlobals.size());
- while (!Load->use_empty())
- RewriteHeapSROALoadUser(Load, Load->use_back(),
- FieldGlobals, InsertedLoadsForPtr);
+ DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+ for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end();
+ UI != E; )
+ RewriteHeapSROALoadUser(cast<Instruction>(*UI++), InsertedScalarizedValues,
+ PHIsToRewrite);
+
+ if (Load->use_empty()) {
+ Load->eraseFromParent();
+ InsertedScalarizedValues.erase(Load);
+ }
}
/// PerformHeapAllocSRoA - MI is an allocation of an array of structures. Break
@@ -1211,7 +1243,7 @@
// Okay, at this point, there are no users of the malloc. Insert N
// new mallocs at the same place as MI, and N globals.
- std::vector<GlobalVariable*> FieldGlobals;
+ std::vector<Value*> FieldGlobals;
std::vector<MallocInst*> FieldMallocs;
for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
@@ -1293,37 +1325,82 @@
// MI is no longer needed, remove it.
MI->eraseFromParent();
+ /// InsertedScalarizedValues - As we process loads, if we can't immediately
+ /// update all uses of the load, keep track of what scalarized loads are
+ /// inserted for a given load.
+ DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
+ InsertedScalarizedValues[GV] = FieldGlobals;
+
+ std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;
// Okay, the malloc site is completely handled. All of the uses of GV are now
// loads, and all uses of those loads are simple. Rewrite them to use loads
// of the per-field globals instead.
- while (!GV->use_empty()) {
- if (LoadInst *LI = dyn_cast<LoadInst>(GV->use_back())) {
- RewriteUsesOfLoadForHeapSRoA(LI, FieldGlobals);
- LI->eraseFromParent();
- } else {
- // Must be a store of null.
- StoreInst *SI = cast<StoreInst>(GV->use_back());
- assert(isa<Constant>(SI->getOperand(0)) &&
- cast<Constant>(SI->getOperand(0))->isNullValue() &&
- "Unexpected heap-sra user!");
-
- // Insert a store of null into each global.
- for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
- Constant *Null =
- Constant::getNullValue(FieldGlobals[i]->getType()->getElementType());
- new StoreInst(Null, FieldGlobals[i], SI);
- }
- // Erase the original store.
- SI->eraseFromParent();
+ for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) {
+ Instruction *User = cast<Instruction>(*UI++);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
+ continue;
+ }
+
+ // Must be a store of null.
+ StoreInst *SI = cast<StoreInst>(User);
+ assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
+ "Unexpected heap-sra user!");
+
+ // Insert a store of null into each global.
+ for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
+ const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
+ Constant *Null = Constant::getNullValue(PT->getElementType());
+ new StoreInst(Null, FieldGlobals[i], SI);
+ }
+ // Erase the original store.
+ SI->eraseFromParent();
+ }
+
+ // While we have PHIs that are interesting to rewrite, do it.
+ while (!PHIsToRewrite.empty()) {
+ PHINode *PN = PHIsToRewrite.back().first;
+ unsigned FieldNo = PHIsToRewrite.back().second;
+ PHIsToRewrite.pop_back();
+ PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
+ assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi");
+
+ // Add all the incoming values. This can materialize more phis.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *InVal = PN->getIncomingValue(i);
+ InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
+ PHIsToRewrite);
+ FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
}
}
-
+
+ // Drop all inter-phi links and any loads that made it this far.
+ for (DenseMap<Value*, std::vector<Value*> >::iterator
+ I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+ I != E; ++I) {
+ if (PHINode *PN = dyn_cast<PHINode>(I->first))
+ PN->dropAllReferences();
+ else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+ LI->dropAllReferences();
+ }
+
+ // Delete all the phis and loads now that inter-references are dead.
+ for (DenseMap<Value*, std::vector<Value*> >::iterator
+ I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+ I != E; ++I) {
+ if (PHINode *PN = dyn_cast<PHINode>(I->first))
+ PN->eraseFromParent();
+ else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+ LI->eraseFromParent();
+ }
+
// The old global is now dead, remove it.
GV->eraseFromParent();
++NumHeapSRA;
- return FieldGlobals[0];
+ return cast<GlobalVariable>(FieldGlobals[0]);
}
/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a
Added: llvm/trunk/test/Transforms/GlobalOpt/heap-sra-phi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/GlobalOpt/heap-sra-phi.ll?rev=61123&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/GlobalOpt/heap-sra-phi.ll (added)
+++ llvm/trunk/test/Transforms/GlobalOpt/heap-sra-phi.ll Tue Dec 16 23:28:49 2008
@@ -0,0 +1,41 @@
+; RUN: llvm-as < %s | opt -globalopt | llvm-dis | grep {tmp.f1 = phi i32. }
+; RUN: llvm-as < %s | opt -globalopt | llvm-dis | grep {tmp.f0 = phi i32. }
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin7"
+ %struct.foo = type { i32, i32 }
+ at X = internal global %struct.foo* null ; <%struct.foo**> [#uses=2]
+
+define void @bar(i32 %Size) nounwind noinline {
+entry:
+ %tmp = malloc [1000000 x %struct.foo] ; <[1000000 x %struct.foo]*> [#uses=1]
+ %.sub = getelementptr [1000000 x %struct.foo]* %tmp, i32 0, i32 0 ; <%struct.foo*> [#uses=1]
+ store %struct.foo* %.sub, %struct.foo** @X, align 4
+ ret void
+}
+
+define i32 @baz() nounwind readonly noinline {
+bb1.thread:
+ %tmpLD1 = load %struct.foo** @X, align 4 ; <%struct.foo*> [#uses=1]
+ br label %bb1
+
+bb1: ; preds = %bb1, %bb1.thread
+ %tmp = phi %struct.foo* [%tmpLD1, %bb1.thread ], [ %tmpLD2, %bb1 ] ; <i32> [#uses=2]
+ %i.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ] ; <i32> [#uses=2]
+ %sum.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %tmp3, %bb1 ] ; <i32> [#uses=1]
+ %tmp1 = getelementptr %struct.foo* %tmp, i32 %i.0.reg2mem.0, i32 0 ; <i32*> [#uses=1]
+ %tmp2 = load i32* %tmp1, align 4 ; <i32> [#uses=1]
+ %tmp6 = add i32 %tmp2, %sum.0.reg2mem.0 ; <i32> [#uses=2]
+ %tmp4 = getelementptr %struct.foo* %tmp, i32 %i.0.reg2mem.0, i32 1 ; <i32*> [#uses=1]
+ %tmp5 = load i32 * %tmp4
+ %tmp3 = add i32 %tmp5, %tmp6
+ %indvar.next = add i32 %i.0.reg2mem.0, 1 ; <i32> [#uses=2]
+
+ %tmpLD2 = load %struct.foo** @X, align 4 ; <%struct.foo*> [#uses=1]
+
+ %exitcond = icmp eq i32 %indvar.next, 1200 ; <i1> [#uses=1]
+ br i1 %exitcond, label %bb2, label %bb1
+
+bb2: ; preds = %bb1
+ ret i32 %tmp3
+}
More information about the llvm-commits
mailing list