[llvm-commits] [llvm] r61123 - in /llvm/trunk: lib/Transforms/IPO/GlobalOpt.cpp test/Transforms/GlobalOpt/heap-sra-phi.ll

Chris Lattner sabre at nondot.org
Tue Dec 16 21:28:55 PST 2008


Author: lattner
Date: Tue Dec 16 23:28:49 2008
New Revision: 61123

URL: http://llvm.org/viewvc/llvm-project?rev=61123&view=rev
Log:
Enhance heap sra to be substantially more aggressive w.r.t PHI
nodes.  This allows it to do fairly general phi insertion if a 
load from a pointer global wants to be SRAd but the load is used
by (recursive) phi nodes.  This fixes a pessimization on ppc
introduced by Load PRE.

Added:
    llvm/trunk/test/Transforms/GlobalOpt/heap-sra-phi.ll
Modified:
    llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp

Modified: llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp?rev=61123&r1=61122&r2=61123&view=diff

==============================================================================
--- llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp (original)
+++ llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp Tue Dec 16 23:28:49 2008
@@ -34,6 +34,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
 #include <algorithm>
 using namespace llvm;
 
@@ -1015,8 +1016,8 @@
 /// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi
 /// of a load) are simple enough to perform heap SRA on.  This permits GEP's
 /// that index through the array and struct field, icmps of null, and PHIs.
-static bool LoadUsesSimpleEnoughForHeapSRA(Value *V, MallocInst *MI,
-                             SmallPtrSet<PHINode*, 32> &AnalyzedLoadUsingPHIs) {
+static bool LoadUsesSimpleEnoughForHeapSRA(Value *V,
+                                     SmallPtrSet<PHINode*, 32> &LoadUsingPHIs) {
   // We permit two users of the load: setcc comparing against the null
   // pointer, and a getelementptr of a specific form.
   for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
@@ -1041,25 +1042,11 @@
     
     if (PHINode *PN = dyn_cast<PHINode>(User)) {
       // If we have already recursively analyzed this PHI, then it is safe.
-      if (AnalyzedLoadUsingPHIs.insert(PN))
+      if (LoadUsingPHIs.insert(PN))
         continue;
       
-      // We have a phi of a load from the global.  We can only handle this
-      // if the other PHI'd values are actually the same.  In this case,
-      // the rewriter will just drop the phi entirely.
-      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
-        Value *IV = PN->getIncomingValue(i);
-        if (IV == V) continue;  // Trivial the same.
-        
-        // If the phi'd value is from the malloc that initializes the value,
-        // we can xform it.
-        if (IV == MI) continue;
-        
-        // Otherwise, we don't know what it is.
-        return false;
-      }
-      
-      if (!LoadUsesSimpleEnoughForHeapSRA(PN, MI, AnalyzedLoadUsingPHIs))
+      // Make sure all uses of the PHI are simple enough to transform.
+      if (!LoadUsesSimpleEnoughForHeapSRA(PN, LoadUsingPHIs))
         return false;
       
       continue;
@@ -1077,45 +1064,100 @@
 /// GV are simple enough to perform HeapSRA, return true.
 static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV,
                                                     MallocInst *MI) {
-  SmallPtrSet<PHINode*, 32> AnalyzedLoadUsingPHIs;
+  SmallPtrSet<PHINode*, 32> LoadUsingPHIs;
   for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E; 
        ++UI)
     if (LoadInst *LI = dyn_cast<LoadInst>(*UI))
-      if (!LoadUsesSimpleEnoughForHeapSRA(LI, MI, AnalyzedLoadUsingPHIs))
+      if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs))
+        return false;
+  
+  // If we reach here, we know that all uses of the loads and transitive uses
+  // (through PHI nodes) are simple enough to transform.  However, we don't know
+  // that all inputs the to the PHI nodes are in the same equivalence sets. 
+  // Check to verify that all operands of the PHIs are either PHIS that can be
+  // transformed, loads from GV, or MI itself.
+  for (SmallPtrSet<PHINode*, 32>::iterator I = LoadUsingPHIs.begin(),
+       E = LoadUsingPHIs.end(); I != E; ++I) {
+    PHINode *PN = *I;
+    for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
+      Value *InVal = PN->getIncomingValue(op);
+      
+      // PHI of the stored value itself is ok.
+      if (InVal == MI) continue;
+      
+      if (PHINode *InPN = dyn_cast<PHINode>(InVal)) {
+        // One of the PHIs in our set is (optimistically) ok.
+        if (LoadUsingPHIs.count(InPN))
+          continue;
         return false;
+      }
+      
+      // Load from GV is ok.
+      if (LoadInst *LI = dyn_cast<LoadInst>(InVal))
+        if (LI->getOperand(0) == GV)
+          continue;
+      
+      // UNDEF? NULL?
+      
+      // Anything else is rejected.
+      return false;
+    }
+  }
+  
   return true;
 }
 
-/// GetHeapSROALoad - Return the load for the specified field of the HeapSROA'd
-/// value, lazily creating it on demand.
-static Value *GetHeapSROALoad(Instruction *Load, unsigned FieldNo,
-                              const std::vector<GlobalVariable*> &FieldGlobals,
-                              std::vector<Value *> &InsertedLoadsForPtr) {
-  if (InsertedLoadsForPtr.size() <= FieldNo)
-    InsertedLoadsForPtr.resize(FieldNo+1);
-  if (InsertedLoadsForPtr[FieldNo] == 0)
-    InsertedLoadsForPtr[FieldNo] = new LoadInst(FieldGlobals[FieldNo],
-                                                Load->getName()+".f" + 
-                                                utostr(FieldNo), Load);
-  return InsertedLoadsForPtr[FieldNo];
+static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
+               DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+  std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];
+  
+  if (FieldNo >= FieldVals.size())
+    FieldVals.resize(FieldNo+1);
+  
+  // If we already have this value, just reuse the previously scalarized
+  // version.
+  if (Value *FieldVal = FieldVals[FieldNo])
+    return FieldVal;
+  
+  // Depending on what instruction this is, we have several cases.
+  Value *Result;
+  if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
+    // This is a scalarized version of the load from the global.  Just create
+    // a new Load of the scalarized global.
+    Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo,
+                                           InsertedScalarizedValues,
+                                           PHIsToRewrite),
+                          LI->getName()+".f" + utostr(FieldNo), LI);
+  } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+    // PN's type is pointer to struct.  Make a new PHI of pointer to struct
+    // field.
+    const StructType *ST = 
+      cast<StructType>(cast<PointerType>(PN->getType())->getElementType());
+    
+    Result =PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
+                            PN->getName()+".f"+utostr(FieldNo), PN);
+    PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
+  } else {
+    assert(0 && "Unknown usable value");
+    Result = 0;
+  }
+  
+  return FieldVals[FieldNo] = Result;
 }
 
 /// RewriteHeapSROALoadUser - Given a load instruction and a value derived from
 /// the load, rewrite the derived value to use the HeapSRoA'd load.
-static void RewriteHeapSROALoadUser(LoadInst *Load, Instruction *LoadUser, 
-                               const std::vector<GlobalVariable*> &FieldGlobals,
-                                    std::vector<Value *> &InsertedLoadsForPtr) {
+static void RewriteHeapSROALoadUser(Instruction *LoadUser, 
+             DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
   // If this is a comparison against null, handle it.
   if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
     assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
     // If we have a setcc of the loaded pointer, we can use a setcc of any
     // field.
-    Value *NPtr;
-    if (InsertedLoadsForPtr.empty()) {
-      NPtr = GetHeapSROALoad(Load, 0, FieldGlobals, InsertedLoadsForPtr);
-    } else {
-      NPtr = InsertedLoadsForPtr.back();
-    }
+    Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
+                                   InsertedScalarizedValues, PHIsToRewrite);
     
     Value *New = new ICmpInst(SCI->getPredicate(), NPtr,
                               Constant::getNullValue(NPtr->getType()),
@@ -1125,15 +1167,15 @@
     return;
   }
   
-  // Handle 'getelementptr Ptr, Idx, uint FieldNo ...'
+  // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
     assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
            && "Unexpected GEPI!");
   
     // Load the pointer for this field.
     unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
-    Value *NewPtr = GetHeapSROALoad(Load, FieldNo,
-                                    FieldGlobals, InsertedLoadsForPtr);
+    Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
+                                     InsertedScalarizedValues, PHIsToRewrite);
     
     // Create the new GEP idx vector.
     SmallVector<Value*, 8> GEPIdx;
@@ -1147,41 +1189,26 @@
     GEPI->eraseFromParent();
     return;
   }
-  
-  // Handle PHI nodes.  PHI nodes must be merging in the same values, plus
-  // potentially the original malloc.  Insert phi nodes for each field, then
-  // process uses of the PHI.
-  PHINode *PN = cast<PHINode>(LoadUser);
-  std::vector<Value *> PHIsForField;
-  PHIsForField.resize(FieldGlobals.size());
-  for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
-    Value *LoadV = GetHeapSROALoad(Load, i, FieldGlobals, InsertedLoadsForPtr);
 
-    PHINode *FieldPN = PHINode::Create(LoadV->getType(),
-                                       PN->getName()+"."+utostr(i), PN);
-    // Fill in the predecessor values.
-    for (unsigned pred = 0, e = PN->getNumIncomingValues(); pred != e; ++pred) {
-      // Each predecessor either uses the load or the original malloc.
-      Value *InVal = PN->getIncomingValue(pred);
-      BasicBlock *BB = PN->getIncomingBlock(pred);
-      Value *NewVal;
-      if (isa<MallocInst>(InVal)) {
-        // Insert a reload from the global in the predecessor.
-        NewVal = GetHeapSROALoad(BB->getTerminator(), i, FieldGlobals,
-                                 PHIsForField);
-      } else {
-        NewVal = InsertedLoadsForPtr[i];
-      }
-      FieldPN->addIncoming(NewVal, BB);
-    }
-    PHIsForField[i] = FieldPN;
-  }
-  
-  // Since PHIsForField specifies a phi for every input value, the lazy inserter
-  // will never insert a load.
-  while (!PN->use_empty())
-    RewriteHeapSROALoadUser(Load, PN->use_back(), FieldGlobals, PHIsForField);
-  PN->eraseFromParent();
+  // Recursively transform the users of PHI nodes.  This will lazily create the
+  // PHIs that are needed for individual elements.  Keep track of what PHIs we
+  // see in InsertedScalarizedValues so that we don't get infinite loops (very
+  // antisocial).  If the PHI is already in InsertedScalarizedValues, it has
+  // already been seen first by another load, so its uses have already been
+  // processed.
+  PHINode *PN = cast<PHINode>(LoadUser);
+  bool Inserted;
+  DenseMap<Value*, std::vector<Value*> >::iterator InsertPos;
+  tie(InsertPos, Inserted) =
+    InsertedScalarizedValues.insert(std::make_pair(PN, std::vector<Value*>()));
+  if (!Inserted) return;
+  
+  // If this is the first time we've seen this PHI, recursively process all
+  // users.
+  for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E;
+       ++UI)
+    RewriteHeapSROALoadUser(cast<Instruction>(*UI), InsertedScalarizedValues,
+                            PHIsToRewrite);
 }
 
 /// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global.  Ptr
@@ -1189,12 +1216,17 @@
 /// use FieldGlobals instead.  All uses of loaded values satisfy
 /// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
 static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, 
-                             const std::vector<GlobalVariable*> &FieldGlobals) {
-  std::vector<Value *> InsertedLoadsForPtr;
-  //InsertedLoadsForPtr.resize(FieldGlobals.size());
-  while (!Load->use_empty())
-    RewriteHeapSROALoadUser(Load, Load->use_back(), 
-                            FieldGlobals, InsertedLoadsForPtr);
+               DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+  for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end();
+       UI != E; )
+    RewriteHeapSROALoadUser(cast<Instruction>(*UI++), InsertedScalarizedValues,
+                            PHIsToRewrite);
+  
+  if (Load->use_empty()) {
+    Load->eraseFromParent();
+    InsertedScalarizedValues.erase(Load);
+  }
 }
 
 /// PerformHeapAllocSRoA - MI is an allocation of an array of structures.  Break
@@ -1211,7 +1243,7 @@
   
   // Okay, at this point, there are no users of the malloc.  Insert N
   // new mallocs at the same place as MI, and N globals.
-  std::vector<GlobalVariable*> FieldGlobals;
+  std::vector<Value*> FieldGlobals;
   std::vector<MallocInst*> FieldMallocs;
   
   for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
@@ -1293,37 +1325,82 @@
   // MI is no longer needed, remove it.
   MI->eraseFromParent();
 
+  /// InsertedScalarizedLoads - As we process loads, if we can't immediately
+  /// update all uses of the load, keep track of what scalarized loads are
+  /// inserted for a given load.
+  DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
+  InsertedScalarizedValues[GV] = FieldGlobals;
+  
+  std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;
   
   // Okay, the malloc site is completely handled.  All of the uses of GV are now
   // loads, and all uses of those loads are simple.  Rewrite them to use loads
   // of the per-field globals instead.
-  while (!GV->use_empty()) {
-    if (LoadInst *LI = dyn_cast<LoadInst>(GV->use_back())) {
-      RewriteUsesOfLoadForHeapSRoA(LI, FieldGlobals);
-      LI->eraseFromParent();
-    } else {
-      // Must be a store of null.
-      StoreInst *SI = cast<StoreInst>(GV->use_back());
-      assert(isa<Constant>(SI->getOperand(0)) &&
-             cast<Constant>(SI->getOperand(0))->isNullValue() &&
-             "Unexpected heap-sra user!");
-      
-      // Insert a store of null into each global.
-      for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
-        Constant *Null = 
-          Constant::getNullValue(FieldGlobals[i]->getType()->getElementType());
-        new StoreInst(Null, FieldGlobals[i], SI);
-      }
-      // Erase the original store.
-      SI->eraseFromParent();
+  for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) {
+    Instruction *User = cast<Instruction>(*UI++);
+    
+    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+      RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
+      continue;
+    }
+    
+    // Must be a store of null.
+    StoreInst *SI = cast<StoreInst>(User);
+    assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
+           "Unexpected heap-sra user!");
+    
+    // Insert a store of null into each global.
+    for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
+      const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
+      Constant *Null = Constant::getNullValue(PT->getElementType());
+      new StoreInst(Null, FieldGlobals[i], SI);
+    }
+    // Erase the original store.
+    SI->eraseFromParent();
+  }
+
+  // While we have PHIs that are interesting to rewrite, do it.
+  while (!PHIsToRewrite.empty()) {
+    PHINode *PN = PHIsToRewrite.back().first;
+    unsigned FieldNo = PHIsToRewrite.back().second;
+    PHIsToRewrite.pop_back();
+    PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
+    assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi");
+
+    // Add all the incoming values.  This can materialize more phis.
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+      Value *InVal = PN->getIncomingValue(i);
+      InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
+                               PHIsToRewrite);
+      FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
     }
   }
-
+  
+  // Drop all inter-phi links and any loads that made it this far.
+  for (DenseMap<Value*, std::vector<Value*> >::iterator
+       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+       I != E; ++I) {
+    if (PHINode *PN = dyn_cast<PHINode>(I->first))
+      PN->dropAllReferences();
+    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+      LI->dropAllReferences();
+  }
+  
+  // Delete all the phis and loads now that inter-references are dead.
+  for (DenseMap<Value*, std::vector<Value*> >::iterator
+       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+       I != E; ++I) {
+    if (PHINode *PN = dyn_cast<PHINode>(I->first))
+      PN->eraseFromParent();
+    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+      LI->eraseFromParent();
+  }
+  
   // The old global is now dead, remove it.
   GV->eraseFromParent();
 
   ++NumHeapSRA;
-  return FieldGlobals[0];
+  return cast<GlobalVariable>(FieldGlobals[0]);
 }
 
 /// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a

Added: llvm/trunk/test/Transforms/GlobalOpt/heap-sra-phi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/GlobalOpt/heap-sra-phi.ll?rev=61123&view=auto

==============================================================================
--- llvm/trunk/test/Transforms/GlobalOpt/heap-sra-phi.ll (added)
+++ llvm/trunk/test/Transforms/GlobalOpt/heap-sra-phi.ll Tue Dec 16 23:28:49 2008
@@ -0,0 +1,41 @@
+; RUN: llvm-as < %s | opt -globalopt | llvm-dis | grep {tmp.f1 = phi i32. }
+; RUN: llvm-as < %s | opt -globalopt | llvm-dis | grep {tmp.f0 = phi i32. }
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin7"
+	%struct.foo = type { i32, i32 }
+ at X = internal global %struct.foo* null		; <%struct.foo**> [#uses=2]
+
+define void @bar(i32 %Size) nounwind noinline {
+entry:
+	%tmp = malloc [1000000 x %struct.foo]		; <[1000000 x %struct.foo]*> [#uses=1]
+	%.sub = getelementptr [1000000 x %struct.foo]* %tmp, i32 0, i32 0		; <%struct.foo*> [#uses=1]
+	store %struct.foo* %.sub, %struct.foo** @X, align 4
+	ret void
+}
+
+define i32 @baz() nounwind readonly noinline {
+bb1.thread:
+	%tmpLD1 = load %struct.foo** @X, align 4		; <%struct.foo*> [#uses=1]
+	br label %bb1
+
+bb1:		; preds = %bb1, %bb1.thread
+        %tmp = phi %struct.foo* [%tmpLD1, %bb1.thread ], [ %tmpLD2, %bb1 ]		; <i32> [#uses=2]
+	%i.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ]		; <i32> [#uses=2]
+	%sum.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %tmp3, %bb1 ]		; <i32> [#uses=1]
+	%tmp1 = getelementptr %struct.foo* %tmp, i32 %i.0.reg2mem.0, i32 0		; <i32*> [#uses=1]
+	%tmp2 = load i32* %tmp1, align 4		; <i32> [#uses=1]
+	%tmp6 = add i32 %tmp2, %sum.0.reg2mem.0		; <i32> [#uses=2]
+	%tmp4 = getelementptr %struct.foo* %tmp, i32 %i.0.reg2mem.0, i32 1		; <i32*> [#uses=1]
+        %tmp5 = load i32 * %tmp4
+        %tmp3 = add i32 %tmp5, %tmp6
+	%indvar.next = add i32 %i.0.reg2mem.0, 1		; <i32> [#uses=2]
+        
+      	%tmpLD2 = load %struct.foo** @X, align 4		; <%struct.foo*> [#uses=1]
+
+	%exitcond = icmp eq i32 %indvar.next, 1200		; <i1> [#uses=1]
+	br i1 %exitcond, label %bb2, label %bb1
+
+bb2:		; preds = %bb1
+	ret i32 %tmp3
+}





More information about the llvm-commits mailing list