[llvm-commits] [llvm] r41933 - /llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp

Chris Lattner sabre at nondot.org
Thu Sep 13 14:31:37 PDT 2007


Author: lattner
Date: Thu Sep 13 16:31:36 2007
New Revision: 41933

URL: http://llvm.org/viewvc/llvm-project?rev=41933&view=rev
Log:
Teach GlobalLoadUsesSimpleEnoughForHeapSRA and the SROA rewriter how to handle
a limited form of PHI nodes.  This finally fixes PR1639, speeding 179.art up
from 7.84s to 3.13s on PPC.

Modified:
    llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp

Modified: llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp?rev=41933&r1=41932&r2=41933&view=diff

==============================================================================
--- llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp (original)
+++ llvm/trunk/lib/Transforms/IPO/GlobalOpt.cpp Thu Sep 13 16:31:36 2007
@@ -867,7 +867,8 @@
 
 /// GlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from
 /// GV are simple enough to perform HeapSRA, return true.
-static bool GlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV) {
+static bool GlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV,
+                                                 MallocInst *MI) {
   for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E; 
        ++UI)
     if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
@@ -883,15 +884,35 @@
         }
         
         // getelementptr is also ok, but only a simple form.
-        GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI);
-        if (!GEPI) return false;
+        if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI)) {
+          // Must index into the array and into the struct.
+          if (GEPI->getNumOperands() < 3)
+            return false;
+          
+          // Otherwise the GEP is ok.
+          continue;
+        }
         
-        // Must index into the array and into the struct.
-        if (GEPI->getNumOperands() < 3)
-          return false;
+        if (PHINode *PN = dyn_cast<PHINode>(*UI)) {
+          // We have a phi of a load from the global.  We can only handle this
+          // if the other PHI'd values are actually the same.  In this case,
+          // the rewriter will just drop the phi entirely.
+          for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+            Value *IV = PN->getIncomingValue(i);
+            if (IV == LI) continue;  // Trivially the same.
+            
+            // If the phi'd value is from the malloc that initializes the value,
+            // we can xform it.
+            if (IV == MI) continue;
+            
+            // Otherwise, we don't know what it is.
+            return false;
+          }
+          return true;
+        }
         
-        // Otherwise the GEP is ok.
-        continue;
+        // Otherwise we don't know what this is, not ok.
+        return false;
       }
     }
   return true;
@@ -899,7 +920,7 @@
 
 /// GetHeapSROALoad - Return the load for the specified field of the HeapSROA'd
 /// value, lazily creating it on demand.
-static Value *GetHeapSROALoad(LoadInst *Load, unsigned FieldNo,
+static Value *GetHeapSROALoad(Instruction *Load, unsigned FieldNo,
                               const std::vector<GlobalVariable*> &FieldGlobals,
                               std::vector<Value *> &InsertedLoadsForPtr) {
   if (InsertedLoadsForPtr.size() <= FieldNo)
@@ -958,12 +979,39 @@
     return;
   }
   
-  // Handle PHI nodes.  All PHI nodes must be merging in the same values, so
-  // just treat them like a copy.
+  // Handle PHI nodes.  PHI nodes must be merging in the same values, plus
+  // potentially the original malloc.  Insert phi nodes for each field, then
+  // process uses of the PHI.
   PHINode *PN = cast<PHINode>(LoadUser);
+  std::vector<Value *> PHIsForField;
+  PHIsForField.resize(FieldGlobals.size());
+  for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
+    Value *LoadV = GetHeapSROALoad(Load, i, FieldGlobals, InsertedLoadsForPtr);
+
+    PHINode *FieldPN = new PHINode(LoadV->getType(),
+                                   PN->getName()+"."+utostr(i), PN);
+    // Fill in the predecessor values.
+    for (unsigned pred = 0, e = PN->getNumIncomingValues(); pred != e; ++pred) {
+      // Each predecessor either uses the load or the original malloc.
+      Value *InVal = PN->getIncomingValue(pred);
+      BasicBlock *BB = PN->getIncomingBlock(pred);
+      Value *NewVal;
+      if (isa<MallocInst>(InVal)) {
+        // Insert a reload from the global in the predecessor.
+        NewVal = GetHeapSROALoad(BB->getTerminator(), i, FieldGlobals,
+                                 PHIsForField);
+      } else {
+        NewVal = InsertedLoadsForPtr[i];
+      }
+      FieldPN->addIncoming(NewVal, BB);
+    }
+    PHIsForField[i] = FieldPN;
+  }
+  
+  // Since PHIsForField specifies a phi for every input value, the lazy inserter
+  // will never insert a load.
   while (!PN->use_empty())
-    RewriteHeapSROALoadUser(Load, PN->use_back(),
-                            FieldGlobals, InsertedLoadsForPtr);
+    RewriteHeapSROALoadUser(Load, PN->use_back(), FieldGlobals, PHIsForField);
   PN->eraseFromParent();
 }
 
@@ -1193,7 +1241,7 @@
         // This the structure has an unreasonable number of fields, leave it
         // alone.
         if (AllocTy->getNumElements() <= 16 && AllocTy->getNumElements() > 0 &&
-            GlobalLoadUsesSimpleEnoughForHeapSRA(GV)) {
+            GlobalLoadUsesSimpleEnoughForHeapSRA(GV, MI)) {
           GVI = PerformHeapAllocSRoA(GV, MI);
           return true;
         }





More information about the llvm-commits mailing list